From 6c9007f65d14c87842920b0d376c9ade34fab152 Mon Sep 17 00:00:00 2001
From: Stas Sergeev <stsp2@yandex.ru>
Date: Thu, 22 Jun 2023 21:52:23 +0500
Subject: [PATCH 0001/2047] fs/locks: F_UNLCK extension for F_OFD_GETLK

Currently F_UNLCK with F_OFD_GETLK returns -EINVAL. This patch changes
it such that specifying F_UNLCK returns information only about OFD locks
that are owned by the given file description.

Cc: Jeff Layton <jlayton@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Shuah Khan <shuah@kernel.org>
Cc: linux-kselftest@vger.kernel.org
Cc: linux-api@vger.kernel.org
Signed-off-by: Stas Sergeev <stsp2@yandex.ru>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index df8b26a42524..210766007e63 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -868,6 +868,21 @@ static bool posix_locks_conflict(struct file_lock *caller_fl,
 	return locks_conflict(caller_fl, sys_fl);
 }
 
+/* Determine if lock sys_fl blocks lock caller_fl. Used on xx_GETLK
+ * path so checks for additional GETLK-specific things like F_UNLCK.
+ */
+static bool posix_test_locks_conflict(struct file_lock *caller_fl,
+				      struct file_lock *sys_fl)
+{
+	/* F_UNLCK checks any locks on the same fd. */
+	if (caller_fl->fl_type == F_UNLCK) {
+		if (!posix_same_owner(caller_fl, sys_fl))
+			return false;
+		return locks_overlap(caller_fl, sys_fl);
+	}
+	return posix_locks_conflict(caller_fl, sys_fl);
+}
+
 /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
  * checking before calling the locks_conflict().
  */
@@ -901,7 +916,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
 retry:
 	spin_lock(&ctx->flc_lock);
 	list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
-		if (!posix_locks_conflict(fl, cfl))
+		if (!posix_test_locks_conflict(fl, cfl))
 			continue;
 		if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
 			&& (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
@@ -2207,7 +2222,8 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
 	if (fl == NULL)
 		return -ENOMEM;
 	error = -EINVAL;
-	if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
+	if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK
+			&& flock->l_type != F_WRLCK)
 		goto out;
 
 	error = flock_to_posix_lock(filp, fl, flock);
@@ -2414,7 +2430,8 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
 		return -ENOMEM;
 
 	error = -EINVAL;
-	if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
+	if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK
+			&& flock->l_type != F_WRLCK)
 		goto out;
 
 	error = flock64_to_posix_lock(filp, fl, flock);

From 1425bdd7ef88631d0623ce3d0b8c89d8a65815d2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 22 Jun 2023 21:36:11 +0300
Subject: [PATCH 0002/2047] regmap: Revert "add 64-bit mode support" and Co.

With unsigned int type we never ever can pass 64-bit value.
Remove never properly worked code.

Note, there are no users in kernel for this size of register
offsets or data.

This reverts commit afcc00b91f1865f6d0bbdb687dd642ce8a3c3c9e.

Also revert other 64-bit code excerpts in the regmap implementation
that had been induced by the false impression made by the above
mentioned change that there is a support of that data size.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230622183613.58762-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap.c | 122 -----------------------------------
 1 file changed, 122 deletions(-)

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 89a7f1c459c1..8b37451fa931 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -311,26 +311,6 @@ static void regmap_format_32_native(void *buf, unsigned int val,
 	memcpy(buf, &v, sizeof(v));
 }
 
-#ifdef CONFIG_64BIT
-static void regmap_format_64_be(void *buf, unsigned int val, unsigned int shift)
-{
-	put_unaligned_be64((u64) val << shift, buf);
-}
-
-static void regmap_format_64_le(void *buf, unsigned int val, unsigned int shift)
-{
-	put_unaligned_le64((u64) val << shift, buf);
-}
-
-static void regmap_format_64_native(void *buf, unsigned int val,
-				    unsigned int shift)
-{
-	u64 v = (u64) val << shift;
-
-	memcpy(buf, &v, sizeof(v));
-}
-#endif
-
 static void regmap_parse_inplace_noop(void *buf)
 {
 }
@@ -411,40 +391,6 @@ static unsigned int regmap_parse_32_native(const void *buf)
 	return v;
 }
 
-#ifdef CONFIG_64BIT
-static unsigned int regmap_parse_64_be(const void *buf)
-{
-	return get_unaligned_be64(buf);
-}
-
-static unsigned int regmap_parse_64_le(const void *buf)
-{
-	return get_unaligned_le64(buf);
-}
-
-static void regmap_parse_64_be_inplace(void *buf)
-{
-	u64 v =  get_unaligned_be64(buf);
-
-	memcpy(buf, &v, sizeof(v));
-}
-
-static void regmap_parse_64_le_inplace(void *buf)
-{
-	u64 v = get_unaligned_le64(buf);
-
-	memcpy(buf, &v, sizeof(v));
-}
-
-static unsigned int regmap_parse_64_native(const void *buf)
-{
-	u64 v;
-
-	memcpy(&v, buf, sizeof(v));
-	return v;
-}
-#endif
-
 static void regmap_lock_hwlock(void *__map)
 {
 	struct regmap *map = __map;
@@ -1005,24 +951,6 @@ struct regmap *__regmap_init(struct device *dev,
 		}
 		break;
 
-#ifdef CONFIG_64BIT
-	case 64:
-		switch (reg_endian) {
-		case REGMAP_ENDIAN_BIG:
-			map->format.format_reg = regmap_format_64_be;
-			break;
-		case REGMAP_ENDIAN_LITTLE:
-			map->format.format_reg = regmap_format_64_le;
-			break;
-		case REGMAP_ENDIAN_NATIVE:
-			map->format.format_reg = regmap_format_64_native;
-			break;
-		default:
-			goto err_hwlock;
-		}
-		break;
-#endif
-
 	default:
 		goto err_hwlock;
 	}
@@ -1086,28 +1014,6 @@ struct regmap *__regmap_init(struct device *dev,
 			goto err_hwlock;
 		}
 		break;
-#ifdef CONFIG_64BIT
-	case 64:
-		switch (val_endian) {
-		case REGMAP_ENDIAN_BIG:
-			map->format.format_val = regmap_format_64_be;
-			map->format.parse_val = regmap_parse_64_be;
-			map->format.parse_inplace = regmap_parse_64_be_inplace;
-			break;
-		case REGMAP_ENDIAN_LITTLE:
-			map->format.format_val = regmap_format_64_le;
-			map->format.parse_val = regmap_parse_64_le;
-			map->format.parse_inplace = regmap_parse_64_le_inplace;
-			break;
-		case REGMAP_ENDIAN_NATIVE:
-			map->format.format_val = regmap_format_64_native;
-			map->format.parse_val = regmap_parse_64_native;
-			break;
-		default:
-			goto err_hwlock;
-		}
-		break;
-#endif
 	}
 
 	if (map->format.format_write) {
@@ -2160,9 +2066,6 @@ static int regmap_noinc_readwrite(struct regmap *map, unsigned int reg,
 	u8 *u8p;
 	u16 *u16p;
 	u32 *u32p;
-#ifdef CONFIG_64BIT
-	u64 *u64p;
-#endif
 	int ret;
 	int i;
 
@@ -2182,13 +2085,6 @@ static int regmap_noinc_readwrite(struct regmap *map, unsigned int reg,
 		if (write)
 			lastval = (unsigned int)u32p[val_count - 1];
 		break;
-#ifdef CONFIG_64BIT
-	case 8:
-		u64p = val;
-		if (write)
-			lastval = (unsigned int)u64p[val_count - 1];
-		break;
-#endif
 	default:
 		return -EINVAL;
 	}
@@ -2226,11 +2122,6 @@ static int regmap_noinc_readwrite(struct regmap *map, unsigned int reg,
 			case 4:
 				pr_cont("%x", u32p[i]);
 				break;
-#ifdef CONFIG_64BIT
-			case 8:
-				pr_cont("%llx", u64p[i]);
-				break;
-#endif
 			default:
 				break;
 			}
@@ -2438,11 +2329,6 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
 			case 4:
 				ival = *(u32 *)(val + (i * val_bytes));
 				break;
-#ifdef CONFIG_64BIT
-			case 8:
-				ival = *(u64 *)(val + (i * val_bytes));
-				break;
-#endif
 			default:
 				ret = -EINVAL;
 				goto out;
@@ -3207,9 +3093,6 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
 		for (i = 0; i < val_count * val_bytes; i += val_bytes)
 			map->format.parse_inplace(val + i);
 	} else {
-#ifdef CONFIG_64BIT
-		u64 *u64 = val;
-#endif
 		u32 *u32 = val;
 		u16 *u16 = val;
 		u8 *u8 = val;
@@ -3225,11 +3108,6 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
 				goto out;
 
 			switch (map->format.val_bytes) {
-#ifdef CONFIG_64BIT
-			case 8:
-				u64[i] = ival;
-				break;
-#endif
 			case 4:
 				u32[i] = ival;
 				break;

From 039fd2e4134b7b880ba83f40a136df440047594a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 22 Jun 2023 21:36:12 +0300
Subject: [PATCH 0003/2047] regmap: cache: Revert "Add 64-bit mode support"

There is no support for 64-bit data size in regmap, so
there is no point to have it in regmap cache.

This reverts commit 8b7663de6e2bfe3c40e1846e1c4625f33d138757.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230622183613.58762-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regcache.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 28bc3ae9458a..156490ab7f34 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -590,14 +590,6 @@ void regcache_set_val(struct regmap *map, void *base, unsigned int idx,
 		cache[idx] = val;
 		break;
 	}
-#ifdef CONFIG_64BIT
-	case 8: {
-		u64 *cache = base;
-
-		cache[idx] = val;
-		break;
-	}
-#endif
 	default:
 		BUG();
 	}
@@ -630,13 +622,6 @@ unsigned int regcache_get_val(struct regmap *map, const void *base,
 
 		return cache[idx];
 	}
-#ifdef CONFIG_64BIT
-	case 8: {
-		const u64 *cache = base;
-
-		return cache[idx];
-	}
-#endif
 	default:
 		BUG();
 	}

From 875403a7b524e9523e49dd32662adbc3e48cc12a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 22 Jun 2023 21:36:13 +0300
Subject: [PATCH 0004/2047] regmap: mmio: Remove unused 64-bit support code

regmap API does not support 64-bit data size, so
there is no point to have it in regmap MMIO.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230622183613.58762-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-mmio.c | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c
index 8132b5c101c4..99d7fd85ca7d 100644
--- a/drivers/base/regmap/regmap-mmio.c
+++ b/drivers/base/regmap/regmap-mmio.c
@@ -202,15 +202,6 @@ static int regmap_mmio_noinc_write(void *context, unsigned int reg,
 				writel(swab32(valp[i]), ctx->regs + reg);
 			goto out_clk;
 		}
-#ifdef CONFIG_64BIT
-		case 8:
-		{
-			const u64 *valp = (const u64 *)val;
-			for (i = 0; i < val_count; i++)
-				writeq(swab64(valp[i]), ctx->regs + reg);
-			goto out_clk;
-		}
-#endif
 		default:
 			ret = -EINVAL;
 			goto out_clk;
@@ -227,11 +218,6 @@ static int regmap_mmio_noinc_write(void *context, unsigned int reg,
 	case 4:
 		writesl(ctx->regs + reg, (const u32 *)val, val_count);
 		break;
-#ifdef CONFIG_64BIT
-	case 8:
-		writesq(ctx->regs + reg, (const u64 *)val, val_count);
-		break;
-#endif
 	default:
 		ret = -EINVAL;
 		break;
@@ -363,11 +349,6 @@ static int regmap_mmio_noinc_read(void *context, unsigned int reg,
 	case 4:
 		readsl(ctx->regs + reg, (u32 *)val, val_count);
 		break;
-#ifdef CONFIG_64BIT
-	case 8:
-		readsq(ctx->regs + reg, (u64 *)val, val_count);
-		break;
-#endif
 	default:
 		ret = -EINVAL;
 		goto out_clk;
@@ -387,11 +368,6 @@ static int regmap_mmio_noinc_read(void *context, unsigned int reg,
 		case 4:
 			swab32_array(val, val_count);
 			break;
-#ifdef CONFIG_64BIT
-		case 8:
-			swab64_array(val, val_count);
-			break;
-#endif
 		default:
 			ret = -EINVAL;
 			break;

From d16db38c2a66060ee25c6b86ee7b6d66d40fc8e0 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Mon, 3 Jul 2023 20:15:54 +0200
Subject: [PATCH 0005/2047] dt-bindings: regulator: Describe Qualcomm REFGEN
 regulator

Modern Qualcomm SoCs have a REFGEN (reference voltage generator)
regulator, providing reference voltage to on-chip IP, like PHYs.
It's controlled through MMIO and we can toggle it or read its state back.

Describe it.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230628-topic-refgen-v3-1-9fbf0e605d23@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../qcom,sdm845-refgen-regulator.yaml         | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/qcom,sdm845-refgen-regulator.yaml

diff --git a/Documentation/devicetree/bindings/regulator/qcom,sdm845-refgen-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,sdm845-refgen-regulator.yaml
new file mode 100644
index 000000000000..f02f97d4fdd2
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/qcom,sdm845-refgen-regulator.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/qcom,sdm845-refgen-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies, Inc. REFGEN Regulator
+
+maintainers:
+  - Konrad Dybcio <konradybcio@kernel.org>
+
+description:
+  The REFGEN (reference voltage generator) regulator provides reference
+  voltage for on-chip IPs (like PHYs) on some Qualcomm SoCs.
+
+allOf:
+  - $ref: regulator.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - qcom,sc7180-refgen-regulator
+              - qcom,sc8180x-refgen-regulator
+              - qcom,sm8150-refgen-regulator
+          - const: qcom,sdm845-refgen-regulator
+
+      - items:
+          - enum:
+              - qcom,sc7280-refgen-regulator
+              - qcom,sc8280xp-refgen-regulator
+              - qcom,sm6350-refgen-regulator
+              - qcom,sm6375-refgen-regulator
+              - qcom,sm8350-refgen-regulator
+          - const: qcom,sm8250-refgen-regulator
+
+      - enum:
+          - qcom,sdm845-refgen-regulator
+          - qcom,sm8250-refgen-regulator
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    regulator@162f000 {
+      compatible = "qcom,sm8250-refgen-regulator";
+      reg = <0x0162f000 0x84>;
+    };
+...

From 7cbfbe23796086fdb72b681e2c182b02acd36a04 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Mon, 3 Jul 2023 20:15:55 +0200
Subject: [PATCH 0006/2047] regulator: Introduce Qualcomm REFGEN regulator
 driver

Modern Qualcomm SoCs have a REFGEN (reference voltage generator)
regulator, providing reference voltage to on-chip IP, like PHYs.

Add a driver to support toggling that regulator.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Link: https://lore.kernel.org/r/20230628-topic-refgen-v3-2-9fbf0e605d23@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig                 |  11 ++
 drivers/regulator/Makefile                |   1 +
 drivers/regulator/qcom-refgen-regulator.c | 154 ++++++++++++++++++++++
 3 files changed, 166 insertions(+)
 create mode 100644 drivers/regulator/qcom-refgen-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 823f8e6e4801..18a05b09406f 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -989,6 +989,17 @@ config REGULATOR_PWM
 	  This driver supports PWM controlled voltage regulators. PWM
 	  duty cycle can increase or decrease the voltage.
 
+config REGULATOR_QCOM_REFGEN
+	tristate "Qualcomm REFGEN regulator driver"
+	depends on HAS_IOMEM
+	depends on REGMAP
+	help
+	  This driver supports the MMIO-mapped reference voltage regulator,
+	  used internally by some PHYs on many Qualcomm SoCs.
+
+	  Say M here if you want to include support for this regulator as
+	  a module. The module will be named "qcom-refgen-regulator".
+
 config REGULATOR_QCOM_RPM
 	tristate "Qualcomm RPM regulator driver"
 	depends on MFD_QCOM_RPM
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 15e0d614ff66..4f4589877e81 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -108,6 +108,7 @@ obj-$(CONFIG_REGULATOR_MT6380)	+= mt6380-regulator.o
 obj-$(CONFIG_REGULATOR_MT6397)	+= mt6397-regulator.o
 obj-$(CONFIG_REGULATOR_MTK_DVFSRC) += mtk-dvfsrc-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_LABIBB) += qcom-labibb-regulator.o
+obj-$(CONFIG_REGULATOR_QCOM_REFGEN) += qcom-refgen-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_RPM) += qcom_rpm-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_RPMH) += qcom-rpmh-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_SMD_RPM) += qcom_smd-regulator.o
diff --git a/drivers/regulator/qcom-refgen-regulator.c b/drivers/regulator/qcom-refgen-regulator.c
new file mode 100644
index 000000000000..656fe330d38f
--- /dev/null
+++ b/drivers/regulator/qcom-refgen-regulator.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2017, 2019-2020, The Linux Foundation. All rights reserved.
+// Copyright (c) 2023, Linaro Limited
+
+#include <linux/bitfield.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
+
+#define REFGEN_REG_BIAS_EN		0x08
+#define REFGEN_BIAS_EN_MASK		GENMASK(2, 0)
+ #define REFGEN_BIAS_EN_ENABLE		0x7
+ #define REFGEN_BIAS_EN_DISABLE		0x6
+
+#define REFGEN_REG_BG_CTRL		0x14
+#define REFGEN_BG_CTRL_MASK		GENMASK(2, 1)
+ #define REFGEN_BG_CTRL_ENABLE		0x3
+ #define REFGEN_BG_CTRL_DISABLE		0x2
+
+#define REFGEN_REG_PWRDWN_CTRL5		0x80
+#define REFGEN_PWRDWN_CTRL5_MASK	BIT(0)
+ #define REFGEN_PWRDWN_CTRL5_ENABLE	0x1
+
+static int qcom_sdm845_refgen_enable(struct regulator_dev *rdev)
+{
+	regmap_update_bits(rdev->regmap, REFGEN_REG_BG_CTRL, REFGEN_BG_CTRL_MASK,
+			   FIELD_PREP(REFGEN_BG_CTRL_MASK, REFGEN_BG_CTRL_ENABLE));
+
+	regmap_write(rdev->regmap, REFGEN_REG_BIAS_EN,
+		     FIELD_PREP(REFGEN_BIAS_EN_MASK, REFGEN_BIAS_EN_ENABLE));
+
+	return 0;
+}
+
+static int qcom_sdm845_refgen_disable(struct regulator_dev *rdev)
+{
+	regmap_write(rdev->regmap, REFGEN_REG_BIAS_EN,
+		     FIELD_PREP(REFGEN_BIAS_EN_MASK, REFGEN_BIAS_EN_DISABLE));
+
+	regmap_update_bits(rdev->regmap, REFGEN_REG_BG_CTRL, REFGEN_BG_CTRL_MASK,
+			   FIELD_PREP(REFGEN_BG_CTRL_MASK, REFGEN_BG_CTRL_DISABLE));
+
+	return 0;
+}
+
+static int qcom_sdm845_refgen_is_enabled(struct regulator_dev *rdev)
+{
+	u32 val;
+
+	regmap_read(rdev->regmap, REFGEN_REG_BG_CTRL, &val);
+	if (FIELD_GET(REFGEN_BG_CTRL_MASK, val) != REFGEN_BG_CTRL_ENABLE)
+		return 0;
+
+	regmap_read(rdev->regmap, REFGEN_REG_BIAS_EN, &val);
+	if (FIELD_GET(REFGEN_BIAS_EN_MASK, val) != REFGEN_BIAS_EN_ENABLE)
+		return 0;
+
+	return 1;
+}
+
+static struct regulator_desc sdm845_refgen_desc = {
+	.enable_time = 5,
+	.name = "refgen",
+	.owner = THIS_MODULE,
+	.type = REGULATOR_VOLTAGE,
+	.ops = &(const struct regulator_ops) {
+		.enable		= qcom_sdm845_refgen_enable,
+		.disable	= qcom_sdm845_refgen_disable,
+		.is_enabled	= qcom_sdm845_refgen_is_enabled,
+	},
+};
+
+static struct regulator_desc sm8250_refgen_desc = {
+	.enable_reg = REFGEN_REG_PWRDWN_CTRL5,
+	.enable_mask = REFGEN_PWRDWN_CTRL5_MASK,
+	.enable_val = REFGEN_PWRDWN_CTRL5_ENABLE,
+	.disable_val = 0,
+	.enable_time = 5,
+	.name = "refgen",
+	.owner = THIS_MODULE,
+	.type = REGULATOR_VOLTAGE,
+	.ops = &(const struct regulator_ops) {
+		.enable		= regulator_enable_regmap,
+		.disable	= regulator_disable_regmap,
+		.is_enabled	= regulator_is_enabled_regmap,
+	},
+};
+
+static const struct regmap_config qcom_refgen_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.fast_io = true,
+};
+
+static int qcom_refgen_probe(struct platform_device *pdev)
+{
+	struct regulator_init_data *init_data;
+	struct regulator_config config = {};
+	const struct regulator_desc *rdesc;
+	struct device *dev = &pdev->dev;
+	struct regulator_dev *rdev;
+	struct regmap *regmap;
+	void __iomem *base;
+
+	rdesc = of_device_get_match_data(dev);
+	if (!rdesc)
+		return -ENODATA;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	regmap = devm_regmap_init_mmio(dev, base, &qcom_refgen_regmap_config);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	init_data = of_get_regulator_init_data(dev, dev->of_node, rdesc);
+	if (!init_data)
+		return -ENOMEM;
+
+	config.dev = dev;
+	config.init_data = init_data;
+	config.of_node = dev->of_node;
+	config.regmap = regmap;
+
+	rdev = devm_regulator_register(dev, rdesc, &config);
+	if (IS_ERR(rdev))
+		return PTR_ERR(rdev);
+
+	return 0;
+}
+
+static const struct of_device_id qcom_refgen_match_table[] = {
+	{ .compatible = "qcom,sdm845-refgen-regulator", .data = &sdm845_refgen_desc },
+	{ .compatible = "qcom,sm8250-refgen-regulator", .data = &sm8250_refgen_desc },
+	{ }
+};
+
+static struct platform_driver qcom_refgen_driver = {
+	.probe = qcom_refgen_probe,
+	.driver = {
+		.name = "qcom-refgen-regulator",
+		.of_match_table = qcom_refgen_match_table,
+	},
+};
+module_platform_driver(qcom_refgen_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Qualcomm REFGEN regulator driver");

From 8978af5ef662541bc0a5a7722ad6942cd19daed0 Mon Sep 17 00:00:00 2001
From: ChiYuan Huang <cy_huang@richtek.com>
Date: Thu, 29 Jun 2023 22:29:55 +0800
Subject: [PATCH 0007/2047] regulator: dt-bindings: rt5739: Add compatible for
 rt5733

Add compatible string for rt5733.

Signed-off-by: ChiYuan Huang <cy_huang@richtek.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/1688048996-25606-2-git-send-email-cy_huang@richtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/regulator/richtek,rt5739.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/regulator/richtek,rt5739.yaml b/Documentation/devicetree/bindings/regulator/richtek,rt5739.yaml
index 358297dd3fb7..e95e046e9ed6 100644
--- a/Documentation/devicetree/bindings/regulator/richtek,rt5739.yaml
+++ b/Documentation/devicetree/bindings/regulator/richtek,rt5739.yaml
@@ -21,6 +21,7 @@ allOf:
 properties:
   compatible:
     enum:
+      - richtek,rt5733
       - richtek,rt5739
 
   reg:

From 6f5e285839845729858b8f6ca7cf3dd35e1f9a29 Mon Sep 17 00:00:00 2001
From: ChiYuan Huang <cy_huang@richtek.com>
Date: Thu, 29 Jun 2023 22:29:56 +0800
Subject: [PATCH 0008/2047] regulator: rt5739: Add DID check and compatible for
 rt5733

Add compatible and use DID to check rt5733.

The only difference bwtween rt5733 and rt5739 is the output range and
voltage step. These two chips can be distinguished from the DIE id.

Signed-off-by: ChiYuan Huang <cy_huang@richtek.com>
Link: https://lore.kernel.org/r/1688048996-25606-3-git-send-email-cy_huang@richtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rt5739.c | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/drivers/regulator/rt5739.c b/drivers/regulator/rt5739.c
index 0ce6a1666752..91412c905ce6 100644
--- a/drivers/regulator/rt5739.c
+++ b/drivers/regulator/rt5739.c
@@ -31,10 +31,17 @@
 #define RT5739_MODEVSEL1_MASK	BIT(1)
 #define RT5739_MODEVSEL0_MASK	BIT(0)
 #define RT5739_VID_MASK		GENMASK(7, 5)
+#define RT5739_DID_MASK		GENMASK(3, 0)
 #define RT5739_ACTD_MASK	BIT(7)
 #define RT5739_ENVSEL1_MASK	BIT(1)
 #define RT5739_ENVSEL0_MASK	BIT(0)
 
+#define RT5733_CHIPDIE_ID	0x1
+#define RT5733_VOLT_MINUV	270000
+#define RT5733_VOLT_MAXUV	1401250
+#define RT5733_VOLT_STPUV	6250
+#define RT5733_N_VOLTS		182
+
 #define RT5739_VOLT_MINUV	300000
 #define RT5739_VOLT_MAXUV	1300000
 #define RT5739_VOLT_STPUV	5000
@@ -93,8 +100,11 @@ static int rt5739_set_suspend_voltage(struct regulator_dev *rdev, int uV)
 	const struct regulator_desc *desc = rdev->desc;
 	struct regmap *regmap = rdev_get_regmap(rdev);
 	unsigned int reg, vsel;
+	int max_uV;
 
-	if (uV < RT5739_VOLT_MINUV || uV > RT5739_VOLT_MAXUV)
+	max_uV = desc->min_uV + desc->uV_step * (desc->n_voltages - 1);
+
+	if (uV < desc->min_uV || uV > max_uV)
 		return -EINVAL;
 
 	if (desc->vsel_reg == RT5739_REG_NSEL0)
@@ -102,7 +112,7 @@ static int rt5739_set_suspend_voltage(struct regulator_dev *rdev, int uV)
 	else
 		reg = RT5739_REG_NSEL0;
 
-	vsel = (uV - RT5739_VOLT_MINUV) / RT5739_VOLT_STPUV;
+	vsel = (uV - desc->min_uV) / desc->uV_step;
 	return regmap_write(regmap, reg, vsel);
 }
 
@@ -189,15 +199,12 @@ static unsigned int rt5739_of_map_mode(unsigned int mode)
 }
 
 static void rt5739_init_regulator_desc(struct regulator_desc *desc,
-				       bool vsel_active_high)
+				       bool vsel_active_high, u8 did)
 {
 	/* Fixed */
 	desc->name = "rt5739-regulator";
 	desc->owner = THIS_MODULE;
 	desc->ops = &rt5739_regulator_ops;
-	desc->n_voltages = RT5739_N_VOLTS;
-	desc->min_uV = RT5739_VOLT_MINUV;
-	desc->uV_step = RT5739_VOLT_STPUV;
 	desc->vsel_mask = RT5739_VSEL_MASK;
 	desc->enable_reg = RT5739_REG_CNTL2;
 	desc->active_discharge_reg = RT5739_REG_CNTL1;
@@ -213,6 +220,20 @@ static void rt5739_init_regulator_desc(struct regulator_desc *desc,
 		desc->vsel_reg = RT5739_REG_NSEL0;
 		desc->enable_mask = RT5739_ENVSEL0_MASK;
 	}
+
+	/* Assigned by CHIPDIE ID */
+	switch (did) {
+	case RT5733_CHIPDIE_ID:
+		desc->n_voltages = RT5733_N_VOLTS;
+		desc->min_uV = RT5733_VOLT_MINUV;
+		desc->uV_step = RT5733_VOLT_STPUV;
+		break;
+	default:
+		desc->n_voltages = RT5739_N_VOLTS;
+		desc->min_uV = RT5739_VOLT_MINUV;
+		desc->uV_step = RT5739_VOLT_STPUV;
+		break;
+	}
 }
 
 static const struct regmap_config rt5739_regmap_config = {
@@ -258,7 +279,7 @@ static int rt5739_probe(struct i2c_client *i2c)
 
 	vsel_acth = device_property_read_bool(dev, "richtek,vsel-active-high");
 
-	rt5739_init_regulator_desc(desc, vsel_acth);
+	rt5739_init_regulator_desc(desc, vsel_acth, vid & RT5739_DID_MASK);
 
 	cfg.dev = dev;
 	cfg.of_node = dev_of_node(dev);
@@ -271,6 +292,7 @@ static int rt5739_probe(struct i2c_client *i2c)
 }
 
 static const struct of_device_id rt5739_device_table[] = {
+	{ .compatible = "richtek,rt5733" },
 	{ .compatible = "richtek,rt5739" },
 	{ /* sentinel */ }
 };

From 42a95739c5bc4d7a6e93a43117e9283598ba2287 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Thu, 29 Jun 2023 11:42:00 +0100
Subject: [PATCH 0009/2047] regulator: raa215300: Change the scope of the
 variables {clkin_name, xin_name}

Change the scope of the variables {clkin_name, xin_name} from global->local
to fix the below warning.

drivers/regulator/raa215300.c:42:12: sparse: sparse: symbol 'xin_name' was
not declared. Should it be static?

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202306250552.Fan9WTiN-lkp@intel.com/
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230629104200.102663-1-biju.das.jz@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/raa215300.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/regulator/raa215300.c b/drivers/regulator/raa215300.c
index 24a1c89f5dbc..8e1a4c86b978 100644
--- a/drivers/regulator/raa215300.c
+++ b/drivers/regulator/raa215300.c
@@ -38,8 +38,6 @@
 #define RAA215300_REG_BLOCK_EN_RTC_EN	BIT(6)
 #define RAA215300_RTC_DEFAULT_ADDR	0x6f
 
-const char *clkin_name = "clkin";
-const char *xin_name = "xin";
 static struct clk *clk;
 
 static const struct regmap_config raa215300_regmap_config = {
@@ -71,8 +69,10 @@ static int raa215300_clk_present(struct i2c_client *client, const char *name)
 static int raa215300_i2c_probe(struct i2c_client *client)
 {
 	struct device *dev = &client->dev;
-	const char *clk_name = xin_name;
+	const char *clkin_name = "clkin";
 	unsigned int pmic_version, val;
+	const char *xin_name = "xin";
+	const char *clk_name = NULL;
 	struct regmap *regmap;
 	int ret;
 
@@ -114,15 +114,17 @@ static int raa215300_i2c_probe(struct i2c_client *client)
 	ret = raa215300_clk_present(client, xin_name);
 	if (ret < 0) {
 		return ret;
-	} else if (!ret) {
+	} else if (ret) {
+		clk_name = xin_name;
+	} else {
 		ret = raa215300_clk_present(client, clkin_name);
 		if (ret < 0)
 			return ret;
-
-		clk_name = clkin_name;
+		if (ret)
+			clk_name = clkin_name;
 	}
 
-	if (ret) {
+	if (clk_name) {
 		char *name = pmic_version >= 0x12 ? "isl1208" : "raa215300_a0";
 		struct device_node *np = client->dev.of_node;
 		u32 addr = RAA215300_RTC_DEFAULT_ADDR;

From 741da3f60479acc0de3d79501c4819e49fa28639 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Mon, 26 Jun 2023 11:15:44 +0200
Subject: [PATCH 0010/2047] regulator: raa215300: Switch back to use struct
 i2c_driver::probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

struct i2c_driver::probe_new is about to go away. Switch the driver to
use the probe callback with the same prototype.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20230626091544.557403-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/raa215300.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/raa215300.c b/drivers/regulator/raa215300.c
index 8e1a4c86b978..848a03a2fbd4 100644
--- a/drivers/regulator/raa215300.c
+++ b/drivers/regulator/raa215300.c
@@ -182,7 +182,7 @@ static struct i2c_driver raa215300_i2c_driver = {
 		.name = "raa215300",
 		.of_match_table = raa215300_dt_match,
 	},
-	.probe_new = raa215300_i2c_probe,
+	.probe = raa215300_i2c_probe,
 };
 module_i2c_driver(raa215300_i2c_driver);
 

From 0605d9fb411f3337482976842a3901d6c125d298 Mon Sep 17 00:00:00 2001
From: Maksim Kiselev <bigunclemax@gmail.com>
Date: Sat, 24 Jun 2023 16:16:22 +0300
Subject: [PATCH 0011/2047] spi: sun6i: add quirk for dual and quad SPI modes
 support

New Allwinner's SPI controllers can support dual and quad SPI modes.
To enable one of these modes, we should set the corresponding bit in
the SUN6I_BURST_CTL_CNT_REG register. DRM (28 bits) for dual mode and
Quad_EN (29 bits) for quad transmission.

Signed-off-by: Maksim Kiselev <bigunclemax@gmail.com>
Link: https://lore.kernel.org/r/20230624131632.2972546-2-bigunclemax@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sun6i.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index 30d541612253..cec2747235ab 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -83,6 +83,9 @@
 #define SUN6I_XMIT_CNT_REG		0x34
 
 #define SUN6I_BURST_CTL_CNT_REG		0x38
+#define SUN6I_BURST_CTL_CNT_STC_MASK		GENMASK(23, 0)
+#define SUN6I_BURST_CTL_CNT_DRM			BIT(28)
+#define SUN6I_BURST_CTL_CNT_QUAD_EN		BIT(29)
 
 #define SUN6I_TXDATA_REG		0x200
 #define SUN6I_RXDATA_REG		0x300
@@ -90,6 +93,7 @@
 struct sun6i_spi_cfg {
 	unsigned long		fifo_depth;
 	bool			has_clk_ctl;
+	u32			mode_bits;
 };
 
 struct sun6i_spi {
@@ -266,7 +270,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 	unsigned int div, div_cdr1, div_cdr2, timeout;
 	unsigned int start, end, tx_time;
 	unsigned int trig_level;
-	unsigned int tx_len = 0, rx_len = 0;
+	unsigned int tx_len = 0, rx_len = 0, nbits = 0;
 	bool use_dma;
 	int ret = 0;
 	u32 reg;
@@ -418,13 +422,29 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 	sun6i_spi_write(sspi, SUN6I_GBL_CTL_REG, reg);
 
 	/* Setup the transfer now... */
-	if (sspi->tx_buf)
+	if (sspi->tx_buf) {
 		tx_len = tfr->len;
+		nbits = tfr->tx_nbits;
+	} else if (tfr->rx_buf) {
+		nbits = tfr->rx_nbits;
+	}
+
+	switch (nbits) {
+	case SPI_NBITS_DUAL:
+		reg = SUN6I_BURST_CTL_CNT_DRM;
+		break;
+	case SPI_NBITS_QUAD:
+		reg = SUN6I_BURST_CTL_CNT_QUAD_EN;
+		break;
+	case SPI_NBITS_SINGLE:
+	default:
+		reg = FIELD_PREP(SUN6I_BURST_CTL_CNT_STC_MASK, tx_len);
+	}
 
 	/* Setup the counters */
+	sun6i_spi_write(sspi, SUN6I_BURST_CTL_CNT_REG, reg);
 	sun6i_spi_write(sspi, SUN6I_BURST_CNT_REG, tfr->len);
 	sun6i_spi_write(sspi, SUN6I_XMIT_CNT_REG, tx_len);
-	sun6i_spi_write(sspi, SUN6I_BURST_CTL_CNT_REG, tx_len);
 
 	if (!use_dma) {
 		/* Fill the TX FIFO */
@@ -623,7 +643,8 @@ static int sun6i_spi_probe(struct platform_device *pdev)
 	master->set_cs = sun6i_spi_set_cs;
 	master->transfer_one = sun6i_spi_transfer_one;
 	master->num_chipselect = 4;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST;
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST |
+			    sspi->cfg->mode_bits;
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
 	master->dev.of_node = pdev->dev.of_node;
 	master->auto_runtime_pm = true;

From 25453d797d7abe8801951c8290ea11ea8bba7b96 Mon Sep 17 00:00:00 2001
From: Maksim Kiselev <bigunclemax@gmail.com>
Date: Sat, 24 Jun 2023 16:16:23 +0300
Subject: [PATCH 0012/2047] spi: sun6i: add dual and quad SPI modes support for
 R329/D1/R528/T113s

Listed SoCs have SPI controllers that can operate in dual or quad modes.
This patch adds dual/quad mode bits for spi_master on these SoCS.

Signed-off-by: Maksim Kiselev <bigunclemax@gmail.com>
Link: https://lore.kernel.org/r/20230624131632.2972546-3-bigunclemax@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sun6i.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index cec2747235ab..e9144d76bcdb 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -761,6 +761,7 @@ static const struct sun6i_spi_cfg sun8i_h3_spi_cfg = {
 
 static const struct sun6i_spi_cfg sun50i_r329_spi_cfg = {
 	.fifo_depth	= SUN8I_FIFO_DEPTH,
+	.mode_bits	= SPI_RX_DUAL | SPI_TX_DUAL | SPI_RX_QUAD | SPI_TX_QUAD,
 };
 
 static const struct of_device_id sun6i_spi_match[] = {

From 68a199640d28ecd507bc53bccb5cd98f5330fafc Mon Sep 17 00:00:00 2001
From: Martin Kurbanov <mmkurbanov@sberdevices.ru>
Date: Thu, 6 Jul 2023 14:03:30 +0300
Subject: [PATCH 0013/2047] spi: amlogic-spifc-a1: implement adjust_op_size()

This enhancement eliminates the need for a loop in the
amlogic_spifc_a1_exec_op() function and allows the SPI core to
dynamically divide transactions into appropriately sized chunks.

Signed-off-by: Martin Kurbanov <mmkurbanov@sberdevices.ru>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20230706110331.19794-2-mmkurbanov@sberdevices.ru
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-amlogic-spifc-a1.c | 66 +++++++++++-------------------
 1 file changed, 25 insertions(+), 41 deletions(-)

diff --git a/drivers/spi/spi-amlogic-spifc-a1.c b/drivers/spi/spi-amlogic-spifc-a1.c
index 3c4224c38399..a92e4fc23396 100644
--- a/drivers/spi/spi-amlogic-spifc-a1.c
+++ b/drivers/spi/spi-amlogic-spifc-a1.c
@@ -72,7 +72,7 @@
 
 #define SPIFC_A1_USER_DBUF_ADDR_REG	0x248
 
-#define SPIFC_A1_BUFFER_SIZE		512
+#define SPIFC_A1_BUFFER_SIZE		512U
 
 #define SPIFC_A1_MAX_HZ			200000000
 #define SPIFC_A1_MIN_HZ			1000000
@@ -240,61 +240,44 @@ static int amlogic_spifc_a1_exec_op(struct spi_mem *mem,
 {
 	struct amlogic_spifc_a1 *spifc =
 		spi_controller_get_devdata(mem->spi->controller);
-	size_t off, nbytes = op->data.nbytes;
-	u32 cmd_cfg, addr_cfg, dummy_cfg, dmode;
+	size_t data_size = op->data.nbytes;
 	int ret;
 
 	amlogic_spifc_a1_user_init(spifc);
+	amlogic_spifc_a1_set_cmd(spifc, SPIFC_A1_USER_CMD(op));
 
-	cmd_cfg = SPIFC_A1_USER_CMD(op);
-	amlogic_spifc_a1_set_cmd(spifc, cmd_cfg);
+	if (op->addr.nbytes)
+		amlogic_spifc_a1_set_addr(spifc, op->addr.val,
+					  SPIFC_A1_USER_ADDR(op));
 
-	if (op->addr.nbytes) {
-		addr_cfg = SPIFC_A1_USER_ADDR(op);
-		amlogic_spifc_a1_set_addr(spifc, op->addr.val, addr_cfg);
-	}
+	if (op->dummy.nbytes)
+		amlogic_spifc_a1_set_dummy(spifc, SPIFC_A1_USER_DUMMY(op));
 
-	if (op->dummy.nbytes) {
-		dummy_cfg = SPIFC_A1_USER_DUMMY(op);
-		amlogic_spifc_a1_set_dummy(spifc, dummy_cfg);
-	}
-
-	if (!op->data.nbytes)
-		return amlogic_spifc_a1_request(spifc, false);
-
-	dmode = ilog2(op->data.buswidth);
-	off = 0;
-
-	do {
-		size_t block_size = min_t(size_t, nbytes, SPIFC_A1_BUFFER_SIZE);
-
-		amlogic_spifc_a1_set_cmd(spifc, cmd_cfg);
-
-		if (op->addr.nbytes)
-			amlogic_spifc_a1_set_addr(spifc, op->addr.val + off,
-						  addr_cfg);
-
-		if (op->dummy.nbytes)
-			amlogic_spifc_a1_set_dummy(spifc, dummy_cfg);
+	if (data_size) {
+		u32 mode = ilog2(op->data.buswidth);
 
 		writel(0, spifc->base + SPIFC_A1_USER_DBUF_ADDR_REG);
 
 		if (op->data.dir == SPI_MEM_DATA_IN)
-			ret = amlogic_spifc_a1_read(spifc,
-						    op->data.buf.in + off,
-						    block_size, dmode);
+			ret = amlogic_spifc_a1_read(spifc, op->data.buf.in,
+						    data_size, mode);
 		else
-			ret = amlogic_spifc_a1_write(spifc,
-						     op->data.buf.out + off,
-						     block_size, dmode);
-
-		nbytes -= block_size;
-		off += block_size;
-	} while (nbytes != 0 && !ret);
+			ret = amlogic_spifc_a1_write(spifc, op->data.buf.out,
+						     data_size, mode);
+	} else {
+		ret = amlogic_spifc_a1_request(spifc, false);
+	}
 
 	return ret;
 }
 
+static int amlogic_spifc_a1_adjust_op_size(struct spi_mem *mem,
+					   struct spi_mem_op *op)
+{
+	op->data.nbytes = min(op->data.nbytes, SPIFC_A1_BUFFER_SIZE);
+	return 0;
+}
+
 static void amlogic_spifc_a1_hw_init(struct amlogic_spifc_a1 *spifc)
 {
 	u32 regv;
@@ -314,6 +297,7 @@ static void amlogic_spifc_a1_hw_init(struct amlogic_spifc_a1 *spifc)
 
 static const struct spi_controller_mem_ops amlogic_spifc_a1_mem_ops = {
 	.exec_op = amlogic_spifc_a1_exec_op,
+	.adjust_op_size = amlogic_spifc_a1_adjust_op_size,
 };
 
 static int amlogic_spifc_a1_probe(struct platform_device *pdev)

From 8d4d4c6813fea356ef79f182d8f4d82793c8f64b Mon Sep 17 00:00:00 2001
From: Martin Kurbanov <mmkurbanov@sberdevices.ru>
Date: Thu, 6 Jul 2023 14:03:31 +0300
Subject: [PATCH 0014/2047] spi: amlogic-spifc-a1: add support for max_speed_hz

This patch sets the clock rate (spi_transfer->max_speed_hz) from the
amlogic_spifc_a1_exec_op().

Signed-off-by: Martin Kurbanov <mmkurbanov@sberdevices.ru>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20230706110331.19794-3-mmkurbanov@sberdevices.ru
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-amlogic-spifc-a1.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/spi/spi-amlogic-spifc-a1.c b/drivers/spi/spi-amlogic-spifc-a1.c
index a92e4fc23396..605e9e40455c 100644
--- a/drivers/spi/spi-amlogic-spifc-a1.c
+++ b/drivers/spi/spi-amlogic-spifc-a1.c
@@ -107,6 +107,7 @@ struct amlogic_spifc_a1 {
 	struct clk *clk;
 	struct device *dev;
 	void __iomem *base;
+	u32 curr_speed_hz;
 };
 
 static int amlogic_spifc_a1_request(struct amlogic_spifc_a1 *spifc, bool read)
@@ -235,6 +236,21 @@ static int amlogic_spifc_a1_write(struct amlogic_spifc_a1 *spifc,
 	return amlogic_spifc_a1_request(spifc, false);
 }
 
+static int amlogic_spifc_a1_set_freq(struct amlogic_spifc_a1 *spifc, u32 freq)
+{
+	int ret;
+
+	if (freq == spifc->curr_speed_hz)
+		return 0;
+
+	ret = clk_set_rate(spifc->clk, freq);
+	if (ret)
+		return ret;
+
+	spifc->curr_speed_hz = freq;
+	return 0;
+}
+
 static int amlogic_spifc_a1_exec_op(struct spi_mem *mem,
 				    const struct spi_mem_op *op)
 {
@@ -243,6 +259,10 @@ static int amlogic_spifc_a1_exec_op(struct spi_mem *mem,
 	size_t data_size = op->data.nbytes;
 	int ret;
 
+	ret = amlogic_spifc_a1_set_freq(spifc, mem->spi->max_speed_hz);
+	if (ret)
+		return ret;
+
 	amlogic_spifc_a1_user_init(spifc);
 	amlogic_spifc_a1_set_cmd(spifc, SPIFC_A1_USER_CMD(op));
 

From fee681646fc831b154619ac0261afedcc7e671e7 Mon Sep 17 00:00:00 2001
From: Valentin Caron <valentin.caron@foss.st.com>
Date: Thu, 6 Jul 2023 10:13:42 +0200
Subject: [PATCH 0015/2047] spi: stm32: disable device mode with st,stm32f4-spi
 compatible

STM32 SPI driver is not capable to handle device mode with stm32f4 soc.
Stop probing if this case happens.

Signed-off-by: Valentin Caron <valentin.caron@foss.st.com>
Link: https://lore.kernel.org/r/20230706081342.468090-1-valentin.caron@foss.st.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-stm32.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 6d10fa4ab783..536d89e044cf 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -238,6 +238,7 @@ struct stm32_spi;
  * @baud_rate_div_min: minimum baud rate divisor
  * @baud_rate_div_max: maximum baud rate divisor
  * @has_fifo: boolean to know if fifo is used for driver
+ * @has_device_mode: is this compatible capable to switch on device mode
  * @flags: compatible specific SPI controller flags used at registration time
  */
 struct stm32_spi_cfg {
@@ -259,6 +260,7 @@ struct stm32_spi_cfg {
 	unsigned int baud_rate_div_min;
 	unsigned int baud_rate_div_max;
 	bool has_fifo;
+	bool has_device_mode;
 	u16 flags;
 };
 
@@ -1750,6 +1752,7 @@ static const struct stm32_spi_cfg stm32f4_spi_cfg = {
 	.baud_rate_div_min = STM32F4_SPI_BR_DIV_MIN,
 	.baud_rate_div_max = STM32F4_SPI_BR_DIV_MAX,
 	.has_fifo = false,
+	.has_device_mode = false,
 	.flags = SPI_MASTER_MUST_TX,
 };
 
@@ -1774,6 +1777,7 @@ static const struct stm32_spi_cfg stm32h7_spi_cfg = {
 	.baud_rate_div_min = STM32H7_SPI_MBR_DIV_MIN,
 	.baud_rate_div_max = STM32H7_SPI_MBR_DIV_MAX,
 	.has_fifo = true,
+	.has_device_mode = true,
 };
 
 static const struct of_device_id stm32_spi_of_match[] = {
@@ -1798,8 +1802,13 @@ static int stm32_spi_probe(struct platform_device *pdev)
 	struct device_node *np = pdev->dev.of_node;
 	bool device_mode;
 	int ret;
+	const struct stm32_spi_cfg *cfg = of_device_get_match_data(&pdev->dev);
 
 	device_mode = of_property_read_bool(np, "spi-slave");
+	if (!cfg->has_device_mode && device_mode) {
+		dev_err(&pdev->dev, "spi-slave not supported\n");
+		return -EPERM;
+	}
 
 	if (device_mode)
 		ctrl = devm_spi_alloc_slave(&pdev->dev, sizeof(struct stm32_spi));
@@ -1817,9 +1826,7 @@ static int stm32_spi_probe(struct platform_device *pdev)
 	spi->device_mode = device_mode;
 	spin_lock_init(&spi->lock);
 
-	spi->cfg = (const struct stm32_spi_cfg *)
-		of_match_device(pdev->dev.driver->of_match_table,
-				&pdev->dev)->data;
+	spi->cfg = cfg;
 
 	spi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(spi->base))

From 15a6af94a2779d5dfb42ee4bfac858ea8e964a3f Mon Sep 17 00:00:00 2001
From: Stefan Moring <stefan.moring@technolution.nl>
Date: Wed, 28 Jun 2023 14:54:06 +0200
Subject: [PATCH 0016/2047] spi: Increase imx51 ecspi burst length based on
 transfer length

IMX51 supports 4096 bit burst lengths. Using the spi transfer length
instead of bits_per_word increases performance significantly.

Signed-off-by: Stefan Moring <stefan.moring@technolution.nl>
Link: https://lore.kernel.org/r/20230628125406.237949-1-stefan.moring@technolution.nl
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-imx.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 528ae46c087f..169d158b6f00 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -659,9 +659,13 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx,
 	if (spi_imx->slave_mode && is_imx53_ecspi(spi_imx))
 		ctrl |= (spi_imx->slave_burst * 8 - 1)
 			<< MX51_ECSPI_CTRL_BL_OFFSET;
-	else
-		ctrl |= (spi_imx->bits_per_word - 1)
-			<< MX51_ECSPI_CTRL_BL_OFFSET;
+	else {
+		if (spi_imx->count >= 512)
+			ctrl |= 0xFFF << MX51_ECSPI_CTRL_BL_OFFSET;
+		else
+			ctrl |= (spi_imx->count*8 - 1)
+				<< MX51_ECSPI_CTRL_BL_OFFSET;
+	}
 
 	/* set clock speed */
 	ctrl &= ~(0xf << MX51_ECSPI_CTRL_POSTDIV_OFFSET |
@@ -1258,6 +1262,7 @@ static int spi_imx_setupxfer(struct spi_device *spi,
 		spi_imx->spi_bus_clk = t->speed_hz;
 
 	spi_imx->bits_per_word = t->bits_per_word;
+	spi_imx->count = t->len;
 
 	/*
 	 * Initialize the functions for transfer. To transfer non byte-aligned

From 9303331461cb63a61e598655106a088c953e5660 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 7 Jul 2023 09:11:19 +0200
Subject: [PATCH 0017/2047] spi: rzv2m-csi: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is ignored (apart
from emitting a warning) and this typically results in resource leaks.
To improve here there is a quest to make the remove callback return
void. In the first step of this quest all drivers are converted to
.remove_new() which already returns void. Eventually after all drivers
are converted, .remove_new() is renamed to .remove().

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20230707071119.3394198-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rzv2m-csi.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-rzv2m-csi.c b/drivers/spi/spi-rzv2m-csi.c
index 14ad65da930d..c0d9a776770f 100644
--- a/drivers/spi/spi-rzv2m-csi.c
+++ b/drivers/spi/spi-rzv2m-csi.c
@@ -635,15 +635,13 @@ static int rzv2m_csi_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int rzv2m_csi_remove(struct platform_device *pdev)
+static void rzv2m_csi_remove(struct platform_device *pdev)
 {
 	struct rzv2m_csi_priv *csi = platform_get_drvdata(pdev);
 
 	spi_unregister_controller(csi->controller);
 	rzv2m_csi_sw_reset(csi, 1);
 	clk_disable_unprepare(csi->csiclk);
-
-	return 0;
 }
 
 static const struct of_device_id rzv2m_csi_match[] = {
@@ -654,7 +652,7 @@ MODULE_DEVICE_TABLE(of, rzv2m_csi_match);
 
 static struct platform_driver rzv2m_csi_drv = {
 	.probe = rzv2m_csi_probe,
-	.remove = rzv2m_csi_remove,
+	.remove_new = rzv2m_csi_remove,
 	.driver = {
 		.name = "rzv2m_csi",
 		.of_match_table = rzv2m_csi_match,

From 616a733ccaee2412cd8def29f6f106e22c3cfabb Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 6 Jul 2023 11:27:20 +0800
Subject: [PATCH 0018/2047] spi: atmel: Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230706032727.9180-1-frank.li@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-atmel.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 152cd6773403..8a9c8b92be8c 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -1450,10 +1450,6 @@ static int atmel_spi_probe(struct platform_device *pdev)
 	/* Select default pin state */
 	pinctrl_pm_select_default_state(&pdev->dev);
 
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!regs)
-		return -ENXIO;
-
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
 		return irq;
@@ -1490,7 +1486,7 @@ static int atmel_spi_probe(struct platform_device *pdev)
 	spin_lock_init(&as->lock);
 
 	as->pdev = pdev;
-	as->regs = devm_ioremap_resource(&pdev->dev, regs);
+	as->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &regs);
 	if (IS_ERR(as->regs)) {
 		ret = PTR_ERR(as->regs);
 		goto out_unmap_regs;

From 2ae3c98b6e80f5b99afd9eafbdfab5146330dae5 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 6 Jul 2023 11:27:21 +0800
Subject: [PATCH 0019/2047] spi: davinci: Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230706032727.9180-2-frank.li@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-davinci.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index b04811c911e2..a85fd0cb4a5e 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -895,25 +895,16 @@ static int davinci_spi_probe(struct platform_device *pdev)
 		goto free_master;
 	}
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (r == NULL) {
-		ret = -ENOENT;
-		goto free_master;
-	}
-
-	dspi->pbase = r->start;
-
-	dspi->base = devm_ioremap_resource(&pdev->dev, r);
+	dspi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &r);
 	if (IS_ERR(dspi->base)) {
 		ret = PTR_ERR(dspi->base);
 		goto free_master;
 	}
+	dspi->pbase = r->start;
 
 	init_completion(&dspi->done);
 
 	ret = platform_get_irq(pdev, 0);
-	if (ret == 0)
-		ret = -EINVAL;
 	if (ret < 0)
 		goto free_master;
 	dspi->irq = ret;

From cb8ea3dd55acd191f80847488d2231a1d18ab1ed Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 6 Jul 2023 11:27:22 +0800
Subject: [PATCH 0020/2047] spi: ep93xx: Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230706032727.9180-3-frank.li@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ep93xx.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c
index 1615fd22f9a2..3693b3d425c7 100644
--- a/drivers/spi/spi-ep93xx.c
+++ b/drivers/spi/spi-ep93xx.c
@@ -661,13 +661,7 @@ static int ep93xx_spi_probe(struct platform_device *pdev)
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
-		return -EBUSY;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "unable to get iomem resource\n");
-		return -ENODEV;
-	}
+		return irq;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*espi));
 	if (!master)
@@ -705,13 +699,12 @@ static int ep93xx_spi_probe(struct platform_device *pdev)
 	master->max_speed_hz = clk_get_rate(espi->clk) / 2;
 	master->min_speed_hz = clk_get_rate(espi->clk) / (254 * 256);
 
-	espi->sspdr_phys = res->start + SSPDR;
-
-	espi->mmio = devm_ioremap_resource(&pdev->dev, res);
+	espi->mmio = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(espi->mmio)) {
 		error = PTR_ERR(espi->mmio);
 		goto fail_release_master;
 	}
+	espi->sspdr_phys = res->start + SSPDR;
 
 	error = devm_request_irq(&pdev->dev, irq, ep93xx_spi_interrupt,
 				0, "ep93xx-spi", master);

From 8c8e947b6be037d0a311f11688212c7d87aa5440 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 6 Jul 2023 11:27:23 +0800
Subject: [PATCH 0021/2047] spi: spi-nxp-fspi: Convert to
 devm_platform_ioremap_resource() and devm_platform_ioremap_resource_byname()

Use devm_platform_ioremap_resource() and
devm_platform_ioremap_resource_byname() to simplify code.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Haibo Chen <haibo.chen@nxp.com>
Link: https://lore.kernel.org/r/20230706032727.9180-4-frank.li@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-nxp-fspi.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
index 544017655787..4cf9b3bf7b57 100644
--- a/drivers/spi/spi-nxp-fspi.c
+++ b/drivers/spi/spi-nxp-fspi.c
@@ -1157,12 +1157,10 @@ static int nxp_fspi_probe(struct platform_device *pdev)
 
 	/* find the resources - configuration register address space */
 	if (is_acpi_node(dev_fwnode(f->dev)))
-		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		f->iobase = devm_platform_ioremap_resource(pdev, 0);
 	else
-		res = platform_get_resource_byname(pdev,
-				IORESOURCE_MEM, "fspi_base");
+		f->iobase = devm_platform_ioremap_resource_byname(pdev, "fspi_base");
 
-	f->iobase = devm_ioremap_resource(dev, res);
 	if (IS_ERR(f->iobase)) {
 		ret = PTR_ERR(f->iobase);
 		goto err_put_ctrl;

From b778d967979d7ed638fe1de8c2465ff33ace47d8 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 6 Jul 2023 11:27:24 +0800
Subject: [PATCH 0022/2047] spi: rspi: Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230706032727.9180-5-frank.li@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rspi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 08ceebbaf69b..9312a9d58a1b 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -1317,8 +1317,7 @@ static int rspi_probe(struct platform_device *pdev)
 	rspi->ops = ops;
 	rspi->ctlr = ctlr;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	rspi->addr = devm_ioremap_resource(&pdev->dev, res);
+	rspi->addr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(rspi->addr)) {
 		ret = PTR_ERR(rspi->addr);
 		goto error1;

From 2e4ed2577936476e0e9bb9a07158e94effb86285 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 6 Jul 2023 11:27:25 +0800
Subject: [PATCH 0023/2047] spi: tegra20-slink: Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Link: https://lore.kernel.org/r/20230706032727.9180-6-frank.li@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra20-slink.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
index c2915f7672cc..8cfd9e4f60b5 100644
--- a/drivers/spi/spi-tegra20-slink.c
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -1034,18 +1034,12 @@ static int tegra_slink_probe(struct platform_device *pdev)
 				 &master->max_speed_hz))
 		master->max_speed_hz = 25000000; /* 25MHz */
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!r) {
-		dev_err(&pdev->dev, "No IO memory resource\n");
-		ret = -ENODEV;
-		goto exit_free_master;
-	}
-	tspi->phys = r->start;
-	tspi->base = devm_ioremap_resource(&pdev->dev, r);
+	tspi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &r);
 	if (IS_ERR(tspi->base)) {
 		ret = PTR_ERR(tspi->base);
 		goto exit_free_master;
 	}
+	tspi->phys = r->start;
 
 	/* disabled clock may cause interrupt storm upon request */
 	tspi->clk = devm_clk_get(&pdev->dev, NULL);

From 4f81b540ccdf023b7b39432f5c0a26c7eadcc9ce Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 6 Jul 2023 11:27:26 +0800
Subject: [PATCH 0024/2047] spi: s3c64xx: Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230706032727.9180-7-frank.li@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-s3c64xx.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index fd55697144cc..ad008597c7d8 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -1164,11 +1164,6 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
 		return dev_err_probe(&pdev->dev, -ENODEV,
 				     "Platform_data missing!\n");
 
-	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!mem_res)
-		return dev_err_probe(&pdev->dev, -ENXIO,
-				     "Unable to get SPI MEM resource\n");
-
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
 		return dev_err_probe(&pdev->dev, irq, "Failed to get IRQ\n");
@@ -1185,7 +1180,6 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
 	sdd->master = master;
 	sdd->cntrlr_info = sci;
 	sdd->pdev = pdev;
-	sdd->sfr_start = mem_res->start;
 	if (pdev->dev.of_node) {
 		ret = of_alias_get_id(pdev->dev.of_node, "spi");
 		if (ret < 0)
@@ -1223,9 +1217,10 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
 	if (!is_polling(sdd))
 		master->can_dma = s3c64xx_spi_can_dma;
 
-	sdd->regs = devm_ioremap_resource(&pdev->dev, mem_res);
+	sdd->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &mem_res);
 	if (IS_ERR(sdd->regs))
 		return PTR_ERR(sdd->regs);
+	sdd->sfr_start = mem_res->start;
 
 	if (sci->cfg_gpio && sci->cfg_gpio())
 		return dev_err_probe(&pdev->dev, -EBUSY,

From 8c87a46e2ce3d5aaf315ffb61dcda62417e41bbf Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 5 Jul 2023 17:26:01 +0200
Subject: [PATCH 0025/2047] dt-bindings: spi: tegra-slink: Convert to
 json-schema

Convert the Tegra SLINK bindings from the free-form text format to
json-schema.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230705152603.2514235-1-thierry.reding@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/spi/nvidia,tegra20-slink.txt     | 37 --------
 .../bindings/spi/nvidia,tegra20-slink.yaml    | 90 +++++++++++++++++++
 2 files changed, 90 insertions(+), 37 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
 create mode 100644 Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.yaml

diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
deleted file mode 100644
index 40d80b93e327..000000000000
--- a/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-NVIDIA Tegra20/Tegra30 SLINK controller.
-
-Required properties:
-- compatible : should be "nvidia,tegra20-slink", "nvidia,tegra30-slink".
-- reg: Should contain SLINK registers location and length.
-- interrupts: Should contain SLINK interrupts.
-- clocks : Must contain one entry, for the module clock.
-  See ../clocks/clock-bindings.txt for details.
-- resets : Must contain an entry for each entry in reset-names.
-  See ../reset/reset.txt for details.
-- reset-names : Must include the following entries:
-  - spi
-- dmas : Must contain an entry for each entry in clock-names.
-  See ../dma/dma.txt for details.
-- dma-names : Must include the following entries:
-  - rx
-  - tx
-
-Recommended properties:
-- spi-max-frequency: Definition as per
-                     Documentation/devicetree/bindings/spi/spi-bus.txt
-
-Example:
-
-spi@7000d600 {
-	compatible = "nvidia,tegra20-slink";
-	reg = <0x7000d600 0x200>;
-	interrupts = <0 82 0x04>;
-	spi-max-frequency = <25000000>;
-	#address-cells = <1>;
-	#size-cells = <0>;
-	clocks = <&tegra_car 44>;
-	resets = <&tegra_car 44>;
-	reset-names = "spi";
-	dmas = <&apbdma 16>, <&apbdma 16>;
-	dma-names = "rx", "tx";
-};
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.yaml
new file mode 100644
index 000000000000..291c25ec015d
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.yaml
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/nvidia,tegra20-slink.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra20/30 SLINK controller
+
+maintainers:
+  - Thierry Reding <thierry.reding@gmail.com>
+  - Jon Hunter <jonathanh@nvidia.com>
+
+properties:
+  compatible:
+    enum:
+      - nvidia,tegra20-slink
+      - nvidia,tegra30-slink
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: module clock
+
+  resets:
+    items:
+      - description: module reset
+
+  reset-names:
+    items:
+      - const: spi
+
+  dmas:
+    items:
+      - description: DMA channel used for reception
+      - description: DMA channel used for transmission
+
+  dma-names:
+    items:
+      - const: rx
+      - const: tx
+
+  operating-points-v2:
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+  power-domains:
+    items:
+      - description: phandle to the core power domain
+
+  spi-max-frequency:
+    description: Maximum SPI clocking speed of the controller in Hz.
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+allOf:
+  - $ref: spi-controller.yaml
+
+unevaluatedProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - resets
+  - reset-names
+  - dmas
+  - dma-names
+
+examples:
+  - |
+    #include <dt-bindings/clock/tegra20-car.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    spi@7000d600 {
+        compatible = "nvidia,tegra20-slink";
+        reg = <0x7000d600 0x200>;
+        interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+        spi-max-frequency = <25000000>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+        clocks = <&tegra_car TEGRA20_CLK_SBC2>;
+        resets = <&tegra_car 44>;
+        reset-names = "spi";
+        dmas = <&apbdma 16>, <&apbdma 16>;
+        dma-names = "rx", "tx";
+    };

From 17a9ab02f72c832293155a432895c889842b7da4 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 5 Jul 2023 17:26:02 +0200
Subject: [PATCH 0026/2047] dt-bindings: spi: tegra-sflash: Convert to
 json-schema

Convert the Tegra SFLASH controller bindings from the free-form text
format to json-schema.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230705152603.2514235-2-thierry.reding@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/spi/nvidia,tegra20-sflash.txt    | 37 ---------
 .../bindings/spi/nvidia,tegra20-sflash.yaml   | 81 +++++++++++++++++++
 2 files changed, 81 insertions(+), 37 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt
 create mode 100644 Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.yaml

diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt
deleted file mode 100644
index c212491929b5..000000000000
--- a/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-NVIDIA Tegra20 SFLASH controller.
-
-Required properties:
-- compatible : should be "nvidia,tegra20-sflash".
-- reg: Should contain SFLASH registers location and length.
-- interrupts: Should contain SFLASH interrupts.
-- clocks : Must contain one entry, for the module clock.
-  See ../clocks/clock-bindings.txt for details.
-- resets : Must contain an entry for each entry in reset-names.
-  See ../reset/reset.txt for details.
-- reset-names : Must include the following entries:
-  - spi
-- dmas : Must contain an entry for each entry in clock-names.
-  See ../dma/dma.txt for details.
-- dma-names : Must include the following entries:
-  - rx
-  - tx
-
-Recommended properties:
-- spi-max-frequency: Definition as per
-                     Documentation/devicetree/bindings/spi/spi-bus.txt
-
-Example:
-
-spi@7000c380 {
-	compatible = "nvidia,tegra20-sflash";
-	reg = <0x7000c380 0x80>;
-	interrupts = <0 39 0x04>;
-	spi-max-frequency = <25000000>;
-	#address-cells = <1>;
-	#size-cells = <0>;
-	clocks = <&tegra_car 43>;
-	resets = <&tegra_car 43>;
-	reset-names = "spi";
-	dmas = <&apbdma 11>, <&apbdma 11>;
-	dma-names = "rx", "tx";
-};
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.yaml
new file mode 100644
index 000000000000..e245bad85a25
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/nvidia,tegra20-sflash.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra20 SFLASH controller
+
+maintainers:
+  - Thierry Reding <thierry.reding@gmail.com>
+  - Jon Hunter <jonathanh@nvidia.com>
+
+properties:
+  compatible:
+    const: nvidia,tegra20-sflash
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: module clock
+
+  resets:
+    items:
+      - description: module reset
+
+  reset-names:
+    items:
+      - const: spi
+
+  dmas:
+    items:
+      - description: DMA channel used for reception
+      - description: DMA channel used for transmission
+
+  dma-names:
+    items:
+      - const: rx
+      - const: tx
+
+  spi-max-frequency:
+    description: Maximum SPI clocking speed of the controller in Hz.
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+allOf:
+  - $ref: spi-controller.yaml
+
+unevaluatedProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - resets
+  - reset-names
+  - dmas
+  - dma-names
+
+examples:
+  - |
+    #include <dt-bindings/clock/tegra20-car.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    spi@7000c380 {
+        compatible = "nvidia,tegra20-sflash";
+        reg = <0x7000c380 0x80>;
+        interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
+        spi-max-frequency = <25000000>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+        clocks = <&tegra_car TEGRA20_CLK_SPI>;
+        resets = <&tegra_car 43>;
+        reset-names = "spi";
+        dmas = <&apbdma 11>, <&apbdma 11>;
+        dma-names = "rx", "tx";
+    };

From b8968c388b69d9cf31d7f5b1721ac7fe9f932cb9 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 5 Jul 2023 17:26:03 +0200
Subject: [PATCH 0027/2047] dt-bindings: spi: Convert Tegra114 SPI to
 json-schema

Convert the Tegra114 SPI controller device tree bindings from plain text
to json-schema format.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230705152603.2514235-3-thierry.reding@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/spi/nvidia,tegra114-spi.txt      |  61 -----------
 .../bindings/spi/nvidia,tegra114-spi.yaml     | 100 ++++++++++++++++++
 2 files changed, 100 insertions(+), 61 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt
 create mode 100644 Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.yaml

diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt
deleted file mode 100644
index db8e0d71c5bc..000000000000
--- a/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-NVIDIA Tegra114 SPI controller.
-
-Required properties:
-- compatible : For Tegra114, must contain "nvidia,tegra114-spi".
-  Otherwise, must contain '"nvidia,<chip>-spi", "nvidia,tegra114-spi"' where
-  <chip> is tegra124, tegra132, or tegra210.
-- reg: Should contain SPI registers location and length.
-- interrupts: Should contain SPI interrupts.
-- clock-names : Must include the following entries:
-  - spi
-- resets : Must contain an entry for each entry in reset-names.
-  See ../reset/reset.txt for details.
-- reset-names : Must include the following entries:
-  - spi
-- dmas : Must contain an entry for each entry in clock-names.
-  See ../dma/dma.txt for details.
-- dma-names : Must include the following entries:
-  - rx
-  - tx
-- clocks : Must contain an entry for each entry in clock-names.
-  See ../clocks/clock-bindings.txt for details.
-
-Recommended properties:
-- spi-max-frequency: Definition as per
-                     Documentation/devicetree/bindings/spi/spi-bus.txt
-Optional properties:
-- nvidia,tx-clk-tap-delay: Delays the clock going out to the external device
-  with this tap value. This property is used to tune the outgoing data from
-  Tegra SPI master with respect to outgoing Tegra SPI master clock.
-  Tap values vary based on the platform design trace lengths from Tegra SPI
-  to corresponding slave devices. Valid tap values are from 0 thru 63.
-- nvidia,rx-clk-tap-delay: Delays the clock coming in from the external device
-  with this tap value. This property is used to adjust the Tegra SPI master
-  clock with respect to the data from the SPI slave device.
-  Tap values vary based on the platform design trace lengths from Tegra SPI
-  to corresponding slave devices. Valid tap values are from 0 thru 63.
-
-Example:
-
-spi@7000d600 {
-	compatible = "nvidia,tegra114-spi";
-	reg = <0x7000d600 0x200>;
-	interrupts = <0 82 0x04>;
-	spi-max-frequency = <25000000>;
-	#address-cells = <1>;
-	#size-cells = <0>;
-	clocks = <&tegra_car 44>;
-	clock-names = "spi";
-	resets = <&tegra_car 44>;
-	reset-names = "spi";
-	dmas = <&apbdma 16>, <&apbdma 16>;
-	dma-names = "rx", "tx";
-	<spi-client>@<bus_num> {
-		...
-		...
-		nvidia,rx-clk-tap-delay = <0>;
-		nvidia,tx-clk-tap-delay = <16>;
-		...
-	};
-
-};
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.yaml
new file mode 100644
index 000000000000..58222ffa53d7
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.yaml
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/nvidia,tegra114-spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra114 SPI controller
+
+maintainers:
+  - Thierry Reding <thierry.reding@gmail.com>
+  - Jon Hunter <jonathanh@nvidia.com>
+
+properties:
+  compatible:
+    oneOf:
+      - const: nvidia,tegra114-spi
+      - items:
+          - enum:
+              - nvidia,tegra210-spi
+              - nvidia,tegra124-spi
+          - const: nvidia,tegra114-spi
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: SPI module clock
+
+  clock-names:
+    items:
+      - const: spi
+
+  resets:
+    items:
+      - description: SPI module reset
+
+  reset-names:
+    items:
+      - const: spi
+
+  dmas:
+    items:
+      - description: DMA channel for the reception FIFO
+      - description: DMA channel for the transmission FIFO
+
+  dma-names:
+    items:
+      - const: rx
+      - const: tx
+
+  spi-max-frequency:
+    description: Maximum SPI clocking speed of the controller in Hz.
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+allOf:
+  - $ref: spi-controller.yaml
+
+unevaluatedProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - dmas
+  - dma-names
+
+examples:
+  - |
+    spi@7000d600 {
+        compatible = "nvidia,tegra114-spi";
+        reg = <0x7000d600 0x200>;
+        interrupts = <0 82 0x04>;
+        clocks = <&tegra_car 44>;
+        clock-names = "spi";
+        resets = <&tegra_car 44>;
+        reset-names = "spi";
+        dmas = <&apbdma 16>, <&apbdma 16>;
+        dma-names = "rx", "tx";
+
+        spi-max-frequency = <25000000>;
+
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        flash@0 {
+            compatible = "jedec,spi-nor";
+            reg = <0>;
+            spi-max-frequency = <20000000>;
+            nvidia,rx-clk-tap-delay = <0>;
+            nvidia,tx-clk-tap-delay = <16>;
+        };
+    };

From 7c2128235eff99b448af8f4b5b2933495bf1a440 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@amd.com>
Date: Tue, 20 Jun 2023 14:46:03 +0530
Subject: [PATCH 0028/2047] perf/amd: Prevent grouping of IBS events

IBS PMUs can have only one event active at any point in time. Restrict
grouping of multiple IBS events.

Reported-by: Sandipan Das <sandipan.das@amd.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230620091603.269-1-ravi.bangoria@amd.com
---
 arch/x86/events/amd/ibs.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 371014802191..74e664266753 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -247,11 +247,33 @@ int forward_event_to_ibs(struct perf_event *event)
 	return -ENOENT;
 }
 
+/*
+ * Grouping of IBS events is not possible since IBS can have only
+ * one event active at any point in time.
+ */
+static int validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling;
+
+	if (event->group_leader == event)
+		return 0;
+
+	if (event->group_leader->pmu == event->pmu)
+		return -EINVAL;
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (sibling->pmu == event->pmu)
+			return -EINVAL;
+	}
+	return 0;
+}
+
 static int perf_ibs_init(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct perf_ibs *perf_ibs;
 	u64 max_cnt, config;
+	int ret;
 
 	perf_ibs = get_ibs_pmu(event->attr.type);
 	if (!perf_ibs)
@@ -265,6 +287,10 @@ static int perf_ibs_init(struct perf_event *event)
 	if (config & ~perf_ibs->config_mask)
 		return -EINVAL;
 
+	ret = validate_group(event);
+	if (ret)
+		return ret;
+
 	if (hwc->sample_period) {
 		if (config & perf_ibs->cnt_mask)
 			/* raw max_cnt may not be set */

From 4c1c9dea2089374cb58505a0df6136969ff3c8b9 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Thu, 6 Jul 2023 16:16:48 +0200
Subject: [PATCH 0029/2047] perf/x86: Use local64_try_cmpxchg

Use local64_try_cmpxchg instead of local64_cmpxchg (*ptr, old, new) == old.
x86 CMPXCHG instruction returns success in ZF flag, so this change saves a
compare after cmpxchg (and related move instruction in front of cmpxchg).

Also, try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg
fails. There is no need to re-read the value in the loop.

No functional change intended.

Cc. "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230706141720.2672-1-ubizjak@gmail.com
---
 arch/x86/events/amd/ibs.c      |  4 ++--
 arch/x86/events/core.c         | 10 ++++------
 arch/x86/events/intel/cstate.c | 10 ++++------
 arch/x86/events/msr.c          |  8 +++-----
 4 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 74e664266753..7d29be0a279b 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -156,8 +156,8 @@ perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
 	 * count to the generic event atomically:
 	 */
 	prev_raw_count = local64_read(&hwc->prev_count);
-	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-					new_raw_count) != prev_raw_count)
+	if (!local64_try_cmpxchg(&hwc->prev_count,
+				 &prev_raw_count, new_raw_count))
 		return 0;
 
 	/*
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 9d248703cbdd..23c96420dea1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -129,13 +129,11 @@ u64 x86_perf_event_update(struct perf_event *event)
 	 * exchange a new raw count - then add that new-prev delta
 	 * count to the generic event atomically:
 	 */
-again:
 	prev_raw_count = local64_read(&hwc->prev_count);
-	rdpmcl(hwc->event_base_rdpmc, new_raw_count);
-
-	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-					new_raw_count) != prev_raw_count)
-		goto again;
+	do {
+		rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+	} while (!local64_try_cmpxchg(&hwc->prev_count,
+				      &prev_raw_count, new_raw_count));
 
 	/*
 	 * Now we have the new raw value and have updated the prev
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 835862c548cc..97035db68df2 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -365,13 +365,11 @@ static void cstate_pmu_event_update(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	u64 prev_raw_count, new_raw_count;
 
-again:
 	prev_raw_count = local64_read(&hwc->prev_count);
-	new_raw_count = cstate_pmu_read_counter(event);
-
-	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-			    new_raw_count) != prev_raw_count)
-		goto again;
+	do {
+		new_raw_count = cstate_pmu_read_counter(event);
+	} while (!local64_try_cmpxchg(&hwc->prev_count,
+				      &prev_raw_count, new_raw_count));
 
 	local64_add(new_raw_count - prev_raw_count, &event->count);
 }
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 0feaaa571303..61c59e0627d4 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -244,12 +244,10 @@ static void msr_event_update(struct perf_event *event)
 	s64 delta;
 
 	/* Careful, an NMI might modify the previous event value: */
-again:
 	prev = local64_read(&event->hw.prev_count);
-	now = msr_read_counter(event);
-
-	if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
-		goto again;
+	do {
+		now = msr_read_counter(event);
+	} while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, now));
 
 	delta = now - prev;
 	if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {

From 28fd85a10a2a73658c6e26056d9e093de06b5a22 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Sat, 8 Jul 2023 10:10:57 +0200
Subject: [PATCH 0030/2047] perf/core: Use local64_try_cmpxchg in
 perf_swevent_set_period

Use local64_try_cmpxchg instead of local64_cmpxchg (*ptr, old, new) == old
in perf_swevent_set_period.  x86 CMPXCHG instruction returns success in ZF
flag, so this change saves a compare after cmpxchg (and related move
instruction in front of cmpxchg).

Also, try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg
fails. There is no need to re-read the value in the loop.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230708081129.45915-1-ubizjak@gmail.com
---
 kernel/events/core.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 78ae7b6f90fd..f84e2640ea2f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9595,16 +9595,16 @@ u64 perf_swevent_set_period(struct perf_event *event)
 
 	hwc->last_period = hwc->sample_period;
 
-again:
-	old = val = local64_read(&hwc->period_left);
-	if (val < 0)
-		return 0;
+	old = local64_read(&hwc->period_left);
+	do {
+		val = old;
+		if (val < 0)
+			return 0;
 
-	nr = div64_u64(period + val, period);
-	offset = nr * period;
-	val -= offset;
-	if (local64_cmpxchg(&hwc->period_left, old, val) != old)
-		goto again;
+		nr = div64_u64(period + val, period);
+		offset = nr * period;
+		val -= offset;
+	} while (!local64_try_cmpxchg(&hwc->period_left, &old, val));
 
 	return nr;
 }

From d6b45484c130f4095313ae3edeb4aae662c12fb1 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Sat, 8 Jul 2023 11:00:36 +0200
Subject: [PATCH 0031/2047] locking/arch: Avoid variable shadowing in
 local_try_cmpxchg()

Several architectures define arch_try_local_cmpxchg macro using
internal temporary variables named ___old, __old or _old. Remove
temporary varible in local_try_cmpxchg to avoid variable shadowing.

No functional change intended.

Fixes: d994f2c8e241 ("locking/arch: Wire up local_try_cmpxchg()")
Closes: https://lore.kernel.org/lkml/CAFGhKbyxtuk=LoW-E3yLXgcmR93m+Dfo5-u9oQA_YC5Fcy_t9g@mail.gmail.com/
Reported-by: Charlemagne Lasse <charlemagnelasse@gmail.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230708090048.63046-1-ubizjak@gmail.com
---
 arch/loongarch/include/asm/local.h | 4 ++--
 arch/mips/include/asm/local.h      | 4 ++--
 arch/x86/include/asm/local.h       | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/loongarch/include/asm/local.h b/arch/loongarch/include/asm/local.h
index 83e995b30e47..c49675852bdc 100644
--- a/arch/loongarch/include/asm/local.h
+++ b/arch/loongarch/include/asm/local.h
@@ -63,8 +63,8 @@ static inline long local_cmpxchg(local_t *l, long old, long new)
 
 static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
 {
-	typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
-	return try_cmpxchg_local(&l->a.counter, __old, new);
+	return try_cmpxchg_local(&l->a.counter,
+				 (typeof(l->a.counter) *) old, new);
 }
 
 #define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n)))
diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h
index 5daf6fe8e3e9..e6ae3df0349d 100644
--- a/arch/mips/include/asm/local.h
+++ b/arch/mips/include/asm/local.h
@@ -101,8 +101,8 @@ static __inline__ long local_cmpxchg(local_t *l, long old, long new)
 
 static __inline__ bool local_try_cmpxchg(local_t *l, long *old, long new)
 {
-	typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
-	return try_cmpxchg_local(&l->a.counter, __old, new);
+	return try_cmpxchg_local(&l->a.counter,
+				 (typeof(l->a.counter) *) old, new);
 }
 
 #define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n)))
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 56d4ef604b91..635132a12778 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -127,8 +127,8 @@ static inline long local_cmpxchg(local_t *l, long old, long new)
 
 static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
 {
-	typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
-	return try_cmpxchg_local(&l->a.counter, __old, new);
+	return try_cmpxchg_local(&l->a.counter,
+				 (typeof(l->a.counter) *) old, new);
 }
 
 /* Always has a lock prefix */

From 1af61adb3a23192023fec1733bd4c8500f53e546 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Sat, 8 Jul 2023 11:00:37 +0200
Subject: [PATCH 0032/2047] perf/ring_buffer: Use local_try_cmpxchg in
 __perf_output_begin

Use local_try_cmpxchg instead of local_cmpxchg (*ptr, old, new) == old
in __perf_output_begin.  x86 CMPXCHG instruction returns success in ZF
flag, so this change saves a compare after cmpxchg (and related move
instruction in front of cmpxchg).

Also, try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg
fails. There is no need to re-read the value in the loop.

No functional change intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230708090048.63046-2-ubizjak@gmail.com
---
 kernel/events/ring_buffer.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index a0433f37b024..fb1e180b5f0a 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -191,9 +191,10 @@ __perf_output_begin(struct perf_output_handle *handle,
 
 	perf_output_get_handle(handle);
 
+	offset = local_read(&rb->head);
 	do {
+		head = offset;
 		tail = READ_ONCE(rb->user_page->data_tail);
-		offset = head = local_read(&rb->head);
 		if (!rb->overwrite) {
 			if (unlikely(!ring_buffer_has_space(head, tail,
 							    perf_data_size(rb),
@@ -217,7 +218,7 @@ __perf_output_begin(struct perf_output_handle *handle,
 			head += size;
 		else
 			head -= size;
-	} while (local_cmpxchg(&rb->head, offset, head) != offset);
+	} while (!local_try_cmpxchg(&rb->head, &offset, head));
 
 	if (backward) {
 		offset = head;

From e2051394a50c7dd49e9b56bfaf049a03972362ae Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Wed, 5 Jul 2023 09:42:19 +0200
Subject: [PATCH 0033/2047] gpiolib: add missing include

gpiolib.h uses notifiers but doesn't include <linux/notifier.h>.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Kent Gibson <warthog618@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpiolib.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index cca81375f127..1409d52487c0 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -14,6 +14,7 @@
 #include <linux/err.h>
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/notifier.h>
 #include <linux/cdev.h>
 #include <linux/rwsem.h>
 

From 3283d820dce649ad6d5eaa531d76251b4e22ad40 Mon Sep 17 00:00:00 2001
From: Shenwei Wang <shenwei.wang@nxp.com>
Date: Thu, 6 Jul 2023 09:56:11 -0500
Subject: [PATCH 0034/2047] gpio: mxc: add runtime pm support

Add runtime PM support and allow the GPIO controller to enter
into runtime suspend automatically when not in use to save power.
However, it will automatically resume and enable clocks when a
GPIO or IRQ is requested.

While putting the GPIO module itself into power saving mode may not
have an obvious impact on current dissipation, the function is necessary
because the GPIO module disables its clock when idle. This enables the
system an opportunity to power off the parent subsystem, and this conserves
more power. The typical i.MX8 SoC features up to 8 GPIO controllers, but
most of the controllers often remain unused.

Signed-off-by: Shenwei Wang <shenwei.wang@nxp.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-mxc.c | 61 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index 9d0cec4b82a3..a9fb6bd9aa6f 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -17,6 +17,7 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/syscore_ops.h>
@@ -382,6 +383,23 @@ static int mxc_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
 	return irq_find_mapping(port->domain, offset);
 }
 
+static int mxc_gpio_request(struct gpio_chip *chip, unsigned int offset)
+{
+	int ret;
+
+	ret = gpiochip_generic_request(chip, offset);
+	if (ret)
+		return ret;
+
+	return pm_runtime_resume_and_get(chip->parent);
+}
+
+static void mxc_gpio_free(struct gpio_chip *chip, unsigned int offset)
+{
+	gpiochip_generic_free(chip, offset);
+	pm_runtime_put(chip->parent);
+}
+
 static int mxc_gpio_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -429,6 +447,10 @@ static int mxc_gpio_probe(struct platform_device *pdev)
 	if (of_device_is_compatible(np, "fsl,imx7d-gpio"))
 		port->power_off = true;
 
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
 	/* disable the interrupt and clear the status */
 	writel(0, port->base + GPIO_IMR);
 	writel(~0, port->base + GPIO_ISR);
@@ -459,8 +481,8 @@ static int mxc_gpio_probe(struct platform_device *pdev)
 	if (err)
 		goto out_bgio;
 
-	port->gc.request = gpiochip_generic_request;
-	port->gc.free = gpiochip_generic_free;
+	port->gc.request = mxc_gpio_request;
+	port->gc.free = mxc_gpio_free;
 	port->gc.to_irq = mxc_gpio_to_irq;
 	port->gc.base = (pdev->id < 0) ? of_alias_get_id(np, "gpio") * 32 :
 					     pdev->id * 32;
@@ -482,6 +504,8 @@ static int mxc_gpio_probe(struct platform_device *pdev)
 		goto out_bgio;
 	}
 
+	irq_domain_set_pm_device(port->domain, &pdev->dev);
+
 	/* gpio-mxc can be a generic irq chip */
 	err = mxc_gpio_init_gc(port, irq_base);
 	if (err < 0)
@@ -490,12 +514,15 @@ static int mxc_gpio_probe(struct platform_device *pdev)
 	list_add_tail(&port->node, &mxc_gpio_ports);
 
 	platform_set_drvdata(pdev, port);
+	pm_runtime_put_autosuspend(&pdev->dev);
 
 	return 0;
 
 out_irqdomain_remove:
 	irq_domain_remove(port->domain);
 out_bgio:
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_put_noidle(&pdev->dev);
 	clk_disable_unprepare(port->clk);
 	dev_info(&pdev->dev, "%s failed with errno %d\n", __func__, err);
 	return err;
@@ -572,6 +599,30 @@ static bool mxc_gpio_set_pad_wakeup(struct mxc_gpio_port *port, bool enable)
 	return ret;
 }
 
+static int mxc_gpio_runtime_suspend(struct device *dev)
+{
+	struct mxc_gpio_port *port = dev_get_drvdata(dev);
+
+	mxc_gpio_save_regs(port);
+	clk_disable_unprepare(port->clk);
+
+	return 0;
+}
+
+static int mxc_gpio_runtime_resume(struct device *dev)
+{
+	struct mxc_gpio_port *port = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(port->clk);
+	if (ret)
+		return ret;
+
+	mxc_gpio_restore_regs(port);
+
+	return 0;
+}
+
 static int __maybe_unused mxc_gpio_noirq_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -597,14 +648,19 @@ static int __maybe_unused mxc_gpio_noirq_resume(struct device *dev)
 
 static const struct dev_pm_ops mxc_gpio_dev_pm_ops = {
 	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mxc_gpio_noirq_suspend, mxc_gpio_noirq_resume)
+	SET_RUNTIME_PM_OPS(mxc_gpio_runtime_suspend, mxc_gpio_runtime_resume, NULL)
 };
 
 static int mxc_gpio_syscore_suspend(void)
 {
 	struct mxc_gpio_port *port;
+	int ret;
 
 	/* walk through all ports */
 	list_for_each_entry(port, &mxc_gpio_ports, node) {
+		ret = clk_prepare_enable(port->clk);
+		if (ret)
+			return ret;
 		mxc_gpio_save_regs(port);
 		clk_disable_unprepare(port->clk);
 	}
@@ -625,6 +681,7 @@ static void mxc_gpio_syscore_resume(void)
 			return;
 		}
 		mxc_gpio_restore_regs(port);
+		clk_disable_unprepare(port->clk);
 	}
 }
 

From d619f48e7d70919e97259900ca2d591d6165c65d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:28 -0400
Subject: [PATCH 0035/2047] ibmvmc: update ctime in conjunction with mtime on
 write

POSIX says:

"Upon successful completion, where nbyte is greater than 0, write()
 shall mark for update the last data modification and last file status
 change timestamps of the file..."

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-1-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 drivers/misc/ibmvmc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c
index cbaf6d35e854..d7c7f0305257 100644
--- a/drivers/misc/ibmvmc.c
+++ b/drivers/misc/ibmvmc.c
@@ -1124,7 +1124,7 @@ static ssize_t ibmvmc_write(struct file *file, const char *buffer,
 		goto out;
 
 	inode = file_inode(file);
-	inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode->i_ctime = current_time(inode);
 	mark_inode_dirty(inode);
 
 	dev_dbg(adapter->dev, "write: file = 0x%lx, count = 0x%lx\n",

From a4440d741eb3d65edbcec13f2d8a0384548dd841 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:29 -0400
Subject: [PATCH 0036/2047] bfs: update ctime in addition to mtime when adding
 entries

When adding entries to a directory, POSIX generally requires that the
ctime be updated alongside the mtime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-2-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/bfs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 040d5140e426..d2e8a2a56b05 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -294,7 +294,7 @@ static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino)
 					dir->i_size += BFS_DIRENT_SIZE;
 					dir->i_ctime = current_time(dir);
 				}
-				dir->i_mtime = current_time(dir);
+				dir->i_mtime = dir->i_ctime = current_time(dir);
 				mark_inode_dirty(dir);
 				de->ino = cpu_to_le16((u16)ino);
 				for (i = 0; i < BFS_NAMELEN; i++)

From 73e6730650791072681b527aa132a126ed3681b5 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:30 -0400
Subject: [PATCH 0037/2047] efivarfs: update ctime when mtime changes on a
 write

POSIX says:

"Upon successful completion, where nbyte is greater than 0, write()
 shall mark for update the last data modification and last file status
 change timestamps of the file..."

Add the missing ctime update.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-3-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/efivarfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index d57ee15874f9..375576111dc3 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -51,7 +51,7 @@ static ssize_t efivarfs_file_write(struct file *file,
 	} else {
 		inode_lock(inode);
 		i_size_write(inode, datasize + sizeof(attributes));
-		inode->i_mtime = current_time(inode);
+		inode->i_mtime = inode->i_ctime = current_time(inode);
 		inode_unlock(inode);
 	}
 

From fb91816e403ed10d7d630b11ae3c7516834d0cce Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:31 -0400
Subject: [PATCH 0038/2047] exfat: ensure that ctime is updated whenever the
 mtime is

When removing entries from a directory, the ctime must also be updated
alongside the mtime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-4-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/exfat/namei.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index e0ff9d156f6f..d9b46fa36bff 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -817,7 +817,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
 	ei->dir.dir = DIR_DELETED;
 
 	inode_inc_iversion(dir);
-	dir->i_mtime = dir->i_atime = current_time(dir);
+	dir->i_mtime = dir->i_atime = dir->i_ctime = current_time(dir);
 	exfat_truncate_atime(&dir->i_atime);
 	if (IS_DIRSYNC(dir))
 		exfat_sync_inode(dir);
@@ -825,7 +825,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
 		mark_inode_dirty(dir);
 
 	clear_nlink(inode);
-	inode->i_mtime = inode->i_atime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
 	exfat_truncate_atime(&inode->i_atime);
 	exfat_unhash_inode(inode);
 	exfat_d_version_set(dentry, inode_query_iversion(dir));
@@ -979,7 +979,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
 	ei->dir.dir = DIR_DELETED;
 
 	inode_inc_iversion(dir);
-	dir->i_mtime = dir->i_atime = current_time(dir);
+	dir->i_mtime = dir->i_atime = dir->i_ctime = current_time(dir);
 	exfat_truncate_atime(&dir->i_atime);
 	if (IS_DIRSYNC(dir))
 		exfat_sync_inode(dir);
@@ -988,7 +988,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
 	drop_nlink(dir);
 
 	clear_nlink(inode);
-	inode->i_mtime = inode->i_atime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
 	exfat_truncate_atime(&inode->i_atime);
 	exfat_unhash_inode(inode);
 	exfat_d_version_set(dentry, inode_query_iversion(dir));

From 46fc6b35a6743075f5903eda411e4e0f4f0df84b Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:32 -0400
Subject: [PATCH 0039/2047] apparmor: update ctime whenever the mtime changes
 on an inode

In general, when updating the mtime on an inode, one must also update
the ctime. Add the missing ctime updates.

Acked-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-5-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 security/apparmor/apparmorfs.c    |  7 +++++--
 security/apparmor/policy_unpack.c | 11 +++++++----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index db7a51acf9db..c06053718836 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1554,8 +1554,11 @@ void __aafs_profile_migrate_dents(struct aa_profile *old,
 
 	for (i = 0; i < AAFS_PROF_SIZEOF; i++) {
 		new->dents[i] = old->dents[i];
-		if (new->dents[i])
-			new->dents[i]->d_inode->i_mtime = current_time(new->dents[i]->d_inode);
+		if (new->dents[i]) {
+			struct inode *inode = d_inode(new->dents[i]);
+
+			inode->i_mtime = inode->i_ctime = current_time(inode);
+		}
 		old->dents[i] = NULL;
 	}
 }
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 694fb7a09962..ed180722a833 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -86,10 +86,13 @@ void __aa_loaddata_update(struct aa_loaddata *data, long revision)
 
 	data->revision = revision;
 	if ((data->dents[AAFS_LOADDATA_REVISION])) {
-		d_inode(data->dents[AAFS_LOADDATA_DIR])->i_mtime =
-			current_time(d_inode(data->dents[AAFS_LOADDATA_DIR]));
-		d_inode(data->dents[AAFS_LOADDATA_REVISION])->i_mtime =
-			current_time(d_inode(data->dents[AAFS_LOADDATA_REVISION]));
+		struct inode *inode;
+
+		inode = d_inode(data->dents[AAFS_LOADDATA_DIR]);
+		inode->i_mtime = inode->i_ctime = current_time(inode);
+
+		inode = d_inode(data->dents[AAFS_LOADDATA_REVISION]);
+		inode->i_mtime = inode->i_ctime = current_time(inode);
 	}
 }
 

From bc2390f2c8843f342ba72bea0ee549b0d6976463 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:33 -0400
Subject: [PATCH 0040/2047] cifs: update the ctime on a partial page write

POSIX says:

    "Upon successful completion, where nbyte is greater than 0, write()
     shall mark for update the last data modification and last file status
     change timestamps of the file..."

Add the missing ctime update.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Steve French <stfrench@microsoft.com>
Message-Id: <20230705190309.579783-6-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/smb/client/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 879bc8e6555c..0a5fe8d5314b 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -2596,7 +2596,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 					   write_data, to - from, &offset);
 		cifsFileInfo_put(open_file);
 		/* Does mm or vfs already set times? */
-		inode->i_atime = inode->i_mtime = current_time(inode);
+		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
 		if ((bytes_written > 0) && (offset))
 			rc = 0;
 		else if (bytes_written < 0)

From 9b6304c1d53745c300b86f202d0dcff395e2d2db Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 14:58:10 -0400
Subject: [PATCH 0041/2047] fs: add ctime accessors infrastructure

struct timespec64 has unused bits in the tv_nsec field that can be used
for other purposes. In future patches, we're going to change how the
inode->i_ctime is accessed in certain inodes in order to make use of
them. In order to do that safely though, we'll need to eradicate raw
accesses of the inode->i_ctime field from the kernel.

Add new accessor functions for the ctime that we use to replace them.

Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Message-Id: <20230705185812.579118-2-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/inode.c         | 16 ++++++++++++++++
 include/linux/fs.h | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/fs/inode.c b/fs/inode.c
index 8fefb69e1f84..aac5cdcf5e89 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2501,6 +2501,22 @@ struct timespec64 current_time(struct inode *inode)
 }
 EXPORT_SYMBOL(current_time);
 
+/**
+ * inode_set_ctime_current - set the ctime to current_time
+ * @inode: inode
+ *
+ * Set the inode->i_ctime to the current value for the inode. Returns
+ * the current value that was assigned to i_ctime.
+ */
+struct timespec64 inode_set_ctime_current(struct inode *inode)
+{
+	struct timespec64 now = current_time(inode);
+
+	inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
+	return now;
+}
+EXPORT_SYMBOL(inode_set_ctime_current);
+
 /**
  * in_group_or_capable - check whether caller is CAP_FSETID privileged
  * @idmap:	idmap of the mount @inode was found from
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6867512907d6..d41bfcb26da0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1474,7 +1474,50 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
 	       kgid_has_mapping(fs_userns, kgid);
 }
 
-extern struct timespec64 current_time(struct inode *inode);
+struct timespec64 current_time(struct inode *inode);
+struct timespec64 inode_set_ctime_current(struct inode *inode);
+
+/**
+ * inode_get_ctime - fetch the current ctime from the inode
+ * @inode: inode from which to fetch ctime
+ *
+ * Grab the current ctime from the inode and return it.
+ */
+static inline struct timespec64 inode_get_ctime(const struct inode *inode)
+{
+	return inode->i_ctime;
+}
+
+/**
+ * inode_set_ctime_to_ts - set the ctime in the inode
+ * @inode: inode in which to set the ctime
+ * @ts: value to set in the ctime field
+ *
+ * Set the ctime in @inode to @ts
+ */
+static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
+						      struct timespec64 ts)
+{
+	inode->i_ctime = ts;
+	return ts;
+}
+
+/**
+ * inode_set_ctime - set the ctime in the inode
+ * @inode: inode in which to set the ctime
+ * @sec: tv_sec value to set
+ * @nsec: tv_nsec value to set
+ *
+ * Set the ctime in @inode to { @sec, @nsec }
+ */
+static inline struct timespec64 inode_set_ctime(struct inode *inode,
+						time64_t sec, long nsec)
+{
+	struct timespec64 ts = { .tv_sec  = sec,
+				 .tv_nsec = nsec };
+
+	return inode_set_ctime_to_ts(inode, ts);
+}
 
 /*
  * Snapshotting support.

From 0c4767923ed6964d279309744cdb248890e95ec2 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 14:58:11 -0400
Subject: [PATCH 0042/2047] fs: new helper: simple_rename_timestamp

A rename potentially involves updating 4 different inode timestamps. Add
a function that handles the details sanely, and convert the libfs.c
callers to use it.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705185812.579118-3-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/libfs.c         | 36 +++++++++++++++++++++++++++---------
 include/linux/fs.h |  2 ++
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index 5b851315eeed..ea0a84591886 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -444,6 +444,31 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
 }
 EXPORT_SYMBOL(simple_rmdir);
 
+/**
+ * simple_rename_timestamp - update the various inode timestamps for rename
+ * @old_dir: old parent directory
+ * @old_dentry: dentry that is being renamed
+ * @new_dir: new parent directory
+ * @new_dentry: target for rename
+ *
+ * POSIX mandates that the old and new parent directories have their ctime and
+ * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), have
+ * their ctime updated.
+ */
+void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
+			     struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct inode *newino = d_inode(new_dentry);
+
+	old_dir->i_mtime = inode_set_ctime_current(old_dir);
+	if (new_dir != old_dir)
+		new_dir->i_mtime = inode_set_ctime_current(new_dir);
+	inode_set_ctime_current(d_inode(old_dentry));
+	if (newino)
+		inode_set_ctime_current(newino);
+}
+EXPORT_SYMBOL_GPL(simple_rename_timestamp);
+
 int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
 			   struct inode *new_dir, struct dentry *new_dentry)
 {
@@ -459,11 +484,7 @@ int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
 			inc_nlink(old_dir);
 		}
 	}
-	old_dir->i_ctime = old_dir->i_mtime =
-	new_dir->i_ctime = new_dir->i_mtime =
-	d_inode(old_dentry)->i_ctime =
-	d_inode(new_dentry)->i_ctime = current_time(old_dir);
-
+	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(simple_rename_exchange);
@@ -472,7 +493,6 @@ int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 		  struct dentry *old_dentry, struct inode *new_dir,
 		  struct dentry *new_dentry, unsigned int flags)
 {
-	struct inode *inode = d_inode(old_dentry);
 	int they_are_dirs = d_is_dir(old_dentry);
 
 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
@@ -495,9 +515,7 @@ int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 		inc_nlink(new_dir);
 	}
 
-	old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
-		new_dir->i_mtime = inode->i_ctime = current_time(old_dir);
-
+	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 	return 0;
 }
 EXPORT_SYMBOL(simple_rename);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d41bfcb26da0..42755cb7d55b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2977,6 +2977,8 @@ extern int simple_open(struct inode *inode, struct file *file);
 extern int simple_link(struct dentry *, struct inode *, struct dentry *);
 extern int simple_unlink(struct inode *, struct dentry *);
 extern int simple_rmdir(struct inode *, struct dentry *);
+void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
+			     struct inode *new_dir, struct dentry *new_dentry);
 extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
 				  struct inode *new_dir, struct dentry *new_dentry);
 extern int simple_rename(struct mnt_idmap *, struct inode *,

From 130f1eca981d96e11b79720dfc13d1522d399e51 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:34 -0400
Subject: [PATCH 0043/2047] btrfs: convert to simple_rename_timestamp

A rename potentially involves updating 4 different inode timestamps.
Convert to the new simple_rename_timestamp helper function.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-7-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/btrfs/inode.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dbbb67293e34..956ae499740a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8777,7 +8777,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
 	struct inode *new_inode = new_dentry->d_inode;
 	struct inode *old_inode = old_dentry->d_inode;
-	struct timespec64 ctime = current_time(old_inode);
 	struct btrfs_rename_ctx old_rename_ctx;
 	struct btrfs_rename_ctx new_rename_ctx;
 	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
@@ -8908,12 +8907,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	inode_inc_iversion(new_dir);
 	inode_inc_iversion(old_inode);
 	inode_inc_iversion(new_inode);
-	old_dir->i_mtime = ctime;
-	old_dir->i_ctime = ctime;
-	new_dir->i_mtime = ctime;
-	new_dir->i_ctime = ctime;
-	old_inode->i_ctime = ctime;
-	new_inode->i_ctime = ctime;
+	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 
 	if (old_dentry->d_parent != new_dentry->d_parent) {
 		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
@@ -9177,11 +9171,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
 	inode_inc_iversion(old_dir);
 	inode_inc_iversion(new_dir);
 	inode_inc_iversion(old_inode);
-	old_dir->i_mtime = current_time(old_dir);
-	old_dir->i_ctime = old_dir->i_mtime;
-	new_dir->i_mtime = old_dir->i_mtime;
-	new_dir->i_ctime = old_dir->i_mtime;
-	old_inode->i_ctime = old_dir->i_mtime;
+	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 
 	if (old_dentry->d_parent != new_dentry->d_parent)
 		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
@@ -9203,7 +9193,6 @@ static int btrfs_rename(struct mnt_idmap *idmap,
 
 	if (new_inode) {
 		inode_inc_iversion(new_inode);
-		new_inode->i_ctime = current_time(new_inode);
 		if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
 			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
 			ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry);

From e54c86fd48d0e9938d59a338a9f4811555fbf453 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:35 -0400
Subject: [PATCH 0044/2047] ubifs: convert to simple_rename_timestamp

A rename potentially involves updating 4 different inode timestamps.
Convert to the new simple_rename_timestamp helper function.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
Message-Id: <20230705190309.579783-8-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ubifs/dir.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index ef0499edc248..7ec25310bd8a 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1414,8 +1414,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 * Like most other Unix systems, set the @i_ctime for inodes on a
 	 * rename.
 	 */
-	time = current_time(old_dir);
-	old_inode->i_ctime = time;
+	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 
 	/* We must adjust parent link count when renaming directories */
 	if (is_dir) {
@@ -1444,13 +1443,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	old_dir->i_size -= old_sz;
 	ubifs_inode(old_dir)->ui_size = old_dir->i_size;
-	old_dir->i_mtime = old_dir->i_ctime = time;
-	new_dir->i_mtime = new_dir->i_ctime = time;
 
 	/*
 	 * And finally, if we unlinked a direntry which happened to have the
 	 * same name as the moved direntry, we have to decrement @i_nlink of
-	 * the unlinked inode and change its ctime.
+	 * the unlinked inode.
 	 */
 	if (unlink) {
 		/*
@@ -1462,7 +1459,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
 			clear_nlink(new_inode);
 		else
 			drop_nlink(new_inode);
-		new_inode->i_ctime = time;
 	} else {
 		new_dir->i_size += new_sz;
 		ubifs_inode(new_dir)->ui_size = new_dir->i_size;
@@ -1557,7 +1553,6 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
 	int sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir);
 	struct inode *fst_inode = d_inode(old_dentry);
 	struct inode *snd_inode = d_inode(new_dentry);
-	struct timespec64 time;
 	int err;
 	struct fscrypt_name fst_nm, snd_nm;
 
@@ -1588,11 +1583,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
 
 	lock_4_inodes(old_dir, new_dir, NULL, NULL);
 
-	time = current_time(old_dir);
-	fst_inode->i_ctime = time;
-	snd_inode->i_ctime = time;
-	old_dir->i_mtime = old_dir->i_ctime = time;
-	new_dir->i_mtime = new_dir->i_ctime = time;
+	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 
 	if (old_dir != new_dir) {
 		if (S_ISDIR(fst_inode->i_mode) && !S_ISDIR(snd_inode->i_mode)) {

From 944d0d9def9de37f0209ff73f3d8daa1baccab67 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:36 -0400
Subject: [PATCH 0045/2047] shmem: convert to simple_rename_timestamp

A rename potentially involves updating 4 different inode timestamps.
Convert to the new simple_rename_timestamp helper function.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-9-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/shmem.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 2f2e0e618072..a7dfbd221a25 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3266,9 +3266,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
 
 	old_dir->i_size -= BOGO_DIRENT_SIZE;
 	new_dir->i_size += BOGO_DIRENT_SIZE;
-	old_dir->i_ctime = old_dir->i_mtime =
-	new_dir->i_ctime = new_dir->i_mtime =
-	inode->i_ctime = current_time(old_dir);
+	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 	inode_inc_iversion(old_dir);
 	inode_inc_iversion(new_dir);
 	return 0;

From d06cdfddd1d1483c78b60c64d110ae2218b37a0c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:37 -0400
Subject: [PATCH 0046/2047] exfat: convert to simple_rename_timestamp

A rename potentially involves updating 4 different inode timestamps.
Convert to the new simple_rename_timestamp helper function.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-10-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/exfat/namei.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index d9b46fa36bff..e91022ff80ef 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -1312,8 +1312,8 @@ static int exfat_rename(struct mnt_idmap *idmap,
 		goto unlock;
 
 	inode_inc_iversion(new_dir);
-	new_dir->i_ctime = new_dir->i_mtime = new_dir->i_atime =
-		EXFAT_I(new_dir)->i_crtime = current_time(new_dir);
+	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
+	EXFAT_I(new_dir)->i_crtime = current_time(new_dir);
 	exfat_truncate_atime(&new_dir->i_atime);
 	if (IS_DIRSYNC(new_dir))
 		exfat_sync_inode(new_dir);
@@ -1336,7 +1336,6 @@ static int exfat_rename(struct mnt_idmap *idmap,
 	}
 
 	inode_inc_iversion(old_dir);
-	old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
 	if (IS_DIRSYNC(old_dir))
 		exfat_sync_inode(old_dir);
 	else

From 487e34fc9fc1ce8e59cf8d4087c30106c1079b0c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:38 -0400
Subject: [PATCH 0047/2047] ntfs3: convert to simple_rename_timestamp

A rename potentially involves updating 4 different inode timestamps.
Convert to the new simple_rename_timestamp helper function.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-11-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ntfs3/namei.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index 70f8c859e0ad..bfd986699f9e 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -324,14 +324,11 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir,
 		/* Restore after failed rename failed too. */
 		_ntfs_bad_inode(inode);
 	} else if (!err) {
-		inode->i_ctime = dir->i_ctime = dir->i_mtime =
-			current_time(dir);
+		simple_rename_timestamp(dir, dentry, new_dir, new_dentry);
 		mark_inode_dirty(inode);
 		mark_inode_dirty(dir);
-		if (dir != new_dir) {
-			new_dir->i_mtime = new_dir->i_ctime = dir->i_ctime;
+		if (dir != new_dir)
 			mark_inode_dirty(new_dir);
-		}
 
 		if (IS_DIRSYNC(dir))
 			ntfs_sync_inode(dir);

From 298d35ef763de3393bd1f60d35ff4203cbb4f26a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:39 -0400
Subject: [PATCH 0048/2047] reiserfs: convert to simple_rename_timestamp

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-12-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/reiserfs/namei.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 52240cc891cf..405ac59eb2dd 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1325,7 +1325,6 @@ static int reiserfs_rename(struct mnt_idmap *idmap,
 	int jbegin_count;
 	umode_t old_inode_mode;
 	unsigned long savelink = 1;
-	struct timespec64 ctime;
 
 	if (flags & ~RENAME_NOREPLACE)
 		return -EINVAL;
@@ -1576,14 +1575,11 @@ static int reiserfs_rename(struct mnt_idmap *idmap,
 
 	mark_de_hidden(old_de.de_deh + old_de.de_entry_num);
 	journal_mark_dirty(&th, old_de.de_bh);
-	ctime = current_time(old_dir);
-	old_dir->i_ctime = old_dir->i_mtime = ctime;
-	new_dir->i_ctime = new_dir->i_mtime = ctime;
 	/*
 	 * thanks to Alex Adriaanse <alex_a@caltech.edu> for patch
 	 * which adds ctime update of renamed object
 	 */
-	old_inode->i_ctime = ctime;
+	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 
 	if (new_dentry_inode) {
 		/* adjust link number of the victim */
@@ -1592,7 +1588,6 @@ static int reiserfs_rename(struct mnt_idmap *idmap,
 		} else {
 			drop_nlink(new_dentry_inode);
 		}
-		new_dentry_inode->i_ctime = ctime;
 		savelink = new_dentry_inode->i_nlink;
 	}
 

From 3cc0cc635baeaf2e15a7a58354249345ba84c96e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:40 -0400
Subject: [PATCH 0049/2047] spufs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Jeremy Kerr <jk@ozlabs.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Message-Id: <20230705190309.579783-13-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 arch/powerpc/platforms/cell/spufs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index ea807aa0c31a..38c5be34c895 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode)
 	inode->i_mode = mode;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 out:
 	return inode;
 }

From 59a4a3512c94c2e59757cd9119fcf4faf53fe198 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 3 Jul 2023 17:23:06 +0300
Subject: [PATCH 0050/2047] gpiolib: of: Don't use GPIO chip fwnode in
 of_gpiochip_*()

GPIO library should rely only on the GPIO device's fwnode.
Hence, replace GPIO chip fwnode usage by respective OF node
of the GPIO device.

JFYI, this is partial revert of 8afe82550240 ("gpiolib: of:
Prepare of_gpiochip_add() / of_gpiochip_remove() for fwnode").

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Benjamin Tissoires <bentiss@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 1436cdb5fa26..5fde5a3f5118 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -1078,16 +1078,16 @@ int of_gpiochip_add(struct gpio_chip *chip)
 	if (ret)
 		return ret;
 
-	fwnode_handle_get(chip->fwnode);
+	of_node_get(np);
 
 	ret = of_gpiochip_scan_gpios(chip);
 	if (ret)
-		fwnode_handle_put(chip->fwnode);
+		of_node_put(np);
 
 	return ret;
 }
 
 void of_gpiochip_remove(struct gpio_chip *chip)
 {
-	fwnode_handle_put(chip->fwnode);
+	of_node_put(dev_of_node(&chip->gpiodev->dev));
 }

From 067dbc1ea5ce61ba174d0c5f2e375defe4d562e6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 3 Jul 2023 17:23:07 +0300
Subject: [PATCH 0051/2047] gpiolib: acpi: Don't use GPIO chip fwnode in
 acpi_gpiochip_find()

GPIO library should rely only on the GPIO device's fwnode.
Hence, replace GPIO chip fwnode usage by respective handle
of the GPIO device.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Benjamin Tissoires <bentiss@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 97496c0f9133..fbda452fb4d6 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -128,7 +128,7 @@ static bool acpi_gpio_deferred_req_irqs_done;
 
 static int acpi_gpiochip_find(struct gpio_chip *gc, void *data)
 {
-	return ACPI_HANDLE_FWNODE(gc->fwnode) == data;
+	return device_match_acpi_handle(&gc->gpiodev->dev, data);
 }
 
 /**

From daecca4b8433d47f0db4933bcc0f283d530ba22e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 3 Jul 2023 17:23:08 +0300
Subject: [PATCH 0052/2047] gpiolib: Do not alter GPIO chip fwnode member

Ideally we should not touch data in the given GPIO chip structure.
Let's become closer to it by avoiding altering fwnode member.

The GPIO library must use fwnode from GPIO device and the drivers
might use one from GPIO chip in case they initialized it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Benjamin Tissoires <bentiss@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 251c875b5c34..826d927c2076 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -711,13 +711,6 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 	int base = 0;
 	int ret = 0;
 
-	/*
-	 * If the calling driver did not initialize firmware node, do it here
-	 * using the parent device, if any.
-	 */
-	if (!gc->fwnode && gc->parent)
-		gc->fwnode = dev_fwnode(gc->parent);
-
 	/*
 	 * First: allocate and populate the internal stat container, and
 	 * set up the struct device.
@@ -732,7 +725,14 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 	gc->gpiodev = gdev;
 	gpiochip_set_data(gc, data);
 
-	device_set_node(&gdev->dev, gc->fwnode);
+	/*
+	 * If the calling driver did not initialize firmware node,
+	 * do it here using the parent device, if any.
+	 */
+	if (gc->fwnode)
+		device_set_node(&gdev->dev, gc->fwnode);
+	else if (gc->parent)
+		device_set_node(&gdev->dev, dev_fwnode(gc->parent));
 
 	gdev->id = ida_alloc(&gpio_ida, GFP_KERNEL);
 	if (gdev->id < 0) {

From b683b487dce74bd709aa21aa1056bcc9462d1dfc Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 21 Jun 2023 20:49:39 +0300
Subject: [PATCH 0053/2047] gpiolib: Make gpiochip_hierarchy_add_domain()
 return domain

As a preparatory patch and for the sake of consistency, make
gpiochip_hierarchy_add_domain() return IRQ domain. While at it,
rename it to gpiochip_hierarchy_create_domain() to show
the change.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 826d927c2076..1c0d0e33c6bd 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1292,12 +1292,14 @@ static void gpiochip_hierarchy_setup_domain_ops(struct irq_domain_ops *ops)
 		ops->free = irq_domain_free_irqs_common;
 }
 
-static int gpiochip_hierarchy_add_domain(struct gpio_chip *gc)
+static struct irq_domain *gpiochip_hierarchy_create_domain(struct gpio_chip *gc)
 {
+	struct irq_domain *domain;
+
 	if (!gc->irq.child_to_parent_hwirq ||
 	    !gc->irq.fwnode) {
 		chip_err(gc, "missing irqdomain vital data\n");
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 	}
 
 	if (!gc->irq.child_offset_to_irq)
@@ -1309,7 +1311,7 @@ static int gpiochip_hierarchy_add_domain(struct gpio_chip *gc)
 
 	gpiochip_hierarchy_setup_domain_ops(&gc->irq.child_irq_domain_ops);
 
-	gc->irq.domain = irq_domain_create_hierarchy(
+	domain = irq_domain_create_hierarchy(
 		gc->irq.parent_domain,
 		0,
 		gc->ngpio,
@@ -1317,12 +1319,12 @@ static int gpiochip_hierarchy_add_domain(struct gpio_chip *gc)
 		&gc->irq.child_irq_domain_ops,
 		gc);
 
-	if (!gc->irq.domain)
-		return -ENOMEM;
+	if (!domain)
+		return ERR_PTR(-ENOMEM);
 
 	gpiochip_set_hierarchical_irqchip(gc, gc->irq.chip);
 
-	return 0;
+	return domain;
 }
 
 static bool gpiochip_hierarchy_is_hierarchical(struct gpio_chip *gc)
@@ -1366,9 +1368,9 @@ EXPORT_SYMBOL_GPL(gpiochip_populate_parent_fwspec_fourcell);
 
 #else
 
-static int gpiochip_hierarchy_add_domain(struct gpio_chip *gc)
+static struct irq_domain *gpiochip_hierarchy_create_domain(struct gpio_chip *gc)
 {
-	return -EINVAL;
+	return ERR_PTR(-EINVAL);
 }
 
 static bool gpiochip_hierarchy_is_hierarchical(struct gpio_chip *gc)
@@ -1667,9 +1669,9 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
 
 	/* If a parent irqdomain is provided, let's build a hierarchy */
 	if (gpiochip_hierarchy_is_hierarchical(gc)) {
-		int ret = gpiochip_hierarchy_add_domain(gc);
-		if (ret)
-			return ret;
+		gc->irq.domain = gpiochip_hierarchy_create_domain(gc);
+		if (IS_ERR(gc->irq.domain))
+			return PTR_ERR(gc->irq.domain);
 	} else {
 		gc->irq.domain = irq_domain_create_simple(fwnode,
 			gc->ngpio,

From 1efc43de1781bbb8780ee6f6f2291073003f1ff1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 21 Jun 2023 20:49:40 +0300
Subject: [PATCH 0054/2047] gpiolib: Factor out gpiochip_simple_create_domain()

As a preparatory patch and for the sake of consistency,
factor out gpiochip_simple_create_domain().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 1c0d0e33c6bd..796d2802ebb9 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1447,6 +1447,19 @@ static const struct irq_domain_ops gpiochip_domain_ops = {
 	.xlate	= irq_domain_xlate_twocell,
 };
 
+static struct irq_domain *gpiochip_simple_create_domain(struct gpio_chip *gc)
+{
+	struct fwnode_handle *fwnode = dev_fwnode(&gc->gpiodev->dev);
+	struct irq_domain *domain;
+
+	domain = irq_domain_create_simple(fwnode, gc->ngpio, gc->irq.first,
+					  &gpiochip_domain_ops, gc);
+	if (!domain)
+		return ERR_PTR(-EINVAL);
+
+	return domain;
+}
+
 /*
  * TODO: move these activate/deactivate in under the hierarchicial
  * irqchip implementation as static once SPMI and SSBI (all external
@@ -1673,13 +1686,9 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
 		if (IS_ERR(gc->irq.domain))
 			return PTR_ERR(gc->irq.domain);
 	} else {
-		gc->irq.domain = irq_domain_create_simple(fwnode,
-			gc->ngpio,
-			gc->irq.first,
-			&gpiochip_domain_ops,
-			gc);
-		if (!gc->irq.domain)
-			return -EINVAL;
+		gc->irq.domain = gpiochip_simple_create_domain(gc);
+		if (IS_ERR(gc->irq.domain))
+			return PTR_ERR(gc->irq.domain);
 	}
 
 	if (gc->irq.parent_handler) {

From 39f3ad73d4465d0333444bd5adce052f8e1c2783 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 21 Jun 2023 20:49:41 +0300
Subject: [PATCH 0055/2047] gpiolib: Do not assign error pointer to the GPIO
 IRQ chip domain

Check domain for being an error pointer before assigning it to
the GPIO IRQ chip domain.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 796d2802ebb9..e4300b99bfa5 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1650,6 +1650,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
 {
 	struct fwnode_handle *fwnode = dev_fwnode(&gc->gpiodev->dev);
 	struct irq_chip *irqchip = gc->irq.chip;
+	struct irq_domain *domain;
 	unsigned int type;
 	unsigned int i;
 
@@ -1682,14 +1683,13 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
 
 	/* If a parent irqdomain is provided, let's build a hierarchy */
 	if (gpiochip_hierarchy_is_hierarchical(gc)) {
-		gc->irq.domain = gpiochip_hierarchy_create_domain(gc);
-		if (IS_ERR(gc->irq.domain))
-			return PTR_ERR(gc->irq.domain);
+		domain = gpiochip_hierarchy_create_domain(gc);
 	} else {
-		gc->irq.domain = gpiochip_simple_create_domain(gc);
-		if (IS_ERR(gc->irq.domain))
-			return PTR_ERR(gc->irq.domain);
+		domain = gpiochip_simple_create_domain(gc);
 	}
+	if (IS_ERR(domain))
+		return PTR_ERR(domain);
+	gc->irq.domain = domain;
 
 	if (gc->irq.parent_handler) {
 		for (i = 0; i < gc->irq.num_parents; i++) {

From 081bfdb303abaf5c818de5a444dd899c7de3fab0 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 21 Jun 2023 20:49:42 +0300
Subject: [PATCH 0056/2047] gpiolib: Split out
 gpiochip_irqchip_add_allocated_domain() helper

The gpiochip_irqchip_add_allocated_domain() can be used
in another place in the code.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib.c | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index e4300b99bfa5..0179902d3b74 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1638,6 +1638,28 @@ static void gpiochip_set_irq_hooks(struct gpio_chip *gc)
 	}
 }
 
+static int gpiochip_irqchip_add_allocated_domain(struct gpio_chip *gc,
+						 struct irq_domain *domain,
+						 bool allocated_externally)
+{
+	if (!domain)
+		return -EINVAL;
+
+	gc->to_irq = gpiochip_to_irq;
+	gc->irq.domain = domain;
+	gc->irq.domain_is_allocated_externally = allocated_externally;
+
+	/*
+	 * Using barrier() here to prevent compiler from reordering
+	 * gc->irq.initialized before adding irqdomain.
+	 */
+	barrier();
+
+	gc->irq.initialized = true;
+
+	return 0;
+}
+
 /**
  * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip
  * @gc: the GPIO chip to add the IRQ chip to
@@ -1791,22 +1813,7 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gc)
 int gpiochip_irqchip_add_domain(struct gpio_chip *gc,
 				struct irq_domain *domain)
 {
-	if (!domain)
-		return -EINVAL;
-
-	gc->to_irq = gpiochip_to_irq;
-	gc->irq.domain = domain;
-	gc->irq.domain_is_allocated_externally = true;
-
-	/*
-	 * Using barrier() here to prevent compiler from reordering
-	 * gc->irq.initialized before adding irqdomain.
-	 */
-	barrier();
-
-	gc->irq.initialized = true;
-
-	return 0;
+	return gpiochip_irqchip_add_allocated_domain(gc, domain, true);
 }
 EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_domain);
 

From eec349dbe4fa2712d4933d674531778a93c50b28 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 21 Jun 2023 20:49:43 +0300
Subject: [PATCH 0057/2047] gpiolib: Replace open coded
 gpiochip_irqchip_add_allocated_domain()

Replace open coded variant of gpiochip_irqchip_add_allocated_domain()
in gpiochip_add_irqchip().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 0179902d3b74..62199ec96db2 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1645,6 +1645,9 @@ static int gpiochip_irqchip_add_allocated_domain(struct gpio_chip *gc,
 	if (!domain)
 		return -EINVAL;
 
+	if (gc->to_irq)
+		chip_warn(gc, "to_irq is redefined in %s and you shouldn't rely on it\n", __func__);
+
 	gc->to_irq = gpiochip_to_irq;
 	gc->irq.domain = domain;
 	gc->irq.domain_is_allocated_externally = allocated_externally;
@@ -1675,6 +1678,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
 	struct irq_domain *domain;
 	unsigned int type;
 	unsigned int i;
+	int ret;
 
 	if (!irqchip)
 		return 0;
@@ -1695,10 +1699,6 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
 		 "%pfw: Ignoring %u default trigger\n", fwnode, type))
 		type = IRQ_TYPE_NONE;
 
-	if (gc->to_irq)
-		chip_warn(gc, "to_irq is redefined in %s and you shouldn't rely on it\n", __func__);
-
-	gc->to_irq = gpiochip_to_irq;
 	gc->irq.default_type = type;
 	gc->irq.lock_key = lock_key;
 	gc->irq.request_key = request_key;
@@ -1711,7 +1711,6 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
 	}
 	if (IS_ERR(domain))
 		return PTR_ERR(domain);
-	gc->irq.domain = domain;
 
 	if (gc->irq.parent_handler) {
 		for (i = 0; i < gc->irq.num_parents; i++) {
@@ -1735,14 +1734,9 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
 
 	gpiochip_set_irq_hooks(gc);
 
-	/*
-	 * Using barrier() here to prevent compiler from reordering
-	 * gc->irq.initialized before initialization of above
-	 * GPIO chip irq members.
-	 */
-	barrier();
-
-	gc->irq.initialized = true;
+	ret = gpiochip_irqchip_add_allocated_domain(gc, domain, false);
+	if (ret)
+		return ret;
 
 	acpi_gpiochip_request_interrupts(gc);
 

From d16e0b0e798700b036ad2701ce70525a6fbea8ea Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland@sifive.com>
Date: Mon, 26 Jun 2023 10:26:08 -0700
Subject: [PATCH 0058/2047] gpio: sifive: Support IRQ wake

Each pin drives a separate interrupt in the parent IRQ domain, so there
is no need to set IRQCHIP_MASK_ON_SUSPEND.

Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-sifive.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
index 745e5f67254e..efab7b58f739 100644
--- a/drivers/gpio/gpio-sifive.c
+++ b/drivers/gpio/gpio-sifive.c
@@ -150,6 +150,7 @@ static const struct irq_chip sifive_gpio_irqchip = {
 	.irq_disable	= sifive_gpio_irq_disable,
 	.irq_eoi	= sifive_gpio_irq_eoi,
 	.irq_set_affinity = sifive_gpio_irq_set_affinity,
+	.irq_set_wake	= irq_chip_set_wake_parent,
 	.flags		= IRQCHIP_IMMUTABLE,
 	GPIOCHIP_IRQ_RESOURCE_HELPERS,
 };

From 8b70deb8ca901d45e2342d654c1e8d306f52e6cb Mon Sep 17 00:00:00 2001
From: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
Date: Wed, 26 Apr 2023 19:22:21 +0200
Subject: [PATCH 0059/2047] fs/ecryptfs: Replace kmap() with kmap_local_page()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kmap() has been deprecated in favor of kmap_local_page().

Therefore, replace kmap() with kmap_local_page() in fs/ecryptfs.

There are two main problems with kmap(): (1) It comes with an overhead as
the mapping space is restricted and protected by a global lock for
synchronization and (2) it also requires global TLB invalidation when the
kmap’s pool wraps and it might block when the mapping space is fully
utilized until a slot becomes available.

With kmap_local_page() the mappings are per thread, CPU local, can take
page faults, and can be called from any context (including interrupts).
It is faster than kmap() in kernels with HIGHMEM enabled. The tasks can
be preempted and, when they are scheduled to run again, the kernel
virtual addresses are restored and still valid.

Obviously, thread locality implies that the kernel virtual addresses
returned by kmap_local_page() are only valid in the context of the
callers (i.e., they cannot be handed to other threads).

The use of kmap_local_page() in fs/ecryptfs does not break the
above-mentioned assumption, so it is allowed and preferred.

Tested in a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with
HIGHMEM64GB enabled.

Suggested-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
Message-Id: <20230426172223.8896-2-fmdefrancesco@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ecryptfs/crypto.c     | 8 ++++----
 fs/ecryptfs/read_write.c | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c16f0d660cb7..03bd55069d86 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -441,10 +441,10 @@ int ecryptfs_encrypt_page(struct page *page)
 	}
 
 	lower_offset = lower_offset_for_page(crypt_stat, page);
-	enc_extent_virt = kmap(enc_extent_page);
+	enc_extent_virt = kmap_local_page(enc_extent_page);
 	rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset,
 				  PAGE_SIZE);
-	kunmap(enc_extent_page);
+	kunmap_local(enc_extent_virt);
 	if (rc < 0) {
 		ecryptfs_printk(KERN_ERR,
 			"Error attempting to write lower page; rc = [%d]\n",
@@ -490,10 +490,10 @@ int ecryptfs_decrypt_page(struct page *page)
 	BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
 
 	lower_offset = lower_offset_for_page(crypt_stat, page);
-	page_virt = kmap(page);
+	page_virt = kmap_local_page(page);
 	rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_SIZE,
 				 ecryptfs_inode);
-	kunmap(page);
+	kunmap_local(page_virt);
 	if (rc < 0) {
 		ecryptfs_printk(KERN_ERR,
 			"Error attempting to read lower page; rc = [%d]\n",
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 60bdcaddcbe5..5edf027c8359 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -64,11 +64,11 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
 
 	offset = ((((loff_t)page_for_lower->index) << PAGE_SHIFT)
 		  + offset_in_page);
-	virt = kmap(page_for_lower);
+	virt = kmap_local_page(page_for_lower);
 	rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size);
 	if (rc > 0)
 		rc = 0;
-	kunmap(page_for_lower);
+	kunmap_local(virt);
 	return rc;
 }
 
@@ -253,11 +253,11 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
 	int rc;
 
 	offset = ((((loff_t)page_index) << PAGE_SHIFT) + offset_in_page);
-	virt = kmap(page_for_ecryptfs);
+	virt = kmap_local_page(page_for_ecryptfs);
 	rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode);
 	if (rc > 0)
 		rc = 0;
-	kunmap(page_for_ecryptfs);
+	kunmap_local(virt);
 	flush_dcache_page(page_for_ecryptfs);
 	return rc;
 }

From c3c6833ea8114a18d5acf5f9a4d32f864103144c Mon Sep 17 00:00:00 2001
From: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
Date: Wed, 26 Apr 2023 19:22:22 +0200
Subject: [PATCH 0060/2047] fs/ecryptfs: Use kmap_local_page() in
 ecryptfs_write()

kmap_atomic() is deprecated in favor of kmap_local_page().

Therefore, replace kmap_atomic() with kmap_local_page() in
ecryptfs_write().

kmap_atomic() is implemented like kmap_local_page() which also disables
page-faults and preemption (the latter only for !PREEMPT_RT kernels).

The code within the mapping/un-mapping in ecryptfs_write() does not
depend on the above-mentioned side effects so that a mere replacement of
the old API with the new one is all that is required (i.e., there is no
need to explicitly call pagefault_disable() and/or preempt_disable()).

Tested in a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with
HIGHMEM64GB enabled.

Suggested-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
Message-Id: <20230426172223.8896-3-fmdefrancesco@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ecryptfs/read_write.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 5edf027c8359..3458f153a588 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -140,7 +140,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
 			       ecryptfs_page_idx, rc);
 			goto out;
 		}
-		ecryptfs_page_virt = kmap_atomic(ecryptfs_page);
+		ecryptfs_page_virt = kmap_local_page(ecryptfs_page);
 
 		/*
 		 * pos: where we're now writing, offset: where the request was
@@ -163,7 +163,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
 			       (data + data_offset), num_bytes);
 			data_offset += num_bytes;
 		}
-		kunmap_atomic(ecryptfs_page_virt);
+		kunmap_local(ecryptfs_page_virt);
 		flush_dcache_page(ecryptfs_page);
 		SetPageUptodate(ecryptfs_page);
 		unlock_page(ecryptfs_page);

From e2393b8f3987c5f29dca03aa71d0502f6d721c0d Mon Sep 17 00:00:00 2001
From: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
Date: Wed, 26 Apr 2023 19:22:23 +0200
Subject: [PATCH 0061/2047] fs/ecryptfs: Use kmap_local_page() in
 copy_up_encrypted_with_header()

kmap_atomic() has been deprecated in favor of kmap_local_page().

Therefore, replace kmap_atomic() with kmap_local_page() in
ecryptfs_copy_up_encrypted_with_header().

kmap_atomic() is implemented like a kmap_local_page() which also
disables page-faults and preemption (the latter only in !PREEMPT_RT
kernels). The kernel virtual addresses returned by these two API are
only valid in the context of the callers (i.e., they cannot be handed to
other threads).

With kmap_local_page() the mappings are per thread and CPU local like
in kmap_atomic(); however, they can handle page-faults and can be called
from any context (including interrupts). The tasks that call
kmap_local_page() can be preempted and, when they are scheduled to run
again, the kernel virtual addresses are restored and are still valid.

In ecryptfs_copy_up_encrypted_with_header(), the block of code between
the mapping and un-mapping does not depend on the above-mentioned side
effects of kmap_aatomic(), so that the mere replacements of the old API
with the new one is all that is required (i.e., there is no need to
explicitly call pagefault_disable() and/or preempt_disable()).

Tested in a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with
HIGHMEM64GB enabled.

Cc: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
Message-Id: <20230426172223.8896-4-fmdefrancesco@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ecryptfs/mmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 373c3e5747e6..cb1e998ce54d 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -125,7 +125,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
 			/* This is a header extent */
 			char *page_virt;
 
-			page_virt = kmap_atomic(page);
+			page_virt = kmap_local_page(page);
 			memset(page_virt, 0, PAGE_SIZE);
 			/* TODO: Support more than one header extent */
 			if (view_extent_num == 0) {
@@ -138,7 +138,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
 							       crypt_stat,
 							       &written);
 			}
-			kunmap_atomic(page_virt);
+			kunmap_local(page_virt);
 			flush_dcache_page(page);
 			if (rc) {
 				printk(KERN_ERR "%s: Error reading xattr "

From ba38980add7ffc9e674ada5b4ded4e7d14e76581 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Sun, 4 Jun 2023 12:16:06 +0100
Subject: [PATCH 0062/2047] reiserfs: Check the return value from __getblk()

__getblk() can return a NULL pointer if we run out of memory or if we
try to access beyond the end of the device; check it and handle it
appropriately.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Link: https://lore.kernel.org/lkml/CAFcO6XOacq3hscbXevPQP7sXRoYFz34ZdKPYjmd6k5sZuhGFDw@mail.gmail.com/
Tested-by: butt3rflyh4ck <butterflyhuangxx@gmail.com>
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") # probably introduced in 2002
Acked-by: Edward Shishkin <edward.shishkin@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/reiserfs/journal.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 479aa4a57602..015bfe4e4524 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2326,7 +2326,7 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
 	int i, j;
 
 	bh = __getblk(dev, block, bufsize);
-	if (buffer_uptodate(bh))
+	if (!bh || buffer_uptodate(bh))
 		return (bh);
 
 	if (block + BUFNR > max_block) {
@@ -2336,6 +2336,8 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
 	j = 1;
 	for (i = 1; i < blocks; i++) {
 		bh = __getblk(dev, block + i, bufsize);
+		if (!bh)
+			break;
 		if (buffer_uptodate(bh)) {
 			brelse(bh);
 			break;

From 12ee4b66af34f8e72f3b2fd93a946a955efe7c86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?=
 <nabijaczleweli@nabijaczleweli.xyz>
Date: Mon, 3 Jul 2023 16:42:13 +0200
Subject: [PATCH 0063/2047] splice: always fsnotify_access(in),
 fsnotify_modify(out) on success
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current behaviour caused an asymmetry where some write APIs
(write, sendfile) would notify the written-to/read-from objects,
but splice wouldn't.

This affected userspace which uses inotify, most notably coreutils
tail -f, to monitor pipes.
If the pipe buffer had been filled by a splice-family function:
  * tail wouldn't know and thus wouldn't service the pipe, and
  * all writes to the pipe would block because it's full,
thus service was denied.
(For the particular case of tail -f this could be worked around
 with ---disable-inotify.)

Fixes: 983652c69199 ("splice: report related fsnotify events")
Link: https://lore.kernel.org/linux-fsdevel/jbyihkyk5dtaohdwjyivambb2gffyjs3dodpofafnkkunxq7bu@jngkdxx65pux/t/#u
Link: https://bugs.debian.org/1039488
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Acked-by: Jan Kara <jack@suse.cz>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Message-Id: <604ec704d933e0e0121d9e107ce914512e045fad.1688393619.git.nabijaczleweli@nabijaczleweli.xyz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/splice.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index 004eb1c4ce31..016b0a83df44 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1267,10 +1267,8 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
 		if ((in->f_flags | out->f_flags) & O_NONBLOCK)
 			flags |= SPLICE_F_NONBLOCK;
 
-		return splice_pipe_to_pipe(ipipe, opipe, len, flags);
-	}
-
-	if (ipipe) {
+		ret = splice_pipe_to_pipe(ipipe, opipe, len, flags);
+	} else if (ipipe) {
 		if (off_in)
 			return -ESPIPE;
 		if (off_out) {
@@ -1295,18 +1293,11 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
 		ret = do_splice_from(ipipe, out, &offset, len, flags);
 		file_end_write(out);
 
-		if (ret > 0)
-			fsnotify_modify(out);
-
 		if (!off_out)
 			out->f_pos = offset;
 		else
 			*off_out = offset;
-
-		return ret;
-	}
-
-	if (opipe) {
+	} else if (opipe) {
 		if (off_out)
 			return -ESPIPE;
 		if (off_in) {
@@ -1322,18 +1313,25 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
 
 		ret = splice_file_to_pipe(in, opipe, &offset, len, flags);
 
-		if (ret > 0)
-			fsnotify_access(in);
-
 		if (!off_in)
 			in->f_pos = offset;
 		else
 			*off_in = offset;
-
-		return ret;
+	} else {
+		ret = -EINVAL;
 	}
 
-	return -EINVAL;
+	if (ret > 0) {
+		/*
+		 * Generate modify out before access in:
+		 * do_splice_from() may've already sent modify out,
+		 * and this ensures the events get merged.
+		 */
+		fsnotify_modify(out);
+		fsnotify_access(in);
+	}
+
+	return ret;
 }
 
 static long __do_splice(struct file *in, loff_t __user *off_in,

From 7f0f1ea069e52d5a16921abd59377a7da6c25149 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?=
 <nabijaczleweli@nabijaczleweli.xyz>
Date: Mon, 3 Jul 2023 16:42:17 +0200
Subject: [PATCH 0064/2047] splice: fsnotify_access(fd)/fsnotify_modify(fd) in
 vmsplice
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Same logic applies here: this can fill up the pipe and pollers that rely
on getting IN_MODIFY notifications never wake up.

Fixes: 983652c69199 ("splice: report related fsnotify events")
Link: https://lore.kernel.org/linux-fsdevel/jbyihkyk5dtaohdwjyivambb2gffyjs3dodpofafnkkunxq7bu@jngkdxx65pux/t/#u
Link: https://bugs.debian.org/1039488
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Acked-by: Jan Kara <jack@suse.cz>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Message-Id: <8d9ad5acb9c5c1dd2376a2ff5da6ac3183115389.1688393619.git.nabijaczleweli@nabijaczleweli.xyz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/splice.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/splice.c b/fs/splice.c
index 016b0a83df44..2cb89f5f9a8a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1460,6 +1460,9 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
 		pipe_unlock(pipe);
 	}
 
+	if (ret > 0)
+		fsnotify_access(file);
+
 	return ret;
 }
 
@@ -1489,8 +1492,10 @@ static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
 	if (!ret)
 		ret = iter_to_pipe(iter, pipe, buf_flag);
 	pipe_unlock(pipe);
-	if (ret > 0)
+	if (ret > 0) {
 		wakeup_pipe_readers(pipe);
+		fsnotify_modify(file);
+	}
 	return ret;
 }
 

From 576d498e0ac5caff2d9f6312573ab54d98f12d32 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?=
 <nabijaczleweli@nabijaczleweli.xyz>
Date: Mon, 3 Jul 2023 16:42:21 +0200
Subject: [PATCH 0065/2047] splice: fsnotify_access(in), fsnotify_modify(out)
 on success in tee
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Same logic applies here: this can fill up the pipe, and pollers that rely
on getting IN_MODIFY notifications never wake up.

Fixes: 983652c69199 ("splice: report related fsnotify events")
Link: https://lore.kernel.org/linux-fsdevel/jbyihkyk5dtaohdwjyivambb2gffyjs3dodpofafnkkunxq7bu@jngkdxx65pux/t/#u
Link: https://bugs.debian.org/1039488
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Acked-by: Jan Kara <jack@suse.cz>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Message-Id: <10d76dd8c85017ae3cd047c9b9a32e26daefdaa2.1688393619.git.nabijaczleweli@nabijaczleweli.xyz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/splice.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/splice.c b/fs/splice.c
index 2cb89f5f9a8a..378fedb392ae 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1929,6 +1929,11 @@ long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags)
 		}
 	}
 
+	if (ret > 0) {
+		fsnotify_access(in);
+		fsnotify_modify(out);
+	}
+
 	return ret;
 }
 

From bccb5c397fbfe348e22c045f5e09c35780edb817 Mon Sep 17 00:00:00 2001
From: Luca Vizzarro <Luca.Vizzarro@arm.com>
Date: Thu, 2 Feb 2023 11:58:31 +0000
Subject: [PATCH 0066/2047] fcntl: Cast commands with int args explicitly

According to the fcntl API specification commands that expect an
integer, hence not a pointer, always take an int and not long. In
order to avoid access to undefined bits, we should explicitly cast
the argument to int.

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Kevin Brodsky <Kevin.Brodsky@arm.com>
Cc: Vincenzo Frascino <Vincenzo.Frascino@arm.com>
Cc: Szabolcs Nagy <Szabolcs.Nagy@arm.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: David Laight <David.Laight@ACULAB.com>
Cc: Mark Rutland <Mark.Rutland@arm.com>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-morello@op-lists.linaro.org
Signed-off-by: Luca Vizzarro <Luca.Vizzarro@arm.com>
Message-Id: <20230414152459.816046-2-Luca.Vizzarro@arm.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fcntl.c         | 29 +++++++++++++++--------------
 include/linux/fs.h |  2 +-
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index b622be119706..e871009f6c88 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -34,7 +34,7 @@
 
 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
 
-static int setfl(int fd, struct file * filp, unsigned long arg)
+static int setfl(int fd, struct file * filp, unsigned int arg)
 {
 	struct inode * inode = file_inode(filp);
 	int error = 0;
@@ -112,11 +112,11 @@ void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 }
 EXPORT_SYMBOL(__f_setown);
 
-int f_setown(struct file *filp, unsigned long arg, int force)
+int f_setown(struct file *filp, int who, int force)
 {
 	enum pid_type type;
 	struct pid *pid = NULL;
-	int who = arg, ret = 0;
+	int ret = 0;
 
 	type = PIDTYPE_TGID;
 	if (who < 0) {
@@ -317,28 +317,29 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		struct file *filp)
 {
 	void __user *argp = (void __user *)arg;
+	int argi = (int)arg;
 	struct flock flock;
 	long err = -EINVAL;
 
 	switch (cmd) {
 	case F_DUPFD:
-		err = f_dupfd(arg, filp, 0);
+		err = f_dupfd(argi, filp, 0);
 		break;
 	case F_DUPFD_CLOEXEC:
-		err = f_dupfd(arg, filp, O_CLOEXEC);
+		err = f_dupfd(argi, filp, O_CLOEXEC);
 		break;
 	case F_GETFD:
 		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
 		break;
 	case F_SETFD:
 		err = 0;
-		set_close_on_exec(fd, arg & FD_CLOEXEC);
+		set_close_on_exec(fd, argi & FD_CLOEXEC);
 		break;
 	case F_GETFL:
 		err = filp->f_flags;
 		break;
 	case F_SETFL:
-		err = setfl(fd, filp, arg);
+		err = setfl(fd, filp, argi);
 		break;
 #if BITS_PER_LONG != 32
 	/* 32-bit arches must use fcntl64() */
@@ -375,7 +376,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		force_successful_syscall_return();
 		break;
 	case F_SETOWN:
-		err = f_setown(filp, arg, 1);
+		err = f_setown(filp, argi, 1);
 		break;
 	case F_GETOWN_EX:
 		err = f_getown_ex(filp, arg);
@@ -391,28 +392,28 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		break;
 	case F_SETSIG:
 		/* arg == 0 restores default behaviour. */
-		if (!valid_signal(arg)) {
+		if (!valid_signal(argi)) {
 			break;
 		}
 		err = 0;
-		filp->f_owner.signum = arg;
+		filp->f_owner.signum = argi;
 		break;
 	case F_GETLEASE:
 		err = fcntl_getlease(filp);
 		break;
 	case F_SETLEASE:
-		err = fcntl_setlease(fd, filp, arg);
+		err = fcntl_setlease(fd, filp, argi);
 		break;
 	case F_NOTIFY:
-		err = fcntl_dirnotify(fd, filp, arg);
+		err = fcntl_dirnotify(fd, filp, argi);
 		break;
 	case F_SETPIPE_SZ:
 	case F_GETPIPE_SZ:
-		err = pipe_fcntl(filp, cmd, arg);
+		err = pipe_fcntl(filp, cmd, argi);
 		break;
 	case F_ADD_SEALS:
 	case F_GET_SEALS:
-		err = memfd_fcntl(filp, cmd, arg);
+		err = memfd_fcntl(filp, cmd, argi);
 		break;
 	case F_GET_RW_HINT:
 	case F_SET_RW_HINT:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6867512907d6..8e546433c781 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1069,7 +1069,7 @@ extern void fasync_free(struct fasync_struct *);
 extern void kill_fasync(struct fasync_struct **, int, int);
 
 extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
-extern int f_setown(struct file *filp, unsigned long arg, int force);
+extern int f_setown(struct file *filp, int who, int force);
 extern void f_delown(struct file *filp);
 extern pid_t f_getown(struct file *filp);
 extern int send_sigurg(struct fown_struct *fown);

From ed5f17f66ef39273dcf83ca89b2f1d52a52b22a5 Mon Sep 17 00:00:00 2001
From: Luca Vizzarro <Luca.Vizzarro@arm.com>
Date: Wed, 1 Feb 2023 15:05:33 +0000
Subject: [PATCH 0067/2047] fs: Pass argument to fcntl_setlease as int

The interface for fcntl expects the argument passed for the command
F_SETLEASE to be of type int. The current code wrongly treats it as
a long. In order to avoid access to undefined bits, we should explicitly
cast the argument to int.

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Anna Schumaker <anna@kernel.org>
Cc: Kevin Brodsky <Kevin.Brodsky@arm.com>
Cc: Vincenzo Frascino <Vincenzo.Frascino@arm.com>
Cc: Szabolcs Nagy <Szabolcs.Nagy@arm.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: David Laight <David.Laight@ACULAB.com>
Cc: Mark Rutland <Mark.Rutland@arm.com>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-cifs@vger.kernel.org
Cc: linux-nfs@vger.kernel.org
Cc: linux-morello@op-lists.linaro.org
Signed-off-by: Luca Vizzarro <Luca.Vizzarro@arm.com>
Message-Id: <20230414152459.816046-3-Luca.Vizzarro@arm.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/libfs.c               |  2 +-
 fs/locks.c               | 20 ++++++++++----------
 fs/nfs/nfs4_fs.h         |  2 +-
 fs/nfs/nfs4file.c        |  2 +-
 fs/nfs/nfs4proc.c        |  4 ++--
 fs/smb/client/cifsfs.c   |  2 +-
 include/linux/filelock.h | 12 ++++++------
 include/linux/fs.h       |  4 ++--
 8 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index 5b851315eeed..0ea7fedb0b7a 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1269,7 +1269,7 @@ EXPORT_SYMBOL(alloc_anon_inode);
  * All arguments are ignored and it just returns -EINVAL.
  */
 int
-simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
+simple_nosetlease(struct file *filp, int arg, struct file_lock **flp,
 		  void **priv)
 {
 	return -EINVAL;
diff --git a/fs/locks.c b/fs/locks.c
index df8b26a42524..265b5190db3e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -438,7 +438,7 @@ static void flock_make_lock(struct file *filp, struct file_lock *fl, int type)
 	fl->fl_end = OFFSET_MAX;
 }
 
-static int assign_type(struct file_lock *fl, long type)
+static int assign_type(struct file_lock *fl, int type)
 {
 	switch (type) {
 	case F_RDLCK:
@@ -549,7 +549,7 @@ static const struct lock_manager_operations lease_manager_ops = {
 /*
  * Initialize a lease, use the default lock manager operations
  */
-static int lease_init(struct file *filp, long type, struct file_lock *fl)
+static int lease_init(struct file *filp, int type, struct file_lock *fl)
 {
 	if (assign_type(fl, type) != 0)
 		return -EINVAL;
@@ -567,7 +567,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl)
 }
 
 /* Allocate a file_lock initialised to this type of lease */
-static struct file_lock *lease_alloc(struct file *filp, long type)
+static struct file_lock *lease_alloc(struct file *filp, int type)
 {
 	struct file_lock *fl = locks_alloc_lock();
 	int error = -ENOMEM;
@@ -1666,7 +1666,7 @@ int fcntl_getlease(struct file *filp)
  * conflict with the lease we're trying to set.
  */
 static int
-check_conflicting_open(struct file *filp, const long arg, int flags)
+check_conflicting_open(struct file *filp, const int arg, int flags)
 {
 	struct inode *inode = file_inode(filp);
 	int self_wcount = 0, self_rcount = 0;
@@ -1701,7 +1701,7 @@ check_conflicting_open(struct file *filp, const long arg, int flags)
 }
 
 static int
-generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
+generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **priv)
 {
 	struct file_lock *fl, *my_fl = NULL, *lease;
 	struct inode *inode = file_inode(filp);
@@ -1859,7 +1859,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
  *	The (input) flp->fl_lmops->lm_break function is required
  *	by break_lease().
  */
-int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
+int generic_setlease(struct file *filp, int arg, struct file_lock **flp,
 			void **priv)
 {
 	struct inode *inode = file_inode(filp);
@@ -1906,7 +1906,7 @@ lease_notifier_chain_init(void)
 }
 
 static inline void
-setlease_notifier(long arg, struct file_lock *lease)
+setlease_notifier(int arg, struct file_lock *lease)
 {
 	if (arg != F_UNLCK)
 		srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
@@ -1942,7 +1942,7 @@ EXPORT_SYMBOL_GPL(lease_unregister_notifier);
  * may be NULL if the lm_setup operation doesn't require it.
  */
 int
-vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
+vfs_setlease(struct file *filp, int arg, struct file_lock **lease, void **priv)
 {
 	if (lease)
 		setlease_notifier(arg, *lease);
@@ -1953,7 +1953,7 @@ vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 }
 EXPORT_SYMBOL_GPL(vfs_setlease);
 
-static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
+static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg)
 {
 	struct file_lock *fl;
 	struct fasync_struct *new;
@@ -1988,7 +1988,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
  *	Note that you also need to call %F_SETSIG to
  *	receive a signal when the lease is broken.
  */
-int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+int fcntl_setlease(unsigned int fd, struct file *filp, int arg)
 {
 	if (arg == F_UNLCK)
 		return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 4c9f8bd866ab..47c5c1f86d66 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -328,7 +328,7 @@ extern int update_open_stateid(struct nfs4_state *state,
 				const nfs4_stateid *open_stateid,
 				const nfs4_stateid *deleg_stateid,
 				fmode_t fmode);
-extern int nfs4_proc_setlease(struct file *file, long arg,
+extern int nfs4_proc_setlease(struct file *file, int arg,
 			      struct file_lock **lease, void **priv);
 extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
 		struct nfs_fsinfo *fsinfo);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 4aeadd6e1a6d..02788c3c85e5 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -438,7 +438,7 @@ void nfs42_ssc_unregister_ops(void)
 }
 #endif /* CONFIG_NFS_V4_2 */
 
-static int nfs4_setlease(struct file *file, long arg, struct file_lock **lease,
+static int nfs4_setlease(struct file *file, int arg, struct file_lock **lease,
 			 void **priv)
 {
 	return nfs4_proc_setlease(file, arg, lease, priv);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e1a886b58354..b4593fbe0114 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7573,7 +7573,7 @@ static int nfs4_delete_lease(struct file *file, void **priv)
 	return generic_setlease(file, F_UNLCK, NULL, priv);
 }
 
-static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease,
+static int nfs4_add_lease(struct file *file, int arg, struct file_lock **lease,
 			  void **priv)
 {
 	struct inode *inode = file_inode(file);
@@ -7591,7 +7591,7 @@ static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease,
 	return -EAGAIN;
 }
 
-int nfs4_proc_setlease(struct file *file, long arg, struct file_lock **lease,
+int nfs4_proc_setlease(struct file *file, int arg, struct file_lock **lease,
 		       void **priv)
 {
 	switch (arg) {
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index a4d8b0ea1c8c..6fc8f43b1c9d 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -1077,7 +1077,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
 }
 
 static int
-cifs_setlease(struct file *file, long arg, struct file_lock **lease, void **priv)
+cifs_setlease(struct file *file, int arg, struct file_lock **lease, void **priv)
 {
 	/*
 	 * Note that this is called by vfs setlease with i_lock held to
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index efcdd1631d9b..95e868e09e29 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -144,7 +144,7 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int,
 			struct flock64 *);
 #endif
 
-int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
+int fcntl_setlease(unsigned int fd, struct file *filp, int arg);
 int fcntl_getlease(struct file *filp);
 
 /* fs/locks.c */
@@ -167,8 +167,8 @@ bool vfs_inode_has_locks(struct inode *inode);
 int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
 int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
 void lease_get_mtime(struct inode *, struct timespec64 *time);
-int generic_setlease(struct file *, long, struct file_lock **, void **priv);
-int vfs_setlease(struct file *, long, struct file_lock **, void **);
+int generic_setlease(struct file *, int, struct file_lock **, void **priv);
+int vfs_setlease(struct file *, int, struct file_lock **, void **);
 int lease_modify(struct file_lock *, int, struct list_head *);
 
 struct notifier_block;
@@ -213,7 +213,7 @@ static inline int fcntl_setlk64(unsigned int fd, struct file *file,
 	return -EACCES;
 }
 #endif
-static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+static inline int fcntl_setlease(unsigned int fd, struct file *filp, int arg)
 {
 	return -EINVAL;
 }
@@ -306,13 +306,13 @@ static inline void lease_get_mtime(struct inode *inode,
 	return;
 }
 
-static inline int generic_setlease(struct file *filp, long arg,
+static inline int generic_setlease(struct file *filp, int arg,
 				    struct file_lock **flp, void **priv)
 {
 	return -EINVAL;
 }
 
-static inline int vfs_setlease(struct file *filp, long arg,
+static inline int vfs_setlease(struct file *filp, int arg,
 			       struct file_lock **lease, void **priv)
 {
 	return -EINVAL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8e546433c781..11055d00fab8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1799,7 +1799,7 @@ struct file_operations {
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
 	void (*splice_eof)(struct file *file);
-	int (*setlease)(struct file *, long, struct file_lock **, void **);
+	int (*setlease)(struct file *, int, struct file_lock **, void **);
 	long (*fallocate)(struct file *file, int mode, loff_t offset,
 			  loff_t len);
 	void (*show_fdinfo)(struct seq_file *m, struct file *f);
@@ -2950,7 +2950,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
 extern const struct address_space_operations ram_aops;
 extern int always_delete_dentry(const struct dentry *);
 extern struct inode *alloc_anon_inode(struct super_block *);
-extern int simple_nosetlease(struct file *, long, struct file_lock **, void **);
+extern int simple_nosetlease(struct file *, int, struct file_lock **, void **);
 extern const struct dentry_operations simple_dentry_operations;
 
 extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);

From 515c5046650d8f56f93546d3867b64c1906162bc Mon Sep 17 00:00:00 2001
From: Luca Vizzarro <Luca.Vizzarro@arm.com>
Date: Wed, 1 Feb 2023 16:18:53 +0000
Subject: [PATCH 0068/2047] pipe: Pass argument of pipe_fcntl as int

The interface for fcntl expects the argument passed for the command
F_SETPIPE_SZ to be of type int. The current code wrongly treats it as
a long. In order to avoid access to undefined bits, we should explicitly
cast the argument to int.

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Kevin Brodsky <Kevin.Brodsky@arm.com>
Cc: Vincenzo Frascino <Vincenzo.Frascino@arm.com>
Cc: Szabolcs Nagy <Szabolcs.Nagy@arm.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: David Laight <David.Laight@ACULAB.com>
Cc: Mark Rutland <Mark.Rutland@arm.com>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-morello@op-lists.linaro.org
Signed-off-by: Luca Vizzarro <Luca.Vizzarro@arm.com>
Message-Id: <20230414152459.816046-4-Luca.Vizzarro@arm.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/pipe.c                 | 6 +++---
 include/linux/pipe_fs_i.h | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 2d88f73f585a..a388612f0f86 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1236,7 +1236,7 @@ const struct file_operations pipefifo_fops = {
  * Currently we rely on the pipe array holding a power-of-2 number
  * of pages. Returns 0 on error.
  */
-unsigned int round_pipe_size(unsigned long size)
+unsigned int round_pipe_size(unsigned int size)
 {
 	if (size > (1U << 31))
 		return 0;
@@ -1319,7 +1319,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
  * Allocate a new array of pipe buffers and copy the info over. Returns the
  * pipe size if successful, or return -ERROR on error.
  */
-static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg)
 {
 	unsigned long user_bufs;
 	unsigned int nr_slots, size;
@@ -1387,7 +1387,7 @@ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
 	return pipe;
 }
 
-long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
 {
 	struct pipe_inode_info *pipe;
 	long ret;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 02e0086b10f6..608a9eb86bff 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -269,10 +269,10 @@ bool pipe_is_unprivileged_user(void);
 
 /* for F_SETPIPE_SZ and F_GETPIPE_SZ */
 int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots);
-long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
+long pipe_fcntl(struct file *, unsigned int, unsigned int arg);
 struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice);
 
 int create_pipe_files(struct file **, int);
-unsigned int round_pipe_size(unsigned long size);
+unsigned int round_pipe_size(unsigned int size);
 
 #endif

From f4ae4081e5a871b4c4e2f802ccf73f8d69566073 Mon Sep 17 00:00:00 2001
From: Luca Vizzarro <Luca.Vizzarro@arm.com>
Date: Wed, 15 Feb 2023 15:50:05 +0000
Subject: [PATCH 0069/2047] dnotify: Pass argument of fcntl_dirnotify as int

The interface for fcntl expects the argument passed for the command
F_DIRNOTIFY to be of type int. The current code wrongly treats it as
a long. In order to avoid access to undefined bits, we should explicitly
cast the argument to int.

Cc: Jan Kara <jack@suse.cz>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Kevin Brodsky <Kevin.Brodsky@arm.com>
Cc: Vincenzo Frascino <Vincenzo.Frascino@arm.com>
Cc: Szabolcs Nagy <Szabolcs.Nagy@arm.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: David Laight <David.Laight@ACULAB.com>
Cc: Mark Rutland <Mark.Rutland@arm.com>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-morello@op-lists.linaro.org
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Luca Vizzarro <Luca.Vizzarro@arm.com>
Message-Id: <20230414152459.816046-6-Luca.Vizzarro@arm.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/notify/dnotify/dnotify.c | 4 ++--
 include/linux/dnotify.h     | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 190aa717fa32..ebdcc25df0f7 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -199,7 +199,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 }
 
 /* this conversion is done only at watch creation */
-static __u32 convert_arg(unsigned long arg)
+static __u32 convert_arg(unsigned int arg)
 {
 	__u32 new_mask = FS_EVENT_ON_CHILD;
 
@@ -258,7 +258,7 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
  * up here.  Allocate both a mark for fsnotify to add and a dnotify_struct to be
  * attached to the fsnotify_mark.
  */
-int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
 {
 	struct dnotify_mark *new_dn_mark, *dn_mark;
 	struct fsnotify_mark *new_fsn_mark, *fsn_mark;
diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index b1d26f9f1c9f..9f183a679277 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -30,7 +30,7 @@ struct dnotify_struct {
 			    FS_MOVED_FROM | FS_MOVED_TO)
 
 extern void dnotify_flush(struct file *, fl_owner_t);
-extern int fcntl_dirnotify(int, struct file *, unsigned long);
+extern int fcntl_dirnotify(int, struct file *, unsigned int);
 
 #else
 
@@ -38,7 +38,7 @@ static inline void dnotify_flush(struct file *filp, fl_owner_t id)
 {
 }
 
-static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
 {
 	return -EINVAL;
 }

From 7c94858ec17dae9071fa3bdf6b005967b9f146f4 Mon Sep 17 00:00:00 2001
From: Philip Prindeville <philipp@redfish-solutions.com>
Date: Wed, 12 Apr 2023 10:18:20 -0600
Subject: [PATCH 0070/2047] ACPICA: Fix GCC 12 dangling-pointer warning

ACPICA commit aea0a5cfce262ce2ab16fd96d87c12cf5e756380

We're storing a persistent pointer to an ephemeral local variable
which technically is a dangling pointer and the compiler is correct.
However, since we never indirect the pointer, this is a safe
operation and we can suppress the warning.

Also, some C run-times (like MUSL) aren't including <stdint.h>
indirectly so we must include it explicitly or we won't have the
type definition for uintptr_t.

Fixes issue #867.

Link: https://github.com/acpica/acpica/commit/aea0a5cf
Signed-off-by: Philip Prindeville <philipp@redfish-solutions.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/utdebug.c   | 5 +++++
 include/acpi/platform/aclinux.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c
index 1bbba8585fa6..c5f6c85a3a09 100644
--- a/drivers/acpi/acpica/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -37,7 +37,12 @@ void acpi_ut_init_stack_ptr_trace(void)
 {
 	acpi_size current_sp;
 
+#pragma GCC diagnostic push
+#if defined(__GNUC__) && __GNUC__ >= 12
+#pragma GCC diagnostic ignored "-Wdangling-pointer="
+#endif
 	acpi_gbl_entry_stack_pointer = &current_sp;
+#pragma GCC diagnostic pop
 }
 
 /*******************************************************************************
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 1ca450e35c0d..565341c826e3 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -182,6 +182,7 @@
 #ifdef ACPI_USE_STANDARD_HEADERS
 #include <stddef.h>
 #include <unistd.h>
+#include <stdint.h>
 
 #define ACPI_OFFSET(d, f)   offsetof(d, f)
 #endif

From 4f1094baeb14ef150b964eb1537ea71099ff7da0 Mon Sep 17 00:00:00 2001
From: George Guo <guodongtai@kylinos.cn>
Date: Thu, 20 Apr 2023 10:50:46 +0800
Subject: [PATCH 0071/2047] ACPICA: Modify ACPI_STATE_COMMON

ACPICA commit 0a43d3521582b5234f69e8bb535e83325387525a

Avoid trailing semicolons in macro, and it's not readable to put macro ACPI_STATE_COMMON and other
variables in the same line.
So modify the macro and just put it in a single line.

Link: https://github.com/acpica/acpica/commit/0a43d352
Signed-off-by: George Guo <guodongtai@kylinos.cn>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/aclocal.h | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 12d4a024f029..75a81b6c1a16 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -560,25 +560,28 @@ struct acpi_field_info {
 	u8                              descriptor_type; /* To differentiate various internal objs */\
 	u8                              flags; \
 	u16                             value; \
-	u16                             state;
+	u16                             state
 
 	/* There are 2 bytes available here until the next natural alignment boundary */
 
 struct acpi_common_state {
-ACPI_STATE_COMMON};
+	ACPI_STATE_COMMON;
+};
 
 /*
  * Update state - used to traverse complex objects such as packages
  */
 struct acpi_update_state {
-	ACPI_STATE_COMMON union acpi_operand_object *object;
+	ACPI_STATE_COMMON;
+	union acpi_operand_object *object;
 };
 
 /*
  * Pkg state - used to traverse nested package structures
  */
 struct acpi_pkg_state {
-	ACPI_STATE_COMMON u32 index;
+	ACPI_STATE_COMMON;
+	u32 index;
 	union acpi_operand_object *source_object;
 	union acpi_operand_object *dest_object;
 	struct acpi_walk_state *walk_state;
@@ -591,7 +594,8 @@ struct acpi_pkg_state {
  * Allows nesting of these constructs
  */
 struct acpi_control_state {
-	ACPI_STATE_COMMON u16 opcode;
+	ACPI_STATE_COMMON;
+	u16 opcode;
 	union acpi_parse_object *predicate_op;
 	u8 *aml_predicate_start;	/* Start of if/while predicate */
 	u8 *package_end;	/* End of if/while block */
@@ -602,11 +606,13 @@ struct acpi_control_state {
  * Scope state - current scope during namespace lookups
  */
 struct acpi_scope_state {
-	ACPI_STATE_COMMON struct acpi_namespace_node *node;
+	ACPI_STATE_COMMON;
+	struct acpi_namespace_node *node;
 };
 
 struct acpi_pscope_state {
-	ACPI_STATE_COMMON u32 arg_count;	/* Number of fixed arguments */
+	ACPI_STATE_COMMON;
+	u32 arg_count;		/* Number of fixed arguments */
 	union acpi_parse_object *op;	/* Current op being parsed */
 	u8 *arg_end;		/* Current argument end */
 	u8 *pkg_end;		/* Current package end */
@@ -618,7 +624,8 @@ struct acpi_pscope_state {
  * states are created when there are nested control methods executing.
  */
 struct acpi_thread_state {
-	ACPI_STATE_COMMON u8 current_sync_level;	/* Mutex Sync (nested acquire) level */
+	ACPI_STATE_COMMON;
+	u8 current_sync_level;	/* Mutex Sync (nested acquire) level */
 	struct acpi_walk_state *walk_state_list;	/* Head of list of walk_states for this thread */
 	union acpi_operand_object *acquired_mutex_list;	/* List of all currently acquired mutexes */
 	acpi_thread_id thread_id;	/* Running thread ID */
@@ -629,8 +636,8 @@ struct acpi_thread_state {
  * AML arguments
  */
 struct acpi_result_values {
-	ACPI_STATE_COMMON
-	    union acpi_operand_object *obj_desc[ACPI_RESULTS_FRAME_OBJ_NUM];
+	ACPI_STATE_COMMON;
+	union acpi_operand_object *obj_desc[ACPI_RESULTS_FRAME_OBJ_NUM];
 };
 
 typedef
@@ -652,7 +659,8 @@ struct acpi_global_notify_handler {
  * handler/dispatcher.
  */
 struct acpi_notify_info {
-	ACPI_STATE_COMMON u8 handler_list_id;
+	ACPI_STATE_COMMON;
+	u8 handler_list_id;
 	struct acpi_namespace_node *node;
 	union acpi_operand_object *handler_list_head;
 	struct acpi_global_notify_handler *global;

From bac36a179a2b4163fa524de806afb04d639b6781 Mon Sep 17 00:00:00 2001
From: Jiangshan Yi <yijiangshan@kylinos.cn>
Date: Thu, 20 Apr 2023 15:42:47 +0800
Subject: [PATCH 0072/2047] ACPICA: exserial.c: replace ternary operator with
 ACPI_MIN()

ACPICA commit 2250f71fe77396db21df805222ffcbf19e1e7896

Make the code simpler and more readable.

Link: https://github.com/acpica/acpica/commit/2250f71f
Signed-off-by: Jiangshan Yi <yijiangshan@kylinos.cn>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/exserial.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/acpi/acpica/exserial.c b/drivers/acpi/acpica/exserial.c
index 5d99b1a76c83..5241f4c01c76 100644
--- a/drivers/acpi/acpica/exserial.c
+++ b/drivers/acpi/acpica/exserial.c
@@ -343,8 +343,7 @@ acpi_ex_write_serial_bus(union acpi_operand_object *source_desc,
 	/* Copy the input buffer data to the transfer buffer */
 
 	buffer = buffer_desc->buffer.pointer;
-	data_length = (buffer_length < source_desc->buffer.length ?
-		       buffer_length : source_desc->buffer.length);
+	data_length = ACPI_MIN(buffer_length, source_desc->buffer.length);
 	memcpy(buffer, source_desc->buffer.pointer, data_length);
 
 	/* Lock entire transaction if requested */

From ed9aa118e4e1d5676da7047905c2080edadff65c Mon Sep 17 00:00:00 2001
From: Saket Dumbre <saket.dumbre@intel.com>
Date: Tue, 25 Apr 2023 10:52:21 -0700
Subject: [PATCH 0073/2047] ACPICA: Add support for _DSC as per ACPI 6.5

ACPICA commit a597e3b247df72aec0f6e056c95abe2d973ac10c

Link: https://github.com/acpica/acpica/commit/a597e3b2
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/acpredef.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index e64aabe3d33a..2e442f5a3123 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -440,6 +440,9 @@ const union acpi_predefined_info acpi_gbl_predefined_methods[] = {
 	{{"_DOS", METHOD_1ARGS(ACPI_TYPE_INTEGER),
 	  METHOD_NO_RETURN_VALUE}},
 
+	{{"_DSC", METHOD_0ARGS,
+	  METHOD_RETURNS(ACPI_RTYPE_INTEGER)}},
+
 	{{"_DSD", METHOD_0ARGS,	/* ACPI 6.0 */
 	  METHOD_RETURNS(ACPI_RTYPE_PACKAGE)}},	/* Variable-length (Pkgs) each: 1 Buf, 1 Pkg */
 	PACKAGE_INFO(ACPI_PTYPE2_UUID_PAIR, ACPI_RTYPE_BUFFER, 1,

From 9558c2a2425077d5c8c761c16a7bfad867a24f65 Mon Sep 17 00:00:00 2001
From: "Najumon B.A" <najumon.ba@intel.com>
Date: Thu, 27 Apr 2023 07:02:10 +0530
Subject: [PATCH 0074/2047] ACPICA: fix for conflict macro definition on zephyr
 interface

ACPICA commit c71a12dfc66593fa9730c62a519161c4a7fca9f6

remove SEEK_SET/SEEK_END duplicate macro on zephyr header which through
 error while run zephyr CI jobs.

Link: https://github.com/acpica/acpica/commit/c71a12df
Signed-off-by: Najumon B.A <najumon.ba@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/platform/aczephyr.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/include/acpi/platform/aczephyr.h b/include/acpi/platform/aczephyr.h
index 2f0d30c3c5fd..703db4dc740d 100644
--- a/include/acpi/platform/aczephyr.h
+++ b/include/acpi/platform/aczephyr.h
@@ -10,9 +10,6 @@
 #ifndef __ACZEPHYR_H__
 #define __ACZEPHYR_H__
 
-#define SEEK_SET FS_SEEK_SET
-#define SEEK_END FS_SEEK_END
-
 #define ACPI_MACHINE_WIDTH      64
 
 #define ACPI_NO_ERROR_MESSAGES

From 24a4b8724536ad7438fe94e8fd269be97e23d0d1 Mon Sep 17 00:00:00 2001
From: Jose Marinho <jose.marinho@arm.com>
Date: Tue, 28 Mar 2023 10:50:46 +0100
Subject: [PATCH 0075/2047] ACPICA: Detect GED device and keep track of _EVT

ACPICA commit dc6fd1d12903015726a8a6f87f63e86141576a68

The GED device is described by a _HID of ACPI0013.
This code traverses the namespace identifying all GED devices.
For each GED device in the namespace we record 1) the Interrupt objects
and the _EVT method.

This information is used when an interrupt is simulate.

Link: https://github.com/acpica/acpica/commit/dc6fd1d1
Signed-off-by: Jose Marinho <jose.marinho@arm.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/acglobal.h | 1 +
 drivers/acpi/acpica/aclocal.h  | 8 ++++++++
 include/acpi/acnames.h         | 1 +
 3 files changed, 10 insertions(+)

diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 778241173ed4..f4c90fc99be2 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -129,6 +129,7 @@ ACPI_GLOBAL(acpi_table_handler, acpi_gbl_table_handler);
 ACPI_GLOBAL(void *, acpi_gbl_table_handler_context);
 ACPI_GLOBAL(acpi_interface_handler, acpi_gbl_interface_handler);
 ACPI_GLOBAL(struct acpi_sci_handler_info *, acpi_gbl_sci_handler_list);
+ACPI_GLOBAL(struct acpi_ged_handler_info *, acpi_gbl_ged_handler_list);
 
 /* Owner ID support */
 
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 75a81b6c1a16..82563b44af35 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -543,6 +543,14 @@ struct acpi_field_info {
 	u32 pkg_length;
 };
 
+/* Information about the interrupt ID and _EVT of a GED device */
+
+struct acpi_ged_handler_info {
+	struct acpi_ged_handler_info *next;
+	u32 int_id;		/* The interrupt ID that triggers the execution ofthe evt_method. */
+	struct acpi_namespace_node *evt_method;	/* The _EVT method to be executed when an interrupt with ID = int_ID is received */
+};
+
 /*****************************************************************************
  *
  * Generic "state" object for stacks
diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h
index d71291f25a80..76aa6aa346ba 100644
--- a/include/acpi/acnames.h
+++ b/include/acpi/acnames.h
@@ -22,6 +22,7 @@
 #define METHOD_NAME__DDN        "_DDN"
 #define METHOD_NAME__DIS        "_DIS"
 #define METHOD_NAME__DMA        "_DMA"
+#define METHOD_NAME__EVT        "_EVT"
 #define METHOD_NAME__HID        "_HID"
 #define METHOD_NAME__INI        "_INI"
 #define METHOD_NAME__PLD        "_PLD"

From b0f2e7d7e6147f09879d7a631eceb03e1a744753 Mon Sep 17 00:00:00 2001
From: Jose Marinho <jose.marinho@arm.com>
Date: Sun, 26 Mar 2023 20:06:00 +0100
Subject: [PATCH 0076/2047] ACPICA: Add interrupt command to acpiexec

ACPICA commit ef7cf185a046d76119b631f16e7c991543c80edc

This commit add the Interrupt command to acpiexec.

The Interrupt command simulates an interrupt with a int_ID (GSIV)
equal to the first argument of the call.

The acpiexec code simulates the behaviour by OSPM: execute the _EVT
method of the GED device associated with that int_ID.

Link: https://github.com/acpica/acpica/commit/ef7cf185
Signed-off-by: Jose Marinho <jose.marinho@arm.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/acdebug.h |  2 ++
 drivers/acpi/acpica/dbcmds.c  | 58 +++++++++++++++++++++++++++++++++++
 drivers/acpi/acpica/dbinput.c |  8 +++++
 3 files changed, 68 insertions(+)

diff --git a/drivers/acpi/acpica/acdebug.h b/drivers/acpi/acpica/acdebug.h
index 22f1f7a9e5a3..911875c5a5f1 100644
--- a/drivers/acpi/acpica/acdebug.h
+++ b/drivers/acpi/acpica/acdebug.h
@@ -287,4 +287,6 @@ struct acpi_namespace_node *acpi_db_local_ns_lookup(char *name);
 
 void acpi_db_uint32_to_hex_string(u32 value, char *buffer);
 
+void acpi_db_generate_interrupt(char *gsiv_arg);
+
 #endif				/* __ACDEBUG_H__ */
diff --git a/drivers/acpi/acpica/dbcmds.c b/drivers/acpi/acpica/dbcmds.c
index 9eb68e0751c7..3d99a9048585 100644
--- a/drivers/acpi/acpica/dbcmds.c
+++ b/drivers/acpi/acpica/dbcmds.c
@@ -1010,6 +1010,64 @@ void acpi_db_display_resources(char *object_arg)
 	acpi_db_set_output_destination(ACPI_DB_CONSOLE_OUTPUT);
 }
 
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_db_generate_ged
+ *
+ * PARAMETERS:  ged_arg             - Raw GED number, ascii string
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Simulate firing of a GED
+ *
+ ******************************************************************************/
+
+void acpi_db_generate_interrupt(char *gsiv_arg)
+{
+	u32 gsiv_number;
+	struct acpi_ged_handler_info *ged_info = acpi_gbl_ged_handler_list;
+
+	if (!ged_info) {
+		acpi_os_printf("No GED handling present\n");
+	}
+
+	gsiv_number = strtoul(gsiv_arg, NULL, 0);
+
+	while (ged_info) {
+
+		if (ged_info->int_id == gsiv_number) {
+			struct acpi_object_list arg_list;
+			union acpi_object arg0;
+			acpi_handle evt_handle = ged_info->evt_method;
+			acpi_status status;
+
+			acpi_os_printf("Evaluate GED _EVT (GSIV=%d)\n",
+				       gsiv_number);
+
+			if (!evt_handle) {
+				acpi_os_printf("Undefined _EVT method\n");
+				return;
+			}
+
+			arg0.integer.type = ACPI_TYPE_INTEGER;
+			arg0.integer.value = gsiv_number;
+
+			arg_list.count = 1;
+			arg_list.pointer = &arg0;
+
+			status =
+			    acpi_evaluate_object(evt_handle, NULL, &arg_list,
+						 NULL);
+			if (ACPI_FAILURE(status)) {
+				acpi_os_printf("Could not evaluate _EVT\n");
+				return;
+			}
+
+		}
+		ged_info = ged_info->next;
+	}
+}
+
 #if (!ACPI_REDUCED_HARDWARE)
 /*******************************************************************************
  *
diff --git a/drivers/acpi/acpica/dbinput.c b/drivers/acpi/acpica/dbinput.c
index b8a48923064f..861b12c334ab 100644
--- a/drivers/acpi/acpica/dbinput.c
+++ b/drivers/acpi/acpica/dbinput.c
@@ -106,6 +106,7 @@ enum acpi_ex_debugger_commands {
 	CMD_THREADS,
 
 	CMD_TEST,
+	CMD_INTERRUPT,
 #endif
 };
 
@@ -185,6 +186,7 @@ static const struct acpi_db_command_info acpi_gbl_db_commands[] = {
 	{"THREADS", 3},
 
 	{"TEST", 1},
+	{"INTERRUPT", 1},
 #endif
 	{NULL, 0}
 };
@@ -318,6 +320,7 @@ static const struct acpi_db_command_help acpi_gbl_db_command_help[] = {
 	{1, "  Gpes", "Display info on all GPE devices\n"},
 	{1, "  Sci", "Generate an SCI\n"},
 	{1, "  Sleep [SleepState]", "Simulate sleep/wake sequence(s) (0-5)\n"},
+	{1, "  Interrupt <GSIV>", "Simulate an interrupt\n"},
 #endif
 	{0, NULL, NULL}
 };
@@ -1064,6 +1067,11 @@ acpi_db_command_dispatch(char *input_buffer,
 		acpi_os_printf("Event command not implemented\n");
 		break;
 
+	case CMD_INTERRUPT:
+
+		acpi_db_generate_interrupt(acpi_gbl_db_args[1]);
+		break;
+
 	case CMD_GPE:
 
 		acpi_db_generate_gpe(acpi_gbl_db_args[1], acpi_gbl_db_args[2]);

From 6399878f4cb4cc7440965dbd679ee650c59b19e3 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Mon, 15 May 2023 09:07:19 -0700
Subject: [PATCH 0077/2047] ACPICA: Fix misspelled CDAT DSMAS define

ACPICA commit 32a50922b66a9e288b9a9b4740de86a542668a43

ACPI_CEDT_DSMAS_NON_VOLATILE -> ACPI_CDAT_DSMAS_NON_VOLATILE

Link: https://github.com/acpica/acpica/commit/32a50922
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl1.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 58b0490a2ad1..8d5572ad48cb 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -402,7 +402,7 @@ struct acpi_cdat_dsmas {
 
 /* Flags for subtable above */
 
-#define ACPI_CEDT_DSMAS_NON_VOLATILE        (1 << 2)
+#define ACPI_CDAT_DSMAS_NON_VOLATILE        (1 << 2)
 
 /* Subtable 1: Device scoped Latency and Bandwidth Information Structure (DSLBIS) */
 

From 1e8d007a6adb6c64e9ee9c809d6f9c11d3ed0c70 Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <679652+tititiou36@users.noreply.github.com>
Date: Mon, 29 May 2023 17:09:43 +0200
Subject: [PATCH 0078/2047] ACPICA: Slightly simplify an error message in
 acpi_ds_result_push()

ACPICA commit 3a9dbc5cb1573b87a16b50918977ab9e53e24408

'object' is known to be NULL at this point. There is little value to log
it twice in the error message.

Link: https://github.com/acpica/acpica/commit/3a9dbc5c
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/dswstate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/acpica/dswstate.c b/drivers/acpi/acpica/dswstate.c
index d3841ded3a81..75338a13c802 100644
--- a/drivers/acpi/acpica/dswstate.c
+++ b/drivers/acpi/acpica/dswstate.c
@@ -146,8 +146,8 @@ acpi_ds_result_push(union acpi_operand_object *object,
 
 	if (!object) {
 		ACPI_ERROR((AE_INFO,
-			    "Null Object! Obj=%p State=%p Num=%u",
-			    object, walk_state, walk_state->result_count));
+			    "Null Object! State=%p Num=%u",
+			    walk_state, walk_state->result_count));
 		return (AE_BAD_PARAMETER);
 	}
 

From 25eea70724b441f124f015163982a914bcb70b4f Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Thu, 15 Jun 2023 11:11:12 -0700
Subject: [PATCH 0079/2047] ACPICA: Add a define for size of struct
 acpi_srat_generic_affinity device_handle

ACPICA commit be56820b03d8aeabfa6709c4d99bf1711afe7ef1

Replace magic number with a define. Linux kernel code will utilize this
define.

Link: https://github.com/acpica/acpica/commit/be56820b
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl3.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h
index 000764ab3985..c080d579a546 100644
--- a/include/acpi/actbl3.h
+++ b/include/acpi/actbl3.h
@@ -279,12 +279,14 @@ struct acpi_srat_gic_its_affinity {
  * 6: ACPI_SRAT_TYPE_GENERIC_PORT_AFFINITY
  */
 
+#define ACPI_SRAT_DEVICE_HANDLE_SIZE	16
+
 struct acpi_srat_generic_affinity {
 	struct acpi_subtable_header header;
 	u8 reserved;
 	u8 device_handle_type;
 	u32 proximity_domain;
-	u8 device_handle[16];
+	u8 device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE];
 	u32 flags;
 	u32 reserved1;
 };

From 3a21ffdbc825e0919db9da0e27ee5ff2cc8a863e Mon Sep 17 00:00:00 2001
From: Abhishek Mainkar <abmainkar@nvidia.com>
Date: Mon, 26 Jun 2023 22:26:06 +0000
Subject: [PATCH 0080/2047] ACPICA: Add AML_NO_OPERAND_RESOLVE flag to Timer

ACPICA commit 90310989a0790032f5a0140741ff09b545af4bc5

According to the ACPI specification 19.6.134, no argument is required to be passed for ASL Timer instruction. For taking care of no argument, AML_NO_OPERAND_RESOLVE flag is added to ASL Timer instruction opcode.

When ASL timer instruction interpreted by ACPI interpreter, getting error. After adding AML_NO_OPERAND_RESOLVE flag to ASL Timer instruction opcode, issue is not observed.

=============================================================
UBSAN: array-index-out-of-bounds in acpica/dswexec.c:401:12 index -1 is out of range for type 'union acpi_operand_object *[9]'
CPU: 37 PID: 1678 Comm: cat Not tainted
6.0.0-dev-th500-6.0.y-1+bcf8c46459e407-generic-64k
HW name: NVIDIA BIOS v1.1.1-d7acbfc-dirty 12/19/2022 Call trace:
 dump_backtrace+0xe0/0x130
 show_stack+0x20/0x60
 dump_stack_lvl+0x68/0x84
 dump_stack+0x18/0x34
 ubsan_epilogue+0x10/0x50
 __ubsan_handle_out_of_bounds+0x80/0x90
 acpi_ds_exec_end_op+0x1bc/0x6d8
 acpi_ps_parse_loop+0x57c/0x618
 acpi_ps_parse_aml+0x1e0/0x4b4
 acpi_ps_execute_method+0x24c/0x2b8
 acpi_ns_evaluate+0x3a8/0x4bc
 acpi_evaluate_object+0x15c/0x37c
 acpi_evaluate_integer+0x54/0x15c
 show_power+0x8c/0x12c [acpi_power_meter]

Link: https://github.com/acpica/acpica/commit/90310989
Signed-off-by: Abhishek Mainkar <abmainkar@nvidia.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/psopcode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index 09029fe545f1..39e31030e5f4 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -603,7 +603,7 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
 
 /* 7E */ ACPI_OP("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY,
 			 AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R,
-			 AML_FLAGS_EXEC_0A_0T_1R),
+			 AML_FLAGS_EXEC_0A_0T_1R | AML_NO_OPERAND_RESOLVE),
 
 /* ACPI 5.0 opcodes */
 

From f3b19adef6c63c2b0778fe5918f786248a9ff897 Mon Sep 17 00:00:00 2001
From: Sunil V L <sunilvl@ventanamicro.com>
Date: Sat, 15 Apr 2023 19:19:16 +0530
Subject: [PATCH 0081/2047] ACPICA: MADT: Add RISC-V external interrupt
 controllers

ACPICA commit 8c048cee4ea7b9ded8db3e1b3b9c14e21e084a2c

This adds 3 different external interrupt controller
definitions in MADT for RISC-V.

 1) RISC-V PLIC is a platform interrupt controller for
    handling wired interrupt in a RISC-V systems.

 2) RISC-V IMSIC is MSI interrupt controller to
    support MSI interrupts.

 3) RISC-V APLIC has dual functionality. First it can
    act like PLIC and direct all wired interrupts to
    the CPU which doesn't have MSI controller. Second,
    when the CPU has MSI controller (IMSIC), it will
    act as a converter from wired interrupts to MSI.

Update the existing RINTC structure also to support
these external interrupt controllers.

This codefirst ECR is approved by UEFI forum and will
be part of next ACPI spec version.

Link: https://github.com/acpica/acpica/commit/8c048cee
Signed-off-by: Haibo, Xu <haibo1.xu@intel.com>
Co-developed-by: Haibo, Xu <haibo1.xu@intel.com>
Signed-off-by: Sunil V L <sunilvl@ventanamicro.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 50 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 0029336775a9..280ab4c7f77a 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -893,7 +893,10 @@ enum acpi_madt_type {
 	ACPI_MADT_TYPE_BIO_PIC = 22,
 	ACPI_MADT_TYPE_LPC_PIC = 23,
 	ACPI_MADT_TYPE_RINTC = 24,
-	ACPI_MADT_TYPE_RESERVED = 25,	/* 25 to 0x7F are reserved */
+	ACPI_MADT_TYPE_IMSIC = 25,
+	ACPI_MADT_TYPE_APLIC = 26,
+	ACPI_MADT_TYPE_PLIC = 27,
+	ACPI_MADT_TYPE_RESERVED = 28,	/* 28 to 0x7F are reserved */
 	ACPI_MADT_TYPE_OEM_RESERVED = 0x80	/* 0x80 to 0xFF are reserved for OEM use */
 };
 
@@ -1261,6 +1264,9 @@ struct acpi_madt_rintc {
 	u32 flags;
 	u64 hart_id;
 	u32 uid;		/* ACPI processor UID */
+	u32 ext_intc_id;	/* External INTC Id */
+	u64 imsic_addr;		/* IMSIC base address */
+	u32 imsic_size;		/* IMSIC size */
 };
 
 /* Values for RISC-V INTC Version field above */
@@ -1271,6 +1277,48 @@ enum acpi_madt_rintc_version {
 	ACPI_MADT_RINTC_VERSION_RESERVED = 2	/* 2 and greater are reserved */
 };
 
+/* 25: RISC-V IMSIC */
+struct acpi_madt_imsic {
+	struct acpi_subtable_header header;
+	u8 version;
+	u8 reserved;
+	u32 flags;
+	u16 num_ids;
+	u16 num_guest_ids;
+	u8 guest_index_bits;
+	u8 hart_index_bits;
+	u8 group_index_bits;
+	u8 group_index_shift;
+};
+
+/* 26: RISC-V APLIC */
+struct acpi_madt_aplic {
+	struct acpi_subtable_header header;
+	u8 version;
+	u8 id;
+	u32 flags;
+	u8 hw_id[8];
+	u16 num_idcs;
+	u16 num_sources;
+	u32 gsi_base;
+	u64 base_addr;
+	u32 size;
+};
+
+/* 27: RISC-V PLIC */
+struct acpi_madt_plic {
+	struct acpi_subtable_header header;
+	u8 version;
+	u8 id;
+	u8 hw_id[8];
+	u16 num_irqs;
+	u16 max_prio;
+	u32 flags;
+	u32 size;
+	u64 base_addr;
+	u32 gsi_base;
+};
+
 /* 80: OEM data */
 
 struct acpi_madt_oem_data {

From fe85f8ff2fc99897c62e3d327f853742231647fb Mon Sep 17 00:00:00 2001
From: Sunil V L <sunilvl@ventanamicro.com>
Date: Sat, 15 Apr 2023 20:17:54 +0530
Subject: [PATCH 0082/2047] ACPICA: RHCT: Add flags, CMO and MMU nodes

ACPICA commit 2eded5a6a13d892b7dc3be6096e7b1e8d4407600

Update RHCT table with below details.

 1) Add additional structure to describe the Cache Management
    Operation (CMO) related information.

 2) Add structure to describe MMU type.

 3) Convert the current reserved field to flags and define
    a flag to indicate timer capability.

This codefirst ECR is approved by UEFI forum and will
be part of next ACPI spec version.

Link: https://github.com/acpica/acpica/commit/2eded5a6
Signed-off-by: Sunil V L <sunilvl@ventanamicro.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 280ab4c7f77a..3751ae69432f 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -2778,12 +2778,15 @@ enum acpi_rgrt_image_type {
 
 struct acpi_table_rhct {
 	struct acpi_table_header header;	/* Common ACPI table header */
-	u32 reserved;
+	u32 flags;		/* RHCT flags */
 	u64 time_base_freq;
 	u32 node_count;
 	u32 node_offset;
 };
 
+/* RHCT Flags */
+
+#define ACPI_RHCT_TIMER_CANNOT_WAKEUP_CPU       (1)
 /*
  * RHCT subtables
  */
@@ -2797,6 +2800,9 @@ struct acpi_rhct_node_header {
 
 enum acpi_rhct_node_type {
 	ACPI_RHCT_NODE_TYPE_ISA_STRING = 0x0000,
+	ACPI_RHCT_NODE_TYPE_CMO = 0x0001,
+	ACPI_RHCT_NODE_TYPE_MMU = 0x0002,
+	ACPI_RHCT_NODE_TYPE_RESERVED = 0x0003,
 	ACPI_RHCT_NODE_TYPE_HART_INFO = 0xFFFF,
 };
 
@@ -2810,6 +2816,24 @@ struct acpi_rhct_isa_string {
 	char isa[];
 };
 
+struct acpi_rhct_cmo_node {
+	u8 reserved;		/* Must be zero */
+	u8 cbom_size;		/* CBOM size in powerof 2 */
+	u8 cbop_size;		/* CBOP size in powerof 2 */
+	u8 cboz_size;		/* CBOZ size in powerof 2 */
+};
+
+struct acpi_rhct_mmu_node {
+	u8 reserved;		/* Must be zero */
+	u8 mmu_type;		/* Virtual Address Scheme */
+};
+
+enum acpi_rhct_mmu_type {
+	ACPI_RHCT_MMU_TYPE_SV39 = 0,
+	ACPI_RHCT_MMU_TYPE_SV48 = 1,
+	ACPI_RHCT_MMU_TYPE_SV57 = 2
+};
+
 /* Hart Info node structure */
 struct acpi_rhct_hart_info {
 	u16 num_offsets;

From f3b091a9f76b95292b81df5b18f92806a4de8f84 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Wed, 28 Jun 2023 14:11:26 -0700
Subject: [PATCH 0083/2047] ACPICA: Update version to 20230628

ACPICA commit f16a0b4d0f0edd7b78a332fcf507be2187fac21e

Version 20230628.

Link: https://github.com/acpica/acpica/commit/f16a0b4d
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 9ffdc0425bc2..0c1b69ad95d0 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -12,7 +12,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20230331
+#define ACPI_CA_VERSION                 0x20230628
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>

From fbab5b2c09060e8034fee6ec2df69a62594fb7db Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 Jul 2023 18:49:18 +0300
Subject: [PATCH 0084/2047] spi: Remove unneeded OF node NULL checks

In the couple of places the NULL check of OF node is implied by the call
that takes it as a parameter. Drop the respective duplicate checks.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230710154932.68377-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 9291b2a0e887..8f3282a71c63 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2399,9 +2399,6 @@ static void of_register_spi_devices(struct spi_controller *ctlr)
 	struct spi_device *spi;
 	struct device_node *nc;
 
-	if (!ctlr->dev.of_node)
-		return;
-
 	for_each_available_child_of_node(ctlr->dev.of_node, nc) {
 		if (of_node_test_and_set_flag(nc, OF_POPULATED))
 			continue;
@@ -3134,7 +3131,7 @@ int spi_register_controller(struct spi_controller *ctlr)
 		if (WARN(id < 0, "couldn't get idr"))
 			return id == -ENOSPC ? -EBUSY : id;
 		ctlr->bus_num = id;
-	} else if (ctlr->dev.of_node) {
+	} else {
 		/* Allocate dynamic bus number using Linux idr */
 		id = of_alias_get_id(ctlr->dev.of_node, "spi");
 		if (id >= 0) {

From 440c47331bdb889e24128c75387c695ca81d9b9b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 Jul 2023 18:49:19 +0300
Subject: [PATCH 0085/2047] spi: Drop duplicate IDR allocation code in
 spi_register_controller()

Refactor spi_register_controller() to drop duplicate IDR allocation.
Instead of if-else-if branching use two sequential if:s, which allows
to re-use the logic of IDR allocation in all cases.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230710154932.68377-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 50 ++++++++++++++++++++++-------------------------
 1 file changed, 23 insertions(+), 27 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 8f3282a71c63..6d74218cf38e 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3081,6 +3081,20 @@ static int spi_controller_check_ops(struct spi_controller *ctlr)
 	return 0;
 }
 
+/* Allocate dynamic bus number using Linux idr */
+static int spi_controller_id_alloc(struct spi_controller *ctlr, int start, int end)
+{
+	int id;
+
+	mutex_lock(&board_lock);
+	id = idr_alloc(&spi_master_idr, ctlr, start, end, GFP_KERNEL);
+	mutex_unlock(&board_lock);
+	if (WARN(id < 0, "couldn't get idr"))
+		return id == -ENOSPC ? -EBUSY : id;
+	ctlr->bus_num = id;
+	return 0;
+}
+
 /**
  * spi_register_controller - register SPI master or slave controller
  * @ctlr: initialized master, originally from spi_alloc_master() or
@@ -3108,8 +3122,8 @@ int spi_register_controller(struct spi_controller *ctlr)
 {
 	struct device		*dev = ctlr->dev.parent;
 	struct boardinfo	*bi;
+	int			first_dynamic;
 	int			status;
-	int			id, first_dynamic;
 
 	if (!dev)
 		return -ENODEV;
@@ -3122,27 +3136,13 @@ int spi_register_controller(struct spi_controller *ctlr)
 	if (status)
 		return status;
 
+	if (ctlr->bus_num < 0)
+		ctlr->bus_num = of_alias_get_id(ctlr->dev.of_node, "spi");
 	if (ctlr->bus_num >= 0) {
 		/* Devices with a fixed bus num must check-in with the num */
-		mutex_lock(&board_lock);
-		id = idr_alloc(&spi_master_idr, ctlr, ctlr->bus_num,
-			ctlr->bus_num + 1, GFP_KERNEL);
-		mutex_unlock(&board_lock);
-		if (WARN(id < 0, "couldn't get idr"))
-			return id == -ENOSPC ? -EBUSY : id;
-		ctlr->bus_num = id;
-	} else {
-		/* Allocate dynamic bus number using Linux idr */
-		id = of_alias_get_id(ctlr->dev.of_node, "spi");
-		if (id >= 0) {
-			ctlr->bus_num = id;
-			mutex_lock(&board_lock);
-			id = idr_alloc(&spi_master_idr, ctlr, ctlr->bus_num,
-				       ctlr->bus_num + 1, GFP_KERNEL);
-			mutex_unlock(&board_lock);
-			if (WARN(id < 0, "couldn't get idr"))
-				return id == -ENOSPC ? -EBUSY : id;
-		}
+		status = spi_controller_id_alloc(ctlr, ctlr->bus_num, ctlr->bus_num + 1);
+		if (status)
+			return status;
 	}
 	if (ctlr->bus_num < 0) {
 		first_dynamic = of_alias_get_highest_id("spi");
@@ -3151,13 +3151,9 @@ int spi_register_controller(struct spi_controller *ctlr)
 		else
 			first_dynamic++;
 
-		mutex_lock(&board_lock);
-		id = idr_alloc(&spi_master_idr, ctlr, first_dynamic,
-			       0, GFP_KERNEL);
-		mutex_unlock(&board_lock);
-		if (WARN(id < 0, "couldn't get idr"))
-			return id;
-		ctlr->bus_num = id;
+		status = spi_controller_id_alloc(ctlr, first_dynamic, 0);
+		if (status)
+			return status;
 	}
 	ctlr->bus_lock_flag = 0;
 	init_completion(&ctlr->xfer_completion);

From 2b308e7176e366a52a07a49868e3b1a295e56785 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 Jul 2023 18:49:20 +0300
Subject: [PATCH 0086/2047] spi: Replace if-else-if by bitops and
 multiplications

Instead of if-else-if, simply call roundup_pow_of_two(BITS_PER_BYTES()).
Note, there is no division assumed as compiler may optimize it away.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230710154932.68377-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 6d74218cf38e..125dea8fae00 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3643,13 +3643,7 @@ int spi_split_transfers_maxwords(struct spi_controller *ctlr,
 		size_t maxsize;
 		int ret;
 
-		if (xfer->bits_per_word <= 8)
-			maxsize = maxwords;
-		else if (xfer->bits_per_word <= 16)
-			maxsize = 2 * maxwords;
-		else
-			maxsize = 4 * maxwords;
-
+		maxsize = maxwords * roundup_pow_of_two(BITS_TO_BYTES(xfer->bits_per_word));
 		if (xfer->len > maxsize) {
 			ret = __spi_split_transfer_maxsize(ctlr, msg, &xfer,
 							   maxsize, gfp);

From f2daa4667fda1aa951b91da0ae9675a5da9d7716 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 Jul 2023 18:49:23 +0300
Subject: [PATCH 0087/2047] spi: Use sysfs_emit() to instead of s*printf()

Follow the advice of the Documentation/filesystems/sysfs.rst and show()
should only use sysfs_emit() or sysfs_emit_at() when formatting the
value to be returned to user space.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230710154932.68377-7-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 125dea8fae00..eec2371ea52e 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -64,7 +64,7 @@ modalias_show(struct device *dev, struct device_attribute *a, char *buf)
 	if (len != -ENODEV)
 		return len;
 
-	return sprintf(buf, "%s%s\n", SPI_MODULE_PREFIX, spi->modalias);
+	return sysfs_emit(buf, "%s%s\n", SPI_MODULE_PREFIX, spi->modalias);
 }
 static DEVICE_ATTR_RO(modalias);
 
@@ -89,7 +89,7 @@ static ssize_t driver_override_show(struct device *dev,
 	ssize_t len;
 
 	device_lock(dev);
-	len = snprintf(buf, PAGE_SIZE, "%s\n", spi->driver_override ? : "");
+	len = sysfs_emit(buf, "%s\n", spi->driver_override ? : "");
 	device_unlock(dev);
 	return len;
 }
@@ -2814,8 +2814,7 @@ static ssize_t slave_show(struct device *dev, struct device_attribute *attr,
 	struct device *child;
 
 	child = device_find_any_child(&ctlr->dev);
-	return sprintf(buf, "%s\n",
-		       child ? to_spi_device(child)->modalias : NULL);
+	return sysfs_emit(buf, "%s\n", child ? to_spi_device(child)->modalias : NULL);
 }
 
 static ssize_t slave_store(struct device *dev, struct device_attribute *attr,

From edf6a864c996f9a9f5299a3b3e574a37e64000c5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 Jul 2023 18:49:24 +0300
Subject: [PATCH 0088/2047] spi: Sort headers alphabetically

Sorting headers alphabetically helps locating duplicates, and
make it easier to figure out where to insert new headers.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230710154932.68377-8-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 42 ++++++++++++++++++++---------------------
 include/linux/spi/spi.h | 14 +++++++-------
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index eec2371ea52e..4c90875e5db1 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -4,36 +4,36 @@
 // Copyright (C) 2005 David Brownell
 // Copyright (C) 2008 Secret Lab Technologies Ltd.
 
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/init.h>
+#include <linux/acpi.h>
 #include <linux/cache.h>
-#include <linux/dma-mapping.h>
+#include <linux/clk/clk-conf.h>
+#include <linux/delay.h>
+#include <linux/device.h>
 #include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+#include <linux/gpio/consumer.h>
+#include <linux/highmem.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/mod_devicetable.h>
 #include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
-#include <linux/clk/clk-conf.h>
+#include <linux/percpu.h>
+#include <linux/platform_data/x86/apple.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/sched/rt.h>
 #include <linux/slab.h>
-#include <linux/mod_devicetable.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-mem.h>
-#include <linux/gpio/consumer.h>
-#include <linux/pm_runtime.h>
-#include <linux/pm_domain.h>
-#include <linux/property.h>
-#include <linux/export.h>
-#include <linux/sched/rt.h>
 #include <uapi/linux/sched/types.h>
-#include <linux/delay.h>
-#include <linux/kthread.h>
-#include <linux/ioport.h>
-#include <linux/acpi.h>
-#include <linux/highmem.h>
-#include <linux/idr.h>
-#include <linux/platform_data/x86/apple.h>
-#include <linux/ptp_clock_kernel.h>
-#include <linux/percpu.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/spi.h>
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 32c94eae8926..21b77bdfac29 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -6,18 +6,18 @@
 #ifndef __LINUX_SPI_H
 #define __LINUX_SPI_H
 
+#include <linux/acpi.h>
 #include <linux/bits.h>
-#include <linux/device.h>
-#include <linux/mod_devicetable.h>
-#include <linux/slab.h>
-#include <linux/kthread.h>
 #include <linux/completion.h>
-#include <linux/scatterlist.h>
+#include <linux/device.h>
 #include <linux/gpio/consumer.h>
+#include <linux/kthread.h>
+#include <linux/mod_devicetable.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/u64_stats_sync.h>
 
 #include <uapi/linux/spi/spi.h>
-#include <linux/acpi.h>
-#include <linux/u64_stats_sync.h>
 
 struct dma_chan;
 struct software_node;

From 5bd00f6db012f75b42434d39b7fec98b95c1afcc Mon Sep 17 00:00:00 2001
From: Imran Khan <imran.f.khan@oracle.com>
Date: Tue, 9 May 2023 08:31:23 +1000
Subject: [PATCH 0089/2047] smp: Reduce logging due to dump_stack of CSD
 waiters

If a waiter is waiting for CSD lock, its call stack will not change
between first and subsequent hang detection for the same CSD lock.
Therefore, do dump_stack only for first-time detection for a given waiter.

This avoids excessive logging on systems with hundreds of CPUs where
repetitive dump_stack from hundreds of CPUs would otherwise flood the
console.

Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juergen Gross <jgross@suse.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/smp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index 385179dae360..1d41a0cb54f1 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -259,7 +259,8 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
 			arch_send_call_function_single_ipi(cpu);
 		}
 	}
-	dump_stack();
+	if (firsttime)
+		dump_stack();
 	*ts1 = ts2;
 
 	return false;

From 0d3a00b370424f5f1b9fd037bc8a4a3e7cbf0939 Mon Sep 17 00:00:00 2001
From: Imran Khan <imran.f.khan@oracle.com>
Date: Tue, 9 May 2023 08:31:24 +1000
Subject: [PATCH 0090/2047] smp: Reduce NMI traffic from CSD waiters to CSD
 destination

On systems with hundreds of CPUs, if most of the CPUs detect a CSD hang,
then all of these waiting CPUs send an NMI to the destination CPU in
order to dump its backtrace.

Given enough NMIs, the destination CPU will spent much of its time
producing backtraces, thus further delaying that CPU's response to the
original CSD IPI.  In the worst case, by the time destination CPU is
done producing all of these backtrace NMIs, the CSD wait timeout will
have elapsed so that the waiters resend their backtrace NMIs again,
further delaying forward progress.

Therefore, to avoid these delays, issue the backtrace NMI only from
the first waiter.  The destination CPU's other waiters can make use of
backtrace obtained from the first waiter's NMI.

Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juergen Gross <jgross@suse.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/smp.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index 1d41a0cb54f1..8455a53465af 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -46,6 +46,8 @@ static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
 
+static DEFINE_PER_CPU(atomic_t, trigger_backtrace) = ATOMIC_INIT(1);
+
 static void __flush_smp_call_function_queue(bool warn_cpu_offline);
 
 int smpcfd_prepare_cpu(unsigned int cpu)
@@ -253,7 +255,8 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
 			 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
 	}
 	if (cpu >= 0) {
-		dump_cpu_task(cpu);
+		if (atomic_cmpxchg_acquire(&per_cpu(trigger_backtrace, cpu), 1, 0))
+			dump_cpu_task(cpu);
 		if (!cpu_cur_csd) {
 			pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
 			arch_send_call_function_single_ipi(cpu);
@@ -434,9 +437,14 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
 	struct llist_node *entry, *prev;
 	struct llist_head *head;
 	static bool warned;
+	atomic_t *tbt;
 
 	lockdep_assert_irqs_disabled();
 
+	/* Allow waiters to send backtrace NMI from here onwards */
+	tbt = this_cpu_ptr(&trigger_backtrace);
+	atomic_set_release(tbt, 1);
+
 	head = this_cpu_ptr(&call_single_queue);
 	entry = llist_del_all(head);
 	entry = llist_reverse_order(entry);

From 758b492047816a3158d027e9fca660bc5bcf20bf Mon Sep 17 00:00:00 2001
From: Wen Yang <wenyang.linux@foxmail.com>
Date: Sun, 9 Jul 2023 14:54:51 +0800
Subject: [PATCH 0091/2047] eventfd: prevent underflow for eventfd semaphores

For eventfd with flag EFD_SEMAPHORE, when its ctx->count is 0, calling
eventfd_ctx_do_read will cause ctx->count to overflow to ULLONG_MAX.

An underflow can happen with EFD_SEMAPHORE eventfds in at least the
following three subsystems:

(1) virt/kvm/eventfd.c
(2) drivers/vfio/virqfd.c
(3) drivers/virt/acrn/irqfd.c

where (2) and (3) are just modeled after (1). An eventfd must be
specified for use with the KVM_IRQFD ioctl(). This can also be an
EFD_SEMAPHORE eventfd. When the eventfd count is zero or has been
decremented to zero an underflow can be triggered when the irqfd is shut
down by raising the KVM_IRQFD_FLAG_DEASSIGN flag in the KVM_IRQFD
ioctl():

        // ctx->count == 0
        kvm_vm_ioctl()
        -> kvm_irqfd()
           -> kvm_irqfd_deassign()
              -> irqfd_deactivate()
                 -> irqfd_shutdown()
                    -> eventfd_ctx_remove_wait_queue(&cnt)
                       -> eventfd_ctx_do_read(&cnt)

Userspace polling on the eventfd wouldn't notice the underflow because 1
is always returned as the value from eventfd_read() while ctx->count
would've underflowed. It's not a huge deal because this should only be
happening when the irqfd is shutdown but we should still fix it and
avoid the spurious wakeup.

Fixes: cb289d6244a3 ("eventfd - allow atomic read and waitqueue remove")
Signed-off-by: Wen Yang <wenyang.linux@foxmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dylan Yudaken <dylany@fb.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Message-Id: <tencent_7588DFD1F365950A757310D764517A14B306@qq.com>
[brauner: rewrite commit message and add explanation how this underflow can happen]
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/eventfd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/eventfd.c b/fs/eventfd.c
index 8aa36cd37351..33a918f9566c 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -189,7 +189,7 @@ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
 {
 	lockdep_assert_held(&ctx->wqh.lock);
 
-	*cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+	*cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count;
 	ctx->count -= *cnt;
 }
 EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);

From c397f09e5498994790503a64486213ef85e58db9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 Jul 2023 18:49:28 +0300
Subject: [PATCH 0092/2047] spi: Get rid of old SPI_MASTER_NO_TX &
 SPI_MASTER_NO_RX

Convert the users from SPI_MASTER_NO_TX and/or SPI_MASTER_NO_RX
to SPI_CONTROLLER_NO_TX and/or SPI_CONTROLLER_NO_RX respectively
and kill the not used anymore definitions.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230710154932.68377-12-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bitbang-txrx.h  | 16 ++++++++--------
 drivers/spi/spi-bitbang.c       |  2 +-
 drivers/spi/spi-gpio.c          |  8 ++++----
 drivers/spi/spi-lp8841-rtc.c    |  8 ++++----
 drivers/spi/spi-xtensa-xtfpga.c |  2 +-
 include/linux/spi/spi.h         |  2 --
 6 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/drivers/spi/spi-bitbang-txrx.h b/drivers/spi/spi-bitbang-txrx.h
index 2dcbe166df63..0cab48b7875b 100644
--- a/drivers/spi/spi-bitbang-txrx.h
+++ b/drivers/spi/spi-bitbang-txrx.h
@@ -57,7 +57,7 @@ bitbang_txrx_be_cpha0(struct spi_device *spi,
 	for (word <<= (32 - bits); likely(bits); bits--) {
 
 		/* setup MSB (to slave) on trailing edge */
-		if ((flags & SPI_MASTER_NO_TX) == 0) {
+		if ((flags & SPI_CONTROLLER_NO_TX) == 0) {
 			if ((word & (1 << 31)) != oldbit) {
 				setmosi(spi, word & (1 << 31));
 				oldbit = word & (1 << 31);
@@ -70,7 +70,7 @@ bitbang_txrx_be_cpha0(struct spi_device *spi,
 
 		/* sample MSB (from slave) on leading edge */
 		word <<= 1;
-		if ((flags & SPI_MASTER_NO_RX) == 0)
+		if ((flags & SPI_CONTROLLER_NO_RX) == 0)
 			word |= getmiso(spi);
 		setsck(spi, cpol);
 	}
@@ -90,7 +90,7 @@ bitbang_txrx_be_cpha1(struct spi_device *spi,
 
 		/* setup MSB (to slave) on leading edge */
 		setsck(spi, !cpol);
-		if ((flags & SPI_MASTER_NO_TX) == 0) {
+		if ((flags & SPI_CONTROLLER_NO_TX) == 0) {
 			if ((word & (1 << 31)) != oldbit) {
 				setmosi(spi, word & (1 << 31));
 				oldbit = word & (1 << 31);
@@ -103,7 +103,7 @@ bitbang_txrx_be_cpha1(struct spi_device *spi,
 
 		/* sample MSB (from slave) on trailing edge */
 		word <<= 1;
-		if ((flags & SPI_MASTER_NO_RX) == 0)
+		if ((flags & SPI_CONTROLLER_NO_RX) == 0)
 			word |= getmiso(spi);
 	}
 	return word;
@@ -122,7 +122,7 @@ bitbang_txrx_le_cpha0(struct spi_device *spi,
 	for (; likely(bits); bits--) {
 
 		/* setup LSB (to slave) on trailing edge */
-		if ((flags & SPI_MASTER_NO_TX) == 0) {
+		if ((flags & SPI_CONTROLLER_NO_TX) == 0) {
 			if ((word & 1) != oldbit) {
 				setmosi(spi, word & 1);
 				oldbit = word & 1;
@@ -135,7 +135,7 @@ bitbang_txrx_le_cpha0(struct spi_device *spi,
 
 		/* sample LSB (from slave) on leading edge */
 		word >>= 1;
-		if ((flags & SPI_MASTER_NO_RX) == 0)
+		if ((flags & SPI_CONTROLLER_NO_RX) == 0)
 			word |= getmiso(spi) << rxbit;
 		setsck(spi, cpol);
 	}
@@ -156,7 +156,7 @@ bitbang_txrx_le_cpha1(struct spi_device *spi,
 
 		/* setup LSB (to slave) on leading edge */
 		setsck(spi, !cpol);
-		if ((flags & SPI_MASTER_NO_TX) == 0) {
+		if ((flags & SPI_CONTROLLER_NO_TX) == 0) {
 			if ((word & 1) != oldbit) {
 				setmosi(spi, word & 1);
 				oldbit = word & 1;
@@ -169,7 +169,7 @@ bitbang_txrx_le_cpha1(struct spi_device *spi,
 
 		/* sample LSB (from slave) on trailing edge */
 		word >>= 1;
-		if ((flags & SPI_MASTER_NO_RX) == 0)
+		if ((flags & SPI_CONTROLLER_NO_RX) == 0)
 			word |= getmiso(spi) << rxbit;
 	}
 	return word;
diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c
index 27d0087f8688..862f209cada1 100644
--- a/drivers/spi/spi-bitbang.c
+++ b/drivers/spi/spi-bitbang.c
@@ -248,7 +248,7 @@ static int spi_bitbang_bufs(struct spi_device *spi, struct spi_transfer *t)
 	if (spi->mode & SPI_3WIRE) {
 		unsigned flags;
 
-		flags = t->tx_buf ? SPI_MASTER_NO_RX : SPI_MASTER_NO_TX;
+		flags = t->tx_buf ? SPI_CONTROLLER_NO_RX : SPI_CONTROLLER_NO_TX;
 		return cs->txrx_bufs(spi, cs->txrx_word, nsecs, t, flags);
 	}
 	return cs->txrx_bufs(spi, cs->txrx_word, nsecs, t, 0);
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index 092afc7679d4..85e149ec2910 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -170,7 +170,7 @@ static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi,
 
 /*
  * These functions do not call setmosi or getmiso if respective flag
- * (SPI_MASTER_NO_RX or SPI_MASTER_NO_TX) is set, so they are safe to
+ * (SPI_CONTROLLER_NO_RX or SPI_CONTROLLER_NO_TX) is set, so they are safe to
  * call when such pin is not present or defined in the controller.
  * A separate set of callbacks is defined to get highest possible
  * speed in the generic case (when both MISO and MOSI lines are
@@ -416,11 +416,11 @@ static int spi_gpio_probe(struct platform_device *pdev)
 	if (!spi_gpio->mosi) {
 		/* HW configuration without MOSI pin
 		 *
-		 * No setting SPI_MASTER_NO_RX here - if there is only
+		 * No setting SPI_CONTROLLER_NO_RX here - if there is only
 		 * a MOSI pin connected the host can still do RX by
 		 * changing the direction of the line.
 		 */
-		master->flags = SPI_MASTER_NO_TX;
+		master->flags = SPI_CONTROLLER_NO_TX;
 	}
 
 	master->bus_num = pdev->id;
@@ -438,7 +438,7 @@ static int spi_gpio_probe(struct platform_device *pdev)
 	bb->chipselect = spi_gpio_chipselect;
 	bb->set_line_direction = spi_gpio_set_direction;
 
-	if (master->flags & SPI_MASTER_NO_TX) {
+	if (master->flags & SPI_CONTROLLER_NO_TX) {
 		bb->txrx_word[SPI_MODE_0] = spi_gpio_spec_txrx_word_mode0;
 		bb->txrx_word[SPI_MODE_1] = spi_gpio_spec_txrx_word_mode1;
 		bb->txrx_word[SPI_MODE_2] = spi_gpio_spec_txrx_word_mode2;
diff --git a/drivers/spi/spi-lp8841-rtc.c b/drivers/spi/spi-lp8841-rtc.c
index 2d436541d6c2..ccaa7a946359 100644
--- a/drivers/spi/spi-lp8841-rtc.c
+++ b/drivers/spi/spi-lp8841-rtc.c
@@ -75,14 +75,14 @@ bitbang_txrx_be_cpha0_lsb(struct spi_lp8841_rtc *data,
 	for (; likely(bits); bits--) {
 
 		/* setup LSB (to slave) on leading edge */
-		if ((flags & SPI_MASTER_NO_TX) == 0)
+		if ((flags & SPI_CONTROLLER_NO_TX) == 0)
 			setmosi(data, (word & 1));
 
 		usleep_range(usecs, usecs + 1);	/* T(setup) */
 
 		/* sample LSB (from slave) on trailing edge */
 		word >>= 1;
-		if ((flags & SPI_MASTER_NO_RX) == 0)
+		if ((flags & SPI_CONTROLLER_NO_RX) == 0)
 			word |= (getmiso(data) << 31);
 
 		setsck(data, !cpol);
@@ -113,7 +113,7 @@ spi_lp8841_rtc_transfer_one(struct spi_master *master,
 		while (likely(count > 0)) {
 			word = *tx++;
 			bitbang_txrx_be_cpha0_lsb(data, 1, 0,
-					SPI_MASTER_NO_RX, word, 8);
+					SPI_CONTROLLER_NO_RX, word, 8);
 			count--;
 		}
 	} else if (rx) {
@@ -121,7 +121,7 @@ spi_lp8841_rtc_transfer_one(struct spi_master *master,
 		writeb(data->state, data->iomem);
 		while (likely(count > 0)) {
 			word = bitbang_txrx_be_cpha0_lsb(data, 1, 0,
-					SPI_MASTER_NO_TX, word, 8);
+					SPI_CONTROLLER_NO_TX, word, 8);
 			*rx++ = word;
 			count--;
 		}
diff --git a/drivers/spi/spi-xtensa-xtfpga.c b/drivers/spi/spi-xtensa-xtfpga.c
index 24dc845b940e..dbd85d7a1526 100644
--- a/drivers/spi/spi-xtensa-xtfpga.c
+++ b/drivers/spi/spi-xtensa-xtfpga.c
@@ -87,7 +87,7 @@ static int xtfpga_spi_probe(struct platform_device *pdev)
 	if (!master)
 		return -ENOMEM;
 
-	master->flags = SPI_MASTER_NO_RX;
+	master->flags = SPI_CONTROLLER_NO_RX;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 16);
 	master->bus_num = pdev->dev.id;
 	master->dev.of_node = pdev->dev.of_node;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 21b77bdfac29..debb54f37603 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -1623,8 +1623,6 @@ spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer)
 #define spi_master			spi_controller
 
 #define SPI_MASTER_HALF_DUPLEX		SPI_CONTROLLER_HALF_DUPLEX
-#define SPI_MASTER_NO_RX		SPI_CONTROLLER_NO_RX
-#define SPI_MASTER_NO_TX		SPI_CONTROLLER_NO_TX
 #define SPI_MASTER_MUST_RX		SPI_CONTROLLER_MUST_RX
 #define SPI_MASTER_MUST_TX		SPI_CONTROLLER_MUST_TX
 

From 90366cd60133a9f5b6a2f31360367c658585e125 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 Jul 2023 18:49:29 +0300
Subject: [PATCH 0093/2047] spi: Get rid of old SPI_MASTER_MUST_TX &
 SPI_MASTER_MUST_RX

Convert the users from SPI_MASTER_MUST_TX and/or SPI_MASTER_MUST_RX
to SPI_CONTROLLER_MUST_TX and/or SPI_CONTROLLER_MUST_RX respectively
and kill the not used anymore definitions.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230710154932.68377-13-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-at91-usart.c   | 2 +-
 drivers/spi/spi-atmel.c        | 2 +-
 drivers/spi/spi-davinci.c      | 2 +-
 drivers/spi/spi-fsl-lpspi.c    | 2 +-
 drivers/spi/spi-meson-spicc.c  | 2 +-
 drivers/spi/spi-mt65xx.c       | 2 +-
 drivers/spi/spi-pci1xxxx.c     | 2 +-
 drivers/spi/spi-pic32.c        | 2 +-
 drivers/spi/spi-rb4xx.c        | 2 +-
 drivers/spi/spi-slave-mt27xx.c | 2 +-
 drivers/spi/spi-stm32.c        | 2 +-
 include/linux/spi/spi.h        | 2 --
 12 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/spi/spi-at91-usart.c b/drivers/spi/spi-at91-usart.c
index 7854d9790fe9..7dfe2b6c2990 100644
--- a/drivers/spi/spi-at91-usart.c
+++ b/drivers/spi/spi-at91-usart.c
@@ -527,7 +527,7 @@ static int at91_usart_spi_probe(struct platform_device *pdev)
 	controller->dev.of_node = pdev->dev.parent->of_node;
 	controller->bits_per_word_mask = SPI_BPW_MASK(8);
 	controller->setup = at91_usart_spi_setup;
-	controller->flags = SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX;
+	controller->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX;
 	controller->transfer_one = at91_usart_spi_transfer_one;
 	controller->prepare_message = at91_usart_spi_prepare_message;
 	controller->unprepare_message = at91_usart_spi_unprepare_message;
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 152cd6773403..0865993005b2 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -1475,7 +1475,7 @@ static int atmel_spi_probe(struct platform_device *pdev)
 	host->bus_num = pdev->id;
 	host->num_chipselect = 4;
 	host->setup = atmel_spi_setup;
-	host->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX |
+	host->flags = (SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX |
 			SPI_MASTER_GPIO_SS);
 	host->transfer_one = atmel_spi_one_transfer;
 	host->set_cs = atmel_spi_set_cs;
diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index b04811c911e2..014392459d5e 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -939,7 +939,7 @@ static int davinci_spi_probe(struct platform_device *pdev)
 	master->bus_num = pdev->id;
 	master->num_chipselect = pdata->num_chipselect;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 16);
-	master->flags = SPI_MASTER_MUST_RX | SPI_MASTER_GPIO_SS;
+	master->flags = SPI_CONTROLLER_MUST_RX | SPI_MASTER_GPIO_SS;
 	master->setup = davinci_spi_setup;
 	master->cleanup = davinci_spi_cleanup;
 	master->can_dma = davinci_spi_can_dma;
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index fb68c72df171..1c907d5d5bb3 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -856,7 +856,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
 	controller->prepare_transfer_hardware = lpspi_prepare_xfer_hardware;
 	controller->unprepare_transfer_hardware = lpspi_unprepare_xfer_hardware;
 	controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
-	controller->flags = SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX;
+	controller->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX;
 	controller->dev.of_node = pdev->dev.of_node;
 	controller->bus_num = pdev->id;
 	controller->num_chipselect = fsl_lpspi->num_cs;
diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
index 141562c882f1..7a9eba35ac38 100644
--- a/drivers/spi/spi-meson-spicc.c
+++ b/drivers/spi/spi-meson-spicc.c
@@ -864,7 +864,7 @@ static int meson_spicc_probe(struct platform_device *pdev)
 				     SPI_BPW_MASK(24) |
 				     SPI_BPW_MASK(16) |
 				     SPI_BPW_MASK(8);
-	master->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX);
+	master->flags = (SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX);
 	master->min_speed_hz = spicc->data->min_speed_hz;
 	master->max_speed_hz = spicc->data->max_speed_hz;
 	master->setup = meson_spicc_setup;
diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 39272ad6641b..0757985947dd 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -1142,7 +1142,7 @@ static int mtk_spi_probe(struct platform_device *pdev)
 		master->mode_bits |= SPI_CS_HIGH;
 
 	if (mdata->dev_comp->must_tx)
-		master->flags = SPI_MASTER_MUST_TX;
+		master->flags = SPI_CONTROLLER_MUST_TX;
 	if (mdata->dev_comp->ipm_design)
 		master->mode_bits |= SPI_LOOP | SPI_RX_DUAL | SPI_TX_DUAL |
 				     SPI_RX_QUAD | SPI_TX_QUAD;
diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
index 4445d82409d6..d23c42839da1 100644
--- a/drivers/spi/spi-pci1xxxx.c
+++ b/drivers/spi/spi-pci1xxxx.c
@@ -365,7 +365,7 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
 		spi_host->bits_per_word_mask = SPI_BPW_MASK(8);
 		spi_host->max_speed_hz = PCI1XXXX_SPI_MAX_CLOCK_HZ;
 		spi_host->min_speed_hz = PCI1XXXX_SPI_MIN_CLOCK_HZ;
-		spi_host->flags = SPI_MASTER_MUST_TX;
+		spi_host->flags = SPI_CONTROLLER_MUST_TX;
 		spi_master_set_devdata(spi_host, spi_sub_ptr);
 		ret = devm_spi_register_master(dev, spi_host);
 		if (ret)
diff --git a/drivers/spi/spi-pic32.c b/drivers/spi/spi-pic32.c
index f2af5e653f3d..e9b4c9cb97fb 100644
--- a/drivers/spi/spi-pic32.c
+++ b/drivers/spi/spi-pic32.c
@@ -773,7 +773,7 @@ static int pic32_spi_probe(struct platform_device *pdev)
 	master->max_speed_hz	= clk_get_rate(pic32s->clk);
 	master->setup		= pic32_spi_setup;
 	master->cleanup		= pic32_spi_cleanup;
-	master->flags		= SPI_MASTER_MUST_TX | SPI_MASTER_MUST_RX;
+	master->flags		= SPI_CONTROLLER_MUST_TX | SPI_CONTROLLER_MUST_RX;
 	master->bits_per_word_mask	= SPI_BPW_MASK(8) | SPI_BPW_MASK(16) |
 					  SPI_BPW_MASK(32);
 	master->transfer_one		= pic32_spi_one_transfer;
diff --git a/drivers/spi/spi-rb4xx.c b/drivers/spi/spi-rb4xx.c
index 5073736d3d1f..c817889a7797 100644
--- a/drivers/spi/spi-rb4xx.c
+++ b/drivers/spi/spi-rb4xx.c
@@ -156,7 +156,7 @@ static int rb4xx_spi_probe(struct platform_device *pdev)
 	master->num_chipselect = 3;
 	master->mode_bits = SPI_TX_DUAL;
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->flags = SPI_MASTER_MUST_TX;
+	master->flags = SPI_CONTROLLER_MUST_TX;
 	master->transfer_one = rb4xx_transfer_one;
 	master->set_cs = rb4xx_set_cs;
 
diff --git a/drivers/spi/spi-slave-mt27xx.c b/drivers/spi/spi-slave-mt27xx.c
index 4e4d426bfb43..6d6772974783 100644
--- a/drivers/spi/spi-slave-mt27xx.c
+++ b/drivers/spi/spi-slave-mt27xx.c
@@ -414,7 +414,7 @@ static int mtk_spi_slave_probe(struct platform_device *pdev)
 	mdata->dev_comp = of_id->data;
 
 	if (mdata->dev_comp->must_rx)
-		ctlr->flags = SPI_MASTER_MUST_RX;
+		ctlr->flags = SPI_CONTROLLER_MUST_RX;
 
 	platform_set_drvdata(pdev, ctlr);
 
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 6d10fa4ab783..423212bd0675 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -1750,7 +1750,7 @@ static const struct stm32_spi_cfg stm32f4_spi_cfg = {
 	.baud_rate_div_min = STM32F4_SPI_BR_DIV_MIN,
 	.baud_rate_div_max = STM32F4_SPI_BR_DIV_MAX,
 	.has_fifo = false,
-	.flags = SPI_MASTER_MUST_TX,
+	.flags = SPI_CONTROLLER_MUST_TX,
 };
 
 static const struct stm32_spi_cfg stm32h7_spi_cfg = {
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index debb54f37603..becad31aeea2 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -1623,8 +1623,6 @@ spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer)
 #define spi_master			spi_controller
 
 #define SPI_MASTER_HALF_DUPLEX		SPI_CONTROLLER_HALF_DUPLEX
-#define SPI_MASTER_MUST_RX		SPI_CONTROLLER_MUST_RX
-#define SPI_MASTER_MUST_TX		SPI_CONTROLLER_MUST_TX
 
 #define spi_master_get_devdata(_ctlr)	spi_controller_get_devdata(_ctlr)
 #define spi_master_set_devdata(_ctlr, _data)	\

From 82238d2cbd99ebd09dda48fb7c1c8802097da6a2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 Jul 2023 18:49:30 +0300
Subject: [PATCH 0094/2047] spi: Rename SPI_MASTER_GPIO_SS to
 SPI_CONTROLLER_GPIO_SS

Rename SPI_MASTER_GPIO_SS to SPI_CONTROLLER_GPIO_SS and
convert the users to SPI_CONTROLLER_GPIO_SS to follow
the new naming shema.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230710154932.68377-14-andriy.shevchenko@linux.intel.com
Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ath79.c    | 2 +-
 drivers/spi/spi-atmel.c    | 2 +-
 drivers/spi/spi-bitbang.c  | 6 +++---
 drivers/spi/spi-davinci.c  | 2 +-
 drivers/spi/spi-dw-core.c  | 2 +-
 drivers/spi/spi-gpio.c     | 2 +-
 drivers/spi/spi-imx.c      | 2 +-
 drivers/spi/spi-orion.c    | 2 +-
 drivers/spi/spi-rockchip.c | 2 +-
 drivers/spi/spi-sifive.c   | 2 +-
 drivers/spi/spi.c          | 4 ++--
 include/linux/spi/spi.h    | 3 +--
 12 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c
index d3dd21386f12..1b6d977d111c 100644
--- a/drivers/spi/spi-ath79.c
+++ b/drivers/spi/spi-ath79.c
@@ -185,7 +185,7 @@ static int ath79_spi_probe(struct platform_device *pdev)
 
 	host->use_gpio_descriptors = true;
 	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
-	host->flags = SPI_MASTER_GPIO_SS;
+	host->flags = SPI_CONTROLLER_GPIO_SS;
 	host->num_chipselect = 3;
 	host->mem_ops = &ath79_mem_ops;
 
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 0865993005b2..9674499ed0a6 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -1476,7 +1476,7 @@ static int atmel_spi_probe(struct platform_device *pdev)
 	host->num_chipselect = 4;
 	host->setup = atmel_spi_setup;
 	host->flags = (SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX |
-			SPI_MASTER_GPIO_SS);
+			SPI_CONTROLLER_GPIO_SS);
 	host->transfer_one = atmel_spi_one_transfer;
 	host->set_cs = atmel_spi_set_cs;
 	host->cleanup = atmel_spi_cleanup;
diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c
index 862f209cada1..ecd44016c197 100644
--- a/drivers/spi/spi-bitbang.c
+++ b/drivers/spi/spi-bitbang.c
@@ -349,11 +349,11 @@ int spi_bitbang_init(struct spi_bitbang *bitbang)
 	/*
 	 * We only need the chipselect callback if we are actually using it.
 	 * If we just use GPIO descriptors, it is surplus. If the
-	 * SPI_MASTER_GPIO_SS flag is set, we always need to call the
+	 * SPI_CONTROLLER_GPIO_SS flag is set, we always need to call the
 	 * driver-specific chipselect routine.
 	 */
 	custom_cs = (!master->use_gpio_descriptors ||
-		     (master->flags & SPI_MASTER_GPIO_SS));
+		     (master->flags & SPI_CONTROLLER_GPIO_SS));
 
 	if (custom_cs && !bitbang->chipselect)
 		return -EINVAL;
@@ -371,7 +371,7 @@ int spi_bitbang_init(struct spi_bitbang *bitbang)
 	master->transfer_one = spi_bitbang_transfer_one;
 	/*
 	 * When using GPIO descriptors, the ->set_cs() callback doesn't even
-	 * get called unless SPI_MASTER_GPIO_SS is set.
+	 * get called unless SPI_CONTROLLER_GPIO_SS is set.
 	 */
 	if (custom_cs)
 		master->set_cs = spi_bitbang_set_cs;
diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 014392459d5e..7ea2ac053052 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -939,7 +939,7 @@ static int davinci_spi_probe(struct platform_device *pdev)
 	master->bus_num = pdev->id;
 	master->num_chipselect = pdata->num_chipselect;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 16);
-	master->flags = SPI_CONTROLLER_MUST_RX | SPI_MASTER_GPIO_SS;
+	master->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_GPIO_SS;
 	master->setup = davinci_spi_setup;
 	master->cleanup = davinci_spi_cleanup;
 	master->can_dma = davinci_spi_can_dma;
diff --git a/drivers/spi/spi-dw-core.c b/drivers/spi/spi-dw-core.c
index a8ba41ad4541..45f5acc26b1d 100644
--- a/drivers/spi/spi-dw-core.c
+++ b/drivers/spi/spi-dw-core.c
@@ -932,7 +932,7 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
 	if (dws->mem_ops.exec_op)
 		master->mem_ops = &dws->mem_ops;
 	master->max_speed_hz = dws->max_freq;
-	master->flags = SPI_MASTER_GPIO_SS;
+	master->flags = SPI_CONTROLLER_GPIO_SS;
 	master->auto_runtime_pm = true;
 
 	/* Get default rx sample delay */
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index 85e149ec2910..612db477ad12 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -434,7 +434,7 @@ static int spi_gpio_probe(struct platform_device *pdev)
 	 * line, that we need to do on selection. This makes the local
 	 * callback for chipselect always get called.
 	 */
-	master->flags |= SPI_MASTER_GPIO_SS;
+	master->flags |= SPI_CONTROLLER_GPIO_SS;
 	bb->chipselect = spi_gpio_chipselect;
 	bb->set_line_direction = spi_gpio_set_direction;
 
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 528ae46c087f..8d97f2b32c2b 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -1779,7 +1779,7 @@ static int spi_imx_probe(struct platform_device *pdev)
 
 	if (is_imx51_ecspi(spi_imx) || is_imx53_ecspi(spi_imx)) {
 		controller->max_native_cs = 4;
-		controller->flags |= SPI_MASTER_GPIO_SS;
+		controller->flags |= SPI_CONTROLLER_GPIO_SS;
 	}
 
 	spi_imx->spi_drctl = spi_drctl;
diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index ad9e83e34297..4882aafd0505 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c
@@ -677,7 +677,7 @@ static int orion_spi_probe(struct platform_device *pdev)
 	master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
 	master->auto_runtime_pm = true;
 	master->use_gpio_descriptors = true;
-	master->flags = SPI_MASTER_GPIO_SS;
+	master->flags = SPI_CONTROLLER_GPIO_SS;
 
 	platform_set_drvdata(pdev, master);
 
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 143ede958ac1..a37943847e81 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -858,7 +858,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
 		ctlr->mode_bits |= SPI_NO_CS;
 		ctlr->slave_abort = rockchip_spi_slave_abort;
 	} else {
-		ctlr->flags = SPI_MASTER_GPIO_SS;
+		ctlr->flags = SPI_CONTROLLER_GPIO_SS;
 		ctlr->max_native_cs = ROCKCHIP_SPI_MAX_CS_NUM;
 		/*
 		 * rk spi0 has two native cs, spi1..5 one cs only
diff --git a/drivers/spi/spi-sifive.c b/drivers/spi/spi-sifive.c
index dae9e097c333..2f77dae85386 100644
--- a/drivers/spi/spi-sifive.c
+++ b/drivers/spi/spi-sifive.c
@@ -379,7 +379,7 @@ static int sifive_spi_probe(struct platform_device *pdev)
 	 * we need to "left-align" the bits (unless SPI_LSB_FIRST)
 	 */
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->flags = SPI_CONTROLLER_MUST_TX | SPI_MASTER_GPIO_SS;
+	master->flags = SPI_CONTROLLER_MUST_TX | SPI_CONTROLLER_GPIO_SS;
 	master->prepare_message = sifive_spi_prepare_message;
 	master->set_cs = sifive_spi_set_cs;
 	master->transfer_one = sifive_spi_transfer_one;
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 4c90875e5db1..d9667f3f151e 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1006,7 +1006,7 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
 				gpiod_set_value_cansleep(spi_get_csgpiod(spi, 0), activate);
 		}
 		/* Some SPI masters need both GPIO CS & slave_select */
-		if ((spi->controller->flags & SPI_MASTER_GPIO_SS) &&
+		if ((spi->controller->flags & SPI_CONTROLLER_GPIO_SS) &&
 		    spi->controller->set_cs)
 			spi->controller->set_cs(spi, !enable);
 	} else if (spi->controller->set_cs) {
@@ -3052,7 +3052,7 @@ static int spi_get_gpio_descs(struct spi_controller *ctlr)
 
 	ctlr->unused_native_cs = ffs(~native_cs_mask) - 1;
 
-	if ((ctlr->flags & SPI_MASTER_GPIO_SS) && num_cs_gpios &&
+	if ((ctlr->flags & SPI_CONTROLLER_GPIO_SS) && num_cs_gpios &&
 	    ctlr->max_native_cs && ctlr->unused_native_cs >= ctlr->max_native_cs) {
 		dev_err(dev, "No unused native chip select available\n");
 		return -EINVAL;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index becad31aeea2..39a2e6108694 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -562,8 +562,7 @@ struct spi_controller {
 #define SPI_CONTROLLER_NO_TX		BIT(2)	/* Can't do buffer write */
 #define SPI_CONTROLLER_MUST_RX		BIT(3)	/* Requires rx */
 #define SPI_CONTROLLER_MUST_TX		BIT(4)	/* Requires tx */
-
-#define SPI_MASTER_GPIO_SS		BIT(5)	/* GPIO CS must select slave */
+#define SPI_CONTROLLER_GPIO_SS		BIT(5)	/* GPIO CS must select slave */
 
 	/* Flag indicating if the allocation of this struct is devres-managed */
 	bool			devm_allocated;

From 7a2b552c8e0e5bb280558f6c120140f5f06323bc Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 Jul 2023 18:49:31 +0300
Subject: [PATCH 0095/2047] spi: Convert to SPI_CONTROLLER_HALF_DUPLEX

Convert the users under SPI subsystem to SPI_CONTROLLER_HALF_DUPLEX.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230710154932.68377-15-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-amd.c             | 2 +-
 drivers/spi/spi-cavium-thunderx.c | 2 +-
 drivers/spi/spi-falcon.c          | 2 +-
 drivers/spi/spi-lp8841-rtc.c      | 2 +-
 drivers/spi/spi-mxs.c             | 2 +-
 drivers/spi/spi-omap-uwire.c      | 2 +-
 drivers/spi/spi-pic32-sqi.c       | 2 +-
 drivers/spi/spi-qcom-qspi.c       | 2 +-
 drivers/spi/spi-rockchip-sfc.c    | 2 +-
 drivers/spi/spi-sprd-adi.c        | 2 +-
 drivers/spi/spi-ti-qspi.c         | 2 +-
 drivers/spi/spi-xcomm.c           | 2 +-
 12 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
index fecead757a3c..b19766571f28 100644
--- a/drivers/spi/spi-amd.c
+++ b/drivers/spi/spi-amd.c
@@ -404,7 +404,7 @@ static int amd_spi_probe(struct platform_device *pdev)
 	master->bus_num = 0;
 	master->num_chipselect = 4;
 	master->mode_bits = 0;
-	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
 	master->max_speed_hz = AMD_SPI_MAX_HZ;
 	master->min_speed_hz = AMD_SPI_MIN_HZ;
 	master->setup = amd_spi_master_setup;
diff --git a/drivers/spi/spi-cavium-thunderx.c b/drivers/spi/spi-cavium-thunderx.c
index 60c0d6934654..535f7eb9fa69 100644
--- a/drivers/spi/spi-cavium-thunderx.c
+++ b/drivers/spi/spi-cavium-thunderx.c
@@ -64,7 +64,7 @@ static int thunderx_spi_probe(struct pci_dev *pdev,
 		p->sys_freq = SYS_FREQ_DEFAULT;
 	dev_info(dev, "Set system clock to %u\n", p->sys_freq);
 
-	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
 	master->num_chipselect = 4;
 	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH |
 			    SPI_LSB_FIRST | SPI_3WIRE;
diff --git a/drivers/spi/spi-falcon.c b/drivers/spi/spi-falcon.c
index 4c103dff0d44..8a8414cbb400 100644
--- a/drivers/spi/spi-falcon.c
+++ b/drivers/spi/spi-falcon.c
@@ -401,7 +401,7 @@ static int falcon_sflash_probe(struct platform_device *pdev)
 	priv->master = master;
 
 	master->mode_bits = SPI_MODE_3;
-	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
 	master->setup = falcon_sflash_setup;
 	master->transfer_one_message = falcon_sflash_xfer_one;
 	master->dev.of_node = pdev->dev.of_node;
diff --git a/drivers/spi/spi-lp8841-rtc.c b/drivers/spi/spi-lp8841-rtc.c
index ccaa7a946359..c6810720b3b5 100644
--- a/drivers/spi/spi-lp8841-rtc.c
+++ b/drivers/spi/spi-lp8841-rtc.c
@@ -191,7 +191,7 @@ spi_lp8841_rtc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	platform_set_drvdata(pdev, master);
 
-	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
 	master->mode_bits = SPI_CS_HIGH | SPI_3WIRE | SPI_LSB_FIRST;
 
 	master->bus_num = pdev->id;
diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c
index 963a53dd680b..cd0e7ae07162 100644
--- a/drivers/spi/spi-mxs.c
+++ b/drivers/spi/spi-mxs.c
@@ -572,7 +572,7 @@ static int mxs_spi_probe(struct platform_device *pdev)
 	master->mode_bits = SPI_CPOL | SPI_CPHA;
 	master->num_chipselect = 3;
 	master->dev.of_node = np;
-	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
 	master->auto_runtime_pm = true;
 
 	spi = spi_master_get_devdata(master);
diff --git a/drivers/spi/spi-omap-uwire.c b/drivers/spi/spi-omap-uwire.c
index 902d2e0c1f2f..f89aa9e52c23 100644
--- a/drivers/spi/spi-omap-uwire.c
+++ b/drivers/spi/spi-omap-uwire.c
@@ -486,7 +486,7 @@ static int uwire_probe(struct platform_device *pdev)
 	/* the spi->mode bits understood by this driver: */
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 16);
-	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
 
 	master->bus_num = 2;	/* "official" */
 	master->num_chipselect = 4;
diff --git a/drivers/spi/spi-pic32-sqi.c b/drivers/spi/spi-pic32-sqi.c
index 51dfb49523f3..5cbebcf26a2a 100644
--- a/drivers/spi/spi-pic32-sqi.c
+++ b/drivers/spi/spi-pic32-sqi.c
@@ -648,7 +648,7 @@ static int pic32_sqi_probe(struct platform_device *pdev)
 	master->dev.of_node	= pdev->dev.of_node;
 	master->mode_bits	= SPI_MODE_3 | SPI_MODE_0 | SPI_TX_DUAL |
 				  SPI_RX_DUAL | SPI_TX_QUAD | SPI_RX_QUAD;
-	master->flags		= SPI_MASTER_HALF_DUPLEX;
+	master->flags		= SPI_CONTROLLER_HALF_DUPLEX;
 	master->can_dma		= pic32_sqi_can_dma;
 	master->bits_per_word_mask	= SPI_BPW_RANGE_MASK(8, 32);
 	master->transfer_one_message	= pic32_sqi_one_message;
diff --git a/drivers/spi/spi-qcom-qspi.c b/drivers/spi/spi-qcom-qspi.c
index a8a683d6145c..5a98c52bad32 100644
--- a/drivers/spi/spi-qcom-qspi.c
+++ b/drivers/spi/spi-qcom-qspi.c
@@ -724,7 +724,7 @@ static int qcom_qspi_probe(struct platform_device *pdev)
 	master->mode_bits = SPI_MODE_0 |
 			    SPI_TX_DUAL | SPI_RX_DUAL |
 			    SPI_TX_QUAD | SPI_RX_QUAD;
-	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
 	master->prepare_message = qcom_qspi_prepare_message;
 	master->transfer_one = qcom_qspi_transfer_one;
 	master->handle_err = qcom_qspi_handle_err;
diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c
index 583f4187f030..bd550e76ab3d 100644
--- a/drivers/spi/spi-rockchip-sfc.c
+++ b/drivers/spi/spi-rockchip-sfc.c
@@ -565,7 +565,7 @@ static int rockchip_sfc_probe(struct platform_device *pdev)
 	if (!master)
 		return -ENOMEM;
 
-	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
 	master->mem_ops = &rockchip_sfc_mem_ops;
 	master->dev.of_node = pdev->dev.of_node;
 	master->mode_bits = SPI_TX_QUAD | SPI_TX_DUAL | SPI_RX_QUAD | SPI_RX_DUAL;
diff --git a/drivers/spi/spi-sprd-adi.c b/drivers/spi/spi-sprd-adi.c
index 22e39c4c12c4..94d16aadfa44 100644
--- a/drivers/spi/spi-sprd-adi.c
+++ b/drivers/spi/spi-sprd-adi.c
@@ -580,7 +580,7 @@ static int sprd_adi_probe(struct platform_device *pdev)
 	ctlr->dev.of_node = pdev->dev.of_node;
 	ctlr->bus_num = pdev->id;
 	ctlr->num_chipselect = num_chipselect;
-	ctlr->flags = SPI_MASTER_HALF_DUPLEX;
+	ctlr->flags = SPI_CONTROLLER_HALF_DUPLEX;
 	ctlr->bits_per_word_mask = 0;
 	ctlr->transfer_one = sprd_adi_transfer_one;
 
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index 5914335ff63d..bf4b40289bee 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -770,7 +770,7 @@ static int ti_qspi_probe(struct platform_device *pdev)
 
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_RX_DUAL | SPI_RX_QUAD;
 
-	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
 	master->setup = ti_qspi_setup;
 	master->auto_runtime_pm = true;
 	master->transfer_one_message = ti_qspi_start_transfer_one;
diff --git a/drivers/spi/spi-xcomm.c b/drivers/spi/spi-xcomm.c
index ae6218bcd02a..a3d57554f5ba 100644
--- a/drivers/spi/spi-xcomm.c
+++ b/drivers/spi/spi-xcomm.c
@@ -218,7 +218,7 @@ static int spi_xcomm_probe(struct i2c_client *i2c)
 	master->num_chipselect = 16;
 	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_3WIRE;
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
 	master->transfer_one_message = spi_xcomm_transfer_one;
 	master->dev.of_node = i2c->dev.of_node;
 	i2c_set_clientdata(i2c, master);

From 702ca0269ed56e2d8dae7874a4d8af268e2a382e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 Jul 2023 18:49:32 +0300
Subject: [PATCH 0096/2047] spi: Fix spelling typos and acronyms capitalization

Fix
  - spelling typos
  - capitalization of acronyms
in the comments.

While at it, fix the multi-line comment style.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230710154932.68377-16-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c          |  54 ++++++++-------
 include/linux/spi/spi.h    | 137 ++++++++++++++++++++-----------------
 include/trace/events/spi.h |   2 +-
 3 files changed, 104 insertions(+), 89 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index d9667f3f151e..ae2693ba1744 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -682,7 +682,7 @@ static int __spi_add_device(struct spi_device *spi)
  * @spi: spi_device to register
  *
  * Companion function to spi_alloc_device.  Devices allocated with
- * spi_alloc_device can be added onto the spi bus with this function.
+ * spi_alloc_device can be added onto the SPI bus with this function.
  *
  * Return: 0 on success; negative errno on failure
  */
@@ -889,7 +889,7 @@ int spi_register_board_info(struct spi_board_info const *info, unsigned n)
  * spi_res_alloc - allocate a spi resource that is life-cycle managed
  *                 during the processing of a spi_message while using
  *                 spi_transfer_one
- * @spi:     the spi device for which we allocate memory
+ * @spi:     the SPI device for which we allocate memory
  * @release: the release code to execute for this resource
  * @size:    size to alloc and return
  * @gfp:     GFP allocation flags
@@ -915,7 +915,7 @@ static void *spi_res_alloc(struct spi_device *spi, spi_res_release_t release,
 }
 
 /**
- * spi_res_free - free an spi resource
+ * spi_res_free - free an SPI resource
  * @res: pointer to the custom data of a resource
  */
 static void spi_res_free(void *res)
@@ -931,7 +931,7 @@ static void spi_res_free(void *res)
 
 /**
  * spi_res_add - add a spi_res to the spi_message
- * @message: the spi message
+ * @message: the SPI message
  * @res:     the spi_resource
  */
 static void spi_res_add(struct spi_message *message, void *res)
@@ -943,7 +943,7 @@ static void spi_res_add(struct spi_message *message, void *res)
 }
 
 /**
- * spi_res_release - release all spi resources for this message
+ * spi_res_release - release all SPI resources for this message
  * @ctlr:  the @spi_controller
  * @message: the @spi_message
  */
@@ -1424,7 +1424,7 @@ int spi_delay_to_ns(struct spi_delay *_delay, struct spi_transfer *xfer)
 			return -EINVAL;
 		/*
 		 * If there is unknown effective speed, approximate it
-		 * by underestimating with half of the requested hz.
+		 * by underestimating with half of the requested Hz.
 		 */
 		hz = xfer->effective_speed_hz ?: xfer->speed_hz / 2;
 		if (!hz)
@@ -1739,11 +1739,11 @@ static int __spi_pump_transfer_message(struct spi_controller *ctlr,
 }
 
 /**
- * __spi_pump_messages - function which processes spi message queue
+ * __spi_pump_messages - function which processes SPI message queue
  * @ctlr: controller to process queue for
  * @in_kthread: true if we are in the context of the message pump thread
  *
- * This function checks if there is any spi message in the queue that
+ * This function checks if there is any SPI message in the queue that
  * needs processing and if so call out to the driver to initialize hardware
  * and transfer each message.
  *
@@ -1758,7 +1758,7 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 	unsigned long flags;
 	int ret;
 
-	/* Take the IO mutex */
+	/* Take the I/O mutex */
 	mutex_lock(&ctlr->io_mutex);
 
 	/* Lock queue */
@@ -2169,8 +2169,8 @@ static int __spi_queued_transfer(struct spi_device *spi,
 
 /**
  * spi_queued_transfer - transfer function for queued transfers
- * @spi: spi device which is requesting transfer
- * @msg: spi message which is to handled is queued to driver queue
+ * @spi: SPI device which is requesting transfer
+ * @msg: SPI message which is to handled is queued to driver queue
  *
  * Return: zero on success, else a negative error code.
  */
@@ -2496,7 +2496,7 @@ static int acpi_spi_count(struct acpi_resource *ares, void *data)
  * acpi_spi_count_resources - Count the number of SpiSerialBus resources
  * @adev:	ACPI device
  *
- * Returns the number of SpiSerialBus resources in the ACPI-device's
+ * Return: the number of SpiSerialBus resources in the ACPI-device's
  * resource-list; or a negative error code.
  */
 int acpi_spi_count_resources(struct acpi_device *adev)
@@ -2630,10 +2630,10 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data)
  * @adev: ACPI Device for the spi device
  * @index: Index of the spi resource inside the ACPI Node
  *
- * This should be used to allocate a new spi device from and ACPI Node.
- * The caller is responsible for calling spi_add_device to register the spi device.
+ * This should be used to allocate a new SPI device from and ACPI Device node.
+ * The caller is responsible for calling spi_add_device to register the SPI device.
  *
- * If ctlr is set to NULL, the Controller for the spi device will be looked up
+ * If ctlr is set to NULL, the Controller for the SPI device will be looked up
  * using the resource.
  * If index is set to -1, index is not used.
  * Note: If index is -1, ctlr must be set.
@@ -3331,7 +3331,8 @@ void spi_unregister_controller(struct spi_controller *ctlr)
 	if (IS_ENABLED(CONFIG_SPI_DYNAMIC))
 		mutex_unlock(&ctlr->add_lock);
 
-	/* Release the last reference on the controller if its driver
+	/*
+	 * Release the last reference on the controller if its driver
 	 * has not yet been converted to devm_spi_alloc_master/slave().
 	 */
 	if (!ctlr->devm_allocated)
@@ -3544,7 +3545,7 @@ static int __spi_split_transfer_maxsize(struct spi_controller *ctlr,
 
 	/* All the others need rx_buf/tx_buf also set */
 	for (i = 1, offset = maxsize; i < count; offset += maxsize, i++) {
-		/* Update rx_buf, tx_buf and dma */
+		/* Update rx_buf, tx_buf and DMA */
 		if (xfers[i].rx_buf)
 			xfers[i].rx_buf += offset;
 		if (xfers[i].rx_dma)
@@ -3614,7 +3615,7 @@ EXPORT_SYMBOL_GPL(spi_split_transfers_maxsize);
 
 
 /**
- * spi_split_transfers_maxwords - split spi transfers into multiple transfers
+ * spi_split_transfers_maxwords - split SPI transfers into multiple transfers
  *                                when an individual transfer exceeds a
  *                                certain number of SPI words
  * @ctlr:     the @spi_controller for this transfer
@@ -3657,7 +3658,8 @@ EXPORT_SYMBOL_GPL(spi_split_transfers_maxwords);
 
 /*-------------------------------------------------------------------------*/
 
-/* Core methods for SPI controller protocol drivers.  Some of the
+/*
+ * Core methods for SPI controller protocol drivers. Some of the
  * other core methods are currently defined as inline functions.
  */
 
@@ -3717,7 +3719,7 @@ static int spi_set_cs_timing(struct spi_device *spi)
  * changes those settings, and must be called from a context that can sleep.
  * Except for SPI_CS_HIGH, which takes effect immediately, the changes take
  * effect the next time the device is selected and data is transferred to
- * or from it.  When this function returns, the spi device is deselected.
+ * or from it.  When this function returns, the SPI device is deselected.
  *
  * Note that this call will fail if the protocol driver specifies an option
  * that the underlying controller or its driver does not support.  For
@@ -4057,7 +4059,7 @@ static int __spi_async(struct spi_device *spi, struct spi_message *message)
  * spi_async - asynchronous SPI transfer
  * @spi: device with which data will be exchanged
  * @message: describes the data transfers, including completion callback
- * Context: any (irqs may be blocked, etc)
+ * Context: any (IRQs may be blocked, etc)
  *
  * This call may be used in_irq and other contexts which can't sleep,
  * as well as from task contexts which can sleep.
@@ -4111,7 +4113,7 @@ EXPORT_SYMBOL_GPL(spi_async);
  * spi_async_locked - version of spi_async with exclusive bus usage
  * @spi: device with which data will be exchanged
  * @message: describes the data transfers, including completion callback
- * Context: any (irqs may be blocked, etc)
+ * Context: any (IRQs may be blocked, etc)
  *
  * This call may be used in_irq and other contexts which can't sleep,
  * as well as from task contexts which can sleep.
@@ -4374,9 +4376,9 @@ static u8	*buf;
 /**
  * spi_write_then_read - SPI synchronous write followed by read
  * @spi: device with which data will be exchanged
- * @txbuf: data to be written (need not be dma-safe)
+ * @txbuf: data to be written (need not be DMA-safe)
  * @n_tx: size of txbuf, in bytes
- * @rxbuf: buffer into which data will be read (need not be dma-safe)
+ * @rxbuf: buffer into which data will be read (need not be DMA-safe)
  * @n_rx: size of rxbuf, in bytes
  * Context: can sleep
  *
@@ -4387,7 +4389,7 @@ static u8	*buf;
  *
  * Parameters to this routine are always copied using a small buffer.
  * Performance-sensitive or bulk transfer code should instead use
- * spi_{async,sync}() calls with dma-safe buffers.
+ * spi_{async,sync}() calls with DMA-safe buffers.
  *
  * Return: zero on success, else a negative error code.
  */
@@ -4432,7 +4434,7 @@ int spi_write_then_read(struct spi_device *spi,
 	x[0].tx_buf = local_buf;
 	x[1].rx_buf = local_buf + n_tx;
 
-	/* Do the i/o */
+	/* Do the I/O */
 	status = spi_sync(spi, &message);
 	if (status == 0)
 		memcpy(rxbuf, x[1].rx_buf, n_rx);
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 39a2e6108694..04daf61dfd3f 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -36,7 +36,7 @@ extern struct bus_type spi_bus_type;
 
 /**
  * struct spi_statistics - statistics for spi transfers
- * @syncp:         seqcount to protect members in this struct for per-cpu udate
+ * @syncp:         seqcount to protect members in this struct for per-cpu update
  *                 on 32-bit systems
  *
  * @messages:      number of spi-messages handled
@@ -55,7 +55,7 @@ extern struct bus_type spi_bus_type;
  * @bytes_rx:      number of bytes received from device
  *
  * @transfer_bytes_histo:
- *                 transfer bytes histogramm
+ *                 transfer bytes histogram
  *
  * @transfers_split_maxsize:
  *                 number of transfers that have been split because of
@@ -156,7 +156,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
  *	the device will bind to the named driver and only the named driver.
  *	Do not set directly, because core frees it; use driver_set_override() to
  *	set or clear it.
- * @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when
+ * @cs_gpiod: GPIO descriptor of the chipselect line (optional, NULL when
  *	not using a GPIO line)
  * @word_delay: delay to be inserted between consecutive
  *	words of a transfer
@@ -212,7 +212,7 @@ struct spi_device {
 	void			*controller_data;
 	char			modalias[SPI_NAME_SIZE];
 	const char		*driver_override;
-	struct gpio_desc	*cs_gpiod;	/* Chip select gpio desc */
+	struct gpio_desc	*cs_gpiod;	/* Chip select GPIO descriptor */
 	struct spi_delay	word_delay; /* Inter-word delay */
 	/* CS delays */
 	struct spi_delay	cs_setup;
@@ -223,7 +223,7 @@ struct spi_device {
 	struct spi_statistics __percpu	*pcpu_statistics;
 
 	/*
-	 * likely need more hooks for more protocol options affecting how
+	 * Likely need more hooks for more protocol options affecting how
 	 * the controller talks to each chip, like:
 	 *  - memory packing (12 bit samples into low bits, others zeroed)
 	 *  - priority
@@ -299,11 +299,11 @@ static inline void spi_set_csgpiod(struct spi_device *spi, u8 idx, struct gpio_d
 /**
  * struct spi_driver - Host side "protocol" driver
  * @id_table: List of SPI devices supported by this driver
- * @probe: Binds this driver to the spi device.  Drivers can verify
+ * @probe: Binds this driver to the SPI device.  Drivers can verify
  *	that the device is actually present, and may need to configure
  *	characteristics (such as bits_per_word) which weren't needed for
  *	the initial configuration done during system setup.
- * @remove: Unbinds this driver from the spi device
+ * @remove: Unbinds this driver from the SPI device
  * @shutdown: Standard shutdown callback used during system state
  *	transitions such as powerdown/halt and kexec
  * @driver: SPI device drivers should initialize the name and owner
@@ -415,7 +415,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
  * @queued: whether this controller is providing an internal message queue
  * @kworker: pointer to thread struct for message pump
  * @pump_messages: work struct for scheduling work to the message pump
- * @queue_lock: spinlock to syncronise access to message queue
+ * @queue_lock: spinlock to synchronise access to message queue
  * @queue: message queue
  * @cur_msg: the currently in-flight message
  * @cur_msg_completion: a completion for the current in-flight message
@@ -473,7 +473,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
  * @unprepare_message: undo any work done by prepare_message().
  * @slave_abort: abort the ongoing transfer request on an SPI slave controller
  * @target_abort: abort the ongoing transfer request on an SPI target controller
- * @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS
+ * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS
  *	number. Any individual value may be NULL for CS lines that
  *	are not GPIOs (driven by the SPI controller itself).
  * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab
@@ -500,7 +500,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
  *	If the driver does not set this, the SPI core takes the snapshot as
  *	close to the driver hand-over as possible.
  * @irq_flags: Interrupt enable state during PTP system timestamping
- * @fallback: fallback to pio if dma transfer return failure with
+ * @fallback: fallback to PIO if DMA transfer return failure with
  *	SPI_TRANS_FAIL_NO_START.
  * @queue_empty: signal green light for opportunistically skipping the queue
  *	for spi_sync transfers.
@@ -522,15 +522,17 @@ struct spi_controller {
 
 	struct list_head list;
 
-	/* Other than negative (== assign one dynamically), bus_num is fully
-	 * board-specific.  usually that simplifies to being SOC-specific.
-	 * example:  one SOC has three SPI controllers, numbered 0..2,
-	 * and one board's schematics might show it using SPI-2.  software
+	/*
+	 * Other than negative (== assign one dynamically), bus_num is fully
+	 * board-specific. Usually that simplifies to being SoC-specific.
+	 * example: one SoC has three SPI controllers, numbered 0..2,
+	 * and one board's schematics might show it using SPI-2. Software
 	 * would normally use bus_num=2 for that controller.
 	 */
 	s16			bus_num;
 
-	/* chipselects will be integral to many controllers; some others
+	/*
+	 * Chipselects will be integral to many controllers; some others
 	 * might use board-specific GPIOs.
 	 */
 	u16			num_chipselect;
@@ -575,8 +577,8 @@ struct spi_controller {
 	};
 
 	/*
-	 * on some hardware transfer / message size may be constrained
-	 * the limit may depend on device transfer settings
+	 * On some hardware transfer / message size may be constrained
+	 * the limit may depend on device transfer settings.
 	 */
 	size_t (*max_transfer_size)(struct spi_device *spi);
 	size_t (*max_message_size)(struct spi_device *spi);
@@ -594,7 +596,8 @@ struct spi_controller {
 	/* Flag indicating that the SPI bus is locked for exclusive use */
 	bool			bus_lock_flag;
 
-	/* Setup mode and clock, etc (spi driver may call many times).
+	/*
+	 * Setup mode and clock, etc (SPI driver may call many times).
 	 *
 	 * IMPORTANT:  this may be called when transfers to another
 	 * device are active.  DO NOT UPDATE SHARED REGISTERS in ways
@@ -612,18 +615,19 @@ struct spi_controller {
 	 */
 	int (*set_cs_timing)(struct spi_device *spi);
 
-	/* Bidirectional bulk transfers
+	/*
+	 * Bidirectional bulk transfers
 	 *
 	 * + The transfer() method may not sleep; its main role is
 	 *   just to add the message to the queue.
 	 * + For now there's no remove-from-queue operation, or
 	 *   any other request management
-	 * + To a given spi_device, message queueing is pure fifo
+	 * + To a given spi_device, message queueing is pure FIFO
 	 *
 	 * + The controller's main job is to process its message queue,
 	 *   selecting a chip (for masters), then transferring data
 	 * + If there are multiple spi_device children, the i/o queue
-	 *   arbitration algorithm is unspecified (round robin, fifo,
+	 *   arbitration algorithm is unspecified (round robin, FIFO,
 	 *   priority, reservations, preemption, etc)
 	 *
 	 * + Chipselect stays active during the entire message
@@ -704,7 +708,7 @@ struct spi_controller {
 	const struct spi_controller_mem_ops *mem_ops;
 	const struct spi_controller_mem_caps *mem_caps;
 
-	/* gpio chip select */
+	/* GPIO chip select */
 	struct gpio_desc	**cs_gpiods;
 	bool			use_gpio_descriptors;
 	s8			unused_native_cs;
@@ -788,7 +792,7 @@ void spi_take_timestamp_post(struct spi_controller *ctlr,
 			     struct spi_transfer *xfer,
 			     size_t progress, bool irqs_off);
 
-/* The spi driver core manages memory for the spi_controller classdev */
+/* The SPI driver core manages memory for the spi_controller classdev */
 extern struct spi_controller *__spi_alloc_controller(struct device *host,
 						unsigned int size, bool slave);
 
@@ -877,13 +881,13 @@ typedef void (*spi_res_release_t)(struct spi_controller *ctlr,
 				  void *res);
 
 /**
- * struct spi_res - spi resource management structure
+ * struct spi_res - SPI resource management structure
  * @entry:   list entry
  * @release: release code called prior to freeing this resource
  * @data:    extra data allocated for the specific use-case
  *
- * this is based on ideas from devres, but focused on life-cycle
- * management during spi_message processing
+ * This is based on ideas from devres, but focused on life-cycle
+ * management during spi_message processing.
  */
 struct spi_res {
 	struct list_head        entry;
@@ -901,7 +905,7 @@ struct spi_res {
  *
  * The spi_messages themselves consist of a series of read+write transfer
  * segments.  Those segments always read the same number of bits as they
- * write; but one or the other is easily ignored by passing a null buffer
+ * write; but one or the other is easily ignored by passing a NULL buffer
  * pointer.  (This is unlike most types of I/O API, because SPI hardware
  * is full duplex.)
  *
@@ -912,8 +916,8 @@ struct spi_res {
 
 /**
  * struct spi_transfer - a read/write buffer pair
- * @tx_buf: data to be written (dma-safe memory), or NULL
- * @rx_buf: data to be read (dma-safe memory), or NULL
+ * @tx_buf: data to be written (DMA-safe memory), or NULL
+ * @rx_buf: data to be read (DMA-safe memory), or NULL
  * @tx_dma: DMA address of tx_buf, if @spi_message.is_dma_mapped
  * @rx_dma: DMA address of rx_buf, if @spi_message.is_dma_mapped
  * @tx_nbits: number of bits used for writing. If 0 the default
@@ -936,7 +940,7 @@ struct spi_res {
  * @word_delay: inter word delay to be introduced after each word size
  *	(set by bits_per_word) transmission.
  * @effective_speed_hz: the effective SCK-speed that was used to
- *      transfer this transfer. Set to 0 if the spi bus driver does
+ *      transfer this transfer. Set to 0 if the SPI bus driver does
  *      not support it.
  * @transfer_list: transfers are sequenced through @spi_message.transfers
  * @tx_sg: Scatterlist for transmit, currently not for client use
@@ -965,16 +969,16 @@ struct spi_res {
  *	transmitting the "pre" word, and the "post" timestamp after receiving
  *	transmit confirmation from the controller for the "post" word.
  * @timestamped: true if the transfer has been timestamped
- * @error: Error status logged by spi controller driver.
+ * @error: Error status logged by SPI controller driver.
  *
  * SPI transfers always write the same number of bytes as they read.
  * Protocol drivers should always provide @rx_buf and/or @tx_buf.
  * In some cases, they may also want to provide DMA addresses for
  * the data being transferred; that may reduce overhead, when the
- * underlying driver uses dma.
+ * underlying driver uses DMA.
  *
- * If the transmit buffer is null, zeroes will be shifted out
- * while filling @rx_buf.  If the receive buffer is null, the data
+ * If the transmit buffer is NULL, zeroes will be shifted out
+ * while filling @rx_buf.  If the receive buffer is NULL, the data
  * shifted in will be discarded.  Only "len" bytes shift out (or in).
  * It's an error to try to shift out a partial word.  (For example, by
  * shifting out three bytes with word size of sixteen or twenty bits;
@@ -1008,7 +1012,7 @@ struct spi_res {
  * Some devices need protocol transactions to be built from a series of
  * spi_message submissions, where the content of one message is determined
  * by the results of previous messages and where the whole transaction
- * ends when the chipselect goes intactive.
+ * ends when the chipselect goes inactive.
  *
  * When SPI can transfer in 1x,2x or 4x. It can get this transfer information
  * from device through @tx_nbits and @rx_nbits. In Bi-direction, these
@@ -1022,10 +1026,11 @@ struct spi_res {
  * and its transfers, ignore them until its completion callback.
  */
 struct spi_transfer {
-	/* It's ok if tx_buf == rx_buf (right?)
-	 * for MicroWire, one buffer must be null
-	 * buffers must work with dma_*map_single() calls, unless
-	 *   spi_message.is_dma_mapped reports a pre-existing mapping
+	/*
+	 * It's okay if tx_buf == rx_buf (right?).
+	 * For MicroWire, one buffer must be NULL.
+	 * Buffers must work with dma_*map_single() calls, unless
+	 * spi_message.is_dma_mapped reports a pre-existing mapping.
 	 */
 	const void	*tx_buf;
 	void		*rx_buf;
@@ -1045,9 +1050,9 @@ struct spi_transfer {
 	unsigned	tx_nbits:3;
 	unsigned	rx_nbits:3;
 	unsigned	timestamped:1;
-#define	SPI_NBITS_SINGLE	0x01 /* 1bit transfer */
-#define	SPI_NBITS_DUAL		0x02 /* 2bits transfer */
-#define	SPI_NBITS_QUAD		0x04 /* 4bits transfer */
+#define	SPI_NBITS_SINGLE	0x01 /* 1-bit transfer */
+#define	SPI_NBITS_DUAL		0x02 /* 2-bit transfer */
+#define	SPI_NBITS_QUAD		0x04 /* 4-bit transfer */
 	u8		bits_per_word;
 	struct spi_delay	delay;
 	struct spi_delay	cs_change_delay;
@@ -1068,7 +1073,7 @@ struct spi_transfer {
  * struct spi_message - one multi-segment SPI transaction
  * @transfers: list of transfer segments in this transaction
  * @spi: SPI device to which the transaction is queued
- * @is_dma_mapped: if true, the caller provided both dma and cpu virtual
+ * @is_dma_mapped: if true, the caller provided both DMA and CPU virtual
  *	addresses for each transfer buffer
  * @complete: called to report transaction completions
  * @context: the argument to complete() when it's called
@@ -1078,7 +1083,7 @@ struct spi_transfer {
  * @status: zero for success, else negative errno
  * @queue: for use by whichever driver currently owns the message
  * @state: for use by whichever driver currently owns the message
- * @resources: for resource management when the spi message is processed
+ * @resources: for resource management when the SPI message is processed
  * @prepared: spi_prepare_message was called for the this message
  *
  * A @spi_message is used to execute an atomic sequence of data transfers,
@@ -1105,7 +1110,8 @@ struct spi_message {
 	/* spi_prepare_message() was called for this message */
 	bool			prepared;
 
-	/* REVISIT:  we might want a flag affecting the behavior of the
+	/*
+	 * REVISIT: we might want a flag affecting the behavior of the
 	 * last transfer ... allowing things like "read 16 bit length L"
 	 * immediately followed by "read L bytes".  Basically imposing
 	 * a specific message scheduling algorithm.
@@ -1123,14 +1129,15 @@ struct spi_message {
 	unsigned		frame_length;
 	unsigned		actual_length;
 
-	/* For optional use by whatever driver currently owns the
+	/*
+	 * For optional use by whatever driver currently owns the
 	 * spi_message ...  between calls to spi_async and then later
 	 * complete(), that's the spi_controller controller driver.
 	 */
 	struct list_head	queue;
 	void			*state;
 
-	/* List of spi_res reources when the spi message is processed */
+	/* List of spi_res resources when the SPI message is processed */
 	struct list_head        resources;
 };
 
@@ -1167,7 +1174,7 @@ spi_transfer_delay_exec(struct spi_transfer *t)
 /**
  * spi_message_init_with_transfers - Initialize spi_message and append transfers
  * @m: spi_message to be initialized
- * @xfers: An array of spi transfers
+ * @xfers: An array of SPI transfers
  * @num_xfers: Number of items in the xfer array
  *
  * This function initializes the given spi_message and adds each spi_transfer in
@@ -1184,10 +1191,10 @@ struct spi_transfer *xfers, unsigned int num_xfers)
 		spi_message_add_tail(&xfers[i], m);
 }
 
-/* It's fine to embed message and transaction structures in other data
+/*
+ * It's fine to embed message and transaction structures in other data
  * structures so long as you don't free them while they're in use.
  */
-
 static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags)
 {
 	struct spi_message *m;
@@ -1290,7 +1297,7 @@ typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr,
  *                                 replacements that have occurred
  *                                 so that they can get reverted
  * @release:            some extra release code to get executed prior to
- *                      relasing this structure
+ *                      releasing this structure
  * @extradata:          pointer to some extra data if requested or NULL
  * @replaced_transfers: transfers that have been replaced and which need
  *                      to get restored
@@ -1300,9 +1307,9 @@ typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr,
  * @inserted_transfers: array of spi_transfers of array-size @inserted,
  *                      that have been replacing replaced_transfers
  *
- * note: that @extradata will point to @inserted_transfers[@inserted]
+ * Note: that @extradata will point to @inserted_transfers[@inserted]
  * if some extra allocation is requested, so alignment will be the same
- * as for spi_transfers
+ * as for spi_transfers.
  */
 struct spi_replaced_transfers {
 	spi_replaced_release_t release;
@@ -1328,7 +1335,8 @@ extern int spi_split_transfers_maxwords(struct spi_controller *ctlr,
 
 /*---------------------------------------------------------------------------*/
 
-/* All these synchronous SPI transfer routines are utilities layered
+/*
+ * All these synchronous SPI transfer routines are utilities layered
  * over the core async transfer primitive.  Here, "synchronous" means
  * they will sleep uninterruptibly until the async transfer completes.
  */
@@ -1471,7 +1479,7 @@ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd)
  *
  * Callable only from contexts that can sleep.
  *
- * Return: the (unsigned) sixteen bit number returned by the device in cpu
+ * Return: the (unsigned) sixteen bit number returned by the device in CPU
  * endianness, or else a negative error code.
  */
 static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd)
@@ -1499,7 +1507,7 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd)
  * As a rule, SPI devices can't be probed.  Instead, board init code
  * provides a table listing the devices which are present, with enough
  * information to bind and set up the device's driver.  There's basic
- * support for nonstatic configurations too; enough to handle adding
+ * support for non-static configurations too; enough to handle adding
  * parport adapters, or microcontrollers acting as USB-to-SPI bridges.
  */
 
@@ -1536,12 +1544,13 @@ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd)
  * are active in some dynamic board configuration models.
  */
 struct spi_board_info {
-	/* The device name and module name are coupled, like platform_bus;
+	/*
+	 * The device name and module name are coupled, like platform_bus;
 	 * "modalias" is normally the driver name.
 	 *
 	 * platform_data goes to spi_device.dev.platform_data,
 	 * controller_data goes to spi_device.controller_data,
-	 * irq is copied too
+	 * IRQ is copied too.
 	 */
 	char		modalias[SPI_NAME_SIZE];
 	const void	*platform_data;
@@ -1553,7 +1562,8 @@ struct spi_board_info {
 	u32		max_speed_hz;
 
 
-	/* bus_num is board specific and matches the bus_num of some
+	/*
+	 * bus_num is board specific and matches the bus_num of some
 	 * spi_controller that will probably be registered later.
 	 *
 	 * chip_select reflects how this chip is wired to that master;
@@ -1562,12 +1572,14 @@ struct spi_board_info {
 	u16		bus_num;
 	u16		chip_select;
 
-	/* mode becomes spi_device.mode, and is essential for chips
+	/*
+	 * mode becomes spi_device.mode, and is essential for chips
 	 * where the default of SPI_CS_HIGH = 0 is wrong.
 	 */
 	u32		mode;
 
-	/* ... may need additional spi_device chip config data here.
+	/*
+	 * ... may need additional spi_device chip config data here.
 	 * avoid stuff protocol drivers can set; but include stuff
 	 * needed to behave without being bound to a driver:
 	 *  - quirks like clock rate mattering when not selected
@@ -1584,7 +1596,8 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n)
 	{ return 0; }
 #endif
 
-/* If you're hotplugging an adapter with devices (parport, usb, etc)
+/*
+ * If you're hotplugging an adapter with devices (parport, USB, etc)
  * use spi_new_device() to describe each device.  You can also call
  * spi_unregister_device() to start making that device vanish, but
  * normally that would be handled by spi_unregister_controller().
diff --git a/include/trace/events/spi.h b/include/trace/events/spi.h
index c0248a8fa79c..e63d4a24d879 100644
--- a/include/trace/events/spi.h
+++ b/include/trace/events/spi.h
@@ -167,7 +167,7 @@ TRACE_EVENT(spi_message_done,
 );
 
 /*
- * consider a buffer valid if non-NULL and if it doesn't match the dummy buffer
+ * Consider a buffer valid if non-NULL and if it doesn't match the dummy buffer
  * that only exist to work with controllers that have SPI_CONTROLLER_MUST_TX or
  * SPI_CONTROLLER_MUST_RX.
  */

From 06a0213977dc5f8345c1222183b8fafac1a8a524 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@sifive.com>
Date: Tue, 11 Jul 2023 18:16:03 +0200
Subject: [PATCH 0097/2047] Non-functional cleanup of a "__user * filename"

The next patch defines a very similar interface, which I copied from
this definition.  Since I'm touching it anyway I don't see any reason
not to just go fix this one up.

Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Message-Id: <ef644540cfd8717f30bcc5e4c32f06c80b6c156e.1689092120.git.legion@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 03e3d0121d5e..584f404bf868 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -438,7 +438,7 @@ asmlinkage long sys_chdir(const char __user *filename);
 asmlinkage long sys_fchdir(unsigned int fd);
 asmlinkage long sys_chroot(const char __user *filename);
 asmlinkage long sys_fchmod(unsigned int fd, umode_t mode);
-asmlinkage long sys_fchmodat(int dfd, const char __user * filename,
+asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
 			     umode_t mode);
 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
 			     gid_t group, int flag);

From 5d37a1198068b099de47073411efc087d1b555ca Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 5 Jul 2023 14:17:19 -0700
Subject: [PATCH 0098/2047] fsverity: explicitly check that there is no
 algorithm 0

Since libfsverity and some other code would break if 0 is ever allocated
as an FS_VERITY_HASH_ALG_* value, make fsverity_check_hash_algs()
explicitly check that there is no algorithm 0.

Link: https://lore.kernel.org/r/20230705211719.37713-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/verity/hash_algs.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c
index c598d2035476..6b08b1d9a7d7 100644
--- a/fs/verity/hash_algs.c
+++ b/fs/verity/hash_algs.c
@@ -226,6 +226,14 @@ void __init fsverity_check_hash_algs(void)
 		if (!alg->name)
 			continue;
 
+		/*
+		 * 0 must never be allocated as an FS_VERITY_HASH_ALG_* value,
+		 * as it is reserved for users that use 0 to mean unspecified or
+		 * a default value.  fs/verity/ itself doesn't care and doesn't
+		 * have a default algorithm, but some users make use of this.
+		 */
+		BUG_ON(i == 0);
+
 		BUG_ON(alg->digest_size > FS_VERITY_MAX_DIGEST_SIZE);
 
 		/*

From e77000ccc531088c486fe5fbd13416fd5e3d2714 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 5 Jul 2023 14:27:42 -0700
Subject: [PATCH 0099/2047] fsverity: simplify handling of errors during
 initcall

Since CONFIG_FS_VERITY is a bool, not a tristate, fs/verity/ can only be
builtin or absent entirely; it can't be a loadable module.  Therefore,
the error code that gets returned from the fsverity_init() initcall is
never used.  If any part of the initcall does fail, which should never
happen, the kernel will be left in a bad state.

Following the usual convention for builtin code, just panic the kernel
if any of part of the initcall fails.

Link: https://lore.kernel.org/r/20230705212743.42180-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/verity/fsverity_private.h | 11 ++++------
 fs/verity/init.c             | 24 +++------------------
 fs/verity/open.c             | 18 +++++-----------
 fs/verity/signature.c        | 42 ++++++++++++------------------------
 fs/verity/verify.c           | 11 ++--------
 5 files changed, 28 insertions(+), 78 deletions(-)

diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index 49bf3a1eb2a0..c5ab9023dd2d 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -118,8 +118,7 @@ void fsverity_free_info(struct fsverity_info *vi);
 int fsverity_get_descriptor(struct inode *inode,
 			    struct fsverity_descriptor **desc_ret);
 
-int __init fsverity_init_info_cache(void);
-void __init fsverity_exit_info_cache(void);
+void __init fsverity_init_info_cache(void);
 
 /* signature.c */
 
@@ -127,7 +126,7 @@ void __init fsverity_exit_info_cache(void);
 int fsverity_verify_signature(const struct fsverity_info *vi,
 			      const u8 *signature, size_t sig_size);
 
-int __init fsverity_init_signature(void);
+void __init fsverity_init_signature(void);
 #else /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
 static inline int
 fsverity_verify_signature(const struct fsverity_info *vi,
@@ -136,15 +135,13 @@ fsverity_verify_signature(const struct fsverity_info *vi,
 	return 0;
 }
 
-static inline int fsverity_init_signature(void)
+static inline void fsverity_init_signature(void)
 {
-	return 0;
 }
 #endif /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
 
 /* verify.c */
 
-int __init fsverity_init_workqueue(void);
-void __init fsverity_exit_workqueue(void);
+void __init fsverity_init_workqueue(void);
 
 #endif /* _FSVERITY_PRIVATE_H */
diff --git a/fs/verity/init.c b/fs/verity/init.c
index 023905151035..bcd11d63eb1c 100644
--- a/fs/verity/init.c
+++ b/fs/verity/init.c
@@ -33,28 +33,10 @@ void fsverity_msg(const struct inode *inode, const char *level,
 
 static int __init fsverity_init(void)
 {
-	int err;
-
 	fsverity_check_hash_algs();
-
-	err = fsverity_init_info_cache();
-	if (err)
-		return err;
-
-	err = fsverity_init_workqueue();
-	if (err)
-		goto err_exit_info_cache;
-
-	err = fsverity_init_signature();
-	if (err)
-		goto err_exit_workqueue;
-
+	fsverity_init_info_cache();
+	fsverity_init_workqueue();
+	fsverity_init_signature();
 	return 0;
-
-err_exit_workqueue:
-	fsverity_exit_workqueue();
-err_exit_info_cache:
-	fsverity_exit_info_cache();
-	return err;
 }
 late_initcall(fsverity_init)
diff --git a/fs/verity/open.c b/fs/verity/open.c
index 1db5106a9c38..6c31a871b84b 100644
--- a/fs/verity/open.c
+++ b/fs/verity/open.c
@@ -408,18 +408,10 @@ void __fsverity_cleanup_inode(struct inode *inode)
 }
 EXPORT_SYMBOL_GPL(__fsverity_cleanup_inode);
 
-int __init fsverity_init_info_cache(void)
+void __init fsverity_init_info_cache(void)
 {
-	fsverity_info_cachep = KMEM_CACHE_USERCOPY(fsverity_info,
-						   SLAB_RECLAIM_ACCOUNT,
-						   file_digest);
-	if (!fsverity_info_cachep)
-		return -ENOMEM;
-	return 0;
-}
-
-void __init fsverity_exit_info_cache(void)
-{
-	kmem_cache_destroy(fsverity_info_cachep);
-	fsverity_info_cachep = NULL;
+	fsverity_info_cachep = KMEM_CACHE_USERCOPY(
+					fsverity_info,
+					SLAB_RECLAIM_ACCOUNT | SLAB_PANIC,
+					file_digest);
 }
diff --git a/fs/verity/signature.c b/fs/verity/signature.c
index 72034bc71c9d..ec75ffec069e 100644
--- a/fs/verity/signature.c
+++ b/fs/verity/signature.c
@@ -109,43 +109,29 @@ static struct ctl_table fsverity_sysctl_table[] = {
 	{ }
 };
 
-static int __init fsverity_sysctl_init(void)
+static void __init fsverity_sysctl_init(void)
 {
-	fsverity_sysctl_header = register_sysctl("fs/verity", fsverity_sysctl_table);
-	if (!fsverity_sysctl_header) {
-		pr_err("sysctl registration failed!\n");
-		return -ENOMEM;
-	}
-	return 0;
+	fsverity_sysctl_header = register_sysctl("fs/verity",
+						 fsverity_sysctl_table);
+	if (!fsverity_sysctl_header)
+		panic("fsverity sysctl registration failed");
 }
 #else /* !CONFIG_SYSCTL */
-static inline int __init fsverity_sysctl_init(void)
+static inline void fsverity_sysctl_init(void)
 {
-	return 0;
 }
 #endif /* !CONFIG_SYSCTL */
 
-int __init fsverity_init_signature(void)
+void __init fsverity_init_signature(void)
 {
-	struct key *ring;
-	int err;
-
-	ring = keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0),
-			     current_cred(), KEY_POS_SEARCH |
+	fsverity_keyring =
+		keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0),
+			      current_cred(), KEY_POS_SEARCH |
 				KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE |
 				KEY_USR_SEARCH | KEY_USR_SETATTR,
-			     KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
-	if (IS_ERR(ring))
-		return PTR_ERR(ring);
+			      KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+	if (IS_ERR(fsverity_keyring))
+		panic("failed to allocate \".fs-verity\" keyring");
 
-	err = fsverity_sysctl_init();
-	if (err)
-		goto err_put_ring;
-
-	fsverity_keyring = ring;
-	return 0;
-
-err_put_ring:
-	key_put(ring);
-	return err;
+	fsverity_sysctl_init();
 }
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
index 433cef51f5f6..904ccd7e8e16 100644
--- a/fs/verity/verify.c
+++ b/fs/verity/verify.c
@@ -346,7 +346,7 @@ void fsverity_enqueue_verify_work(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
 
-int __init fsverity_init_workqueue(void)
+void __init fsverity_init_workqueue(void)
 {
 	/*
 	 * Use a high-priority workqueue to prioritize verification work, which
@@ -360,12 +360,5 @@ int __init fsverity_init_workqueue(void)
 						  WQ_HIGHPRI,
 						  num_online_cpus());
 	if (!fsverity_read_workqueue)
-		return -ENOMEM;
-	return 0;
-}
-
-void __init fsverity_exit_workqueue(void)
-{
-	destroy_workqueue(fsverity_read_workqueue);
-	fsverity_read_workqueue = NULL;
+		panic("failed to allocate fsverity_read_queue");
 }

From 324718ddddc40905f0216062dfbb977809184c06 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 29 Jun 2023 23:48:10 -0700
Subject: [PATCH 0100/2047] fscrypt: improve the "Encryption modes and usage"
 section

As the number of supported encryption modes has grown, the part of the
"Encryption modes and usage" section that describes the supported
encryption modes has gotten a bit messy.  It presents useful
information, but it's a bit lacking in high-level context.

Rework the section to hopefully be much more useful.

Link: https://lore.kernel.org/r/20230630064811.22569-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 Documentation/filesystems/fscrypt.rst | 152 +++++++++++++++++++-------
 1 file changed, 113 insertions(+), 39 deletions(-)

diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index eccd327e6df5..a624e92f2687 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -332,54 +332,121 @@ Encryption modes and usage
 fscrypt allows one encryption mode to be specified for file contents
 and one encryption mode to be specified for filenames.  Different
 directory trees are permitted to use different encryption modes.
+
+Supported modes
+---------------
+
 Currently, the following pairs of encryption modes are supported:
 
 - AES-256-XTS for contents and AES-256-CTS-CBC for filenames
-- AES-128-CBC for contents and AES-128-CTS-CBC for filenames
+- AES-256-XTS for contents and AES-256-HCTR2 for filenames
 - Adiantum for both contents and filenames
-- AES-256-XTS for contents and AES-256-HCTR2 for filenames (v2 policies only)
-- SM4-XTS for contents and SM4-CTS-CBC for filenames (v2 policies only)
+- AES-128-CBC-ESSIV for contents and AES-128-CTS-CBC for filenames
+- SM4-XTS for contents and SM4-CTS-CBC for filenames
 
-If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
+Authenticated encryption modes are not currently supported because of
+the difficulty of dealing with ciphertext expansion.  Therefore,
+contents encryption uses a block cipher in `XTS mode
+<https://en.wikipedia.org/wiki/Disk_encryption_theory#XTS>`_ or
+`CBC-ESSIV mode
+<https://en.wikipedia.org/wiki/Disk_encryption_theory#Encrypted_salt-sector_initialization_vector_(ESSIV)>`_,
+or a wide-block cipher.  Filenames encryption uses a
+block cipher in `CTS-CBC mode
+<https://en.wikipedia.org/wiki/Ciphertext_stealing>`_ or a wide-block
+cipher.
 
-AES-128-CBC was added only for low-powered embedded devices with
-crypto accelerators such as CAAM or CESA that do not support XTS.  To
-use AES-128-CBC, CONFIG_CRYPTO_ESSIV and CONFIG_CRYPTO_SHA256 (or
-another SHA-256 implementation) must be enabled so that ESSIV can be
-used.
+The (AES-256-XTS, AES-256-CTS-CBC) pair is the recommended default.
+It is also the only option that is *guaranteed* to always be supported
+if the kernel supports fscrypt at all; see `Kernel config options`_.
 
-Adiantum is a (primarily) stream cipher-based mode that is fast even
-on CPUs without dedicated crypto instructions.  It's also a true
-wide-block mode, unlike XTS.  It can also eliminate the need to derive
-per-file encryption keys.  However, it depends on the security of two
-primitives, XChaCha12 and AES-256, rather than just one.  See the
-paper "Adiantum: length-preserving encryption for entry-level
-processors" (https://eprint.iacr.org/2018/720.pdf) for more details.
-To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled.  Also, fast
-implementations of ChaCha and NHPoly1305 should be enabled, e.g.
-CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM.
+The (AES-256-XTS, AES-256-HCTR2) pair is also a good choice that
+upgrades the filenames encryption to use a wide-block cipher.  (A
+*wide-block cipher*, also called a tweakable super-pseudorandom
+permutation, has the property that changing one bit scrambles the
+entire result.)  As described in `Filenames encryption`_, a wide-block
+cipher is the ideal mode for the problem domain, though CTS-CBC is the
+"least bad" choice among the alternatives.  For more information about
+HCTR2, see `the HCTR2 paper <https://eprint.iacr.org/2021/1441.pdf>`_.
 
-AES-256-HCTR2 is another true wide-block encryption mode that is intended for
-use on CPUs with dedicated crypto instructions.  AES-256-HCTR2 has the property
-that a bitflip in the plaintext changes the entire ciphertext.  This property
-makes it desirable for filename encryption since initialization vectors are
-reused within a directory.  For more details on AES-256-HCTR2, see the paper
-"Length-preserving encryption with HCTR2"
-(https://eprint.iacr.org/2021/1441.pdf).  To use AES-256-HCTR2,
-CONFIG_CRYPTO_HCTR2 must be enabled.  Also, fast implementations of XCTR and
-POLYVAL should be enabled, e.g. CRYPTO_POLYVAL_ARM64_CE and
-CRYPTO_AES_ARM64_CE_BLK for ARM64.
+Adiantum is recommended on systems where AES is too slow due to lack
+of hardware acceleration for AES.  Adiantum is a wide-block cipher
+that uses XChaCha12 and AES-256 as its underlying components.  Most of
+the work is done by XChaCha12, which is much faster than AES when AES
+acceleration is unavailable.  For more information about Adiantum, see
+`the Adiantum paper <https://eprint.iacr.org/2018/720.pdf>`_.
 
-SM4 is a Chinese block cipher that is an alternative to AES.  It has
-not seen as much security review as AES, and it only has a 128-bit key
-size.  It may be useful in cases where its use is mandated.
-Otherwise, it should not be used.  For SM4 support to be available, it
-also needs to be enabled in the kernel crypto API.
+The (AES-128-CBC-ESSIV, AES-128-CTS-CBC) pair exists only to support
+systems whose only form of AES acceleration is an off-CPU crypto
+accelerator such as CAAM or CESA that does not support XTS.
 
-New encryption modes can be added relatively easily, without changes
-to individual filesystems.  However, authenticated encryption (AE)
-modes are not currently supported because of the difficulty of dealing
-with ciphertext expansion.
+The remaining mode pairs are the "national pride ciphers":
+
+- (SM4-XTS, SM4-CTS-CBC)
+
+Generally speaking, these ciphers aren't "bad" per se, but they
+receive limited security review compared to the usual choices such as
+AES and ChaCha.  They also don't bring much new to the table.  It is
+suggested to only use these ciphers where their use is mandated.
+
+Kernel config options
+---------------------
+
+Enabling fscrypt support (CONFIG_FS_ENCRYPTION) automatically pulls in
+only the basic support from the crypto API needed to use AES-256-XTS
+and AES-256-CTS-CBC encryption.  For optimal performance, it is
+strongly recommended to also enable any available platform-specific
+kconfig options that provide acceleration for the algorithm(s) you
+wish to use.  Support for any "non-default" encryption modes typically
+requires extra kconfig options as well.
+
+Below, some relevant options are listed by encryption mode.  Note,
+acceleration options not listed below may be available for your
+platform; refer to the kconfig menus.  File contents encryption can
+also be configured to use inline encryption hardware instead of the
+kernel crypto API (see `Inline encryption support`_); in that case,
+the file contents mode doesn't need to supported in the kernel crypto
+API, but the filenames mode still does.
+
+- AES-256-XTS and AES-256-CTS-CBC
+    - Recommended:
+        - arm64: CONFIG_CRYPTO_AES_ARM64_CE_BLK
+        - x86: CONFIG_CRYPTO_AES_NI_INTEL
+
+- AES-256-HCTR2
+    - Mandatory:
+        - CONFIG_CRYPTO_HCTR2
+    - Recommended:
+        - arm64: CONFIG_CRYPTO_AES_ARM64_CE_BLK
+        - arm64: CONFIG_CRYPTO_POLYVAL_ARM64_CE
+        - x86: CONFIG_CRYPTO_AES_NI_INTEL
+        - x86: CONFIG_CRYPTO_POLYVAL_CLMUL_NI
+
+- Adiantum
+    - Mandatory:
+        - CONFIG_CRYPTO_ADIANTUM
+    - Recommended:
+        - arm32: CONFIG_CRYPTO_CHACHA20_NEON
+        - arm32: CONFIG_CRYPTO_NHPOLY1305_NEON
+        - arm64: CONFIG_CRYPTO_CHACHA20_NEON
+        - arm64: CONFIG_CRYPTO_NHPOLY1305_NEON
+        - x86: CONFIG_CRYPTO_CHACHA20_X86_64
+        - x86: CONFIG_CRYPTO_NHPOLY1305_SSE2
+        - x86: CONFIG_CRYPTO_NHPOLY1305_AVX2
+
+- AES-128-CBC-ESSIV and AES-128-CTS-CBC:
+    - Mandatory:
+        - CONFIG_CRYPTO_ESSIV
+        - CONFIG_CRYPTO_SHA256 or another SHA-256 implementation
+    - Recommended:
+        - AES-CBC acceleration
+
+fscrypt also uses HMAC-SHA512 for key derivation, so enabling SHA-512
+acceleration is recommended:
+
+- SHA-512
+    - Recommended:
+        - arm64: CONFIG_CRYPTO_SHA512_ARM64_CE
+        - x86: CONFIG_CRYPTO_SHA512_SSSE3
 
 Contents encryption
 -------------------
@@ -493,7 +560,14 @@ This structure must be initialized as follows:
   be set to constants from ``<linux/fscrypt.h>`` which identify the
   encryption modes to use.  If unsure, use FSCRYPT_MODE_AES_256_XTS
   (1) for ``contents_encryption_mode`` and FSCRYPT_MODE_AES_256_CTS
-  (4) for ``filenames_encryption_mode``.
+  (4) for ``filenames_encryption_mode``.  For details, see `Encryption
+  modes and usage`_.
+
+  v1 encryption policies only support three combinations of modes:
+  (FSCRYPT_MODE_AES_256_XTS, FSCRYPT_MODE_AES_256_CTS),
+  (FSCRYPT_MODE_AES_128_CBC, FSCRYPT_MODE_AES_128_CTS), and
+  (FSCRYPT_MODE_ADIANTUM, FSCRYPT_MODE_ADIANTUM).  v2 policies support
+  all combinations documented in `Supported modes`_.
 
 - ``flags`` contains optional flags from ``<linux/fscrypt.h>``:
 

From 456ae5fe9b448f44ebe98b391a3bae9c75df465e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 5 Jul 2023 14:27:43 -0700
Subject: [PATCH 0101/2047] fsverity: move sysctl registration out of
 signature.c

Currently the registration of the fsverity sysctls happens in
signature.c, which couples it to CONFIG_FS_VERITY_BUILTIN_SIGNATURES.

This makes it hard to add new sysctls unrelated to builtin signatures.

Also, some users have started checking whether the directory
/proc/sys/fs/verity exists as a way to tell whether fsverity is
supported.  This isn't the intended method; instead, the existence of
/sys/fs/$fstype/features/verity should be checked, or users should just
try to use the fsverity ioctls.  Regardless, it should be made to work
as expected without a dependency on CONFIG_FS_VERITY_BUILTIN_SIGNATURES.

Therefore, move the sysctl registration into init.c.  With
CONFIG_FS_VERITY_BUILTIN_SIGNATURES, nothing changes.  Without it, but
with CONFIG_FS_VERITY, an empty list of sysctls is now registered.

Link: https://lore.kernel.org/r/20230705212743.42180-3-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/verity/fsverity_private.h |  1 +
 fs/verity/init.c             | 32 ++++++++++++++++++++++++++++++++
 fs/verity/signature.c        | 33 +--------------------------------
 3 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index c5ab9023dd2d..d071a6e32581 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -123,6 +123,7 @@ void __init fsverity_init_info_cache(void);
 /* signature.c */
 
 #ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES
+extern int fsverity_require_signatures;
 int fsverity_verify_signature(const struct fsverity_info *vi,
 			      const u8 *signature, size_t sig_size);
 
diff --git a/fs/verity/init.c b/fs/verity/init.c
index bcd11d63eb1c..a29f062f6047 100644
--- a/fs/verity/init.c
+++ b/fs/verity/init.c
@@ -9,6 +9,37 @@
 
 #include <linux/ratelimit.h>
 
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *fsverity_sysctl_header;
+
+static struct ctl_table fsverity_sysctl_table[] = {
+#ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES
+	{
+		.procname       = "require_signatures",
+		.data           = &fsverity_require_signatures,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec_minmax,
+		.extra1         = SYSCTL_ZERO,
+		.extra2         = SYSCTL_ONE,
+	},
+#endif
+	{ }
+};
+
+static void __init fsverity_init_sysctl(void)
+{
+	fsverity_sysctl_header = register_sysctl("fs/verity",
+						 fsverity_sysctl_table);
+	if (!fsverity_sysctl_header)
+		panic("fsverity sysctl registration failed");
+}
+#else /* CONFIG_SYSCTL */
+static inline void fsverity_init_sysctl(void)
+{
+}
+#endif /* !CONFIG_SYSCTL */
+
 void fsverity_msg(const struct inode *inode, const char *level,
 		  const char *fmt, ...)
 {
@@ -36,6 +67,7 @@ static int __init fsverity_init(void)
 	fsverity_check_hash_algs();
 	fsverity_init_info_cache();
 	fsverity_init_workqueue();
+	fsverity_init_sysctl();
 	fsverity_init_signature();
 	return 0;
 }
diff --git a/fs/verity/signature.c b/fs/verity/signature.c
index ec75ffec069e..b95acae64eac 100644
--- a/fs/verity/signature.c
+++ b/fs/verity/signature.c
@@ -24,7 +24,7 @@
  * /proc/sys/fs/verity/require_signatures
  * If 1, all verity files must have a valid builtin signature.
  */
-static int fsverity_require_signatures;
+int fsverity_require_signatures;
 
 /*
  * Keyring that contains the trusted X.509 certificates.
@@ -93,35 +93,6 @@ int fsverity_verify_signature(const struct fsverity_info *vi,
 	return 0;
 }
 
-#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *fsverity_sysctl_header;
-
-static struct ctl_table fsverity_sysctl_table[] = {
-	{
-		.procname       = "require_signatures",
-		.data           = &fsverity_require_signatures,
-		.maxlen         = sizeof(int),
-		.mode           = 0644,
-		.proc_handler   = proc_dointvec_minmax,
-		.extra1         = SYSCTL_ZERO,
-		.extra2         = SYSCTL_ONE,
-	},
-	{ }
-};
-
-static void __init fsverity_sysctl_init(void)
-{
-	fsverity_sysctl_header = register_sysctl("fs/verity",
-						 fsverity_sysctl_table);
-	if (!fsverity_sysctl_header)
-		panic("fsverity sysctl registration failed");
-}
-#else /* !CONFIG_SYSCTL */
-static inline void fsverity_sysctl_init(void)
-{
-}
-#endif /* !CONFIG_SYSCTL */
-
 void __init fsverity_init_signature(void)
 {
 	fsverity_keyring =
@@ -132,6 +103,4 @@ void __init fsverity_init_signature(void)
 			      KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
 	if (IS_ERR(fsverity_keyring))
 		panic("failed to allocate \".fs-verity\" keyring");
-
-	fsverity_sysctl_init();
 }

From 3b38cabd5e12b8b54b9c9feab9ab0093b991f249 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 14:48:32 +0800
Subject: [PATCH 0102/2047] spi: spi-cadence: Delete unmatched comments

The function no longer returns a value, synchronize the comments.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230712064832.67091-1-frank.li@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cadence.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index de8fe3c5becb..42f101d357c3 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -690,8 +690,6 @@ remove_ctlr:
  * This function is called if a device is physically removed from the system or
  * if the driver module is being unloaded. It frees all resources allocated to
  * the device.
- *
- * Return:	0 on success and error value on error
  */
 static void cdns_spi_remove(struct platform_device *pdev)
 {

From 43818a4bf8ef8f38253bfb851ae64f3cdf45fdf4 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Wed, 12 Jul 2023 12:01:05 +0200
Subject: [PATCH 0103/2047] gpio: sim: fix a typo in comment

It was supposed to say 'for' not 'fo'.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-sim.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index 8b49b0abacd5..cfbdade841e8 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -488,7 +488,7 @@ struct gpio_sim_device {
 	 * This structure however can be modified by callbacks of different
 	 * attributes so we need another lock.
 	 *
-	 * We use this lock fo protecting all data structures owned by this
+	 * We use this lock for protecting all data structures owned by this
 	 * object too.
 	 */
 	struct mutex lock;

From 0f93a345aa42c40e2145c3719d878e7daa4eb6ee Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Wed, 12 Jul 2023 11:59:55 +0200
Subject: [PATCH 0104/2047] gpiolib: order includes alphabetically in gpiolib.h

After adding the missing notifier.h header, let's order all includes
alphabetically.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpiolib.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 1409d52487c0..7c562fbb9fb0 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -9,13 +9,13 @@
 #ifndef GPIOLIB_H
 #define GPIOLIB_H
 
-#include <linux/gpio/driver.h>
-#include <linux/gpio/consumer.h> /* for enum gpiod_flags */
-#include <linux/err.h>
+#include <linux/cdev.h>
 #include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h> /* for enum gpiod_flags */
+#include <linux/gpio/driver.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
-#include <linux/cdev.h>
 #include <linux/rwsem.h>
 
 #define GPIOCHIP_NAME	"gpiochip"

From 95f8020459fd2f07ae6980d524c21d4d912fef59 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:41 -0400
Subject: [PATCH 0105/2047] s390: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-14-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 arch/s390/hypfs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index ee919bfc8186..5feef8da406b 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -53,7 +53,7 @@ static void hypfs_update_update(struct super_block *sb)
 	struct inode *inode = d_inode(sb_info->update_file);
 
 	sb_info->last_update = ktime_get_seconds();
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 }
 
 /* directory tree removal functions */
@@ -101,7 +101,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode)
 		ret->i_mode = mode;
 		ret->i_uid = hypfs_info->uid;
 		ret->i_gid = hypfs_info->gid;
-		ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+		ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
 		if (S_ISDIR(mode))
 			set_nlink(ret, 2);
 	}

From 278832b81c5354405ad4d4e1645a9039aff56ac7 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:42 -0400
Subject: [PATCH 0106/2047] binderfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-15-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 drivers/android/binderfs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
index 76e7d6676657..faebe9f5412a 100644
--- a/drivers/android/binderfs.c
+++ b/drivers/android/binderfs.c
@@ -153,7 +153,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode,
 		goto err;
 
 	inode->i_ino = minor + INODE_OFFSET;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	init_special_inode(inode, S_IFCHR | 0600,
 			   MKDEV(MAJOR(binderfs_dev), minor));
 	inode->i_fop = &binder_fops;
@@ -432,7 +432,7 @@ static int binderfs_binder_ctl_create(struct super_block *sb)
 	}
 
 	inode->i_ino = SECOND_INODE;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	init_special_inode(inode, S_IFCHR | 0600,
 			   MKDEV(MAJOR(binderfs_dev), minor));
 	inode->i_fop = &binder_ctl_fops;
@@ -474,7 +474,7 @@ static struct inode *binderfs_make_inode(struct super_block *sb, int mode)
 	if (ret) {
 		ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET);
 		ret->i_mode = mode;
-		ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+		ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
 	}
 	return ret;
 }
@@ -703,7 +703,7 @@ static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc)
 	inode->i_ino = FIRST_INODE;
 	inode->i_fop = &simple_dir_operations;
 	inode->i_mode = S_IFDIR | 0755;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_op = &binderfs_dir_inode_operations;
 	set_nlink(inode, 2);
 

From 24856a96cfbf1ebdc742b725a83d7729f2b6d236 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:43 -0400
Subject: [PATCH 0107/2047] infiniband: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-16-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 drivers/infiniband/hw/qib/qib_fs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index a973905afd13..ed7d4b02f45a 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -64,9 +64,8 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
 	inode->i_uid = GLOBAL_ROOT_UID;
 	inode->i_gid = GLOBAL_ROOT_GID;
 	inode->i_blocks = 0;
-	inode->i_atime = current_time(inode);
+	inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_mtime = inode->i_atime;
-	inode->i_ctime = inode->i_atime;
 	inode->i_private = data;
 	if (S_ISDIR(mode)) {
 		inode->i_op = &simple_dir_inode_operations;

From 41441cecd10f565d18668216dc46b634bcc2cada Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:44 -0400
Subject: [PATCH 0108/2047] ibm: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-17-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 drivers/misc/ibmasm/ibmasmfs.c | 2 +-
 drivers/misc/ibmvmc.c          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index 35fec1bf1b3d..5867af9f592c 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -139,7 +139,7 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode)
 	if (ret) {
 		ret->i_ino = get_next_ino();
 		ret->i_mode = mode;
-		ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+		ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
 	}
 	return ret;
 }
diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c
index d7c7f0305257..2101eb12bcba 100644
--- a/drivers/misc/ibmvmc.c
+++ b/drivers/misc/ibmvmc.c
@@ -1124,7 +1124,7 @@ static ssize_t ibmvmc_write(struct file *file, const char *buffer,
 		goto out;
 
 	inode = file_inode(file);
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 
 	dev_dbg(adapter->dev, "write: file = 0x%lx, count = 0x%lx\n",

From c7603adcc6b1e8be32aed4ff95fe20806014ac34 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:45 -0400
Subject: [PATCH 0109/2047] usb: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-18-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 drivers/usb/core/devio.c           | 16 ++++++++--------
 drivers/usb/gadget/function/f_fs.c |  3 +--
 drivers/usb/gadget/legacy/inode.c  |  3 +--
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 1a16a8bdea60..4f68f6ef3cc1 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -2642,21 +2642,21 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 		snoop(&dev->dev, "%s: CONTROL\n", __func__);
 		ret = proc_control(ps, p);
 		if (ret >= 0)
-			inode->i_mtime = inode->i_ctime = current_time(inode);
+			inode->i_mtime = inode_set_ctime_current(inode);
 		break;
 
 	case USBDEVFS_BULK:
 		snoop(&dev->dev, "%s: BULK\n", __func__);
 		ret = proc_bulk(ps, p);
 		if (ret >= 0)
-			inode->i_mtime = inode->i_ctime = current_time(inode);
+			inode->i_mtime = inode_set_ctime_current(inode);
 		break;
 
 	case USBDEVFS_RESETEP:
 		snoop(&dev->dev, "%s: RESETEP\n", __func__);
 		ret = proc_resetep(ps, p);
 		if (ret >= 0)
-			inode->i_mtime = inode->i_ctime = current_time(inode);
+			inode->i_mtime = inode_set_ctime_current(inode);
 		break;
 
 	case USBDEVFS_RESET:
@@ -2668,7 +2668,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 		snoop(&dev->dev, "%s: CLEAR_HALT\n", __func__);
 		ret = proc_clearhalt(ps, p);
 		if (ret >= 0)
-			inode->i_mtime = inode->i_ctime = current_time(inode);
+			inode->i_mtime = inode_set_ctime_current(inode);
 		break;
 
 	case USBDEVFS_GETDRIVER:
@@ -2695,7 +2695,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 		snoop(&dev->dev, "%s: SUBMITURB\n", __func__);
 		ret = proc_submiturb(ps, p);
 		if (ret >= 0)
-			inode->i_mtime = inode->i_ctime = current_time(inode);
+			inode->i_mtime = inode_set_ctime_current(inode);
 		break;
 
 #ifdef CONFIG_COMPAT
@@ -2703,14 +2703,14 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 		snoop(&dev->dev, "%s: CONTROL32\n", __func__);
 		ret = proc_control_compat(ps, p);
 		if (ret >= 0)
-			inode->i_mtime = inode->i_ctime = current_time(inode);
+			inode->i_mtime = inode_set_ctime_current(inode);
 		break;
 
 	case USBDEVFS_BULK32:
 		snoop(&dev->dev, "%s: BULK32\n", __func__);
 		ret = proc_bulk_compat(ps, p);
 		if (ret >= 0)
-			inode->i_mtime = inode->i_ctime = current_time(inode);
+			inode->i_mtime = inode_set_ctime_current(inode);
 		break;
 
 	case USBDEVFS_DISCSIGNAL32:
@@ -2722,7 +2722,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 		snoop(&dev->dev, "%s: SUBMITURB32\n", __func__);
 		ret = proc_submiturb_compat(ps, p);
 		if (ret >= 0)
-			inode->i_mtime = inode->i_ctime = current_time(inode);
+			inode->i_mtime = inode_set_ctime_current(inode);
 		break;
 
 	case USBDEVFS_IOCTL32:
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index f41a385a5c42..6e9ef35a43a7 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1377,7 +1377,7 @@ ffs_sb_make_inode(struct super_block *sb, void *data,
 	inode = new_inode(sb);
 
 	if (inode) {
-		struct timespec64 ts = current_time(inode);
+		struct timespec64 ts = inode_set_ctime_current(inode);
 
 		inode->i_ino	 = get_next_ino();
 		inode->i_mode    = perms->mode;
@@ -1385,7 +1385,6 @@ ffs_sb_make_inode(struct super_block *sb, void *data,
 		inode->i_gid     = perms->gid;
 		inode->i_atime   = ts;
 		inode->i_mtime   = ts;
-		inode->i_ctime   = ts;
 		inode->i_private = data;
 		if (fops)
 			inode->i_fop = fops;
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 28249d0bf062..ce9e31f3d26b 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1969,8 +1969,7 @@ gadgetfs_make_inode (struct super_block *sb,
 		inode->i_mode = mode;
 		inode->i_uid = make_kuid(&init_user_ns, default_uid);
 		inode->i_gid = make_kgid(&init_user_ns, default_gid);
-		inode->i_atime = inode->i_mtime = inode->i_ctime
-				= current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 		inode->i_private = data;
 		inode->i_fop = fops;
 	}

From 4f87180060d88da888070029c5ed2fd0873eeef9 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:46 -0400
Subject: [PATCH 0110/2047] 9p: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-19-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/9p/vfs_inode.c      | 4 ++--
 fs/9p/vfs_inode_dotl.c | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 36b466e35887..16d85e6033a3 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -261,7 +261,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
 	inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
 	inode->i_blocks = 0;
 	inode->i_rdev = rdev;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	inode->i_mapping->a_ops = &v9fs_addr_operations;
 	inode->i_private = NULL;
 
@@ -1158,7 +1158,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 
 	inode->i_atime.tv_sec = stat->atime;
 	inode->i_mtime.tv_sec = stat->mtime;
-	inode->i_ctime.tv_sec = stat->mtime;
+	inode_set_ctime(inode, stat->mtime, 0);
 
 	inode->i_uid = v9ses->dfltuid;
 	inode->i_gid = v9ses->dfltgid;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 5361cd2d7996..464ea73d1bf8 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -646,8 +646,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
 		inode->i_atime.tv_nsec = stat->st_atime_nsec;
 		inode->i_mtime.tv_sec = stat->st_mtime_sec;
 		inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
-		inode->i_ctime.tv_sec = stat->st_ctime_sec;
-		inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+		inode_set_ctime(inode, stat->st_ctime_sec,
+				stat->st_ctime_nsec);
 		inode->i_uid = stat->st_uid;
 		inode->i_gid = stat->st_gid;
 		set_nlink(inode, stat->st_nlink);
@@ -669,8 +669,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
 			inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
 		}
 		if (stat->st_result_mask & P9_STATS_CTIME) {
-			inode->i_ctime.tv_sec = stat->st_ctime_sec;
-			inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+			inode_set_ctime(inode, stat->st_ctime_sec,
+					stat->st_ctime_nsec);
 		}
 		if (stat->st_result_mask & P9_STATS_UID)
 			inode->i_uid = stat->st_uid;

From 9689414767f2f7eda188957f4704ca9400fc4d5c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:47 -0400
Subject: [PATCH 0111/2047] adfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-20-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/adfs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index c3ac613d0975..20963002578a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -270,7 +270,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
 	inode->i_mode	 = adfs_atts2mode(sb, inode);
 	adfs_adfs2unix_time(&inode->i_mtime, inode);
 	inode->i_atime = inode->i_mtime;
-	inode->i_ctime = inode->i_mtime;
+	inode_set_ctime_to_ts(inode, inode->i_mtime);
 
 	if (S_ISDIR(inode->i_mode)) {
 		inode->i_op	= &adfs_dir_inode_operations;
@@ -331,7 +331,7 @@ adfs_notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
 	if (ia_valid & ATTR_ATIME)
 		inode->i_atime = attr->ia_atime;
 	if (ia_valid & ATTR_CTIME)
-		inode->i_ctime = attr->ia_ctime;
+		inode_set_ctime_to_ts(inode, attr->ia_ctime);
 	if (ia_valid & ATTR_MODE) {
 		ADFS_I(inode)->attr = adfs_mode2atts(sb, inode, attr->ia_mode);
 		inode->i_mode = adfs_atts2mode(sb, inode);

From f873e9d6db14e47dcb026e5b0314a6fe4faa4ed8 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:48 -0400
Subject: [PATCH 0112/2047] affs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: David Sterba <dsterba@suse.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-21-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/affs/amigaffs.c |  6 +++---
 fs/affs/inode.c    | 16 ++++++++--------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 29f11e10a7c7..7ba93efc1143 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -60,7 +60,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh)
 	mark_buffer_dirty_inode(dir_bh, dir);
 	affs_brelse(dir_bh);
 
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	inode_inc_iversion(dir);
 	mark_inode_dirty(dir);
 
@@ -114,7 +114,7 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
 
 	affs_brelse(bh);
 
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	inode_inc_iversion(dir);
 	mark_inode_dirty(dir);
 
@@ -315,7 +315,7 @@ affs_remove_header(struct dentry *dentry)
 	else
 		clear_nlink(inode);
 	affs_unlock_link(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 
 done:
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 27f77a52c5c8..060746c63151 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -149,13 +149,13 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
 		break;
 	}
 
-	inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec
-		       = (be32_to_cpu(tail->change.days) * 86400LL +
-		         be32_to_cpu(tail->change.mins) * 60 +
-			 be32_to_cpu(tail->change.ticks) / 50 +
-			 AFFS_EPOCH_DELTA) +
-			 sys_tz.tz_minuteswest * 60;
-	inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_atime.tv_nsec = 0;
+	inode->i_mtime.tv_sec = inode->i_atime.tv_sec =
+		inode_set_ctime(inode,
+				(be32_to_cpu(tail->change.days) * 86400LL +
+				 be32_to_cpu(tail->change.mins) * 60 +
+				 be32_to_cpu(tail->change.ticks) / 50 + AFFS_EPOCH_DELTA)
+				+ sys_tz.tz_minuteswest * 60, 0).tv_sec;
+	inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = 0;
 	affs_brelse(bh);
 	unlock_new_inode(inode);
 	return inode;
@@ -314,7 +314,7 @@ affs_new_inode(struct inode *dir)
 	inode->i_gid     = current_fsgid();
 	inode->i_ino     = block;
 	set_nlink(inode, 1);
-	inode->i_mtime   = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime   = inode->i_atime = inode_set_ctime_current(inode);
 	atomic_set(&AFFS_I(inode)->i_opencnt, 0);
 	AFFS_I(inode)->i_blkcnt = 0;
 	AFFS_I(inode)->i_lc = NULL;

From b9170a28839a59fc1314a194712c06109e0ca0ab Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:49 -0400
Subject: [PATCH 0113/2047] afs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-22-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/afs/dynroot.c | 2 +-
 fs/afs/inode.c   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index d7d9402ff718..95bcbd7654d1 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -88,7 +88,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
 	set_nlink(inode, 2);
 	inode->i_uid		= GLOBAL_ROOT_UID;
 	inode->i_gid		= GLOBAL_ROOT_GID;
-	inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	inode->i_blocks		= 0;
 	inode->i_generation	= 0;
 
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 866bab860a88..6b636f43f548 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -90,7 +90,7 @@ static int afs_inode_init_from_status(struct afs_operation *op,
 	vnode->status = *status;
 
 	t = status->mtime_client;
-	inode->i_ctime = t;
+	inode_set_ctime_to_ts(inode, t);
 	inode->i_mtime = t;
 	inode->i_atime = t;
 	inode->i_flags |= S_NOATIME;
@@ -206,7 +206,7 @@ static void afs_apply_status(struct afs_operation *op,
 	t = status->mtime_client;
 	inode->i_mtime = t;
 	if (vp->update_ctime)
-		inode->i_ctime = op->ctime;
+		inode_set_ctime_to_ts(inode, op->ctime);
 
 	if (vnode->status.data_version != status->data_version)
 		data_changed = true;
@@ -252,7 +252,7 @@ static void afs_apply_status(struct afs_operation *op,
 		vnode->netfs.remote_i_size = status->size;
 		if (change_size) {
 			afs_set_i_size(vnode, status->size);
-			inode->i_ctime = t;
+			inode_set_ctime_to_ts(inode, t);
 			inode->i_atime = t;
 		}
 	}

From 2276e5ba8567f683c49a36ba885d0fe6abe2b45e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:50 -0400
Subject: [PATCH 0114/2047] fs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-23-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/attr.c        |  2 +-
 fs/bad_inode.c   |  3 +--
 fs/binfmt_misc.c |  3 +--
 fs/inode.c       | 10 +++++++---
 fs/nsfs.c        |  2 +-
 fs/pipe.c        |  2 +-
 fs/posix_acl.c   |  2 +-
 fs/stack.c       |  2 +-
 fs/stat.c        |  2 +-
 9 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/fs/attr.c b/fs/attr.c
index d60dc1edb526..599f6d14c0ed 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -312,7 +312,7 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
 	if (ia_valid & ATTR_MTIME)
 		inode->i_mtime = attr->ia_mtime;
 	if (ia_valid & ATTR_CTIME)
-		inode->i_ctime = attr->ia_ctime;
+		inode_set_ctime_to_ts(inode, attr->ia_ctime);
 	if (ia_valid & ATTR_MODE) {
 		umode_t mode = attr->ia_mode;
 		if (!in_group_or_capable(idmap, inode,
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index db649487d58c..6e21f7412a85 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -209,8 +209,7 @@ void make_bad_inode(struct inode *inode)
 	remove_inode_hash(inode);
 
 	inode->i_mode = S_IFREG;
-	inode->i_atime = inode->i_mtime = inode->i_ctime =
-		current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	inode->i_op = &bad_inode_ops;	
 	inode->i_opflags &= ~IOP_XATTR;
 	inode->i_fop = &bad_file_ops;	
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index bb202ad369d5..e0108d17b085 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -547,8 +547,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
 	if (inode) {
 		inode->i_ino = get_next_ino();
 		inode->i_mode = mode;
-		inode->i_atime = inode->i_mtime = inode->i_ctime =
-			current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	}
 	return inode;
 }
diff --git a/fs/inode.c b/fs/inode.c
index aac5cdcf5e89..d4ab92233062 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1853,6 +1853,7 @@ EXPORT_SYMBOL(bmap);
 static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
 			     struct timespec64 now)
 {
+	struct timespec64 ctime;
 
 	if (!(mnt->mnt_flags & MNT_RELATIME))
 		return 1;
@@ -1864,7 +1865,8 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
 	/*
 	 * Is ctime younger than or equal to atime? If yes, update atime:
 	 */
-	if (timespec64_compare(&inode->i_ctime, &inode->i_atime) >= 0)
+	ctime = inode_get_ctime(inode);
+	if (timespec64_compare(&ctime, &inode->i_atime) >= 0)
 		return 1;
 
 	/*
@@ -1887,7 +1889,7 @@ int generic_update_time(struct inode *inode, struct timespec64 *time, int flags)
 		if (flags & S_ATIME)
 			inode->i_atime = *time;
 		if (flags & S_CTIME)
-			inode->i_ctime = *time;
+			inode_set_ctime_to_ts(inode, *time);
 		if (flags & S_MTIME)
 			inode->i_mtime = *time;
 
@@ -2073,6 +2075,7 @@ EXPORT_SYMBOL(file_remove_privs);
 static int inode_needs_update_time(struct inode *inode, struct timespec64 *now)
 {
 	int sync_it = 0;
+	struct timespec64 ctime;
 
 	/* First try to exhaust all avenues to not sync */
 	if (IS_NOCMTIME(inode))
@@ -2081,7 +2084,8 @@ static int inode_needs_update_time(struct inode *inode, struct timespec64 *now)
 	if (!timespec64_equal(&inode->i_mtime, now))
 		sync_it = S_MTIME;
 
-	if (!timespec64_equal(&inode->i_ctime, now))
+	ctime = inode_get_ctime(inode);
+	if (!timespec64_equal(&ctime, now))
 		sync_it |= S_CTIME;
 
 	if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
diff --git a/fs/nsfs.c b/fs/nsfs.c
index f602a96a1afe..647a22433bd8 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -84,7 +84,7 @@ slow:
 		return -ENOMEM;
 	}
 	inode->i_ino = ns->inum;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_flags |= S_IMMUTABLE;
 	inode->i_mode = S_IFREG | S_IRUGO;
 	inode->i_fop = &ns_file_operations;
diff --git a/fs/pipe.c b/fs/pipe.c
index 2d88f73f585a..174682103669 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -899,7 +899,7 @@ static struct inode * get_pipe_inode(void)
 	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 
 	return inode;
 
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 7fa1b738bbab..a05fe94970ce 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -1027,7 +1027,7 @@ int simple_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
 			return error;
 	}
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	if (IS_I_VERSION(inode))
 		inode_inc_iversion(inode);
 	set_cached_acl(inode, type, acl);
diff --git a/fs/stack.c b/fs/stack.c
index c9830924eb12..b5e01bdb5f5f 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -68,7 +68,7 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
 	dest->i_rdev = src->i_rdev;
 	dest->i_atime = src->i_atime;
 	dest->i_mtime = src->i_mtime;
-	dest->i_ctime = src->i_ctime;
+	inode_set_ctime_to_ts(dest, inode_get_ctime(src));
 	dest->i_blkbits = src->i_blkbits;
 	dest->i_flags = src->i_flags;
 	set_nlink(dest, src->i_nlink);
diff --git a/fs/stat.c b/fs/stat.c
index 7c238da22ef0..8c2b30af19f5 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -58,7 +58,7 @@ void generic_fillattr(struct mnt_idmap *idmap, struct inode *inode,
 	stat->size = i_size_read(inode);
 	stat->atime = inode->i_atime;
 	stat->mtime = inode->i_mtime;
-	stat->ctime = inode->i_ctime;
+	stat->ctime = inode_get_ctime(inode);
 	stat->blksize = i_blocksize(inode);
 	stat->blocks = inode->i_blocks;
 }

From 36aa5eae75fc61e443f3ffc18017bf7a682fe034 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:51 -0400
Subject: [PATCH 0115/2047] autofs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Reviewed-by: Ian Kent <raven@themaw.net>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-24-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/autofs/inode.c | 2 +-
 fs/autofs/root.c  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index affa70360b1f..2b49662ed237 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -370,7 +370,7 @@ struct inode *autofs_get_inode(struct super_block *sb, umode_t mode)
 		inode->i_uid = d_inode(sb->s_root)->i_uid;
 		inode->i_gid = d_inode(sb->s_root)->i_gid;
 	}
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	inode->i_ino = get_next_ino();
 
 	if (S_ISDIR(mode)) {
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 93046c9dc461..512b9a26c63d 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -600,7 +600,7 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap,
 	p_ino = autofs_dentry_ino(dentry->d_parent);
 	p_ino->count++;
 
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 
 	return 0;
 }
@@ -633,7 +633,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry)
 	d_inode(dentry)->i_size = 0;
 	clear_nlink(d_inode(dentry));
 
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 
 	spin_lock(&sbi->lookup_lock);
 	__autofs_add_expiring(dentry);
@@ -749,7 +749,7 @@ static int autofs_dir_mkdir(struct mnt_idmap *idmap,
 	p_ino = autofs_dentry_ino(dentry->d_parent);
 	p_ino->count++;
 	inc_nlink(dir);
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 
 	return 0;
 }

From 0a8359482472cc85b4a61ac7ce20a28a11db3bec Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:52 -0400
Subject: [PATCH 0116/2047] befs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-25-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/befs/linuxvfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index eee9237386e2..9a16a51fbb88 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -363,7 +363,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 	inode->i_mtime.tv_sec =
 	    fs64_to_cpu(sb, raw_inode->last_modified_time) >> 16;
 	inode->i_mtime.tv_nsec = 0;   /* lower 16 bits are not a time */
-	inode->i_ctime = inode->i_mtime;
+	inode_set_ctime_to_ts(inode, inode->i_mtime);
 	inode->i_atime = inode->i_mtime;
 
 	befs_ino->i_inode_num = fsrun_to_cpu(sb, raw_inode->inode_num);

From 73d9b9d028176b448d3912c47db13b614fef4a65 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:53 -0400
Subject: [PATCH 0117/2047] bfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-26-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/bfs/dir.c   | 16 ++++++++--------
 fs/bfs/inode.c |  5 ++---
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index d2e8a2a56b05..12b8af04dcb3 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -97,7 +97,7 @@ static int bfs_create(struct mnt_idmap *idmap, struct inode *dir,
 	set_bit(ino, info->si_imap);
 	info->si_freei--;
 	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_blocks = 0;
 	inode->i_op = &bfs_file_inops;
 	inode->i_fop = &bfs_file_operations;
@@ -158,7 +158,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
 		return err;
 	}
 	inc_nlink(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 	ihold(inode);
 	d_instantiate(new, inode);
@@ -187,9 +187,9 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
 	}
 	de->ino = 0;
 	mark_buffer_dirty_inode(bh, dir);
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
-	inode->i_ctime = dir->i_ctime;
+	inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
 	inode_dec_link_count(inode);
 	error = 0;
 
@@ -240,10 +240,10 @@ static int bfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 			goto end_rename;
 	}
 	old_de->ino = 0;
-	old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
+	old_dir->i_mtime = inode_set_ctime_current(old_dir);
 	mark_inode_dirty(old_dir);
 	if (new_inode) {
-		new_inode->i_ctime = current_time(new_inode);
+		inode_set_ctime_current(new_inode);
 		inode_dec_link_count(new_inode);
 	}
 	mark_buffer_dirty_inode(old_bh, old_dir);
@@ -292,9 +292,9 @@ static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino)
 				pos = (block - sblock) * BFS_BSIZE + off;
 				if (pos >= dir->i_size) {
 					dir->i_size += BFS_DIRENT_SIZE;
-					dir->i_ctime = current_time(dir);
+					inode_set_ctime_current(dir);
 				}
-				dir->i_mtime = dir->i_ctime = current_time(dir);
+				dir->i_mtime = inode_set_ctime_current(dir);
 				mark_inode_dirty(dir);
 				de->ino = cpu_to_le16((u16)ino);
 				for (i = 0; i < BFS_NAMELEN; i++)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 1926bec2c850..e6a76ae9eb44 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -82,10 +82,9 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
 	inode->i_blocks = BFS_FILEBLOCKS(di);
 	inode->i_atime.tv_sec =  le32_to_cpu(di->i_atime);
 	inode->i_mtime.tv_sec =  le32_to_cpu(di->i_mtime);
-	inode->i_ctime.tv_sec =  le32_to_cpu(di->i_ctime);
+	inode_set_ctime(inode, le32_to_cpu(di->i_ctime), 0);
 	inode->i_atime.tv_nsec = 0;
 	inode->i_mtime.tv_nsec = 0;
-	inode->i_ctime.tv_nsec = 0;
 
 	brelse(bh);
 	unlock_new_inode(inode);
@@ -143,7 +142,7 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	di->i_nlink = cpu_to_le32(inode->i_nlink);
 	di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
 	di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
-	di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+	di->i_ctime = cpu_to_le32(inode_get_ctime(inode).tv_sec);
 	i_sblock = BFS_I(inode)->i_sblock;
 	di->i_sblock = cpu_to_le32(i_sblock);
 	di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock);

From 2a9462de4352d141ca17d7b872dee64d3ba8995f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:54 -0400
Subject: [PATCH 0118/2047] btrfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-27-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/btrfs/delayed-inode.c |  8 ++++----
 fs/btrfs/file.c          | 21 +++++++++------------
 fs/btrfs/inode.c         | 39 ++++++++++++++++-----------------------
 fs/btrfs/ioctl.c         |  2 +-
 fs/btrfs/reflink.c       |  3 +--
 fs/btrfs/transaction.c   |  3 +--
 fs/btrfs/tree-log.c      |  4 ++--
 fs/btrfs/xattr.c         |  4 ++--
 8 files changed, 36 insertions(+), 48 deletions(-)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6b457b010cbc..6f8c0a9b2017 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1808,9 +1808,9 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
 				      inode->i_mtime.tv_nsec);
 
 	btrfs_set_stack_timespec_sec(&inode_item->ctime,
-				     inode->i_ctime.tv_sec);
+				     inode_get_ctime(inode).tv_sec);
 	btrfs_set_stack_timespec_nsec(&inode_item->ctime,
-				      inode->i_ctime.tv_nsec);
+				      inode_get_ctime(inode).tv_nsec);
 
 	btrfs_set_stack_timespec_sec(&inode_item->otime,
 				     BTRFS_I(inode)->i_otime.tv_sec);
@@ -1861,8 +1861,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
 	inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
 	inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
 
-	inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
-	inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
+	inode_set_ctime(inode, btrfs_stack_timespec_sec(&inode_item->ctime),
+			btrfs_stack_timespec_nsec(&inode_item->ctime));
 
 	BTRFS_I(inode)->i_otime.tv_sec =
 		btrfs_stack_timespec_sec(&inode_item->otime);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fd03e689a6be..d7a9ece7a40b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1108,7 +1108,7 @@ void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
 
 static void update_time_for_write(struct inode *inode)
 {
-	struct timespec64 now;
+	struct timespec64 now, ctime;
 
 	if (IS_NOCMTIME(inode))
 		return;
@@ -1117,8 +1117,9 @@ static void update_time_for_write(struct inode *inode)
 	if (!timespec64_equal(&inode->i_mtime, &now))
 		inode->i_mtime = now;
 
-	if (!timespec64_equal(&inode->i_ctime, &now))
-		inode->i_ctime = now;
+	ctime = inode_get_ctime(inode);
+	if (!timespec64_equal(&ctime, &now))
+		inode_set_ctime_to_ts(inode, now);
 
 	if (IS_I_VERSION(inode))
 		inode_inc_iversion(inode);
@@ -2459,10 +2460,8 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
 		 */
 		inode_inc_iversion(&inode->vfs_inode);
 
-		if (!extent_info || extent_info->update_times) {
-			inode->vfs_inode.i_mtime = current_time(&inode->vfs_inode);
-			inode->vfs_inode.i_ctime = inode->vfs_inode.i_mtime;
-		}
+		if (!extent_info || extent_info->update_times)
+			inode->vfs_inode.i_mtime = inode_set_ctime_current(&inode->vfs_inode);
 
 		ret = btrfs_update_inode(trans, root, inode);
 		if (ret)
@@ -2703,8 +2702,7 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
 
 	ASSERT(trans != NULL);
 	inode_inc_iversion(inode);
-	inode->i_mtime = current_time(inode);
-	inode->i_ctime = inode->i_mtime;
+	inode->i_mtime = inode_set_ctime_current(inode);
 	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 	updated_inode = true;
 	btrfs_end_transaction(trans);
@@ -2721,11 +2719,10 @@ out_only_mutex:
 		 * for detecting, at fsync time, if the inode isn't yet in the
 		 * log tree or it's there but not up to date.
 		 */
-		struct timespec64 now = current_time(inode);
+		struct timespec64 now = inode_set_ctime_current(inode);
 
 		inode_inc_iversion(inode);
 		inode->i_mtime = now;
-		inode->i_ctime = now;
 		trans = btrfs_start_transaction(root, 1);
 		if (IS_ERR(trans)) {
 			ret = PTR_ERR(trans);
@@ -2796,7 +2793,7 @@ static int btrfs_fallocate_update_isize(struct inode *inode,
 	if (IS_ERR(trans))
 		return PTR_ERR(trans);
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	i_size_write(inode, end);
 	btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
 	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 956ae499740a..ceac62c1cbfc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3901,8 +3901,8 @@ static int btrfs_read_locked_inode(struct inode *inode,
 	inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
 	inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
 
-	inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
-	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
+	inode_set_ctime(inode, btrfs_timespec_sec(leaf, &inode_item->ctime),
+			btrfs_timespec_nsec(leaf, &inode_item->ctime));
 
 	BTRFS_I(inode)->i_otime.tv_sec =
 		btrfs_timespec_sec(leaf, &inode_item->otime);
@@ -4073,9 +4073,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 				      inode->i_mtime.tv_nsec);
 
 	btrfs_set_token_timespec_sec(&token, &item->ctime,
-				     inode->i_ctime.tv_sec);
+				     inode_get_ctime(inode).tv_sec);
 	btrfs_set_token_timespec_nsec(&token, &item->ctime,
-				      inode->i_ctime.tv_nsec);
+				      inode_get_ctime(inode).tv_nsec);
 
 	btrfs_set_token_timespec_sec(&token, &item->otime,
 				     BTRFS_I(inode)->i_otime.tv_sec);
@@ -4273,9 +4273,8 @@ err:
 	btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2);
 	inode_inc_iversion(&inode->vfs_inode);
 	inode_inc_iversion(&dir->vfs_inode);
-	inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
-	dir->vfs_inode.i_mtime = inode->vfs_inode.i_ctime;
-	dir->vfs_inode.i_ctime = inode->vfs_inode.i_ctime;
+	inode_set_ctime_current(&inode->vfs_inode);
+	dir->vfs_inode.i_mtime = inode_set_ctime_current(&dir->vfs_inode);
 	ret = btrfs_update_inode(trans, root, dir);
 out:
 	return ret;
@@ -4448,8 +4447,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 
 	btrfs_i_size_write(dir, dir->vfs_inode.i_size - fname.disk_name.len * 2);
 	inode_inc_iversion(&dir->vfs_inode);
-	dir->vfs_inode.i_mtime = current_time(&dir->vfs_inode);
-	dir->vfs_inode.i_ctime = dir->vfs_inode.i_mtime;
+	dir->vfs_inode.i_mtime = inode_set_ctime_current(&dir->vfs_inode);
 	ret = btrfs_update_inode_fallback(trans, root, dir);
 	if (ret)
 		btrfs_abort_transaction(trans, ret);
@@ -5091,8 +5089,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 	if (newsize != oldsize) {
 		inode_inc_iversion(inode);
 		if (!(mask & (ATTR_CTIME | ATTR_MTIME))) {
-			inode->i_mtime = current_time(inode);
-			inode->i_ctime = inode->i_mtime;
+			inode->i_mtime = inode_set_ctime_current(inode);
 		}
 	}
 
@@ -5736,9 +5733,8 @@ static struct inode *new_simple_dir(struct super_block *s,
 	inode->i_opflags &= ~IOP_XATTR;
 	inode->i_fop = &simple_dir_operations;
 	inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
-	inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	inode->i_atime = inode->i_mtime;
-	inode->i_ctime = inode->i_mtime;
 	BTRFS_I(inode)->i_otime = inode->i_mtime;
 
 	return inode;
@@ -6075,7 +6071,7 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
 	if (flags & S_VERSION)
 		dirty |= inode_maybe_inc_iversion(inode, dirty);
 	if (flags & S_CTIME)
-		inode->i_ctime = *now;
+		inode_set_ctime_to_ts(inode, *now);
 	if (flags & S_MTIME)
 		inode->i_mtime = *now;
 	if (flags & S_ATIME)
@@ -6378,9 +6374,8 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
 		goto discard;
 	}
 
-	inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	inode->i_atime = inode->i_mtime;
-	inode->i_ctime = inode->i_mtime;
 	BTRFS_I(inode)->i_otime = inode->i_mtime;
 
 	/*
@@ -6545,12 +6540,10 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 	 * log replay procedure is responsible for setting them to their correct
 	 * values (the ones it had when the fsync was done).
 	 */
-	if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
-		struct timespec64 now = current_time(&parent_inode->vfs_inode);
+	if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags))
+		parent_inode->vfs_inode.i_mtime =
+			inode_set_ctime_current(&parent_inode->vfs_inode);
 
-		parent_inode->vfs_inode.i_mtime = now;
-		parent_inode->vfs_inode.i_ctime = now;
-	}
 	ret = btrfs_update_inode(trans, root, parent_inode);
 	if (ret)
 		btrfs_abort_transaction(trans, ret);
@@ -6690,7 +6683,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 	BTRFS_I(inode)->dir_index = 0ULL;
 	inc_nlink(inode);
 	inode_inc_iversion(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	ihold(inode);
 	set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
 
@@ -9732,7 +9725,7 @@ next:
 		*alloc_hint = ins.objectid + ins.offset;
 
 		inode_inc_iversion(inode);
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
 		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
 		    (actual_len > inode->i_size) &&
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a895d105464b..a18ee7b5a166 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -384,7 +384,7 @@ update_flags:
 	binode->flags = binode_flags;
 	btrfs_sync_inode_flags_to_i_flags(inode);
 	inode_inc_iversion(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 
  out_end_trans:
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 0474bbe39da7..65d2bd6910f2 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -30,8 +30,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
 
 	inode_inc_iversion(inode);
 	if (!no_time_update) {
-		inode->i_mtime = current_time(inode);
-		inode->i_ctime = inode->i_mtime;
+		inode->i_mtime = inode_set_ctime_current(inode);
 	}
 	/*
 	 * We round up to the block size at eof when determining which
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index cf306351b148..f6e4016a343e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1831,8 +1831,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
 	btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
 						  fname.disk_name.len * 2);
-	parent_inode->i_mtime = current_time(parent_inode);
-	parent_inode->i_ctime = parent_inode->i_mtime;
+	parent_inode->i_mtime = inode_set_ctime_current(parent_inode);
 	ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 365a1cc0a3c3..ffcff7188170 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4148,9 +4148,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 				      inode->i_mtime.tv_nsec);
 
 	btrfs_set_token_timespec_sec(&token, &item->ctime,
-				     inode->i_ctime.tv_sec);
+				     inode_get_ctime(inode).tv_sec);
 	btrfs_set_token_timespec_nsec(&token, &item->ctime,
-				      inode->i_ctime.tv_nsec);
+				      inode_get_ctime(inode).tv_nsec);
 
 	/*
 	 * We do not need to set the nbytes field, in fact during a fast fsync
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index fc4b20c2688a..96828a13dd43 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -264,7 +264,7 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
 		goto out;
 
 	inode_inc_iversion(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 	if (ret)
 		btrfs_abort_transaction(trans, ret);
@@ -407,7 +407,7 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
 	ret = btrfs_set_prop(trans, inode, name, value, size, flags);
 	if (!ret) {
 		inode_inc_iversion(inode);
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 		if (ret)
 			btrfs_abort_transaction(trans, ret);

From 7795aef0819225db9aea918b7f7c47a854304d5e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:55 -0400
Subject: [PATCH 0119/2047] ceph: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Reviewed-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-28-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ceph/acl.c   |  2 +-
 fs/ceph/caps.c  |  2 +-
 fs/ceph/inode.c | 16 +++++++++-------
 fs/ceph/snap.c  |  2 +-
 fs/ceph/xattr.c |  2 +-
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 6945a938d396..c91b293267d7 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -93,7 +93,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
 	char *value = NULL;
 	struct iattr newattrs;
 	struct inode *inode = d_inode(dentry);
-	struct timespec64 old_ctime = inode->i_ctime;
+	struct timespec64 old_ctime = inode_get_ctime(inode);
 	umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
 
 	if (ceph_snap(inode) != CEPH_NOSNAP) {
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index e2bb0d0072da..09cd6d334604 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1400,7 +1400,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
 
 	arg->mtime = inode->i_mtime;
 	arg->atime = inode->i_atime;
-	arg->ctime = inode->i_ctime;
+	arg->ctime = inode_get_ctime(inode);
 	arg->btime = ci->i_btime;
 	arg->change_attr = inode_peek_iversion_raw(inode);
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8e5f41d45283..5f6e93714f5a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -100,7 +100,7 @@ struct inode *ceph_get_snapdir(struct inode *parent)
 	inode->i_uid = parent->i_uid;
 	inode->i_gid = parent->i_gid;
 	inode->i_mtime = parent->i_mtime;
-	inode->i_ctime = parent->i_ctime;
+	inode_set_ctime_to_ts(inode, inode_get_ctime(parent));
 	inode->i_atime = parent->i_atime;
 	ci->i_rbytes = 0;
 	ci->i_btime = ceph_inode(parent)->i_btime;
@@ -688,6 +688,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
 			 struct timespec64 *mtime, struct timespec64 *atime)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct timespec64 ictime = inode_get_ctime(inode);
 	int warn = 0;
 
 	if (issued & (CEPH_CAP_FILE_EXCL|
@@ -696,11 +697,11 @@ void ceph_fill_file_time(struct inode *inode, int issued,
 		      CEPH_CAP_AUTH_EXCL|
 		      CEPH_CAP_XATTR_EXCL)) {
 		if (ci->i_version == 0 ||
-		    timespec64_compare(ctime, &inode->i_ctime) > 0) {
+		    timespec64_compare(ctime, &ictime) > 0) {
 			dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n",
-			     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+			     ictime.tv_sec, ictime.tv_nsec,
 			     ctime->tv_sec, ctime->tv_nsec);
-			inode->i_ctime = *ctime;
+			inode_set_ctime_to_ts(inode, *ctime);
 		}
 		if (ci->i_version == 0 ||
 		    ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
@@ -738,7 +739,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
 	} else {
 		/* we have no write|excl caps; whatever the MDS says is true */
 		if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
-			inode->i_ctime = *ctime;
+			inode_set_ctime_to_ts(inode, *ctime);
 			inode->i_mtime = *mtime;
 			inode->i_atime = *atime;
 			ci->i_time_warp_seq = time_warp_seq;
@@ -2166,7 +2167,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 		bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
 					 ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
 		dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode,
-		     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+		     inode_get_ctime(inode).tv_sec,
+		     inode_get_ctime(inode).tv_nsec,
 		     attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
 		     only ? "ctime only" : "ignored");
 		if (only) {
@@ -2191,7 +2193,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 	if (dirtied) {
 		inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
 							   &prealloc_cf);
-		inode->i_ctime = attr->ia_ctime;
+		inode_set_ctime_to_ts(inode, attr->ia_ctime);
 		inode_inc_iversion_raw(inode);
 	}
 
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 343d738448dc..c9920ade15f5 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -660,7 +660,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 	capsnap->size = i_size_read(inode);
 	capsnap->mtime = inode->i_mtime;
 	capsnap->atime = inode->i_atime;
-	capsnap->ctime = inode->i_ctime;
+	capsnap->ctime = inode_get_ctime(inode);
 	capsnap->btime = ci->i_btime;
 	capsnap->change_attr = inode_peek_iversion_raw(inode);
 	capsnap->time_warp_seq = ci->i_time_warp_seq;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 806183959c47..1cbd84cc82a8 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1238,7 +1238,7 @@ retry:
 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
 					       &prealloc_cf);
 		ci->i_xattrs.dirty = true;
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 	}
 
 	spin_unlock(&ci->i_ceph_lock);

From ea9b53d4ae6619963f760bc5f3a1411b59a803f1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:56 -0400
Subject: [PATCH 0120/2047] coda: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-29-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/coda/coda_linux.c | 3 ++-
 fs/coda/dir.c        | 2 +-
 fs/coda/file.c       | 2 +-
 fs/coda/inode.c      | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 903ca8fa4b9b..ae023853a98f 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -127,7 +127,8 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
 	if (attr->va_mtime.tv_sec != -1)
 		inode->i_mtime = coda_to_timespec64(attr->va_mtime);
         if (attr->va_ctime.tv_sec != -1)
-		inode->i_ctime = coda_to_timespec64(attr->va_ctime);
+		inode_set_ctime_to_ts(inode,
+				      coda_to_timespec64(attr->va_ctime));
 }
 
 
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 8450b1bd354b..1d4f40048efc 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -111,7 +111,7 @@ static inline void coda_dir_update_mtime(struct inode *dir)
 	/* optimistically we can also act as if our nose bleeds. The
 	 * granularity of the mtime is coarse anyways so we might actually be
 	 * right most of the time. Note: we only do this for directories. */
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 #endif
 }
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 12b26bd13564..42346618b4ed 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -84,7 +84,7 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
 	ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0);
 	coda_inode->i_size = file_inode(host_file)->i_size;
 	coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
-	coda_inode->i_mtime = coda_inode->i_ctime = current_time(coda_inode);
+	coda_inode->i_mtime = inode_set_ctime_current(coda_inode);
 	inode_unlock(coda_inode);
 	file_end_write(host_file);
 
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index d661e6cf17ac..3e64679c1620 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -269,7 +269,7 @@ int coda_setattr(struct mnt_idmap *idmap, struct dentry *de,
 
 	memset(&vattr, 0, sizeof(vattr)); 
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	coda_iattr_to_vattr(iattr, &vattr);
 	vattr.va_type = C_VNON; /* cannot set type */
 

From 8e2dc788365cfdceda029b51f44aa52906392627 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:57 -0400
Subject: [PATCH 0121/2047] configfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Joel Becker <jlbec@evilplan.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-30-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/configfs/inode.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 1c15edbe70ff..fbdcb3582926 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -88,8 +88,7 @@ int configfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 static inline void set_default_inode_attr(struct inode * inode, umode_t mode)
 {
 	inode->i_mode = mode;
-	inode->i_atime = inode->i_mtime =
-		inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 }
 
 static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
@@ -99,7 +98,7 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
 	inode->i_gid = iattr->ia_gid;
 	inode->i_atime = iattr->ia_atime;
 	inode->i_mtime = iattr->ia_mtime;
-	inode->i_ctime = iattr->ia_ctime;
+	inode_set_ctime_to_ts(inode, iattr->ia_ctime);
 }
 
 struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd,
@@ -172,7 +171,7 @@ struct inode *configfs_create(struct dentry *dentry, umode_t mode)
 		return ERR_PTR(-ENOMEM);
 
 	p_inode = d_inode(dentry->d_parent);
-	p_inode->i_mtime = p_inode->i_ctime = current_time(p_inode);
+	p_inode->i_mtime = inode_set_ctime_current(p_inode);
 	configfs_set_inode_lock_class(sd, inode);
 	return inode;
 }

From 1e018769388acacf3a04eabffbd71b66cc11052f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:58 -0400
Subject: [PATCH 0122/2047] cramfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-31-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/cramfs/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 27c6597aa1be..e755b2223936 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -133,7 +133,8 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
 	}
 
 	/* Struct copy intentional */
-	inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
+	inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode,
+								zerotime);
 	/* inode->i_nlink is left 1 - arguably wrong for directories,
 	   but it's the best we can do without reading the directory
 	   contents.  1 yields the right result in GNU find, even

From 0f7bda3fcd9cf204efa55d074fb6140c16d08545 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:00:59 -0400
Subject: [PATCH 0123/2047] debugfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-32-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/debugfs/inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 3f81f73c241a..83e57e9f9fa0 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -72,8 +72,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb)
 	struct inode *inode = new_inode(sb);
 	if (inode) {
 		inode->i_ino = get_next_ino();
-		inode->i_atime = inode->i_mtime =
-			inode->i_ctime = current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	}
 	return inode;
 }

From 2ca7b9cd760d2e3dec6e1808d56a8c21eec1e4fc Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:00 -0400
Subject: [PATCH 0124/2047] devpts: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-33-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/devpts/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index fe3db0eda8e4..5ede89880911 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -338,7 +338,7 @@ static int mknod_ptmx(struct super_block *sb)
 	}
 
 	inode->i_ino = 2;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 
 	mode = S_IFCHR|opts->ptmxmode;
 	init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
@@ -451,7 +451,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
 	if (!inode)
 		goto fail;
 	inode->i_ino = 1;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
 	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
@@ -560,7 +560,7 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
 	inode->i_ino = index + 3;
 	inode->i_uid = opts->setuid ? opts->uid : current_fsuid();
 	inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	init_special_inode(inode, S_IFCHR|opts->mode, MKDEV(UNIX98_PTY_SLAVE_MAJOR, index));
 
 	sprintf(s, "%d", index);

From 79d9ce76cc48e0634614e9846d0206156a362b9d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:01 -0400
Subject: [PATCH 0125/2047] ecryptfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-34-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ecryptfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 83274915ba6d..b491bb239c8f 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -148,7 +148,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
 	}
 	fsstack_copy_attr_times(dir, lower_dir);
 	set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink);
-	inode->i_ctime = dir->i_ctime;
+	inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
 out_unlock:
 	dput(lower_dentry);
 	inode_unlock(lower_dir);

From 2fa9a97512dae779003145b693e6726ed92240e3 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:02 -0400
Subject: [PATCH 0126/2047] efivarfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-35-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/efivarfs/file.c  | 2 +-
 fs/efivarfs/inode.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index 375576111dc3..59b52718a3a2 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -51,7 +51,7 @@ static ssize_t efivarfs_file_write(struct file *file,
 	} else {
 		inode_lock(inode);
 		i_size_write(inode, datasize + sizeof(attributes));
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 		inode_unlock(inode);
 	}
 
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index b973a2c03dde..db9231f0e77b 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -25,7 +25,7 @@ struct inode *efivarfs_get_inode(struct super_block *sb,
 	if (inode) {
 		inode->i_ino = get_next_ino();
 		inode->i_mode = mode;
-		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 		inode->i_flags = is_removable ? 0 : S_IMMUTABLE;
 		switch (mode & S_IFMT) {
 		case S_IFREG:

From a68d1f51276d37d8d43408c75eedc8590775b16a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:03 -0400
Subject: [PATCH 0127/2047] efs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-36-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/efs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 3ba94bb005a6..3789d22ba501 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -105,8 +105,8 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
 	inode->i_size  = be32_to_cpu(efs_inode->di_size);
 	inode->i_atime.tv_sec = be32_to_cpu(efs_inode->di_atime);
 	inode->i_mtime.tv_sec = be32_to_cpu(efs_inode->di_mtime);
-	inode->i_ctime.tv_sec = be32_to_cpu(efs_inode->di_ctime);
-	inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
+	inode_set_ctime(inode, be32_to_cpu(efs_inode->di_ctime), 0);
+	inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0;
 
 	/* this is the number of blocks in the file */
 	if (inode->i_size == 0) {

From 7be935e18e2f93670cb3f1944c54c91302e4a1b8 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:04 -0400
Subject: [PATCH 0128/2047] erofs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Gao Xiang <xiang@kernel.org>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-37-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/erofs/inode.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index d70b12b81507..567c0d305ea4 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -105,8 +105,8 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 		set_nlink(inode, le32_to_cpu(die->i_nlink));
 
 		/* extended inode has its own timestamp */
-		inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime);
-		inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec);
+		inode_set_ctime(inode, le64_to_cpu(die->i_mtime),
+				le32_to_cpu(die->i_mtime_nsec));
 
 		inode->i_size = le64_to_cpu(die->i_size);
 
@@ -148,8 +148,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 		set_nlink(inode, le16_to_cpu(dic->i_nlink));
 
 		/* use build time for compact inodes */
-		inode->i_ctime.tv_sec = sbi->build_time;
-		inode->i_ctime.tv_nsec = sbi->build_time_nsec;
+		inode_set_ctime(inode, sbi->build_time, sbi->build_time_nsec);
 
 		inode->i_size = le32_to_cpu(dic->i_size);
 		if (erofs_inode_is_data_compressed(vi->datalayout))
@@ -176,10 +175,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 		vi->chunkbits = sb->s_blocksize_bits +
 			(vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
 	}
-	inode->i_mtime.tv_sec = inode->i_ctime.tv_sec;
-	inode->i_atime.tv_sec = inode->i_ctime.tv_sec;
-	inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec;
-	inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
+	inode->i_mtime = inode->i_atime = inode_get_ctime(inode);
 
 	inode->i_flags &= ~S_DAX;
 	if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&

From f29f190828238d377b2bbffb1ce7a83c45fb2dd4 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:05 -0400
Subject: [PATCH 0129/2047] exfat: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-38-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/exfat/file.c  |  4 ++--
 fs/exfat/inode.c |  6 +++---
 fs/exfat/namei.c | 21 +++++++++------------
 fs/exfat/super.c |  3 +--
 4 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 3cbd270e0cba..f40ecfeee3a4 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -22,7 +22,7 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
 	if (err)
 		return err;
 
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 
 	if (!IS_SYNC(inode))
@@ -290,7 +290,7 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 	}
 
 	if (attr->ia_valid & ATTR_SIZE)
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 
 	setattr_copy(&nop_mnt_idmap, inode, attr);
 	exfat_truncate_atime(&inode->i_atime);
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 481dd338f2b8..13329baeafbc 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -355,7 +355,7 @@ static void exfat_write_failed(struct address_space *mapping, loff_t to)
 
 	if (to > i_size_read(inode)) {
 		truncate_pagecache(inode, i_size_read(inode));
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 		exfat_truncate(inode);
 	}
 }
@@ -398,7 +398,7 @@ static int exfat_write_end(struct file *file, struct address_space *mapping,
 		exfat_write_failed(mapping, pos+len);
 
 	if (!(err < 0) && !(ei->attr & ATTR_ARCHIVE)) {
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 		ei->attr |= ATTR_ARCHIVE;
 		mark_inode_dirty(inode);
 	}
@@ -577,7 +577,7 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
 
 	inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9;
 	inode->i_mtime = info->mtime;
-	inode->i_ctime = info->mtime;
+	inode_set_ctime_to_ts(inode, info->mtime);
 	ei->i_crtime = info->crtime;
 	inode->i_atime = info->atime;
 
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index e91022ff80ef..1b9f587f6cca 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -569,7 +569,7 @@ static int exfat_create(struct mnt_idmap *idmap, struct inode *dir,
 		goto unlock;
 
 	inode_inc_iversion(dir);
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	if (IS_DIRSYNC(dir))
 		exfat_sync_inode(dir);
 	else
@@ -582,8 +582,7 @@ static int exfat_create(struct mnt_idmap *idmap, struct inode *dir,
 		goto unlock;
 
 	inode_inc_iversion(inode);
-	inode->i_mtime = inode->i_atime = inode->i_ctime =
-		EXFAT_I(inode)->i_crtime = current_time(inode);
+	inode->i_mtime = inode->i_atime = EXFAT_I(inode)->i_crtime = inode_set_ctime_current(inode);
 	exfat_truncate_atime(&inode->i_atime);
 	/* timestamp is already written, so mark_inode_dirty() is unneeded. */
 
@@ -817,7 +816,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
 	ei->dir.dir = DIR_DELETED;
 
 	inode_inc_iversion(dir);
-	dir->i_mtime = dir->i_atime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
 	exfat_truncate_atime(&dir->i_atime);
 	if (IS_DIRSYNC(dir))
 		exfat_sync_inode(dir);
@@ -825,7 +824,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
 		mark_inode_dirty(dir);
 
 	clear_nlink(inode);
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	exfat_truncate_atime(&inode->i_atime);
 	exfat_unhash_inode(inode);
 	exfat_d_version_set(dentry, inode_query_iversion(dir));
@@ -852,7 +851,7 @@ static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 		goto unlock;
 
 	inode_inc_iversion(dir);
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	if (IS_DIRSYNC(dir))
 		exfat_sync_inode(dir);
 	else
@@ -866,8 +865,7 @@ static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 		goto unlock;
 
 	inode_inc_iversion(inode);
-	inode->i_mtime = inode->i_atime = inode->i_ctime =
-		EXFAT_I(inode)->i_crtime = current_time(inode);
+	inode->i_mtime = inode->i_atime = EXFAT_I(inode)->i_crtime = inode_set_ctime_current(inode);
 	exfat_truncate_atime(&inode->i_atime);
 	/* timestamp is already written, so mark_inode_dirty() is unneeded. */
 
@@ -979,7 +977,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
 	ei->dir.dir = DIR_DELETED;
 
 	inode_inc_iversion(dir);
-	dir->i_mtime = dir->i_atime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
 	exfat_truncate_atime(&dir->i_atime);
 	if (IS_DIRSYNC(dir))
 		exfat_sync_inode(dir);
@@ -988,7 +986,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
 	drop_nlink(dir);
 
 	clear_nlink(inode);
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	exfat_truncate_atime(&inode->i_atime);
 	exfat_unhash_inode(inode);
 	exfat_d_version_set(dentry, inode_query_iversion(dir));
@@ -1353,8 +1351,7 @@ static int exfat_rename(struct mnt_idmap *idmap,
 			exfat_warn(sb, "abnormal access to an inode dropped");
 			WARN_ON(new_inode->i_nlink == 0);
 		}
-		new_inode->i_ctime = EXFAT_I(new_inode)->i_crtime =
-			current_time(new_inode);
+		EXFAT_I(new_inode)->i_crtime = current_time(new_inode);
 	}
 
 unlock:
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 8c32460e031e..8e0ca4b0fc8f 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -379,8 +379,7 @@ static int exfat_read_root(struct inode *inode)
 	ei->i_size_ondisk = i_size_read(inode);
 
 	exfat_save_attr(inode, ATTR_SUBDIR);
-	inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
-		current_time(inode);
+	inode->i_mtime = inode->i_atime = ei->i_crtime = inode_set_ctime_current(inode);
 	exfat_truncate_atime(&inode->i_atime);
 	return 0;
 }

From fc4eed640d8f124dfc47ead69232f570a310496a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:06 -0400
Subject: [PATCH 0130/2047] ext2: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-39-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ext2/acl.c    |  2 +-
 fs/ext2/dir.c    |  6 +++---
 fs/ext2/ialloc.c |  2 +-
 fs/ext2/inode.c  | 10 +++++-----
 fs/ext2/ioctl.c  |  4 ++--
 fs/ext2/namei.c  |  8 ++++----
 fs/ext2/super.c  |  2 +-
 fs/ext2/xattr.c  |  2 +-
 8 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 82b17d7fc93f..7e54c31589c7 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -237,7 +237,7 @@ ext2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
 	error = __ext2_set_acl(inode, acl, type);
 	if (!error && update_mode) {
 		inode->i_mode = mode;
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		mark_inode_dirty(inode);
 	}
 	return error;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 42db804794bd..b335f17f682f 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -468,7 +468,7 @@ int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
 	ext2_set_de_type(de, inode);
 	ext2_commit_chunk(page, pos, len);
 	if (update_times)
-		dir->i_mtime = dir->i_ctime = current_time(dir);
+		dir->i_mtime = inode_set_ctime_current(dir);
 	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
 	mark_inode_dirty(dir);
 	return ext2_handle_dirsync(dir);
@@ -555,7 +555,7 @@ got_it:
 	de->inode = cpu_to_le32(inode->i_ino);
 	ext2_set_de_type (de, inode);
 	ext2_commit_chunk(page, pos, rec_len);
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
 	mark_inode_dirty(dir);
 	err = ext2_handle_dirsync(dir);
@@ -606,7 +606,7 @@ int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page)
 		pde->rec_len = ext2_rec_len_to_disk(to - from);
 	dir->inode = 0;
 	ext2_commit_chunk(page, pos, to - from);
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL;
 	mark_inode_dirty(inode);
 	return ext2_handle_dirsync(inode);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index a4e1d7a9c544..124df89689e1 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -549,7 +549,7 @@ got:
 
 	inode->i_ino = ino;
 	inode->i_blocks = 0;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	memset(ei->i_data, 0, sizeof(ei->i_data));
 	ei->i_flags =
 		ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 75983215c7a1..1259995977d2 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -595,7 +595,7 @@ static void ext2_splice_branch(struct inode *inode,
 	if (where->bh)
 		mark_buffer_dirty_inode(where->bh, inode);
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 }
 
@@ -1287,7 +1287,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
 	__ext2_truncate_blocks(inode, newsize);
 	filemap_invalidate_unlock(inode->i_mapping);
 
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	if (inode_needs_sync(inode)) {
 		sync_mapping_buffers(inode->i_mapping);
 		sync_inode_metadata(inode, 1);
@@ -1409,9 +1409,9 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
 	inode->i_size = le32_to_cpu(raw_inode->i_size);
 	inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
-	inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
+	inode_set_ctime(inode, (signed)le32_to_cpu(raw_inode->i_ctime), 0);
 	inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
-	inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
+	inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0;
 	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
 	/* We now have enough fields to check if the inode was active or not.
 	 * This is needed because nfsd might try to access dead inodes
@@ -1541,7 +1541,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync)
 	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
 	raw_inode->i_size = cpu_to_le32(inode->i_size);
 	raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
-	raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+	raw_inode->i_ctime = cpu_to_le32(inode_get_ctime(inode).tv_sec);
 	raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
 
 	raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index cc87d413eb43..44e04484e570 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -44,7 +44,7 @@ int ext2_fileattr_set(struct mnt_idmap *idmap,
 		(fa->flags & EXT2_FL_USER_MODIFIABLE);
 
 	ext2_set_inode_flags(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 
 	return 0;
@@ -77,7 +77,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		}
 
 		inode_lock(inode);
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		inode->i_generation = generation;
 		inode_unlock(inode);
 
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 937dd8f60f96..059517068adc 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -211,7 +211,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
 	if (err)
 		return err;
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	inode_inc_link_count(inode);
 	ihold(inode);
 
@@ -291,7 +291,7 @@ static int ext2_unlink(struct inode *dir, struct dentry *dentry)
 	if (err)
 		goto out;
 
-	inode->i_ctime = dir->i_ctime;
+	inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
 	inode_dec_link_count(inode);
 	err = 0;
 out:
@@ -367,7 +367,7 @@ static int ext2_rename (struct mnt_idmap * idmap,
 		ext2_put_page(new_page, new_de);
 		if (err)
 			goto out_dir;
-		new_inode->i_ctime = current_time(new_inode);
+		inode_set_ctime_current(new_inode);
 		if (dir_de)
 			drop_nlink(new_inode);
 		inode_dec_link_count(new_inode);
@@ -383,7 +383,7 @@ static int ext2_rename (struct mnt_idmap * idmap,
 	 * Like most other Unix systems, set the ctime for inodes on a
  	 * rename.
 	 */
-	old_inode->i_ctime = current_time(old_inode);
+	inode_set_ctime_current(old_inode);
 	mark_inode_dirty(old_inode);
 
 	err = ext2_delete_entry(old_de, old_page);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 2959afc7541c..aaf3e3e88cb2 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1572,7 +1572,7 @@ out:
 	if (inode->i_size < off+len-towrite)
 		i_size_write(inode, off+len-towrite);
 	inode_inc_iversion(inode);
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 	return len - towrite;
 }
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 8906ba479aaf..1c9187188d68 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -773,7 +773,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 
 	/* Update the inode. */
 	EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	if (IS_SYNC(inode)) {
 		error = sync_inode_metadata(inode, 1);
 		/* In case sync failed due to ENOSPC the inode was actually

From 49319832de90f1943264e5c573b072947af6ae26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 24 Jun 2023 12:30:46 +0200
Subject: [PATCH 0131/2047] selftests/nolibc: drop test chmod_net
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The test relies on /proc/$PID/net to allow chmod() operations.
It is the only file or directory in /proc/$PID/ to allow this and a bug.
That bug will be fixed in the next patch in the series and therefore
the test would start failing.

Link: https://lore.kernel.org/lkml/d0d111ef-edae-4760-83fb-36db84278da1@t-8ch.de/
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Fixes: b4844fa0bdb4 ("selftests/nolibc: implement a few tests for various syscalls")
Tested-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Message-Id: <20230624-proc-net-setattr-v1-1-73176812adee@weissschuh.net>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 486334981e60..970df9e55131 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -577,7 +577,6 @@ int run_syscall(int min, int max)
 		CASE_TEST(chdir_root);        EXPECT_SYSZR(1, chdir("/")); break;
 		CASE_TEST(chdir_dot);         EXPECT_SYSZR(1, chdir(".")); break;
 		CASE_TEST(chdir_blah);        EXPECT_SYSER(1, chdir("/blah"), -1, ENOENT); break;
-		CASE_TEST(chmod_net);         EXPECT_SYSZR(proc, chmod("/proc/self/net", 0555)); break;
 		CASE_TEST(chmod_self);        EXPECT_SYSER(proc, chmod("/proc/self", 0555), -1, EPERM); break;
 		CASE_TEST(chown_self);        EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break;
 		CASE_TEST(chroot_root);       EXPECT_SYSZR(euid0, chroot("/")); break;

From 18e66ae67673f2c8df6f02428798b1355691f2a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 24 Jun 2023 12:30:47 +0200
Subject: [PATCH 0132/2047] proc: use generic setattr() for /proc/$PID/net
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All other files in /proc/$PID/ use proc_setattr().

Not using it allows the usage of chmod() on /proc/$PID/net, even on
other processes owned by the same user.
The same would probably also be true for other attributes to be changed.

As this technically represents an ABI change it is not marked for
stable so any unlikely regressions are caught during a full release cycle.

Fixes: e9720acd728a ("[NET]: Make /proc/net a symlink on /proc/self/net (v3)")
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/lkml/d0d111ef-edae-4760-83fb-36db84278da1@t-8ch.de/
Fixes: b4844fa0bdb4 ("selftests/nolibc: implement a few tests for various syscalls")
Tested-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Message-Id: <20230624-proc-net-setattr-v1-2-73176812adee@weissschuh.net>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/proc/proc_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index a0c0419872e3..78f9e6b469c0 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -321,6 +321,7 @@ static int proc_tgid_net_getattr(struct mnt_idmap *idmap,
 const struct inode_operations proc_net_inode_operations = {
 	.lookup		= proc_tgid_net_lookup,
 	.getattr	= proc_tgid_net_getattr,
+	.setattr        = proc_setattr,
 };
 
 static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx)

From d243b34459cea30cfe5f3a9b2feb44e7daff9938 Mon Sep 17 00:00:00 2001
From: Wander Lairson Costa <wander@redhat.com>
Date: Wed, 14 Jun 2023 09:23:21 -0300
Subject: [PATCH 0133/2047] kernel/fork: beware of __put_task_struct() calling
 context

Under PREEMPT_RT, __put_task_struct() indirectly acquires sleeping
locks. Therefore, it can't be called from an non-preemptible context.

One practical example is splat inside inactive_task_timer(), which is
called in a interrupt context:

  CPU: 1 PID: 2848 Comm: life Kdump: loaded Tainted: G W ---------
   Hardware name: HP ProLiant DL388p Gen8, BIOS P70 07/15/2012
   Call Trace:
   dump_stack_lvl+0x57/0x7d
   mark_lock_irq.cold+0x33/0xba
   mark_lock+0x1e7/0x400
   mark_usage+0x11d/0x140
   __lock_acquire+0x30d/0x930
   lock_acquire.part.0+0x9c/0x210
   rt_spin_lock+0x27/0xe0
   refill_obj_stock+0x3d/0x3a0
   kmem_cache_free+0x357/0x560
   inactive_task_timer+0x1ad/0x340
   __run_hrtimer+0x8a/0x1a0
   __hrtimer_run_queues+0x91/0x130
   hrtimer_interrupt+0x10f/0x220
   __sysvec_apic_timer_interrupt+0x7b/0xd0
   sysvec_apic_timer_interrupt+0x4f/0xd0
   asm_sysvec_apic_timer_interrupt+0x12/0x20
   RIP: 0033:0x7fff196bf6f5

Instead of calling __put_task_struct() directly, we defer it using
call_rcu(). A more natural approach would use a workqueue, but since
in PREEMPT_RT, we can't allocate dynamic memory from atomic context,
the code would become more complex because we would need to put the
work_struct instance in the task_struct and initialize it when we
allocate a new task_struct.

The issue is reproducible with stress-ng:

  while true; do
      stress-ng --sched deadline --sched-period 1000000000 \
	      --sched-runtime 800000000 --sched-deadline \
	      1000000000 --mmapfork 23 -t 20
  done

Reported-by: Hu Chunyu <chuhu@redhat.com>
Suggested-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Valentin Schneider <vschneid@redhat.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Wander Lairson Costa <wander@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230614122323.37957-2-wander@redhat.com
---
 include/linux/sched/task.h | 28 +++++++++++++++++++++++++++-
 kernel/fork.c              |  8 ++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index dd35ce28bb90..6b687c155fb6 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -118,10 +118,36 @@ static inline struct task_struct *get_task_struct(struct task_struct *t)
 }
 
 extern void __put_task_struct(struct task_struct *t);
+extern void __put_task_struct_rcu_cb(struct rcu_head *rhp);
 
 static inline void put_task_struct(struct task_struct *t)
 {
-	if (refcount_dec_and_test(&t->usage))
+	if (!refcount_dec_and_test(&t->usage))
+		return;
+
+	/*
+	 * under PREEMPT_RT, we can't call put_task_struct
+	 * in atomic context because it will indirectly
+	 * acquire sleeping locks.
+	 *
+	 * call_rcu() will schedule delayed_put_task_struct_rcu()
+	 * to be called in process context.
+	 *
+	 * __put_task_struct() is called when
+	 * refcount_dec_and_test(&t->usage) succeeds.
+	 *
+	 * This means that it can't "conflict" with
+	 * put_task_struct_rcu_user() which abuses ->rcu the same
+	 * way; rcu_users has a reference so task->usage can't be
+	 * zero after rcu_users 1 -> 0 transition.
+	 *
+	 * delayed_free_task() also uses ->rcu, but it is only called
+	 * when it fails to fork a process. Therefore, there is no
+	 * way it can conflict with put_task_struct().
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !preemptible())
+		call_rcu(&t->rcu, __put_task_struct_rcu_cb);
+	else
 		__put_task_struct(t);
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index d2e12b6d2b18..f81149739eb9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -985,6 +985,14 @@ void __put_task_struct(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(__put_task_struct);
 
+void __put_task_struct_rcu_cb(struct rcu_head *rhp)
+{
+	struct task_struct *task = container_of(rhp, struct task_struct, rcu);
+
+	__put_task_struct(task);
+}
+EXPORT_SYMBOL_GPL(__put_task_struct_rcu_cb);
+
 void __init __weak arch_task_cache_init(void) { }
 
 /*

From 893cdaaa3977be6afb3a7f756fbfd7be83f68d8c Mon Sep 17 00:00:00 2001
From: Wander Lairson Costa <wander@redhat.com>
Date: Wed, 14 Jun 2023 09:23:22 -0300
Subject: [PATCH 0134/2047] sched: avoid false lockdep splat in
 put_task_struct()

In put_task_struct(), a spin_lock is indirectly acquired under the kernel
stock. When running the kernel in real-time (RT) configuration, the
operation is dispatched to a preemptible context call to ensure
guaranteed preemption. However, if PROVE_RAW_LOCK_NESTING is enabled
and __put_task_struct() is called while holding a raw_spinlock, lockdep
incorrectly reports an "Invalid lock context" in the stock kernel.

This false splat occurs because lockdep is unaware of the different
route taken under RT. To address this issue, override the inner wait
type to prevent the false lockdep splat.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Wander Lairson Costa <wander@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230614122323.37957-3-wander@redhat.com
---
 include/linux/sched/task.h | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 6b687c155fb6..a23af225c898 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -125,6 +125,19 @@ static inline void put_task_struct(struct task_struct *t)
 	if (!refcount_dec_and_test(&t->usage))
 		return;
 
+	/*
+	 * In !RT, it is always safe to call __put_task_struct().
+	 * Under RT, we can only call it in preemptible context.
+	 */
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
+		static DEFINE_WAIT_OVERRIDE_MAP(put_task_map, LD_WAIT_SLEEP);
+
+		lock_map_acquire_try(&put_task_map);
+		__put_task_struct(t);
+		lock_map_release(&put_task_map);
+		return;
+	}
+
 	/*
 	 * under PREEMPT_RT, we can't call put_task_struct
 	 * in atomic context because it will indirectly
@@ -145,10 +158,7 @@ static inline void put_task_struct(struct task_struct *t)
 	 * when it fails to fork a process. Therefore, there is no
 	 * way it can conflict with put_task_struct().
 	 */
-	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !preemptible())
-		call_rcu(&t->rcu, __put_task_struct_rcu_cb);
-	else
-		__put_task_struct(t);
+	call_rcu(&t->rcu, __put_task_struct_rcu_cb);
 }
 
 DEFINE_FREE(put_task, struct task_struct *, if (_T) put_task_struct(_T))

From 79462e8c879afc7895b30014d31e2c1fd629bb1f Mon Sep 17 00:00:00 2001
From: Josh Don <joshdon@google.com>
Date: Tue, 20 Jun 2023 11:32:46 -0700
Subject: [PATCH 0135/2047] sched: don't account throttle time for empty groups

It is easy for a cfs_rq to become throttled even when it has no enqueued
entities (for example, if we have just put_prev()'d the last runnable
task of the cfs_rq, and the cfs_rq is out of quota).

Avoid accounting this time towards total throttle time, since it
otherwise falsely inflates the stats.

Note that the dequeue path is special, since we normally disallow
migrations when a task is in a throttled hierarchy (see
throttled_lb_pair()).

Signed-off-by: Josh Don <joshdon@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230620183247.737942-1-joshdon@google.com
---
 kernel/sched/fair.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a80a73909dc2..51ccae747795 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4787,6 +4787,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 }
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
+static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
 
 static inline bool cfs_bandwidth_used(void);
 
@@ -4873,8 +4874,14 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if (cfs_rq->nr_running == 1) {
 		check_enqueue_throttle(cfs_rq);
-		if (!throttled_hierarchy(cfs_rq))
+		if (!throttled_hierarchy(cfs_rq)) {
 			list_add_leaf_cfs_rq(cfs_rq);
+		} else {
+#ifdef CONFIG_CFS_BANDWIDTH
+			if (cfs_rq_throttled(cfs_rq) && !cfs_rq->throttled_clock)
+				cfs_rq->throttled_clock = rq_clock(rq_of(cfs_rq));
+#endif
+		}
 	}
 }
 
@@ -5480,7 +5487,9 @@ done:
 	 * throttled-list.  rq->lock protects completion.
 	 */
 	cfs_rq->throttled = 1;
-	cfs_rq->throttled_clock = rq_clock(rq);
+	SCHED_WARN_ON(cfs_rq->throttled_clock);
+	if (cfs_rq->nr_running)
+		cfs_rq->throttled_clock = rq_clock(rq);
 	return true;
 }
 
@@ -5498,7 +5507,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	update_rq_clock(rq);
 
 	raw_spin_lock(&cfs_b->lock);
-	cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
+	if (cfs_rq->throttled_clock) {
+		cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
+		cfs_rq->throttled_clock = 0;
+	}
 	list_del_rcu(&cfs_rq->throttled_list);
 	raw_spin_unlock(&cfs_b->lock);
 

From 677ea015f231aa38b3972aa7be54ecd2637e99fd Mon Sep 17 00:00:00 2001
From: Josh Don <joshdon@google.com>
Date: Tue, 20 Jun 2023 11:32:47 -0700
Subject: [PATCH 0136/2047] sched: add throttled time stat for throttled
 children

We currently export the total throttled time for cgroups that are given
a bandwidth limit. This patch extends this accounting to also account
the total time that each children cgroup has been throttled.

This is useful to understand the degree to which children have been
affected by the throttling control. Children which are not runnable
during the entire throttled period, for example, will not show any
self-throttling time during this period.

Expose this in a new interface, 'cpu.stat.local', which is similar to
how non-hierarchical events are accounted in 'memory.events.local'.

Signed-off-by: Josh Don <joshdon@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20230620183247.737942-2-joshdon@google.com
---
 include/linux/cgroup-defs.h |  2 ++
 kernel/cgroup/cgroup.c      | 34 ++++++++++++++++++++++++++++
 kernel/sched/core.c         | 44 +++++++++++++++++++++++++++++++++++++
 kernel/sched/fair.c         | 21 +++++++++++++++++-
 kernel/sched/sched.h        |  2 ++
 5 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 8a0d5466c7be..ae20dbb885d6 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -661,6 +661,8 @@ struct cgroup_subsys {
 	void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
 	int (*css_extra_stat_show)(struct seq_file *seq,
 				   struct cgroup_subsys_state *css);
+	int (*css_local_stat_show)(struct seq_file *seq,
+				   struct cgroup_subsys_state *css);
 
 	int (*can_attach)(struct cgroup_taskset *tset);
 	void (*cancel_attach)(struct cgroup_taskset *tset);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index bfe3cd8ccf36..4e3ee13217ce 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3685,6 +3685,36 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
 	return ret;
 }
 
+static int __maybe_unused cgroup_local_stat_show(struct seq_file *seq,
+						 struct cgroup *cgrp, int ssid)
+{
+	struct cgroup_subsys *ss = cgroup_subsys[ssid];
+	struct cgroup_subsys_state *css;
+	int ret;
+
+	if (!ss->css_local_stat_show)
+		return 0;
+
+	css = cgroup_tryget_css(cgrp, ss);
+	if (!css)
+		return 0;
+
+	ret = ss->css_local_stat_show(seq, css);
+	css_put(css);
+	return ret;
+}
+
+static int cpu_local_stat_show(struct seq_file *seq, void *v)
+{
+	struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
+	int ret = 0;
+
+#ifdef CONFIG_CGROUP_SCHED
+	ret = cgroup_local_stat_show(seq, cgrp, cpu_cgrp_id);
+#endif
+	return ret;
+}
+
 #ifdef CONFIG_PSI
 static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
 {
@@ -5235,6 +5265,10 @@ static struct cftype cgroup_base_files[] = {
 		.name = "cpu.stat",
 		.seq_show = cpu_stat_show,
 	},
+	{
+		.name = "cpu.stat.local",
+		.seq_show = cpu_local_stat_show,
+	},
 	{ }	/* terminate */
 };
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c52c2eba7c73..2291f9d91c86 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11139,6 +11139,27 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
 
 	return 0;
 }
+
+static u64 throttled_time_self(struct task_group *tg)
+{
+	int i;
+	u64 total = 0;
+
+	for_each_possible_cpu(i) {
+		total += READ_ONCE(tg->cfs_rq[i]->throttled_clock_self_time);
+	}
+
+	return total;
+}
+
+static int cpu_cfs_local_stat_show(struct seq_file *sf, void *v)
+{
+	struct task_group *tg = css_tg(seq_css(sf));
+
+	seq_printf(sf, "throttled_time %llu\n", throttled_time_self(tg));
+
+	return 0;
+}
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
@@ -11215,6 +11236,10 @@ static struct cftype cpu_legacy_files[] = {
 		.name = "stat",
 		.seq_show = cpu_cfs_stat_show,
 	},
+	{
+		.name = "stat.local",
+		.seq_show = cpu_cfs_local_stat_show,
+	},
 #endif
 #ifdef CONFIG_RT_GROUP_SCHED
 	{
@@ -11271,6 +11296,24 @@ static int cpu_extra_stat_show(struct seq_file *sf,
 	return 0;
 }
 
+static int cpu_local_stat_show(struct seq_file *sf,
+			       struct cgroup_subsys_state *css)
+{
+#ifdef CONFIG_CFS_BANDWIDTH
+	{
+		struct task_group *tg = css_tg(css);
+		u64 throttled_self_usec;
+
+		throttled_self_usec = throttled_time_self(tg);
+		do_div(throttled_self_usec, NSEC_PER_USEC);
+
+		seq_printf(sf, "throttled_usec %llu\n",
+			   throttled_self_usec);
+	}
+#endif
+	return 0;
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
 			       struct cftype *cft)
@@ -11449,6 +11492,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
 	.css_released	= cpu_cgroup_css_released,
 	.css_free	= cpu_cgroup_css_free,
 	.css_extra_stat_show = cpu_extra_stat_show,
+	.css_local_stat_show = cpu_local_stat_show,
 #ifdef CONFIG_RT_GROUP_SCHED
 	.can_attach	= cpu_cgroup_can_attach,
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 51ccae747795..159b20296dd5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4878,8 +4878,12 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 			list_add_leaf_cfs_rq(cfs_rq);
 		} else {
 #ifdef CONFIG_CFS_BANDWIDTH
+			struct rq *rq = rq_of(cfs_rq);
+
 			if (cfs_rq_throttled(cfs_rq) && !cfs_rq->throttled_clock)
-				cfs_rq->throttled_clock = rq_clock(rq_of(cfs_rq));
+				cfs_rq->throttled_clock = rq_clock(rq);
+			if (!cfs_rq->throttled_clock_self)
+				cfs_rq->throttled_clock_self = rq_clock(rq);
 #endif
 		}
 	}
@@ -5384,6 +5388,17 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 		/* Add cfs_rq with load or one or more already running entities to the list */
 		if (!cfs_rq_is_decayed(cfs_rq))
 			list_add_leaf_cfs_rq(cfs_rq);
+
+		if (cfs_rq->throttled_clock_self) {
+			u64 delta = rq_clock(rq) - cfs_rq->throttled_clock_self;
+
+			cfs_rq->throttled_clock_self = 0;
+
+			if (SCHED_WARN_ON((s64)delta < 0))
+				delta = 0;
+
+			cfs_rq->throttled_clock_self_time += delta;
+		}
 	}
 
 	return 0;
@@ -5398,6 +5413,10 @@ static int tg_throttle_down(struct task_group *tg, void *data)
 	if (!cfs_rq->throttle_count) {
 		cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq);
 		list_del_leaf_cfs_rq(cfs_rq);
+
+		SCHED_WARN_ON(cfs_rq->throttled_clock_self);
+		if (cfs_rq->nr_running)
+			cfs_rq->throttled_clock_self = rq_clock(rq);
 	}
 	cfs_rq->throttle_count++;
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e93e006a942b..1dcea9bfa0a8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -636,6 +636,8 @@ struct cfs_rq {
 	u64			throttled_clock;
 	u64			throttled_clock_pelt;
 	u64			throttled_clock_pelt_time;
+	u64			throttled_clock_self;
+	u64			throttled_clock_self_time;
 	int			throttled;
 	int			throttle_count;
 	struct list_head	throttled_list;

From 548796e2e70b44b4661fd7feee6eb239245ff1f8 Mon Sep 17 00:00:00 2001
From: Cruz Zhao <CruzZhao@linux.alibaba.com>
Date: Thu, 29 Jun 2023 12:02:04 +0800
Subject: [PATCH 0137/2047] sched/core: introduce sched_core_idle_cpu()

As core scheduling introduced, a new state of idle is defined as
force idle, running idle task but nr_running greater than zero.

If a cpu is in force idle state, idle_cpu() will return zero. This
result makes sense in some scenarios, e.g., load balance,
showacpu when dumping, and judge the RCU boost kthread is starving.

But this will cause error in other scenarios, e.g., tick_irq_exit():
When force idle, rq->curr == rq->idle but rq->nr_running > 0, results
that idle_cpu() returns 0. In function tick_irq_exit(), if idle_cpu()
is 0, tick_nohz_irq_exit() will not be called, and ts->idle_active will
not become 1, which became 0 in tick_nohz_irq_enter().
ts->idle_sleeptime won't update in function update_ts_time_stats(), if
ts->idle_active is 0, which should be 1. And this bug will result that
ts->idle_sleeptime is less than the actual value, and finally will
result that the idle time in /proc/stat is less than the actual value.

To solve this problem, we introduce sched_core_idle_cpu(), which
returns 1 when force idle. We audit all users of idle_cpu(), and
change idle_cpu() into sched_core_idle_cpu() in function
tick_irq_exit().

v2-->v3: Only replace idle_cpu() with sched_core_idle_cpu() in
function tick_irq_exit(). And modify the corresponding commit log.

Signed-off-by: Cruz Zhao <CruzZhao@linux.alibaba.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Joel Fernandes <joel@joelfernandes.org>
Link: https://lore.kernel.org/r/1688011324-42406-1-git-send-email-CruzZhao@linux.alibaba.com
---
 include/linux/sched.h |  2 ++
 kernel/sched/core.c   | 13 +++++++++++++
 kernel/softirq.c      |  2 +-
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 609bde814cb0..efc9f4bdc4ca 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2433,9 +2433,11 @@ extern void sched_core_free(struct task_struct *tsk);
 extern void sched_core_fork(struct task_struct *p);
 extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
 				unsigned long uaddr);
+extern int sched_core_idle_cpu(int cpu);
 #else
 static inline void sched_core_free(struct task_struct *tsk) { }
 static inline void sched_core_fork(struct task_struct *p) { }
+static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
 #endif
 
 extern void sched_set_stop_task(int cpu, struct task_struct *stop);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2291f9d91c86..83e36547af17 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7383,6 +7383,19 @@ struct task_struct *idle_task(int cpu)
 	return cpu_rq(cpu)->idle;
 }
 
+#ifdef CONFIG_SCHED_CORE
+int sched_core_idle_cpu(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	if (sched_core_enabled(rq) && rq->curr == rq->idle)
+		return 1;
+
+	return idle_cpu(cpu);
+}
+
+#endif
+
 #ifdef CONFIG_SMP
 /*
  * This function computes an effective utilization for the given CPU, to be
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 807b34ccd797..210cf5f8d92c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -612,7 +612,7 @@ static inline void tick_irq_exit(void)
 	int cpu = smp_processor_id();
 
 	/* Make sure that timer wheel updates are propagated */
-	if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
+	if ((sched_core_idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
 		if (!in_hardirq())
 			tick_nohz_irq_exit();
 	}

From 35cd21f6292c6656aaab6066a1fa13cd11ca27f5 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Thu, 25 May 2023 18:34:28 +0800
Subject: [PATCH 0138/2047] sched/psi: make psi_cgroups_enabled static

The static key psi_cgroups_enabled is only used inside file psi.c.
Make it static.

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Link: https://lore.kernel.org/r/20230525103428.49712-1-linmiaohe@huawei.com
---
 kernel/sched/psi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 81fca77397f6..2ccb0b2ebd78 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -140,7 +140,7 @@
 static int psi_bug __read_mostly;
 
 DEFINE_STATIC_KEY_FALSE(psi_disabled);
-DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled);
+static DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled);
 
 #ifdef CONFIG_PSI_DEFAULT_DISABLED
 static bool psi_enable;

From fee1759e4f042aaaa643c50369a03a9a6559a575 Mon Sep 17 00:00:00 2001
From: Tim C Chen <tim.c.chen@linux.intel.com>
Date: Fri, 7 Jul 2023 15:57:00 -0700
Subject: [PATCH 0139/2047] sched/fair: Determine active load balance for SMT
 sched groups

On hybrid CPUs with scheduling cluster enabled, we will need to
consider balancing between SMT CPU cluster, and Atom core cluster.

Below shows such a hybrid x86 CPU with 4 big cores and 8 atom cores.
Each scheduling cluster span a L2 cache.

          --L2-- --L2-- --L2-- --L2-- ----L2---- -----L2------
          [0, 1] [2, 3] [4, 5] [5, 6] [7 8 9 10] [11 12 13 14]
          Big    Big    Big    Big    Atom       Atom
          core   core   core   core   Module     Module

If the busiest group is a big core with both SMT CPUs busy, we should
active load balance if destination group has idle CPU cores.  Such
condition is considered by asym_active_balance() in load balancing but not
considered when looking for busiest group and computing load imbalance.
Add this consideration in find_busiest_group() and calculate_imbalance().

In addition, update the logic determining the busier group when one group
is SMT and the other group is non SMT but both groups are partially busy
with idle CPU. The busier group should be the group with idle cores rather
than the group with one busy SMT CPU.  We do not want to make the SMT group
the busiest one to pull the only task off SMT CPU and causing the whole core to
go empty.

Otherwise suppose in the search for the busiest group, we first encounter
an SMT group with 1 task and set it as the busiest.  The destination
group is an atom cluster with 1 task and we next encounter an atom
cluster group with 3 tasks, we will not pick this atom cluster over the
SMT group, even though we should.  As a result, we do not load balance
the busier Atom cluster (with 3 tasks) towards the local atom cluster
(with 1 task).  And it doesn't make sense to pick the 1 task SMT group
as the busier group as we also should not pull task off the SMT towards
the 1 task atom cluster and make the SMT core completely empty.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/e24f35d142308790f69be65930b82794ef6658a2.1688770494.git.tim.c.chen@linux.intel.com
---
 kernel/sched/fair.c | 80 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 159b20296dd5..accbfbbfa6a5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8446,6 +8446,11 @@ enum group_type {
 	 * more powerful CPU.
 	 */
 	group_misfit_task,
+	/*
+	 * Balance SMT group that's fully busy. Can benefit from migration
+	 * a task on SMT with busy sibling to another CPU on idle core.
+	 */
+	group_smt_balance,
 	/*
 	 * SD_ASYM_PACKING only: One local CPU with higher capacity is available,
 	 * and the task should be migrated to it instead of running on the
@@ -9154,6 +9159,7 @@ struct sg_lb_stats {
 	unsigned int group_weight;
 	enum group_type group_type;
 	unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */
+	unsigned int group_smt_balance;  /* Task on busy SMT be moved */
 	unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int nr_numa_running;
@@ -9427,6 +9433,9 @@ group_type group_classify(unsigned int imbalance_pct,
 	if (sgs->group_asym_packing)
 		return group_asym_packing;
 
+	if (sgs->group_smt_balance)
+		return group_smt_balance;
+
 	if (sgs->group_misfit_task_load)
 		return group_misfit_task;
 
@@ -9496,6 +9505,36 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds,  struct sg_lb_stats *sgs
 	return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu);
 }
 
+/* One group has more than one SMT CPU while the other group does not */
+static inline bool smt_vs_nonsmt_groups(struct sched_group *sg1,
+				    struct sched_group *sg2)
+{
+	if (!sg1 || !sg2)
+		return false;
+
+	return (sg1->flags & SD_SHARE_CPUCAPACITY) !=
+		(sg2->flags & SD_SHARE_CPUCAPACITY);
+}
+
+static inline bool smt_balance(struct lb_env *env, struct sg_lb_stats *sgs,
+			       struct sched_group *group)
+{
+	if (env->idle == CPU_NOT_IDLE)
+		return false;
+
+	/*
+	 * For SMT source group, it is better to move a task
+	 * to a CPU that doesn't have multiple tasks sharing its CPU capacity.
+	 * Note that if a group has a single SMT, SD_SHARE_CPUCAPACITY
+	 * will not be on.
+	 */
+	if (group->flags & SD_SHARE_CPUCAPACITY &&
+	    sgs->sum_h_nr_running > 1)
+		return true;
+
+	return false;
+}
+
 static inline bool
 sched_reduced_capacity(struct rq *rq, struct sched_domain *sd)
 {
@@ -9588,6 +9627,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 		sgs->group_asym_packing = 1;
 	}
 
+	/* Check for loaded SMT group to be balanced to dst CPU */
+	if (!local_group && smt_balance(env, sgs, group))
+		sgs->group_smt_balance = 1;
+
 	sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
 
 	/* Computing avg_load makes sense only when group is overloaded */
@@ -9672,6 +9715,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 			return false;
 		break;
 
+	case group_smt_balance:
 	case group_fully_busy:
 		/*
 		 * Select the fully busy group with highest avg_load. In
@@ -9700,6 +9744,18 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 		break;
 
 	case group_has_spare:
+		/*
+		 * Do not pick sg with SMT CPUs over sg with pure CPUs,
+		 * as we do not want to pull task off SMT core with one task
+		 * and make the core idle.
+		 */
+		if (smt_vs_nonsmt_groups(sds->busiest, sg)) {
+			if (sg->flags & SD_SHARE_CPUCAPACITY && sgs->sum_h_nr_running <= 1)
+				return false;
+			else
+				return true;
+		}
+
 		/*
 		 * Select not overloaded group with lowest number of idle cpus
 		 * and highest number of running tasks. We could also compare
@@ -9896,6 +9952,7 @@ static bool update_pick_idlest(struct sched_group *idlest,
 
 	case group_imbalanced:
 	case group_asym_packing:
+	case group_smt_balance:
 		/* Those types are not used in the slow wakeup path */
 		return false;
 
@@ -10027,6 +10084,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 
 	case group_imbalanced:
 	case group_asym_packing:
+	case group_smt_balance:
 		/* Those type are not used in the slow wakeup path */
 		return NULL;
 
@@ -10281,6 +10339,13 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 		return;
 	}
 
+	if (busiest->group_type == group_smt_balance) {
+		/* Reduce number of tasks sharing CPU capacity */
+		env->migration_type = migrate_task;
+		env->imbalance = 1;
+		return;
+	}
+
 	if (busiest->group_type == group_imbalanced) {
 		/*
 		 * In the group_imb case we cannot rely on group-wide averages
@@ -10536,16 +10601,23 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 		goto force_balance;
 
 	if (busiest->group_type != group_overloaded) {
-		if (env->idle == CPU_NOT_IDLE)
+		if (env->idle == CPU_NOT_IDLE) {
 			/*
 			 * If the busiest group is not overloaded (and as a
 			 * result the local one too) but this CPU is already
 			 * busy, let another idle CPU try to pull task.
 			 */
 			goto out_balanced;
+		}
+
+		if (busiest->group_type == group_smt_balance &&
+		    smt_vs_nonsmt_groups(sds.local, sds.busiest)) {
+			/* Let non SMT CPU pull from SMT CPU sharing with sibling */
+			goto force_balance;
+		}
 
 		if (busiest->group_weight > 1 &&
-		    local->idle_cpus <= (busiest->idle_cpus + 1))
+		    local->idle_cpus <= (busiest->idle_cpus + 1)) {
 			/*
 			 * If the busiest group is not overloaded
 			 * and there is no imbalance between this and busiest
@@ -10556,12 +10628,14 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 			 * there is more than 1 CPU per group.
 			 */
 			goto out_balanced;
+		}
 
-		if (busiest->sum_h_nr_running == 1)
+		if (busiest->sum_h_nr_running == 1) {
 			/*
 			 * busiest doesn't have any tasks waiting to run
 			 */
 			goto out_balanced;
+		}
 	}
 
 force_balance:

From d24cb0d9113f5932b8832533ce82351b5911ed50 Mon Sep 17 00:00:00 2001
From: Tim C Chen <tim.c.chen@linux.intel.com>
Date: Fri, 7 Jul 2023 15:57:01 -0700
Subject: [PATCH 0140/2047] sched/topology: Record number of cores in sched
 group

When balancing sibling domains that have different number of cores,
tasks in respective sibling domain should be proportional to the
number of cores in each domain. In preparation of implementing such a
policy, record the number of cores in a scheduling group.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/04641eeb0e95c21224352f5743ecb93dfac44654.1688770494.git.tim.c.chen@linux.intel.com
---
 kernel/sched/sched.h    |  1 +
 kernel/sched/topology.c | 12 +++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1dcea9bfa0a8..9baeb1a2dfdd 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1884,6 +1884,7 @@ struct sched_group {
 	atomic_t		ref;
 
 	unsigned int		group_weight;
+	unsigned int		cores;
 	struct sched_group_capacity *sgc;
 	int			asym_prefer_cpu;	/* CPU of highest priority in group */
 	int			flags;
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d3a3b2646ec4..7cfcfe5d27b9 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1275,14 +1275,24 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 {
 	struct sched_group *sg = sd->groups;
+	struct cpumask *mask = sched_domains_tmpmask2;
 
 	WARN_ON(!sg);
 
 	do {
-		int cpu, max_cpu = -1;
+		int cpu, cores = 0, max_cpu = -1;
 
 		sg->group_weight = cpumask_weight(sched_group_span(sg));
 
+		cpumask_copy(mask, sched_group_span(sg));
+		for_each_cpu(cpu, mask) {
+			cores++;
+#ifdef CONFIG_SCHED_SMT
+			cpumask_andnot(mask, mask, cpu_smt_mask(cpu));
+#endif
+		}
+		sg->cores = cores;
+
 		if (!(sd->flags & SD_ASYM_PACKING))
 			goto next;
 

From 7ff1693236f5d97a939dbeb660c07671a2d57071 Mon Sep 17 00:00:00 2001
From: Tim C Chen <tim.c.chen@linux.intel.com>
Date: Fri, 7 Jul 2023 15:57:02 -0700
Subject: [PATCH 0141/2047] sched/fair: Implement prefer sibling imbalance
 calculation between asymmetric groups

In the current prefer sibling load balancing code, there is an implicit
assumption that the busiest sched group and local sched group are
equivalent, hence the tasks to be moved is simply the difference in
number of tasks between the two groups (i.e. imbalance) divided by two.

However, we may have different number of cores between the cluster groups,
say when we take CPU offline or we have hybrid groups.  In that case,
we should balance between the two groups such that #tasks/#cores ratio
is the same between the same between both groups.  Hence the imbalance
computed will need to reflect this.

Adjust the sibling imbalance computation to take into account of the
above considerations.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/4eacbaa236e680687dae2958378a6173654113df.1688770494.git.tim.c.chen@linux.intel.com
---
 kernel/sched/fair.c | 41 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index accbfbbfa6a5..c6246fbcd74f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9535,6 +9535,41 @@ static inline bool smt_balance(struct lb_env *env, struct sg_lb_stats *sgs,
 	return false;
 }
 
+static inline long sibling_imbalance(struct lb_env *env,
+				    struct sd_lb_stats *sds,
+				    struct sg_lb_stats *busiest,
+				    struct sg_lb_stats *local)
+{
+	int ncores_busiest, ncores_local;
+	long imbalance;
+
+	if (env->idle == CPU_NOT_IDLE || !busiest->sum_nr_running)
+		return 0;
+
+	ncores_busiest = sds->busiest->cores;
+	ncores_local = sds->local->cores;
+
+	if (ncores_busiest == ncores_local) {
+		imbalance = busiest->sum_nr_running;
+		lsub_positive(&imbalance, local->sum_nr_running);
+		return imbalance;
+	}
+
+	/* Balance such that nr_running/ncores ratio are same on both groups */
+	imbalance = ncores_local * busiest->sum_nr_running;
+	lsub_positive(&imbalance, ncores_busiest * local->sum_nr_running);
+	/* Normalize imbalance and do rounding on normalization */
+	imbalance = 2 * imbalance + ncores_local + ncores_busiest;
+	imbalance /= ncores_local + ncores_busiest;
+
+	/* Take advantage of resource in an empty sched group */
+	if (imbalance == 0 && local->sum_nr_running == 0 &&
+	    busiest->sum_nr_running > 1)
+		imbalance = 2;
+
+	return imbalance;
+}
+
 static inline bool
 sched_reduced_capacity(struct rq *rq, struct sched_domain *sd)
 {
@@ -10393,14 +10428,12 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 		}
 
 		if (busiest->group_weight == 1 || sds->prefer_sibling) {
-			unsigned int nr_diff = busiest->sum_nr_running;
 			/*
 			 * When prefer sibling, evenly spread running tasks on
 			 * groups.
 			 */
 			env->migration_type = migrate_task;
-			lsub_positive(&nr_diff, local->sum_nr_running);
-			env->imbalance = nr_diff;
+			env->imbalance = sibling_imbalance(env, sds, busiest, local);
 		} else {
 
 			/*
@@ -10597,7 +10630,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 	 * group's child domain.
 	 */
 	if (sds.prefer_sibling && local->group_type == group_has_spare &&
-	    busiest->sum_nr_running > local->sum_nr_running + 1)
+	    sibling_imbalance(env, &sds, busiest, local) > 1)
 		goto force_balance;
 
 	if (busiest->group_type != group_overloaded) {

From b1bfeab9b00283f521d2100afb9f5af84ccdae13 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Fri, 7 Jul 2023 15:57:03 -0700
Subject: [PATCH 0142/2047] sched/fair: Consider the idle state of the whole
 core for load balance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

should_we_balance() traverses the group_balance_mask (AND'ed with lb_env::
cpus) starting from lower numbered CPUs looking for the first idle CPU.

In hybrid x86 systems, the siblings of SMT cores get CPU numbers, before
non-SMT cores:

	[0, 1] [2, 3] [4, 5] 6 7 8 9
         b  i   b  i   b  i  b i i i

In the figure above, CPUs in brackets are siblings of an SMT core. The
rest are non-SMT cores. 'b' indicates a busy CPU, 'i' indicates an
idle CPU.

We should let a CPU on a fully idle core get the first chance to idle
load balance as it has more CPU capacity than a CPU on an idle SMT
CPU with busy sibling.  So for the figure above, if we are running
should_we_balance() to CPU 1, we should return false to let CPU 7 on
idle core to have a chance first to idle load balance.

A partially busy (i.e., of type group_has_spare) local group with SMT 
cores will often have only one SMT sibling busy. If the destination CPU
is a non-SMT core, partially busy, lower-numbered, SMT cores should not
be considered when finding the first idle CPU. 

However, in should_we_balance(), when we encounter idle SMT first in partially
busy core, we prematurely break the search for the first idle CPU.

Higher-numbered, non-SMT cores is not given the chance to have
idle balance done on their behalf. Those CPUs will only be considered
for idle balancing by chance via CPU_NEWLY_IDLE.

Instead, consider the idle state of the whole SMT core.

Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Co-developed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/807bdd05331378ea3bf5956bda87ded1036ba769.1688770494.git.tim.c.chen@linux.intel.com
---
 kernel/sched/fair.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c6246fbcd74f..a87988327f88 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10902,7 +10902,7 @@ static int active_load_balance_cpu_stop(void *data);
 static int should_we_balance(struct lb_env *env)
 {
 	struct sched_group *sg = env->sd->groups;
-	int cpu;
+	int cpu, idle_smt = -1;
 
 	/*
 	 * Ensure the balancing environment is consistent; can happen
@@ -10929,10 +10929,24 @@ static int should_we_balance(struct lb_env *env)
 		if (!idle_cpu(cpu))
 			continue;
 
+		/*
+		 * Don't balance to idle SMT in busy core right away when
+		 * balancing cores, but remember the first idle SMT CPU for
+		 * later consideration.  Find CPU on an idle core first.
+		 */
+		if (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && !is_core_idle(cpu)) {
+			if (idle_smt == -1)
+				idle_smt = cpu;
+			continue;
+		}
+
 		/* Are we the first idle CPU? */
 		return cpu == env->dst_cpu;
 	}
 
+	if (idle_smt == env->dst_cpu)
+		return true;
+
 	/* Are we the first CPU of this group ? */
 	return group_balance_cpu(sg) == env->dst_cpu;
 }

From 17953249bf02448efaed75b097aa2e9086ca7685 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sat, 8 Jul 2023 14:43:45 +0200
Subject: [PATCH 0143/2047] x86/sched: Enable cluster scheduling on Hybrid

With the SMT vs non-SMT balancing issues sorted, also enable the
cluster domain for Hybrid machines.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/smpboot.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e1aa2cd7734b..4c314475cc13 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -632,14 +632,9 @@ static void __init build_sched_topology(void)
 	};
 #endif
 #ifdef CONFIG_SCHED_CLUSTER
-	/*
-	 * For now, skip the cluster domain on Hybrid.
-	 */
-	if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
-		x86_topology[i++] = (struct sched_domain_topology_level){
-			cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS)
-		};
-	}
+	x86_topology[i++] = (struct sched_domain_topology_level){
+		cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS)
+	};
 #endif
 #ifdef CONFIG_SCHED_MC
 	x86_topology[i++] = (struct sched_domain_topology_level){

From ed74cc4995d314ea6cbf406caf978c442f451fa5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 7 Jul 2023 15:57:05 -0700
Subject: [PATCH 0144/2047] sched/debug: Dump domains' sched group flags

There have been a case where the SD_SHARE_CPUCAPACITY sched group flag
in a parent domain were not set and propagated properly when a degenerate
domain is removed.

Add dump of domain sched group flags of a CPU to make debug easier
in the future.

Usage:
cat /debug/sched/domains/cpu0/domain1/groups_flags
to dump cpu0 domain1's sched group flags.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lore.kernel.org/r/ed1749262d94d95a8296c86a415999eda90bcfe3.1688770494.git.tim.c.chen@linux.intel.com
---
 kernel/sched/debug.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 066ff1c8ae4e..aeeba46a096b 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -427,6 +427,7 @@ static void register_sd(struct sched_domain *sd, struct dentry *parent)
 #undef SDM
 
 	debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
+	debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
 }
 
 void update_sched_domain_debugfs(void)

From 7ee7642c91410fb90cc45e799a3a46e1607ecd79 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Tue, 11 Jul 2023 10:13:59 +0200
Subject: [PATCH 0145/2047] sched/fair: Stabilize asym cpu capacity system idle
 cpu selection

select_idle_capacity() not only looks for an idle cpu that fits for the
waking task but also for cpu with highest bandwidth when no cpu fits.
Start the loop with target cpu so it will be selected 1st when no cpu fits
but several cpus shared the same bandwidth. Starting with target cpu
prevents the task to migrate between cpus with same bandwidth at every
wakeup when no cpu fits.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230711081359.868862-1-vincent.guittot@linaro.org
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a87988327f88..0cd1cdbae534 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7096,7 +7096,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 	util_min = uclamp_eff_value(p, UCLAMP_MIN);
 	util_max = uclamp_eff_value(p, UCLAMP_MAX);
 
-	for_each_cpu_wrap(cpu, cpus, target + 1) {
+	for_each_cpu_wrap(cpu, cpus, target) {
 		unsigned long cpu_cap = capacity_of(cpu);
 
 		if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))

From ccf61486fe1e1a48e18c638d1813cda77b3c0737 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar@cyphar.com>
Date: Fri, 14 Jul 2023 00:09:58 +1000
Subject: [PATCH 0146/2047] procfs: block chmod on /proc/thread-self/comm

Due to an oversight in commit 1b3044e39a89 ("procfs: fix pthread
cross-thread naming if !PR_DUMPABLE") in switching from REG to NOD,
chmod operations on /proc/thread-self/comm were no longer blocked as
they are on almost all other procfs files.

A very similar situation with /proc/self/environ was used to as a root
exploit a long time ago, but procfs has SB_I_NOEXEC so this is simply a
correctness issue.

Ref: https://lwn.net/Articles/191954/
Ref: 6d76fa58b050 ("Don't allow chmod() on the /proc/<pid>/ files")
Fixes: 1b3044e39a89 ("procfs: fix pthread cross-thread naming if !PR_DUMPABLE")
Cc: stable@vger.kernel.org # v4.7+
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
Message-Id: <20230713141001.27046-1-cyphar@cyphar.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/proc/base.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 05452c3b9872..7394229816f3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3583,7 +3583,8 @@ static int proc_tid_comm_permission(struct mnt_idmap *idmap,
 }
 
 static const struct inode_operations proc_tid_comm_inode_operations = {
-		.permission = proc_tid_comm_permission,
+		.setattr	= proc_setattr,
+		.permission	= proc_tid_comm_permission,
 };
 
 /*

From 47ee108a113c72ec8ceee2c7af52c19ed72e6ad7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 12 Jul 2023 00:21:52 +0100
Subject: [PATCH 0147/2047] regmap: Provide user selectable option to enable
 regmap

Since apparently enabling all the KUnit tests shouldn't enable any new
subsystems it is hard to enable the regmap KUnit tests in normal KUnit
testing scenarios that don't enable any drivers.  Add a Kconfig option
to help with this and include it in the KUnit all tests config.

Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230712-regmap-kunit-enable-v1-1-13e296bd0204@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/Kconfig                  | 12 +++++++++++-
 tools/testing/kunit/configs/all_tests.config |  2 ++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig
index 0db2021f7477..b1affac70d5d 100644
--- a/drivers/base/regmap/Kconfig
+++ b/drivers/base/regmap/Kconfig
@@ -4,7 +4,7 @@
 # subsystems should select the appropriate symbols.
 
 config REGMAP
-	bool "Register Map support" if KUNIT_ALL_TESTS
+	bool
 	default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SOUNDWIRE || REGMAP_SOUNDWIRE_MBQ || REGMAP_SCCB || REGMAP_I3C || REGMAP_SPI_AVMM || REGMAP_MDIO || REGMAP_FSI)
 	select IRQ_DOMAIN if REGMAP_IRQ
 	select MDIO_BUS if REGMAP_MDIO
@@ -23,6 +23,16 @@ config REGMAP_KUNIT
 	default KUNIT_ALL_TESTS
 	select REGMAP_RAM
 
+config REGMAP_BUILD
+	bool "Enable regmap build"
+	depends on KUNIT
+	select REGMAP
+	help
+	  This option exists purely to allow the regmap KUnit tests to
+	  be enabled without having to enable some driver that uses
+	  regmap due to unfortunate issues with how KUnit tests are
+	  normally enabled.
+
 config REGMAP_AC97
 	tristate
 
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index 0393940c706a..873f3e06ccad 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -33,5 +33,7 @@ CONFIG_DAMON_PADDR=y
 CONFIG_DEBUG_FS=y
 CONFIG_DAMON_DBGFS=y
 
+CONFIG_REGMAP_BUILD=y
+
 CONFIG_SECURITY=y
 CONFIG_SECURITY_APPARMOR=y

From 497897cb200d03b89524e6b4dfb71c77af324766 Mon Sep 17 00:00:00 2001
From: Christoph Niedermaier <cniedermaier@dh-electronics.com>
Date: Thu, 13 Jul 2023 11:03:28 +0200
Subject: [PATCH 0148/2047] regulator: da9062: Make the use of IRQ optional

This patch makes the use of IRQ optional to make the DA9061/62 usable
for designs that don't have the IRQ pin connected, because the regulator
is usable without IRQ.

Signed-off-by: Christoph Niedermaier <cniedermaier@dh-electronics.com>
Reviewed-by: Adam Ward <DLG-Adam.Ward.opensource@dm.renesas.com>
Reviewed-by: Marek Vasut <marex@denx.de>
Link: https://lore.kernel.org/r/20230713090328.3879-1-cniedermaier@dh-electronics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/da9062-regulator.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/regulator/da9062-regulator.c b/drivers/regulator/da9062-regulator.c
index c28b061eef02..1d354db0c1bd 100644
--- a/drivers/regulator/da9062-regulator.c
+++ b/drivers/regulator/da9062-regulator.c
@@ -924,7 +924,7 @@ static int da9062_regulator_probe(struct platform_device *pdev)
 	struct da9062_regulator *regl;
 	struct regulator_config config = { };
 	const struct da9062_regulator_info *rinfo;
-	int irq, n, ret;
+	int n, ret;
 	int max_regulators;
 
 	switch (chip->chip_type) {
@@ -1012,12 +1012,11 @@ static int da9062_regulator_probe(struct platform_device *pdev)
 	}
 
 	/* LDOs overcurrent event support */
-	irq = platform_get_irq_byname(pdev, "LDO_LIM");
-	if (irq < 0)
-		return irq;
-	regulators->irq_ldo_lim = irq;
+	regulators->irq_ldo_lim = platform_get_irq_byname_optional(pdev, "LDO_LIM");
+	if (regulators->irq_ldo_lim < 0)
+		return 0;
 
-	ret = devm_request_threaded_irq(&pdev->dev, irq,
+	ret = devm_request_threaded_irq(&pdev->dev, regulators->irq_ldo_lim,
 					NULL, da9062_ldo_lim_event,
 					IRQF_TRIGGER_LOW | IRQF_ONESHOT,
 					"LDO_LIM", regulators);

From d7de5d8e403a1293e10f8f59a1f2db320232c831 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 17 May 2023 21:10:20 +0200
Subject: [PATCH 0149/2047] cpupower: Bump soname version

Several functions in the libcpupower API are renamed or removed in
Linux 4.7.  This is an backward-incompatible ABI change, so the
library soname should change from libcpupower.so.0 to
libcpupower.so.1.

Fixes: ac5a181d065d ("cpupower: Add cpuidle parts into library")
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Salvatore Bonaccorso <carnil@debian.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 59bfa05dec5d..dc531805a570 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -53,7 +53,7 @@ DESTDIR ?=
 
 VERSION:=			$(shell ./utils/version-gen.sh)
 LIB_MAJ=			0.0.1
-LIB_MIN=			0
+LIB_MIN=			1
 
 PACKAGE =			cpupower
 PACKAGE_BUGREPORT =		linux-pm@vger.kernel.org

From 5d1f903f75a80daa4dfb3d84e114ec8ecbf29956 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Wed, 12 Jul 2023 20:58:49 +0200
Subject: [PATCH 0150/2047] attr: block mode changes of symlinks

Changing the mode of symlinks is meaningless as the vfs doesn't take the
mode of a symlink into account during path lookup permission checking.

However, the vfs doesn't block mode changes on symlinks. This however,
has lead to an untenable mess roughly classifiable into the following
two categories:

(1) Filesystems that don't implement a i_op->setattr() for symlinks.

    Such filesystems may or may not know that without i_op->setattr()
    defined, notify_change() falls back to simple_setattr() causing the
    inode's mode in the inode cache to be changed.

    That's a generic issue as this will affect all non-size changing
    inode attributes including ownership changes.

    Example: afs

(2) Filesystems that fail with EOPNOTSUPP but change the mode of the
    symlink nonetheless.

    Some filesystems will happily update the mode of a symlink but still
    return EOPNOTSUPP. This is the biggest source of confusion for
    userspace.

    The EOPNOTSUPP in this case comes from POSIX ACLs. Specifically it
    comes from filesystems that call posix_acl_chmod(), e.g., btrfs via

        if (!err && attr->ia_valid & ATTR_MODE)
                err = posix_acl_chmod(idmap, dentry, inode->i_mode);

    Filesystems including btrfs don't implement i_op->set_acl() so
    posix_acl_chmod() will report EOPNOTSUPP.

    When posix_acl_chmod() is called, most filesystems will have
    finished updating the inode.

    Perversely, this has the consequences that this behavior may depend
    on two kconfig options and mount options:

    * CONFIG_POSIX_ACL={y,n}
    * CONFIG_${FSTYPE}_POSIX_ACL={y,n}
    * Opt_acl, Opt_noacl

    Example: btrfs, ext4, xfs

The only way to change the mode on a symlink currently involves abusing
an O_PATH file descriptor in the following manner:

        fd = openat(-1, "/path/to/link", O_CLOEXEC | O_PATH | O_NOFOLLOW);

        char path[PATH_MAX];
        snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
        chmod(path, 0000);

But for most major filesystems with POSIX ACL support such as btrfs,
ext4, ceph, tmpfs, xfs and others this will fail with EOPNOTSUPP with
the mode still updated due to the aforementioned posix_acl_chmod()
nonsense.

So, given that for all major filesystems this would fail with EOPNOTSUPP
and that both glibc (cf. [1]) and musl (cf. [2]) outright block mode
changes on symlinks we should just try and block mode changes on
symlinks directly in the vfs and have a clean break with this nonsense.

If this causes any regressions, we do the next best thing and fix up all
filesystems that do return EOPNOTSUPP with the mode updated to not call
posix_acl_chmod() on symlinks.

But as usual, let's try the clean cut solution first. It's a simple
patch that can be easily reverted. Not marking this for backport as I'll
do that manually if we're reasonably sure that this works and there are
no strong objections.

We could block this in chmod_common() but it's more appropriate to do it
notify_change() as it will also mean that we catch filesystems that
change symlink permissions explicitly or accidently.

Similar proposals were floated in the past as in [3] and [4] and again
recently in [5]. There's also a couple of bugs about this inconsistency
as in [6] and [7].

Link: https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/fchmodat.c;h=99527a3727e44cb8661ee1f743068f108ec93979;hb=HEAD [1]
Link: https://git.musl-libc.org/cgit/musl/tree/src/stat/fchmodat.c [2]
Link: https://lore.kernel.org/all/20200911065733.GA31579@infradead.org [3]
Link: https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00518.html [4]
Link: https://lore.kernel.org/lkml/87lefmbppo.fsf@oldenburg.str.redhat.com [5]
Link: https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00467.html [6]
Link: https://sourceware.org/bugzilla/show_bug.cgi?id=14578#c17 [7]
Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org # please backport to all LTSes but not before v6.6-rc2 is tagged
Suggested-by: Christoph Hellwig <hch@lst.de>
Suggested-by: Florian Weimer <fweimer@redhat.com>
Message-Id: <20230712-vfs-chmod-symlinks-v2-1-08cfb92b61dd@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/attr.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/attr.c b/fs/attr.c
index d60dc1edb526..87af68bb8ad2 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -394,9 +394,25 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
 		return error;
 
 	if ((ia_valid & ATTR_MODE)) {
-		umode_t amode = attr->ia_mode;
+		/*
+		 * Don't allow changing the mode of symlinks:
+		 *
+		 * (1) The vfs doesn't take the mode of symlinks into account
+		 *     during permission checking.
+		 * (2) This has never worked correctly. Most major filesystems
+		 *     did return EOPNOTSUPP due to interactions with POSIX ACLs
+		 *     but did still updated the mode of the symlink.
+		 *     This inconsistency led system call wrapper providers such
+		 *     as libc to block changing the mode of symlinks with
+		 *     EOPNOTSUPP already.
+		 * (3) To even do this in the first place one would have to use
+		 *     specific file descriptors and quite some effort.
+		 */
+		if (S_ISLNK(inode->i_mode))
+			return -EOPNOTSUPP;
+
 		/* Flag setting protected by i_mutex */
-		if (is_sxid(amode))
+		if (is_sxid(attr->ia_mode))
 			inode->i_flags &= ~S_NOSEC;
 	}
 

From 62d9e475065597bfd9c3a646af4c4faab637a599 Mon Sep 17 00:00:00 2001
From: Danny Tsen <dtsen@linux.ibm.com>
Date: Wed, 26 Apr 2023 15:11:43 -0400
Subject: [PATCH 0151/2047] crypto: chacha20-p10 - An optimized Chacha20
 implementation with 8-way unrolling for ppc64le

Improve overall performance of chacha20 encrypt and decrypt operations
for Power10 or later CPU.

Signed-off-by: Danny Tsen <dtsen@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/powerpc/crypto/chacha-p10le-8x.S | 842 ++++++++++++++++++++++++++
 1 file changed, 842 insertions(+)
 create mode 100644 arch/powerpc/crypto/chacha-p10le-8x.S

diff --git a/arch/powerpc/crypto/chacha-p10le-8x.S b/arch/powerpc/crypto/chacha-p10le-8x.S
new file mode 100644
index 000000000000..17bedb66b822
--- /dev/null
+++ b/arch/powerpc/crypto/chacha-p10le-8x.S
@@ -0,0 +1,842 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#
+# Accelerated chacha20 implementation for ppc64le.
+#
+# Copyright 2023- IBM Corp. All rights reserved
+#
+#===================================================================================
+# Written by Danny Tsen <dtsen@us.ibm.com>
+#
+# chacha_p10le_8x(u32 *state, byte *dst, const byte *src,
+#				 size_t len, int nrounds);
+#
+# do rounds,  8 quarter rounds
+# 1.  a += b; d ^= a; d <<<= 16;
+# 2.  c += d; b ^= c; b <<<= 12;
+# 3.  a += b; d ^= a; d <<<= 8;
+# 4.  c += d; b ^= c; b <<<= 7
+#
+# row1 = (row1 + row2),  row4 = row1 xor row4,  row4 rotate each word by 16
+# row3 = (row3 + row4),  row2 = row3 xor row2,  row2 rotate each word by 12
+# row1 = (row1 + row2), row4 = row1 xor row4,  row4 rotate each word by 8
+# row3 = (row3 + row4), row2 = row3 xor row2,  row2 rotate each word by 7
+#
+# 4 blocks (a b c d)
+#
+# a0 b0 c0 d0
+# a1 b1 c1 d1
+# ...
+# a4 b4 c4 d4
+# ...
+# a8 b8 c8 d8
+# ...
+# a12 b12 c12 d12
+# a13 ...
+# a14 ...
+# a15 b15 c15 d15
+#
+# Column round (v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
+# Diagnal round (v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
+#
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
+#include <linux/linkage.h>
+
+.machine	"any"
+.text
+
+.macro	SAVE_GPR GPR OFFSET FRAME
+	std	\GPR,\OFFSET(\FRAME)
+.endm
+
+.macro	SAVE_VRS VRS OFFSET FRAME
+	li	16, \OFFSET
+	stvx	\VRS, 16, \FRAME
+.endm
+
+.macro	SAVE_VSX VSX OFFSET FRAME
+	li	16, \OFFSET
+	stxvx	\VSX, 16, \FRAME
+.endm
+
+.macro	RESTORE_GPR GPR OFFSET FRAME
+	ld	\GPR,\OFFSET(\FRAME)
+.endm
+
+.macro	RESTORE_VRS VRS OFFSET FRAME
+	li	16, \OFFSET
+	lvx	\VRS, 16, \FRAME
+.endm
+
+.macro	RESTORE_VSX VSX OFFSET FRAME
+	li	16, \OFFSET
+	lxvx	\VSX, 16, \FRAME
+.endm
+
+.macro SAVE_REGS
+	mflr 0
+	std 0, 16(1)
+	stdu 1,-752(1)
+
+	SAVE_GPR 14, 112, 1
+	SAVE_GPR 15, 120, 1
+	SAVE_GPR 16, 128, 1
+	SAVE_GPR 17, 136, 1
+	SAVE_GPR 18, 144, 1
+	SAVE_GPR 19, 152, 1
+	SAVE_GPR 20, 160, 1
+	SAVE_GPR 21, 168, 1
+	SAVE_GPR 22, 176, 1
+	SAVE_GPR 23, 184, 1
+	SAVE_GPR 24, 192, 1
+	SAVE_GPR 25, 200, 1
+	SAVE_GPR 26, 208, 1
+	SAVE_GPR 27, 216, 1
+	SAVE_GPR 28, 224, 1
+	SAVE_GPR 29, 232, 1
+	SAVE_GPR 30, 240, 1
+	SAVE_GPR 31, 248, 1
+
+	addi	9, 1, 256
+	SAVE_VRS 20, 0, 9
+	SAVE_VRS 21, 16, 9
+	SAVE_VRS 22, 32, 9
+	SAVE_VRS 23, 48, 9
+	SAVE_VRS 24, 64, 9
+	SAVE_VRS 25, 80, 9
+	SAVE_VRS 26, 96, 9
+	SAVE_VRS 27, 112, 9
+	SAVE_VRS 28, 128, 9
+	SAVE_VRS 29, 144, 9
+	SAVE_VRS 30, 160, 9
+	SAVE_VRS 31, 176, 9
+
+	SAVE_VSX 14, 192, 9
+	SAVE_VSX 15, 208, 9
+	SAVE_VSX 16, 224, 9
+	SAVE_VSX 17, 240, 9
+	SAVE_VSX 18, 256, 9
+	SAVE_VSX 19, 272, 9
+	SAVE_VSX 20, 288, 9
+	SAVE_VSX 21, 304, 9
+	SAVE_VSX 22, 320, 9
+	SAVE_VSX 23, 336, 9
+	SAVE_VSX 24, 352, 9
+	SAVE_VSX 25, 368, 9
+	SAVE_VSX 26, 384, 9
+	SAVE_VSX 27, 400, 9
+	SAVE_VSX 28, 416, 9
+	SAVE_VSX 29, 432, 9
+	SAVE_VSX 30, 448, 9
+	SAVE_VSX 31, 464, 9
+.endm # SAVE_REGS
+
+.macro RESTORE_REGS
+	addi	9, 1, 256
+	RESTORE_VRS 20, 0, 9
+	RESTORE_VRS 21, 16, 9
+	RESTORE_VRS 22, 32, 9
+	RESTORE_VRS 23, 48, 9
+	RESTORE_VRS 24, 64, 9
+	RESTORE_VRS 25, 80, 9
+	RESTORE_VRS 26, 96, 9
+	RESTORE_VRS 27, 112, 9
+	RESTORE_VRS 28, 128, 9
+	RESTORE_VRS 29, 144, 9
+	RESTORE_VRS 30, 160, 9
+	RESTORE_VRS 31, 176, 9
+
+	RESTORE_VSX 14, 192, 9
+	RESTORE_VSX 15, 208, 9
+	RESTORE_VSX 16, 224, 9
+	RESTORE_VSX 17, 240, 9
+	RESTORE_VSX 18, 256, 9
+	RESTORE_VSX 19, 272, 9
+	RESTORE_VSX 20, 288, 9
+	RESTORE_VSX 21, 304, 9
+	RESTORE_VSX 22, 320, 9
+	RESTORE_VSX 23, 336, 9
+	RESTORE_VSX 24, 352, 9
+	RESTORE_VSX 25, 368, 9
+	RESTORE_VSX 26, 384, 9
+	RESTORE_VSX 27, 400, 9
+	RESTORE_VSX 28, 416, 9
+	RESTORE_VSX 29, 432, 9
+	RESTORE_VSX 30, 448, 9
+	RESTORE_VSX 31, 464, 9
+
+	RESTORE_GPR 14, 112, 1
+	RESTORE_GPR 15, 120, 1
+	RESTORE_GPR 16, 128, 1
+	RESTORE_GPR 17, 136, 1
+	RESTORE_GPR 18, 144, 1
+	RESTORE_GPR 19, 152, 1
+	RESTORE_GPR 20, 160, 1
+	RESTORE_GPR 21, 168, 1
+	RESTORE_GPR 22, 176, 1
+	RESTORE_GPR 23, 184, 1
+	RESTORE_GPR 24, 192, 1
+	RESTORE_GPR 25, 200, 1
+	RESTORE_GPR 26, 208, 1
+	RESTORE_GPR 27, 216, 1
+	RESTORE_GPR 28, 224, 1
+	RESTORE_GPR 29, 232, 1
+	RESTORE_GPR 30, 240, 1
+	RESTORE_GPR 31, 248, 1
+
+	addi    1, 1, 752
+	ld 0, 16(1)
+	mtlr 0
+.endm # RESTORE_REGS
+
+.macro QT_loop_8x
+	# QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
+	xxlor	0, 32+25, 32+25
+	xxlor	32+25, 20, 20
+	vadduwm 0, 0, 4
+	vadduwm 1, 1, 5
+	vadduwm 2, 2, 6
+	vadduwm 3, 3, 7
+	  vadduwm 16, 16, 20
+	  vadduwm 17, 17, 21
+	  vadduwm 18, 18, 22
+	  vadduwm 19, 19, 23
+
+	  vpermxor 12, 12, 0, 25
+	  vpermxor 13, 13, 1, 25
+	  vpermxor 14, 14, 2, 25
+	  vpermxor 15, 15, 3, 25
+	  vpermxor 28, 28, 16, 25
+	  vpermxor 29, 29, 17, 25
+	  vpermxor 30, 30, 18, 25
+	  vpermxor 31, 31, 19, 25
+	xxlor	32+25, 0, 0
+	vadduwm 8, 8, 12
+	vadduwm 9, 9, 13
+	vadduwm 10, 10, 14
+	vadduwm 11, 11, 15
+	  vadduwm 24, 24, 28
+	  vadduwm 25, 25, 29
+	  vadduwm 26, 26, 30
+	  vadduwm 27, 27, 31
+	vxor 4, 4, 8
+	vxor 5, 5, 9
+	vxor 6, 6, 10
+	vxor 7, 7, 11
+	  vxor 20, 20, 24
+	  vxor 21, 21, 25
+	  vxor 22, 22, 26
+	  vxor 23, 23, 27
+
+	xxlor	0, 32+25, 32+25
+	xxlor	32+25, 21, 21
+	vrlw 4, 4, 25  #
+	vrlw 5, 5, 25
+	vrlw 6, 6, 25
+	vrlw 7, 7, 25
+	  vrlw 20, 20, 25  #
+	  vrlw 21, 21, 25
+	  vrlw 22, 22, 25
+	  vrlw 23, 23, 25
+	xxlor	32+25, 0, 0
+	vadduwm 0, 0, 4
+	vadduwm 1, 1, 5
+	vadduwm 2, 2, 6
+	vadduwm 3, 3, 7
+	  vadduwm 16, 16, 20
+	  vadduwm 17, 17, 21
+	  vadduwm 18, 18, 22
+	  vadduwm 19, 19, 23
+
+	xxlor	0, 32+25, 32+25
+	xxlor	32+25, 22, 22
+	  vpermxor 12, 12, 0, 25
+	  vpermxor 13, 13, 1, 25
+	  vpermxor 14, 14, 2, 25
+	  vpermxor 15, 15, 3, 25
+	  vpermxor 28, 28, 16, 25
+	  vpermxor 29, 29, 17, 25
+	  vpermxor 30, 30, 18, 25
+	  vpermxor 31, 31, 19, 25
+	xxlor	32+25, 0, 0
+	vadduwm 8, 8, 12
+	vadduwm 9, 9, 13
+	vadduwm 10, 10, 14
+	vadduwm 11, 11, 15
+	  vadduwm 24, 24, 28
+	  vadduwm 25, 25, 29
+	  vadduwm 26, 26, 30
+	  vadduwm 27, 27, 31
+	xxlor	0, 32+28, 32+28
+	xxlor	32+28, 23, 23
+	vxor 4, 4, 8
+	vxor 5, 5, 9
+	vxor 6, 6, 10
+	vxor 7, 7, 11
+	  vxor 20, 20, 24
+	  vxor 21, 21, 25
+	  vxor 22, 22, 26
+	  vxor 23, 23, 27
+	vrlw 4, 4, 28  #
+	vrlw 5, 5, 28
+	vrlw 6, 6, 28
+	vrlw 7, 7, 28
+	  vrlw 20, 20, 28  #
+	  vrlw 21, 21, 28
+	  vrlw 22, 22, 28
+	  vrlw 23, 23, 28
+	xxlor	32+28, 0, 0
+
+	# QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
+	xxlor	0, 32+25, 32+25
+	xxlor	32+25, 20, 20
+	vadduwm 0, 0, 5
+	vadduwm 1, 1, 6
+	vadduwm 2, 2, 7
+	vadduwm 3, 3, 4
+	  vadduwm 16, 16, 21
+	  vadduwm 17, 17, 22
+	  vadduwm 18, 18, 23
+	  vadduwm 19, 19, 20
+
+	  vpermxor 15, 15, 0, 25
+	  vpermxor 12, 12, 1, 25
+	  vpermxor 13, 13, 2, 25
+	  vpermxor 14, 14, 3, 25
+	  vpermxor 31, 31, 16, 25
+	  vpermxor 28, 28, 17, 25
+	  vpermxor 29, 29, 18, 25
+	  vpermxor 30, 30, 19, 25
+
+	xxlor	32+25, 0, 0
+	vadduwm 10, 10, 15
+	vadduwm 11, 11, 12
+	vadduwm 8, 8, 13
+	vadduwm 9, 9, 14
+	  vadduwm 26, 26, 31
+	  vadduwm 27, 27, 28
+	  vadduwm 24, 24, 29
+	  vadduwm 25, 25, 30
+	vxor 5, 5, 10
+	vxor 6, 6, 11
+	vxor 7, 7, 8
+	vxor 4, 4, 9
+	  vxor 21, 21, 26
+	  vxor 22, 22, 27
+	  vxor 23, 23, 24
+	  vxor 20, 20, 25
+
+	xxlor	0, 32+25, 32+25
+	xxlor	32+25, 21, 21
+	vrlw 5, 5, 25
+	vrlw 6, 6, 25
+	vrlw 7, 7, 25
+	vrlw 4, 4, 25
+	  vrlw 21, 21, 25
+	  vrlw 22, 22, 25
+	  vrlw 23, 23, 25
+	  vrlw 20, 20, 25
+	xxlor	32+25, 0, 0
+
+	vadduwm 0, 0, 5
+	vadduwm 1, 1, 6
+	vadduwm 2, 2, 7
+	vadduwm 3, 3, 4
+	  vadduwm 16, 16, 21
+	  vadduwm 17, 17, 22
+	  vadduwm 18, 18, 23
+	  vadduwm 19, 19, 20
+
+	xxlor	0, 32+25, 32+25
+	xxlor	32+25, 22, 22
+	  vpermxor 15, 15, 0, 25
+	  vpermxor 12, 12, 1, 25
+	  vpermxor 13, 13, 2, 25
+	  vpermxor 14, 14, 3, 25
+	  vpermxor 31, 31, 16, 25
+	  vpermxor 28, 28, 17, 25
+	  vpermxor 29, 29, 18, 25
+	  vpermxor 30, 30, 19, 25
+	xxlor	32+25, 0, 0
+
+	vadduwm 10, 10, 15
+	vadduwm 11, 11, 12
+	vadduwm 8, 8, 13
+	vadduwm 9, 9, 14
+	  vadduwm 26, 26, 31
+	  vadduwm 27, 27, 28
+	  vadduwm 24, 24, 29
+	  vadduwm 25, 25, 30
+
+	xxlor	0, 32+28, 32+28
+	xxlor	32+28, 23, 23
+	vxor 5, 5, 10
+	vxor 6, 6, 11
+	vxor 7, 7, 8
+	vxor 4, 4, 9
+	  vxor 21, 21, 26
+	  vxor 22, 22, 27
+	  vxor 23, 23, 24
+	  vxor 20, 20, 25
+	vrlw 5, 5, 28
+	vrlw 6, 6, 28
+	vrlw 7, 7, 28
+	vrlw 4, 4, 28
+	  vrlw 21, 21, 28
+	  vrlw 22, 22, 28
+	  vrlw 23, 23, 28
+	  vrlw 20, 20, 28
+	xxlor	32+28, 0, 0
+.endm
+
+.macro QT_loop_4x
+	# QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
+	vadduwm 0, 0, 4
+	vadduwm 1, 1, 5
+	vadduwm 2, 2, 6
+	vadduwm 3, 3, 7
+	  vpermxor 12, 12, 0, 20
+	  vpermxor 13, 13, 1, 20
+	  vpermxor 14, 14, 2, 20
+	  vpermxor 15, 15, 3, 20
+	vadduwm 8, 8, 12
+	vadduwm 9, 9, 13
+	vadduwm 10, 10, 14
+	vadduwm 11, 11, 15
+	vxor 4, 4, 8
+	vxor 5, 5, 9
+	vxor 6, 6, 10
+	vxor 7, 7, 11
+	vrlw 4, 4, 21
+	vrlw 5, 5, 21
+	vrlw 6, 6, 21
+	vrlw 7, 7, 21
+	vadduwm 0, 0, 4
+	vadduwm 1, 1, 5
+	vadduwm 2, 2, 6
+	vadduwm 3, 3, 7
+	  vpermxor 12, 12, 0, 22
+	  vpermxor 13, 13, 1, 22
+	  vpermxor 14, 14, 2, 22
+	  vpermxor 15, 15, 3, 22
+	vadduwm 8, 8, 12
+	vadduwm 9, 9, 13
+	vadduwm 10, 10, 14
+	vadduwm 11, 11, 15
+	vxor 4, 4, 8
+	vxor 5, 5, 9
+	vxor 6, 6, 10
+	vxor 7, 7, 11
+	vrlw 4, 4, 23
+	vrlw 5, 5, 23
+	vrlw 6, 6, 23
+	vrlw 7, 7, 23
+
+	# QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
+	vadduwm 0, 0, 5
+	vadduwm 1, 1, 6
+	vadduwm 2, 2, 7
+	vadduwm 3, 3, 4
+	  vpermxor 15, 15, 0, 20
+	  vpermxor 12, 12, 1, 20
+	  vpermxor 13, 13, 2, 20
+	  vpermxor 14, 14, 3, 20
+	vadduwm 10, 10, 15
+	vadduwm 11, 11, 12
+	vadduwm 8, 8, 13
+	vadduwm 9, 9, 14
+	vxor 5, 5, 10
+	vxor 6, 6, 11
+	vxor 7, 7, 8
+	vxor 4, 4, 9
+	vrlw 5, 5, 21
+	vrlw 6, 6, 21
+	vrlw 7, 7, 21
+	vrlw 4, 4, 21
+	vadduwm 0, 0, 5
+	vadduwm 1, 1, 6
+	vadduwm 2, 2, 7
+	vadduwm 3, 3, 4
+	  vpermxor 15, 15, 0, 22
+	  vpermxor 12, 12, 1, 22
+	  vpermxor 13, 13, 2, 22
+	  vpermxor 14, 14, 3, 22
+	vadduwm 10, 10, 15
+	vadduwm 11, 11, 12
+	vadduwm 8, 8, 13
+	vadduwm 9, 9, 14
+	vxor 5, 5, 10
+	vxor 6, 6, 11
+	vxor 7, 7, 8
+	vxor 4, 4, 9
+	vrlw 5, 5, 23
+	vrlw 6, 6, 23
+	vrlw 7, 7, 23
+	vrlw 4, 4, 23
+.endm
+
+# Transpose
+.macro TP_4x a0 a1 a2 a3
+	xxmrghw  10, 32+\a0, 32+\a1	# a0, a1, b0, b1
+	xxmrghw  11, 32+\a2, 32+\a3	# a2, a3, b2, b3
+	xxmrglw  12, 32+\a0, 32+\a1	# c0, c1, d0, d1
+	xxmrglw  13, 32+\a2, 32+\a3	# c2, c3, d2, d3
+	xxpermdi	32+\a0, 10, 11, 0	# a0, a1, a2, a3
+	xxpermdi	32+\a1, 10, 11, 3	# b0, b1, b2, b3
+	xxpermdi	32+\a2, 12, 13, 0	# c0, c1, c2, c3
+	xxpermdi	32+\a3, 12, 13, 3	# d0, d1, d2, d3
+.endm
+
+# key stream = working state + state
+.macro Add_state S
+	vadduwm \S+0, \S+0, 16-\S
+	vadduwm \S+4, \S+4, 17-\S
+	vadduwm \S+8, \S+8, 18-\S
+	vadduwm \S+12, \S+12, 19-\S
+
+	vadduwm \S+1, \S+1, 16-\S
+	vadduwm \S+5, \S+5, 17-\S
+	vadduwm \S+9, \S+9, 18-\S
+	vadduwm \S+13, \S+13, 19-\S
+
+	vadduwm \S+2, \S+2, 16-\S
+	vadduwm \S+6, \S+6, 17-\S
+	vadduwm \S+10, \S+10, 18-\S
+	vadduwm \S+14, \S+14, 19-\S
+
+	vadduwm	\S+3, \S+3, 16-\S
+	vadduwm	\S+7, \S+7, 17-\S
+	vadduwm	\S+11, \S+11, 18-\S
+	vadduwm	\S+15, \S+15, 19-\S
+.endm
+
+#
+# write 256 bytes
+#
+.macro Write_256 S
+	add 9, 14, 5
+	add 16, 14, 4
+	lxvw4x 0, 0, 9
+	lxvw4x 1, 17, 9
+	lxvw4x 2, 18, 9
+	lxvw4x 3, 19, 9
+	lxvw4x 4, 20, 9
+	lxvw4x 5, 21, 9
+	lxvw4x 6, 22, 9
+	lxvw4x 7, 23, 9
+	lxvw4x 8, 24, 9
+	lxvw4x 9, 25, 9
+	lxvw4x 10, 26, 9
+	lxvw4x 11, 27, 9
+	lxvw4x 12, 28, 9
+	lxvw4x 13, 29, 9
+	lxvw4x 14, 30, 9
+	lxvw4x 15, 31, 9
+
+	xxlxor \S+32, \S+32, 0
+	xxlxor \S+36, \S+36, 1
+	xxlxor \S+40, \S+40, 2
+	xxlxor \S+44, \S+44, 3
+	xxlxor \S+33, \S+33, 4
+	xxlxor \S+37, \S+37, 5
+	xxlxor \S+41, \S+41, 6
+	xxlxor \S+45, \S+45, 7
+	xxlxor \S+34, \S+34, 8
+	xxlxor \S+38, \S+38, 9
+	xxlxor \S+42, \S+42, 10
+	xxlxor \S+46, \S+46, 11
+	xxlxor \S+35, \S+35, 12
+	xxlxor \S+39, \S+39, 13
+	xxlxor \S+43, \S+43, 14
+	xxlxor \S+47, \S+47, 15
+
+	stxvw4x \S+32, 0, 16
+	stxvw4x \S+36, 17, 16
+	stxvw4x \S+40, 18, 16
+	stxvw4x \S+44, 19, 16
+
+	stxvw4x \S+33, 20, 16
+	stxvw4x \S+37, 21, 16
+	stxvw4x \S+41, 22, 16
+	stxvw4x \S+45, 23, 16
+
+	stxvw4x \S+34, 24, 16
+	stxvw4x \S+38, 25, 16
+	stxvw4x \S+42, 26, 16
+	stxvw4x \S+46, 27, 16
+
+	stxvw4x \S+35, 28, 16
+	stxvw4x \S+39, 29, 16
+	stxvw4x \S+43, 30, 16
+	stxvw4x \S+47, 31, 16
+
+.endm
+
+#
+# chacha20_p10le_8x(u32 *state, byte *dst, const byte *src, size_t len, int nrounds);
+#
+SYM_FUNC_START(chacha_p10le_8x)
+.align 5
+	cmpdi	6, 0
+	ble	Out_no_chacha
+
+	SAVE_REGS
+
+	# r17 - r31 mainly for Write_256 macro.
+	li	17, 16
+	li	18, 32
+	li	19, 48
+	li	20, 64
+	li	21, 80
+	li	22, 96
+	li	23, 112
+	li	24, 128
+	li	25, 144
+	li	26, 160
+	li	27, 176
+	li	28, 192
+	li	29, 208
+	li	30, 224
+	li	31, 240
+
+	mr 15, 6			# len
+	li 14, 0			# offset to inp and outp
+
+        lxvw4x	48, 0, 3		#  vr16, constants
+	lxvw4x	49, 17, 3		#  vr17, key 1
+	lxvw4x	50, 18, 3		#  vr18, key 2
+	lxvw4x	51, 19, 3		#  vr19, counter, nonce
+
+	# create (0, 1, 2, 3) counters
+	vspltisw 0, 0
+	vspltisw 1, 1
+	vspltisw 2, 2
+	vspltisw 3, 3
+	vmrghw	4, 0, 1
+	vmrglw	5, 2, 3
+	vsldoi	30, 4, 5, 8		# vr30 counter, 4 (0, 1, 2, 3)
+
+	vspltisw 21, 12
+	vspltisw 23, 7
+
+	addis	11, 2, permx@toc@ha
+	addi	11, 11, permx@toc@l
+	lxvw4x	32+20, 0, 11
+	lxvw4x	32+22, 17, 11
+
+	sradi	8, 7, 1
+
+	mtctr 8
+
+	# save constants to vsx
+	xxlor	16, 48, 48
+	xxlor	17, 49, 49
+	xxlor	18, 50, 50
+	xxlor	19, 51, 51
+
+	vspltisw 25, 4
+	vspltisw 26, 8
+
+	xxlor	25, 32+26, 32+26
+	xxlor	24, 32+25, 32+25
+
+	vadduwm	31, 30, 25		# counter = (0, 1, 2, 3) + (4, 4, 4, 4)
+	xxlor	30, 32+30, 32+30
+	xxlor	31, 32+31, 32+31
+
+	xxlor	20, 32+20, 32+20
+	xxlor	21, 32+21, 32+21
+	xxlor	22, 32+22, 32+22
+	xxlor	23, 32+23, 32+23
+
+	cmpdi	6, 512
+	blt	Loop_last
+
+Loop_8x:
+	xxspltw  32+0, 16, 0
+	xxspltw  32+1, 16, 1
+	xxspltw  32+2, 16, 2
+	xxspltw  32+3, 16, 3
+
+	xxspltw  32+4, 17, 0
+	xxspltw  32+5, 17, 1
+	xxspltw  32+6, 17, 2
+	xxspltw  32+7, 17, 3
+	xxspltw  32+8, 18, 0
+	xxspltw  32+9, 18, 1
+	xxspltw  32+10, 18, 2
+	xxspltw  32+11, 18, 3
+	xxspltw  32+12, 19, 0
+	xxspltw  32+13, 19, 1
+	xxspltw  32+14, 19, 2
+	xxspltw  32+15, 19, 3
+	vadduwm	12, 12, 30	# increase counter
+
+	xxspltw  32+16, 16, 0
+	xxspltw  32+17, 16, 1
+	xxspltw  32+18, 16, 2
+	xxspltw  32+19, 16, 3
+
+	xxspltw  32+20, 17, 0
+	xxspltw  32+21, 17, 1
+	xxspltw  32+22, 17, 2
+	xxspltw  32+23, 17, 3
+	xxspltw  32+24, 18, 0
+	xxspltw  32+25, 18, 1
+	xxspltw  32+26, 18, 2
+	xxspltw  32+27, 18, 3
+	xxspltw  32+28, 19, 0
+	xxspltw  32+29, 19, 1
+	vadduwm	28, 28, 31	# increase counter
+	xxspltw  32+30, 19, 2
+	xxspltw  32+31, 19, 3
+
+.align 5
+quarter_loop_8x:
+	QT_loop_8x
+
+	bdnz	quarter_loop_8x
+
+	xxlor	0, 32+30, 32+30
+	xxlor	32+30, 30, 30
+	vadduwm	12, 12, 30
+	xxlor	32+30, 0, 0
+	TP_4x 0, 1, 2, 3
+	TP_4x 4, 5, 6, 7
+	TP_4x 8, 9, 10, 11
+	TP_4x 12, 13, 14, 15
+
+	xxlor	0, 48, 48
+	xxlor	1, 49, 49
+	xxlor	2, 50, 50
+	xxlor	3, 51, 51
+	xxlor	48, 16, 16
+	xxlor	49, 17, 17
+	xxlor	50, 18, 18
+	xxlor	51, 19, 19
+	Add_state 0
+	xxlor	48, 0, 0
+	xxlor	49, 1, 1
+	xxlor	50, 2, 2
+	xxlor	51, 3, 3
+	Write_256 0
+	addi	14, 14, 256	# offset +=256
+	addi	15, 15, -256	# len -=256
+
+	xxlor	5, 32+31, 32+31
+	xxlor	32+31, 31, 31
+	vadduwm	28, 28, 31
+	xxlor	32+31, 5, 5
+	TP_4x 16+0, 16+1, 16+2, 16+3
+	TP_4x 16+4, 16+5, 16+6, 16+7
+	TP_4x 16+8, 16+9, 16+10, 16+11
+	TP_4x 16+12, 16+13, 16+14, 16+15
+
+	xxlor	32, 16, 16
+	xxlor	33, 17, 17
+	xxlor	34, 18, 18
+	xxlor	35, 19, 19
+	Add_state 16
+	Write_256 16
+	addi	14, 14, 256	# offset +=256
+	addi	15, 15, -256	# len +=256
+
+	xxlor	32+24, 24, 24
+	xxlor	32+25, 25, 25
+	xxlor	32+30, 30, 30
+	vadduwm	30, 30, 25
+	vadduwm	31, 30, 24
+	xxlor	30, 32+30, 32+30
+	xxlor	31, 32+31, 32+31
+
+	cmpdi	15, 0
+	beq	Out_loop
+
+	cmpdi	15, 512
+	blt	Loop_last
+
+	mtctr 8
+	b Loop_8x
+
+Loop_last:
+        lxvw4x	48, 0, 3		#  vr16, constants
+	lxvw4x	49, 17, 3		#  vr17, key 1
+	lxvw4x	50, 18, 3		#  vr18, key 2
+	lxvw4x	51, 19, 3		#  vr19, counter, nonce
+
+	vspltisw 21, 12
+	vspltisw 23, 7
+	addis	11, 2, permx@toc@ha
+	addi	11, 11, permx@toc@l
+	lxvw4x	32+20, 0, 11
+	lxvw4x	32+22, 17, 11
+
+	sradi	8, 7, 1
+	mtctr 8
+
+Loop_4x:
+	vspltw  0, 16, 0
+	vspltw  1, 16, 1
+	vspltw  2, 16, 2
+	vspltw  3, 16, 3
+
+	vspltw  4, 17, 0
+	vspltw  5, 17, 1
+	vspltw  6, 17, 2
+	vspltw  7, 17, 3
+	vspltw  8, 18, 0
+	vspltw  9, 18, 1
+	vspltw  10, 18, 2
+	vspltw  11, 18, 3
+	vspltw  12, 19, 0
+	vadduwm	12, 12, 30	# increase counter
+	vspltw  13, 19, 1
+	vspltw  14, 19, 2
+	vspltw  15, 19, 3
+
+.align 5
+quarter_loop:
+	QT_loop_4x
+
+	bdnz	quarter_loop
+
+	vadduwm	12, 12, 30
+	TP_4x 0, 1, 2, 3
+	TP_4x 4, 5, 6, 7
+	TP_4x 8, 9, 10, 11
+	TP_4x 12, 13, 14, 15
+
+	Add_state 0
+	Write_256 0
+	addi	14, 14, 256	# offset += 256
+	addi	15, 15, -256	# len += 256
+
+	# Update state counter
+	vspltisw 25, 4
+	vadduwm	30, 30, 25
+
+	cmpdi	15, 0
+	beq	Out_loop
+	cmpdi	15, 256
+	blt	Out_loop
+
+	mtctr 8
+	b Loop_4x
+
+Out_loop:
+	RESTORE_REGS
+	blr
+
+Out_no_chacha:
+	li	3, 0
+	blr
+SYM_FUNC_END(chacha_p10le_8x)
+
+SYM_DATA_START_LOCAL(PERMX)
+.align 5
+permx:
+.long 0x22330011, 0x66774455, 0xaabb8899, 0xeeffccdd
+.long 0x11223300, 0x55667744, 0x99aabb88, 0xddeeffcc
+SYM_DATA_END(PERMX)

From a09450e59c5a93aa13164a3024f9c1ba02ced874 Mon Sep 17 00:00:00 2001
From: Danny Tsen <dtsen@linux.ibm.com>
Date: Wed, 26 Apr 2023 15:11:44 -0400
Subject: [PATCH 0152/2047] crypt: chacha20-p10 - Glue code for optmized
 Chacha20 implementation for ppc64le

Signed-off-by: Danny Tsen <dtsen@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/powerpc/crypto/chacha-p10-glue.c | 221 ++++++++++++++++++++++++++
 1 file changed, 221 insertions(+)
 create mode 100644 arch/powerpc/crypto/chacha-p10-glue.c

diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c
new file mode 100644
index 000000000000..74fb86b0d209
--- /dev/null
+++ b/arch/powerpc/crypto/chacha-p10-glue.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC P10 (ppc64le) accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright 2023- IBM Corp. All rights reserved.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/sizes.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
+asmlinkage void chacha_p10le_8x(u32 *state, u8 *dst, const u8 *src,
+				unsigned int len, int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10);
+
+static void vsx_begin(void)
+{
+	preempt_disable();
+	enable_kernel_vsx();
+}
+
+static void vsx_end(void)
+{
+	disable_kernel_vsx();
+	preempt_enable();
+}
+
+static void chacha_p10_do_8x(u32 *state, u8 *dst, const u8 *src,
+			     unsigned int bytes, int nrounds)
+{
+	unsigned int l = bytes & ~0x0FF;
+
+	if (l > 0) {
+		chacha_p10le_8x(state, dst, src, l, nrounds);
+		bytes -= l;
+		src += l;
+		dst += l;
+		state[12] += l / CHACHA_BLOCK_SIZE;
+	}
+
+	if (bytes > 0)
+		chacha_crypt_generic(state, dst, src, bytes, nrounds);
+}
+
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+	hchacha_block_generic(state, stream, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+	chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+		       int nrounds)
+{
+	if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE ||
+	    !crypto_simd_usable())
+		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+	do {
+		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+		vsx_begin();
+		chacha_p10_do_8x(state, dst, src, todo, nrounds);
+		vsx_end();
+
+		bytes -= todo;
+		src += todo;
+		dst += todo;
+	} while (bytes);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+static int chacha_p10_stream_xor(struct skcipher_request *req,
+				 const struct chacha_ctx *ctx, const u8 *iv)
+{
+	struct skcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
+
+	chacha_init_generic(state, ctx->key, iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+
+		if (nbytes < walk.total)
+			nbytes = rounddown(nbytes, walk.stride);
+
+		if (!crypto_simd_usable()) {
+			chacha_crypt_generic(state, walk.dst.virt.addr,
+					     walk.src.virt.addr, nbytes,
+					     ctx->nrounds);
+		} else {
+			vsx_begin();
+			chacha_p10_do_8x(state, walk.dst.virt.addr,
+				      walk.src.virt.addr, nbytes, ctx->nrounds);
+			vsx_end();
+		}
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+static int chacha_p10(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	return chacha_p10_stream_xor(req, ctx, req->iv);
+}
+
+static int xchacha_p10(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct chacha_ctx subctx;
+	u32 state[16];
+	u8 real_iv[16];
+
+	chacha_init_generic(state, ctx->key, req->iv);
+	hchacha_block_arch(state, subctx.key, ctx->nrounds);
+	subctx.nrounds = ctx->nrounds;
+
+	memcpy(&real_iv[0], req->iv + 24, 8);
+	memcpy(&real_iv[8], req->iv + 16, 8);
+	return chacha_p10_stream_xor(req, &subctx, real_iv);
+}
+
+static struct skcipher_alg algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-p10",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= chacha_p10,
+		.decrypt		= chacha_p10,
+	}, {
+		.base.cra_name		= "xchacha20",
+		.base.cra_driver_name	= "xchacha20-p10",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= xchacha_p10,
+		.decrypt		= xchacha_p10,
+	}, {
+		.base.cra_name		= "xchacha12",
+		.base.cra_driver_name	= "xchacha12-p10",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= XCHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha12_setkey,
+		.encrypt		= xchacha_p10,
+		.decrypt		= xchacha_p10,
+	}
+};
+
+static int __init chacha_p10_init(void)
+{
+	static_branch_enable(&have_p10);
+
+	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_p10_exit(void)
+{
+	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(PPC_MODULE_FEATURE_P10, chacha_p10_init);
+module_exit(chacha_p10_exit);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)");
+MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-p10");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-p10");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-p10");

From 09ef057bd2a125e01fe841c4f40ca9fb97cc5cd4 Mon Sep 17 00:00:00 2001
From: Danny Tsen <dtsen@linux.ibm.com>
Date: Wed, 26 Apr 2023 15:11:45 -0400
Subject: [PATCH 0153/2047] crypto: poly1305-p10 - An optimized Poly1305
 implementation with 4-way unrolling for ppc64le

Improve overall performance of Poly1305 for Power10 or later CPU.

Signed-off-by: Danny Tsen <dtsen@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/powerpc/crypto/poly1305-p10le_64.S | 1075 +++++++++++++++++++++++
 1 file changed, 1075 insertions(+)
 create mode 100644 arch/powerpc/crypto/poly1305-p10le_64.S

diff --git a/arch/powerpc/crypto/poly1305-p10le_64.S b/arch/powerpc/crypto/poly1305-p10le_64.S
new file mode 100644
index 000000000000..a3c1987f1ecd
--- /dev/null
+++ b/arch/powerpc/crypto/poly1305-p10le_64.S
@@ -0,0 +1,1075 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#
+# Accelerated poly1305 implementation for ppc64le.
+#
+# Copyright 2023- IBM Corp. All rights reserved
+#
+#===================================================================================
+# Written by Danny Tsen <dtsen@us.ibm.com>
+#
+# Poly1305 - this version mainly using vector/VSX/Scalar
+#  - 26 bits limbs
+#  - Handle multiple 64 byte blcok.
+#
+# Block size 16 bytes
+# key = (r, s)
+# clamp r &= 0x0FFFFFFC0FFFFFFC 0x0FFFFFFC0FFFFFFF
+# p = 2^130 - 5
+# a += m
+# a = (r + a) % p
+# a += s
+#
+# Improve performance by breaking down polynominal to the sum of products with
+#     h4 = m1 * r⁴ + m2 * r³ + m3 * r² + m4 * r
+#
+#  07/22/21 - this revison based on the above sum of products.  Setup r^4, r^3, r^2, r and s3, s2, s1, s0
+#             to 9 vectors for multiplications.
+#
+# setup r^4, r^3, r^2, r vectors
+#    vs    [r^1, r^3, r^2, r^4]
+#    vs0 = [r0,.....]
+#    vs1 = [r1,.....]
+#    vs2 = [r2,.....]
+#    vs3 = [r3,.....]
+#    vs4 = [r4,.....]
+#    vs5 = [r1*5,...]
+#    vs6 = [r2*5,...]
+#    vs7 = [r2*5,...]
+#    vs8 = [r4*5,...]
+#
+#  Each word in a vector consists a member of a "r/s" in [a * r/s].
+#
+# r0, r4*5, r3*5, r2*5, r1*5;
+# r1, r0,   r4*5, r3*5, r2*5;
+# r2, r1,   r0,   r4*5, r3*5;
+# r3, r2,   r1,   r0,   r4*5;
+# r4, r3,   r2,   r1,   r0  ;
+#
+#
+# poly1305_p10le_4blocks( uint8_t *k, uint32_t mlen, uint8_t *m)
+#  k = 32 bytes key
+#  r3 = k (r, s)
+#  r4 = mlen
+#  r5 = m
+#
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
+#include <linux/linkage.h>
+
+.machine "any"
+
+.text
+
+.macro	SAVE_GPR GPR OFFSET FRAME
+	std	\GPR,\OFFSET(\FRAME)
+.endm
+
+.macro	SAVE_VRS VRS OFFSET FRAME
+	li	16, \OFFSET
+	stvx	\VRS, 16, \FRAME
+.endm
+
+.macro	SAVE_VSX VSX OFFSET FRAME
+	li	16, \OFFSET
+	stxvx	\VSX, 16, \FRAME
+.endm
+
+.macro	RESTORE_GPR GPR OFFSET FRAME
+	ld	\GPR,\OFFSET(\FRAME)
+.endm
+
+.macro	RESTORE_VRS VRS OFFSET FRAME
+	li	16, \OFFSET
+	lvx	\VRS, 16, \FRAME
+.endm
+
+.macro	RESTORE_VSX VSX OFFSET FRAME
+	li	16, \OFFSET
+	lxvx	\VSX, 16, \FRAME
+.endm
+
+.macro SAVE_REGS
+	mflr 0
+	std 0, 16(1)
+	stdu 1,-752(1)
+
+	SAVE_GPR 14, 112, 1
+	SAVE_GPR 15, 120, 1
+	SAVE_GPR 16, 128, 1
+	SAVE_GPR 17, 136, 1
+	SAVE_GPR 18, 144, 1
+	SAVE_GPR 19, 152, 1
+	SAVE_GPR 20, 160, 1
+	SAVE_GPR 21, 168, 1
+	SAVE_GPR 22, 176, 1
+	SAVE_GPR 23, 184, 1
+	SAVE_GPR 24, 192, 1
+	SAVE_GPR 25, 200, 1
+	SAVE_GPR 26, 208, 1
+	SAVE_GPR 27, 216, 1
+	SAVE_GPR 28, 224, 1
+	SAVE_GPR 29, 232, 1
+	SAVE_GPR 30, 240, 1
+	SAVE_GPR 31, 248, 1
+
+	addi	9, 1, 256
+	SAVE_VRS 20, 0, 9
+	SAVE_VRS 21, 16, 9
+	SAVE_VRS 22, 32, 9
+	SAVE_VRS 23, 48, 9
+	SAVE_VRS 24, 64, 9
+	SAVE_VRS 25, 80, 9
+	SAVE_VRS 26, 96, 9
+	SAVE_VRS 27, 112, 9
+	SAVE_VRS 28, 128, 9
+	SAVE_VRS 29, 144, 9
+	SAVE_VRS 30, 160, 9
+	SAVE_VRS 31, 176, 9
+
+	SAVE_VSX 14, 192, 9
+	SAVE_VSX 15, 208, 9
+	SAVE_VSX 16, 224, 9
+	SAVE_VSX 17, 240, 9
+	SAVE_VSX 18, 256, 9
+	SAVE_VSX 19, 272, 9
+	SAVE_VSX 20, 288, 9
+	SAVE_VSX 21, 304, 9
+	SAVE_VSX 22, 320, 9
+	SAVE_VSX 23, 336, 9
+	SAVE_VSX 24, 352, 9
+	SAVE_VSX 25, 368, 9
+	SAVE_VSX 26, 384, 9
+	SAVE_VSX 27, 400, 9
+	SAVE_VSX 28, 416, 9
+	SAVE_VSX 29, 432, 9
+	SAVE_VSX 30, 448, 9
+	SAVE_VSX 31, 464, 9
+.endm # SAVE_REGS
+
+.macro RESTORE_REGS
+	addi	9, 1, 256
+	RESTORE_VRS 20, 0, 9
+	RESTORE_VRS 21, 16, 9
+	RESTORE_VRS 22, 32, 9
+	RESTORE_VRS 23, 48, 9
+	RESTORE_VRS 24, 64, 9
+	RESTORE_VRS 25, 80, 9
+	RESTORE_VRS 26, 96, 9
+	RESTORE_VRS 27, 112, 9
+	RESTORE_VRS 28, 128, 9
+	RESTORE_VRS 29, 144, 9
+	RESTORE_VRS 30, 160, 9
+	RESTORE_VRS 31, 176, 9
+
+	RESTORE_VSX 14, 192, 9
+	RESTORE_VSX 15, 208, 9
+	RESTORE_VSX 16, 224, 9
+	RESTORE_VSX 17, 240, 9
+	RESTORE_VSX 18, 256, 9
+	RESTORE_VSX 19, 272, 9
+	RESTORE_VSX 20, 288, 9
+	RESTORE_VSX 21, 304, 9
+	RESTORE_VSX 22, 320, 9
+	RESTORE_VSX 23, 336, 9
+	RESTORE_VSX 24, 352, 9
+	RESTORE_VSX 25, 368, 9
+	RESTORE_VSX 26, 384, 9
+	RESTORE_VSX 27, 400, 9
+	RESTORE_VSX 28, 416, 9
+	RESTORE_VSX 29, 432, 9
+	RESTORE_VSX 30, 448, 9
+	RESTORE_VSX 31, 464, 9
+
+	RESTORE_GPR 14, 112, 1
+	RESTORE_GPR 15, 120, 1
+	RESTORE_GPR 16, 128, 1
+	RESTORE_GPR 17, 136, 1
+	RESTORE_GPR 18, 144, 1
+	RESTORE_GPR 19, 152, 1
+	RESTORE_GPR 20, 160, 1
+	RESTORE_GPR 21, 168, 1
+	RESTORE_GPR 22, 176, 1
+	RESTORE_GPR 23, 184, 1
+	RESTORE_GPR 24, 192, 1
+	RESTORE_GPR 25, 200, 1
+	RESTORE_GPR 26, 208, 1
+	RESTORE_GPR 27, 216, 1
+	RESTORE_GPR 28, 224, 1
+	RESTORE_GPR 29, 232, 1
+	RESTORE_GPR 30, 240, 1
+	RESTORE_GPR 31, 248, 1
+
+	addi    1, 1, 752
+	ld 0, 16(1)
+	mtlr 0
+.endm # RESTORE_REGS
+
+#
+# p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5;
+# p[1] = a0*r1 + a1*r0   + a2*r4*5 + a3*r3*5 + a4*r2*5;
+# p[2] = a0*r2 + a1*r1   + a2*r0   + a3*r4*5 + a4*r3*5;
+# p[3] = a0*r3 + a1*r2   + a2*r1   + a3*r0   + a4*r4*5;
+# p[4] = a0*r4 + a1*r3   + a2*r2   + a3*r1   + a4*r0  ;
+#
+#    [r^2, r^3, r^1, r^4]
+#    [m3,  m2,  m4,  m1]
+#
+# multiply odd and even words
+.macro mul_odd
+	vmulouw	14, 4, 26
+	vmulouw	10, 5, 3
+	vmulouw	11, 6, 2
+	vmulouw	12, 7, 1
+	vmulouw	13, 8, 0
+	vmulouw	15, 4, 27
+	vaddudm	14, 14, 10
+	vaddudm	14, 14, 11
+	vmulouw	10, 5, 26
+	vmulouw	11, 6, 3
+	vaddudm	14, 14, 12
+	vaddudm	14, 14, 13	# x0
+	vaddudm	15, 15, 10
+	vaddudm	15, 15, 11
+	vmulouw	12, 7, 2
+	vmulouw	13, 8, 1
+	vaddudm	15, 15, 12
+	vaddudm	15, 15, 13	# x1
+	vmulouw	16, 4, 28
+	vmulouw	10, 5, 27
+	vmulouw	11, 6, 26
+	vaddudm	16, 16, 10
+	vaddudm	16, 16, 11
+	vmulouw	12, 7, 3
+	vmulouw	13, 8, 2
+	vaddudm	16, 16, 12
+	vaddudm	16, 16, 13	# x2
+	vmulouw	17, 4, 29
+	vmulouw	10, 5, 28
+	vmulouw	11, 6, 27
+	vaddudm	17, 17, 10
+	vaddudm	17, 17, 11
+	vmulouw	12, 7, 26
+	vmulouw	13, 8, 3
+	vaddudm	17, 17, 12
+	vaddudm	17, 17, 13	# x3
+	vmulouw	18, 4, 30
+	vmulouw	10, 5, 29
+	vmulouw	11, 6, 28
+	vaddudm	18, 18, 10
+	vaddudm	18, 18, 11
+	vmulouw	12, 7, 27
+	vmulouw	13, 8, 26
+	vaddudm	18, 18, 12
+	vaddudm	18, 18, 13	# x4
+.endm
+
+.macro mul_even
+	vmuleuw	9, 4, 26
+	vmuleuw	10, 5, 3
+	vmuleuw	11, 6, 2
+	vmuleuw	12, 7, 1
+	vmuleuw	13, 8, 0
+	vaddudm	14, 14, 9
+	vaddudm	14, 14, 10
+	vaddudm	14, 14, 11
+	vaddudm	14, 14, 12
+	vaddudm	14, 14, 13	# x0
+
+	vmuleuw	9, 4, 27
+	vmuleuw	10, 5, 26
+	vmuleuw	11, 6, 3
+	vmuleuw	12, 7, 2
+	vmuleuw	13, 8, 1
+	vaddudm	15, 15, 9
+	vaddudm	15, 15, 10
+	vaddudm	15, 15, 11
+	vaddudm	15, 15, 12
+	vaddudm	15, 15, 13	# x1
+
+	vmuleuw	9, 4, 28
+	vmuleuw	10, 5, 27
+	vmuleuw	11, 6, 26
+	vmuleuw	12, 7, 3
+	vmuleuw	13, 8, 2
+	vaddudm	16, 16, 9
+	vaddudm	16, 16, 10
+	vaddudm	16, 16, 11
+	vaddudm	16, 16, 12
+	vaddudm	16, 16, 13	# x2
+
+	vmuleuw	9, 4, 29
+	vmuleuw	10, 5, 28
+	vmuleuw	11, 6, 27
+	vmuleuw	12, 7, 26
+	vmuleuw	13, 8, 3
+	vaddudm	17, 17, 9
+	vaddudm	17, 17, 10
+	vaddudm	17, 17, 11
+	vaddudm	17, 17, 12
+	vaddudm	17, 17, 13	# x3
+
+	vmuleuw	9, 4, 30
+	vmuleuw	10, 5, 29
+	vmuleuw	11, 6, 28
+	vmuleuw	12, 7, 27
+	vmuleuw	13, 8, 26
+	vaddudm	18, 18, 9
+	vaddudm	18, 18, 10
+	vaddudm	18, 18, 11
+	vaddudm	18, 18, 12
+	vaddudm	18, 18, 13	# x4
+.endm
+
+#
+# poly1305_setup_r
+#
+# setup r^4, r^3, r^2, r vectors
+#    [r, r^3, r^2, r^4]
+#    vs0 = [r0,...]
+#    vs1 = [r1,...]
+#    vs2 = [r2,...]
+#    vs3 = [r3,...]
+#    vs4 = [r4,...]
+#    vs5 = [r4*5,...]
+#    vs6 = [r3*5,...]
+#    vs7 = [r2*5,...]
+#    vs8 = [r1*5,...]
+#
+# r0, r4*5, r3*5, r2*5, r1*5;
+# r1, r0,   r4*5, r3*5, r2*5;
+# r2, r1,   r0,   r4*5, r3*5;
+# r3, r2,   r1,   r0,   r4*5;
+# r4, r3,   r2,   r1,   r0  ;
+#
+.macro poly1305_setup_r
+
+	# save r
+	xxlor	26, 58, 58
+	xxlor	27, 59, 59
+	xxlor	28, 60, 60
+	xxlor	29, 61, 61
+	xxlor	30, 62, 62
+
+	xxlxor	31, 31, 31
+
+#    [r, r^3, r^2, r^4]
+	# compute r^2
+	vmr	4, 26
+	vmr	5, 27
+	vmr	6, 28
+	vmr	7, 29
+	vmr	8, 30
+	bl	do_mul		# r^2 r^1
+	xxpermdi 58, 58, 36, 0x3		# r0
+	xxpermdi 59, 59, 37, 0x3		# r1
+	xxpermdi 60, 60, 38, 0x3		# r2
+	xxpermdi 61, 61, 39, 0x3		# r3
+	xxpermdi 62, 62, 40, 0x3		# r4
+	xxpermdi 36, 36, 36, 0x3
+	xxpermdi 37, 37, 37, 0x3
+	xxpermdi 38, 38, 38, 0x3
+	xxpermdi 39, 39, 39, 0x3
+	xxpermdi 40, 40, 40, 0x3
+	vspltisb 13, 2
+	vsld	9, 27, 13
+	vsld	10, 28, 13
+	vsld	11, 29, 13
+	vsld	12, 30, 13
+	vaddudm	0, 9, 27
+	vaddudm	1, 10, 28
+	vaddudm	2, 11, 29
+	vaddudm	3, 12, 30
+
+	bl	do_mul		# r^4 r^3
+	vmrgow	26, 26, 4
+	vmrgow	27, 27, 5
+	vmrgow	28, 28, 6
+	vmrgow	29, 29, 7
+	vmrgow	30, 30, 8
+	vspltisb 13, 2
+	vsld	9, 27, 13
+	vsld	10, 28, 13
+	vsld	11, 29, 13
+	vsld	12, 30, 13
+	vaddudm	0, 9, 27
+	vaddudm	1, 10, 28
+	vaddudm	2, 11, 29
+	vaddudm	3, 12, 30
+
+	# r^2 r^4
+	xxlor	0, 58, 58
+	xxlor	1, 59, 59
+	xxlor	2, 60, 60
+	xxlor	3, 61, 61
+	xxlor	4, 62, 62
+	xxlor	5, 32, 32
+	xxlor	6, 33, 33
+	xxlor	7, 34, 34
+	xxlor	8, 35, 35
+
+	vspltw	9, 26, 3
+	vspltw	10, 26, 2
+	vmrgow	26, 10, 9
+	vspltw	9, 27, 3
+	vspltw	10, 27, 2
+	vmrgow	27, 10, 9
+	vspltw	9, 28, 3
+	vspltw	10, 28, 2
+	vmrgow	28, 10, 9
+	vspltw	9, 29, 3
+	vspltw	10, 29, 2
+	vmrgow	29, 10, 9
+	vspltw	9, 30, 3
+	vspltw	10, 30, 2
+	vmrgow	30, 10, 9
+
+	vsld	9, 27, 13
+	vsld	10, 28, 13
+	vsld	11, 29, 13
+	vsld	12, 30, 13
+	vaddudm	0, 9, 27
+	vaddudm	1, 10, 28
+	vaddudm	2, 11, 29
+	vaddudm	3, 12, 30
+.endm
+
+SYM_FUNC_START_LOCAL(do_mul)
+	mul_odd
+
+	# do reduction ( h %= p )
+	# carry reduction
+	vspltisb 9, 2
+	vsrd	10, 14, 31
+	vsrd	11, 17, 31
+	vand	7, 17, 25
+	vand	4, 14, 25
+	vaddudm	18, 18, 11
+	vsrd	12, 18, 31
+	vaddudm	15, 15, 10
+
+	vsrd	11, 15, 31
+	vand	8, 18, 25
+	vand	5, 15, 25
+	vaddudm	4, 4, 12
+	vsld	10, 12, 9
+	vaddudm	6, 16, 11
+
+	vsrd	13, 6, 31
+	vand	6, 6, 25
+	vaddudm	4, 4, 10
+	vsrd	10, 4, 31
+	vaddudm	7, 7, 13
+
+	vsrd	11, 7, 31
+	vand	7, 7, 25
+	vand	4, 4, 25
+	vaddudm	5, 5, 10
+	vaddudm	8, 8, 11
+	blr
+SYM_FUNC_END(do_mul)
+
+#
+# init key
+#
+.macro do_poly1305_init
+	addis	10, 2, rmask@toc@ha
+	addi	10, 10, rmask@toc@l
+
+	ld	11, 0(10)
+	ld	12, 8(10)
+
+	li	14, 16
+	li	15, 32
+	addis	10, 2, cnum@toc@ha
+	addi	10, 10, cnum@toc@l
+	lvx	25, 0, 10	# v25 - mask
+	lvx	31, 14, 10	# v31 = 1a
+	lvx	19, 15, 10	# v19 = 1 << 24
+	lxv	24, 48(10)	# vs24
+	lxv	25, 64(10)	# vs25
+
+	# initialize
+	# load key from r3 to vectors
+	ld	9, 24(3)
+	ld	10, 32(3)
+	and.	9, 9, 11
+	and.	10, 10, 12
+
+	# break 26 bits
+	extrdi	14, 9, 26, 38
+	extrdi	15, 9, 26, 12
+	extrdi	16, 9, 12, 0
+	mtvsrdd	58, 0, 14
+	insrdi	16, 10, 14, 38
+	mtvsrdd	59, 0, 15
+	extrdi	17, 10, 26, 24
+	mtvsrdd	60, 0, 16
+	extrdi	18, 10, 24, 0
+	mtvsrdd	61, 0, 17
+	mtvsrdd	62, 0, 18
+
+	# r1 = r1 * 5, r2 = r2 * 5, r3 = r3 * 5, r4 = r4 * 5
+	li	9, 5
+	mtvsrdd	36, 0, 9
+	vmulouw	0, 27, 4		# v0 = rr0
+	vmulouw	1, 28, 4		# v1 = rr1
+	vmulouw	2, 29, 4		# v2 = rr2
+	vmulouw	3, 30, 4		# v3 = rr3
+.endm
+
+#
+# poly1305_p10le_4blocks( uint8_t *k, uint32_t mlen, uint8_t *m)
+#  k = 32 bytes key
+#  r3 = k (r, s)
+#  r4 = mlen
+#  r5 = m
+#
+SYM_FUNC_START(poly1305_p10le_4blocks)
+.align 5
+	cmpdi	5, 64
+	blt	Out_no_poly1305
+
+	SAVE_REGS
+
+	do_poly1305_init
+
+	li	21, 0	# counter to message
+
+	poly1305_setup_r
+
+	# load previous H state
+	# break/convert r6 to 26 bits
+	ld	9, 0(3)
+	ld	10, 8(3)
+	ld	19, 16(3)
+	sldi	19, 19, 24
+	mtvsrdd	41, 0, 19
+	extrdi	14, 9, 26, 38
+	extrdi	15, 9, 26, 12
+	extrdi	16, 9, 12, 0
+	mtvsrdd	36, 0, 14
+	insrdi	16, 10, 14, 38
+	mtvsrdd	37, 0, 15
+	extrdi	17, 10, 26, 24
+	mtvsrdd	38, 0, 16
+	extrdi	18, 10, 24, 0
+	mtvsrdd	39, 0, 17
+	mtvsrdd	40, 0, 18
+	vor	8, 8, 9
+
+	# input m1 m2
+	add	20, 4, 21
+	xxlor	49, 24, 24
+	xxlor	50, 25, 25
+	lxvw4x	43, 0, 20
+	addi	17, 20, 16
+	lxvw4x	44, 0, 17
+	vperm	14, 11, 12, 17
+	vperm	15, 11, 12, 18
+	vand	9, 14, 25	# a0
+	vsrd	10, 14, 31	# >> 26
+	vsrd	11, 10, 31	# 12 bits left
+	vand	10, 10, 25	# a1
+	vspltisb 13, 12
+	vand	16, 15, 25
+	vsld	12, 16, 13
+	vor	11, 11, 12
+	vand	11, 11, 25	# a2
+	vspltisb 13, 14
+	vsrd	12, 15, 13	# >> 14
+	vsrd	13, 12, 31	# >> 26, a4
+	vand	12, 12, 25	# a3
+
+	vaddudm	20, 4, 9
+	vaddudm	21, 5, 10
+	vaddudm	22, 6, 11
+	vaddudm	23, 7, 12
+	vaddudm	24, 8, 13
+
+	# m3 m4
+	addi	17, 17, 16
+	lxvw4x	43, 0, 17
+	addi	17, 17, 16
+	lxvw4x	44, 0, 17
+	vperm	14, 11, 12, 17
+	vperm	15, 11, 12, 18
+	vand	9, 14, 25	# a0
+	vsrd	10, 14, 31	# >> 26
+	vsrd	11, 10, 31	# 12 bits left
+	vand	10, 10, 25	# a1
+	vspltisb 13, 12
+	vand	16, 15, 25
+	vsld	12, 16, 13
+	vspltisb 13, 14
+	vor	11, 11, 12
+	vand	11, 11, 25	# a2
+	vsrd	12, 15, 13	# >> 14
+	vsrd	13, 12, 31	# >> 26, a4
+	vand	12, 12, 25	# a3
+
+	# Smash 4 message blocks into 5 vectors of [m4,  m2,  m3,  m1]
+	vmrgow	4, 9, 20
+	vmrgow	5, 10, 21
+	vmrgow	6, 11, 22
+	vmrgow	7, 12, 23
+	vmrgow	8, 13, 24
+	vaddudm	8, 8, 19
+
+	addi	5, 5, -64	# len -= 64
+	addi	21, 21, 64	# offset += 64
+
+	li      9, 64
+	divdu   31, 5, 9
+
+	cmpdi	31, 0
+	ble	Skip_block_loop
+
+	mtctr	31
+
+# h4 =   m1 * r⁴ + m2 * r³ + m3 * r² + m4 * r
+# Rewrite the polynominal sum of product as follows,
+# h1 = (h0 + m1) * r^2,	h2 = (h0 + m2) * r^2
+# h3 = (h1 + m3) * r^2,	h4 = (h2 + m4) * r^2  --> (h0 + m1) r*4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h0 + m4) r^2
+#  .... Repeat
+# h5 = (h3 + m5) * r^2,	h6 = (h4 + m6) * r^2  -->
+# h7 = (h5 + m7) * r^2,	h8 = (h6 + m8) * r^1  --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
+#
+loop_4blocks:
+
+	# Multiply odd words and even words
+	mul_odd
+	mul_even
+	# carry reduction
+	vspltisb 9, 2
+	vsrd	10, 14, 31
+	vsrd	11, 17, 31
+	vand	7, 17, 25
+	vand	4, 14, 25
+	vaddudm	18, 18, 11
+	vsrd	12, 18, 31
+	vaddudm	15, 15, 10
+
+	vsrd	11, 15, 31
+	vand	8, 18, 25
+	vand	5, 15, 25
+	vaddudm	4, 4, 12
+	vsld	10, 12, 9
+	vaddudm	6, 16, 11
+
+	vsrd	13, 6, 31
+	vand	6, 6, 25
+	vaddudm	4, 4, 10
+	vsrd	10, 4, 31
+	vaddudm	7, 7, 13
+
+	vsrd	11, 7, 31
+	vand	7, 7, 25
+	vand	4, 4, 25
+	vaddudm	5, 5, 10
+	vaddudm	8, 8, 11
+
+	# input m1  m2  m3  m4
+	add	20, 4, 21
+	xxlor	49, 24, 24
+	xxlor	50, 25, 25
+	lxvw4x	43, 0, 20
+	addi	17, 20, 16
+	lxvw4x	44, 0, 17
+	vperm	14, 11, 12, 17
+	vperm	15, 11, 12, 18
+	addi	17, 17, 16
+	lxvw4x	43, 0, 17
+	addi	17, 17, 16
+	lxvw4x	44, 0, 17
+	vperm	17, 11, 12, 17
+	vperm	18, 11, 12, 18
+
+	vand	20, 14, 25	# a0
+	vand	9, 17, 25	# a0
+	vsrd	21, 14, 31	# >> 26
+	vsrd	22, 21, 31	# 12 bits left
+	vsrd	10, 17, 31	# >> 26
+	vsrd	11, 10, 31	# 12 bits left
+
+	vand	21, 21, 25	# a1
+	vand	10, 10, 25	# a1
+
+	vspltisb 13, 12
+	vand	16, 15, 25
+	vsld	23, 16, 13
+	vor	22, 22, 23
+	vand	22, 22, 25	# a2
+	vand	16, 18, 25
+	vsld	12, 16, 13
+	vor	11, 11, 12
+	vand	11, 11, 25	# a2
+	vspltisb 13, 14
+	vsrd	23, 15, 13	# >> 14
+	vsrd	24, 23, 31	# >> 26, a4
+	vand	23, 23, 25	# a3
+	vsrd	12, 18, 13	# >> 14
+	vsrd	13, 12, 31	# >> 26, a4
+	vand	12, 12, 25	# a3
+
+	vaddudm	4, 4, 20
+	vaddudm	5, 5, 21
+	vaddudm	6, 6, 22
+	vaddudm	7, 7, 23
+	vaddudm	8, 8, 24
+
+	# Smash 4 message blocks into 5 vectors of [m4,  m2,  m3,  m1]
+	vmrgow	4, 9, 4
+	vmrgow	5, 10, 5
+	vmrgow	6, 11, 6
+	vmrgow	7, 12, 7
+	vmrgow	8, 13, 8
+	vaddudm	8, 8, 19
+
+	addi	5, 5, -64	# len -= 64
+	addi	21, 21, 64	# offset += 64
+
+	bdnz	loop_4blocks
+
+Skip_block_loop:
+	xxlor	58, 0, 0
+	xxlor	59, 1, 1
+	xxlor	60, 2, 2
+	xxlor	61, 3, 3
+	xxlor	62, 4, 4
+	xxlor	32, 5, 5
+	xxlor	33, 6, 6
+	xxlor	34, 7, 7
+	xxlor	35, 8, 8
+
+	# Multiply odd words and even words
+	mul_odd
+	mul_even
+
+	# Sum the products.
+	xxpermdi 41, 31, 46, 0
+	xxpermdi 42, 31, 47, 0
+	vaddudm	4, 14, 9
+	xxpermdi 36, 31, 36, 3
+	vaddudm	5, 15, 10
+	xxpermdi 37, 31, 37, 3
+	xxpermdi 43, 31, 48, 0
+	vaddudm	6, 16, 11
+	xxpermdi 38, 31, 38, 3
+	xxpermdi 44, 31, 49, 0
+	vaddudm	7, 17, 12
+	xxpermdi 39, 31, 39, 3
+	xxpermdi 45, 31, 50, 0
+	vaddudm	8, 18, 13
+	xxpermdi 40, 31, 40, 3
+
+	# carry reduction
+	vspltisb 9, 2
+	vsrd	10, 4, 31
+	vsrd	11, 7, 31
+	vand	7, 7, 25
+	vand	4, 4, 25
+	vaddudm	8, 8, 11
+	vsrd	12, 8, 31
+	vaddudm	5, 5, 10
+
+	vsrd	11, 5, 31
+	vand	8, 8, 25
+	vand	5, 5, 25
+	vaddudm	4, 4, 12
+	vsld	10, 12, 9
+	vaddudm	6, 6, 11
+
+	vsrd	13, 6, 31
+	vand	6, 6, 25
+	vaddudm	4, 4, 10
+	vsrd	10, 4, 31
+	vaddudm	7, 7, 13
+
+	vsrd	11, 7, 31
+	vand	7, 7, 25
+	vand	4, 4, 25
+	vaddudm	5, 5, 10
+	vsrd	10, 5, 31
+	vand	5, 5, 25
+	vaddudm	6, 6, 10
+	vaddudm	8, 8, 11
+
+	b	do_final_update
+
+do_final_update:
+	# combine 26 bit limbs
+	# v4, v5, v6, v7 and v8 are 26 bit vectors
+	vsld	5, 5, 31
+	vor	20, 4, 5
+	vspltisb 11, 12
+	vsrd	12, 6, 11
+	vsld	6, 6, 31
+	vsld	6, 6, 31
+	vor	20, 20, 6
+	vspltisb 11, 14
+	vsld	7, 7, 11
+	vor	21, 7, 12
+	mfvsrld	16, 40		# save last 2 bytes
+	vsld	8, 8, 11
+	vsld	8, 8, 31
+	vor	21, 21, 8
+	mfvsrld	17, 52
+	mfvsrld	19, 53
+	srdi	16, 16, 24
+
+	std	17, 0(3)
+	std	19, 8(3)
+	stw	16, 16(3)
+
+Out_loop:
+	li	3, 0
+
+	RESTORE_REGS
+
+	blr
+
+Out_no_poly1305:
+	li	3, 0
+	blr
+SYM_FUNC_END(poly1305_p10le_4blocks)
+
+#
+# =======================================================================
+# The following functions implement 64 x 64 bits multiplication poly1305.
+#
+SYM_FUNC_START_LOCAL(Poly1305_init_64)
+	#  mask 0x0FFFFFFC0FFFFFFC
+	#  mask 0x0FFFFFFC0FFFFFFF
+	addis	10, 2, rmask@toc@ha
+	addi	10, 10, rmask@toc@l
+	ld	11, 0(10)
+	ld	12, 8(10)
+
+	# initialize
+	# load key from r3
+	ld	9, 24(3)
+	ld	10, 32(3)
+	and.	9, 9, 11	# cramp mask r0
+	and.	10, 10, 12	# cramp mask r1
+
+        srdi    21, 10, 2
+        add     19, 21, 10      # s1: r19 - (r1 >> 2) *5
+
+        # setup r and s
+        li      25, 0
+	mtvsrdd 32+0, 9, 19	# r0, s1
+	mtvsrdd 32+1, 10, 9	# r1, r0
+	mtvsrdd 32+2, 19, 25	# s1
+	mtvsrdd 32+3, 9, 25	# r0
+
+	blr
+SYM_FUNC_END(Poly1305_init_64)
+
+# Poly1305_mult
+# v6 = (h0, h1), v8 = h2
+# v0 = (r0, s1), v1 = (r1, r0), v2 = s1, v3 = r0
+#
+# Output: v7, v10, v11
+#
+SYM_FUNC_START_LOCAL(Poly1305_mult)
+	#
+	#	d0 = h0 * r0 + h1 * s1
+	vmsumudm	7, 6, 0, 9		# h0 * r0, h1 * s1
+
+	#	d1 = h0 * r1 + h1 * r0 + h2 * s1
+	vmsumudm	11, 6, 1, 9		# h0 * r1, h1 * r0
+	vmsumudm	10, 8, 2, 11		# d1 += h2 * s1
+
+	#       d2 = r0
+	vmsumudm	11, 8, 3, 9		# d2 = h2 * r0
+	blr
+SYM_FUNC_END(Poly1305_mult)
+
+#
+# carry reduction
+# h %=p
+#
+# Input: v7, v10, v11
+# Output: r27, r28, r29
+#
+SYM_FUNC_START_LOCAL(Carry_reduction)
+	mfvsrld	27, 32+7
+	mfvsrld	28, 32+10
+	mfvsrld	29, 32+11
+	mfvsrd	20, 32+7	# h0.h
+	mfvsrd	21, 32+10	# h1.h
+
+	addc	28, 28, 20
+	adde	29, 29, 21
+	srdi	22, 29, 0x2
+	sldi	23, 22, 0x2
+	add	23, 23, 22	# (h2 & 3) * 5
+	addc	27, 27, 23	# h0
+	addze	28, 28		# h1
+	andi.	29, 29, 0x3	# h2
+	blr
+SYM_FUNC_END(Carry_reduction)
+
+#
+# poly1305 multiplication
+# h *= r, h %= p
+#	d0 = h0 * r0 + h1 * s1
+#	d1 = h0 * r1 + h1 * r0 + h2 * s1
+#       d2 = h0 * r0
+#
+#
+# unsigned int poly1305_test_64s(unisgned char *state, const byte *src, size_t len, highbit)
+#   - no highbit if final leftover block (highbit = 0)
+#
+SYM_FUNC_START(poly1305_64s)
+	cmpdi	5, 0
+	ble	Out_no_poly1305_64
+
+	mflr 0
+	std 0, 16(1)
+	stdu 1,-400(1)
+
+	SAVE_GPR 14, 112, 1
+	SAVE_GPR 15, 120, 1
+	SAVE_GPR 16, 128, 1
+	SAVE_GPR 17, 136, 1
+	SAVE_GPR 18, 144, 1
+	SAVE_GPR 19, 152, 1
+	SAVE_GPR 20, 160, 1
+	SAVE_GPR 21, 168, 1
+	SAVE_GPR 22, 176, 1
+	SAVE_GPR 23, 184, 1
+	SAVE_GPR 24, 192, 1
+	SAVE_GPR 25, 200, 1
+	SAVE_GPR 26, 208, 1
+	SAVE_GPR 27, 216, 1
+	SAVE_GPR 28, 224, 1
+	SAVE_GPR 29, 232, 1
+	SAVE_GPR 30, 240, 1
+	SAVE_GPR 31, 248, 1
+
+	# Init poly1305
+	bl Poly1305_init_64
+
+	li 25, 0			# offset to inp and outp
+
+	add 11, 25, 4
+
+	# load h
+	# h0, h1, h2?
+        ld	27, 0(3)
+        ld	28, 8(3)
+        lwz	29, 16(3)
+
+        li      30, 16
+        divdu   31, 5, 30
+
+        mtctr   31
+
+        mr      24, 6		# highbit
+
+Loop_block_64:
+	vxor	9, 9, 9
+
+	ld	20, 0(11)
+	ld	21, 8(11)
+	addi	11, 11, 16
+
+	addc	27, 27, 20
+	adde	28, 28, 21
+	adde	29, 29, 24
+
+	li	22, 0
+	mtvsrdd	32+6, 27, 28	# h0, h1
+	mtvsrdd	32+8, 29, 22	# h2
+
+	bl	Poly1305_mult
+
+	bl	Carry_reduction
+
+	bdnz	Loop_block_64
+
+	std	27, 0(3)
+	std	28, 8(3)
+	stw	29, 16(3)
+
+	li	3, 0
+
+	RESTORE_GPR 14, 112, 1
+	RESTORE_GPR 15, 120, 1
+	RESTORE_GPR 16, 128, 1
+	RESTORE_GPR 17, 136, 1
+	RESTORE_GPR 18, 144, 1
+	RESTORE_GPR 19, 152, 1
+	RESTORE_GPR 20, 160, 1
+	RESTORE_GPR 21, 168, 1
+	RESTORE_GPR 22, 176, 1
+	RESTORE_GPR 23, 184, 1
+	RESTORE_GPR 24, 192, 1
+	RESTORE_GPR 25, 200, 1
+	RESTORE_GPR 26, 208, 1
+	RESTORE_GPR 27, 216, 1
+	RESTORE_GPR 28, 224, 1
+	RESTORE_GPR 29, 232, 1
+	RESTORE_GPR 30, 240, 1
+	RESTORE_GPR 31, 248, 1
+
+	addi    1, 1, 400
+	ld 0, 16(1)
+	mtlr 0
+
+	blr
+
+Out_no_poly1305_64:
+	li	3, 0
+	blr
+SYM_FUNC_END(poly1305_64s)
+
+#
+# Input: r3 = h, r4 = s, r5 = mac
+# mac = h + s
+#
+SYM_FUNC_START(poly1305_emit_64)
+	ld	10, 0(3)
+	ld	11, 8(3)
+	ld	12, 16(3)
+
+	# compare modulus
+	# h + 5 + (-p)
+	mr	6, 10
+	mr	7, 11
+	mr	8, 12
+	addic.	6, 6, 5
+	addze	7, 7
+	addze	8, 8
+	srdi	9, 8, 2		# overflow?
+	cmpdi	9, 0
+	beq	Skip_h64
+	mr	10, 6
+	mr	11, 7
+	mr	12, 8
+
+Skip_h64:
+	ld	6, 0(4)
+	ld	7, 8(4)
+	addc	10, 10, 6
+	adde	11, 11, 7
+	addze	12, 12
+
+	std	10, 0(5)
+	std	11, 8(5)
+	blr
+SYM_FUNC_END(poly1305_emit_64)
+
+SYM_DATA_START_LOCAL(RMASK)
+.align 5
+rmask:
+.byte	0xff, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f
+cnum:
+.long	0x03ffffff, 0x00000000, 0x03ffffff, 0x00000000
+.long	0x1a, 0x00, 0x1a, 0x00
+.long	0x01000000, 0x01000000, 0x01000000, 0x01000000
+.long	0x00010203, 0x04050607, 0x10111213, 0x14151617
+.long	0x08090a0b, 0x0c0d0e0f, 0x18191a1b, 0x1c1d1e1f
+SYM_DATA_END(RMASK)

From ba8f8624fde2cbd92b995e6d92d4ecc97cf2a7f0 Mon Sep 17 00:00:00 2001
From: Danny Tsen <dtsen@linux.ibm.com>
Date: Wed, 26 Apr 2023 15:11:46 -0400
Subject: [PATCH 0154/2047] crypto: poly1305-p10 - Glue code for optmized
 Poly1305 implementation for ppc64le

Signed-off-by: Danny Tsen <dtsen@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/powerpc/crypto/poly1305-p10-glue.c | 186 ++++++++++++++++++++++++
 1 file changed, 186 insertions(+)
 create mode 100644 arch/powerpc/crypto/poly1305-p10-glue.c

diff --git a/arch/powerpc/crypto/poly1305-p10-glue.c b/arch/powerpc/crypto/poly1305-p10-glue.c
new file mode 100644
index 000000000000..95dd708573ee
--- /dev/null
+++ b/arch/powerpc/crypto/poly1305-p10-glue.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Poly1305 authenticator algorithm, RFC7539.
+ *
+ * Copyright 2023- IBM Corp. All rights reserved.
+ */
+
+#include <crypto/algapi.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/jump_label.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <asm/unaligned.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
+asmlinkage void poly1305_p10le_4blocks(void *h, const u8 *m, u32 mlen);
+asmlinkage void poly1305_64s(void *h, const u8 *m, u32 mlen, int highbit);
+asmlinkage void poly1305_emit_64(void *h, void *s, u8 *dst);
+
+static void vsx_begin(void)
+{
+	preempt_disable();
+	enable_kernel_vsx();
+}
+
+static void vsx_end(void)
+{
+	disable_kernel_vsx();
+	preempt_enable();
+}
+
+static int crypto_poly1305_p10_init(struct shash_desc *desc)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	poly1305_core_init(&dctx->h);
+	dctx->buflen = 0;
+	dctx->rset = 0;
+	dctx->sset = false;
+
+	return 0;
+}
+
+static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
+					       const u8 *inp, unsigned int len)
+{
+	unsigned int acc = 0;
+
+	if (unlikely(!dctx->sset)) {
+		if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) {
+			struct poly1305_core_key *key = &dctx->core_r;
+
+			key->key.r64[0] = get_unaligned_le64(&inp[0]);
+			key->key.r64[1] = get_unaligned_le64(&inp[8]);
+			inp += POLY1305_BLOCK_SIZE;
+			len -= POLY1305_BLOCK_SIZE;
+			acc += POLY1305_BLOCK_SIZE;
+			dctx->rset = 1;
+		}
+		if (len >= POLY1305_BLOCK_SIZE) {
+			dctx->s[0] = get_unaligned_le32(&inp[0]);
+			dctx->s[1] = get_unaligned_le32(&inp[4]);
+			dctx->s[2] = get_unaligned_le32(&inp[8]);
+			dctx->s[3] = get_unaligned_le32(&inp[12]);
+			acc += POLY1305_BLOCK_SIZE;
+			dctx->sset = true;
+		}
+	}
+	return acc;
+}
+
+static int crypto_poly1305_p10_update(struct shash_desc *desc,
+				      const u8 *src, unsigned int srclen)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int bytes, used;
+
+	if (unlikely(dctx->buflen)) {
+		bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
+		memcpy(dctx->buf + dctx->buflen, src, bytes);
+		src += bytes;
+		srclen -= bytes;
+		dctx->buflen += bytes;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf,
+							       POLY1305_BLOCK_SIZE))) {
+				vsx_begin();
+				poly1305_64s(&dctx->h, dctx->buf,
+						  POLY1305_BLOCK_SIZE, 1);
+				vsx_end();
+			}
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
+		bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
+		used = crypto_poly1305_setdctxkey(dctx, src, bytes);
+		if (likely(used)) {
+			srclen -= used;
+			src += used;
+		}
+		if (crypto_simd_usable() && (srclen >= POLY1305_BLOCK_SIZE*4)) {
+			vsx_begin();
+			poly1305_p10le_4blocks(&dctx->h, src, srclen);
+			vsx_end();
+			src += srclen - (srclen % (POLY1305_BLOCK_SIZE * 4));
+			srclen %= POLY1305_BLOCK_SIZE * 4;
+		}
+		while (srclen >= POLY1305_BLOCK_SIZE) {
+			vsx_begin();
+			poly1305_64s(&dctx->h, src, POLY1305_BLOCK_SIZE, 1);
+			vsx_end();
+			srclen -= POLY1305_BLOCK_SIZE;
+			src += POLY1305_BLOCK_SIZE;
+		}
+	}
+
+	if (unlikely(srclen)) {
+		dctx->buflen = srclen;
+		memcpy(dctx->buf, src, srclen);
+	}
+
+	return 0;
+}
+
+static int crypto_poly1305_p10_final(struct shash_desc *desc, u8 *dst)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	if (unlikely(!dctx->sset))
+		return -ENOKEY;
+
+	if ((dctx->buflen)) {
+		dctx->buf[dctx->buflen++] = 1;
+		memset(dctx->buf + dctx->buflen, 0,
+		       POLY1305_BLOCK_SIZE - dctx->buflen);
+		vsx_begin();
+		poly1305_64s(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+		vsx_end();
+		dctx->buflen = 0;
+	}
+
+	poly1305_emit_64(&dctx->h, &dctx->s, dst);
+	return 0;
+}
+
+static struct shash_alg poly1305_alg = {
+	.digestsize	= POLY1305_DIGEST_SIZE,
+	.init		= crypto_poly1305_p10_init,
+	.update		= crypto_poly1305_p10_update,
+	.final		= crypto_poly1305_p10_final,
+	.descsize	= sizeof(struct poly1305_desc_ctx),
+	.base		= {
+		.cra_name		= "poly1305",
+		.cra_driver_name	= "poly1305-p10",
+		.cra_priority		= 300,
+		.cra_blocksize		= POLY1305_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+static int __init poly1305_p10_init(void)
+{
+	return crypto_register_shash(&poly1305_alg);
+}
+
+static void __exit poly1305_p10_exit(void)
+{
+	crypto_unregister_shash(&poly1305_alg);
+}
+
+module_cpu_feature_match(PPC_MODULE_FEATURE_P10, poly1305_p10_init);
+module_exit(poly1305_p10_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
+MODULE_DESCRIPTION("Optimized Poly1305 for P10");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-p10");

From 161fca7e3e905d5e99dbd79ed55d9212e6d80eb0 Mon Sep 17 00:00:00 2001
From: Danny Tsen <dtsen@linux.ibm.com>
Date: Wed, 26 Apr 2023 15:11:47 -0400
Subject: [PATCH 0155/2047] crypto: powerpc - Add chacha20/poly1305-p10 to
 Kconfig and Makefile

Defined CRYPTO_CHACHA20_P10 and CRYPTO POLY1305_P10 in Kconfig to
support optimized implementation for Power10 and later CPU.

Added new module driver chacha-p10-crypto and poly1305-p10-crypto.

Signed-off-by: Danny Tsen <dtsen@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/powerpc/crypto/Kconfig  | 26 ++++++++++++++++++++++++++
 arch/powerpc/crypto/Makefile |  4 ++++
 2 files changed, 30 insertions(+)

diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index ad1872518992..f25024afdda5 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -111,4 +111,30 @@ config CRYPTO_AES_GCM_P10
 	  Support for cryptographic acceleration instructions on Power10 or
 	  later CPU. This module supports stitched acceleration for AES/GCM.
 
+config CRYPTO_CHACHA20_P10
+	tristate "Ciphers: ChaCha20, XChacha20, XChacha12 (P10 or later)"
+	depends on PPC64 && CPU_LITTLE_ENDIAN
+	select CRYPTO_SKCIPHER
+	select CRYPTO_LIB_CHACHA_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+	help
+	  Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
+	  stream cipher algorithms
+
+	  Architecture: PowerPC64
+	  - Power10 or later
+	  - Little-endian
+
+config CRYPTO_POLY1305_P10
+	tristate "Hash functions: Poly1305 (P10 or later)"
+	depends on PPC64 && CPU_LITTLE_ENDIAN
+	select CRYPTO_HASH
+	select CRYPTO_LIB_POLY1305_GENERIC
+	help
+	  Poly1305 authenticator algorithm (RFC7539)
+
+	  Architecture: PowerPC64
+	  - Power10 or later
+	  - Little-endian
+
 endmenu
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 7b4f516abec1..ebdac1b9eb9a 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -14,6 +14,8 @@ obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
 obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o
 obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o
 obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o
+obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o
+obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o
 
 aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
 md5-ppc-y := md5-asm.o md5-glue.o
@@ -23,6 +25,8 @@ sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
 crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o
 crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
 aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o
+chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o
+poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o
 
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@

From 31ba6dd2298fd9c49ecf66200103b1e88055f7a0 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Mon, 19 Jun 2023 21:24:24 +0800
Subject: [PATCH 0156/2047] KEYS: fix kernel-doc warnings in verify_pefile

Fix kernel-doc warnings in verify_pefile:

crypto/asymmetric_keys/verify_pefile.c:423: warning: Excess function
parameter 'trust_keys' description in 'verify_pefile_signature'

crypto/asymmetric_keys/verify_pefile.c:423: warning: Function parameter
or member 'trusted_keys' not described in 'verify_pefile_signature'

Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/asymmetric_keys/verify_pefile.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c
index 22beaf2213a2..f440767bd727 100644
--- a/crypto/asymmetric_keys/verify_pefile.c
+++ b/crypto/asymmetric_keys/verify_pefile.c
@@ -391,7 +391,7 @@ error_no_desc:
  * verify_pefile_signature - Verify the signature on a PE binary image
  * @pebuf: Buffer containing the PE binary image
  * @pelen: Length of the binary image
- * @trust_keys: Signing certificate(s) to use as starting points
+ * @trusted_keys: Signing certificate(s) to use as starting points
  * @usage: The use to which the key is being put.
  *
  * Validate that the certificate chain inside the PKCS#7 message inside the PE

From babb80b3ecc6f40c962e13c654ebcd27f25ee327 Mon Sep 17 00:00:00 2001
From: Azeem Shaikh <azeemshaikh38@gmail.com>
Date: Tue, 20 Jun 2023 20:08:32 +0000
Subject: [PATCH 0157/2047] crypto: lrw,xts - Replace strlcpy with strscpy

strlcpy() reads the entire source buffer first.
This read may exceed the destination size limit.
This is both inefficient and can lead to linear read
overflows if a source string is not NUL-terminated [1].
In an effort to remove strlcpy() completely [2], replace
strlcpy() here with strscpy().

Direct replacement is safe here since return value of -errno
is used to check for truncation instead of sizeof(dest).

[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy
[2] https://github.com/KSPP/linux/issues/89

Signed-off-by: Azeem Shaikh <azeemshaikh38@gmail.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/lrw.c | 6 +++---
 crypto/xts.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/crypto/lrw.c b/crypto/lrw.c
index 1b0f76ba3eb5..59260aefed28 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -357,10 +357,10 @@ static int lrw_create(struct crypto_template *tmpl, struct rtattr **tb)
 	 * cipher name.
 	 */
 	if (!strncmp(cipher_name, "ecb(", 4)) {
-		unsigned len;
+		int len;
 
-		len = strlcpy(ecb_name, cipher_name + 4, sizeof(ecb_name));
-		if (len < 2 || len >= sizeof(ecb_name))
+		len = strscpy(ecb_name, cipher_name + 4, sizeof(ecb_name));
+		if (len < 2)
 			goto err_free_inst;
 
 		if (ecb_name[len - 1] != ')')
diff --git a/crypto/xts.c b/crypto/xts.c
index 09be909a6a1a..548b302c6c6a 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -396,10 +396,10 @@ static int xts_create(struct crypto_template *tmpl, struct rtattr **tb)
 	 * cipher name.
 	 */
 	if (!strncmp(cipher_name, "ecb(", 4)) {
-		unsigned len;
+		int len;
 
-		len = strlcpy(ctx->name, cipher_name + 4, sizeof(ctx->name));
-		if (len < 2 || len >= sizeof(ctx->name))
+		len = strscpy(ctx->name, cipher_name + 4, sizeof(ctx->name));
+		if (len < 2)
 			goto err_free_inst;
 
 		if (ctx->name[len - 1] != ')')

From 74c6df413f64f349e8ae4166d97324803bf55b58 Mon Sep 17 00:00:00 2001
From: "Chang S. Bae" <chang.seok.bae@intel.com>
Date: Wed, 21 Jun 2023 05:06:53 -0700
Subject: [PATCH 0158/2047] crypto: x86/aesni - Align the address before
 aes_set_key_common()

aes_set_key_common() performs runtime alignment to the void *raw_ctx
pointer. This facilitates consistent access to the 16byte-aligned
address during key extension.

However, the alignment is already handlded in the GCM-related setkey
functions before invoking the common function. Consequently, the
alignment in the common function is unnecessary for those functions.

To establish a consistent approach throughout the glue code, remove
the aes_ctx() call from its current location. Instead, place it at
each call site where the runtime alignment is currently absent.

Link: https://lore.kernel.org/lkml/20230605024623.GA4653@quark.localdomain/
Suggested-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Cc: linux-crypto@vger.kernel.org
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aesni-intel_glue.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index a5b0cb3efeba..c4eea7e746e7 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -229,10 +229,10 @@ static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
 	return (struct crypto_aes_ctx *)ALIGN(addr, align);
 }
 
-static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
+static int aes_set_key_common(struct crypto_tfm *tfm,
+			      struct crypto_aes_ctx *ctx,
 			      const u8 *in_key, unsigned int key_len)
 {
-	struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx);
 	int err;
 
 	if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
@@ -253,7 +253,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		       unsigned int key_len)
 {
-	return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len);
+	return aes_set_key_common(tfm, aes_ctx(crypto_tfm_ctx(tfm)), in_key, key_len);
 }
 
 static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
@@ -286,7 +286,7 @@ static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			         unsigned int len)
 {
 	return aes_set_key_common(crypto_skcipher_tfm(tfm),
-				  crypto_skcipher_ctx(tfm), key, len);
+				  aes_ctx(crypto_skcipher_ctx(tfm)), key, len);
 }
 
 static int ecb_encrypt(struct skcipher_request *req)
@@ -893,13 +893,13 @@ static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	keylen /= 2;
 
 	/* first half of xts-key is for crypt */
-	err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx,
+	err = aes_set_key_common(crypto_skcipher_tfm(tfm), aes_ctx(ctx->raw_crypt_ctx),
 				 key, keylen);
 	if (err)
 		return err;
 
 	/* second half of xts-key is for tweak */
-	return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx,
+	return aes_set_key_common(crypto_skcipher_tfm(tfm), aes_ctx(ctx->raw_tweak_ctx),
 				  key + keylen, keylen);
 }
 

From 0f942bdfe9d463be3073301519492f8d53c6b2d5 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Thu, 22 Jun 2023 10:26:35 +0100
Subject: [PATCH 0159/2047] crypto: qat - change value of default idle filter

The power management configuration of 4xxx devices is too aggressive
and in some conditions the device might be prematurely put to a low
power state.
Increase the idle filter value to prevent that.
In future, this will be set by firmware.

Fixes: e5745f34113b ("crypto: qat - enable power management for QAT GEN4")
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Damian Muszynski <damian.muszynski@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/intel/qat/qat_common/adf_gen4_pm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_pm.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_pm.h
index dd112923e006..c2768762cca3 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_pm.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_pm.h
@@ -35,7 +35,7 @@
 #define ADF_GEN4_PM_MSG_PENDING			BIT(0)
 #define ADF_GEN4_PM_MSG_PAYLOAD_BIT_MASK	GENMASK(28, 1)
 
-#define ADF_GEN4_PM_DEFAULT_IDLE_FILTER		(0x0)
+#define ADF_GEN4_PM_DEFAULT_IDLE_FILTER		(0x6)
 #define ADF_GEN4_PM_MAX_IDLE_FILTER		(0x7)
 #define ADF_GEN4_PM_DEFAULT_IDLE_SUPPORT	(0x1)
 

From 36124dea164cf684869e856b2ada23e8adab5f03 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 14 Jul 2023 12:17:45 +0300
Subject: [PATCH 0160/2047] spi: Remove code duplication in spi_add_device*()

The commit 0c79378c0199 ("spi: add ancillary device support")
added a dozen of duplicating lines of code. We may move them
to the __spi_add_device(). Note, that the code may be called
under the mutex.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Link: https://lore.kernel.org/r/20230714091748.89681-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 32 ++++++++++----------------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ae2693ba1744..8e70f4183e62 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -631,6 +631,16 @@ static int __spi_add_device(struct spi_device *spi)
 	struct device *dev = ctlr->dev.parent;
 	int status;
 
+	/* Chipselects are numbered 0..max; validate. */
+	if (spi_get_chipselect(spi, 0) >= ctlr->num_chipselect) {
+		dev_err(dev, "cs%d >= max %d\n", spi_get_chipselect(spi, 0),
+			ctlr->num_chipselect);
+		return -EINVAL;
+	}
+
+	/* Set the bus ID string */
+	spi_dev_set_name(spi);
+
 	/*
 	 * We need to make sure there's no other device with this
 	 * chipselect **BEFORE** we call setup(), else we'll trash
@@ -689,19 +699,8 @@ static int __spi_add_device(struct spi_device *spi)
 int spi_add_device(struct spi_device *spi)
 {
 	struct spi_controller *ctlr = spi->controller;
-	struct device *dev = ctlr->dev.parent;
 	int status;
 
-	/* Chipselects are numbered 0..max; validate. */
-	if (spi_get_chipselect(spi, 0) >= ctlr->num_chipselect) {
-		dev_err(dev, "cs%d >= max %d\n", spi_get_chipselect(spi, 0),
-			ctlr->num_chipselect);
-		return -EINVAL;
-	}
-
-	/* Set the bus ID string */
-	spi_dev_set_name(spi);
-
 	mutex_lock(&ctlr->add_lock);
 	status = __spi_add_device(spi);
 	mutex_unlock(&ctlr->add_lock);
@@ -712,17 +711,6 @@ EXPORT_SYMBOL_GPL(spi_add_device);
 static int spi_add_device_locked(struct spi_device *spi)
 {
 	struct spi_controller *ctlr = spi->controller;
-	struct device *dev = ctlr->dev.parent;
-
-	/* Chipselects are numbered 0..max; validate. */
-	if (spi_get_chipselect(spi, 0) >= ctlr->num_chipselect) {
-		dev_err(dev, "cs%d >= max %d\n", spi_get_chipselect(spi, 0),
-			ctlr->num_chipselect);
-		return -EINVAL;
-	}
-
-	/* Set the bus ID string */
-	spi_dev_set_name(spi);
 
 	WARN_ON(!mutex_is_locked(&ctlr->add_lock));
 	return __spi_add_device(spi);

From 7b5c6a545b3491fb785c75cee60e6b0c35a4de1b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 14 Jul 2023 12:17:46 +0300
Subject: [PATCH 0161/2047] spi: Kill spi_add_device_locked()

Now, spi_add_device_locked() has just a line on top of __spi_add_device().
Besides that, it has a single caller. So, just kill it and embed its parts
into the caller.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230714091748.89681-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 8e70f4183e62..05f702339182 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -708,14 +708,6 @@ int spi_add_device(struct spi_device *spi)
 }
 EXPORT_SYMBOL_GPL(spi_add_device);
 
-static int spi_add_device_locked(struct spi_device *spi)
-{
-	struct spi_controller *ctlr = spi->controller;
-
-	WARN_ON(!mutex_is_locked(&ctlr->add_lock));
-	return __spi_add_device(spi);
-}
-
 /**
  * spi_new_device - instantiate one new SPI device
  * @ctlr: Controller to which device is connected
@@ -2417,11 +2409,12 @@ static void of_register_spi_devices(struct spi_controller *ctlr) { }
 struct spi_device *spi_new_ancillary_device(struct spi_device *spi,
 					     u8 chip_select)
 {
+	struct spi_controller *ctlr = spi->controller;
 	struct spi_device *ancillary;
 	int rc = 0;
 
 	/* Alloc an spi_device */
-	ancillary = spi_alloc_device(spi->controller);
+	ancillary = spi_alloc_device(ctlr);
 	if (!ancillary) {
 		rc = -ENOMEM;
 		goto err_out;
@@ -2436,8 +2429,10 @@ struct spi_device *spi_new_ancillary_device(struct spi_device *spi,
 	ancillary->max_speed_hz = spi->max_speed_hz;
 	ancillary->mode = spi->mode;
 
+	WARN_ON(!mutex_is_locked(&ctlr->add_lock));
+
 	/* Register the new device */
-	rc = spi_add_device_locked(ancillary);
+	rc = __spi_add_device(ancillary);
 	if (rc) {
 		dev_err(&spi->dev, "failed to register ancillary device\n");
 		goto err_out;

From 169f5312dc46deb986e368b6828bedbedd297f6e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 14 Jul 2023 12:17:47 +0300
Subject: [PATCH 0162/2047] spi: Use BITS_TO_BYTES()

BITS_TO_BYTES() is the existing macro which takes care about full
bytes that may fully hold the given amount of bits. Use it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230714091748.89681-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 05f702339182..8d6304cb061e 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3877,11 +3877,9 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 	 */
 	if ((spi->mode & SPI_CS_WORD) && (!(ctlr->mode_bits & SPI_CS_WORD) ||
 					  spi_get_csgpiod(spi, 0))) {
-		size_t maxsize;
+		size_t maxsize = BITS_TO_BYTES(spi->bits_per_word);
 		int ret;
 
-		maxsize = (spi->bits_per_word + 7) / 8;
-
 		/* spi_split_transfers_maxsize() requires message->spi */
 		message->spi = spi;
 

From 75e308ffc4f0d36b895f1110ece8b77d4116fdb1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 14 Jul 2023 12:17:48 +0300
Subject: [PATCH 0163/2047] spi: Use struct_size() helper

The Documentation/process/deprecated.rst suggests to use flexible array
members to provide a way to declare having a dynamically sized set of
trailing elements in a structure.This makes code robust agains bunch of
the issues described in the documentation, main of which is about the
correctness of the sizeof() calculation for this data structure.

Due to above, prefer struct_size() over open-coded versions.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230714091748.89681-5-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 04daf61dfd3f..7f8b478fdeb3 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -13,6 +13,7 @@
 #include <linux/gpio/consumer.h>
 #include <linux/kthread.h>
 #include <linux/mod_devicetable.h>
+#include <linux/overflow.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/u64_stats_sync.h>
@@ -1085,6 +1086,8 @@ struct spi_transfer {
  * @state: for use by whichever driver currently owns the message
  * @resources: for resource management when the SPI message is processed
  * @prepared: spi_prepare_message was called for the this message
+ * @t: for use with spi_message_alloc() when message and transfers have
+ *	been allocated together
  *
  * A @spi_message is used to execute an atomic sequence of data transfers,
  * each represented by a struct spi_transfer.  The sequence is "atomic"
@@ -1139,6 +1142,9 @@ struct spi_message {
 
 	/* List of spi_res resources when the SPI message is processed */
 	struct list_head        resources;
+
+	/* For embedding transfers into the memory of the message */
+	struct spi_transfer	t[];
 };
 
 static inline void spi_message_init_no_memset(struct spi_message *m)
@@ -1199,16 +1205,13 @@ static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags
 {
 	struct spi_message *m;
 
-	m = kzalloc(sizeof(struct spi_message)
-			+ ntrans * sizeof(struct spi_transfer),
-			flags);
+	m = kzalloc(struct_size(m, t, ntrans), flags);
 	if (m) {
 		unsigned i;
-		struct spi_transfer *t = (struct spi_transfer *)(m + 1);
 
 		spi_message_init_no_memset(m);
-		for (i = 0; i < ntrans; i++, t++)
-			spi_message_add_tail(t, m);
+		for (i = 0; i < ntrans; i++)
+			spi_message_add_tail(&m->t[i], m);
 	}
 	return m;
 }

From 0d5a4f8f775ff990142cdc810a84eae078589d27 Mon Sep 17 00:00:00 2001
From: Wang Ming <machel@vivo.com>
Date: Thu, 13 Jul 2023 20:05:42 +0800
Subject: [PATCH 0164/2047] fs: Fix error checking for d_hash_and_lookup()

The d_hash_and_lookup() function returns error pointers or NULL.
Most incorrect error checks were fixed, but the one in int path_pts()
was forgotten.

Fixes: eedf265aa003 ("devpts: Make each mount of devpts an independent filesystem.")
Signed-off-by: Wang Ming <machel@vivo.com>
Message-Id: <20230713120555.7025-1-machel@vivo.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/namei.c b/fs/namei.c
index e56ff39a79bc..2bae29ea52ff 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2890,7 +2890,7 @@ int path_pts(struct path *path)
 	dput(path->dentry);
 	path->dentry = parent;
 	child = d_hash_and_lookup(parent, &this);
-	if (!child)
+	if (IS_ERR_OR_NULL(child))
 		return -ENOENT;
 
 	path->dentry = child;

From 0a0e2ea642f6e4a7fe4953f8f44a310d190c39ba Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 10 Jul 2023 17:03:29 +0300
Subject: [PATCH 0165/2047] ACPI: processor: Move MWAIT quirk out of
 acpi_processor.c

Commit 2a2a64714d9c ("ACPI: Disable MWAIT via DMI on broken Compal board")
introduced a workaround for MWAIT for a specific x86 system.

Move the code outside of acpi_processor.c to acpi/x86/ directory for
consistency and rename the functions associated with it, so their names
start with "acpi_proc_quirk_" to make the goal obvious.

No intentional functional impact.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits, two function names changed ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/internal.h      |  2 ++
 drivers/acpi/processor_pdc.c | 29 +----------------------------
 drivers/acpi/x86/utils.c     | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index f4148dc50b9c..a5b7ebfd99be 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -153,6 +153,8 @@ int acpi_wakeup_device_init(void);
    -------------------------------------------------------------------------- */
 #ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC
 void acpi_early_processor_set_pdc(void);
+
+void acpi_proc_quirk_mwait_check(void);
 #else
 static inline void acpi_early_processor_set_pdc(void) {}
 #endif
diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c
index 18fb04523f93..06583cf37365 100644
--- a/drivers/acpi/processor_pdc.c
+++ b/drivers/acpi/processor_pdc.c
@@ -174,36 +174,9 @@ early_init_pdc(acpi_handle handle, u32 lvl, void *context, void **rv)
 	return AE_OK;
 }
 
-static int __init set_no_mwait(const struct dmi_system_id *id)
-{
-	pr_notice("%s detected - disabling mwait for CPU C-states\n",
-		  id->ident);
-	boot_option_idle_override = IDLE_NOMWAIT;
-	return 0;
-}
-
-static const struct dmi_system_id processor_idle_dmi_table[] __initconst = {
-	{
-	set_no_mwait, "Extensa 5220", {
-	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-	DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
-	DMI_MATCH(DMI_PRODUCT_VERSION, "0100"),
-	DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL},
-	{},
-};
-
-static void __init processor_dmi_check(void)
-{
-	/*
-	 * Check whether the system is DMI table. If yes, OSPM
-	 * should not use mwait for CPU-states.
-	 */
-	dmi_check_system(processor_idle_dmi_table);
-}
-
 void __init acpi_early_processor_set_pdc(void)
 {
-	processor_dmi_check();
+	acpi_proc_quirk_mwait_check();
 
 	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
 			    ACPI_UINT32_MAX,
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index c2b925f8cd4e..63d834dd3811 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -518,3 +518,38 @@ bool acpi_quirk_skip_acpi_ac_and_battery(void)
 	return false;
 }
 EXPORT_SYMBOL_GPL(acpi_quirk_skip_acpi_ac_and_battery);
+
+/* This section provides a workaround for a specific x86 system
+ * which requires disabling of mwait to work correctly.
+ */
+static int __init acpi_proc_quirk_set_no_mwait(const struct dmi_system_id *id)
+{
+	pr_notice("%s detected - disabling mwait for CPU C-states\n",
+		  id->ident);
+	boot_option_idle_override = IDLE_NOMWAIT;
+	return 0;
+}
+
+static const struct dmi_system_id acpi_proc_quirk_mwait_dmi_table[] __initconst = {
+	{
+		.callback = acpi_proc_quirk_set_no_mwait,
+		.ident = "Extensa 5220",
+		.matches =  {
+			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "0100"),
+			DMI_MATCH(DMI_BOARD_NAME, "Columbia"),
+		},
+		.driver_data = NULL,
+	},
+	{}
+};
+
+void __init acpi_proc_quirk_mwait_check(void)
+{
+	/*
+	 * Check whether the system is DMI table. If yes, OSPM
+	 * should not use mwait for CPU-states.
+	 */
+	dmi_check_system(acpi_proc_quirk_mwait_dmi_table);
+}

From 3272a4aadc4c91f1f9518cf6638e7e81f9ac0336 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 10 Jul 2023 17:03:30 +0300
Subject: [PATCH 0166/2047] ACPI: processor: Move
 processor_physically_present() to acpi_processor.c

Since _PDC method is deprecated and a preferred method of communicating
OSPM processor power management capabilities is _OSC, there is a need to
move function checking whether processor is present as this logic is not
_PDC specific.

Move processor_physically_present() to acpi_processor.c.

No intentional functional impact.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_processor.c | 52 ++++++++++++++++++++++++++++++++++-
 drivers/acpi/internal.h       |  1 +
 drivers/acpi/processor_pdc.c  | 49 ---------------------------------
 3 files changed, 52 insertions(+), 50 deletions(-)

diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index f9aa02cac6d1..ebb4efd3d0aa 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -12,6 +12,7 @@
 
 #include <linux/acpi.h>
 #include <linux/device.h>
+#include <linux/dmi.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -21,6 +22,8 @@
 
 #include <asm/cpu.h>
 
+#include <xen/xen.h>
+
 #include "internal.h"
 
 DEFINE_PER_CPU(struct acpi_processor *, processors);
@@ -508,7 +511,54 @@ static void acpi_processor_remove(struct acpi_device *device)
 }
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
-#ifdef CONFIG_X86
+#ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC
+bool __init processor_physically_present(acpi_handle handle)
+{
+	int cpuid, type;
+	u32 acpi_id;
+	acpi_status status;
+	acpi_object_type acpi_type;
+	unsigned long long tmp;
+	union acpi_object object = {};
+	struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
+
+	status = acpi_get_type(handle, &acpi_type);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	switch (acpi_type) {
+	case ACPI_TYPE_PROCESSOR:
+		status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+		if (ACPI_FAILURE(status))
+			return false;
+		acpi_id = object.processor.proc_id;
+		break;
+	case ACPI_TYPE_DEVICE:
+		status = acpi_evaluate_integer(handle, METHOD_NAME__UID,
+					       NULL, &tmp);
+		if (ACPI_FAILURE(status))
+			return false;
+		acpi_id = tmp;
+		break;
+	default:
+		return false;
+	}
+
+	if (xen_initial_domain())
+		/*
+		 * When running as a Xen dom0 the number of processors Linux
+		 * sees can be different from the real number of processors on
+		 * the system, and we still need to execute _PDC or _OSC for
+		 * all of them.
+		 */
+		return xen_processor_present(acpi_id);
+
+	type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
+	cpuid = acpi_get_cpuid(handle, type, acpi_id);
+
+	return !invalid_logical_cpuid(cpuid);
+}
+
 static bool acpi_hwp_native_thermal_lvt_set;
 static acpi_status __init acpi_hwp_native_thermal_lvt_osc(acpi_handle handle,
 							  u32 lvl,
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index a5b7ebfd99be..ed2599ae3e83 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -155,6 +155,7 @@ int acpi_wakeup_device_init(void);
 void acpi_early_processor_set_pdc(void);
 
 void acpi_proc_quirk_mwait_check(void);
+bool processor_physically_present(acpi_handle handle);
 #else
 static inline void acpi_early_processor_set_pdc(void) {}
 #endif
diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c
index 06583cf37365..4c5becd65737 100644
--- a/drivers/acpi/processor_pdc.c
+++ b/drivers/acpi/processor_pdc.c
@@ -9,61 +9,12 @@
 
 #define pr_fmt(fmt) "ACPI: " fmt
 
-#include <linux/dmi.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <acpi/processor.h>
 
-#include <xen/xen.h>
-
 #include "internal.h"
 
-static bool __init processor_physically_present(acpi_handle handle)
-{
-	int cpuid, type;
-	u32 acpi_id;
-	acpi_status status;
-	acpi_object_type acpi_type;
-	unsigned long long tmp;
-	union acpi_object object = { 0 };
-	struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
-
-	status = acpi_get_type(handle, &acpi_type);
-	if (ACPI_FAILURE(status))
-		return false;
-
-	switch (acpi_type) {
-	case ACPI_TYPE_PROCESSOR:
-		status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
-		if (ACPI_FAILURE(status))
-			return false;
-		acpi_id = object.processor.proc_id;
-		break;
-	case ACPI_TYPE_DEVICE:
-		status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
-		if (ACPI_FAILURE(status))
-			return false;
-		acpi_id = tmp;
-		break;
-	default:
-		return false;
-	}
-
-	if (xen_initial_domain())
-		/*
-		 * When running as a Xen dom0 the number of processors Linux
-		 * sees can be different from the real number of processors on
-		 * the system, and we still need to execute _PDC for all of
-		 * them.
-		 */
-		return xen_processor_present(acpi_id);
-
-	type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
-	cpuid = acpi_get_cpuid(handle, type, acpi_id);
-
-	return !invalid_logical_cpuid(cpuid);
-}
-
 static void acpi_set_pdc_bits(u32 *buf)
 {
 	buf[0] = ACPI_PDC_REVISION_ID;

From c5e4d05db8602acebb532a393f10fb4cc4761467 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 10 Jul 2023 17:03:31 +0300
Subject: [PATCH 0167/2047] ACPI: processor: Refactor arch_acpi_set_pdc_bits()

The capabilities buffer modified by the arch_acpi_set_pdc_bits() is not
_PDC-specific, as it can be used by _OSC too.

Change the name of that function to better reflect its independence from
_PDC and make it take the capabilities buffer address as the argument
directly, without any offset, as _OSC and _PDC use different capabilities
buffer offsets.

No intentional functional impact.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/ia64/include/asm/acpi.h |  4 ++--
 arch/x86/include/asm/acpi.h  | 10 +++++-----
 drivers/acpi/processor_pdc.c |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index 87927eb824cc..43797cb44383 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -69,9 +69,9 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
 #endif
 
 static inline bool arch_has_acpi_pdc(void) { return true; }
-static inline void arch_acpi_set_pdc_bits(u32 *buf)
+static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
 {
-	buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
+	*cap |= ACPI_PDC_EST_CAPABILITY_SMP;
 }
 
 #ifdef CONFIG_ACPI_NUMA
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 8eb74cf386db..6a498d1781e7 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -100,23 +100,23 @@ static inline bool arch_has_acpi_pdc(void)
 		c->x86_vendor == X86_VENDOR_CENTAUR);
 }
 
-static inline void arch_acpi_set_pdc_bits(u32 *buf)
+static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
 {
 	struct cpuinfo_x86 *c = &cpu_data(0);
 
-	buf[2] |= ACPI_PDC_C_CAPABILITY_SMP;
+	*cap |= ACPI_PDC_C_CAPABILITY_SMP;
 
 	if (cpu_has(c, X86_FEATURE_EST))
-		buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP;
+		*cap |= ACPI_PDC_EST_CAPABILITY_SWSMP;
 
 	if (cpu_has(c, X86_FEATURE_ACPI))
-		buf[2] |= ACPI_PDC_T_FFH;
+		*cap |= ACPI_PDC_T_FFH;
 
 	/*
 	 * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
 	 */
 	if (!cpu_has(c, X86_FEATURE_MWAIT))
-		buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
+		*cap &= ~(ACPI_PDC_C_C2C3_FFH);
 }
 
 static inline bool acpi_has_cpu_in_madt(void)
diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c
index 4c5becd65737..9e7b5ddb9ec2 100644
--- a/drivers/acpi/processor_pdc.c
+++ b/drivers/acpi/processor_pdc.c
@@ -24,7 +24,7 @@ static void acpi_set_pdc_bits(u32 *buf)
 	buf[2] = ACPI_PDC_SMP_T_SWCOORD;
 
 	/* Twiddle arch-specific bits needed for _PDC */
-	arch_acpi_set_pdc_bits(buf);
+	arch_acpi_set_proc_cap_bits(&buf[2]);
 }
 
 static struct acpi_object_list *acpi_processor_alloc_pdc(void)

From c9e6c5e64f0c00612dc429bc401da0ea673e5130 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 10 Jul 2023 17:03:32 +0300
Subject: [PATCH 0168/2047] ACPI: processor: Rename ACPI_PDC symbols

The prefix in the names of the ACPI_PDC symbols suggests that they are
only relevant for _PDC, but in fact they can also be used in the _OSC.

Change that prefix to a more generic ACPI_PROC_CAP that will better
reflect the purpose of those symbols as they represent bits in a general
processor capabilities buffer.

Rename pdc_intel.h to proc_cap_intel.h to follow the change of the
symbol name prefix.

No intentional functional impact.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/ia64/include/asm/acpi.h  |  4 ++--
 arch/x86/include/asm/acpi.h   | 11 +++++-----
 arch/x86/xen/enlighten_pv.c   |  8 +++----
 drivers/acpi/processor_pdc.c  |  2 +-
 include/acpi/pdc_intel.h      | 36 --------------------------------
 include/acpi/proc_cap_intel.h | 39 +++++++++++++++++++++++++++++++++++
 6 files changed, 52 insertions(+), 48 deletions(-)
 delete mode 100644 include/acpi/pdc_intel.h
 create mode 100644 include/acpi/proc_cap_intel.h

diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index 43797cb44383..58500a964238 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -11,7 +11,7 @@
 
 #ifdef __KERNEL__
 
-#include <acpi/pdc_intel.h>
+#include <acpi/proc_cap_intel.h>
 
 #include <linux/init.h>
 #include <linux/numa.h>
@@ -71,7 +71,7 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
 static inline bool arch_has_acpi_pdc(void) { return true; }
 static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
 {
-	*cap |= ACPI_PDC_EST_CAPABILITY_SMP;
+	*cap |= ACPI_PROC_CAP_EST_CAPABILITY_SMP;
 }
 
 #ifdef CONFIG_ACPI_NUMA
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 6a498d1781e7..ce5ad6a496e6 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -6,7 +6,7 @@
  *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
  *  Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
  */
-#include <acpi/pdc_intel.h>
+#include <acpi/proc_cap_intel.h>
 
 #include <asm/numa.h>
 #include <asm/fixmap.h>
@@ -104,19 +104,20 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
 {
 	struct cpuinfo_x86 *c = &cpu_data(0);
 
-	*cap |= ACPI_PDC_C_CAPABILITY_SMP;
+	*cap |= ACPI_PROC_CAP_C_CAPABILITY_SMP;
 
 	if (cpu_has(c, X86_FEATURE_EST))
-		*cap |= ACPI_PDC_EST_CAPABILITY_SWSMP;
+		*cap |= ACPI_PROC_CAP_EST_CAPABILITY_SWSMP;
 
 	if (cpu_has(c, X86_FEATURE_ACPI))
-		*cap |= ACPI_PDC_T_FFH;
+		*cap |= ACPI_PROC_CAP_T_FFH;
 
 	/*
 	 * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
 	 */
 	if (!cpu_has(c, X86_FEATURE_MWAIT))
-		*cap &= ~(ACPI_PDC_C_C2C3_FFH);
+		*cap &= ~(ACPI_PROC_CAP_C_C2C3_FFH);
+
 }
 
 static inline bool acpi_has_cpu_in_madt(void)
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 93b658248d01..a3864e2167a8 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -79,7 +79,7 @@
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
 #include <asm/acpi.h>
-#include <acpi/pdc_intel.h>
+#include <acpi/proc_cap_intel.h>
 #include <acpi/processor.h>
 #include <xen/interface/platform.h>
 #endif
@@ -288,17 +288,17 @@ static bool __init xen_check_mwait(void)
 
 	native_cpuid(&ax, &bx, &cx, &dx);
 
-	/* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
+	/* Ask the Hypervisor whether to clear ACPI_PROC_CAP_C_C2C3_FFH. If so,
 	 * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
 	 */
 	buf[0] = ACPI_PDC_REVISION_ID;
 	buf[1] = 1;
-	buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
+	buf[2] = (ACPI_PROC_CAP_C_CAPABILITY_SMP | ACPI_PROC_CAP_EST_CAPABILITY_SWSMP);
 
 	set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
 
 	if ((HYPERVISOR_platform_op(&op) == 0) &&
-	    (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
+	    (buf[2] & (ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH))) {
 		cpuid_leaf5_ecx_val = cx;
 		cpuid_leaf5_edx_val = dx;
 	}
diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c
index 9e7b5ddb9ec2..7948b5d41907 100644
--- a/drivers/acpi/processor_pdc.c
+++ b/drivers/acpi/processor_pdc.c
@@ -21,7 +21,7 @@ static void acpi_set_pdc_bits(u32 *buf)
 	buf[1] = 1;
 
 	/* Enable coordination with firmware's _TSD info */
-	buf[2] = ACPI_PDC_SMP_T_SWCOORD;
+	buf[2] = ACPI_PROC_CAP_SMP_T_SWCOORD;
 
 	/* Twiddle arch-specific bits needed for _PDC */
 	arch_acpi_set_proc_cap_bits(&buf[2]);
diff --git a/include/acpi/pdc_intel.h b/include/acpi/pdc_intel.h
deleted file mode 100644
index 967c552d1cd3..000000000000
--- a/include/acpi/pdc_intel.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/* _PDC bit definition for Intel processors */
-
-#ifndef __PDC_INTEL_H__
-#define __PDC_INTEL_H__
-
-#define ACPI_PDC_P_FFH			(0x0001)
-#define ACPI_PDC_C_C1_HALT		(0x0002)
-#define ACPI_PDC_T_FFH			(0x0004)
-#define ACPI_PDC_SMP_C1PT		(0x0008)
-#define ACPI_PDC_SMP_C2C3		(0x0010)
-#define ACPI_PDC_SMP_P_SWCOORD		(0x0020)
-#define ACPI_PDC_SMP_C_SWCOORD		(0x0040)
-#define ACPI_PDC_SMP_T_SWCOORD		(0x0080)
-#define ACPI_PDC_C_C1_FFH		(0x0100)
-#define ACPI_PDC_C_C2C3_FFH		(0x0200)
-#define ACPI_PDC_SMP_P_HWCOORD		(0x0800)
-
-#define ACPI_PDC_EST_CAPABILITY_SMP	(ACPI_PDC_SMP_C1PT | \
-					 ACPI_PDC_C_C1_HALT | \
-					 ACPI_PDC_P_FFH)
-
-#define ACPI_PDC_EST_CAPABILITY_SWSMP	(ACPI_PDC_SMP_C1PT | \
-					 ACPI_PDC_C_C1_HALT | \
-					 ACPI_PDC_SMP_P_SWCOORD | \
-					 ACPI_PDC_SMP_P_HWCOORD | \
-					 ACPI_PDC_P_FFH)
-
-#define ACPI_PDC_C_CAPABILITY_SMP	(ACPI_PDC_SMP_C2C3  | \
-					 ACPI_PDC_SMP_C1PT  | \
-					 ACPI_PDC_C_C1_HALT | \
-					 ACPI_PDC_C_C1_FFH  | \
-					 ACPI_PDC_C_C2C3_FFH)
-
-#endif				/* __PDC_INTEL_H__ */
diff --git a/include/acpi/proc_cap_intel.h b/include/acpi/proc_cap_intel.h
new file mode 100644
index 000000000000..57e5e2628abb
--- /dev/null
+++ b/include/acpi/proc_cap_intel.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Vendor specific processor capabilities bit definition
+ * for Intel processors. Those bits are used to convey OSPM
+ * power management capabilities to the platform.
+ */
+
+#ifndef __PROC_CAP_INTEL_H__
+#define __PROC_CAP_INTEL_H__
+
+#define ACPI_PROC_CAP_P_FFH			(0x0001)
+#define ACPI_PROC_CAP_C_C1_HALT			(0x0002)
+#define ACPI_PROC_CAP_T_FFH			(0x0004)
+#define ACPI_PROC_CAP_SMP_C1PT			(0x0008)
+#define ACPI_PROC_CAP_SMP_C2C3			(0x0010)
+#define ACPI_PROC_CAP_SMP_P_SWCOORD		(0x0020)
+#define ACPI_PROC_CAP_SMP_C_SWCOORD		(0x0040)
+#define ACPI_PROC_CAP_SMP_T_SWCOORD		(0x0080)
+#define ACPI_PROC_CAP_C_C1_FFH			(0x0100)
+#define ACPI_PROC_CAP_C_C2C3_FFH		(0x0200)
+#define ACPI_PROC_CAP_SMP_P_HWCOORD		(0x0800)
+
+#define ACPI_PROC_CAP_EST_CAPABILITY_SMP	(ACPI_PROC_CAP_SMP_C1PT | \
+						 ACPI_PROC_CAP_C_C1_HALT | \
+						 ACPI_PROC_CAP_P_FFH)
+
+#define ACPI_PROC_CAP_EST_CAPABILITY_SWSMP	(ACPI_PROC_CAP_SMP_C1PT | \
+						 ACPI_PROC_CAP_C_C1_HALT | \
+						 ACPI_PROC_CAP_SMP_P_SWCOORD | \
+						 ACPI_PROC_CAP_SMP_P_HWCOORD | \
+						 ACPI_PROC_CAP_P_FFH)
+
+#define ACPI_PROC_CAP_C_CAPABILITY_SMP		(ACPI_PROC_CAP_SMP_C2C3  | \
+						 ACPI_PROC_CAP_SMP_C1PT  | \
+						 ACPI_PROC_CAP_C_C1_HALT | \
+						 ACPI_PROC_CAP_C_C1_FFH  | \
+						 ACPI_PROC_CAP_C_C2C3_FFH)
+
+#endif /* __PROC_CAP_INTEL_H__ */

From 4f37ab5e05df9a30d64663266c7f4ba3a1407c68 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 10 Jul 2023 17:03:33 +0300
Subject: [PATCH 0169/2047] ACPI: processor: Clear C_C2C3_FFH and C_C1_FFH in
 arch_acpi_set_proc_cap_bits()

Currently arch_acpi_set_proc_cap_bits() clears ACPI_PDC_C_C2C3_FFH bit in
case MWAIT instruction is not supported.

It should also clear ACPI_PDC_C_C1_FFH, as when MWAIT is not supported,
C1 is entered by executing the HLT instruction.

Quote from the C_C1_FFH description:

"If set, OSPM is capable of performing native C State instructions (beyond
halt) for the C1 handler in multi-processor configurations". As without
MWAIT there is no native C-state instructions beyond HALT, this bit
should be toggled off."

Clear ACPI_PDC_C_C1_FFH and ACPI_PDC_C_C2C3_FFH in
arch_acpi_set_proc_cap_bits() in case MWAIT is not supported or
overridden.

Remove setting those bits from the processor_pdc.c code.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/include/asm/acpi.h  |  9 +++++----
 drivers/acpi/processor_pdc.c | 14 --------------
 2 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index ce5ad6a496e6..d615238bcd78 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -113,11 +113,12 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
 		*cap |= ACPI_PROC_CAP_T_FFH;
 
 	/*
-	 * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
+	 * If mwait/monitor is unsupported, C_C1_FFH and
+	 * C2/C3_FFH will be disabled.
 	 */
-	if (!cpu_has(c, X86_FEATURE_MWAIT))
-		*cap &= ~(ACPI_PROC_CAP_C_C2C3_FFH);
-
+	if (!cpu_has(c, X86_FEATURE_MWAIT) ||
+	    boot_option_idle_override == IDLE_NOMWAIT)
+		*cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH);
 }
 
 static inline bool acpi_has_cpu_in_madt(void)
diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c
index 7948b5d41907..3c6035262dc2 100644
--- a/drivers/acpi/processor_pdc.c
+++ b/drivers/acpi/processor_pdc.c
@@ -74,20 +74,6 @@ acpi_processor_eval_pdc(acpi_handle handle, struct acpi_object_list *pdc_in)
 {
 	acpi_status status = AE_OK;
 
-	if (boot_option_idle_override == IDLE_NOMWAIT) {
-		/*
-		 * If mwait is disabled for CPU C-states, the C2C3_FFH access
-		 * mode will be disabled in the parameter of _PDC object.
-		 * Of course C1_FFH access mode will also be disabled.
-		 */
-		union acpi_object *obj;
-		u32 *buffer = NULL;
-
-		obj = pdc_in->pointer;
-		buffer = (u32 *)(obj->buffer.pointer);
-		buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH);
-
-	}
 	status = acpi_evaluate_object(handle, "_PDC", pdc_in, NULL);
 
 	if (ACPI_FAILURE(status))

From b9e8d0168a7a85e05d031130b247099018e2194e Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 10 Jul 2023 17:03:34 +0300
Subject: [PATCH 0170/2047] ACPI: processor: Set CAP_SMP_T_SWCOORD in
 arch_acpi_set_proc_cap_bits()

Currently, ACPI_PROC_CAP_SMP_T_SWCOORD is set in acpi_set_pdc_bits(), but
it is not _PDC-specific. It should be set along with the other processor
capability bits.

Move the setting of ACPI_PROC_CAP_SMP_T_SWCOORD to
arch_acpi_set_proc_cap_bits().

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/include/asm/acpi.h  | 3 +++
 drivers/acpi/processor_pdc.c | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index d615238bcd78..6f6752a2ea36 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -106,6 +106,9 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
 
 	*cap |= ACPI_PROC_CAP_C_CAPABILITY_SMP;
 
+	/* Enable coordination with firmware's _TSD info */
+	*cap |= ACPI_PROC_CAP_SMP_T_SWCOORD;
+
 	if (cpu_has(c, X86_FEATURE_EST))
 		*cap |= ACPI_PROC_CAP_EST_CAPABILITY_SWSMP;
 
diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c
index 3c6035262dc2..1a8591e9a9bf 100644
--- a/drivers/acpi/processor_pdc.c
+++ b/drivers/acpi/processor_pdc.c
@@ -20,9 +20,6 @@ static void acpi_set_pdc_bits(u32 *buf)
 	buf[0] = ACPI_PDC_REVISION_ID;
 	buf[1] = 1;
 
-	/* Enable coordination with firmware's _TSD info */
-	buf[2] = ACPI_PROC_CAP_SMP_T_SWCOORD;
-
 	/* Twiddle arch-specific bits needed for _PDC */
 	arch_acpi_set_proc_cap_bits(&buf[2]);
 }

From 5ba30be7fd6e3fdff1e0b01f24173a5e6a3183b2 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 10 Jul 2023 17:03:35 +0300
Subject: [PATCH 0171/2047] ACPI: processor: Introduce acpi_processor_osc()

The processor _OSC method is already used for a workaround introduced
in commit a21211672c9a ("ACPI / processor: Request native thermal
interrupt handling via _OSC"), but in accordance with ACPI 6.5 (and
earlier), it should be used for negotiating all of the processor
capabilities instead of _PDC (which has been deprecated since ACPI 3.0
and got removed from ACPI 6.5 entirely).

Create a new callback function called acpi_processor_osc() to be invoked
for every processor object and processor device in the ACPI namespace, in
analogy with the already existing acpi_hwp_native_thermal_lvt_osc().

Make this function implement the workaround mentioned above and convey
all of the OSPM processor support information to the platform firmware
by setting all of the appropriate processor capabilities bits before
evaluating _OSC for the given processor. For this purpose, make it
call arch_acpi_set_proc_cap_bits() and modify the latter to set
ACPI_PROC_CAP_COLLAB_PROC_PERF along with the other processor
capabilities bits.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
[ rjw: Subject and changelog edits, whitespace fixup ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/include/asm/acpi.h   |  3 +++
 drivers/acpi/acpi_processor.c | 30 +++++++++++++++++++++++++++++-
 include/acpi/proc_cap_intel.h |  1 +
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 6f6752a2ea36..6c3af9486153 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -115,6 +115,9 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
 	if (cpu_has(c, X86_FEATURE_ACPI))
 		*cap |= ACPI_PROC_CAP_T_FFH;
 
+	if (cpu_has(c, X86_FEATURE_HWP))
+		*cap |= ACPI_PROC_CAP_COLLAB_PROC_PERF;
+
 	/*
 	 * If mwait/monitor is unsupported, C_C1_FFH and
 	 * C2/C3_FFH will be disabled.
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index ebb4efd3d0aa..163bd3d52acd 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -559,13 +559,41 @@ bool __init processor_physically_present(acpi_handle handle)
 	return !invalid_logical_cpuid(cpuid);
 }
 
+/* vendor specific UUID indicating an Intel platform */
+static u8 sb_uuid_str[] = "4077A616-290C-47BE-9EBD-D87058713953";
 static bool acpi_hwp_native_thermal_lvt_set;
+
+static acpi_status __init acpi_processor_osc(acpi_handle handle, u32 lvl,
+					     void *context, void **rv)
+{
+	u32 capbuf[2] = {};
+	struct acpi_osc_context osc_context = {
+		.uuid_str = sb_uuid_str,
+		.rev = 1,
+		.cap.length = 8,
+		.cap.pointer = capbuf,
+	};
+	acpi_status status;
+
+	if (!processor_physically_present(handle))
+		return AE_OK;
+
+	arch_acpi_set_proc_cap_bits(&capbuf[OSC_SUPPORT_DWORD]);
+
+	status = acpi_run_osc(handle, &osc_context);
+	if (ACPI_FAILURE(status))
+		return status;
+
+	kfree(osc_context.ret.pointer);
+
+	return AE_OK;
+}
+
 static acpi_status __init acpi_hwp_native_thermal_lvt_osc(acpi_handle handle,
 							  u32 lvl,
 							  void *context,
 							  void **rv)
 {
-	u8 sb_uuid_str[] = "4077A616-290C-47BE-9EBD-D87058713953";
 	u32 capbuf[2];
 	struct acpi_osc_context osc_context = {
 		.uuid_str = sb_uuid_str,
diff --git a/include/acpi/proc_cap_intel.h b/include/acpi/proc_cap_intel.h
index 57e5e2628abb..ddcdc41d6c3e 100644
--- a/include/acpi/proc_cap_intel.h
+++ b/include/acpi/proc_cap_intel.h
@@ -19,6 +19,7 @@
 #define ACPI_PROC_CAP_C_C1_FFH			(0x0100)
 #define ACPI_PROC_CAP_C_C2C3_FFH		(0x0200)
 #define ACPI_PROC_CAP_SMP_P_HWCOORD		(0x0800)
+#define ACPI_PROC_CAP_COLLAB_PROC_PERF		(0x1000)
 
 #define ACPI_PROC_CAP_EST_CAPABILITY_SMP	(ACPI_PROC_CAP_SMP_C1PT | \
 						 ACPI_PROC_CAP_C_C1_HALT | \

From c542ce36a9f0bbf7c3a607ee31f1c39fda280782 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 3 Jul 2023 11:02:44 +0300
Subject: [PATCH 0172/2047] ACPI: bus: Introduce wrappers for ACPICA notify
 handler install/remove

Introduce new functions acpi_dev_install_notify_handler() and
acpi_dev_remove_notify_handler(), to install and remove, respectively,
a handler for AML Notify() operations targeted at a given ACPI device
object. They will allow drivers to install Notify() handlers directly
instead of providing an ACPI driver .notify() callback to be invoked
in the context of a Notify() handler installed by the ACPI bus type
code. In particular, this will help platform drivers to provide
Notify() handlers for the ACPI companions of the platform devices
they bind to.

These functions are replacements for acpi_device_install_notify_handler()
and acpi_device_remove_notify_handler(), respectively, and after all
drivers switch over to using them, the old ones will be dropped.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits, whitespace adjustments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bus.c      | 24 ++++++++++++++++++++++++
 include/acpi/acpi_bus.h |  6 ++++++
 2 files changed, 30 insertions(+)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 2fc2b43a4ed3..b5688839be1b 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -554,6 +554,30 @@ static void acpi_device_remove_notify_handler(struct acpi_device *device,
 	acpi_os_wait_events_complete();
 }
 
+int acpi_dev_install_notify_handler(struct acpi_device *adev,
+				    u32 handler_type,
+				    acpi_notify_handler handler)
+{
+	acpi_status status;
+
+	status = acpi_install_notify_handler(adev->handle, handler_type,
+					     handler, adev);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_install_notify_handler);
+
+void acpi_dev_remove_notify_handler(struct acpi_device *adev,
+				    u32 handler_type,
+				    acpi_notify_handler handler)
+{
+	acpi_remove_notify_handler(adev->handle, handler_type, handler);
+	acpi_os_wait_events_complete();
+}
+EXPORT_SYMBOL_GPL(acpi_dev_remove_notify_handler);
+
 /* Handle events targeting \_SB device (at present only graceful shutdown) */
 
 #define ACPI_SB_NOTIFY_SHUTDOWN_REQUEST 0x81
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index c941d99162c0..23fbe4a16972 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -515,6 +515,12 @@ void acpi_bus_private_data_handler(acpi_handle, void *);
 int acpi_bus_get_private_data(acpi_handle, void **);
 int acpi_bus_attach_private_data(acpi_handle, void *);
 void acpi_bus_detach_private_data(acpi_handle);
+int acpi_dev_install_notify_handler(struct acpi_device *adev,
+				    u32 handler_type,
+				    acpi_notify_handler handler);
+void acpi_dev_remove_notify_handler(struct acpi_device *adev,
+				    u32 handler_type,
+				    acpi_notify_handler handler);
 extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32);
 extern int register_acpi_notifier(struct notifier_block *);
 extern int unregister_acpi_notifier(struct notifier_block *);

From 0d16710146a10cf62b3efddee8ffd006432d5d7e Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 3 Jul 2023 11:02:45 +0300
Subject: [PATCH 0173/2047] ACPI: bus: Set driver_data to NULL every time
 .add() fails

Most ACPI drivers set driver_data in their .add() callbacks, but usually
they don't clear it in the error code path.

Set driver_data to NULL in acpi_device_probe() to prevent stale pointers
from staying around.

Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bus.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index b5688839be1b..0168b3a556b6 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1029,8 +1029,10 @@ static int acpi_device_probe(struct device *dev)
 		return -ENOSYS;
 
 	ret = acpi_drv->ops.add(acpi_dev);
-	if (ret)
+	if (ret) {
+		acpi_dev->driver_data = NULL;
 		return ret;
+	}
 
 	pr_debug("Driver [%s] successfully bound to device [%s]\n",
 		 acpi_drv->name, acpi_dev->pnp.bus_id);

From 543a2d115ae63dd4c6d931d714fd5b5d3c362f55 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 3 Jul 2023 11:02:46 +0300
Subject: [PATCH 0174/2047] ACPI: AC: Install Notify() handler directly

Modify the ACPI AC driver to install its own Notify() handler directly
instead of providing an ACPI driver .notify() callback.

This will allow the ACPI driver .notify() callback to be eliminated and
it will allow the AC driver to be switched over to a platform one in the
future.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits, whitespace adjustments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ac.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 1ace70b831cd..225dc6818751 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -34,7 +34,7 @@ MODULE_LICENSE("GPL");
 
 static int acpi_ac_add(struct acpi_device *device);
 static void acpi_ac_remove(struct acpi_device *device);
-static void acpi_ac_notify(struct acpi_device *device, u32 event);
+static void acpi_ac_notify(acpi_handle handle, u32 event, void *data);
 
 static const struct acpi_device_id ac_device_ids[] = {
 	{"ACPI0003", 0},
@@ -54,11 +54,9 @@ static struct acpi_driver acpi_ac_driver = {
 	.name = "ac",
 	.class = ACPI_AC_CLASS,
 	.ids = ac_device_ids,
-	.flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
 	.ops = {
 		.add = acpi_ac_add,
 		.remove = acpi_ac_remove,
-		.notify = acpi_ac_notify,
 		},
 	.drv.pm = &acpi_ac_pm,
 };
@@ -128,8 +126,9 @@ static enum power_supply_property ac_props[] = {
 };
 
 /* Driver Model */
-static void acpi_ac_notify(struct acpi_device *device, u32 event)
+static void acpi_ac_notify(acpi_handle handle, u32 event, void *data)
 {
+	struct acpi_device *device = data;
 	struct acpi_ac *ac = acpi_driver_data(device);
 
 	if (!ac)
@@ -235,7 +234,7 @@ static int acpi_ac_add(struct acpi_device *device)
 
 	result = acpi_ac_get_state(ac);
 	if (result)
-		goto end;
+		goto err_release_ac;
 
 	psy_cfg.drv_data = ac;
 
@@ -248,7 +247,7 @@ static int acpi_ac_add(struct acpi_device *device)
 					    &ac->charger_desc, &psy_cfg);
 	if (IS_ERR(ac->charger)) {
 		result = PTR_ERR(ac->charger);
-		goto end;
+		goto err_release_ac;
 	}
 
 	pr_info("%s [%s] (%s)\n", acpi_device_name(device),
@@ -256,9 +255,19 @@ static int acpi_ac_add(struct acpi_device *device)
 
 	ac->battery_nb.notifier_call = acpi_ac_battery_notify;
 	register_acpi_notifier(&ac->battery_nb);
-end:
+
+	result = acpi_dev_install_notify_handler(device, ACPI_ALL_NOTIFY,
+						 acpi_ac_notify);
 	if (result)
-		kfree(ac);
+		goto err_unregister;
+
+	return 0;
+
+err_unregister:
+	power_supply_unregister(ac->charger);
+	unregister_acpi_notifier(&ac->battery_nb);
+err_release_ac:
+	kfree(ac);
 
 	return result;
 }
@@ -297,6 +306,8 @@ static void acpi_ac_remove(struct acpi_device *device)
 
 	ac = acpi_driver_data(device);
 
+	acpi_dev_remove_notify_handler(device, ACPI_ALL_NOTIFY,
+				       acpi_ac_notify);
 	power_supply_unregister(ac->charger);
 	unregister_acpi_notifier(&ac->battery_nb);
 

From 6f7016819766a55bf90e21fdb8a8532418009adb Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 3 Jul 2023 11:02:47 +0300
Subject: [PATCH 0175/2047] ACPI: video: Install Notify() handler directly

Modify the ACPI video driver to install its own Notify() handler directly
instead of providing an ACPI driver .notify() callback.

This will allow the ACPI driver .notify() callback to be eliminated and
it will allow the ideo driver to be switched over to a platform one in
the future.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits, whitespace adjustments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_video.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 62f4364e4460..6760303def6f 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -77,7 +77,7 @@ static DEFINE_MUTEX(video_list_lock);
 static LIST_HEAD(video_bus_head);
 static int acpi_video_bus_add(struct acpi_device *device);
 static void acpi_video_bus_remove(struct acpi_device *device);
-static void acpi_video_bus_notify(struct acpi_device *device, u32 event);
+static void acpi_video_bus_notify(acpi_handle handle, u32 event, void *data);
 
 /*
  * Indices in the _BCL method response: the first two items are special,
@@ -104,7 +104,6 @@ static struct acpi_driver acpi_video_bus = {
 	.ops = {
 		.add = acpi_video_bus_add,
 		.remove = acpi_video_bus_remove,
-		.notify = acpi_video_bus_notify,
 		},
 };
 
@@ -1527,8 +1526,9 @@ static int acpi_video_bus_stop_devices(struct acpi_video_bus *video)
 				  acpi_osi_is_win8() ? 0 : 1);
 }
 
-static void acpi_video_bus_notify(struct acpi_device *device, u32 event)
+static void acpi_video_bus_notify(acpi_handle handle, u32 event, void *data)
 {
+	struct acpi_device *device = data;
 	struct acpi_video_bus *video = acpi_driver_data(device);
 	struct input_dev *input;
 	int keycode = 0;
@@ -2053,8 +2053,19 @@ static int acpi_video_bus_add(struct acpi_device *device)
 
 	acpi_video_bus_add_notify_handler(video);
 
+	error = acpi_dev_install_notify_handler(device, ACPI_DEVICE_NOTIFY,
+						acpi_video_bus_notify);
+	if (error)
+		goto err_remove;
+
 	return 0;
 
+err_remove:
+	mutex_lock(&video_list_lock);
+	list_del(&video->entry);
+	mutex_unlock(&video_list_lock);
+	acpi_video_bus_remove_notify_handler(video);
+	acpi_video_bus_unregister_backlight(video);
 err_put_video:
 	acpi_video_bus_put_devices(video);
 	kfree(video->attached_array);
@@ -2075,6 +2086,9 @@ static void acpi_video_bus_remove(struct acpi_device *device)
 
 	video = acpi_driver_data(device);
 
+	acpi_dev_remove_notify_handler(device, ACPI_DEVICE_NOTIFY,
+				       acpi_video_bus_notify);
+
 	mutex_lock(&video_list_lock);
 	list_del(&video->entry);
 	mutex_unlock(&video_list_lock);

From 10666251554c576617cbaf043d38c86f31f588a7 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 3 Jul 2023 11:02:48 +0300
Subject: [PATCH 0176/2047] ACPI: battery: Install Notify() handler directly

Modify the ACPI battery driver to install its own Notify() handler
directly instead of providing an ACPI driver .notify() callback.

This will allow the ACPI driver .notify() callback to be eliminated and
it will allow the battery driver to be switched over to a platform one
in the future.

While at it, fix up whitespaces in acpi_battery_remove().

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits, whitespace adjustments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/battery.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 9c67ed02d797..969bf81e8d54 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -1034,8 +1034,9 @@ static void acpi_battery_refresh(struct acpi_battery *battery)
 }
 
 /* Driver Interface */
-static void acpi_battery_notify(struct acpi_device *device, u32 event)
+static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
 {
+	struct acpi_device *device = data;
 	struct acpi_battery *battery = acpi_driver_data(device);
 	struct power_supply *old;
 
@@ -1212,13 +1213,22 @@ static int acpi_battery_add(struct acpi_device *device)
 
 	device_init_wakeup(&device->dev, 1);
 
-	return result;
+	result = acpi_dev_install_notify_handler(device, ACPI_ALL_NOTIFY,
+						 acpi_battery_notify);
+	if (result)
+		goto fail_pm;
 
+	return 0;
+
+fail_pm:
+	device_init_wakeup(&device->dev, 0);
+	unregister_pm_notifier(&battery->pm_nb);
 fail:
 	sysfs_remove_battery(battery);
 	mutex_destroy(&battery->lock);
 	mutex_destroy(&battery->sysfs_lock);
 	kfree(battery);
+
 	return result;
 }
 
@@ -1228,10 +1238,16 @@ static void acpi_battery_remove(struct acpi_device *device)
 
 	if (!device || !acpi_driver_data(device))
 		return;
-	device_init_wakeup(&device->dev, 0);
+
 	battery = acpi_driver_data(device);
+
+	acpi_dev_remove_notify_handler(device, ACPI_ALL_NOTIFY,
+				       acpi_battery_notify);
+
+	device_init_wakeup(&device->dev, 0);
 	unregister_pm_notifier(&battery->pm_nb);
 	sysfs_remove_battery(battery);
+
 	mutex_destroy(&battery->lock);
 	mutex_destroy(&battery->sysfs_lock);
 	kfree(battery);
@@ -1264,11 +1280,9 @@ static struct acpi_driver acpi_battery_driver = {
 	.name = "battery",
 	.class = ACPI_BATTERY_CLASS,
 	.ids = battery_device_ids,
-	.flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
 	.ops = {
 		.add = acpi_battery_add,
 		.remove = acpi_battery_remove,
-		.notify = acpi_battery_notify,
 		},
 	.drv.pm = &acpi_battery_pm,
 };

From 898ff0f005bada644913fc0e463ff9b9ff6d6ff3 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 3 Jul 2023 11:02:49 +0300
Subject: [PATCH 0177/2047] ACPI: HED: Install Notify() handler directly

Modify the ACPI HED driver to install its own Notify() handler directly
instead of providing an ACPI driver .notify() callback.

This will allow the ACPI driver .notify() callback to be eliminated and
it will allow the HED driver to be switched over to a platform one in
the future.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits, whitespace adjustments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/hed.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/hed.c b/drivers/acpi/hed.c
index 78d44e3fe129..46c6f8c35b43 100644
--- a/drivers/acpi/hed.c
+++ b/drivers/acpi/hed.c
@@ -42,22 +42,32 @@ EXPORT_SYMBOL_GPL(unregister_acpi_hed_notifier);
  * it is used by HEST Generic Hardware Error Source with notify type
  * SCI.
  */
-static void acpi_hed_notify(struct acpi_device *device, u32 event)
+static void acpi_hed_notify(acpi_handle handle, u32 event, void *data)
 {
 	blocking_notifier_call_chain(&acpi_hed_notify_list, 0, NULL);
 }
 
 static int acpi_hed_add(struct acpi_device *device)
 {
+	int err;
+
 	/* Only one hardware error device */
 	if (hed_handle)
 		return -EINVAL;
 	hed_handle = device->handle;
-	return 0;
+
+	err = acpi_dev_install_notify_handler(device, ACPI_DEVICE_NOTIFY,
+					      acpi_hed_notify);
+	if (err)
+		hed_handle = NULL;
+
+	return err;
 }
 
 static void acpi_hed_remove(struct acpi_device *device)
 {
+	acpi_dev_remove_notify_handler(device, ACPI_DEVICE_NOTIFY,
+				       acpi_hed_notify);
 	hed_handle = NULL;
 }
 
@@ -68,7 +78,6 @@ static struct acpi_driver acpi_hed_driver = {
 	.ops = {
 		.add = acpi_hed_add,
 		.remove = acpi_hed_remove,
-		.notify = acpi_hed_notify,
 	},
 };
 module_acpi_driver(acpi_hed_driver);

From dcca12ab62a27cc190335161bae39f4593f708ba Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 3 Jul 2023 11:02:50 +0300
Subject: [PATCH 0178/2047] ACPI: NFIT: Install Notify() handler directly

Modify the ACPI NFIT driver to install its own Notify() handler directly
instead of providing an ACPI driver .notify() callback.

This will allow the ACPI driver .notify() callback to be eliminated.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits, whitespace adjustments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/nfit/core.c | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 07204d482968..9dd6ff003765 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -3282,6 +3282,23 @@ static void acpi_nfit_put_table(void *table)
 	acpi_put_table(table);
 }
 
+static void acpi_nfit_notify(acpi_handle handle, u32 event, void *data)
+{
+	struct acpi_device *adev = data;
+
+	device_lock(&adev->dev);
+	__acpi_nfit_notify(&adev->dev, handle, event);
+	device_unlock(&adev->dev);
+}
+
+static void acpi_nfit_remove_notify_handler(void *data)
+{
+	struct acpi_device *adev = data;
+
+	acpi_dev_remove_notify_handler(adev, ACPI_DEVICE_NOTIFY,
+				       acpi_nfit_notify);
+}
+
 void acpi_nfit_shutdown(void *data)
 {
 	struct acpi_nfit_desc *acpi_desc = data;
@@ -3368,7 +3385,18 @@ static int acpi_nfit_add(struct acpi_device *adev)
 
 	if (rc)
 		return rc;
-	return devm_add_action_or_reset(dev, acpi_nfit_shutdown, acpi_desc);
+
+	rc = devm_add_action_or_reset(dev, acpi_nfit_shutdown, acpi_desc);
+	if (rc)
+		return rc;
+
+	rc = acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY,
+					     acpi_nfit_notify);
+	if (rc)
+		return rc;
+
+	return devm_add_action_or_reset(dev, acpi_nfit_remove_notify_handler,
+					adev);
 }
 
 static void acpi_nfit_remove(struct acpi_device *adev)
@@ -3446,13 +3474,6 @@ void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
 }
 EXPORT_SYMBOL_GPL(__acpi_nfit_notify);
 
-static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
-{
-	device_lock(&adev->dev);
-	__acpi_nfit_notify(&adev->dev, adev->handle, event);
-	device_unlock(&adev->dev);
-}
-
 static const struct acpi_device_id acpi_nfit_ids[] = {
 	{ "ACPI0012", 0 },
 	{ "", 0 },
@@ -3465,7 +3486,6 @@ static struct acpi_driver acpi_nfit_driver = {
 	.ops = {
 		.add = acpi_nfit_add,
 		.remove = acpi_nfit_remove,
-		.notify = acpi_nfit_notify,
 	},
 };
 

From 1c28250403ae25da150346de86e96322318339e5 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 3 Jul 2023 11:02:51 +0300
Subject: [PATCH 0179/2047] ACPI: NFIT: Remove unnecessary .remove callback

The ACPI NFIT driver provides an empty function as it's .remove() callback
which is not necessary, so drop the empty acpi_nfit_remove() and the
.remove() callback initialization from it.

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/nfit/core.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 9dd6ff003765..f0e6738ae3c9 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -3399,11 +3399,6 @@ static int acpi_nfit_add(struct acpi_device *adev)
 					adev);
 }
 
-static void acpi_nfit_remove(struct acpi_device *adev)
-{
-	/* see acpi_nfit_unregister */
-}
-
 static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
 {
 	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
@@ -3485,7 +3480,6 @@ static struct acpi_driver acpi_nfit_driver = {
 	.ids = acpi_nfit_ids,
 	.ops = {
 		.add = acpi_nfit_add,
-		.remove = acpi_nfit_remove,
 	},
 };
 

From 9d67b6acf1358b67a6de3302dc9c344a7cab4807 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 3 Jul 2023 11:02:52 +0300
Subject: [PATCH 0180/2047] ACPI: thermal: Install Notify() handler directly

Modify the ACPI thermal driver to install its own Notify() handler
directly instead of providing an ACPI driver .notify() callback.

This will allow the ACPI driver .notify() callback to be eliminated and
it will allow the thermal driver to be switched over to a platform one
in the future.

While at it, fix up whitespaces in acpi_thermal_remove().

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
[ rjw: Subject and changelog edits, whitespace adjustments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/thermal.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index f9f6ebb08fdb..7d543ede775e 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -825,8 +825,9 @@ static void acpi_queue_thermal_check(struct acpi_thermal *tz)
 		queue_work(acpi_thermal_pm_queue, &tz->thermal_check_work);
 }
 
-static void acpi_thermal_notify(struct acpi_device *device, u32 event)
+static void acpi_thermal_notify(acpi_handle handle, u32 event, void *data)
 {
+	struct acpi_device *device = data;
 	struct acpi_thermal *tz = acpi_driver_data(device);
 
 	if (!tz)
@@ -997,11 +998,20 @@ static int acpi_thermal_add(struct acpi_device *device)
 
 	pr_info("%s [%s] (%ld C)\n", acpi_device_name(device),
 		acpi_device_bid(device), deci_kelvin_to_celsius(tz->temperature));
-	goto end;
 
+	result = acpi_dev_install_notify_handler(device, ACPI_DEVICE_NOTIFY,
+						 acpi_thermal_notify);
+	if (result)
+		goto flush_wq;
+
+	return 0;
+
+flush_wq:
+	flush_workqueue(acpi_thermal_pm_queue);
+	acpi_thermal_unregister_thermal_zone(tz);
 free_memory:
 	kfree(tz);
-end:
+
 	return result;
 }
 
@@ -1012,10 +1022,14 @@ static void acpi_thermal_remove(struct acpi_device *device)
 	if (!device || !acpi_driver_data(device))
 		return;
 
-	flush_workqueue(acpi_thermal_pm_queue);
 	tz = acpi_driver_data(device);
 
+	acpi_dev_remove_notify_handler(device, ACPI_DEVICE_NOTIFY,
+				       acpi_thermal_notify);
+
+	flush_workqueue(acpi_thermal_pm_queue);
 	acpi_thermal_unregister_thermal_zone(tz);
+
 	kfree(tz);
 }
 
@@ -1078,7 +1092,6 @@ static struct acpi_driver acpi_thermal_driver = {
 	.ops = {
 		.add = acpi_thermal_add,
 		.remove = acpi_thermal_remove,
-		.notify = acpi_thermal_notify,
 		},
 	.drv.pm = &acpi_thermal_pm,
 };

From 12ad93ab6eeb0c30dbcb5df7f72415a092f6f191 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 7 Jul 2023 22:37:30 +0200
Subject: [PATCH 0181/2047] thermal/drivers/int3400: Use thermal zone device
 wrappers

Use the thermal core API to access the thermal zone "type" field
instead of directly using the structure field.  While here, remove
access to the temperature field, as this driver is reporting fake
temperature, which can be replaced with INT3400_FAKE_TEMP. Also
replace hardcoded 20C with INT3400_FAKE_TEMP

Acked-by: srinivas pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../thermal/intel/int340x_thermal/int3400_thermal.c  | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 5e1164226ada..6aadfd72d910 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -15,6 +15,7 @@
 #define INT3400_THERMAL_TABLE_CHANGED 0x83
 #define INT3400_ODVP_CHANGED 0x88
 #define INT3400_KEEP_ALIVE 0xA0
+#define INT3400_FAKE_TEMP (20 * 1000) /* faked temp sensor with 20C */
 
 enum int3400_thermal_uuid {
 	INT3400_THERMAL_ACTIVE = 0,
@@ -453,6 +454,7 @@ static void int3400_notify(acpi_handle handle,
 			void *data)
 {
 	struct int3400_thermal_priv *priv = data;
+	struct device *dev;
 	char *thermal_prop[5];
 	int therm_event;
 
@@ -475,12 +477,14 @@ static void int3400_notify(acpi_handle handle,
 		return;
 	}
 
-	thermal_prop[0] = kasprintf(GFP_KERNEL, "NAME=%s", priv->thermal->type);
-	thermal_prop[1] = kasprintf(GFP_KERNEL, "TEMP=%d", priv->thermal->temperature);
+	dev = thermal_zone_device(priv->thermal);
+
+	thermal_prop[0] = kasprintf(GFP_KERNEL, "NAME=%s", thermal_zone_device_type(priv->thermal));
+	thermal_prop[1] = kasprintf(GFP_KERNEL, "TEMP=%d", INT3400_FAKE_TEMP);
 	thermal_prop[2] = kasprintf(GFP_KERNEL, "TRIP=");
 	thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d", therm_event);
 	thermal_prop[4] = NULL;
-	kobject_uevent_env(&priv->thermal->device.kobj, KOBJ_CHANGE, thermal_prop);
+	kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, thermal_prop);
 	kfree(thermal_prop[0]);
 	kfree(thermal_prop[1]);
 	kfree(thermal_prop[2]);
@@ -490,7 +494,7 @@ static void int3400_notify(acpi_handle handle,
 static int int3400_thermal_get_temp(struct thermal_zone_device *thermal,
 			int *temp)
 {
-	*temp = 20 * 1000; /* faked temp sensor with 20C */
+	*temp = INT3400_FAKE_TEMP;
 	return 0;
 }
 

From e49c8ed8e0ec29d3ae8cb6342cad147d9738bfbe Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 7 Jul 2023 22:37:31 +0200
Subject: [PATCH 0182/2047] thermal/drivers/int340x: Do not check the thermal
 zone state

The driver is accessing the thermal zone state to ensure the state is
different from the one we want to set.

We don't want the driver to access the thermal zone device internals.

Actually, the thermal core code already checks if the thermal zone's
state is different before calling this function, thus this check is
duplicate.

Remove it.

Acked-by: srinivas pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../intel/int340x_thermal/int3400_thermal.c   | 32 ++++++++-----------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 6aadfd72d910..ddd600820f68 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -503,32 +503,28 @@ static int int3400_thermal_change_mode(struct thermal_zone_device *thermal,
 {
 	struct int3400_thermal_priv *priv = thermal_zone_device_priv(thermal);
 	int result = 0;
+	int enabled;
 
 	if (!priv)
 		return -EINVAL;
 
-	if (mode != thermal->mode) {
-		int enabled;
+	enabled = mode == THERMAL_DEVICE_ENABLED;
 
-		enabled = mode == THERMAL_DEVICE_ENABLED;
-
-		if (priv->os_uuid_mask) {
-			if (!enabled) {
-				priv->os_uuid_mask = 0;
-				result = set_os_uuid_mask(priv, priv->os_uuid_mask);
-			}
-			goto eval_odvp;
+	if (priv->os_uuid_mask) {
+		if (!enabled) {
+			priv->os_uuid_mask = 0;
+			result = set_os_uuid_mask(priv, priv->os_uuid_mask);
 		}
-
-		if (priv->current_uuid_index < 0 ||
-		    priv->current_uuid_index >= INT3400_THERMAL_MAXIMUM_UUID)
-			return -EINVAL;
-
-		result = int3400_thermal_run_osc(priv->adev->handle,
-						 int3400_thermal_uuids[priv->current_uuid_index],
-						 &enabled);
+		goto eval_odvp;
 	}
 
+	if (priv->current_uuid_index < 0 ||
+	    priv->current_uuid_index >= INT3400_THERMAL_MAXIMUM_UUID)
+		return -EINVAL;
+
+	result = int3400_thermal_run_osc(priv->adev->handle,
+					 int3400_thermal_uuids[priv->current_uuid_index],
+					 &enabled);
 eval_odvp:
 	evaluate_odvp(priv);
 

From 045a44d4c9b32578aacf0811063e5bb741c7c32c Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 14 Jul 2023 11:49:28 -0600
Subject: [PATCH 0183/2047] regulator: Explicitly include correct DT includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230714174930.4063320-1-robh@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/act8945a-regulator.c      | 2 +-
 drivers/regulator/atc260x-regulator.c       | 3 ++-
 drivers/regulator/axp20x-regulator.c        | 1 -
 drivers/regulator/cpcap-regulator.c         | 2 +-
 drivers/regulator/fan53555.c                | 2 +-
 drivers/regulator/fixed.c                   | 1 -
 drivers/regulator/ltc3589.c                 | 1 -
 drivers/regulator/max77826-regulator.c      | 1 -
 drivers/regulator/mp5416.c                  | 2 +-
 drivers/regulator/mp886x.c                  | 2 +-
 drivers/regulator/mpq7920.c                 | 1 -
 drivers/regulator/mt6315-regulator.c        | 2 +-
 drivers/regulator/mt6359-regulator.c        | 3 ++-
 drivers/regulator/mtk-dvfsrc-regulator.c    | 3 +--
 drivers/regulator/pbias-regulator.c         | 1 -
 drivers/regulator/pca9450-regulator.c       | 1 -
 drivers/regulator/pwm-regulator.c           | 2 +-
 drivers/regulator/qcom-rpmh-regulator.c     | 1 -
 drivers/regulator/qcom_smd-regulator.c      | 1 -
 drivers/regulator/qcom_usb_vbus-regulator.c | 1 -
 drivers/regulator/rk808-regulator.c         | 3 ++-
 drivers/regulator/rt5759-regulator.c        | 2 +-
 drivers/regulator/stm32-pwr.c               | 3 +--
 drivers/regulator/stm32-vrefbuf.c           | 2 +-
 drivers/regulator/sy8824x.c                 | 2 +-
 drivers/regulator/sy8827n.c                 | 2 +-
 drivers/regulator/tps6286x-regulator.c      | 2 +-
 drivers/regulator/tps6287x-regulator.c      | 2 +-
 drivers/regulator/tps65218-regulator.c      | 2 +-
 drivers/regulator/tps65219-regulator.c      | 2 +-
 drivers/regulator/tps6594-regulator.c       | 2 +-
 drivers/regulator/twl-regulator.c           | 1 -
 drivers/regulator/twl6030-regulator.c       | 1 -
 drivers/regulator/uniphier-regulator.c      | 2 +-
 drivers/regulator/vctrl-regulator.c         | 2 +-
 drivers/regulator/vexpress-regulator.c      | 3 ++-
 36 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/drivers/regulator/act8945a-regulator.c b/drivers/regulator/act8945a-regulator.c
index e26264529b74..24cbdd833863 100644
--- a/drivers/regulator/act8945a-regulator.c
+++ b/drivers/regulator/act8945a-regulator.c
@@ -8,7 +8,7 @@
  */
 
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
diff --git a/drivers/regulator/atc260x-regulator.c b/drivers/regulator/atc260x-regulator.c
index 87e237d740bc..09fe51464090 100644
--- a/drivers/regulator/atc260x-regulator.c
+++ b/drivers/regulator/atc260x-regulator.c
@@ -7,7 +7,8 @@
 
 #include <linux/mfd/atc260x/core.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
 
diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c
index 810f90f3e2a1..c657820b0bbb 100644
--- a/drivers/regulator/axp20x-regulator.c
+++ b/drivers/regulator/axp20x-regulator.c
@@ -20,7 +20,6 @@
 #include <linux/mfd/axp20x.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
diff --git a/drivers/regulator/cpcap-regulator.c b/drivers/regulator/cpcap-regulator.c
index 1fd79fb17303..6958d154442b 100644
--- a/drivers/regulator/cpcap-regulator.c
+++ b/drivers/regulator/cpcap-regulator.c
@@ -12,7 +12,7 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index 289c06e09f47..48f312167e53 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -12,7 +12,7 @@
 #include <linux/err.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/param.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index 364d1a2683b7..55130efae9b8 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -25,7 +25,6 @@
 #include <linux/gpio/consumer.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/machine.h>
 #include <linux/clk.h>
diff --git a/drivers/regulator/ltc3589.c b/drivers/regulator/ltc3589.c
index e9751c206d95..cf931b8c36dc 100644
--- a/drivers/regulator/ltc3589.c
+++ b/drivers/regulator/ltc3589.c
@@ -10,7 +10,6 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/max77826-regulator.c b/drivers/regulator/max77826-regulator.c
index 3855f5e686d8..5590cdf615b7 100644
--- a/drivers/regulator/max77826-regulator.c
+++ b/drivers/regulator/max77826-regulator.c
@@ -9,7 +9,6 @@
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/mp5416.c b/drivers/regulator/mp5416.c
index 3886b252fbe7..d068ac93d373 100644
--- a/drivers/regulator/mp5416.c
+++ b/drivers/regulator/mp5416.c
@@ -10,7 +10,7 @@
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
diff --git a/drivers/regulator/mp886x.c b/drivers/regulator/mp886x.c
index ede1b1e58002..9911be2e6bac 100644
--- a/drivers/regulator/mp886x.c
+++ b/drivers/regulator/mp886x.c
@@ -9,7 +9,7 @@
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/mpq7920.c b/drivers/regulator/mpq7920.c
index bf677c535edc..4926c229109b 100644
--- a/drivers/regulator/mpq7920.c
+++ b/drivers/regulator/mpq7920.c
@@ -11,7 +11,6 @@
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/mt6315-regulator.c b/drivers/regulator/mt6315-regulator.c
index 8047081ea2f7..2608a6652d77 100644
--- a/drivers/regulator/mt6315-regulator.c
+++ b/drivers/regulator/mt6315-regulator.c
@@ -3,7 +3,7 @@
 // Copyright (c) 2021 MediaTek Inc.
 
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
diff --git a/drivers/regulator/mt6359-regulator.c b/drivers/regulator/mt6359-regulator.c
index 3eb86ec21d08..5cf6448fb05f 100644
--- a/drivers/regulator/mt6359-regulator.c
+++ b/drivers/regulator/mt6359-regulator.c
@@ -2,12 +2,13 @@
 //
 // Copyright (c) 2021 MediaTek Inc.
 
+#include <linux/platform_device.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/mt6359/registers.h>
 #include <linux/mfd/mt6359p/registers.h>
 #include <linux/mfd/mt6397/core.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
diff --git a/drivers/regulator/mtk-dvfsrc-regulator.c b/drivers/regulator/mtk-dvfsrc-regulator.c
index efca67207a5a..f1280d45265d 100644
--- a/drivers/regulator/mtk-dvfsrc-regulator.c
+++ b/drivers/regulator/mtk-dvfsrc-regulator.c
@@ -6,8 +6,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
+#include <linux/of_.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/soc/mediatek/mtk_dvfsrc.h>
diff --git a/drivers/regulator/pbias-regulator.c b/drivers/regulator/pbias-regulator.c
index 0c9873e9abdc..cd5a0d7e4455 100644
--- a/drivers/regulator/pbias-regulator.c
+++ b/drivers/regulator/pbias-regulator.c
@@ -25,7 +25,6 @@
 #include <linux/regmap.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 struct pbias_reg_info {
 	u32 enable;
diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c
index 91bfb7e026c9..2ab365d2749f 100644
--- a/drivers/regulator/pca9450-regulator.c
+++ b/drivers/regulator/pca9450-regulator.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c
index b64d99695b84..2aff6db748e2 100644
--- a/drivers/regulator/pwm-regulator.c
+++ b/drivers/regulator/pwm-regulator.c
@@ -10,11 +10,11 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/err.h>
+#include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pwm.h>
 #include <linux/gpio/consumer.h>
 
diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index f3b280af0773..ec1b50721ec6 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -7,7 +7,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/string.h>
diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index 18189f35db68..f53ada076252 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -6,7 +6,6 @@
 
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/qcom_usb_vbus-regulator.c b/drivers/regulator/qcom_usb_vbus-regulator.c
index 57ec613f4a0a..cd94ed67621f 100644
--- a/drivers/regulator/qcom_usb_vbus-regulator.c
+++ b/drivers/regulator/qcom_usb_vbus-regulator.c
@@ -8,7 +8,6 @@
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c
index 460525ed006c..867a2cf243f6 100644
--- a/drivers/regulator/rk808-regulator.c
+++ b/drivers/regulator/rk808-regulator.c
@@ -17,9 +17,10 @@
 #include <linux/delay.h>
 #include <linux/gpio.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/mfd/rk808.h>
+#include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/gpio/consumer.h>
diff --git a/drivers/regulator/rt5759-regulator.c b/drivers/regulator/rt5759-regulator.c
index 90555a9ef1b0..c2553dcee050 100644
--- a/drivers/regulator/rt5759-regulator.c
+++ b/drivers/regulator/rt5759-regulator.c
@@ -4,7 +4,7 @@
 #include <linux/i2c.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/stm32-pwr.c b/drivers/regulator/stm32-pwr.c
index 4c60eddad60d..85b0102fb9b1 100644
--- a/drivers/regulator/stm32-pwr.c
+++ b/drivers/regulator/stm32-pwr.c
@@ -6,8 +6,7 @@
 #include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/stm32-vrefbuf.c b/drivers/regulator/stm32-vrefbuf.c
index f5ccc7dd309a..717144cbe0f9 100644
--- a/drivers/regulator/stm32-vrefbuf.c
+++ b/drivers/regulator/stm32-vrefbuf.c
@@ -10,7 +10,7 @@
 #include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/sy8824x.c b/drivers/regulator/sy8824x.c
index d0703105c439..d49c0cba09fb 100644
--- a/drivers/regulator/sy8824x.c
+++ b/drivers/regulator/sy8824x.c
@@ -8,7 +8,7 @@
 
 #include <linux/module.h>
 #include <linux/i2c.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/sy8827n.c b/drivers/regulator/sy8827n.c
index 433959b43549..f11ff38b36c9 100644
--- a/drivers/regulator/sy8827n.c
+++ b/drivers/regulator/sy8827n.c
@@ -9,7 +9,7 @@
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/i2c.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/tps6286x-regulator.c b/drivers/regulator/tps6286x-regulator.c
index b1c4b5120745..c06f6d1dc737 100644
--- a/drivers/regulator/tps6286x-regulator.c
+++ b/drivers/regulator/tps6286x-regulator.c
@@ -4,7 +4,7 @@
 #include <linux/err.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/machine.h>
diff --git a/drivers/regulator/tps6287x-regulator.c b/drivers/regulator/tps6287x-regulator.c
index b1c0963586ac..19a4a300a963 100644
--- a/drivers/regulator/tps6287x-regulator.c
+++ b/drivers/regulator/tps6287x-regulator.c
@@ -8,8 +8,8 @@
 
 #include <linux/err.h>
 #include <linux/i2c.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/machine.h>
diff --git a/drivers/regulator/tps65218-regulator.c b/drivers/regulator/tps65218-regulator.c
index 13985883e5f0..f44b5767099c 100644
--- a/drivers/regulator/tps65218-regulator.c
+++ b/drivers/regulator/tps65218-regulator.c
@@ -8,12 +8,12 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/platform_device.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/driver.h>
diff --git a/drivers/regulator/tps65219-regulator.c b/drivers/regulator/tps65219-regulator.c
index 8971b507a79a..b4065356392f 100644
--- a/drivers/regulator/tps65219-regulator.c
+++ b/drivers/regulator/tps65219-regulator.c
@@ -15,8 +15,8 @@
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/err.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/driver.h>
diff --git a/drivers/regulator/tps6594-regulator.c b/drivers/regulator/tps6594-regulator.c
index d5a574ec6d12..25ef102c8270 100644
--- a/drivers/regulator/tps6594-regulator.c
+++ b/drivers/regulator/tps6594-regulator.c
@@ -9,7 +9,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c
index 3e724f5345de..5bacfcebf59a 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -12,7 +12,6 @@
 #include <linux/err.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c
index f9c695f9bde8..6eed0f6e0adb 100644
--- a/drivers/regulator/twl6030-regulator.c
+++ b/drivers/regulator/twl6030-regulator.c
@@ -13,7 +13,6 @@
 #include <linux/err.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/uniphier-regulator.c b/drivers/regulator/uniphier-regulator.c
index 7e2785e10dc6..1d8304b88bd6 100644
--- a/drivers/regulator/uniphier-regulator.c
+++ b/drivers/regulator/uniphier-regulator.c
@@ -7,7 +7,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
diff --git a/drivers/regulator/vctrl-regulator.c b/drivers/regulator/vctrl-regulator.c
index 85dca90233f6..2796580a3a3c 100644
--- a/drivers/regulator/vctrl-regulator.c
+++ b/drivers/regulator/vctrl-regulator.c
@@ -10,7 +10,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/regulator/coupler.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/of_regulator.h>
diff --git a/drivers/regulator/vexpress-regulator.c b/drivers/regulator/vexpress-regulator.c
index b545dbc70a4d..6687077e9a97 100644
--- a/drivers/regulator/vexpress-regulator.c
+++ b/drivers/regulator/vexpress-regulator.c
@@ -8,7 +8,8 @@
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/of_regulator.h>

From 749396cb29a7d82cb5e324bf61be3fc948d97141 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 14 Jul 2023 11:49:52 -0600
Subject: [PATCH 0184/2047] spi: Explicitly include correct DT includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230714174955.4064174-1-robh@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ar934x.c          | 3 ++-
 drivers/spi/spi-armada-3700.c     | 3 +--
 drivers/spi/spi-at91-usart.c      | 1 -
 drivers/spi/spi-bcm2835.c         | 2 +-
 drivers/spi/spi-bcm2835aux.c      | 4 +---
 drivers/spi/spi-cadence-quadspi.c | 1 -
 drivers/spi/spi-cadence-xspi.c    | 1 -
 drivers/spi/spi-davinci.c         | 1 -
 drivers/spi/spi-fsl-dspi.c        | 3 ++-
 drivers/spi/spi-fsl-lib.c         | 3 ++-
 drivers/spi/spi-fsl-lpspi.c       | 1 -
 drivers/spi/spi-fsl-qspi.c        | 1 -
 drivers/spi/spi-gpio.c            | 1 -
 drivers/spi/spi-gxp.c             | 1 -
 drivers/spi/spi-imx.c             | 1 -
 drivers/spi/spi-ingenic.c         | 2 +-
 drivers/spi/spi-lantiq-ssc.c      | 3 ++-
 drivers/spi/spi-loopback-test.c   | 2 +-
 drivers/spi/spi-lp8841-rtc.c      | 1 -
 drivers/spi/spi-meson-spicc.c     | 1 -
 drivers/spi/spi-mt7621.c          | 2 ++
 drivers/spi/spi-mtk-nor.c         | 3 ++-
 drivers/spi/spi-mtk-snfi.c        | 3 ++-
 drivers/spi/spi-npcm-fiu.c        | 2 +-
 drivers/spi/spi-nxp-fspi.c        | 1 -
 drivers/spi/spi-orion.c           | 1 -
 drivers/spi/spi-qcom-qspi.c       | 2 +-
 drivers/spi/spi-qup.c             | 1 -
 drivers/spi/spi-rspi.c            | 2 +-
 drivers/spi/spi-s3c64xx.c         | 1 -
 drivers/spi/spi-sc18is602.c       | 1 -
 drivers/spi/spi-sh-msiof.c        | 1 -
 drivers/spi/spi-sn-f-ospi.c       | 2 +-
 drivers/spi/spi-sprd-adi.c        | 1 -
 drivers/spi/spi-sprd.c            | 1 -
 drivers/spi/spi-stm32-qspi.c      | 1 -
 drivers/spi/spi-sun6i.c           | 2 +-
 drivers/spi/spi-tegra114.c        | 1 -
 drivers/spi/spi-tegra20-slink.c   | 1 -
 drivers/spi/spi-tegra210-quad.c   | 1 -
 drivers/spi/spi-ti-qspi.c         | 1 -
 drivers/spi/spi-wpcm-fiu.c        | 3 +--
 drivers/spi/spi-zynqmp-gqspi.c    | 4 +---
 43 files changed, 26 insertions(+), 48 deletions(-)

diff --git a/drivers/spi/spi-ar934x.c b/drivers/spi/spi-ar934x.c
index 9dcada8c4cb9..58b98cea31d9 100644
--- a/drivers/spi/spi-ar934x.c
+++ b/drivers/spi/spi-ar934x.c
@@ -14,7 +14,8 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/spi/spi.h>
 
 #define DRIVER_NAME "spi-ar934x"
diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c
index a7fb7c94e70e..0103ac0158c0 100644
--- a/drivers/spi/spi-armada-3700.c
+++ b/drivers/spi/spi-armada-3700.c
@@ -17,8 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_irq.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/spi/spi.h>
 
diff --git a/drivers/spi/spi-at91-usart.c b/drivers/spi/spi-at91-usart.c
index 7dfe2b6c2990..75d9bc606442 100644
--- a/drivers/spi/spi-at91-usart.c
+++ b/drivers/spi/spi-at91-usart.c
@@ -13,7 +13,6 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
 #include <linux/gpio/consumer.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index 3b253da98c05..b247ee887562 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -24,7 +24,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/gpio/consumer.h>
 #include <linux/gpio/machine.h> /* FIXME: using chip internals */
 #include <linux/gpio/driver.h> /* FIXME: using chip internals */
diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index 288f7b994b36..8ace417c0a29 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -20,9 +20,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/spi/spi.h>
 #include <linux/spinlock.h>
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index abf10f92415d..e1a8cf08ef66 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -21,7 +21,6 @@
 #include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/spi/spi-cadence-xspi.c b/drivers/spi/spi-cadence-xspi.c
index ce4a3145f065..11d623cbba2e 100644
--- a/drivers/spi/spi-cadence-xspi.c
+++ b/drivers/spi/spi-cadence-xspi.c
@@ -11,7 +11,6 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index a3470cbaabdc..9a9f3bc0e2d5 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -15,7 +15,6 @@
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <linux/slab.h>
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 674cfe05f411..ca41c8a8ba42 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -13,7 +13,8 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/regmap.h>
 #include <linux/spi/spi.h>
diff --git a/drivers/spi/spi-fsl-lib.c b/drivers/spi/spi-fsl-lib.c
index 76e1192eb025..885757c29fbb 100644
--- a/drivers/spi/spi-fsl-lib.c
+++ b/drivers/spi/spi-fsl-lib.c
@@ -18,7 +18,8 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/spi/spi.h>
 #ifdef CONFIG_FSL_SOC
 #include <sysdev/fsl_soc.h>
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index 1c907d5d5bb3..dfdcb69f66ab 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/dma/imx-dma.h>
diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c
index 8ade61e5ebc0..e3de81248893 100644
--- a/drivers/spi/spi-fsl-qspi.c
+++ b/drivers/spi/spi-fsl-qspi.c
@@ -34,7 +34,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_qos.h>
 #include <linux/sizes.h>
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index 612db477ad12..8aa905e407a8 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -10,7 +10,6 @@
 #include <linux/platform_device.h>
 #include <linux/gpio/consumer.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
diff --git a/drivers/spi/spi-gxp.c b/drivers/spi/spi-gxp.c
index 684d63f402f3..88ab3032736b 100644
--- a/drivers/spi/spi-gxp.c
+++ b/drivers/spi/spi-gxp.c
@@ -3,7 +3,6 @@
 
 #include <linux/iopoll.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-mem.h>
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index db22f685ce91..3634fe8f6d68 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -20,7 +20,6 @@
 #include <linux/spi/spi.h>
 #include <linux/types.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/property.h>
 
 #include <linux/dma/imx-dma.h>
diff --git a/drivers/spi/spi-ingenic.c b/drivers/spi/spi-ingenic.c
index 7d4b515a160d..cfa665a80bc6 100644
--- a/drivers/spi/spi-ingenic.c
+++ b/drivers/spi/spi-ingenic.c
@@ -12,7 +12,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/spi/spi.h>
diff --git a/drivers/spi/spi-lantiq-ssc.c b/drivers/spi/spi-lantiq-ssc.c
index 8d6ecc5d6f70..65568272cfd9 100644
--- a/drivers/spi/spi-lantiq-ssc.c
+++ b/drivers/spi/spi-lantiq-ssc.c
@@ -6,7 +6,8 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/delay.h>
diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c
index 675a73cf1579..bbf2015d8e5c 100644
--- a/drivers/spi/spi-loopback-test.c
+++ b/drivers/spi/spi-loopback-test.c
@@ -14,8 +14,8 @@
 #include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/list_sort.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/printk.h>
 #include <linux/vmalloc.h>
 #include <linux/spi/spi.h>
diff --git a/drivers/spi/spi-lp8841-rtc.c b/drivers/spi/spi-lp8841-rtc.c
index c6810720b3b5..b357461f1b8b 100644
--- a/drivers/spi/spi-lp8841-rtc.c
+++ b/drivers/spi/spi-lp8841-rtc.c
@@ -15,7 +15,6 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/spi/spi.h>
 
 #define DRIVER_NAME	"spi_lp8841_rtc"
diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
index 7a9eba35ac38..43d134f4b42b 100644
--- a/drivers/spi/spi-meson-spicc.c
+++ b/drivers/spi/spi-meson-spicc.c
@@ -15,7 +15,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/spi/spi.h>
 #include <linux/types.h>
diff --git a/drivers/spi/spi-mt7621.c b/drivers/spi/spi-mt7621.c
index 3e9d396b33bd..91600e5c22e4 100644
--- a/drivers/spi/spi-mt7621.c
+++ b/drivers/spi/spi-mt7621.c
@@ -14,7 +14,9 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/spi/spi.h>
 
diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
index baa7a5353987..cf4ee8b19e42 100644
--- a/drivers/spi/spi-mtk-nor.c
+++ b/drivers/spi/spi-mtk-nor.c
@@ -13,7 +13,8 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-mem.h>
diff --git a/drivers/spi/spi-mtk-snfi.c b/drivers/spi/spi-mtk-snfi.c
index bed8317cd205..4433a8a9299f 100644
--- a/drivers/spi/spi-mtk-snfi.c
+++ b/drivers/spi/spi-mtk-snfi.c
@@ -76,7 +76,8 @@
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <linux/iopoll.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/mtd/nand-ecc-mtk.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-mem.h>
diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c
index eb353561509a..9e8c914ff26c 100644
--- a/drivers/spi/spi-npcm-fiu.c
+++ b/drivers/spi/spi-npcm-fiu.c
@@ -12,7 +12,7 @@
 #include <linux/io.h>
 #include <linux/vmalloc.h>
 #include <linux/regmap.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/spi/spi-mem.h>
 #include <linux/mfd/syscon.h>
 
diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
index 4cf9b3bf7b57..45a4acc95661 100644
--- a/drivers/spi/spi-nxp-fspi.c
+++ b/drivers/spi/spi-nxp-fspi.c
@@ -47,7 +47,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_qos.h>
 #include <linux/regmap.h>
diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index 4882aafd0505..faf2764b91a8 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c
@@ -16,7 +16,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/clk.h>
 #include <linux/sizes.h>
 #include <asm/unaligned.h>
diff --git a/drivers/spi/spi-qcom-qspi.c b/drivers/spi/spi-qcom-qspi.c
index 5a98c52bad32..a0ad9802b606 100644
--- a/drivers/spi/spi-qcom-qspi.c
+++ b/drivers/spi/spi-qcom-qspi.c
@@ -9,7 +9,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_opp.h>
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index 00e5e88e72c4..fd16acb1f578 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -11,7 +11,6 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 9312a9d58a1b..83bb16995e06 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -19,7 +19,7 @@
 #include <linux/clk.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
 #include <linux/sh_dma.h>
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index ad008597c7d8..889e48423c97 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -14,7 +14,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 #include <linux/platform_data/spi-s3c64xx.h>
 
diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index d52ed67243f7..c67a24daaaf9 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -12,7 +12,6 @@
 #include <linux/i2c.h>
 #include <linux/delay.h>
 #include <linux/pm_runtime.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/platform_data/sc18is602.h>
 #include <linux/gpio/consumer.h>
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 9e90b4f8b357..5c75b5c58d97 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -20,7 +20,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/sh_dma.h>
diff --git a/drivers/spi/spi-sn-f-ospi.c b/drivers/spi/spi-sn-f-ospi.c
index d64d3f75c726..85e4a01bc8b0 100644
--- a/drivers/spi/spi-sn-f-ospi.c
+++ b/drivers/spi/spi-sn-f-ospi.c
@@ -10,7 +10,7 @@
 #include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-mem.h>
diff --git a/drivers/spi/spi-sprd-adi.c b/drivers/spi/spi-sprd-adi.c
index 94d16aadfa44..bf01feedbf93 100644
--- a/drivers/spi/spi-sprd-adi.c
+++ b/drivers/spi/spi-sprd-adi.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/spi/spi.h>
diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c
index 518c7eaca84e..95377cf748c0 100644
--- a/drivers/spi/spi-sprd.c
+++ b/drivers/spi/spi-sprd.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_dma.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
index 2b6804aa6901..def74ae9b5f6 100644
--- a/drivers/spi/spi-stm32-qspi.c
+++ b/drivers/spi/spi-stm32-qspi.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_gpio.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index e9144d76bcdb..3f5b1556ece0 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
index 488df681eaef..ca4bdd45f56e 100644
--- a/drivers/spi/spi-tegra114.c
+++ b/drivers/spi/spi-tegra114.c
@@ -20,7 +20,6 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/reset.h>
 #include <linux/spi/spi.h>
 
diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
index 8cfd9e4f60b5..4d6db6182c5e 100644
--- a/drivers/spi/spi-tegra20-slink.c
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -21,7 +21,6 @@
 #include <linux/pm_opp.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/reset.h>
 #include <linux/spi/spi.h>
 
diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index fbd14dd7be44..e9ad9b0b598b 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -18,7 +18,6 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/reset.h>
 #include <linux/spi/spi.h>
 #include <linux/acpi.h>
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index bf4b40289bee..4c81516b67db 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -22,7 +22,6 @@
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
diff --git a/drivers/spi/spi-wpcm-fiu.c b/drivers/spi/spi-wpcm-fiu.c
index f15312fdcdaf..852ffe013d32 100644
--- a/drivers/spi/spi-wpcm-fiu.c
+++ b/drivers/spi/spi-wpcm-fiu.c
@@ -3,9 +3,8 @@
 
 #include <linux/clk.h>
 #include <linux/mfd/syscon.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/spi/spi-mem.h>
diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
index fb2ca9b90eab..a6b892d01038 100644
--- a/drivers/spi/spi-zynqmp-gqspi.c
+++ b/drivers/spi/spi-zynqmp-gqspi.c
@@ -14,9 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_irq.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>

From 401013e2ad9e1ba329a91a7b66779d8cb780bcd0 Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tandersen@netflix.com>
Date: Wed, 7 Jun 2023 08:15:21 -0600
Subject: [PATCH 0185/2047] documentation/rcu: Fix typo

s/slat/splat/

Signed-off-by: Tycho Andersen <tandersen@netflix.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/RCU/lockdep-splat.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/RCU/lockdep-splat.rst b/Documentation/RCU/lockdep-splat.rst
index 2a5c79db57dc..bcbc4b3c88d7 100644
--- a/Documentation/RCU/lockdep-splat.rst
+++ b/Documentation/RCU/lockdep-splat.rst
@@ -10,7 +10,7 @@ misuses of the RCU API, most notably using one of the rcu_dereference()
 family to access an RCU-protected pointer without the proper protection.
 When such misuse is detected, an lockdep-RCU splat is emitted.
 
-The usual cause of a lockdep-RCU slat is someone accessing an
+The usual cause of a lockdep-RCU splat is someone accessing an
 RCU-protected data structure without either (1) being in the right kind of
 RCU read-side critical section or (2) holding the right update-side lock.
 This problem can therefore be serious: it might result in random memory

From bc25e7c3598e56992120505ea09f6cf0651da63b Mon Sep 17 00:00:00 2001
From: Alan Huang <mmpgouride@gmail.com>
Date: Tue, 13 Jun 2023 00:57:01 +0000
Subject: [PATCH 0186/2047] docs/RCU: Add the missing rcu_read_unlock()

We should exit the RCU read-side critical section before re-entering.

Signed-off-by: Alan Huang <mmpgouride@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/RCU/rculist_nulls.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst
index 9a734bf54b76..0612a6387d8e 100644
--- a/Documentation/RCU/rculist_nulls.rst
+++ b/Documentation/RCU/rculist_nulls.rst
@@ -29,8 +29,10 @@ algorithms:
   rcu_read_lock()
   obj = lockless_lookup(key);
   if (obj) {
-    if (!try_get_ref(obj)) // might fail for free objects
+    if (!try_get_ref(obj)) { // might fail for free objects
+      rcu_read_unlock();
       goto begin;
+    }
     /*
     * Because a writer could delete object, and a writer could
     * reuse these object before the RCU grace period, we

From 3f831e38cecdd72a139a130da11f95be38481034 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 13 Jun 2023 18:24:31 +0000
Subject: [PATCH 0187/2047] Docs/RCU/rculist_nulls: Fix trivial coding style

Lookup example of non-hlist_nulls management is missing a semicolon, and
having inconsistent indentation (one line is using single space
indentation while others are using two spaces indentation).  Fix the
trivial issues.

Signed-off-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/RCU/rculist_nulls.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst
index 0612a6387d8e..25b739885cfa 100644
--- a/Documentation/RCU/rculist_nulls.rst
+++ b/Documentation/RCU/rculist_nulls.rst
@@ -26,7 +26,7 @@ algorithms:
 ::
 
   begin:
-  rcu_read_lock()
+  rcu_read_lock();
   obj = lockless_lookup(key);
   if (obj) {
     if (!try_get_ref(obj)) { // might fail for free objects
@@ -70,8 +70,8 @@ And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
        pos && ({ prefetch(pos->next); 1; }) &&
        ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
        pos = rcu_dereference(pos->next))
-   if (obj->key == key)
-     return obj;
+    if (obj->key == key)
+      return obj;
   return NULL;
 
 Quoting Corey Minyard::

From 674dd365736125b56297aaea8845a90e5ea990b9 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 13 Jun 2023 18:24:32 +0000
Subject: [PATCH 0188/2047] Docs/RCU/rculist_nulls: Assign 'obj' before use
 from the examples

Lookup example code snippets in rculist_nulls.rst are using 'obj'
without assignment.  Fix the code to assign it properly.

Signed-off-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/RCU/rculist_nulls.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst
index 25b739885cfa..4d6f077552ed 100644
--- a/Documentation/RCU/rculist_nulls.rst
+++ b/Documentation/RCU/rculist_nulls.rst
@@ -56,7 +56,7 @@ but a version with an additional memory barrier (smp_rmb())
     struct hlist_node *node, *next;
     for (pos = rcu_dereference((head)->first);
          pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
-         ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+         ({ obj = hlist_entry(pos, typeof(*obj), member); 1; });
          pos = rcu_dereference(next))
       if (obj->key == key)
         return obj;
@@ -68,7 +68,7 @@ And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
   struct hlist_node *node;
   for (pos = rcu_dereference((head)->first);
        pos && ({ prefetch(pos->next); 1; }) &&
-       ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+       ({ obj = hlist_entry(pos, typeof(*obj), member); 1; });
        pos = rcu_dereference(pos->next))
     if (obj->key == key)
       return obj;

From 5326caa7a147743340f68b5b7ed4c2a91b4abb56 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 16 Jun 2023 23:36:24 +0000
Subject: [PATCH 0189/2047] Docs/RCU/rculist_nulls: Specify type of the object
 in examples

The type of 'obj' in example code of rculist_nulls.rst is implicit.
Provide the specific type of it before the example code.

Suggested-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/rcu/43943609-f80c-4b6a-9844-994eef800757@paulmck-laptop/
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/RCU/rculist_nulls.rst | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst
index 4d6f077552ed..479cedfec446 100644
--- a/Documentation/RCU/rculist_nulls.rst
+++ b/Documentation/RCU/rculist_nulls.rst
@@ -18,7 +18,16 @@ to solve following problem.
 
 Without 'nulls', a typical RCU linked list managing objects which are
 allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use the following
-algorithms:
+algorithms.  Following examples assume 'obj' is a pointer to such
+objects, which is having below type.
+
+::
+
+  struct object {
+    struct hlist_node obj_node;
+    atomic_t refcnt;
+    unsigned int key;
+  };
 
 1) Lookup algorithm
 -------------------
@@ -144,6 +153,9 @@ the beginning. If the object was moved to the same chain,
 then the reader doesn't care: It might occasionally
 scan the list again without harm.
 
+Note that using hlist_nulls means the type of 'obj_node' field of
+'struct object' becomes 'struct hlist_nulls_node'.
+
 
 1) lookup algorithm
 -------------------

From d186204a98541c7d693290bfc91853bbfdfcf0cb Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 16 Jun 2023 23:36:25 +0000
Subject: [PATCH 0190/2047] Docs/RCU/rculist_nulls: Fix hlist_[nulls]_head
 field names of 'obj'

The example code snippets on rculist_nulls.rst are assuming 'obj' to
have the 'hlist_head' or 'hlist_nulls_head' field named 'obj_node', but
a sentence and some code snippets are wrongly calling
'obj->obj_node.next' as 'obj->obj_next', or 'obj->obj_node' as 'member'.
Fix it.

Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/RCU/rculist_nulls.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst
index 479cedfec446..1fb086066377 100644
--- a/Documentation/RCU/rculist_nulls.rst
+++ b/Documentation/RCU/rculist_nulls.rst
@@ -65,7 +65,7 @@ but a version with an additional memory barrier (smp_rmb())
     struct hlist_node *node, *next;
     for (pos = rcu_dereference((head)->first);
          pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
-         ({ obj = hlist_entry(pos, typeof(*obj), member); 1; });
+         ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
          pos = rcu_dereference(next))
       if (obj->key == key)
         return obj;
@@ -77,7 +77,7 @@ And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
   struct hlist_node *node;
   for (pos = rcu_dereference((head)->first);
        pos && ({ prefetch(pos->next); 1; }) &&
-       ({ obj = hlist_entry(pos, typeof(*obj), member); 1; });
+       ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
        pos = rcu_dereference(pos->next))
     if (obj->key == key)
       return obj;
@@ -97,7 +97,7 @@ Quoting Corey Minyard::
 2) Insertion algorithm
 ----------------------
 
-We need to make sure a reader cannot read the new 'obj->obj_next' value
+We need to make sure a reader cannot read the new 'obj->obj_node.next' value
 and previous value of 'obj->key'. Otherwise, an item could be deleted
 from a chain, and inserted into another chain. If new chain was empty
 before the move, 'next' pointer is NULL, and lockless reader can not
@@ -165,7 +165,7 @@ Note that using hlist_nulls means the type of 'obj_node' field of
   head = &table[slot];
   begin:
   rcu_read_lock();
-  hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
+  hlist_nulls_for_each_entry_rcu(obj, node, head, obj_node) {
     if (obj->key == key) {
       if (!try_get_ref(obj)) { // might fail for free objects
 	rcu_read_unlock();

From 47d63d7a3918d8bc1ccd0b0527da8dbb66429100 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 16 Jun 2023 23:36:26 +0000
Subject: [PATCH 0191/2047] Docs/RCU/rculist_nulls: Fix text about
 atomic_set_release()

The document says we can avoid extra _release() in insert function when
hlist_nulls is used, but that's not true[1].  Drop it.

[1] https://lore.kernel.org/rcu/46440869-644a-4982-b790-b71b43976c66@paulmck-laptop/

Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/RCU/rculist_nulls.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst
index 1fb086066377..21e40fcc08de 100644
--- a/Documentation/RCU/rculist_nulls.rst
+++ b/Documentation/RCU/rculist_nulls.rst
@@ -140,8 +140,7 @@ very very fast (before the end of RCU grace period)
 Avoiding extra smp_rmb()
 ========================
 
-With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
-and extra _release() in insert function.
+With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup().
 
 For example, if we choose to store the slot number as the 'nulls'
 end-of-list marker for each slot of the hash table, we can detect
@@ -196,6 +195,9 @@ Note that using hlist_nulls means the type of 'obj_node' field of
 2) Insert algorithm
 -------------------
 
+Same to the above one, but uses hlist_nulls_add_head_rcu() instead of
+hlist_add_head_rcu().
+
 ::
 
   /*

From d119357d0743548f90243315de905cf6a79e5221 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sun, 14 May 2023 18:06:12 -0700
Subject: [PATCH 0192/2047] rcu-tasks: Treat only synchronous grace periods
 urgently

The performance requirements on RCU Tasks, and in particular on RCU
Tasks Trace, have evolved over time as the workloads have evolved.
The current implementation is designed to provide low grace-period
latencies, and also to accommodate short-duration floods of callbacks.

However, current workloads can also provide a constant background
callback-queuing rate of a few hundred call_rcu_tasks_trace() invocations
per second.  This results in continuous back-to-back RCU Tasks Trace
grace periods, which in turn can consume the better part of 10% of a CPU.
One could take the attitude that there are several tens of other CPUs on
the systems running such workloads, but energy efficiency is a thing.
On these systems, although asynchronous grace-period requests happen
every few milliseconds, synchronous grace-period requests are quite rare.

This commit therefore arrranges for grace periods to be initiated
immediately in response to calls to synchronize_rcu_tasks*() and
also to calls to synchronize_rcu_mult() that are passed one of the
call_rcu_tasks*() functions.  These are recognized by the tell-tale
wakeme_after_rcu callback function.

In other cases, callbacks are gathered up for up to about 250 milliseconds
before a grace period is initiated.  This results in more than an order of
magnitude reduction in RCU Tasks Trace grace periods, with corresponding
reduction in consumption of CPU time.

Reported-by: Alexei Starovoitov <ast@kernel.org>
Reported-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tasks.h | 81 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 73 insertions(+), 8 deletions(-)

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index b770add3f843..c88d7fe29d92 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -25,6 +25,8 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
  * @cblist: Callback list.
  * @lock: Lock protecting per-CPU callback list.
  * @rtp_jiffies: Jiffies counter value for statistics.
+ * @lazy_timer: Timer to unlazify callbacks.
+ * @urgent_gp: Number of additional non-lazy grace periods.
  * @rtp_n_lock_retries: Rough lock-contention statistic.
  * @rtp_work: Work queue for invoking callbacks.
  * @rtp_irq_work: IRQ work queue for deferred wakeups.
@@ -38,6 +40,8 @@ struct rcu_tasks_percpu {
 	raw_spinlock_t __private lock;
 	unsigned long rtp_jiffies;
 	unsigned long rtp_n_lock_retries;
+	struct timer_list lazy_timer;
+	unsigned int urgent_gp;
 	struct work_struct rtp_work;
 	struct irq_work rtp_irq_work;
 	struct rcu_head barrier_q_head;
@@ -51,7 +55,6 @@ struct rcu_tasks_percpu {
  * @cbs_wait: RCU wait allowing a new callback to get kthread's attention.
  * @cbs_gbl_lock: Lock protecting callback list.
  * @tasks_gp_mutex: Mutex protecting grace period, needed during mid-boot dead zone.
- * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
  * @gp_func: This flavor's grace-period-wait function.
  * @gp_state: Grace period's most recent state transition (debugging).
  * @gp_sleep: Per-grace-period sleep to prevent CPU-bound looping.
@@ -61,6 +64,8 @@ struct rcu_tasks_percpu {
  * @tasks_gp_seq: Number of grace periods completed since boot.
  * @n_ipis: Number of IPIs sent to encourage grace periods to end.
  * @n_ipis_fails: Number of IPI-send failures.
+ * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
+ * @lazy_jiffies: Number of jiffies to allow callbacks to be lazy.
  * @pregp_func: This flavor's pre-grace-period function (optional).
  * @pertask_func: This flavor's per-task scan function (optional).
  * @postscan_func: This flavor's post-task scan function (optional).
@@ -92,6 +97,7 @@ struct rcu_tasks {
 	unsigned long n_ipis;
 	unsigned long n_ipis_fails;
 	struct task_struct *kthread_ptr;
+	unsigned long lazy_jiffies;
 	rcu_tasks_gp_func_t gp_func;
 	pregp_func_t pregp_func;
 	pertask_func_t pertask_func;
@@ -127,6 +133,7 @@ static struct rcu_tasks rt_name =							\
 	.gp_func = gp,									\
 	.call_func = call,								\
 	.rtpcpu = &rt_name ## __percpu,							\
+	.lazy_jiffies = DIV_ROUND_UP(HZ, 4),						\
 	.name = n,									\
 	.percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS),				\
 	.percpu_enqueue_lim = 1,							\
@@ -276,6 +283,33 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
 			data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim), rcu_task_cb_adjust);
 }
 
+// Compute wakeup time for lazy callback timer.
+static unsigned long rcu_tasks_lazy_time(struct rcu_tasks *rtp)
+{
+	return jiffies + rtp->lazy_jiffies;
+}
+
+// Timer handler that unlazifies lazy callbacks.
+static void call_rcu_tasks_generic_timer(struct timer_list *tlp)
+{
+	unsigned long flags;
+	bool needwake = false;
+	struct rcu_tasks *rtp;
+	struct rcu_tasks_percpu *rtpcp = from_timer(rtpcp, tlp, lazy_timer);
+
+	rtp = rtpcp->rtpp;
+	raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
+	if (!rcu_segcblist_empty(&rtpcp->cblist) && rtp->lazy_jiffies) {
+		if (!rtpcp->urgent_gp)
+			rtpcp->urgent_gp = 1;
+		needwake = true;
+		mod_timer(&rtpcp->lazy_timer, rcu_tasks_lazy_time(rtp));
+	}
+	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+	if (needwake)
+		rcuwait_wake_up(&rtp->cbs_wait);
+}
+
 // IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic().
 static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
 {
@@ -292,6 +326,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 {
 	int chosen_cpu;
 	unsigned long flags;
+	bool havekthread = smp_load_acquire(&rtp->kthread_ptr);
 	int ideal_cpu;
 	unsigned long j;
 	bool needadjust = false;
@@ -321,7 +356,15 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 		cblist_init_generic(rtp);
 		raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
 	}
-	needwake = rcu_segcblist_empty(&rtpcp->cblist);
+	needwake = func == wakeme_after_rcu;
+	if (havekthread && !timer_pending(&rtpcp->lazy_timer)) {
+		if (rtp->lazy_jiffies)
+			mod_timer(&rtpcp->lazy_timer, rcu_tasks_lazy_time(rtp));
+		else
+			needwake = rcu_segcblist_empty(&rtpcp->cblist);
+	}
+	if (needwake)
+		rtpcp->urgent_gp = 3;
 	rcu_segcblist_enqueue(&rtpcp->cblist, rhp);
 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
 	if (unlikely(needadjust)) {
@@ -415,9 +458,14 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
 		}
 		rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
 		(void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq));
-		if (rcu_segcblist_pend_cbs(&rtpcp->cblist))
+		if (rtpcp->urgent_gp > 0 && rcu_segcblist_pend_cbs(&rtpcp->cblist)) {
+			if (rtp->lazy_jiffies)
+				rtpcp->urgent_gp--;
 			needgpcb |= 0x3;
-		if (!rcu_segcblist_empty(&rtpcp->cblist))
+		} else if (rcu_segcblist_empty(&rtpcp->cblist)) {
+			rtpcp->urgent_gp = 0;
+		}
+		if (rcu_segcblist_ready_cbs(&rtpcp->cblist))
 			needgpcb |= 0x1;
 		raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
 	}
@@ -549,11 +597,19 @@ static void rcu_tasks_one_gp(struct rcu_tasks *rtp, bool midboot)
 // RCU-tasks kthread that detects grace periods and invokes callbacks.
 static int __noreturn rcu_tasks_kthread(void *arg)
 {
+	int cpu;
 	struct rcu_tasks *rtp = arg;
 
+	for_each_possible_cpu(cpu) {
+		struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
+
+		timer_setup(&rtpcp->lazy_timer, call_rcu_tasks_generic_timer, 0);
+		rtpcp->urgent_gp = 1;
+	}
+
 	/* Run on housekeeping CPUs by default.  Sysadm can move if desired. */
 	housekeeping_affine(current, HK_TYPE_RCU);
-	WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
+	smp_store_release(&rtp->kthread_ptr, current); // Let GPs start!
 
 	/*
 	 * Each pass through the following loop makes one check for
@@ -635,16 +691,22 @@ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
 {
 	int cpu;
 	bool havecbs = false;
+	bool haveurgent = false;
+	bool haveurgentcbs = false;
 
 	for_each_possible_cpu(cpu) {
 		struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
 
-		if (!data_race(rcu_segcblist_empty(&rtpcp->cblist))) {
+		if (!data_race(rcu_segcblist_empty(&rtpcp->cblist)))
 			havecbs = true;
+		if (data_race(rtpcp->urgent_gp))
+			haveurgent = true;
+		if (!data_race(rcu_segcblist_empty(&rtpcp->cblist)) && data_race(rtpcp->urgent_gp))
+			haveurgentcbs = true;
+		if (havecbs && haveurgent && haveurgentcbs)
 			break;
-		}
 	}
-	pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
+	pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c%c%c l:%lu %s\n",
 		rtp->kname,
 		tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
 		jiffies - data_race(rtp->gp_jiffies),
@@ -652,6 +714,9 @@ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
 		data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
 		".k"[!!data_race(rtp->kthread_ptr)],
 		".C"[havecbs],
+		".u"[haveurgent],
+		".U"[haveurgentcbs],
+		rtp->lazy_jiffies,
 		s);
 }
 #endif // #ifndef CONFIG_TINY_RCU

From 5ae769c611e7d77c1d945cb132140deb93a32d49 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 17 May 2023 05:20:03 -0700
Subject: [PATCH 0193/2047] rcu-tasks: Remove redundant #ifdef CONFIG_TASKS_RCU

The kernel/rcu/tasks.h file has a #endif immediately followed by an

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tasks.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index c88d7fe29d92..7eae67fbe47c 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -146,9 +146,7 @@ static struct rcu_tasks rt_name =							\
 #ifdef CONFIG_TASKS_RCU
 /* Track exiting tasks in order to allow them to be waited for. */
 DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
-#endif
 
-#ifdef CONFIG_TASKS_RCU
 /* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */
 static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
 static DEFINE_TIMER(tasks_rcu_exit_srcu_stall_timer, tasks_rcu_exit_srcu_stall);

From 450d461aa629024c0f01022122838b5243fe0f2a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 17 May 2023 06:40:23 -0700
Subject: [PATCH 0194/2047] rcu-tasks: Add kernel boot parameters for callback
 laziness

This commit adds kernel boot parameters for callback laziness, allowing
the RCU Tasks flavors to be individually adjusted.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../admin-guide/kernel-parameters.txt         | 23 +++++++++++++++++++
 kernel/rcu/tasks.h                            | 15 ++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1457995fd41..a0f427e1a562 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5293,6 +5293,29 @@
 			A change in value does not take effect until
 			the beginning of the next grace period.
 
+	rcupdate.rcu_tasks_lazy_ms= [KNL]
+			Set timeout in milliseconds RCU Tasks asynchronous
+			callback batching for call_rcu_tasks().
+			A negative value will take the default.  A value
+			of zero will disable batching.	Batching is
+			always disabled for synchronize_rcu_tasks().
+
+	rcupdate.rcu_tasks_rude_lazy_ms= [KNL]
+			Set timeout in milliseconds RCU Tasks
+			Rude asynchronous callback batching for
+			call_rcu_tasks_rude().	A negative value
+			will take the default.	A value of zero will
+			disable batching.  Batching is always disabled
+			for synchronize_rcu_tasks_rude().
+
+	rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
+			Set timeout in milliseconds RCU Tasks
+			Trace asynchronous callback batching for
+			call_rcu_tasks_trace().  A negative value
+			will take the default.	A value of zero will
+			disable batching.  Batching is always disabled
+			for synchronize_rcu_tasks_trace().
+
 	rcupdate.rcu_self_test= [KNL]
 			Run the RCU early boot self tests
 
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 7eae67fbe47c..28e986627e3f 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1083,11 +1083,16 @@ void rcu_barrier_tasks(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
 
+int rcu_tasks_lazy_ms = -1;
+module_param(rcu_tasks_lazy_ms, int, 0444);
+
 static int __init rcu_spawn_tasks_kthread(void)
 {
 	cblist_init_generic(&rcu_tasks);
 	rcu_tasks.gp_sleep = HZ / 10;
 	rcu_tasks.init_fract = HZ / 10;
+	if (rcu_tasks_lazy_ms >= 0)
+		rcu_tasks.lazy_jiffies = msecs_to_jiffies(rcu_tasks_lazy_ms);
 	rcu_tasks.pregp_func = rcu_tasks_pregp_step;
 	rcu_tasks.pertask_func = rcu_tasks_pertask;
 	rcu_tasks.postscan_func = rcu_tasks_postscan;
@@ -1236,10 +1241,15 @@ void rcu_barrier_tasks_rude(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);
 
+int rcu_tasks_rude_lazy_ms = -1;
+module_param(rcu_tasks_rude_lazy_ms, int, 0444);
+
 static int __init rcu_spawn_tasks_rude_kthread(void)
 {
 	cblist_init_generic(&rcu_tasks_rude);
 	rcu_tasks_rude.gp_sleep = HZ / 10;
+	if (rcu_tasks_rude_lazy_ms >= 0)
+		rcu_tasks_rude.lazy_jiffies = msecs_to_jiffies(rcu_tasks_rude_lazy_ms);
 	rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
 	return 0;
 }
@@ -1856,6 +1866,9 @@ void rcu_barrier_tasks_trace(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
 
+int rcu_tasks_trace_lazy_ms = -1;
+module_param(rcu_tasks_trace_lazy_ms, int, 0444);
+
 static int __init rcu_spawn_tasks_trace_kthread(void)
 {
 	cblist_init_generic(&rcu_tasks_trace);
@@ -1870,6 +1883,8 @@ static int __init rcu_spawn_tasks_trace_kthread(void)
 		if (rcu_tasks_trace.init_fract <= 0)
 			rcu_tasks_trace.init_fract = 1;
 	}
+	if (rcu_tasks_trace_lazy_ms >= 0)
+		rcu_tasks_trace.lazy_jiffies = msecs_to_jiffies(rcu_tasks_trace_lazy_ms);
 	rcu_tasks_trace.pregp_func = rcu_tasks_trace_pregp_step;
 	rcu_tasks_trace.postscan_func = rcu_tasks_trace_postscan;
 	rcu_tasks_trace.holdouts_func = check_all_holdout_tasks_trace;

From db13710a0365ba66f8f232eb3bdfaa1a10d820f7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 26 Jun 2023 13:45:13 -0700
Subject: [PATCH 0195/2047] rcu-tasks: Cancel callback laziness if too many
 callbacks

The various RCU Tasks flavors now do lazy grace periods when there are
only asynchronous grace period requests.  By default, the system will let
250 milliseconds elapse after the first call_rcu_tasks*() callbacki is
queued before starting a grace period.  In contrast, synchronous grace
period requests such as synchronize_rcu_tasks*() will start a grace
period immediately.

However, invoking one of the call_rcu_tasks*() functions in a too-tight
loop can result in a callback flood, which in turn can exhaust memory
if grace periods are delayed for too long.

This commit therefore sets a limit so that the grace-period kthread
will be awakened when any CPU's callback list expands to contain
rcupdate.rcu_task_lazy_lim callbacks elements (defaulting to 32, set to -1
to disable), the grace-period kthread will be awakened, thus cancelling
any ongoing laziness and getting out in front of the potential callback
flood.

Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 7 +++++++
 kernel/rcu/tasks.h                              | 7 +++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a0f427e1a562..44705a3618b9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5264,6 +5264,13 @@
 			number avoids disturbing real-time workloads,
 			but lengthens grace periods.
 
+	rcupdate.rcu_task_lazy_lim= [KNL]
+			Number of callbacks on a given CPU that will
+			cancel laziness on that CPU.  Use -1 to disable
+			cancellation of laziness, but be advised that
+			doing so increases the danger of OOM due to
+			callback flooding.
+
 	rcupdate.rcu_task_stall_info= [KNL]
 			Set initial timeout in jiffies for RCU task stall
 			informational messages, which give some indication
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 28e986627e3f..291d536b1789 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -176,6 +176,8 @@ static int rcu_task_contend_lim __read_mostly = 100;
 module_param(rcu_task_contend_lim, int, 0444);
 static int rcu_task_collapse_lim __read_mostly = 10;
 module_param(rcu_task_collapse_lim, int, 0444);
+static int rcu_task_lazy_lim __read_mostly = 32;
+module_param(rcu_task_lazy_lim, int, 0444);
 
 /* RCU tasks grace-period state for debugging. */
 #define RTGS_INIT		 0
@@ -354,8 +356,9 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 		cblist_init_generic(rtp);
 		raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
 	}
-	needwake = func == wakeme_after_rcu;
-	if (havekthread && !timer_pending(&rtpcp->lazy_timer)) {
+	needwake = (func == wakeme_after_rcu) ||
+		   (rcu_segcblist_n_cbs(&rtpcp->cblist) == rcu_task_lazy_lim);
+	if (havekthread && !needwake && !timer_pending(&rtpcp->lazy_timer)) {
 		if (rtp->lazy_jiffies)
 			mod_timer(&rtpcp->lazy_timer, rcu_tasks_lazy_time(rtp));
 		else

From 8f78f046e27b72897f15ea62c8b7882a7d7b3bc6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 6 Jul 2023 15:22:37 -0700
Subject: [PATCH 0196/2047] refscale: Add CONFIG_PREEMPT_DYNAMIC=n to NOPREEMPT
 scenario

It is no longer possible to build a kernel with a preemption-disabled
RCU without use of CONFIG_PREEMPT_DYNAMIC=n.  This commit therefore
adds CONFIG_PREEMPT_DYNAMIC=n to the refscale torture type's NOPREEMPT
scenario file.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
index ef2b501a6971..67f9d2998afd 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
@@ -2,6 +2,7 @@ CONFIG_SMP=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_PREEMPT_RCU=n
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=y

From f5063e8948dad7f31adb007284a5d5038ae31bb8 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Fri, 7 Jul 2023 13:53:55 -0400
Subject: [PATCH 0197/2047] refscale: Fix uninitalized use of wait_queue_head_t

Running the refscale test occasionally crashes the kernel with the
following error:

[ 8569.952896] BUG: unable to handle page fault for address: ffffffffffffffe8
[ 8569.952900] #PF: supervisor read access in kernel mode
[ 8569.952902] #PF: error_code(0x0000) - not-present page
[ 8569.952904] PGD c4b048067 P4D c4b049067 PUD c4b04b067 PMD 0
[ 8569.952910] Oops: 0000 [#1] PREEMPT_RT SMP NOPTI
[ 8569.952916] Hardware name: Dell Inc. PowerEdge R750/0WMWCR, BIOS 1.2.4 05/28/2021
[ 8569.952917] RIP: 0010:prepare_to_wait_event+0x101/0x190
  :
[ 8569.952940] Call Trace:
[ 8569.952941]  <TASK>
[ 8569.952944]  ref_scale_reader+0x380/0x4a0 [refscale]
[ 8569.952959]  kthread+0x10e/0x130
[ 8569.952966]  ret_from_fork+0x1f/0x30
[ 8569.952973]  </TASK>

The likely cause is that init_waitqueue_head() is called after the call to
the torture_create_kthread() function that creates the ref_scale_reader
kthread.  Although this init_waitqueue_head() call will very likely
complete before this kthread is created and starts running, it is
possible that the calling kthread will be delayed between the calls to
torture_create_kthread() and init_waitqueue_head().  In this case, the
new kthread will use the waitqueue head before it is properly initialized,
which is not good for the kernel's health and well-being.

The above crash happened here:

	static inline void __add_wait_queue(...)
	{
		:
		if (!(wq->flags & WQ_FLAG_PRIORITY)) <=== Crash here

The offset of flags from list_head entry in wait_queue_entry is
-0x18. If reader_tasks[i].wq.head.next is NULL as allocated reader_task
structure is zero initialized, the instruction will try to access address
0xffffffffffffffe8, which is exactly the fault address listed above.

This commit therefore invokes init_waitqueue_head() before creating
the kthread.

Fixes: 653ed64b01dc ("refperf: Add a test to measure performance of read-side synchronization")
Signed-off-by: Waiman Long <longman@redhat.com>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Acked-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/refscale.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index 1970ce5f22d4..71d138573856 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -1107,12 +1107,11 @@ ref_scale_init(void)
 	VERBOSE_SCALEOUT("Starting %d reader threads", nreaders);
 
 	for (i = 0; i < nreaders; i++) {
+		init_waitqueue_head(&reader_tasks[i].wq);
 		firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
 						  reader_tasks[i].task);
 		if (torture_init_error(firsterr))
 			goto unwind;
-
-		init_waitqueue_head(&(reader_tasks[i].wq));
 	}
 
 	// Main Task

From b5a2801fc05350b3b81e6fdcefddd6cfdbf8b238 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 11 Jul 2023 18:01:38 -0700
Subject: [PATCH 0198/2047] refscale: Add a "jiffies" test

This commit adds a "jiffies" test to refscale, allowing use of jiffies
to be compared to ktime_get_real_fast_ns().  On my x86 laptop, jiffies
is more than 20x faster.  (Though for many uses, the tens-of-nanoseconds
overhead of ktime_get_real_fast_ns() will be just fine.)

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/refscale.c | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index 71d138573856..91a0fd0d4d9a 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -528,6 +528,38 @@ static struct ref_scale_ops clock_ops = {
 	.name		= "clock"
 };
 
+static void ref_jiffies_section(const int nloops)
+{
+	u64 x = 0;
+	int i;
+
+	preempt_disable();
+	for (i = nloops; i >= 0; i--)
+		x += jiffies;
+	preempt_enable();
+	stopopts = x;
+}
+
+static void ref_jiffies_delay_section(const int nloops, const int udl, const int ndl)
+{
+	u64 x = 0;
+	int i;
+
+	preempt_disable();
+	for (i = nloops; i >= 0; i--) {
+		x += jiffies;
+		un_delay(udl, ndl);
+	}
+	preempt_enable();
+	stopopts = x;
+}
+
+static struct ref_scale_ops jiffies_ops = {
+	.readsection	= ref_jiffies_section,
+	.delaysection	= ref_jiffies_delay_section,
+	.name		= "jiffies"
+};
+
 ////////////////////////////////////////////////////////////////////////
 //
 // Methods leveraging SLAB_TYPESAFE_BY_RCU.
@@ -1047,7 +1079,7 @@ ref_scale_init(void)
 	int firsterr = 0;
 	static struct ref_scale_ops *scale_ops[] = {
 		&rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
-		&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
+		&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops, &jiffies_ops,
 		&typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
 	};
 

From 2226f3dc05a988ce81308bfc3d125f5d0b0d592c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 16 May 2023 07:02:01 -0700
Subject: [PATCH 0199/2047] rcuscale: Permit blocking delays between writers

Some workloads do isolated RCU work, and this can affect operation
latencies.  This commit therefore adds a writer_holdoff_jiffies module
parameter that causes writers to block for the specified number of
jiffies between each pair of consecutive write-side operations.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 5 +++++
 kernel/rcu/rcuscale.c                           | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1457995fd41..5ba231b786f8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4983,6 +4983,11 @@
 			in microseconds.  The default of zero says
 			no holdoff.
 
+	rcuscale.writer_holdoff_jiffies= [KNL]
+			Additional write-side holdoff between grace
+			periods, but in jiffies.  The default of zero
+			says no holdoff.
+
 	rcutorture.fqs_duration= [KNL]
 			Set duration of force_quiescent_state bursts
 			in microseconds.
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index d1221731c7cf..4277037e0996 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -93,6 +93,7 @@ torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
 	      "Shutdown at end of scalability tests.");
 torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
+torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable");
 torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
 torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
 torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?");
@@ -414,6 +415,7 @@ rcu_scale_writer(void *arg)
 	struct rcu_head *rhp = NULL;
 	bool started = false, done = false, alldone = false;
 	u64 t;
+	DEFINE_TORTURE_RANDOM(tr);
 	u64 *wdp;
 	u64 *wdpp = writer_durations[me];
 
@@ -448,6 +450,8 @@ rcu_scale_writer(void *arg)
 	do {
 		if (writer_holdoff)
 			udelay(writer_holdoff);
+		if (writer_holdoff_jiffies)
+			schedule_timeout_idle(torture_random(&tr) % writer_holdoff_jiffies + 1);
 		wdp = &wdpp[i];
 		*wdp = ktime_get_mono_fast_ns();
 		if (gp_async) {

From ee7516a16350ce2c1d45314a8c79bc93750fca8f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 16 May 2023 08:21:17 -0700
Subject: [PATCH 0200/2047] rcuscale: Fix gp_async_max typo: s/reader/writer/

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcuscale.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 4277037e0996..15edd8c82933 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -84,7 +84,7 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
 #endif
 
 torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
-torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader");
+torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer");
 torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
 torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
 torture_param(int, nreaders, -1, "Number of RCU reader threads");

From 7221f493c5ff8bee5c39de1e43cb8a8997c2cda5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 16 May 2023 08:22:31 -0700
Subject: [PATCH 0201/2047] rcuscale: Add minruntime module parameter

By default, rcuscale collects only 100 points of data per writer, but
arranging for all kthreads to be actively collecting (if not recording)
data during the time that any kthread might be recording.  This works
well, but does not allow much time to bring external performance tools
to bear.  This commit therefore adds a minruntime module parameter
that specifies a minimum data-collection interval in seconds.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 6 ++++++
 kernel/rcu/rcuscale.c                           | 5 ++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5ba231b786f8..a6888e3dfc20 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4953,6 +4953,12 @@
 			Number of loops doing rcuscale.kfree_alloc_num number
 			of allocations and frees.
 
+	rcuscale.minruntime= [KNL]
+			Set the minimum test run time in seconds.  This
+			does not affect the data-collection interval,
+			but instead allows better measurement of things
+			like CPU consumption.
+
 	rcuscale.nreaders= [KNL]
 			Set number of RCU readers.  The value -1 selects
 			N, where N is the number of CPUs.  A value
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 15edd8c82933..7c5bab5a4f19 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -87,6 +87,7 @@ torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
 torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer");
 torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
 torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
+torture_param(int, minruntime, 0, "Minimum run time (s)");
 torture_param(int, nreaders, -1, "Number of RCU reader threads");
 torture_param(int, nwriters, -1, "Number of RCU updater threads");
 torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
@@ -411,6 +412,7 @@ rcu_scale_writer(void *arg)
 {
 	int i = 0;
 	int i_max;
+	unsigned long jdone;
 	long me = (long)arg;
 	struct rcu_head *rhp = NULL;
 	bool started = false, done = false, alldone = false;
@@ -447,6 +449,7 @@ rcu_scale_writer(void *arg)
 		}
 	}
 
+	jdone = jiffies + minruntime * HZ;
 	do {
 		if (writer_holdoff)
 			udelay(writer_holdoff);
@@ -479,7 +482,7 @@ retry:
 		if (!started &&
 		    atomic_read(&n_rcu_scale_writer_started) >= nrealwriters)
 			started = true;
-		if (!done && i >= MIN_MEAS) {
+		if (!done && i >= MIN_MEAS && time_after(jiffies, jdone)) {
 			done = true;
 			sched_set_normal(current, 0);
 			pr_alert("%s%s rcu_scale_writer %ld has %d measurements\n",

From c68465dfaac3a36e2a72030f834811e651da8097 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 16 May 2023 13:45:16 -0700
Subject: [PATCH 0202/2047] rcuscale: Print out full set of module parameters

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcuscale.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 7c5bab5a4f19..18f1b0e13604 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -525,8 +525,8 @@ static void
 rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag)
 {
 	pr_alert("%s" SCALE_FLAG
-		 "--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n",
-		 scale_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
+		 "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown=%d\n",
+		 scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown);
 }
 
 /*

From bb7bad3dae42b669cfe471716f0073ac611e7f75 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 16 May 2023 16:04:52 -0700
Subject: [PATCH 0203/2047] rcuscale: Print out full set of kfree_rcu
 parameters

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 kernel/rcu/rcuscale.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 18f1b0e13604..821a3e65c54a 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -708,6 +708,10 @@ kfree_scale_init(void)
 	unsigned long jif_start;
 	unsigned long orig_jif;
 
+	pr_alert("%s" SCALE_FLAG
+		 "--- kfree_rcu_test: kfree_mult=%d kfree_by_call_rcu=%d kfree_nthreads=%d kfree_alloc_num=%d kfree_loops=%d kfree_rcu_test_double=%d kfree_rcu_test_single=%d\n",
+		 scale_type, kfree_mult, kfree_by_call_rcu, kfree_nthreads, kfree_alloc_num, kfree_loops, kfree_rcu_test_double, kfree_rcu_test_single);
+
 	// Also, do a quick self-test to ensure laziness is as much as
 	// expected.
 	if (kfree_by_call_rcu && !IS_ENABLED(CONFIG_RCU_LAZY)) {

From 5f8e3202696f5e94d3fc54cfed75d270ae843ee9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 17 May 2023 05:01:28 -0700
Subject: [PATCH 0204/2047] rcuscale: Measure grace-period kthread CPU time

This commit adds the ability to output the CPU time consumed by the
grace-period kthread for the RCU variant under test.  The CPU time is
whatever is in the designated task's current->stime field, and thus is
controlled by whatever CPU-time accounting scheme is in effect.

This output appears in microseconds as follows on the console:

rcu_scale: Grace-period kthread CPU time: 42367.037

[ paulmck: Apply feedback from Stephen Rothwell and kernel test robot. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: Yujie Liu <yujie.liu@intel.com>
---
 include/linux/rcupdate_trace.h |  1 +
 kernel/rcu/rcuscale.c          | 21 +++++++++++++++++++++
 kernel/rcu/tasks.h             |  6 ++++++
 3 files changed, 28 insertions(+)

diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index 9bc8cbb33340..eda493200663 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -87,6 +87,7 @@ static inline void rcu_read_unlock_trace(void)
 void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
 void synchronize_rcu_tasks_trace(void);
 void rcu_barrier_tasks_trace(void);
+struct task_struct *get_rcu_tasks_trace_gp_kthread(void);
 #else
 /*
  * The BPF JIT forms these addresses even when it doesn't call these
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 821a3e65c54a..7fba3ab66e35 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -141,6 +141,7 @@ struct rcu_scale_ops {
 	void (*gp_barrier)(void);
 	void (*sync)(void);
 	void (*exp_sync)(void);
+	struct task_struct *(*rso_gp_kthread)(void);
 	const char *name;
 };
 
@@ -336,6 +337,7 @@ static struct rcu_scale_ops tasks_tracing_ops = {
 	.gp_barrier	= rcu_barrier_tasks_trace,
 	.sync		= synchronize_rcu_tasks_trace,
 	.exp_sync	= synchronize_rcu_tasks_trace,
+	.rso_gp_kthread	= get_rcu_tasks_trace_gp_kthread,
 	.name		= "tasks-tracing"
 };
 
@@ -563,6 +565,8 @@ static struct task_struct **kfree_reader_tasks;
 static int kfree_nrealthreads;
 static atomic_t n_kfree_scale_thread_started;
 static atomic_t n_kfree_scale_thread_ended;
+static struct task_struct *kthread_tp;
+static u64 kthread_stime;
 
 struct kfree_obj {
 	char kfree_obj[8];
@@ -808,6 +812,18 @@ rcu_scale_cleanup(void)
 	if (gp_exp && gp_async)
 		SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
 
+	// If built-in, just report all of the GP kthread's CPU time.
+	if (IS_BUILTIN(CONFIG_RCU_SCALE_TEST) && !kthread_tp && cur_ops->rso_gp_kthread)
+		kthread_tp = cur_ops->rso_gp_kthread();
+	if (kthread_tp) {
+		u32 ns;
+		u64 us;
+
+		kthread_stime = kthread_tp->stime - kthread_stime;
+		us = div_u64_rem(kthread_stime, 1000, &ns);
+		pr_info("rcu_scale: Grace-period kthread CPU time: %llu.%03u us\n", us, ns);
+		show_rcu_gp_kthreads();
+	}
 	if (kfree_rcu_test) {
 		kfree_scale_cleanup();
 		return;
@@ -921,6 +937,11 @@ rcu_scale_init(void)
 	if (cur_ops->init)
 		cur_ops->init();
 
+	if (cur_ops->rso_gp_kthread) {
+		kthread_tp = cur_ops->rso_gp_kthread();
+		if (kthread_tp)
+			kthread_stime = kthread_tp->stime;
+	}
 	if (kfree_rcu_test)
 		return kfree_scale_init();
 
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index b770add3f843..990a6cf5fa35 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1830,6 +1830,12 @@ void show_rcu_tasks_trace_gp_kthread(void)
 EXPORT_SYMBOL_GPL(show_rcu_tasks_trace_gp_kthread);
 #endif // !defined(CONFIG_TINY_RCU)
 
+struct task_struct *get_rcu_tasks_trace_gp_kthread(void)
+{
+	return rcu_tasks_trace.kthread_ptr;
+}
+EXPORT_SYMBOL_GPL(get_rcu_tasks_trace_gp_kthread);
+
 #else /* #ifdef CONFIG_TASKS_TRACE_RCU */
 static void exit_tasks_rcu_finish_trace(struct task_struct *t) { }
 #endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */

From 2d7b2b344c0c665d47a3a9b540e03b834852b679 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 1 Jun 2023 09:38:50 -0700
Subject: [PATCH 0205/2047] rcuscale: Add kfree_by_call_rcu and kfree_mult to
 documentation

This commit adds the kfree_by_call_rcu and kfree_mult rcuscale module
parameters to kernel-parameters.txt.  While in the area, it updates
to rcuscale.scale_type.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a6888e3dfc20..3329dc22f53b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4928,6 +4928,15 @@
 			test until boot completes in order to avoid
 			interference.
 
+	rcuscale.kfree_by_call_rcu= [KNL]
+			In kernels built with CONFIG_RCU_LAZY=y, test
+			call_rcu() instead of kfree_rcu().
+
+	rcuscale.kfree_mult= [KNL]
+			Instead of allocating an object of size kfree_obj,
+			allocate one of kfree_mult * sizeof(kfree_obj).
+			Defaults to 1.
+
 	rcuscale.kfree_rcu_test= [KNL]
 			Set to measure performance of kfree_rcu() flooding.
 
@@ -4973,7 +4982,7 @@
 			the same as for rcuscale.nreaders.
 			N, where N is the number of CPUs
 
-	rcuscale.perf_type= [KNL]
+	rcuscale.scale_type= [KNL]
 			Specify the RCU implementation to test.
 
 	rcuscale.shutdown= [KNL]

From 1d702e2eff8be1f39a1774f8a0f66f077b663b27 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 2 Jun 2023 14:01:44 -0700
Subject: [PATCH 0206/2047] rcuscale: Print grace-period kthread CPU time, if
 recorded

This commit prints out the CPU time consumed by the grace-period kthread,
if the specified RCU flavor supports this notion.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/kvm-recheck-rcuscale.sh      | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
index b582113178ac..f683e424ddd5 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
@@ -40,6 +40,10 @@ awk '
 	sum += $5 / 1000.;
 }
 
+/rcu_scale: Grace-period kthread CPU time/ {
+	cputime = $6;
+}
+
 END {
 	newNR = asort(gptimes);
 	if (newNR <= 0) {
@@ -78,6 +82,8 @@ END {
 	print "90th percentile grace-period duration: " gptimes[pct90];
 	print "99th percentile grace-period duration: " gptimes[pct99];
 	print "Maximum grace-period duration: " gptimes[newNR];
-	print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
+	if (cputime != "")
+		cpustr = " CPU: " cputime;
+	print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches cpustr;
 	print "Computed from rcuscale printk output.";
 }'

From 271a8467a5f7ab7654f06541d2483b5340bb7192 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 2 Jun 2023 14:38:44 -0700
Subject: [PATCH 0207/2047] rcuscale: Measure RCU Tasks Trace grace-period
 kthread CPU time

This commit causes RCU Tasks Trace to output the CPU time consumed by
its grace-period kthread.  The CPU time is whatever is in the designated
task's current->stime field, and thus is controlled by whatever CPU-time
accounting scheme is in effect.

This output appears in microseconds as follows on the console:

rcu_scale: Grace-period kthread CPU time: 42367.037

[ paulmck: Apply Willy Tarreau feedback. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcu.h      | 3 +++
 kernel/rcu/rcuscale.c | 1 +
 kernel/rcu/tasks.h    | 6 ++++++
 3 files changed, 10 insertions(+)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 98c1544cf572..d58bc0e86769 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -505,6 +505,9 @@ void rcu_async_relax(void);
 void rcupdate_announce_bootup_oddness(void);
 #ifdef CONFIG_TASKS_RCU_GENERIC
 void show_rcu_tasks_gp_kthreads(void);
+# ifdef CONFIG_TASKS_RCU
+struct task_struct *get_rcu_tasks_gp_kthread(void);
+# endif // # ifdef CONFIG_TASKS_RCU
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
 static inline void show_rcu_tasks_gp_kthreads(void) {}
 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 7fba3ab66e35..35e06c86acc9 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -298,6 +298,7 @@ static struct rcu_scale_ops tasks_ops = {
 	.gp_barrier	= rcu_barrier_tasks,
 	.sync		= synchronize_rcu_tasks,
 	.exp_sync	= synchronize_rcu_tasks,
+	.rso_gp_kthread	= get_rcu_tasks_gp_kthread,
 	.name		= "tasks"
 };
 
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 990a6cf5fa35..0f03de023097 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1042,6 +1042,12 @@ void show_rcu_tasks_classic_gp_kthread(void)
 EXPORT_SYMBOL_GPL(show_rcu_tasks_classic_gp_kthread);
 #endif // !defined(CONFIG_TINY_RCU)
 
+struct task_struct *get_rcu_tasks_gp_kthread(void)
+{
+	return rcu_tasks.kthread_ptr;
+}
+EXPORT_SYMBOL_GPL(get_rcu_tasks_gp_kthread);
+
 /*
  * Contribute to protect against tasklist scan blind spot while the
  * task is exiting and may be removed from the tasklist. See

From a15ec57cfcf83858aced058e00a7feb824322fa4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sat, 3 Jun 2023 09:07:44 -0700
Subject: [PATCH 0208/2047] rcuscale: Add RCU Tasks Rude testing

Add a "tasks-rude" option to the rcuscale.scale_type module parameter.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcu.h                              |  3 ++
 kernel/rcu/rcuscale.c                         | 31 ++++++++++++++++++-
 kernel/rcu/tasks.h                            |  7 +++++
 .../rcutorture/configs/rcuscale/CFcommon      |  2 ++
 4 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index d58bc0e86769..9829d8161b21 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -508,6 +508,9 @@ void show_rcu_tasks_gp_kthreads(void);
 # ifdef CONFIG_TASKS_RCU
 struct task_struct *get_rcu_tasks_gp_kthread(void);
 # endif // # ifdef CONFIG_TASKS_RCU
+# ifdef CONFIG_TASKS_RUDE_RCU
+struct task_struct *get_rcu_tasks_rude_gp_kthread(void);
+# endif // # ifdef CONFIG_TASKS_RUDE_RCU
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
 static inline void show_rcu_tasks_gp_kthreads(void) {}
 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 35e06c86acc9..5ce3b4e7ce71 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -310,6 +310,35 @@ static struct rcu_scale_ops tasks_ops = {
 
 #endif // #else // #ifdef CONFIG_TASKS_RCU
 
+#ifdef CONFIG_TASKS_RUDE_RCU
+
+/*
+ * Definitions for RCU-tasks-rude scalability testing.
+ */
+
+static struct rcu_scale_ops tasks_rude_ops = {
+	.ptype		= RCU_TASKS_RUDE_FLAVOR,
+	.init		= rcu_sync_scale_init,
+	.readlock	= tasks_scale_read_lock,
+	.readunlock	= tasks_scale_read_unlock,
+	.get_gp_seq	= rcu_no_completed,
+	.gp_diff	= rcu_seq_diff,
+	.async		= call_rcu_tasks_rude,
+	.gp_barrier	= rcu_barrier_tasks_rude,
+	.sync		= synchronize_rcu_tasks_rude,
+	.exp_sync	= synchronize_rcu_tasks_rude,
+	.rso_gp_kthread	= get_rcu_tasks_rude_gp_kthread,
+	.name		= "tasks-rude"
+};
+
+#define TASKS_RUDE_OPS &tasks_rude_ops,
+
+#else // #ifdef CONFIG_TASKS_RUDE_RCU
+
+#define TASKS_RUDE_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_RUDE_RCU
+
 #ifdef CONFIG_TASKS_TRACE_RCU
 
 /*
@@ -913,7 +942,7 @@ rcu_scale_init(void)
 	long i;
 	int firsterr = 0;
 	static struct rcu_scale_ops *scale_ops[] = {
-		&rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_TRACING_OPS
+		&rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
 	};
 
 	if (!torture_init_begin(scale_type, verbose))
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 0f03de023097..0372c5cbc83b 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1194,6 +1194,13 @@ void show_rcu_tasks_rude_gp_kthread(void)
 }
 EXPORT_SYMBOL_GPL(show_rcu_tasks_rude_gp_kthread);
 #endif // !defined(CONFIG_TINY_RCU)
+
+struct task_struct *get_rcu_tasks_rude_gp_kthread(void)
+{
+	return rcu_tasks_rude.kthread_ptr;
+}
+EXPORT_SYMBOL_GPL(get_rcu_tasks_rude_gp_kthread);
+
 #endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
 
 ////////////////////////////////////////////////////////////////////////
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
index 6a00157bee5b..b1ffd7c67604 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
@@ -2,5 +2,7 @@ CONFIG_RCU_SCALE_TEST=y
 CONFIG_PRINTK_TIME=y
 CONFIG_FORCE_TASKS_RCU=y
 #CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_RUDE_RCU=y
+#CHECK#CONFIG_TASKS_RUDE_RCU=y
 CONFIG_FORCE_TASKS_TRACE_RCU=y
 #CHECK#CONFIG_TASKS_TRACE_RCU=y

From e0a34641eb551e3c51e1588ef7874f2de2844a06 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 9 Jun 2023 14:05:14 +0200
Subject: [PATCH 0209/2047] rcuscale: fix building with RCU_TINY

Both the CONFIG_TASKS_RCU and CONFIG_TASKS_RUDE_RCU options
are broken when RCU_TINY is enabled as well, as some functions
are missing a declaration.

In file included from kernel/rcu/update.c:649:
kernel/rcu/tasks.h:1271:21: error: no previous prototype for 'get_rcu_tasks_rude_gp_kthread' [-Werror=missing-prototypes]
 1271 | struct task_struct *get_rcu_tasks_rude_gp_kthread(void)
      |                     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
kernel/rcu/rcuscale.c:330:27: error: 'get_rcu_tasks_rude_gp_kthread' undeclared here (not in a function); did you mean 'get_rcu_tasks_trace_gp_kthread'?
  330 |         .rso_gp_kthread = get_rcu_tasks_rude_gp_kthread,
      |                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      |                           get_rcu_tasks_trace_gp_kthread

In file included from /home/arnd/arm-soc/kernel/rcu/update.c:649:
kernel/rcu/tasks.h:1113:21: error: no previous prototype for 'get_rcu_tasks_gp_kthread' [-Werror=missing-prototypes]
 1113 | struct task_struct *get_rcu_tasks_gp_kthread(void)
      |                     ^~~~~~~~~~~~~~~~~~~~~~~~

Also, building with CONFIG_TASKS_RUDE_RCU but not CONFIG_TASKS_RCU is
broken because of some missing stub functions:

kernel/rcu/rcuscale.c:322:27: error: 'tasks_scale_read_lock' undeclared here (not in a function); did you mean 'srcu_scale_read_lock'?
  322 |         .readlock       = tasks_scale_read_lock,
      |                           ^~~~~~~~~~~~~~~~~~~~~
      |                           srcu_scale_read_lock
kernel/rcu/rcuscale.c:323:27: error: 'tasks_scale_read_unlock' undeclared here (not in a function); did you mean 'srcu_scale_read_unlock'?
  323 |         .readunlock     = tasks_scale_read_unlock,
      |                           ^~~~~~~~~~~~~~~~~~~~~~~
      |                           srcu_scale_read_unlock

Move the declarations outside of the RCU_TINY #ifdef and duplicate the
shared stub functions to address all of the above.

Fixes: 88d7ff818f0ce ("rcuscale: Add RCU Tasks Rude testing")
Fixes: 755f1c5eb416b ("rcuscale: Measure RCU Tasks Trace grace-period kthread CPU time")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcu.h      | 14 ++++++++------
 kernel/rcu/rcuscale.c | 13 +++++++++++--
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 9829d8161b21..5befd8780dcd 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -505,18 +505,20 @@ void rcu_async_relax(void);
 void rcupdate_announce_bootup_oddness(void);
 #ifdef CONFIG_TASKS_RCU_GENERIC
 void show_rcu_tasks_gp_kthreads(void);
-# ifdef CONFIG_TASKS_RCU
-struct task_struct *get_rcu_tasks_gp_kthread(void);
-# endif // # ifdef CONFIG_TASKS_RCU
-# ifdef CONFIG_TASKS_RUDE_RCU
-struct task_struct *get_rcu_tasks_rude_gp_kthread(void);
-# endif // # ifdef CONFIG_TASKS_RUDE_RCU
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
 static inline void show_rcu_tasks_gp_kthreads(void) {}
 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
 void rcu_request_urgent_qs_task(struct task_struct *t);
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
+#ifdef CONFIG_TASKS_RCU
+struct task_struct *get_rcu_tasks_gp_kthread(void);
+#endif // # ifdef CONFIG_TASKS_RCU
+
+#ifdef CONFIG_TASKS_RUDE_RCU
+struct task_struct *get_rcu_tasks_rude_gp_kthread(void);
+#endif // # ifdef CONFIG_TASKS_RUDE_RCU
+
 #define RCU_SCHEDULER_INACTIVE	0
 #define RCU_SCHEDULER_INIT	1
 #define RCU_SCHEDULER_RUNNING	2
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 5ce3b4e7ce71..a0eae1900708 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -316,11 +316,20 @@ static struct rcu_scale_ops tasks_ops = {
  * Definitions for RCU-tasks-rude scalability testing.
  */
 
+static int tasks_rude_scale_read_lock(void)
+{
+	return 0;
+}
+
+static void tasks_rude_scale_read_unlock(int idx)
+{
+}
+
 static struct rcu_scale_ops tasks_rude_ops = {
 	.ptype		= RCU_TASKS_RUDE_FLAVOR,
 	.init		= rcu_sync_scale_init,
-	.readlock	= tasks_scale_read_lock,
-	.readunlock	= tasks_scale_read_unlock,
+	.readlock	= tasks_rude_scale_read_lock,
+	.readunlock	= tasks_rude_scale_read_unlock,
 	.get_gp_seq	= rcu_no_completed,
 	.gp_diff	= rcu_seq_diff,
 	.async		= call_rcu_tasks_rude,

From e60c122a1614b4f65b29a7bef9d83b9fd30e937a Mon Sep 17 00:00:00 2001
From: Zqiang <qiang.zhang1211@gmail.com>
Date: Fri, 16 Jun 2023 15:39:26 +0800
Subject: [PATCH 0210/2047] rcuscale: Move rcu_scale_writer()
 schedule_timeout_uninterruptible() to _idle()

The rcuscale.holdoff module parameter can be used to delay the start
of rcu_scale_writer() kthread.  However, the hung-task timeout will
trigger when the timeout specified by rcuscale.holdoff is greater than
hung_task_timeout_secs:

runqemu kvm nographic slirp qemuparams="-smp 4 -m 2048M"
bootparams="rcuscale.shutdown=0 rcuscale.holdoff=300"

[  247.071753] INFO: task rcu_scale_write:59 blocked for more than 122 seconds.
[  247.072529]       Not tainted 6.4.0-rc1-00134-gb9ed6de8d4ff #7
[  247.073400] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  247.074331] task:rcu_scale_write state:D stack:30144 pid:59    ppid:2      flags:0x00004000
[  247.075346] Call Trace:
[  247.075660]  <TASK>
[  247.075965]  __schedule+0x635/0x1280
[  247.076448]  ? __pfx___schedule+0x10/0x10
[  247.076967]  ? schedule_timeout+0x2dc/0x4d0
[  247.077471]  ? __pfx_lock_release+0x10/0x10
[  247.078018]  ? enqueue_timer+0xe2/0x220
[  247.078522]  schedule+0x84/0x120
[  247.078957]  schedule_timeout+0x2e1/0x4d0
[  247.079447]  ? __pfx_schedule_timeout+0x10/0x10
[  247.080032]  ? __pfx_rcu_scale_writer+0x10/0x10
[  247.080591]  ? __pfx_process_timeout+0x10/0x10
[  247.081163]  ? __pfx_sched_set_fifo_low+0x10/0x10
[  247.081760]  ? __pfx_rcu_scale_writer+0x10/0x10
[  247.082287]  rcu_scale_writer+0x6b1/0x7f0
[  247.082773]  ? mark_held_locks+0x29/0xa0
[  247.083252]  ? __pfx_rcu_scale_writer+0x10/0x10
[  247.083865]  ? __pfx_rcu_scale_writer+0x10/0x10
[  247.084412]  kthread+0x179/0x1c0
[  247.084759]  ? __pfx_kthread+0x10/0x10
[  247.085098]  ret_from_fork+0x2c/0x50
[  247.085433]  </TASK>

This commit therefore replaces schedule_timeout_uninterruptible() with
schedule_timeout_idle().

Signed-off-by: Zqiang <qiang.zhang1211@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcuscale.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index a0eae1900708..ffdb30495e3c 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -469,7 +469,7 @@ rcu_scale_writer(void *arg)
 	sched_set_fifo_low(current);
 
 	if (holdoff)
-		schedule_timeout_uninterruptible(holdoff * HZ);
+		schedule_timeout_idle(holdoff * HZ);
 
 	/*
 	 * Wait until rcu_end_inkernel_boot() is called for normal GP tests

From 8afeb54190e80cb4d1c9583b06dc16d15198dce0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 6 Jul 2023 16:17:27 -0700
Subject: [PATCH 0211/2047] rcuscale: Add CONFIG_PREEMPT_DYNAMIC=n to TRACE01
 scenario

It is no longer possible to build a kernel with a preemption-disabled
RCU without use of CONFIG_PREEMPT_DYNAMIC=n.  This commit therefore
adds CONFIG_PREEMPT_DYNAMIC=n to the rcuscale torture type's TRACE01
scenario file.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
index 227aba7783af..0059592c7408 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
@@ -2,6 +2,8 @@ CONFIG_SMP=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_TREE_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=y
 CONFIG_NO_HZ_FULL=n

From 822e42509997cfc8582ab1108eef1a280524b26a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 16 May 2023 06:18:32 -0700
Subject: [PATCH 0212/2047] torture: Scale scftorture memory based on number of
 CPUs

As the number of CPUs increases, the number of outstanding no-wait
smp_call_function() handlers also increases, so that the default of
2G of memory is not always sufficient on 80-CPU systems.  This commit
therefore scales the amount of memory specified to qemu based on the
number of CPUs specified to the scftorture test instance.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 5a2ae2264403..9fa6526067f0 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -376,8 +376,10 @@ fi
 
 if test "$do_scftorture" = "yes"
 then
+	# Scale memory based on the number of CPUs.
+	scfmem=$((2+HALF_ALLOTED_CPUS/16))
 	torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
-	torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
+	torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory ${scfmem}G --trust-make
 fi
 
 if test "$do_rt" = "yes"

From 013608cd0812bdb21fc26d39ed8fdd2fc76e8b9b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 15 May 2023 19:00:10 -0700
Subject: [PATCH 0213/2047] scftorture: Forgive memory-allocation failure if
 KASAN

Kernels built with CONFIG_KASAN=y quarantine newly freed memory in order
to better detect use-after-free errors.  However, this can exhaust memory
more quickly in allocator-heavy tests, which can result in spurious
scftorture failure.  This commit therefore forgives memory-allocation
failure in kernels built with CONFIG_KASAN=y, but continues counting
the errors for use in detailed test-result analyses.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/scftorture.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/scftorture.c b/kernel/scftorture.c
index 5d113aa59e77..83c33ba0ca7e 100644
--- a/kernel/scftorture.c
+++ b/kernel/scftorture.c
@@ -171,7 +171,8 @@ static void scf_torture_stats_print(void)
 		scfs.n_all_wait += scf_stats_p[i].n_all_wait;
 	}
 	if (atomic_read(&n_errs) || atomic_read(&n_mb_in_errs) ||
-	    atomic_read(&n_mb_out_errs) || atomic_read(&n_alloc_errs))
+	    atomic_read(&n_mb_out_errs) ||
+	    (!IS_ENABLED(CONFIG_KASAN) && atomic_read(&n_alloc_errs)))
 		bangstr = "!!! ";
 	pr_alert("%s %sscf_invoked_count %s: %lld resched: %lld single: %lld/%lld single_ofl: %lld/%lld single_rpc: %lld single_rpc_ofl: %lld many: %lld/%lld all: %lld/%lld ",
 		 SCFTORT_FLAG, bangstr, isdone ? "VER" : "ver", invoked_count, scfs.n_resched,
@@ -323,7 +324,8 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
 		preempt_disable();
 	if (scfsp->scfs_prim == SCF_PRIM_SINGLE || scfsp->scfs_wait) {
 		scfcp = kmalloc(sizeof(*scfcp), GFP_ATOMIC);
-		if (WARN_ON_ONCE(!scfcp)) {
+		if (!scfcp) {
+			WARN_ON_ONCE(!IS_ENABLED(CONFIG_KASAN));
 			atomic_inc(&n_alloc_errs);
 		} else {
 			scfcp->scfc_cpu = -1;

From 4a71be93876171e2d30e413abd33126d8c526791 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 16 May 2023 06:21:02 -0700
Subject: [PATCH 0214/2047] scftorture: Pause testing after memory-allocation
 failure

The scftorture test can quickly execute a large number of calls to no-wait
smp_call_function(), each of which holds a block of memory until the
corresponding handler is invoked.  Especially when the longwait module
parameter is specified, this can chew up an arbitrarily large amount
of memory.  This commit therefore blocks after each memory-allocation
failure, with the duration a function of longwait.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/scftorture.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/scftorture.c b/kernel/scftorture.c
index 83c33ba0ca7e..59032aaccd18 100644
--- a/kernel/scftorture.c
+++ b/kernel/scftorture.c
@@ -313,6 +313,7 @@ static void scf_handler_1(void *scfc_in)
 // Randomly do an smp_call_function*() invocation.
 static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_random_state *trsp)
 {
+	bool allocfail = false;
 	uintptr_t cpu;
 	int ret = 0;
 	struct scf_check *scfcp = NULL;
@@ -327,6 +328,7 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
 		if (!scfcp) {
 			WARN_ON_ONCE(!IS_ENABLED(CONFIG_KASAN));
 			atomic_inc(&n_alloc_errs);
+			allocfail = true;
 		} else {
 			scfcp->scfc_cpu = -1;
 			scfcp->scfc_wait = scfsp->scfs_wait;
@@ -433,7 +435,9 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
 		cpus_read_unlock();
 	else
 		preempt_enable();
-	if (!(torture_random(trsp) & 0xfff))
+	if (allocfail)
+		schedule_timeout_idle((1 + longwait) * HZ);  // Let no-wait handlers complete.
+	else if (!(torture_random(trsp) & 0xfff))
 		schedule_timeout_uninterruptible(1);
 }
 

From 3f68f9c822ebe208f961ab0402e49a10465278ca Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 6 Jul 2023 17:44:15 -0700
Subject: [PATCH 0215/2047] scftorture: Add CONFIG_PREEMPT_DYNAMIC=n to
 NOPREEMPT scenario

It is no longer possible to build a kernel with a preemption-disabled
RCU without use of CONFIG_PREEMPT_DYNAMIC=n.  This commit therefore
adds CONFIG_PREEMPT_DYNAMIC=n to the scf torture type's NOPREEMPT
scenario file.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
index 3a59346b3de7..6133f54ce2a7 100644
--- a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
@@ -2,6 +2,8 @@ CONFIG_SMP=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_PREEMPT_RCU=n
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y

From 9cafe974cf46425c18cf78971400ae12e416e166 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 15 May 2023 09:18:20 -0700
Subject: [PATCH 0216/2047] rcutorture: Dump grace-period state upon
 rtort_pipe_count incidents

The rtort_pipe_count WARN() indicates that grace periods were unable
to invoke all callbacks during a stutter_wait() interval.  But it is
sometimes helpful to have a bit more information as to why.  This commit
therefore invokes show_rcu_gp_kthreads() immediately before that WARN()
in order to dump out some relevant information.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcutorture.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 147551c23baf..d291a1438c30 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1581,6 +1581,7 @@ rcu_torture_writer(void *arg)
 				    rcu_access_pointer(rcu_torture_current) !=
 				    &rcu_tortures[i]) {
 					tracing_off();
+					show_rcu_gp_kthreads();
 					WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
 					rcu_ftrace_dump(DUMP_ALL);
 				}

From e2a0b786c50cf4f0c3abfcd05888ae583c98cf25 Mon Sep 17 00:00:00 2001
From: Connor O'Brien <connoro@google.com>
Date: Fri, 2 Jun 2023 22:02:09 +0000
Subject: [PATCH 0217/2047] torture: Support randomized shuffling for proxy
 exec testing

Currently shuffling sets the same cpu affinities for all tasks,
which makes us less likely to hit paths involving migrating
blocked tasks onto a cpu where they can't run.

This patch adds an element of randomness to allow affinities of
different writer tasks to diverge.

This has helped uncover issues in testing with Proxy Execution

Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: kernel-team@android.com
Signed-off-by: Connor O'Brien <connoro@google.com>
Signed-off-by: John Stultz <jstultz@google.com>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/torture.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/torture.c b/kernel/torture.c
index 1a0519b836ac..8be83fdc6be1 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -54,6 +54,9 @@ module_param(verbose_sleep_frequency, int, 0444);
 static int verbose_sleep_duration = 1;
 module_param(verbose_sleep_duration, int, 0444);
 
+static int random_shuffle;
+module_param(random_shuffle, int, 0444);
+
 static char *torture_type;
 static int verbose;
 
@@ -518,6 +521,7 @@ static void torture_shuffle_task_unregister_all(void)
  */
 static void torture_shuffle_tasks(void)
 {
+	DEFINE_TORTURE_RANDOM(rand);
 	struct shuffle_task *stp;
 
 	cpumask_setall(shuffle_tmp_mask);
@@ -537,8 +541,10 @@ static void torture_shuffle_tasks(void)
 		cpumask_clear_cpu(shuffle_idle_cpu, shuffle_tmp_mask);
 
 	mutex_lock(&shuffle_task_mutex);
-	list_for_each_entry(stp, &shuffle_task_list, st_l)
-		set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask);
+	list_for_each_entry(stp, &shuffle_task_list, st_l) {
+		if (!random_shuffle || torture_random(&rand) & 0x1)
+			set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask);
+	}
 	mutex_unlock(&shuffle_task_mutex);
 
 	cpus_read_unlock();

From 965167e8e6c9877390bb9574f88da21dc42f03eb Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 21 Jun 2023 09:44:52 -0700
Subject: [PATCH 0218/2047] rcutorture: Remove obsolete BOOTPARAM_HOTPLUG_CPU0
 Kconfig option

Now that the BOOTPARAM_HOTPLUG_CPU0 Kconfig option is in the process of
being removed, it is time to remove rcutorture's use of it.

Link: https://lore.kernel.org/lkml/20230414232309.510911744@linutronix.de/
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: <x86@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/TREE01 | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index 04831ef1f9b5..8ae41d5f81a3 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -15,4 +15,3 @@ CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_BOOTPARAM_HOTPLUG_CPU0=y

From 1304affd35739d83b4089a297d39ab08f1624158 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 11 May 2023 19:12:16 -0700
Subject: [PATCH 0219/2047] rcu: Remove formal-verification tests

The CBMC-based formal-verification testing for SRCU was quite the thing
back in 2016, but the problem is that SRCU changes too quickly for the
scripting to keep up.  In addition, more recently, SRCU's grace-period
ordering has been formally modeled by a group of Linux-kernel memory-model
litmus tests.

This commit therefore removes the pioneering formal-verification tests.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../rcutorture/formal/srcu-cbmc/.gitignore    |   2 -
 .../rcutorture/formal/srcu-cbmc/Makefile      |  17 -
 .../srcu-cbmc/empty_includes/linux/delay.h    |   0
 .../srcu-cbmc/empty_includes/linux/export.h   |   0
 .../srcu-cbmc/empty_includes/linux/mutex.h    |   0
 .../srcu-cbmc/empty_includes/linux/percpu.h   |   0
 .../srcu-cbmc/empty_includes/linux/preempt.h  |   0
 .../srcu-cbmc/empty_includes/linux/rcupdate.h |   0
 .../srcu-cbmc/empty_includes/linux/sched.h    |   0
 .../srcu-cbmc/empty_includes/linux/smp.h      |   0
 .../empty_includes/linux/workqueue.h          |   0
 .../empty_includes/uapi/linux/types.h         |   0
 .../formal/srcu-cbmc/include/linux/.gitignore |   2 -
 .../formal/srcu-cbmc/include/linux/kconfig.h  |   1 -
 .../formal/srcu-cbmc/include/linux/types.h    | 152 -------
 .../formal/srcu-cbmc/modify_srcu.awk          | 376 ------------------
 .../rcutorture/formal/srcu-cbmc/src/assume.h  |  17 -
 .../formal/srcu-cbmc/src/barriers.h           |  41 --
 .../rcutorture/formal/srcu-cbmc/src/bug_on.h  |  14 -
 .../formal/srcu-cbmc/src/combined_source.c    |  14 -
 .../rcutorture/formal/srcu-cbmc/src/config.h  |  28 --
 .../formal/srcu-cbmc/src/include_srcu.c       |  32 --
 .../formal/srcu-cbmc/src/int_typedefs.h       |  34 --
 .../rcutorture/formal/srcu-cbmc/src/locks.h   | 221 ----------
 .../rcutorture/formal/srcu-cbmc/src/misc.c    |  12 -
 .../rcutorture/formal/srcu-cbmc/src/misc.h    |  58 ---
 .../rcutorture/formal/srcu-cbmc/src/percpu.h  |  93 -----
 .../rcutorture/formal/srcu-cbmc/src/preempt.c |  79 ----
 .../rcutorture/formal/srcu-cbmc/src/preempt.h |  59 ---
 .../formal/srcu-cbmc/src/simple_sync_srcu.c   |  51 ---
 .../formal/srcu-cbmc/src/workqueues.h         | 103 -----
 .../tests/store_buffering/.gitignore          |   2 -
 .../srcu-cbmc/tests/store_buffering/Makefile  |  12 -
 .../tests/store_buffering/assert_end.fail     |   1 -
 .../tests/store_buffering/force.fail          |   1 -
 .../tests/store_buffering/force2.fail         |   1 -
 .../tests/store_buffering/force3.fail         |   1 -
 .../srcu-cbmc/tests/store_buffering/main.pass |   0
 .../srcu-cbmc/tests/store_buffering/test.c    |  73 ----
 .../formal/srcu-cbmc/tests/test_script.sh     | 103 -----
 40 files changed, 1600 deletions(-)
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
 delete mode 100755 tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
 delete mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
 delete mode 100755 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh

diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
deleted file mode 100644
index 24e27957efcc..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-srcu.c
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
deleted file mode 100644
index 4bed0b678f8b..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-all: srcu.c store_buffering
-
-LINUX_SOURCE = ../../../../../..
-
-modified_srcu_input = $(LINUX_SOURCE)/include/linux/srcu.h \
-		      $(LINUX_SOURCE)/kernel/rcu/srcu.c
-
-modified_srcu_output = include/linux/srcu.h srcu.c
-
-include/linux/srcu.h: srcu.c
-
-srcu.c: modify_srcu.awk Makefile $(modified_srcu_input)
-	awk -f modify_srcu.awk $(modified_srcu_input) $(modified_srcu_output)
-
-store_buffering:
-	@cd tests/store_buffering; make
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
deleted file mode 100644
index 57d296341304..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-srcu.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
deleted file mode 100644
index f2860dd1b407..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <LINUX_SOURCE/linux/kconfig.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
deleted file mode 100644
index 8bc960e5e713..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This header has been modifies to remove definitions of types that
- * are defined in standard userspace headers or are problematic for some
- * other reason.
- */
-
-#ifndef _LINUX_TYPES_H
-#define _LINUX_TYPES_H
-
-#define __EXPORTED_HEADERS__
-#include <uapi/linux/types.h>
-
-#ifndef __ASSEMBLY__
-
-#define DECLARE_BITMAP(name, bits) \
-	unsigned long name[BITS_TO_LONGS(bits)]
-
-typedef __u32 __kernel_dev_t;
-
-/* bsd */
-typedef unsigned char		u_char;
-typedef unsigned short		u_short;
-typedef unsigned int		u_int;
-typedef unsigned long		u_long;
-
-/* sysv */
-typedef unsigned char		unchar;
-typedef unsigned short		ushort;
-typedef unsigned int		uint;
-typedef unsigned long		ulong;
-
-#ifndef __BIT_TYPES_DEFINED__
-#define __BIT_TYPES_DEFINED__
-
-typedef		__u8		u_int8_t;
-typedef		__s8		int8_t;
-typedef		__u16		u_int16_t;
-typedef		__s16		int16_t;
-typedef		__u32		u_int32_t;
-typedef		__s32		int32_t;
-
-#endif /* !(__BIT_TYPES_DEFINED__) */
-
-typedef		__u8		uint8_t;
-typedef		__u16		uint16_t;
-typedef		__u32		uint32_t;
-
-/* this is a special 64bit data type that is 8-byte aligned */
-#define aligned_u64 __u64 __attribute__((aligned(8)))
-#define aligned_be64 __be64 __attribute__((aligned(8)))
-#define aligned_le64 __le64 __attribute__((aligned(8)))
-
-/**
- * The type used for indexing onto a disc or disc partition.
- *
- * Linux always considers sectors to be 512 bytes long independently
- * of the devices real block size.
- *
- * blkcnt_t is the type of the inode's block count.
- */
-typedef u64 sector_t;
-
-/*
- * The type of an index into the pagecache.
- */
-#define pgoff_t unsigned long
-
-/*
- * A dma_addr_t can hold any valid DMA address, i.e., any address returned
- * by the DMA API.
- *
- * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32
- * bits wide.  Bus addresses, e.g., PCI BARs, may be wider than 32 bits,
- * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses,
- * so they don't care about the size of the actual bus addresses.
- */
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-typedef u64 dma_addr_t;
-#else
-typedef u32 dma_addr_t;
-#endif
-
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
-typedef u64 phys_addr_t;
-#else
-typedef u32 phys_addr_t;
-#endif
-
-typedef phys_addr_t resource_size_t;
-
-/*
- * This type is the placeholder for a hardware interrupt number. It has to be
- * big enough to enclose whatever representation is used by a given platform.
- */
-typedef unsigned long irq_hw_number_t;
-
-typedef struct {
-	int counter;
-} atomic_t;
-
-#ifdef CONFIG_64BIT
-typedef struct {
-	long counter;
-} atomic64_t;
-#endif
-
-struct list_head {
-	struct list_head *next, *prev;
-};
-
-struct hlist_head {
-	struct hlist_node *first;
-};
-
-struct hlist_node {
-	struct hlist_node *next, **pprev;
-};
-
-/**
- * struct callback_head - callback structure for use with RCU and task_work
- * @next: next update requests in a list
- * @func: actual update function to call after the grace period.
- *
- * The struct is aligned to size of pointer. On most architectures it happens
- * naturally due ABI requirements, but some architectures (like CRIS) have
- * weird ABI and we need to ask it explicitly.
- *
- * The alignment is required to guarantee that bits 0 and 1 of @next will be
- * clear under normal conditions -- as long as we use call_rcu() or
- * call_srcu() to queue callback.
- *
- * This guarantee is important for few reasons:
- *  - future call_rcu_lazy() will make use of lower bits in the pointer;
- *  - the structure shares storage spacer in struct page with @compound_head,
- *    which encode PageTail() in bit 0. The guarantee is needed to avoid
- *    false-positive PageTail().
- */
-struct callback_head {
-	struct callback_head *next;
-	void (*func)(struct callback_head *head);
-} __attribute__((aligned(sizeof(void *))));
-#define rcu_head callback_head
-
-typedef void (*rcu_callback_t)(struct rcu_head *head);
-typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
-
-/* clocksource cycle base type */
-typedef u64 cycle_t;
-
-#endif /*  __ASSEMBLY__ */
-#endif /* _LINUX_TYPES_H */
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
deleted file mode 100755
index e05182d3e47d..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
+++ /dev/null
@@ -1,376 +0,0 @@
-#!/usr/bin/awk -f
-# SPDX-License-Identifier: GPL-2.0
-
-# Modify SRCU for formal verification. The first argument should be srcu.h and
-# the second should be srcu.c. Outputs modified srcu.h and srcu.c into the
-# current directory.
-
-BEGIN {
-	if (ARGC != 5) {
-		print "Usange: input.h input.c output.h output.c" > "/dev/stderr";
-		exit 1;
-	}
-	h_output = ARGV[3];
-	c_output = ARGV[4];
-	ARGC = 3;
-
-	# Tokenize using FS and not RS as FS supports regular expressions. Each
-	# record is one line of source, except that backslashed lines are
-	# combined. Comments are treated as field separators, as are quotes.
-	quote_regexp="\"([^\\\\\"]|\\\\.)*\"";
-	comment_regexp="\\/\\*([^*]|\\*+[^*/])*\\*\\/|\\/\\/.*(\n|$)";
-	FS="([ \\\\\t\n\v\f;,.=(){}+*/<>&|^-]|\\[|\\]|" comment_regexp "|" quote_regexp ")+";
-
-	inside_srcu_struct = 0;
-	inside_srcu_init_def = 0;
-	srcu_init_param_name = "";
-	in_macro = 0;
-	brace_nesting = 0;
-	paren_nesting = 0;
-
-	# Allow the manipulation of the last field separator after has been
-	# seen.
-	last_fs = "";
-	# Whether the last field separator was intended to be output.
-	last_fs_print = 0;
-
-	# rcu_batches stores the initialization for each instance of struct
-	# rcu_batch
-
-	in_comment = 0;
-
-	outputfile = "";
-}
-
-{
-	prev_outputfile = outputfile;
-	if (FILENAME ~ /\.h$/) {
-		outputfile = h_output;
-		if (FNR != NR) {
-			print "Incorrect file order" > "/dev/stderr";
-			exit 1;
-		}
-	}
-	else
-		outputfile = c_output;
-
-	if (prev_outputfile && outputfile != prev_outputfile) {
-		new_outputfile = outputfile;
-		outputfile = prev_outputfile;
-		update_fieldsep("", 0);
-		outputfile = new_outputfile;
-	}
-}
-
-# Combine the next line into $0.
-function combine_line() {
-	ret = getline next_line;
-	if (ret == 0) {
-		# Don't allow two consecutive getlines at the end of the file
-		if (eof_found) {
-			print "Error: expected more input." > "/dev/stderr";
-			exit 1;
-		} else {
-			eof_found = 1;
-		}
-	} else if (ret == -1) {
-		print "Error reading next line of file" FILENAME > "/dev/stderr";
-		exit 1;
-	}
-	$0 = $0 "\n" next_line;
-}
-
-# Combine backslashed lines and multiline comments.
-function combine_backslashes() {
-	while (/\\$|\/\*([^*]|\*+[^*\/])*\**$/) {
-		combine_line();
-	}
-}
-
-function read_line() {
-	combine_line();
-	combine_backslashes();
-}
-
-# Print out field separators and update variables that depend on them. Only
-# print if p is true. Call with sep="" and p=0 to print out the last field
-# separator.
-function update_fieldsep(sep, p) {
-	# Count braces
-	sep_tmp = sep;
-	gsub(quote_regexp "|" comment_regexp, "", sep_tmp);
-	while (1)
-	{
-		if (sub("[^{}()]*\\{", "", sep_tmp)) {
-			brace_nesting++;
-			continue;
-		}
-		if (sub("[^{}()]*\\}", "", sep_tmp)) {
-			brace_nesting--;
-			if (brace_nesting < 0) {
-				print "Unbalanced braces!" > "/dev/stderr";
-				exit 1;
-			}
-			continue;
-		}
-		if (sub("[^{}()]*\\(", "", sep_tmp)) {
-			paren_nesting++;
-			continue;
-		}
-		if (sub("[^{}()]*\\)", "", sep_tmp)) {
-			paren_nesting--;
-			if (paren_nesting < 0) {
-				print "Unbalanced parenthesis!" > "/dev/stderr";
-				exit 1;
-			}
-			continue;
-		}
-
-		break;
-	}
-
-	if (last_fs_print)
-		printf("%s", last_fs) > outputfile;
-	last_fs = sep;
-	last_fs_print = p;
-}
-
-# Shifts the fields down by n positions. Calls next if there are no more. If p
-# is true then print out field separators.
-function shift_fields(n, p) {
-	do {
-		if (match($0, FS) > 0) {
-			update_fieldsep(substr($0, RSTART, RLENGTH), p);
-			if (RSTART + RLENGTH <= length())
-				$0 = substr($0, RSTART + RLENGTH);
-			else
-				$0 = "";
-		} else {
-			update_fieldsep("", 0);
-			print "" > outputfile;
-			next;
-		}
-	} while (--n > 0);
-}
-
-# Shifts and prints the first n fields.
-function print_fields(n) {
-	do {
-		update_fieldsep("", 0);
-		printf("%s", $1) > outputfile;
-		shift_fields(1, 1);
-	} while (--n > 0);
-}
-
-{
-	combine_backslashes();
-}
-
-# Print leading FS
-{
-	if (match($0, "^(" FS ")+") > 0) {
-		update_fieldsep(substr($0, RSTART, RLENGTH), 1);
-		if (RSTART + RLENGTH <= length())
-			$0 = substr($0, RSTART + RLENGTH);
-		else
-			$0 = "";
-	}
-}
-
-# Parse the line.
-{
-	while (NF > 0) {
-		if ($1 == "struct" && NF < 3) {
-			read_line();
-			continue;
-		}
-
-		if (FILENAME ~ /\.h$/ && !inside_srcu_struct &&
-		    brace_nesting == 0 && paren_nesting == 0 &&
-		    $1 == "struct" && $2 == "srcu_struct" &&
-		    $0 ~ "^struct(" FS ")+srcu_struct(" FS ")+\\{") {
-			inside_srcu_struct = 1;
-			print_fields(2);
-			continue;
-		}
-		if (inside_srcu_struct && brace_nesting == 0 &&
-		    paren_nesting == 0) {
-			inside_srcu_struct = 0;
-			update_fieldsep("", 0);
-			for (name in rcu_batches)
-				print "extern struct rcu_batch " name ";" > outputfile;
-		}
-
-		if (inside_srcu_struct && $1 == "struct" && $2 == "rcu_batch") {
-			# Move rcu_batches outside of the struct.
-			rcu_batches[$3] = "";
-			shift_fields(3, 1);
-			sub(/;[[:space:]]*$/, "", last_fs);
-			continue;
-		}
-
-		if (FILENAME ~ /\.h$/ && !inside_srcu_init_def &&
-		    $1 == "#define" && $2 == "__SRCU_STRUCT_INIT") {
-			inside_srcu_init_def = 1;
-			srcu_init_param_name = $3;
-			in_macro = 1;
-			print_fields(3);
-			continue;
-		}
-		if (inside_srcu_init_def && brace_nesting == 0 &&
-		    paren_nesting == 0) {
-			inside_srcu_init_def = 0;
-			in_macro = 0;
-			continue;
-		}
-
-		if (inside_srcu_init_def && brace_nesting == 1 &&
-		    paren_nesting == 0 && last_fs ~ /\.[[:space:]]*$/ &&
-		    $1 ~ /^[[:alnum:]_]+$/) {
-			name = $1;
-			if (name in rcu_batches) {
-				# Remove the dot.
-				sub(/\.[[:space:]]*$/, "", last_fs);
-
-				old_record = $0;
-				do
-					shift_fields(1, 0);
-				while (last_fs !~ /,/ || paren_nesting > 0);
-				end_loc = length(old_record) - length($0);
-				end_loc += index(last_fs, ",") - length(last_fs);
-
-				last_fs = substr(last_fs, index(last_fs, ",") + 1);
-				last_fs_print = 1;
-
-				match(old_record, "^"name"("FS")+=");
-				start_loc = RSTART + RLENGTH;
-
-				len = end_loc - start_loc;
-				initializer = substr(old_record, start_loc, len);
-				gsub(srcu_init_param_name "\\.", "", initializer);
-				rcu_batches[name] = initializer;
-				continue;
-			}
-		}
-
-		# Don't include a nonexistent file
-		if (!in_macro && $1 == "#include" && /^#include[[:space:]]+"rcu\.h"/) {
-			update_fieldsep("", 0);
-			next;
-		}
-
-		# Ignore most preprocessor stuff.
-		if (!in_macro && $1 ~ /#/) {
-			break;
-		}
-
-		if (brace_nesting > 0 && $1 ~ "^[[:alnum:]_]+$" && NF < 2) {
-			read_line();
-			continue;
-		}
-		if (brace_nesting > 0 &&
-		    $0 ~ "^[[:alnum:]_]+[[:space:]]*(\\.|->)[[:space:]]*[[:alnum:]_]+" &&
-		    $2 in rcu_batches) {
-			# Make uses of rcu_batches global. Somewhat unreliable.
-			shift_fields(1, 0);
-			print_fields(1);
-			continue;
-		}
-
-		if ($1 == "static" && NF < 3) {
-			read_line();
-			continue;
-		}
-		if ($1 == "static" && ($2 == "bool" && $3 == "try_check_zero" ||
-		                       $2 == "void" && $3 == "srcu_flip")) {
-			shift_fields(1, 1);
-			print_fields(2);
-			continue;
-		}
-
-		# Distinguish between read-side and write-side memory barriers.
-		if ($1 == "smp_mb" && NF < 2) {
-			read_line();
-			continue;
-		}
-		if (match($0, /^smp_mb[[:space:]();\/*]*[[:alnum:]]/)) {
-			barrier_letter = substr($0, RLENGTH, 1);
-			if (barrier_letter ~ /A|D/)
-				new_barrier_name = "sync_smp_mb";
-			else if (barrier_letter ~ /B|C/)
-				new_barrier_name = "rs_smp_mb";
-			else {
-				print "Unrecognized memory barrier." > "/dev/null";
-				exit 1;
-			}
-
-			shift_fields(1, 1);
-			printf("%s", new_barrier_name) > outputfile;
-			continue;
-		}
-
-		# Skip definition of rcu_synchronize, since it is already
-		# defined in misc.h. Only present in old versions of srcu.
-		if (brace_nesting == 0 && paren_nesting == 0 &&
-		    $1 == "struct" && $2 == "rcu_synchronize" &&
-		    $0 ~ "^struct(" FS ")+rcu_synchronize(" FS ")+\\{") {
-			shift_fields(2, 0);
-			while (brace_nesting) {
-				if (NF < 2)
-					read_line();
-				shift_fields(1, 0);
-			}
-		}
-
-		# Skip definition of wakeme_after_rcu for the same reason
-		if (brace_nesting == 0 && $1 == "static" && $2 == "void" &&
-		    $3 == "wakeme_after_rcu") {
-			while (NF < 5)
-				read_line();
-			shift_fields(3, 0);
-			do {
-				while (NF < 3)
-					read_line();
-				shift_fields(1, 0);
-			} while (paren_nesting || brace_nesting);
-		}
-
-		if ($1 ~ /^(unsigned|long)$/ && NF < 3) {
-			read_line();
-			continue;
-		}
-
-		# Give srcu_batches_completed the correct type for old SRCU.
-		if (brace_nesting == 0 && $1 == "long" &&
-		    $2 == "srcu_batches_completed") {
-			update_fieldsep("", 0);
-			printf("unsigned ") > outputfile;
-			print_fields(2);
-			continue;
-		}
-		if (brace_nesting == 0 && $1 == "unsigned" && $2 == "long" &&
-		    $3 == "srcu_batches_completed") {
-			print_fields(3);
-			continue;
-		}
-
-		# Just print out the input code by default.
-		print_fields(1);
-	}
-	update_fieldsep("", 0);
-	print > outputfile;
-	next;
-}
-
-END {
-	update_fieldsep("", 0);
-
-	if (brace_nesting != 0) {
-		print "Unbalanced braces!" > "/dev/stderr";
-		exit 1;
-	}
-
-	# Define the rcu_batches
-	for (name in rcu_batches)
-		print "struct rcu_batch " name " = " rcu_batches[name] ";" > c_output;
-}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
deleted file mode 100644
index 570a49d9da7e..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASSUME_H
-#define ASSUME_H
-
-/* Provide an assumption macro that can be disabled for gcc. */
-#ifdef RUN
-#define assume(x) \
-	do { \
-		/* Evaluate x to suppress warnings. */ \
-		(void) (x); \
-	} while (0)
-
-#else
-#define assume(x) __CPROVER_assume(x)
-#endif
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
deleted file mode 100644
index 3f95a768a03b..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef BARRIERS_H
-#define BARRIERS_H
-
-#define barrier() __asm__ __volatile__("" : : : "memory")
-
-#ifdef RUN
-#define smp_mb() __sync_synchronize()
-#define smp_mb__after_unlock_lock() __sync_synchronize()
-#else
-/*
- * Copied from CBMC's implementation of __sync_synchronize(), which
- * seems to be disabled by default.
- */
-#define smp_mb() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
-				 "WWcumul", "RRcumul", "RWcumul", "WRcumul")
-#define smp_mb__after_unlock_lock() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
-				    "WWcumul", "RRcumul", "RWcumul", "WRcumul")
-#endif
-
-/*
- * Allow memory barriers to be disabled in either the read or write side
- * of SRCU individually.
- */
-
-#ifndef NO_SYNC_SMP_MB
-#define sync_smp_mb() smp_mb()
-#else
-#define sync_smp_mb() do {} while (0)
-#endif
-
-#ifndef NO_READ_SIDE_SMP_MB
-#define rs_smp_mb() smp_mb()
-#else
-#define rs_smp_mb() do {} while (0)
-#endif
-
-#define READ_ONCE(x) (*(volatile typeof(x) *) &(x))
-#define WRITE_ONCE(x) ((*(volatile typeof(x) *) &(x)) = (val))
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
deleted file mode 100644
index 5e7912c6a521..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef BUG_ON_H
-#define BUG_ON_H
-
-#include <assert.h>
-
-#define BUG() assert(0)
-#define BUG_ON(x) assert(!(x))
-
-/* Does it make sense to treat warnings as errors? */
-#define WARN() BUG()
-#define WARN_ON(x) (BUG_ON(x), false)
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
deleted file mode 100644
index e67ee5b3dd7c..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-/* Include all source files. */
-
-#include "include_srcu.c"
-
-#include "preempt.c"
-#include "misc.c"
-
-/* Used by test.c files */
-#include <pthread.h>
-#include <stdlib.h>
-#include <linux/srcu.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
deleted file mode 100644
index 283d7103334f..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* "Cheater" definitions based on restricted Kconfig choices. */
-
-#undef CONFIG_TINY_RCU
-#undef __CHECKER__
-#undef CONFIG_DEBUG_LOCK_ALLOC
-#undef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-#undef CONFIG_HOTPLUG_CPU
-#undef CONFIG_MODULES
-#undef CONFIG_NO_HZ_FULL_SYSIDLE
-#undef CONFIG_PREEMPT_COUNT
-#undef CONFIG_PREEMPT_RCU
-#undef CONFIG_PROVE_RCU
-#undef CONFIG_RCU_NOCB_CPU
-#undef CONFIG_RCU_NOCB_CPU_ALL
-#undef CONFIG_RCU_STALL_COMMON
-#undef CONFIG_RCU_TRACE
-#undef CONFIG_RCU_USER_QS
-#undef CONFIG_TASKS_RCU
-#define CONFIG_TREE_RCU
-
-#define CONFIG_GENERIC_ATOMIC64
-
-#if NR_CPUS > 1
-#define CONFIG_SMP
-#else
-#undef CONFIG_SMP
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
deleted file mode 100644
index e5202d4cff30..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-#include <assert.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <pthread.h>
-#include <stddef.h>
-#include <string.h>
-#include <sys/types.h>
-
-#include "int_typedefs.h"
-
-#include "barriers.h"
-#include "bug_on.h"
-#include "locks.h"
-#include "misc.h"
-#include "preempt.h"
-#include "percpu.h"
-#include "workqueues.h"
-
-#ifdef USE_SIMPLE_SYNC_SRCU
-#define synchronize_srcu(sp) synchronize_srcu_original(sp)
-#endif
-
-#include <srcu.c>
-
-#ifdef USE_SIMPLE_SYNC_SRCU
-#undef synchronize_srcu
-
-#include "simple_sync_srcu.c"
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
deleted file mode 100644
index 0dd27aa517a7..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef INT_TYPEDEFS_H
-#define INT_TYPEDEFS_H
-
-#include <inttypes.h>
-
-typedef int8_t s8;
-typedef uint8_t u8;
-typedef int16_t s16;
-typedef uint16_t u16;
-typedef int32_t s32;
-typedef uint32_t u32;
-typedef int64_t s64;
-typedef uint64_t u64;
-
-typedef int8_t __s8;
-typedef uint8_t __u8;
-typedef int16_t __s16;
-typedef uint16_t __u16;
-typedef int32_t __s32;
-typedef uint32_t __u32;
-typedef int64_t __s64;
-typedef uint64_t __u64;
-
-#define S8_C(x) INT8_C(x)
-#define U8_C(x) UINT8_C(x)
-#define S16_C(x) INT16_C(x)
-#define U16_C(x) UINT16_C(x)
-#define S32_C(x) INT32_C(x)
-#define U32_C(x) UINT32_C(x)
-#define S64_C(x) INT64_C(x)
-#define U64_C(x) UINT64_C(x)
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
deleted file mode 100644
index 1e24827f96f1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
+++ /dev/null
@@ -1,221 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef LOCKS_H
-#define LOCKS_H
-
-#include <limits.h>
-#include <pthread.h>
-#include <stdbool.h>
-
-#include "assume.h"
-#include "bug_on.h"
-#include "preempt.h"
-
-int nondet_int(void);
-
-#define __acquire(x)
-#define __acquires(x)
-#define __release(x)
-#define __releases(x)
-
-/* Only use one lock mechanism. Select which one. */
-#ifdef PTHREAD_LOCK
-struct lock_impl {
-	pthread_mutex_t mutex;
-};
-
-static inline void lock_impl_lock(struct lock_impl *lock)
-{
-	BUG_ON(pthread_mutex_lock(&lock->mutex));
-}
-
-static inline void lock_impl_unlock(struct lock_impl *lock)
-{
-	BUG_ON(pthread_mutex_unlock(&lock->mutex));
-}
-
-static inline bool lock_impl_trylock(struct lock_impl *lock)
-{
-	int err = pthread_mutex_trylock(&lock->mutex);
-
-	if (!err)
-		return true;
-	else if (err == EBUSY)
-		return false;
-	BUG();
-}
-
-static inline void lock_impl_init(struct lock_impl *lock)
-{
-	pthread_mutex_init(&lock->mutex, NULL);
-}
-
-#define LOCK_IMPL_INITIALIZER {.mutex = PTHREAD_MUTEX_INITIALIZER}
-
-#else /* !defined(PTHREAD_LOCK) */
-/* Spinlock that assumes that it always gets the lock immediately. */
-
-struct lock_impl {
-	bool locked;
-};
-
-static inline bool lock_impl_trylock(struct lock_impl *lock)
-{
-#ifdef RUN
-	/* TODO: Should this be a test and set? */
-	return __sync_bool_compare_and_swap(&lock->locked, false, true);
-#else
-	__CPROVER_atomic_begin();
-	bool old_locked = lock->locked;
-	lock->locked = true;
-	__CPROVER_atomic_end();
-
-	/* Minimal barrier to prevent accesses leaking out of lock. */
-	__CPROVER_fence("RRfence", "RWfence");
-
-	return !old_locked;
-#endif
-}
-
-static inline void lock_impl_lock(struct lock_impl *lock)
-{
-	/*
-	 * CBMC doesn't support busy waiting, so just assume that the
-	 * lock is available.
-	 */
-	assume(lock_impl_trylock(lock));
-
-	/*
-	 * If the lock was already held by this thread then the assumption
-	 * is unsatisfiable (deadlock).
-	 */
-}
-
-static inline void lock_impl_unlock(struct lock_impl *lock)
-{
-#ifdef RUN
-	BUG_ON(!__sync_bool_compare_and_swap(&lock->locked, true, false));
-#else
-	/* Minimal barrier to prevent accesses leaking out of lock. */
-	__CPROVER_fence("RWfence", "WWfence");
-
-	__CPROVER_atomic_begin();
-	bool old_locked = lock->locked;
-	lock->locked = false;
-	__CPROVER_atomic_end();
-
-	BUG_ON(!old_locked);
-#endif
-}
-
-static inline void lock_impl_init(struct lock_impl *lock)
-{
-	lock->locked = false;
-}
-
-#define LOCK_IMPL_INITIALIZER {.locked = false}
-
-#endif /* !defined(PTHREAD_LOCK) */
-
-/*
- * Implement spinlocks using the lock mechanism. Wrap the lock to prevent mixing
- * locks of different types.
- */
-typedef struct {
-	struct lock_impl internal_lock;
-} spinlock_t;
-
-#define SPIN_LOCK_UNLOCKED {.internal_lock = LOCK_IMPL_INITIALIZER}
-#define __SPIN_LOCK_UNLOCKED(x) SPIN_LOCK_UNLOCKED
-#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
-
-static inline void spin_lock_init(spinlock_t *lock)
-{
-	lock_impl_init(&lock->internal_lock);
-}
-
-static inline void spin_lock(spinlock_t *lock)
-{
-	/*
-	 * Spin locks also need to be removed in order to eliminate all
-	 * memory barriers. They are only used by the write side anyway.
-	 */
-#ifndef NO_SYNC_SMP_MB
-	preempt_disable();
-	lock_impl_lock(&lock->internal_lock);
-#endif
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-#ifndef NO_SYNC_SMP_MB
-	lock_impl_unlock(&lock->internal_lock);
-	preempt_enable();
-#endif
-}
-
-/* Don't bother with interrupts */
-#define spin_lock_irq(lock) spin_lock(lock)
-#define spin_unlock_irq(lock) spin_unlock(lock)
-#define spin_lock_irqsave(lock, flags) spin_lock(lock)
-#define spin_unlock_irqrestore(lock, flags) spin_unlock(lock)
-
-/*
- * This is supposed to return an int, but I think that a bool should work as
- * well.
- */
-static inline bool spin_trylock(spinlock_t *lock)
-{
-#ifndef NO_SYNC_SMP_MB
-	preempt_disable();
-	return lock_impl_trylock(&lock->internal_lock);
-#else
-	return true;
-#endif
-}
-
-struct completion {
-	/* Hopefully this won't overflow. */
-	unsigned int count;
-};
-
-#define COMPLETION_INITIALIZER(x) {.count = 0}
-#define DECLARE_COMPLETION(x) struct completion x = COMPLETION_INITIALIZER(x)
-#define DECLARE_COMPLETION_ONSTACK(x) DECLARE_COMPLETION(x)
-
-static inline void init_completion(struct completion *c)
-{
-	c->count = 0;
-}
-
-static inline void wait_for_completion(struct completion *c)
-{
-	unsigned int prev_count = __sync_fetch_and_sub(&c->count, 1);
-
-	assume(prev_count);
-}
-
-static inline void complete(struct completion *c)
-{
-	unsigned int prev_count = __sync_fetch_and_add(&c->count, 1);
-
-	BUG_ON(prev_count == UINT_MAX);
-}
-
-/* This function probably isn't very useful for CBMC. */
-static inline bool try_wait_for_completion(struct completion *c)
-{
-	BUG();
-}
-
-static inline bool completion_done(struct completion *c)
-{
-	return c->count;
-}
-
-/* TODO: Implement complete_all */
-static inline void complete_all(struct completion *c)
-{
-	BUG();
-}
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
deleted file mode 100644
index 9440cc39e3c6..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
+++ /dev/null
@@ -1,12 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-#include "misc.h"
-#include "bug_on.h"
-
-struct rcu_head;
-
-void wakeme_after_rcu(struct rcu_head *head)
-{
-	BUG();
-}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
deleted file mode 100644
index aca50030f954..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef MISC_H
-#define MISC_H
-
-#include "assume.h"
-#include "int_typedefs.h"
-#include "locks.h"
-
-#include <linux/types.h>
-
-/* Probably won't need to deal with bottom halves. */
-static inline void local_bh_disable(void) {}
-static inline void local_bh_enable(void) {}
-
-#define MODULE_ALIAS(X)
-#define module_param(...)
-#define EXPORT_SYMBOL_GPL(x)
-
-#define container_of(ptr, type, member) ({			\
-	const typeof(((type *)0)->member) *__mptr = (ptr);	\
-	(type *)((char *)__mptr - offsetof(type, member));	\
-})
-
-#ifndef USE_SIMPLE_SYNC_SRCU
-/* Abuse udelay to make sure that busy loops terminate. */
-#define udelay(x) assume(0)
-
-#else
-
-/* The simple custom synchronize_srcu is ok with try_check_zero failing. */
-#define udelay(x) do { } while (0)
-#endif
-
-#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
-	do { } while (0)
-
-#define notrace
-
-/* Avoid including rcupdate.h */
-struct rcu_synchronize {
-	struct rcu_head head;
-	struct completion completion;
-};
-
-void wakeme_after_rcu(struct rcu_head *head);
-
-#define rcu_lock_acquire(a) do { } while (0)
-#define rcu_lock_release(a) do { } while (0)
-#define rcu_lockdep_assert(c, s) do { } while (0)
-#define RCU_LOCKDEP_WARN(c, s) do { } while (0)
-
-/* Let CBMC non-deterministically choose switch between normal and expedited. */
-bool rcu_gp_is_normal(void);
-bool rcu_gp_is_expedited(void);
-
-/* Do the same for old versions of rcu. */
-#define rcu_expedited (rcu_gp_is_expedited())
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
deleted file mode 100644
index 27e67a3f291f..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef PERCPU_H
-#define PERCPU_H
-
-#include <stddef.h>
-#include "bug_on.h"
-#include "preempt.h"
-
-#define __percpu
-
-/* Maximum size of any percpu data. */
-#define PERCPU_OFFSET (4 * sizeof(long))
-
-/* Ignore alignment, as CBMC doesn't care about false sharing. */
-#define alloc_percpu(type) __alloc_percpu(sizeof(type), 1)
-
-static inline void *__alloc_percpu(size_t size, size_t align)
-{
-	BUG();
-	return NULL;
-}
-
-static inline void free_percpu(void *ptr)
-{
-	BUG();
-}
-
-#define per_cpu_ptr(ptr, cpu) \
-	((typeof(ptr)) ((char *) (ptr) + PERCPU_OFFSET * cpu))
-
-#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1)
-#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1)
-#define __this_cpu_sub(pcp, n) __this_cpu_add(pcp, -(typeof(pcp)) (n))
-
-#define this_cpu_inc(pcp) this_cpu_add(pcp, 1)
-#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1)
-#define this_cpu_sub(pcp, n) this_cpu_add(pcp, -(typeof(pcp)) (n))
-
-/* Make CBMC use atomics to work around bug. */
-#ifdef RUN
-#define THIS_CPU_ADD_HELPER(ptr, x) (*(ptr) += (x))
-#else
-/*
- * Split the atomic into a read and a write so that it has the least
- * possible ordering.
- */
-#define THIS_CPU_ADD_HELPER(ptr, x) \
-	do { \
-		typeof(ptr) this_cpu_add_helper_ptr = (ptr); \
-		typeof(ptr) this_cpu_add_helper_x = (x); \
-		typeof(*ptr) this_cpu_add_helper_temp; \
-		__CPROVER_atomic_begin(); \
-		this_cpu_add_helper_temp = *(this_cpu_add_helper_ptr); \
-		__CPROVER_atomic_end(); \
-		this_cpu_add_helper_temp += this_cpu_add_helper_x; \
-		__CPROVER_atomic_begin(); \
-		*(this_cpu_add_helper_ptr) = this_cpu_add_helper_temp; \
-		__CPROVER_atomic_end(); \
-	} while (0)
-#endif
-
-/*
- * For some reason CBMC needs an atomic operation even though this is percpu
- * data.
- */
-#define __this_cpu_add(pcp, n) \
-	do { \
-		BUG_ON(preemptible()); \
-		THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), thread_cpu_id), \
-				    (typeof(pcp)) (n)); \
-	} while (0)
-
-#define this_cpu_add(pcp, n) \
-	do { \
-		int this_cpu_add_impl_cpu = get_cpu(); \
-		THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), this_cpu_add_impl_cpu), \
-				    (typeof(pcp)) (n)); \
-		put_cpu(); \
-	} while (0)
-
-/*
- * This will cause a compiler warning because of the cast from char[][] to
- * type*. This will cause a compile time error if type is too big.
- */
-#define DEFINE_PER_CPU(type, name) \
-	char name[NR_CPUS][PERCPU_OFFSET]; \
-	typedef char percpu_too_big_##name \
-		[sizeof(type) > PERCPU_OFFSET ? -1 : 1]
-
-#define for_each_possible_cpu(cpu) \
-	for ((cpu) = 0; (cpu) < NR_CPUS; ++(cpu))
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
deleted file mode 100644
index b4083ae348fb..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
+++ /dev/null
@@ -1,79 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-#include "preempt.h"
-
-#include "assume.h"
-#include "locks.h"
-
-/* Support NR_CPUS of at most 64 */
-#define CPU_PREEMPTION_LOCKS_INIT0 LOCK_IMPL_INITIALIZER
-#define CPU_PREEMPTION_LOCKS_INIT1 \
-	CPU_PREEMPTION_LOCKS_INIT0, CPU_PREEMPTION_LOCKS_INIT0
-#define CPU_PREEMPTION_LOCKS_INIT2 \
-	CPU_PREEMPTION_LOCKS_INIT1, CPU_PREEMPTION_LOCKS_INIT1
-#define CPU_PREEMPTION_LOCKS_INIT3 \
-	CPU_PREEMPTION_LOCKS_INIT2, CPU_PREEMPTION_LOCKS_INIT2
-#define CPU_PREEMPTION_LOCKS_INIT4 \
-	CPU_PREEMPTION_LOCKS_INIT3, CPU_PREEMPTION_LOCKS_INIT3
-#define CPU_PREEMPTION_LOCKS_INIT5 \
-	CPU_PREEMPTION_LOCKS_INIT4, CPU_PREEMPTION_LOCKS_INIT4
-
-/*
- * Simulate disabling preemption by locking a particular cpu. NR_CPUS
- * should be the actual number of cpus, not just the maximum.
- */
-struct lock_impl cpu_preemption_locks[NR_CPUS] = {
-	CPU_PREEMPTION_LOCKS_INIT0
-#if (NR_CPUS - 1) & 1
-	, CPU_PREEMPTION_LOCKS_INIT0
-#endif
-#if (NR_CPUS - 1) & 2
-	, CPU_PREEMPTION_LOCKS_INIT1
-#endif
-#if (NR_CPUS - 1) & 4
-	, CPU_PREEMPTION_LOCKS_INIT2
-#endif
-#if (NR_CPUS - 1) & 8
-	, CPU_PREEMPTION_LOCKS_INIT3
-#endif
-#if (NR_CPUS - 1) & 16
-	, CPU_PREEMPTION_LOCKS_INIT4
-#endif
-#if (NR_CPUS - 1) & 32
-	, CPU_PREEMPTION_LOCKS_INIT5
-#endif
-};
-
-#undef CPU_PREEMPTION_LOCKS_INIT0
-#undef CPU_PREEMPTION_LOCKS_INIT1
-#undef CPU_PREEMPTION_LOCKS_INIT2
-#undef CPU_PREEMPTION_LOCKS_INIT3
-#undef CPU_PREEMPTION_LOCKS_INIT4
-#undef CPU_PREEMPTION_LOCKS_INIT5
-
-__thread int thread_cpu_id;
-__thread int preempt_disable_count;
-
-void preempt_disable(void)
-{
-	BUG_ON(preempt_disable_count < 0 || preempt_disable_count == INT_MAX);
-
-	if (preempt_disable_count++)
-		return;
-
-	thread_cpu_id = nondet_int();
-	assume(thread_cpu_id >= 0);
-	assume(thread_cpu_id < NR_CPUS);
-	lock_impl_lock(&cpu_preemption_locks[thread_cpu_id]);
-}
-
-void preempt_enable(void)
-{
-	BUG_ON(preempt_disable_count < 1);
-
-	if (--preempt_disable_count)
-		return;
-
-	lock_impl_unlock(&cpu_preemption_locks[thread_cpu_id]);
-}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
deleted file mode 100644
index f8b762cd214c..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef PREEMPT_H
-#define PREEMPT_H
-
-#include <stdbool.h>
-
-#include "bug_on.h"
-
-/* This flag contains garbage if preempt_disable_count is 0. */
-extern __thread int thread_cpu_id;
-
-/* Support recursive preemption disabling. */
-extern __thread int preempt_disable_count;
-
-void preempt_disable(void);
-void preempt_enable(void);
-
-static inline void preempt_disable_notrace(void)
-{
-	preempt_disable();
-}
-
-static inline void preempt_enable_no_resched(void)
-{
-	preempt_enable();
-}
-
-static inline void preempt_enable_notrace(void)
-{
-	preempt_enable();
-}
-
-static inline int preempt_count(void)
-{
-	return preempt_disable_count;
-}
-
-static inline bool preemptible(void)
-{
-	return !preempt_count();
-}
-
-static inline int get_cpu(void)
-{
-	preempt_disable();
-	return thread_cpu_id;
-}
-
-static inline void put_cpu(void)
-{
-	preempt_enable();
-}
-
-static inline void might_sleep(void)
-{
-	BUG_ON(preempt_disable_count);
-}
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
deleted file mode 100644
index 97f592048e0b..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
+++ /dev/null
@@ -1,51 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-#include <assert.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <pthread.h>
-#include <stddef.h>
-#include <string.h>
-#include <sys/types.h>
-
-#include "int_typedefs.h"
-
-#include "barriers.h"
-#include "bug_on.h"
-#include "locks.h"
-#include "misc.h"
-#include "preempt.h"
-#include "percpu.h"
-#include "workqueues.h"
-
-#include <linux/srcu.h>
-
-/* Functions needed from modify_srcu.c */
-bool try_check_zero(struct srcu_struct *sp, int idx, int trycount);
-void srcu_flip(struct srcu_struct *sp);
-
-/* Simpler implementation of synchronize_srcu that ignores batching. */
-void synchronize_srcu(struct srcu_struct *sp)
-{
-	int idx;
-	/*
-	 * This code assumes that try_check_zero will succeed anyway,
-	 * so there is no point in multiple tries.
-	 */
-	const int trycount = 1;
-
-	might_sleep();
-
-	/* Ignore the lock, as multiple writers aren't working yet anyway. */
-
-	idx = 1 ^ (sp->completed & 1);
-
-	/* For comments see srcu_advance_batches. */
-
-	assume(try_check_zero(sp, idx, trycount));
-
-	srcu_flip(sp);
-
-	assume(try_check_zero(sp, idx^1, trycount));
-}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
deleted file mode 100644
index 28b960300971..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef WORKQUEUES_H
-#define WORKQUEUES_H
-
-#include <stdbool.h>
-
-#include "barriers.h"
-#include "bug_on.h"
-#include "int_typedefs.h"
-
-#include <linux/types.h>
-
-/* Stub workqueue implementation. */
-
-struct work_struct;
-typedef void (*work_func_t)(struct work_struct *work);
-void delayed_work_timer_fn(unsigned long __data);
-
-struct work_struct {
-/*	atomic_long_t data; */
-	unsigned long data;
-
-	struct list_head entry;
-	work_func_t func;
-#ifdef CONFIG_LOCKDEP
-	struct lockdep_map lockdep_map;
-#endif
-};
-
-struct timer_list {
-	struct hlist_node	entry;
-	unsigned long		expires;
-	void			(*function)(unsigned long);
-	unsigned long		data;
-	u32			flags;
-	int			slack;
-};
-
-struct delayed_work {
-	struct work_struct work;
-	struct timer_list timer;
-
-	/* target workqueue and CPU ->timer uses to queue ->work */
-	struct workqueue_struct *wq;
-	int cpu;
-};
-
-
-static inline bool schedule_work(struct work_struct *work)
-{
-	BUG();
-	return true;
-}
-
-static inline bool schedule_work_on(int cpu, struct work_struct *work)
-{
-	BUG();
-	return true;
-}
-
-static inline bool queue_work(struct workqueue_struct *wq,
-			      struct work_struct *work)
-{
-	BUG();
-	return true;
-}
-
-static inline bool queue_delayed_work(struct workqueue_struct *wq,
-				      struct delayed_work *dwork,
-				      unsigned long delay)
-{
-	BUG();
-	return true;
-}
-
-#define INIT_WORK(w, f) \
-	do { \
-		(w)->data = 0; \
-		(w)->func = (f); \
-	} while (0)
-
-#define INIT_DELAYED_WORK(w, f) INIT_WORK(&(w)->work, (f))
-
-#define __WORK_INITIALIZER(n, f) { \
-		.data = 0, \
-		.entry = { &(n).entry, &(n).entry }, \
-		.func = f \
-	}
-
-/* Don't bother initializing timer. */
-#define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \
-	.work = __WORK_INITIALIZER((n).work, (f)), \
-	}
-
-#define DECLARE_WORK(n, f) \
-	struct workqueue_struct n = __WORK_INITIALIZER
-
-#define DECLARE_DELAYED_WORK(n, f) \
-	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0)
-
-#define system_power_efficient_wq ((struct workqueue_struct *) NULL)
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
deleted file mode 100644
index d65462d64816..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-*.out
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
deleted file mode 100644
index ad21b925fbb4..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-CBMC_FLAGS = -I../.. -I../../src -I../../include -I../../empty_includes -32 -pointer-check -mm pso
-
-all:
-	for i in ./*.pass; do \
-		echo $$i ; \
-		CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-pass $$i > $$i.out 2>&1 ; \
-	done
-	for i in ./*.fail; do \
-		echo $$i ; \
-		CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-fail $$i > $$i.out 2>&1 ; \
-	done
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
deleted file mode 100644
index 40c8075919d1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
+++ /dev/null
@@ -1 +0,0 @@
-test_cbmc_options="-DASSERT_END"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
deleted file mode 100644
index ada5baf0b60d..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
+++ /dev/null
@@ -1 +0,0 @@
-test_cbmc_options="-DFORCE_FAILURE"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
deleted file mode 100644
index 8fe00c8db466..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
+++ /dev/null
@@ -1 +0,0 @@
-test_cbmc_options="-DFORCE_FAILURE_2"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
deleted file mode 100644
index 612ed6772844..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
+++ /dev/null
@@ -1 +0,0 @@
-test_cbmc_options="-DFORCE_FAILURE_3"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
deleted file mode 100644
index 2ce2016f7871..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <src/combined_source.c>
-
-int x;
-int y;
-
-int __unbuffered_tpr_x;
-int __unbuffered_tpr_y;
-
-DEFINE_SRCU(ss);
-
-void rcu_reader(void)
-{
-	int idx;
-
-#ifndef FORCE_FAILURE_3
-	idx = srcu_read_lock(&ss);
-#endif
-	might_sleep();
-
-	__unbuffered_tpr_y = READ_ONCE(y);
-#ifdef FORCE_FAILURE
-	srcu_read_unlock(&ss, idx);
-	idx = srcu_read_lock(&ss);
-#endif
-	WRITE_ONCE(x, 1);
-
-#ifndef FORCE_FAILURE_3
-	srcu_read_unlock(&ss, idx);
-#endif
-	might_sleep();
-}
-
-void *thread_update(void *arg)
-{
-	WRITE_ONCE(y, 1);
-#ifndef FORCE_FAILURE_2
-	synchronize_srcu(&ss);
-#endif
-	might_sleep();
-	__unbuffered_tpr_x = READ_ONCE(x);
-
-	return NULL;
-}
-
-void *thread_process_reader(void *arg)
-{
-	rcu_reader();
-
-	return NULL;
-}
-
-int main(int argc, char *argv[])
-{
-	pthread_t tu;
-	pthread_t tpr;
-
-	if (pthread_create(&tu, NULL, thread_update, NULL))
-		abort();
-	if (pthread_create(&tpr, NULL, thread_process_reader, NULL))
-		abort();
-	if (pthread_join(tu, NULL))
-		abort();
-	if (pthread_join(tpr, NULL))
-		abort();
-	assert(__unbuffered_tpr_y != 0 || __unbuffered_tpr_x != 0);
-
-#ifdef ASSERT_END
-	assert(0);
-#endif
-
-	return 0;
-}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
deleted file mode 100755
index 2fe1f0339b4f..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-# This script expects a mode (either --should-pass or --should-fail) followed by
-# an input file. The script uses the following environment variables. The test C
-# source file is expected to be named test.c in the directory containing the
-# input file.
-#
-# CBMC: The command to run CBMC. Default: cbmc
-# CBMC_FLAGS: Additional flags to pass to CBMC
-# NR_CPUS: Number of cpus to run tests with. Default specified by the test
-# SYNC_SRCU_MODE: Choose implementation of synchronize_srcu. Defaults to simple.
-#                 kernel: Version included in the linux kernel source.
-#                 simple: Use try_check_zero directly.
-#
-# The input file is a script that is sourced by this file. It can define any of
-# the following variables to configure the test.
-#
-# test_cbmc_options: Extra options to pass to CBMC.
-# min_cpus_fail: Minimum number of CPUs (NR_CPUS) for verification to fail.
-#                The test is expected to pass if it is run with fewer. (Only
-#                useful for .fail files)
-# default_cpus: Quantity of CPUs to use for the test, if not specified on the
-#               command line. Default: Larger of 2 and MIN_CPUS_FAIL.
-
-set -e
-
-if test "$#" -ne 2; then
-	echo "Expected one option followed by an input file" 1>&2
-	exit 99
-fi
-
-if test "x$1" = "x--should-pass"; then
-	should_pass="yes"
-elif test "x$1" = "x--should-fail"; then
-	should_pass="no"
-else
-	echo "Unrecognized argument '$1'" 1>&2
-
-	# Exit code 99 indicates a hard error.
-	exit 99
-fi
-
-CBMC=${CBMC:-cbmc}
-
-SYNC_SRCU_MODE=${SYNC_SRCU_MODE:-simple}
-
-case ${SYNC_SRCU_MODE} in
-kernel) sync_srcu_mode_flags="" ;;
-simple) sync_srcu_mode_flags="-DUSE_SIMPLE_SYNC_SRCU" ;;
-
-*)
-	echo "Unrecognized argument '${SYNC_SRCU_MODE}'" 1>&2
-	exit 99
-	;;
-esac
-
-min_cpus_fail=1
-
-c_file=`dirname "$2"`/test.c
-
-# Source the input file.
-. $2
-
-if test ${min_cpus_fail} -gt 2; then
-	default_default_cpus=${min_cpus_fail}
-else
-	default_default_cpus=2
-fi
-default_cpus=${default_cpus:-${default_default_cpus}}
-cpus=${NR_CPUS:-${default_cpus}}
-
-# Check if there are two few cpus to make the test fail.
-if test $cpus -lt ${min_cpus_fail:-0}; then
-	should_pass="yes"
-fi
-
-cbmc_opts="-DNR_CPUS=${cpus} ${sync_srcu_mode_flags} ${test_cbmc_options} ${CBMC_FLAGS}"
-
-echo "Running CBMC: ${CBMC} ${cbmc_opts} ${c_file}"
-if ${CBMC} ${cbmc_opts} "${c_file}"; then
-	# Verification successful. Make sure that it was supposed to verify.
-	test "x${should_pass}" = xyes
-else
-	cbmc_exit_status=$?
-
-	# An exit status of 10 indicates a failed verification.
-	# (see cbmc_parse_optionst::do_bmc in the CBMC source code)
-	if test ${cbmc_exit_status} -eq 10 && test "x${should_pass}" = xno; then
-		:
-	else
-		echo "CBMC returned ${cbmc_exit_status} exit status" 1>&2
-
-		# Parse errors have exit status 6. Any other type of error
-		# should be considered a hard error.
-		if test ${cbmc_exit_status} -ne 6 && \
-		   test ${cbmc_exit_status} -ne 10; then
-			exit 99
-		else
-			exit 1
-		fi
-	fi
-fi

From 0feda4da6da5ce5d2094df406a8c5090e8403bf3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 7 Jun 2023 14:10:40 -0700
Subject: [PATCH 0220/2047] torture: Add "--no-" as synonym for "--do-no-" in
 torture.sh

In order to (for example) omit the real-time testing that torture.sh would
otherwise carry out, you put "--do-no-rt" on the torture.sh command line.
This works, but it is all too easy to instead type "--no-rt".  This is
unambiguous and easier to type, so this commit therefore allows all
"--no-" arguments as synonyms for their "--do-no-" counterparts.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/torture.sh       | 44 +++++++++----------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 5a2ae2264403..c3190ca9c96f 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -73,18 +73,18 @@ usage () {
 	echo "       --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\""
 	echo "       --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
 	echo "       --do-all"
-	echo "       --do-allmodconfig / --do-no-allmodconfig"
-	echo "       --do-clocksourcewd / --do-no-clocksourcewd"
-	echo "       --do-kasan / --do-no-kasan"
-	echo "       --do-kcsan / --do-no-kcsan"
-	echo "       --do-kvfree / --do-no-kvfree"
-	echo "       --do-locktorture / --do-no-locktorture"
+	echo "       --do-allmodconfig / --do-no-allmodconfig / --no-allmodconfig"
+	echo "       --do-clocksourcewd / --do-no-clocksourcewd / --no-clocksourcewd"
+	echo "       --do-kasan / --do-no-kasan / --no-kasan"
+	echo "       --do-kcsan / --do-no-kcsan / --no-kcsan"
+	echo "       --do-kvfree / --do-no-kvfree / --no-kvfree"
+	echo "       --do-locktorture / --do-no-locktorture / --no-locktorture"
 	echo "       --do-none"
-	echo "       --do-rcuscale / --do-no-rcuscale"
-	echo "       --do-rcutorture / --do-no-rcutorture"
-	echo "       --do-refscale / --do-no-refscale"
-	echo "       --do-rt / --do-no-rt"
-	echo "       --do-scftorture / --do-no-scftorture"
+	echo "       --do-rcuscale / --do-no-rcuscale / --no-rcuscale"
+	echo "       --do-rcutorture / --do-no-rcutorture / --no-rcutorture"
+	echo "       --do-refscale / --do-no-refscale / --no-refscale"
+	echo "       --do-rt / --do-no-rt / --no-rt"
+	echo "       --do-scftorture / --do-no-scftorture / --no-scftorture"
 	echo "       --duration [ <minutes> | <hours>h | <days>d ]"
 	echo "       --kcsan-kmake-arg kernel-make-arguments"
 	exit 1
@@ -126,22 +126,22 @@ do
 		do_kcsan=yes
 		do_clocksourcewd=yes
 		;;
-	--do-allmodconfig|--do-no-allmodconfig)
+	--do-allmodconfig|--do-no-allmodconfig|--no-allmodconfig)
 		do_allmodconfig=`doyesno "$1" --do-allmodconfig`
 		;;
-	--do-clocksourcewd|--do-no-clocksourcewd)
+	--do-clocksourcewd|--do-no-clocksourcewd|--no-clocksourcewd)
 		do_clocksourcewd=`doyesno "$1" --do-clocksourcewd`
 		;;
-	--do-kasan|--do-no-kasan)
+	--do-kasan|--do-no-kasan|--no-kasan)
 		do_kasan=`doyesno "$1" --do-kasan`
 		;;
-	--do-kcsan|--do-no-kcsan)
+	--do-kcsan|--do-no-kcsan|--no-kcsan)
 		do_kcsan=`doyesno "$1" --do-kcsan`
 		;;
-	--do-kvfree|--do-no-kvfree)
+	--do-kvfree|--do-no-kvfree|--no-kvfree)
 		do_kvfree=`doyesno "$1" --do-kvfree`
 		;;
-	--do-locktorture|--do-no-locktorture)
+	--do-locktorture|--do-no-locktorture|--no-locktorture)
 		do_locktorture=`doyesno "$1" --do-locktorture`
 		;;
 	--do-none|--donone)
@@ -157,19 +157,19 @@ do
 		do_kcsan=no
 		do_clocksourcewd=no
 		;;
-	--do-rcuscale|--do-no-rcuscale)
+	--do-rcuscale|--do-no-rcuscale|--no-rcuscale)
 		do_rcuscale=`doyesno "$1" --do-rcuscale`
 		;;
-	--do-rcutorture|--do-no-rcutorture)
+	--do-rcutorture|--do-no-rcutorture|--no-rcutorture)
 		do_rcutorture=`doyesno "$1" --do-rcutorture`
 		;;
-	--do-refscale|--do-no-refscale)
+	--do-refscale|--do-no-refscale|--no-refscale)
 		do_refscale=`doyesno "$1" --do-refscale`
 		;;
-	--do-rt|--do-no-rt)
+	--do-rt|--do-no-rt|--no-rt)
 		do_rt=`doyesno "$1" --do-rt`
 		;;
-	--do-scftorture|--do-no-scftorture)
+	--do-scftorture|--do-no-scftorture|--no-scftorture)
 		do_scftorture=`doyesno "$1" --do-scftorture`
 		;;
 	--duration)

From 4f64435417b450e8c0e76fba11a5cd23d81610c6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 15 Jun 2023 06:49:17 -0700
Subject: [PATCH 0221/2047] rcutorture: Remove contradictory Kconfig constraint

The TASKS03 scenario specifies both CONFIG_RCU_EXPERT=y and
removes #CHECK#CONFIG_RCU_EXPERT=n in order to be consistent with
CONFIG_RCU_EXPERT=y prerequisites.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/TASKS03 | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
index dea26c568678..2ef2fb69c360 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -6,6 +6,5 @@ CONFIG_PREEMPT=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
-#CHECK#CONFIG_RCU_EXPERT=n
 CONFIG_TASKS_RCU=y
 CONFIG_RCU_EXPERT=y

From 5cec64e49519320ef3dab451f52ca0855de5da37 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 13 Jun 2023 10:34:37 -0700
Subject: [PATCH 0222/2047] torture: Allow #CHECK# in --kconfig argument to
 kvm.sh

Testing building of a given RCU Tasks flavor with the other two
flavors disabled requires checking that the other two flavors are in
fact disabled.  This commit therefore modifies the scripting to permit
things like "#CHECK#CONFIG_TASKS_TRACE_RCU=n" to be passed into the
kvm.sh script's --kconfig parameter.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/configcheck.sh   | 61 ++++++++++---------
 .../rcutorture/bin/kvm-test-1-run.sh          |  1 -
 tools/testing/selftests/rcutorture/bin/kvm.sh |  2 +-
 3 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh
index b92dfeb7fbbf..99162d18bad3 100755
--- a/tools/testing/selftests/rcutorture/bin/configcheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh
@@ -3,6 +3,8 @@
 #
 # Usage: configcheck.sh .config .config-template
 #
+# Non-empty output if errors detected.
+#
 # Copyright (C) IBM Corporation, 2011
 #
 # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
@@ -10,32 +12,35 @@
 T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`"
 trap 'rm -rf $T' 0
 
-sed -e 's/"//g' < $1 > $T/.config
+# function test_kconfig_enabled ( Kconfig-var=val )
+function test_kconfig_enabled () {
+	if ! grep -q "^$1$" $T/.config
+	then
+		echo :$1: improperly set
+		return 1
+	fi
+	return 0
+}
 
-sed -e 's/"//g' -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' < $2 |
-awk	'
-{
-		print "if grep -q \"" $0 "\" < '"$T/.config"'";
-		print "then";
-		print "\t:";
-		print "else";
-		if ($1 == "#") {
-			print "\tif grep -q \"" $2 "\" < '"$T/.config"'";
-			print "\tthen";
-			print "\t\tif test \"$firsttime\" = \"\""
-			print "\t\tthen"
-			print "\t\t\tfirsttime=1"
-			print "\t\tfi"
-			print "\t\techo \":" $2 ": improperly set\"";
-			print "\telse";
-			print "\t\t:";
-			print "\tfi";
-		} else {
-			print "\tif test \"$firsttime\" = \"\""
-			print "\tthen"
-			print "\t\tfirsttime=1"
-			print "\tfi"
-			print "\techo \":" $0 ": improperly set\"";
-		}
-		print "fi";
-	}' | sh
+# function test_kconfig_disabled ( Kconfig-var )
+function test_kconfig_disabled () {
+	if grep -q "^$1=n$" $T/.config
+	then
+		return 0
+	fi
+	if grep -q "^$1=" $T/.config
+	then
+		echo :$1=n: improperly set
+		return 1
+	fi
+	return 0
+}
+
+sed -e 's/"//g' < $1 > $T/.config
+sed -e 's/^#CHECK#//' < $2 > $T/ConfigFragment
+grep '^CONFIG_.*=n$' $T/ConfigFragment |
+	sed -e 's/^/test_kconfig_disabled /' -e 's/=n$//' > $T/kconfig-n.sh
+. $T/kconfig-n.sh
+grep -v '^CONFIG_.*=n$' $T/ConfigFragment | grep '^CONFIG_' |
+	sed -e 's/^/test_kconfig_enabled /' > $T/kconfig-not-n.sh
+. $T/kconfig-not-n.sh
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index d2a3710a5f2a..b9912de9d71e 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -57,7 +57,6 @@ config_override_param () {
 		cat $T/Kconfig_args >> $resdir/ConfigFragment.input
 		config_override.sh $T/$2 $T/Kconfig_args > $T/$2.tmp
 		mv $T/$2.tmp $T/$2
-		# Note that "#CHECK#" is not permitted on commandline.
 	fi
 }
 
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index d3cdc2d33d4b..b0f36a638a69 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -186,7 +186,7 @@ do
 		fi
 		;;
 	--kconfig|--kconfigs)
-		checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$'
+		checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$'
 		TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
 		shift
 		;;

From cd1955d050f81450a48aa07b60933bec990c6166 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 13 Jun 2023 11:27:04 -0700
Subject: [PATCH 0223/2047] torture: Make kvm-recheck.sh report .config errors

Currently, kvm-recheck.sh will print out any .config errors with messages
of the form:

:CONFIG_TASKS_TRACE_RCU=y: improperly set

However, if these are the only errors, the resulting exit code will
declare the run successful.  This commit therefore causes kvm-recheck.sh
to record .config errors in the results directory in a file named
ConfigFragment.diags and also returns a non-zero error code in that case.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/kvm-recheck.sh   | 37 +++++++++++++++----
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 1df7e695edf7..97bdacc3fc55 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -16,6 +16,8 @@
 T=/tmp/kvm-recheck.sh.$$
 trap 'rm -f $T' 0 2
 
+configerrors=0
+
 PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
 . functions.sh
 for rd in "$@"
@@ -32,7 +34,7 @@ do
 		fi
 		TORTURE_SUITE="`cat $i/../torture_suite`" ; export TORTURE_SUITE
 		configfile=`echo $i | sed -e 's,^.*/,,'`
-		rm -f $i/console.log.*.diags
+		rm -f $i/console.log.*.diags $i/ConfigFragment.diags
 		case "${TORTURE_SUITE}" in
 		X*)
 			;;
@@ -49,8 +51,14 @@ do
 			then
 				echo QEMU killed
 			fi
-			configcheck.sh $i/.config $i/ConfigFragment > $T 2>&1
-			cat $T
+			configcheck.sh $i/.config $i/ConfigFragment > $i/ConfigFragment.diags 2>&1
+			if test -s $i/ConfigFragment.diags
+			then
+				cat $i/ConfigFragment.diags
+				configerrors=$((configerrors+1))
+			else
+				rm $i/ConfigFragment.diags
+			fi
 			if test -r $i/Make.oldconfig.err
 			then
 				cat $i/Make.oldconfig.err
@@ -65,7 +73,14 @@ do
 			if test -f "$i/buildonly"
 			then
 				echo Build-only run, no boot/test
-				configcheck.sh $i/.config $i/ConfigFragment
+				configcheck.sh $i/.config $i/ConfigFragment > $i/ConfigFragment.diags 2>&1
+				if test -s $i/ConfigFragment.diags
+				then
+					cat $i/ConfigFragment.diags
+					configerrors=$((configerrors+1))
+				else
+					rm $i/ConfigFragment.diags
+				fi
 				parse-build.sh $i/Make.out $configfile
 			elif test -f "$i/qemu-cmd"
 			then
@@ -79,10 +94,10 @@ do
 	done
 	if test -f "$rd/kcsan.sum"
 	then
-		if ! test -f $T
+		if ! test -f $i/ConfigFragment.diags
 		then
 			:
-		elif grep -q CONFIG_KCSAN=y $T
+		elif grep -q CONFIG_KCSAN=y $i/ConfigFragment.diags
 		then
 			echo "Compiler or architecture does not support KCSAN!"
 			echo Did you forget to switch your compiler with '--kmake-arg CC=<cc-that-supports-kcsan>'?
@@ -94,17 +109,23 @@ do
 		fi
 	fi
 done
+
+if test "$configerrors" -gt 0
+then
+	echo $configerrors runs with .config errors.
+	ret=1
+fi
 EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1
 builderrors="`tr ' ' '\012' < $T | grep -c '/Make.out.diags'`"
 if test "$builderrors" -gt 0
 then
 	echo $builderrors runs with build errors.
-	ret=1
+	ret=2
 fi
 runerrors="`tr ' ' '\012' < $T | grep -c '/console.log.diags'`"
 if test "$runerrors" -gt 0
 then
 	echo $runerrors runs with runtime errors.
-	ret=2
+	ret=3
 fi
 exit $ret

From dd3ffd125f89cdb38093df56eecdfa5539d983b4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 13 Jun 2023 18:03:48 -0700
Subject: [PATCH 0224/2047] torture: Add RCU Tasks individual-flavor build
 tests

This commit adds build tests of the individual RCU Tasks flavors in
order to detect inadvertent dependencies among the flavors.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/torture.sh       | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index c3190ca9c96f..dd3bfadb0639 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -55,6 +55,7 @@ do_kasan=yes
 do_kcsan=no
 do_clocksourcewd=yes
 do_rt=yes
+do_rcutasksflavors=yes
 
 # doyesno - Helper function for yes/no arguments
 function doyesno () {
@@ -81,6 +82,7 @@ usage () {
 	echo "       --do-locktorture / --do-no-locktorture / --no-locktorture"
 	echo "       --do-none"
 	echo "       --do-rcuscale / --do-no-rcuscale / --no-rcuscale"
+	echo "       --do-rcutasksflavors / --do-no-rcutasksflavors / --no-rcutasksflavors"
 	echo "       --do-rcutorture / --do-no-rcutorture / --no-rcutorture"
 	echo "       --do-refscale / --do-no-refscale / --no-refscale"
 	echo "       --do-rt / --do-no-rt / --no-rt"
@@ -115,6 +117,7 @@ do
 		;;
 	--do-all|--doall)
 		do_allmodconfig=yes
+		do_rcutasksflavor=yes
 		do_rcutorture=yes
 		do_locktorture=yes
 		do_scftorture=yes
@@ -146,6 +149,7 @@ do
 		;;
 	--do-none|--donone)
 		do_allmodconfig=no
+		do_rcutasksflavors=no
 		do_rcutorture=no
 		do_locktorture=no
 		do_scftorture=no
@@ -160,6 +164,9 @@ do
 	--do-rcuscale|--do-no-rcuscale|--no-rcuscale)
 		do_rcuscale=`doyesno "$1" --do-rcuscale`
 		;;
+	--do-rcutasksflavors|--do-no-rcutasksflavors|--no-rcutasksflavors)
+		do_rcutasksflavors=`doyesno "$1" --do-rcutasksflavors`
+		;;
 	--do-rcutorture|--do-no-rcutorture|--no-rcutorture)
 		do_rcutorture=`doyesno "$1" --do-rcutorture`
 		;;
@@ -361,6 +368,40 @@ then
 	fi
 fi
 
+# Test building RCU Tasks flavors in isolation, both SMP and !SMP
+if test "$do_rcutasksflavors" = "yes"
+then
+	echo " --- rcutasksflavors:" Start `date` | tee -a $T/log
+	rtfdir="tools/testing/selftests/rcutorture/res/$ds/results-rcutasksflavors"
+	mkdir -p "$rtfdir"
+	cat > $T/rcutasksflavors << __EOF__
+#CHECK#CONFIG_TASKS_RCU=n
+#CHECK#CONFIG_TASKS_RUDE_RCU=n
+#CHECK#CONFIG_TASKS_TRACE_RCU=n
+__EOF__
+	for flavor in CONFIG_TASKS_RCU CONFIG_TASKS_RUDE_RCU CONFIG_TASKS_TRACE_RCU
+	do
+		forceflavor="`echo $flavor | sed -e 's/^CONFIG/CONFIG_FORCE/'`"
+		deselectedflavors="`grep -v $flavor $T/rcutasksflavors | tr '\012' ' ' | tr -s ' ' | sed -e 's/ *$//'`"
+		echo " --- Running RCU Tasks Trace flavor $flavor `date`" >> $rtfdir/log
+		tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1
+		retcode=$?
+		if test "$retcode" -ne 0
+		then
+			break
+		fi
+	done
+	if test "$retcode" -eq 0
+	then
+		echo "rcutasksflavors($retcode)" $rtfdir >> $T/successes
+		echo Success >> $rtfdir/log
+	else
+		echo "rcutasksflavors($retcode)" $rtfdir >> $T/failures
+		echo " --- rcutasksflavors Test summary:" >> $rtfdir/log
+		echo " --- Summary: Exit code $retcode from $flavor, see Make.out" >> $rtfdir/log
+	fi
+fi
+
 # --torture rcu
 if test "$do_rcutorture" = "yes"
 then

From 80021ffb6820baf77d17b7a3b7ec9e89faa69578 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 15 Jun 2023 20:11:21 -0700
Subject: [PATCH 0225/2047] torture: Make kvm-remote print diagnostics on
 initial ssh failure

Currently, if the initial ssh fails, kvm-remote.sh gives up, printing a
message saying so.  But it would be nice to get a better idea as to why
ssh failed.  This commit therefore dumps out ssh's exit code, stdout,
and stderr upon ssh failure for diagnostic purposes.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-remote.sh | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index a2328163eba1..134cdef5a6e0 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -137,14 +137,20 @@ chmod +x $T/bin/kvm-remote-*.sh
 # Check first to avoid the need for cleanup for system-name typos
 for i in $systems
 do
-	ncpus="`ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
+	ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN > $T/ssh.stdout 2> $T/ssh.stderr
 	ret=$?
 	if test "$ret" -ne 0
 	then
-		echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
+		echo "System $i unreachable ($ret), giving up." | tee -a "$oldrun/remote-log"
+		echo ' --- ssh stdout: vvv' | tee -a "$oldrun/remote-log"
+		cat $T/ssh.stdout | tee -a "$oldrun/remote-log"
+		echo ' --- ssh stdout: ^^^' | tee -a "$oldrun/remote-log"
+		echo ' --- ssh stderr: vvv' | tee -a "$oldrun/remote-log"
+		cat $T/ssh.stderr | tee -a "$oldrun/remote-log"
+		echo ' --- ssh stderr: ^^^' | tee -a "$oldrun/remote-log"
 		exit 4
 	fi
-	echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
+	echo $i: `cat $T/ssh.stdout` CPUs " " `date` | tee -a "$oldrun/remote-log"
 done
 
 # Download and expand the tarball on all systems.

From 2f25542c6f626011ecf52cf8eb4a9b666cb8da86 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 5 Jul 2023 15:49:29 -0700
Subject: [PATCH 0226/2047] rcutorture: Remove obsolete parameter check from
 mkinitrd.sh

The mkinitrd.sh script no longer takes an argument, so this commit
therefore removes the code that checks for the parameter being present.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/mkinitrd.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
index 71f0dfbb2a6d..3af3a86a0091 100755
--- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
+++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
@@ -10,7 +10,6 @@
 D=tools/testing/selftests/rcutorture
 
 # Prerequisite checks
-[ -z "$D" ] && echo >&2 "No argument supplied" && exit 1
 if [ ! -d "$D" ]; then
     echo >&2 "$D does not exist: Malformed kernel source tree?"
     exit 1

From 171cfa011ea4a3fd0fe5a1818fd6871daebabe3f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 6 Jul 2023 14:19:26 -0700
Subject: [PATCH 0227/2047] torture: Place --bootargs parameters at end of
 -append list

Currently, the kernel boot parameters specified by the kvm.sh --bootargs
parameter are placed near the beginning of the -append list that is
passed to qemu.  This means that in the not-uncommon case of a kernel
boot parameter where the last argument wins, the --bootargs list overrides
neither the list in the .boot file nor the additional parameters supplied
by the rcutorture scripting.

This commit therefore places the kernel boot parameters specified by
the kvm.sh --bootargs parameter at the end of qemu's -append list.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/functions.sh          | 2 +-
 tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh     | 2 +-
 .../selftests/rcutorture/configs/lock/ver_functions.sh       | 5 +++--
 .../selftests/rcutorture/configs/rcu/ver_functions.sh        | 5 +++--
 .../selftests/rcutorture/configs/rcuscale/ver_functions.sh   | 5 +++--
 .../selftests/rcutorture/configs/refscale/ver_functions.sh   | 5 +++--
 .../selftests/rcutorture/configs/scf/ver_functions.sh        | 5 +++--
 7 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index 48b9147e8c91..b8e2ea23cb3f 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -45,7 +45,7 @@ checkarg () {
 configfrag_boot_params () {
 	if test -r "$2.boot"
 	then
-		echo $1 `grep -v '^#' "$2.boot" | tr '\012' ' '`
+		echo `grep -v '^#' "$2.boot" | tr '\012' ' '` $1
 	else
 		echo $1
 	fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index b9912de9d71e..1e7566997e51 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -162,7 +162,7 @@ boot_args="`configfrag_boot_params "$boot_args_in" "$config_template"`"
 boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
 if test -n "$TORTURE_BOOT_GDB_ARG"
 then
-	boot_args="$boot_args $TORTURE_BOOT_GDB_ARG"
+	boot_args="$TORTURE_BOOT_GDB_ARG $boot_args"
 fi
 
 # Give bare-metal advice
diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
index d3e4b2971f92..e7bb32709d78 100644
--- a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
@@ -22,8 +22,9 @@ locktorture_param_onoff () {
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
-	echo $1 `locktorture_param_onoff "$1" "$2"` \
+	echo	`locktorture_param_onoff "$1" "$2"` \
 		locktorture.stat_interval=15 \
 		locktorture.shutdown_secs=$3 \
-		locktorture.verbose=1
+		locktorture.verbose=1 \
+		$1
 }
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
index e2bc99c785e7..c044df386876 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -46,10 +46,11 @@ rcutorture_param_stat_interval () {
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
-	echo $1 `rcutorture_param_onoff "$1" "$2"` \
+	echo	`rcutorture_param_onoff "$1" "$2"` \
 		`rcutorture_param_n_barrier_cbs "$1"` \
 		`rcutorture_param_stat_interval "$1"` \
 		rcutorture.shutdown_secs=$3 \
 		rcutorture.test_no_idle_hz=1 \
-		rcutorture.verbose=1
+		rcutorture.verbose=1 \
+		$1
 }
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
index ffbe15109f0d..28070b43f017 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
@@ -11,6 +11,7 @@
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
-	echo $1 rcuscale.shutdown=1 \
-		rcuscale.verbose=0
+	echo	rcuscale.shutdown=1 \
+		rcuscale.verbose=0 \
+		$1
 }
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
index f81fa2c541a6..748465627601 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
@@ -11,6 +11,7 @@
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
-	echo $1 refscale.shutdown=1 \
-		refscale.verbose=0
+	echo	refscale.shutdown=1 \
+		refscale.verbose=0 \
+		$1
 }
diff --git a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
index 2d949e58f5a5..7637f68ef0ce 100644
--- a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
@@ -22,8 +22,9 @@ scftorture_param_onoff () {
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
-	echo $1 `scftorture_param_onoff "$1" "$2"` \
+	echo	`scftorture_param_onoff "$1" "$2"` \
 		scftorture.stat_interval=15 \
 		scftorture.shutdown_secs=$3 \
-		scftorture.verbose=1
+		scftorture.verbose=1 \
+		$1
 }

From eb3156f78b490f7d9482c9f44004d8e7638e3e64 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 6 Jul 2023 17:21:31 -0700
Subject: [PATCH 0228/2047] torture: Make torture.sh summarize config and build
 errors

If some of the torture.sh runs had config and/or build errors, but all
runs for which kernels were built ran successfully to completion, then
torture.sh will incorrectly claim that all errors were KCSAN errors.
This commit therefore makes torture.sh print the number of runs with
config and build errors, and to refrain from claiming that all bugs were
KCSAN bugs in that case.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index dd3bfadb0639..d36b839bf586 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -582,11 +582,23 @@ then
 fi
 echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
 echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log
+tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
+find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors
+find "$tdir" -name 'Make.out.diags' -print > $T/builderrors
+if test -s "$T/configerrors"
+then
+	echo "  Scenarios with .config errors: `wc -l "$T/configerrors" | awk '{ print $1 }'`"
+	nonkcsanbug="yes"
+fi
+if test -s "$T/builderrors"
+then
+	echo "  Scenarios with build errors: `wc -l "$T/builderrors" | awk '{ print $1 }'`"
+	nonkcsanbug="yes"
+fi
 if test -z "$nonkcsanbug" && test -s "$T/failuresum"
 then
 	echo "  All bugs were KCSAN failures."
 fi
-tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
 if test -n "$tdir" && test $compress_concurrency -gt 0
 then
 	# KASAN vmlinux files can approach 1GB in size, so compress them.

From adec488031f60476fc2e2aa95ab569f6f0b07c72 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 10 Jul 2023 11:45:34 -0700
Subject: [PATCH 0229/2047] torture: Loosen .config checks for KCSAN kernels

KCSAN enables some Kconfig options unilaterally and unconditionally,
including CONFIG_PROVE_LOCKING.  This in turn enables CONFIG_PROVE_RCU
and CONFIG_PREEMPT_COUNT, which conflicts with constraints in SRCU-T,
TRACE01, and TREE10, which in turn causes rcutorture to emit spurious
configuration complaints.  This commit therefore forgives configuration
complaints involving CONFIG_PROVE_RCU and CONFIG_PREEMPT_COUNT.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-recheck.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 97bdacc3fc55..5be670dd4009 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -52,6 +52,13 @@ do
 				echo QEMU killed
 			fi
 			configcheck.sh $i/.config $i/ConfigFragment > $i/ConfigFragment.diags 2>&1
+			if grep -q '^CONFIG_KCSAN=y$' $i/ConfigFragment.input
+			then
+				# KCSAN forces a number of Kconfig options, so remove
+				# complaints about those Kconfig options in KCSAN runs.
+				mv $i/ConfigFragment.diags $i/ConfigFragment.diags.kcsan
+				grep -v -E 'CONFIG_PROVE_RCU|CONFIG_PREEMPT_COUNT' $i/ConfigFragment.diags.kcsan > $i/ConfigFragment.diags
+			fi
 			if test -s $i/ConfigFragment.diags
 			then
 				cat $i/ConfigFragment.diags

From e40806e9bcf8aaa86dbf0d484e7cf3cfa09cb86c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 7 Jun 2023 11:59:49 -0700
Subject: [PATCH 0230/2047] clocksource: Handle negative skews in "skew is too
 large" messages

The nanosecond-to-millisecond skew computation uses unsigned arithmetic,
which produces user-unfriendly large positive numbers for negative skews.
Therefore, use signed arithmetic for this computation in order to preserve
the negativity.

Reported-by: Chris Bainbridge <chris.bainbridge@gmail.com>
Reported-by: Feng Tang <feng.tang@intel.com>
Fixes: dd029269947a ("clocksource: Improve "skew is too large" messages")
Reviewed-by: Feng Tang <feng.tang@intel.com>
Tested-by: Chris Bainbridge <chris.bainbridge@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/time/clocksource.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 88cbc1181b23..c108ed8a9804 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -473,8 +473,8 @@ static void clocksource_watchdog(struct timer_list *unused)
 		/* Check the deviation from the watchdog clocksource. */
 		md = cs->uncertainty_margin + watchdog->uncertainty_margin;
 		if (abs(cs_nsec - wd_nsec) > md) {
-			u64 cs_wd_msec;
-			u64 wd_msec;
+			s64 cs_wd_msec;
+			s64 wd_msec;
 			u32 wd_rem;
 
 			pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
@@ -483,8 +483,8 @@ static void clocksource_watchdog(struct timer_list *unused)
 				watchdog->name, wd_nsec, wdnow, wdlast, watchdog->mask);
 			pr_warn("                      '%s' cs_nsec: %lld cs_now: %llx cs_last: %llx mask: %llx\n",
 				cs->name, cs_nsec, csnow, cslast, cs->mask);
-			cs_wd_msec = div_u64_rem(cs_nsec - wd_nsec, 1000U * 1000U, &wd_rem);
-			wd_msec = div_u64_rem(wd_nsec, 1000U * 1000U, &wd_rem);
+			cs_wd_msec = div_s64_rem(cs_nsec - wd_nsec, 1000 * 1000, &wd_rem);
+			wd_msec = div_s64_rem(wd_nsec, 1000 * 1000, &wd_rem);
 			pr_warn("                      Clocksource '%s' skewed %lld ns (%lld ms) over watchdog '%s' interval of %lld ns (%lld ms)\n",
 				cs->name, cs_nsec - wd_nsec, cs_wd_msec, watchdog->name, wd_nsec, wd_msec);
 			if (curr_clocksource == cs)

From 233756a640be811efae33763db718fe29753b1e9 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Wed, 7 Jun 2023 15:54:33 +0800
Subject: [PATCH 0231/2047] x86/tsc: Extend watchdog check exemption to
 4-Sockets platform

There were reports again that the tsc clocksource on 4 sockets x86
servers was wrongly judged as 'unstable' by 'jiffies' and other
watchdogs, and disabled [1][2].

Commit b50db7095fe0 ("x86/tsc: Disable clocksource watchdog for TSC
on qualified platorms") was introduce to deal with these false
alarms of tsc unstable issues, covering qualified platforms for 2
sockets or smaller ones. And from history of chasing TSC issues,
Thomas and Peter only saw real TSC synchronization issue on 8 socket
machines.

So extend the exemption to 4 sockets to fix the issue.

Rui also proposed another way to disable 'jiffies' as clocksource
watchdog [3], which can also solve problem in [1]. in an architecture
independent way, but can't cure the problem in [2]. whose watchdog
is HPET or PMTIMER, while 'jiffies' is mostly used as watchdog in
boot phase.

'nr_online_nodes' has known inaccurate problem for cases like
platform with cpu-less memory nodes, sub numa cluster enabled,
fakenuma, kernel cmdline parameter 'maxcpus=', etc. The harmful case
is the 'maxcpus' one which could possibly under estimates the package
number, and disable the watchdog, but bright side is it is mostly
for debug usage. All these will be addressed in other patches, as
discussed in thread [4].

[1]. https://lore.kernel.org/all/9d3bf570-3108-0336-9c52-9bee15767d29@huawei.com/
[2]. https://lore.kernel.org/lkml/06df410c-2177-4671-832f-339cff05b1d9@paulmck-laptop/
[3]. https://lore.kernel.org/all/bd5b97f89ab2887543fc262348d1c7cafcaae536.camel@intel.com/
[4]. https://lore.kernel.org/all/20221021062131.1826810-1-feng.tang@intel.com/

Reported-by: Yu Liao <liaoyu15@huawei.com>
Reported-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 arch/x86/kernel/tsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3425c6a943e4..15f97c0abc9d 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1258,7 +1258,7 @@ static void __init check_system_tsc_reliable(void)
 	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
 	    boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
 	    boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
-	    nr_online_nodes <= 2)
+	    nr_online_nodes <= 4)
 		tsc_disable_clocksource_watchdog();
 }
 

From a55265eeedafee12d9743196ce5bb43266509c31 Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@ew.tq-group.com>
Date: Wed, 5 Jul 2023 11:01:43 +0200
Subject: [PATCH 0232/2047] spi: spi-fsl-lpspi: Remove num_cs from device
 struct

This is only used during probe() call, so there is no need to store it
longer than that.

Signed-off-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Link: https://lore.kernel.org/r/20230705090145.1354663-1-alexander.stein@ew.tq-group.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-lpspi.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index dfdcb69f66ab..0a0677f11a3d 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -97,7 +97,6 @@ struct fsl_lpspi_data {
 	struct clk *clk_ipg;
 	struct clk *clk_per;
 	bool is_slave;
-	u32 num_cs;
 	bool is_only_cs1;
 	bool is_first_byte;
 
@@ -825,6 +824,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
 	struct spi_controller *controller;
 	struct resource *res;
 	int ret, irq;
+	u32 num_cs;
 	u32 temp;
 	bool is_slave;
 
@@ -847,8 +847,8 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
 	fsl_lpspi->is_only_cs1 = of_property_read_bool((&pdev->dev)->of_node,
 						"fsl,spi-only-use-cs1-sel");
 	if (of_property_read_u32((&pdev->dev)->of_node, "num-cs",
-				 &fsl_lpspi->num_cs))
-		fsl_lpspi->num_cs = 1;
+				 &num_cs))
+		num_cs = 1;
 
 	controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32);
 	controller->transfer_one = fsl_lpspi_transfer_one;
@@ -858,7 +858,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
 	controller->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX;
 	controller->dev.of_node = pdev->dev.of_node;
 	controller->bus_num = pdev->id;
-	controller->num_chipselect = fsl_lpspi->num_cs;
+	controller->num_chipselect = num_cs;
 	controller->slave_abort = fsl_lpspi_slave_abort;
 	if (!fsl_lpspi->is_slave)
 		controller->use_gpio_descriptors = true;

From af71cccadecedad3484c2208e2c4fc8eff927d4a Mon Sep 17 00:00:00 2001
From: Okan Sahin <okan.sahin@analog.com>
Date: Mon, 17 Jul 2023 08:07:35 +0300
Subject: [PATCH 0233/2047] regulator: max77857: Add ADI MAX77857/59/MAX77831
 Regulator Support

Regulator driver for  MAX77857/59 and MAX77831.
The MAX77857 is a high-efficiency, high-performance
buck-boost converter targeted for systems requiring
a wide input voltage range (2.5V to 16V).

The MAX77859 is high-Efficiency Buck-Boost Converter
for USB-PD/PPS Applications. It has wide input range
(2.5V to 22V)

The MAX77831 is a high-efficiency, high-performance
buck-boost converter targeted for systems requiring
wide input voltage range (2.5V to 16V).

Signed-off-by: Okan Sahin <okan.sahin@analog.com>
Link: https://lore.kernel.org/r/20230717050736.10075-3-okan.sahin@analog.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig              |  10 +
 drivers/regulator/Makefile             |   1 +
 drivers/regulator/max77857-regulator.c | 459 +++++++++++++++++++++++++
 3 files changed, 470 insertions(+)
 create mode 100644 drivers/regulator/max77857-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 18a05b09406f..542a7b427994 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -584,6 +584,16 @@ config REGULATOR_MAX77650
 	  Semiconductor. This device has a SIMO with three independent
 	  power rails and an LDO.
 
+config REGULATOR_MAX77857
+	tristate "ADI MAX77857/MAX77831 regulator support"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  This driver controls a ADI MAX77857 and MAX77831 regulators.
+	  via I2C bus. MAX77857 and MAX77831 are high efficiency buck-boost
+	  converters with input voltage range (2.5V to 16V). Say Y here to
+	  enable the regulator driver
+
 config REGULATOR_MAX8649
 	tristate "Maxim 8649 voltage regulator"
 	depends on I2C
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 4f4589877e81..77b22bdd2791 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -86,6 +86,7 @@ obj-$(CONFIG_REGULATOR_MAX77686) += max77686-regulator.o
 obj-$(CONFIG_REGULATOR_MAX77693) += max77693-regulator.o
 obj-$(CONFIG_REGULATOR_MAX77802) += max77802-regulator.o
 obj-$(CONFIG_REGULATOR_MAX77826) += max77826-regulator.o
+obj-$(CONFIG_REGULATOR_MAX77857) += max77857-regulator.o
 obj-$(CONFIG_REGULATOR_MC13783) += mc13783-regulator.o
 obj-$(CONFIG_REGULATOR_MC13892) += mc13892-regulator.o
 obj-$(CONFIG_REGULATOR_MC13XXX_CORE) +=  mc13xxx-regulator-core.o
diff --git a/drivers/regulator/max77857-regulator.c b/drivers/regulator/max77857-regulator.c
new file mode 100644
index 000000000000..c5482ffd606e
--- /dev/null
+++ b/drivers/regulator/max77857-regulator.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Analog Devices, Inc.
+ * ADI Regulator driver for the MAX77857
+ * MAX77859 and MAX77831.
+ */
+#include <linux/bitfield.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
+#include <linux/util_macros.h>
+
+#define MAX77857_REG_INT_SRC		0x10
+#define MAX77857_REG_INT_MASK		0x11
+#define MAX77857_REG_CONT1		0x12
+#define MAX77857_REG_CONT2		0x13
+#define MAX77857_REG_CONT3		0x14
+
+#define MAX77857_INT_SRC_OCP		BIT(0)
+#define MAX77857_INT_SRC_THS		BIT(1)
+#define MAX77857_INT_SRC_HARDSHORT	BIT(2)
+#define MAX77857_INT_SRC_OVP		BIT(3)
+#define MAX77857_INT_SRC_POK		BIT(4)
+
+#define MAX77857_ILIM_MASK		GENMASK(2, 0)
+#define MAX77857_CONT1_FREQ		GENMASK(4, 3)
+#define MAX77857_CONT3_FPWM		BIT(5)
+
+#define MAX77859_REG_INT_SRC		0x11
+#define MAX77859_REG_CONT1		0x13
+#define MAX77859_REG_CONT2		0x14
+#define MAX77859_REG_CONT3		0x15
+#define MAX77859_REG_CONT5		0x17
+#define MAX77859_CONT2_FPWM		BIT(2)
+#define MAX77859_CONT2_INTB		BIT(3)
+#define MAX77859_CONT3_DVS_START	BIT(2)
+#define MAX77859_VOLTAGE_SEL_MASK	GENMASK(9, 0)
+
+#define MAX77859_CURRENT_MIN		1000000
+#define MAX77859_CURRENT_MAX		5000000
+#define MAX77859_CURRENT_STEP		50000
+
+enum max77857_id {
+	ID_MAX77831 = 1,
+	ID_MAX77857,
+	ID_MAX77859,
+	ID_MAX77859A,
+};
+
+static bool max77857_volatile_reg(struct device *dev, unsigned int reg)
+{
+	enum max77857_id id = (enum max77857_id)dev_get_drvdata(dev);
+
+	switch (id) {
+	case ID_MAX77831:
+	case ID_MAX77857:
+		return reg == MAX77857_REG_INT_SRC;
+	case ID_MAX77859:
+	case ID_MAX77859A:
+		return reg == MAX77859_REG_INT_SRC;
+	default:
+		return true;
+	}
+}
+
+struct regmap_config max77857_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.cache_type = REGCACHE_MAPLE,
+	.volatile_reg = max77857_volatile_reg,
+};
+
+static int max77857_get_status(struct regulator_dev *rdev)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(rdev->regmap, MAX77857_REG_INT_SRC, &val);
+	if (ret)
+		return ret;
+
+	if (FIELD_GET(MAX77857_INT_SRC_POK, val))
+		return REGULATOR_STATUS_ON;
+
+	return REGULATOR_STATUS_ERROR;
+}
+
+static unsigned int max77857_get_mode(struct regulator_dev *rdev)
+{
+	enum max77857_id id = (enum max77857_id)rdev_get_drvdata(rdev);
+	unsigned int regval;
+	int ret;
+
+	switch (id) {
+	case ID_MAX77831:
+	case ID_MAX77857:
+		ret = regmap_read(rdev->regmap, MAX77857_REG_CONT3, &regval);
+		if (ret)
+			return ret;
+
+		if (FIELD_GET(MAX77857_CONT3_FPWM, regval))
+			return REGULATOR_MODE_FAST;
+
+		break;
+	case ID_MAX77859:
+	case ID_MAX77859A:
+		ret = regmap_read(rdev->regmap, MAX77859_REG_CONT2, &regval);
+		if (ret)
+			return ret;
+
+		if (FIELD_GET(MAX77859_CONT2_FPWM, regval))
+			return REGULATOR_MODE_FAST;
+
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return REGULATOR_MODE_NORMAL;
+}
+
+static int max77857_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+	enum max77857_id id = (enum max77857_id)rdev_get_drvdata(rdev);
+	unsigned int reg, val;
+
+	switch (id) {
+	case ID_MAX77831:
+	case ID_MAX77857:
+		reg = MAX77857_REG_CONT3;
+		val = MAX77857_CONT3_FPWM;
+		break;
+	case ID_MAX77859:
+	case ID_MAX77859A:
+		reg = MAX77859_REG_CONT2;
+		val = MAX77859_CONT2_FPWM;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		return regmap_set_bits(rdev->regmap, reg, val);
+	case REGULATOR_MODE_NORMAL:
+		return regmap_clear_bits(rdev->regmap, reg, val);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int max77857_get_error_flags(struct regulator_dev *rdev,
+				    unsigned int *flags)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(rdev->regmap, MAX77857_REG_INT_SRC, &val);
+	if (ret)
+		return ret;
+
+	*flags = 0;
+
+	if (FIELD_GET(MAX77857_INT_SRC_OVP, val))
+		*flags |= REGULATOR_ERROR_OVER_VOLTAGE_WARN;
+
+	if (FIELD_GET(MAX77857_INT_SRC_OCP, val) ||
+	    FIELD_GET(MAX77857_INT_SRC_HARDSHORT, val))
+		*flags |= REGULATOR_ERROR_OVER_CURRENT;
+
+	if (FIELD_GET(MAX77857_INT_SRC_THS, val))
+		*flags |= REGULATOR_ERROR_OVER_TEMP;
+
+	if (!FIELD_GET(MAX77857_INT_SRC_POK, val))
+		*flags |= REGULATOR_ERROR_FAIL;
+
+	return 0;
+}
+
+static struct linear_range max77859_lin_ranges[] = {
+	REGULATOR_LINEAR_RANGE(3200000, 0x0A0, 0x320, 20000)
+};
+
+static const unsigned int max77859_ramp_table[4] = {
+	1000, 500, 250, 125
+};
+
+static int max77859_set_voltage_sel(struct regulator_dev *rdev,
+				    unsigned int sel)
+{
+	__be16 reg;
+	int ret;
+
+	reg = cpu_to_be16(sel);
+
+	ret = regmap_bulk_write(rdev->regmap, MAX77859_REG_CONT3, &reg, 2);
+	if (ret)
+		return ret;
+
+	/* actually apply new voltage */
+	return regmap_set_bits(rdev->regmap, MAX77859_REG_CONT3,
+			       MAX77859_CONT3_DVS_START);
+}
+
+int max77859_get_voltage_sel(struct regulator_dev *rdev)
+{
+	__be16 reg;
+	int ret;
+
+	ret = regmap_bulk_read(rdev->regmap, MAX77859_REG_CONT3, &reg, 2);
+	if (ret)
+		return ret;
+
+	return FIELD_GET(MAX77859_VOLTAGE_SEL_MASK, __be16_to_cpu(reg));
+}
+
+int max77859_set_current_limit(struct regulator_dev *rdev, int min_uA, int max_uA)
+{
+	u32 selector;
+
+	if (max_uA < MAX77859_CURRENT_MIN)
+		return -EINVAL;
+
+	selector = 0x12 + (max_uA - MAX77859_CURRENT_MIN) / MAX77859_CURRENT_STEP;
+
+	selector = clamp_val(selector, 0x00, 0x7F);
+
+	return regmap_write(rdev->regmap, MAX77859_REG_CONT5, selector);
+}
+
+int max77859_get_current_limit(struct regulator_dev *rdev)
+{
+	u32 selector;
+	int ret;
+
+	ret = regmap_read(rdev->regmap, MAX77859_REG_CONT5, &selector);
+	if (ret)
+		return ret;
+
+	if (selector <= 0x12)
+		return MAX77859_CURRENT_MIN;
+
+	if (selector >= 0x64)
+		return MAX77859_CURRENT_MAX;
+
+	return MAX77859_CURRENT_MIN + (selector - 0x12) * MAX77859_CURRENT_STEP;
+}
+
+static const struct regulator_ops max77859_regulator_ops = {
+	.list_voltage = regulator_list_voltage_linear_range,
+	.set_voltage_sel = max77859_set_voltage_sel,
+	.get_voltage_sel = max77859_get_voltage_sel,
+	.set_ramp_delay = regulator_set_ramp_delay_regmap,
+	.get_status = max77857_get_status,
+	.set_mode = max77857_set_mode,
+	.get_mode = max77857_get_mode,
+	.get_error_flags = max77857_get_error_flags,
+};
+
+static const struct regulator_ops max77859a_regulator_ops = {
+	.list_voltage = regulator_list_voltage_linear_range,
+	.set_voltage_sel = max77859_set_voltage_sel,
+	.get_voltage_sel = max77859_get_voltage_sel,
+	.set_current_limit = max77859_set_current_limit,
+	.get_current_limit = max77859_get_current_limit,
+	.set_ramp_delay = regulator_set_ramp_delay_regmap,
+	.get_status = max77857_get_status,
+	.set_mode = max77857_set_mode,
+	.get_mode = max77857_get_mode,
+	.get_error_flags = max77857_get_error_flags,
+};
+
+static const struct regulator_ops max77857_regulator_ops = {
+	.list_voltage = regulator_list_voltage_linear_range,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.set_ramp_delay = regulator_set_ramp_delay_regmap,
+	.get_status = max77857_get_status,
+	.set_mode = max77857_set_mode,
+	.get_mode = max77857_get_mode,
+	.get_error_flags = max77857_get_error_flags,
+};
+
+static struct linear_range max77857_lin_ranges[] = {
+	REGULATOR_LINEAR_RANGE(4485000, 0x3D, 0xCC, 73500)
+};
+
+static const unsigned int max77857_switch_freq[] = {
+	1200000, 1500000, 1800000, 2100000
+};
+
+static const unsigned int max77857_ramp_table[2][4] = {
+	{ 1333, 667, 333, 227 }, /* when switch freq is 1.8MHz or 2.1MHz */
+	{ 1166, 667, 333, 167 }, /* when switch freq is 1.2MHz or 1.5MHz */
+};
+
+static struct regulator_desc max77857_regulator_desc = {
+	.ops = &max77857_regulator_ops,
+	.name = "max77857",
+	.linear_ranges = max77857_lin_ranges,
+	.n_linear_ranges = ARRAY_SIZE(max77857_lin_ranges),
+	.vsel_mask = 0xFF,
+	.vsel_reg = MAX77857_REG_CONT2,
+	.ramp_delay_table = max77857_ramp_table[0],
+	.n_ramp_values = ARRAY_SIZE(max77857_ramp_table[0]),
+	.ramp_reg = MAX77857_REG_CONT3,
+	.ramp_mask = GENMASK(1, 0),
+	.ramp_delay = max77857_ramp_table[0][0],
+	.owner = THIS_MODULE,
+};
+
+static void max77857_calc_range(struct device *dev, enum max77857_id id)
+{
+	struct linear_range *range;
+	unsigned long vref_step;
+	u32 rtop = 0;
+	u32 rbot = 0;
+
+	device_property_read_u32(dev, "adi,rtop-ohms", &rtop);
+	device_property_read_u32(dev, "adi,rbot-ohms", &rbot);
+
+	if (!rbot || !rtop)
+		return;
+
+	switch (id) {
+	case ID_MAX77831:
+	case ID_MAX77857:
+		range = max77857_lin_ranges;
+		vref_step = 4900UL;
+		break;
+	case ID_MAX77859:
+	case ID_MAX77859A:
+		range = max77859_lin_ranges;
+		vref_step = 1250UL;
+		break;
+	}
+
+	range->step = DIV_ROUND_CLOSEST(vref_step * (rbot + rtop), rbot);
+	range->min = range->step * range->min_sel;
+}
+
+static int max77857_probe(struct i2c_client *client)
+{
+	const struct i2c_device_id *i2c_id;
+	struct device *dev = &client->dev;
+	struct regulator_config cfg = { };
+	struct regulator_dev *rdev;
+	struct regmap *regmap;
+	enum max77857_id id;
+	u32 switch_freq = 0;
+	int ret;
+
+	i2c_id = i2c_client_get_device_id(client);
+	if (!i2c_id)
+		return -EINVAL;
+
+	id = i2c_id->driver_data;
+
+	dev_set_drvdata(dev, (void *)id);
+
+	if (id == ID_MAX77859 || id == ID_MAX77859A) {
+		max77857_regulator_desc.ops = &max77859_regulator_ops;
+		max77857_regulator_desc.linear_ranges = max77859_lin_ranges;
+		max77857_regulator_desc.ramp_delay_table = max77859_ramp_table;
+		max77857_regulator_desc.ramp_delay = max77859_ramp_table[0];
+	}
+
+	if (id == ID_MAX77859A)
+		max77857_regulator_desc.ops = &max77859a_regulator_ops;
+
+	max77857_calc_range(dev, id);
+
+	regmap = devm_regmap_init_i2c(client, &max77857_regmap_config);
+	if (IS_ERR(regmap))
+		return dev_err_probe(dev, PTR_ERR(regmap),
+				     "cannot initialize regmap\n");
+
+	device_property_read_u32(dev, "adi,switch-frequency-hz", &switch_freq);
+	if (switch_freq) {
+		switch_freq = find_closest(switch_freq, max77857_switch_freq,
+					   ARRAY_SIZE(max77857_switch_freq));
+
+		if (id == ID_MAX77831 && switch_freq == 3)
+			switch_freq = 2;
+
+		switch (id) {
+		case ID_MAX77831:
+		case ID_MAX77857:
+			ret = regmap_update_bits(regmap, MAX77857_REG_CONT1,
+						 MAX77857_CONT1_FREQ, switch_freq);
+
+			if (switch_freq >= 2)
+				break;
+
+			max77857_regulator_desc.ramp_delay_table = max77857_ramp_table[1];
+			max77857_regulator_desc.ramp_delay = max77857_ramp_table[1][0];
+			break;
+		case ID_MAX77859:
+		case ID_MAX77859A:
+			ret = regmap_update_bits(regmap, MAX77859_REG_CONT1,
+						 MAX77857_CONT1_FREQ, switch_freq);
+			break;
+		}
+		if (ret)
+			return ret;
+	}
+
+	cfg.dev = dev;
+	cfg.driver_data = (void *)id;
+	cfg.regmap = regmap;
+	cfg.init_data = of_get_regulator_init_data(dev, dev->of_node,
+						   &max77857_regulator_desc);
+	if (!cfg.init_data)
+		return -ENOMEM;
+
+	rdev = devm_regulator_register(dev, &max77857_regulator_desc, &cfg);
+	if (IS_ERR(rdev))
+		return dev_err_probe(dev, PTR_ERR(rdev),
+				     "cannot register regulator\n");
+
+	return 0;
+}
+
+const struct i2c_device_id max77857_id[] = {
+	{ "max77831", ID_MAX77831 },
+	{ "max77857", ID_MAX77857 },
+	{ "max77859", ID_MAX77859 },
+	{ "max77859a", ID_MAX77859A },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max77857_id);
+
+static const struct of_device_id max77857_of_id[] = {
+	{ .compatible = "adi,max77831", .data = (void *)ID_MAX77831 },
+	{ .compatible = "adi,max77857", .data = (void *)ID_MAX77857 },
+	{ .compatible = "adi,max77859", .data = (void *)ID_MAX77859 },
+	{ .compatible = "adi,max77859a", .data = (void *)ID_MAX77859A },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, max77857_of_id);
+
+struct i2c_driver max77857_driver = {
+	.driver = {
+		.name = "max77857",
+		.of_match_table = max77857_of_id,
+	},
+	.id_table = max77857_id,
+	.probe_new = max77857_probe,
+};
+module_i2c_driver(max77857_driver);
+
+MODULE_DESCRIPTION("Analog Devices MAX77857 Buck-Boost Converter Driver");
+MODULE_AUTHOR("Ibrahim Tilki <Ibrahim.Tilki@analog.com>");
+MODULE_AUTHOR("Okan Sahin <Okan.Sahin@analog.com>");
+MODULE_LICENSE("GPL");

From 6d5373e98b37721987c16fd1c0f9500ecbb69f20 Mon Sep 17 00:00:00 2001
From: Okan Sahin <okan.sahin@analog.com>
Date: Mon, 17 Jul 2023 08:07:34 +0300
Subject: [PATCH 0234/2047] regulator: max77857: Add ADI MAX77857/59/MAX77831
 Regulator bindings

Add ADI MAX77857/59 and MAX77831 Regulator device tree document.

Signed-off-by: Okan Sahin <okan.sahin@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230717050736.10075-2-okan.sahin@analog.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/adi,max77857.yaml      | 86 +++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/adi,max77857.yaml

diff --git a/Documentation/devicetree/bindings/regulator/adi,max77857.yaml b/Documentation/devicetree/bindings/regulator/adi,max77857.yaml
new file mode 100644
index 000000000000..d1fa74aca721
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/adi,max77857.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2022 Analog Devices Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/adi,max77857.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices MAX77857 Buck-Boost Converter
+
+maintainers:
+  - Ibrahim Tilki <Ibrahim.Tilki@analog.com>
+  - Okan Sahin <Okan.Sahin@analog.com>
+
+description: Analog Devices MAX77857 Buck-Boost Converter
+
+properties:
+  compatible:
+    enum:
+      - adi,max77831
+      - adi,max77857
+      - adi,max77859
+      - adi,max77859a
+
+  reg:
+    description: I2C address of the device
+    items:
+      - enum: [0x66, 0x67, 0x6E, 0x6F]
+
+  interrupts:
+    maxItems: 1
+
+  adi,switch-frequency-hz:
+    description: Switching frequency of the Buck-Boost converter in Hz.
+    items:
+      - enum: [1200000, 1500000, 1800000, 2100000]
+
+  adi,rtop-ohms:
+    description: Top feedback resistor value in ohms for external feedback.
+    minimum: 150000
+    maximum: 330000
+
+  adi,rbot-ohms:
+    description: Bottom feedback resistor value in ohms for external feedback.
+
+dependencies:
+  adi,rtop-ohms: [ 'adi,rbot-ohms' ]
+  adi,rbot-ohms: [ 'adi,rtop-ohms' ]
+
+required:
+  - compatible
+  - reg
+
+allOf:
+  - $ref: regulator.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - adi,max77831
+
+    then:
+      properties:
+        adi,switch-frequency-hz:
+          items:
+            enum: [1200000, 1500000, 1800000]
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        regulator@66 {
+            reg = <0x66>;
+            compatible = "adi,max77857";
+            interrupt-parent = <&gpio>;
+            interrupts = <26 IRQ_TYPE_EDGE_FALLING>;
+
+            adi,rtop-ohms = <312000>;
+            adi,rbot-ohms = <12000>;
+        };
+    };

From f572ba797c639c9b1705908d3f5d71ed7c3f53e0 Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Date: Sat, 15 Jul 2023 02:03:58 +0100
Subject: [PATCH 0235/2047] spi: rzv2m-csi: Add missing include

Add missing include of bits.h file.

Signed-off-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20230715010407.1751715-2-fabrizio.castro.jz@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rzv2m-csi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-rzv2m-csi.c b/drivers/spi/spi-rzv2m-csi.c
index c0d9a776770f..b595f3b7294d 100644
--- a/drivers/spi/spi-rzv2m-csi.c
+++ b/drivers/spi/spi-rzv2m-csi.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2023 Renesas Electronics Corporation
  */
 
+#include <linux/bits.h>
 #include <linux/clk.h>
 #include <linux/count_zeros.h>
 #include <linux/interrupt.h>

From 74e27ce8d23c3aeb1a9fdcaf6261462506bbbfc3 Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Date: Sat, 15 Jul 2023 02:03:59 +0100
Subject: [PATCH 0236/2047] spi: rzv2m-csi: Adopt HZ_PER_MHZ for max spi clock

Make use of HZ_PER_MHZ for CSI_MAX_SPI_SCKO to make it clear
what its value means.

Signed-off-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20230715010407.1751715-3-fabrizio.castro.jz@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rzv2m-csi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-rzv2m-csi.c b/drivers/spi/spi-rzv2m-csi.c
index b595f3b7294d..3931045a85eb 100644
--- a/drivers/spi/spi-rzv2m-csi.c
+++ b/drivers/spi/spi-rzv2m-csi.c
@@ -13,6 +13,7 @@
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/spi/spi.h>
+#include <linux/units.h>
 
 /* Registers */
 #define CSI_MODE		0x00	/* CSI mode control */
@@ -71,7 +72,7 @@
 #define TX_TIMEOUT_ERROR	BIT(2)
 #define RX_TIMEOUT_ERROR	BIT(3)
 
-#define CSI_MAX_SPI_SCKO	8000000
+#define CSI_MAX_SPI_SCKO	(8 * HZ_PER_MHZ)
 
 struct rzv2m_csi_priv {
 	void __iomem *base;

From aecf9fbdb7a4dc6d83e8d9984c8d9dc074d8ea2e Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Date: Sat, 15 Jul 2023 02:04:00 +0100
Subject: [PATCH 0237/2047] spi: rzv2m-csi: Rework CSI_CKS_MAX definition

Clock "csiclk" gets divided by 2 * CSI_CLKSEL_CKS in order to generate
the serial clock (output from master), with CSI_CLKSEL_CKS ranging from
0x1 (that means "csiclk" is divided by 2) to 0x3FFF ("csiclk" is divided
by 32766). CSI_CKS_MAX is used for referring to the setting
corresponding to the maximum frequency divider.
Value 0x3FFF for CSI_CKS_MAX doesn't really means much to the reader
without an explanation and a more readable definition.

Add a comment with a meaningful description and also replace value
0x3FFF with the corresponding GENMASK, to make it very clear what the
macro means.

Signed-off-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20230715010407.1751715-4-fabrizio.castro.jz@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rzv2m-csi.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-rzv2m-csi.c b/drivers/spi/spi-rzv2m-csi.c
index 3931045a85eb..621774949bde 100644
--- a/drivers/spi/spi-rzv2m-csi.c
+++ b/drivers/spi/spi-rzv2m-csi.c
@@ -65,7 +65,12 @@
 #define CSI_FIFO_SIZE_BYTES	32
 #define CSI_FIFO_HALF_SIZE	16
 #define CSI_EN_DIS_TIMEOUT_US	100
-#define CSI_CKS_MAX		0x3FFF
+/*
+ * Clock "csiclk" gets divided by 2 * CSI_CLKSEL_CKS in order to generate the
+ * serial clock (output from master), with CSI_CLKSEL_CKS ranging from 0x1 (that
+ * means "csiclk" is divided by 2) to 0x3FFF ("csiclk" is divided by 32766).
+ */
+#define CSI_CKS_MAX		GENMASK(13, 0)
 
 #define UNDERRUN_ERROR		BIT(0)
 #define OVERFLOW_ERROR		BIT(1)

From 2ed2699f58891c72fcd462129345d09424f986c5 Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Date: Sat, 15 Jul 2023 02:04:01 +0100
Subject: [PATCH 0238/2047] spi: rzv2m-csi: Leave readl_poll_timeout calls for
 last

Both rzv2m_csi_sw_reset and rzv2m_csi_start_stop_operation
call into readl_poll_timeout upon a certain condition, and
return 0 otherwise.
Flip the logic to improve readability.

Signed-off-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20230715010407.1751715-5-fabrizio.castro.jz@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rzv2m-csi.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/spi/spi-rzv2m-csi.c b/drivers/spi/spi-rzv2m-csi.c
index 621774949bde..1c65cd5f2039 100644
--- a/drivers/spi/spi-rzv2m-csi.c
+++ b/drivers/spi/spi-rzv2m-csi.c
@@ -131,13 +131,12 @@ static int rzv2m_csi_sw_reset(struct rzv2m_csi_priv *csi, int assert)
 
 	rzv2m_csi_reg_write_bit(csi, CSI_CNT, CSI_CNT_CSIRST, assert);
 
-	if (assert) {
-		return readl_poll_timeout(csi->base + CSI_MODE, reg,
-					  !(reg & CSI_MODE_CSOT), 0,
-					  CSI_EN_DIS_TIMEOUT_US);
-	}
+	if (!assert)
+		return 0;
 
-	return 0;
+	return readl_poll_timeout(csi->base + CSI_MODE, reg,
+				  !(reg & CSI_MODE_CSOT), 0,
+				  CSI_EN_DIS_TIMEOUT_US);
 }
 
 static int rzv2m_csi_start_stop_operation(const struct rzv2m_csi_priv *csi,
@@ -147,12 +146,12 @@ static int rzv2m_csi_start_stop_operation(const struct rzv2m_csi_priv *csi,
 
 	rzv2m_csi_reg_write_bit(csi, CSI_MODE, CSI_MODE_CSIE, enable);
 
-	if (!enable && wait)
-		return readl_poll_timeout(csi->base + CSI_MODE, reg,
-					  !(reg & CSI_MODE_CSOT), 0,
-					  CSI_EN_DIS_TIMEOUT_US);
+	if (enable || !wait)
+		return 0;
 
-	return 0;
+	return readl_poll_timeout(csi->base + CSI_MODE, reg,
+				  !(reg & CSI_MODE_CSOT), 0,
+				  CSI_EN_DIS_TIMEOUT_US);
 }
 
 static int rzv2m_csi_fill_txfifo(struct rzv2m_csi_priv *csi)

From 9f5ac599801c0f7c0969fa94c638265ed988b9bc Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Date: Sat, 15 Jul 2023 02:04:02 +0100
Subject: [PATCH 0239/2047] spi: rzv2m-csi: Replace unnecessary ternary
 operators

The ternary operators used to initialize tx_completed and rx_completed
are not necessary, replace them with a better implementation.

Signed-off-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20230715010407.1751715-6-fabrizio.castro.jz@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rzv2m-csi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-rzv2m-csi.c b/drivers/spi/spi-rzv2m-csi.c
index 1c65cd5f2039..038f1486b7d7 100644
--- a/drivers/spi/spi-rzv2m-csi.c
+++ b/drivers/spi/spi-rzv2m-csi.c
@@ -439,8 +439,8 @@ static int rzv2m_csi_setup(struct spi_device *spi)
 
 static int rzv2m_csi_pio_transfer(struct rzv2m_csi_priv *csi)
 {
-	bool tx_completed = csi->txbuf ? false : true;
-	bool rx_completed = csi->rxbuf ? false : true;
+	bool tx_completed = !csi->txbuf;
+	bool rx_completed = !csi->rxbuf;
 	int ret = 0;
 
 	/* Make sure the TX FIFO is empty */

From 95272641338a32734792fe4b261001847b11ed80 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 10 Jul 2023 17:03:36 +0300
Subject: [PATCH 0240/2047] ACPI: processor: Use _OSC to convey OSPM processor
 support information

Change acpi_early_processor_osc() to return a value in case of a
failure and make it static.

Also make it run acpi_processor_osc() for every processor object or
processor device found in the ACPI Namespace (previously, its only purpose
was to work around platform firmware defects on Skylake systems).

Introduce a new function called acpi_early_processor_control_setup() that
will invoke acpi_early_processor_osc() first in order to convey the OS
processor support information to the platform firmware and if that fails,
it will fall back to using _PDC.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
[ rjw: Subject and changelog edits, change function return value to bool,
  add missing 'static', change messages ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_processor.c | 35 ++++++++++++++++++++++++++---------
 drivers/acpi/bus.c            |  5 +----
 drivers/acpi/internal.h       |  9 ++-------
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 163bd3d52acd..8f4d70ae694f 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -624,16 +624,33 @@ static acpi_status __init acpi_hwp_native_thermal_lvt_osc(acpi_handle handle,
 	return AE_OK;
 }
 
-void __init acpi_early_processor_osc(void)
+static bool __init acpi_early_processor_osc(void)
 {
-	if (boot_cpu_has(X86_FEATURE_HWP)) {
-		acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
-				    ACPI_UINT32_MAX,
-				    acpi_hwp_native_thermal_lvt_osc,
-				    NULL, NULL, NULL);
-		acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID,
-				 acpi_hwp_native_thermal_lvt_osc,
-				 NULL, NULL);
+	acpi_status status;
+
+	acpi_proc_quirk_mwait_check();
+
+	status = acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+				     ACPI_UINT32_MAX, acpi_processor_osc, NULL,
+				     NULL, NULL);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	status = acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, acpi_processor_osc,
+				  NULL, NULL);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	return true;
+}
+
+void __init acpi_early_processor_control_setup(void)
+{
+	if (acpi_early_processor_osc()) {
+		pr_info("_OSC evaluated successfully\n");
+	} else {
+		pr_info("_OSC evaluation failed, trying _PDC\n");
+		acpi_early_processor_set_pdc();
 	}
 }
 #endif
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 2fc2b43a4ed3..a39f2f3a2cd6 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1296,9 +1296,6 @@ static int __init acpi_bus_init(void)
 		goto error1;
 	}
 
-	/* Set capability bits for _OSC under processor scope */
-	acpi_early_processor_osc();
-
 	/*
 	 * _OSC method may exist in module level code,
 	 * so it must be run after ACPI_FULL_INITIALIZATION
@@ -1314,7 +1311,7 @@ static int __init acpi_bus_init(void)
 
 	acpi_sysfs_init();
 
-	acpi_early_processor_set_pdc();
+	acpi_early_processor_control_setup();
 
 	/*
 	 * Maybe EC region is required at bus_scan/acpi_get_devices. So it
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index ed2599ae3e83..ced8224d7472 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -152,18 +152,13 @@ int acpi_wakeup_device_init(void);
                                   Processor
    -------------------------------------------------------------------------- */
 #ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC
+void acpi_early_processor_control_setup(void);
 void acpi_early_processor_set_pdc(void);
 
 void acpi_proc_quirk_mwait_check(void);
 bool processor_physically_present(acpi_handle handle);
 #else
-static inline void acpi_early_processor_set_pdc(void) {}
-#endif
-
-#ifdef CONFIG_X86
-void acpi_early_processor_osc(void);
-#else
-static inline void acpi_early_processor_osc(void) {}
+static inline void acpi_early_processor_control_setup(void) {}
 #endif
 
 /* --------------------------------------------------------------------------

From 11e7bf5629fa6a78b72a2b755c0a8f0bbd512593 Mon Sep 17 00:00:00 2001
From: Michal Wilczynski <michal.wilczynski@intel.com>
Date: Mon, 10 Jul 2023 17:03:37 +0300
Subject: [PATCH 0241/2047] ACPI: processor: Remove
 acpi_hwp_native_thermal_lvt_osc()

The workaround for buggy skylake BIOSes is now implemented in
acpi_processor_osc() and acpi_hwp_native_thermal_lvt_osc() has no
callers, so drop it.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_processor.c | 36 -----------------------------------
 1 file changed, 36 deletions(-)

diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 8f4d70ae694f..9e35b8d8d334 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -561,7 +561,6 @@ bool __init processor_physically_present(acpi_handle handle)
 
 /* vendor specific UUID indicating an Intel platform */
 static u8 sb_uuid_str[] = "4077A616-290C-47BE-9EBD-D87058713953";
-static bool acpi_hwp_native_thermal_lvt_set;
 
 static acpi_status __init acpi_processor_osc(acpi_handle handle, u32 lvl,
 					     void *context, void **rv)
@@ -589,41 +588,6 @@ static acpi_status __init acpi_processor_osc(acpi_handle handle, u32 lvl,
 	return AE_OK;
 }
 
-static acpi_status __init acpi_hwp_native_thermal_lvt_osc(acpi_handle handle,
-							  u32 lvl,
-							  void *context,
-							  void **rv)
-{
-	u32 capbuf[2];
-	struct acpi_osc_context osc_context = {
-		.uuid_str = sb_uuid_str,
-		.rev = 1,
-		.cap.length = 8,
-		.cap.pointer = capbuf,
-	};
-
-	if (acpi_hwp_native_thermal_lvt_set)
-		return AE_CTRL_TERMINATE;
-
-	capbuf[0] = 0x0000;
-	capbuf[1] = 0x1000; /* set bit 12 */
-
-	if (ACPI_SUCCESS(acpi_run_osc(handle, &osc_context))) {
-		if (osc_context.ret.pointer && osc_context.ret.length > 1) {
-			u32 *capbuf_ret = osc_context.ret.pointer;
-
-			if (capbuf_ret[1] & 0x1000) {
-				acpi_handle_info(handle,
-					"_OSC native thermal LVT Acked\n");
-				acpi_hwp_native_thermal_lvt_set = true;
-			}
-		}
-		kfree(osc_context.ret.pointer);
-	}
-
-	return AE_OK;
-}
-
 static bool __init acpi_early_processor_osc(void)
 {
 	acpi_status status;

From 96b709be183c56293933ef45b8b75f8af268c6de Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Tue, 18 Apr 2023 08:42:00 +0200
Subject: [PATCH 0242/2047] ACPI: video: Add backlight=native DMI quirk for
 Lenovo Ideapad Z470

The Lenovo Ideapad Z470 predates Windows 8, so it defaults to using
acpi_video for backlight control. But this is not functional on this
model.

Add a DMI quirk to use the native backlight interface which works.

Link: https://bugzilla.suse.com/show_bug.cgi?id=1208724
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/video_detect.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 18cc08c858cf..0c376edd64fe 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -445,6 +445,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 		DMI_MATCH(DMI_BOARD_NAME, "Lenovo IdeaPad S405"),
 		},
 	},
+	{
+	 /* https://bugzilla.suse.com/show_bug.cgi?id=1208724 */
+	 .callback = video_detect_force_native,
+	 /* Lenovo Ideapad Z470 */
+	 .matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		DMI_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Z470"),
+		},
+	},
 	{
 	 /* https://bugzilla.redhat.com/show_bug.cgi?id=1187004 */
 	 .callback = video_detect_force_native,

From 5f641174a12b8a876a4101201a21ef4675ecc014 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 12 Jul 2023 12:24:59 -0500
Subject: [PATCH 0243/2047] ACPI: thermal: Drop nocrt parameter

The `nocrt` module parameter has no code associated with it and does
nothing.  As `crt=-1` has same functionality as what nocrt should be
doing drop `nocrt` and associated documentation.

This should fix a quirk for Gigabyte GA-7ZX that used `nocrt` and
thus didn't function properly.

Fixes: 8c99fdce3078 ("ACPI: thermal: set "thermal.nocrt" via DMI on Gigabyte GA-7ZX")
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 4 ----
 drivers/acpi/thermal.c                          | 6 +-----
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1457995fd41..2de235d52fac 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6243,10 +6243,6 @@
 			-1: disable all critical trip points in all thermal zones
 			<degrees C>: override all critical trip points
 
-	thermal.nocrt=	[HW,ACPI]
-			Set to disable actions on ACPI thermal zone
-			critical and hot trip points.
-
 	thermal.off=	[HW,ACPI]
 			1: disable ACPI thermal control
 
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index f9f6ebb08fdb..3163a40f02e3 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -82,10 +82,6 @@ static int tzp;
 module_param(tzp, int, 0444);
 MODULE_PARM_DESC(tzp, "Thermal zone polling frequency, in 1/10 seconds.");
 
-static int nocrt;
-module_param(nocrt, int, 0);
-MODULE_PARM_DESC(nocrt, "Set to take no action upon ACPI thermal zone critical trips points.");
-
 static int off;
 module_param(off, int, 0);
 MODULE_PARM_DESC(off, "Set to disable ACPI thermal support.");
@@ -1094,7 +1090,7 @@ static int thermal_act(const struct dmi_system_id *d) {
 static int thermal_nocrt(const struct dmi_system_id *d) {
 	pr_notice("%s detected: disabling all critical thermal trip point actions.\n",
 		  d->ident);
-	nocrt = 1;
+	crt = -1;
 	return 0;
 }
 static int thermal_tzp(const struct dmi_system_id *d) {

From 880b9577855edddda1e732748e849c63199d489b Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 17 Jul 2023 09:00:09 -0700
Subject: [PATCH 0244/2047] fs: distinguish between user initiated freeze and
 kernel initiated freeze

Userspace can freeze a filesystem using the FIFREEZE ioctl or by
suspending the block device; this state persists until userspace thaws
the filesystem with the FITHAW ioctl or resuming the block device.
Since commit 18e9e5104fcd ("Introduce freeze_super and thaw_super for
the fsfreeze ioctl") we only allow the first freeze command to succeed.

The kernel may decide that it is necessary to freeze a filesystem for
its own internal purposes, such as suspends in progress, filesystem fsck
activities, or quiescing a device prior to removal.  Userspace thaw
commands must never break a kernel freeze, and kernel thaw commands
shouldn't undo userspace's freeze command.

Introduce a couple of freeze holder flags and wire it into the
sb_writers state.  One kernel and one userspace freeze are allowed to
coexist at the same time; the filesystem will not thaw until both are
lifted.

I wonder if the f2fs/gfs2 code should be using a kernel freeze here, but
for now we'll use FREEZE_HOLDER_USERSPACE to preserve existing
behaviors.

Cc: mcgrof@kernel.org
Cc: jack@suse.cz
Cc: hch@infradead.org
Cc: ruansy.fnst@fujitsu.com
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 Documentation/filesystems/vfs.rst |  6 ++-
 block/bdev.c                      |  8 ++--
 fs/f2fs/gc.c                      |  8 ++--
 fs/gfs2/super.c                   | 12 +++--
 fs/gfs2/sys.c                     |  4 +-
 fs/ioctl.c                        |  8 ++--
 fs/super.c                        | 79 +++++++++++++++++++++++++++----
 include/linux/fs.h                | 15 ++++--
 8 files changed, 106 insertions(+), 34 deletions(-)

diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index cb2a97e49872..176dd4606d10 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -260,9 +260,11 @@ filesystem.  The following members are defined:
 		void (*evict_inode) (struct inode *);
 		void (*put_super) (struct super_block *);
 		int (*sync_fs)(struct super_block *sb, int wait);
-		int (*freeze_super) (struct super_block *);
+		int (*freeze_super) (struct super_block *sb,
+					enum freeze_holder who);
 		int (*freeze_fs) (struct super_block *);
-		int (*thaw_super) (struct super_block *);
+		int (*thaw_super) (struct super_block *sb,
+					enum freeze_wholder who);
 		int (*unfreeze_fs) (struct super_block *);
 		int (*statfs) (struct dentry *, struct kstatfs *);
 		int (*remount_fs) (struct super_block *, int *, char *);
diff --git a/block/bdev.c b/block/bdev.c
index 979e28a46b98..80ea3fa3593b 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -248,9 +248,9 @@ int freeze_bdev(struct block_device *bdev)
 	if (!sb)
 		goto sync;
 	if (sb->s_op->freeze_super)
-		error = sb->s_op->freeze_super(sb);
+		error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
 	else
-		error = freeze_super(sb);
+		error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
 	deactivate_super(sb);
 
 	if (error) {
@@ -291,9 +291,9 @@ int thaw_bdev(struct block_device *bdev)
 		goto out;
 
 	if (sb->s_op->thaw_super)
-		error = sb->s_op->thaw_super(sb);
+		error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
 	else
-		error = thaw_super(sb);
+		error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
 	if (error)
 		bdev->bd_fsfreeze_count++;
 	else
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 01effd3fcb6c..a1ca394bc327 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -2181,12 +2181,14 @@ out_drop_write:
 	if (err)
 		return err;
 
-	err = freeze_super(sbi->sb);
+	err = freeze_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
 	if (err)
 		return err;
 
 	if (f2fs_readonly(sbi->sb)) {
-		thaw_super(sbi->sb);
+		err = thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
+		if (err)
+			return err;
 		return -EROFS;
 	}
 
@@ -2240,6 +2242,6 @@ recover_out:
 out_err:
 	f2fs_up_write(&sbi->cp_global_sem);
 	f2fs_up_write(&sbi->gc_lock);
-	thaw_super(sbi->sb);
+	thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
 	return err;
 }
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 9f4d5d6549ee..ae7fedc6ddcd 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -689,7 +689,7 @@ static int gfs2_freeze_locally(struct gfs2_sbd *sdp)
 	struct super_block *sb = sdp->sd_vfs;
 	int error;
 
-	error = freeze_super(sb);
+	error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
 	if (error)
 		return error;
 
@@ -697,7 +697,9 @@ static int gfs2_freeze_locally(struct gfs2_sbd *sdp)
 		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
 			       GFS2_LFC_FREEZE_GO_SYNC);
 		if (gfs2_withdrawn(sdp)) {
-			thaw_super(sb);
+			error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+			if (error)
+				return error;
 			return -EIO;
 		}
 	}
@@ -712,7 +714,7 @@ static int gfs2_do_thaw(struct gfs2_sbd *sdp)
 	error = gfs2_freeze_lock_shared(sdp);
 	if (error)
 		goto fail;
-	error = thaw_super(sb);
+	error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
 	if (!error)
 		return 0;
 
@@ -761,7 +763,7 @@ out:
  *
  */
 
-static int gfs2_freeze_super(struct super_block *sb)
+static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	int error;
@@ -816,7 +818,7 @@ out:
  *
  */
 
-static int gfs2_thaw_super(struct super_block *sb)
+static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	int error;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 2dfbe2f188dd..c60bc7f628e1 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -168,10 +168,10 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 
 	switch (n) {
 	case 0:
-		error = thaw_super(sdp->sd_vfs);
+		error = thaw_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE);
 		break;
 	case 1:
-		error = freeze_super(sdp->sd_vfs);
+		error = freeze_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE);
 		break;
 	default:
 		return -EINVAL;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5b2481cd4750..a56cbceedcd1 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -396,8 +396,8 @@ static int ioctl_fsfreeze(struct file *filp)
 
 	/* Freeze */
 	if (sb->s_op->freeze_super)
-		return sb->s_op->freeze_super(sb);
-	return freeze_super(sb);
+		return sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+	return freeze_super(sb, FREEZE_HOLDER_USERSPACE);
 }
 
 static int ioctl_fsthaw(struct file *filp)
@@ -409,8 +409,8 @@ static int ioctl_fsthaw(struct file *filp)
 
 	/* Thaw */
 	if (sb->s_op->thaw_super)
-		return sb->s_op->thaw_super(sb);
-	return thaw_super(sb);
+		return sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+	return thaw_super(sb, FREEZE_HOLDER_USERSPACE);
 }
 
 static int ioctl_file_dedupe_range(struct file *file,
diff --git a/fs/super.c b/fs/super.c
index e781226e2880..e6db39aadaee 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -39,7 +39,7 @@
 #include <uapi/linux/mount.h>
 #include "internal.h"
 
-static int thaw_super_locked(struct super_block *sb);
+static int thaw_super_locked(struct super_block *sb, enum freeze_holder who);
 
 static LIST_HEAD(super_blocks);
 static DEFINE_SPINLOCK(sb_lock);
@@ -1030,7 +1030,7 @@ static void do_thaw_all_callback(struct super_block *sb)
 	down_write(&sb->s_umount);
 	if (sb->s_root && sb->s_flags & SB_BORN) {
 		emergency_thaw_bdev(sb);
-		thaw_super_locked(sb);
+		thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE);
 	} else {
 		up_write(&sb->s_umount);
 	}
@@ -1647,11 +1647,22 @@ static void sb_freeze_unlock(struct super_block *sb, int level)
 /**
  * freeze_super - lock the filesystem and force it into a consistent state
  * @sb: the super to lock
+ * @who: context that wants to freeze
  *
  * Syncs the super to make sure the filesystem is consistent and calls the fs's
- * freeze_fs.  Subsequent calls to this without first thawing the fs will return
+ * freeze_fs.  Subsequent calls to this without first thawing the fs may return
  * -EBUSY.
  *
+ * @who should be:
+ * * %FREEZE_HOLDER_USERSPACE if userspace wants to freeze the fs;
+ * * %FREEZE_HOLDER_KERNEL if the kernel wants to freeze the fs.
+ *
+ * The @who argument distinguishes between the kernel and userspace trying to
+ * freeze the filesystem.  Although there cannot be multiple kernel freezes or
+ * multiple userspace freezes in effect at any given time, the kernel and
+ * userspace can both hold a filesystem frozen.  The filesystem remains frozen
+ * until there are no kernel or userspace freezes in effect.
+ *
  * During this function, sb->s_writers.frozen goes through these values:
  *
  * SB_UNFROZEN: File system is normal, all writes progress as usual.
@@ -1677,12 +1688,30 @@ static void sb_freeze_unlock(struct super_block *sb, int level)
  *
  * sb->s_writers.frozen is protected by sb->s_umount.
  */
-int freeze_super(struct super_block *sb)
+int freeze_super(struct super_block *sb, enum freeze_holder who)
 {
 	int ret;
 
 	atomic_inc(&sb->s_active);
 	down_write(&sb->s_umount);
+
+	if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
+		if (sb->s_writers.freeze_holders & who) {
+			deactivate_locked_super(sb);
+			return -EBUSY;
+		}
+
+		WARN_ON(sb->s_writers.freeze_holders == 0);
+
+		/*
+		 * Someone else already holds this type of freeze; share the
+		 * freeze and assign the active ref to the freeze.
+		 */
+		sb->s_writers.freeze_holders |= who;
+		up_write(&sb->s_umount);
+		return 0;
+	}
+
 	if (sb->s_writers.frozen != SB_UNFROZEN) {
 		deactivate_locked_super(sb);
 		return -EBUSY;
@@ -1695,6 +1724,7 @@ int freeze_super(struct super_block *sb)
 
 	if (sb_rdonly(sb)) {
 		/* Nothing to do really... */
+		sb->s_writers.freeze_holders |= who;
 		sb->s_writers.frozen = SB_FREEZE_COMPLETE;
 		up_write(&sb->s_umount);
 		return 0;
@@ -1738,6 +1768,7 @@ int freeze_super(struct super_block *sb)
 	 * For debugging purposes so that fs can warn if it sees write activity
 	 * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
 	 */
+	sb->s_writers.freeze_holders |= who;
 	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
 	lockdep_sb_freeze_release(sb);
 	up_write(&sb->s_umount);
@@ -1745,16 +1776,39 @@ int freeze_super(struct super_block *sb)
 }
 EXPORT_SYMBOL(freeze_super);
 
-static int thaw_super_locked(struct super_block *sb)
+/*
+ * Undoes the effect of a freeze_super_locked call.  If the filesystem is
+ * frozen both by userspace and the kernel, a thaw call from either source
+ * removes that state without releasing the other state or unlocking the
+ * filesystem.
+ */
+static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
 {
 	int error;
 
-	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
+	if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
+		if (!(sb->s_writers.freeze_holders & who)) {
+			up_write(&sb->s_umount);
+			return -EINVAL;
+		}
+
+		/*
+		 * Freeze is shared with someone else.  Release our hold and
+		 * drop the active ref that freeze_super assigned to the
+		 * freezer.
+		 */
+		if (sb->s_writers.freeze_holders & ~who) {
+			sb->s_writers.freeze_holders &= ~who;
+			deactivate_locked_super(sb);
+			return 0;
+		}
+	} else {
 		up_write(&sb->s_umount);
 		return -EINVAL;
 	}
 
 	if (sb_rdonly(sb)) {
+		sb->s_writers.freeze_holders &= ~who;
 		sb->s_writers.frozen = SB_UNFROZEN;
 		goto out;
 	}
@@ -1772,6 +1826,7 @@ static int thaw_super_locked(struct super_block *sb)
 		}
 	}
 
+	sb->s_writers.freeze_holders &= ~who;
 	sb->s_writers.frozen = SB_UNFROZEN;
 	sb_freeze_unlock(sb, SB_FREEZE_FS);
 out:
@@ -1782,13 +1837,19 @@ out:
 /**
  * thaw_super -- unlock filesystem
  * @sb: the super to thaw
+ * @who: context that wants to freeze
  *
- * Unlocks the filesystem and marks it writeable again after freeze_super().
+ * Unlocks the filesystem and marks it writeable again after freeze_super()
+ * if there are no remaining freezes on the filesystem.
+ *
+ * @who should be:
+ * * %FREEZE_HOLDER_USERSPACE if userspace wants to thaw the fs;
+ * * %FREEZE_HOLDER_KERNEL if the kernel wants to thaw the fs.
  */
-int thaw_super(struct super_block *sb)
+int thaw_super(struct super_block *sb, enum freeze_holder who)
 {
 	down_write(&sb->s_umount);
-	return thaw_super_locked(sb);
+	return thaw_super_locked(sb, who);
 }
 EXPORT_SYMBOL(thaw_super);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6867512907d6..5e8d81212abc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1147,7 +1147,8 @@ enum {
 #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
 
 struct sb_writers {
-	int				frozen;		/* Is sb frozen? */
+	unsigned short			frozen;		/* Is sb frozen? */
+	unsigned short			freeze_holders;	/* Who froze fs? */
 	struct percpu_rw_semaphore	rw_sem[SB_FREEZE_LEVELS];
 };
 
@@ -1902,6 +1903,10 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 					struct file *dst_file, loff_t dst_pos,
 					loff_t len, unsigned int remap_flags);
 
+enum freeze_holder {
+	FREEZE_HOLDER_KERNEL	= (1U << 0),
+	FREEZE_HOLDER_USERSPACE	= (1U << 1),
+};
 
 struct super_operations {
    	struct inode *(*alloc_inode)(struct super_block *sb);
@@ -1914,9 +1919,9 @@ struct super_operations {
 	void (*evict_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
 	int (*sync_fs)(struct super_block *sb, int wait);
-	int (*freeze_super) (struct super_block *);
+	int (*freeze_super) (struct super_block *, enum freeze_holder who);
 	int (*freeze_fs) (struct super_block *);
-	int (*thaw_super) (struct super_block *);
+	int (*thaw_super) (struct super_block *, enum freeze_holder who);
 	int (*unfreeze_fs) (struct super_block *);
 	int (*statfs) (struct dentry *, struct kstatfs *);
 	int (*remount_fs) (struct super_block *, int *, char *);
@@ -2290,8 +2295,8 @@ extern int unregister_filesystem(struct file_system_type *);
 extern int vfs_statfs(const struct path *, struct kstatfs *);
 extern int user_statfs(const char __user *, struct kstatfs *);
 extern int fd_statfs(int, struct kstatfs *);
-extern int freeze_super(struct super_block *super);
-extern int thaw_super(struct super_block *super);
+int freeze_super(struct super_block *super, enum freeze_holder who);
+int thaw_super(struct super_block *super, enum freeze_holder who);
 extern __printf(2, 3)
 int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
 extern int super_setup_bdi(struct super_block *sb);

From 59ba4fdd2d1f9dd7af98f5168c846150c9aec56d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 17 Jul 2023 09:00:10 -0700
Subject: [PATCH 0245/2047] fs: wait for partially frozen filesystems

Jan Kara suggested that when one thread is in the middle of freezing a
filesystem, another thread trying to freeze the same fs but with a
different freeze_holder should wait until the freezer reaches either end
state (UNFROZEN or COMPLETE) instead of returning EBUSY immediately.

Neither caller can do anything sensible with this race other than retry
but they cannot really distinguish EBUSY as in "some other holder of the
same type has the sb already frozen" from "freezing raced with holder of
a different type".

Plumb in the extra code needed to wait for the fs freezer to reach an
end state and try the freeze again.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/super.c | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index e6db39aadaee..da68584815e4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1644,6 +1644,24 @@ static void sb_freeze_unlock(struct super_block *sb, int level)
 		percpu_up_write(sb->s_writers.rw_sem + level);
 }
 
+static int wait_for_partially_frozen(struct super_block *sb)
+{
+	int ret = 0;
+
+	do {
+		unsigned short old = sb->s_writers.frozen;
+
+		up_write(&sb->s_umount);
+		ret = wait_var_event_killable(&sb->s_writers.frozen,
+					       sb->s_writers.frozen != old);
+		down_write(&sb->s_umount);
+	} while (ret == 0 &&
+		 sb->s_writers.frozen != SB_UNFROZEN &&
+		 sb->s_writers.frozen != SB_FREEZE_COMPLETE);
+
+	return ret;
+}
+
 /**
  * freeze_super - lock the filesystem and force it into a consistent state
  * @sb: the super to lock
@@ -1695,6 +1713,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
 	atomic_inc(&sb->s_active);
 	down_write(&sb->s_umount);
 
+retry:
 	if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
 		if (sb->s_writers.freeze_holders & who) {
 			deactivate_locked_super(sb);
@@ -1713,8 +1732,13 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
 	}
 
 	if (sb->s_writers.frozen != SB_UNFROZEN) {
-		deactivate_locked_super(sb);
-		return -EBUSY;
+		ret = wait_for_partially_frozen(sb);
+		if (ret) {
+			deactivate_locked_super(sb);
+			return ret;
+		}
+
+		goto retry;
 	}
 
 	if (!(sb->s_flags & SB_BORN)) {
@@ -1726,6 +1750,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
 		/* Nothing to do really... */
 		sb->s_writers.freeze_holders |= who;
 		sb->s_writers.frozen = SB_FREEZE_COMPLETE;
+		wake_up_var(&sb->s_writers.frozen);
 		up_write(&sb->s_umount);
 		return 0;
 	}
@@ -1745,6 +1770,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
 	if (ret) {
 		sb->s_writers.frozen = SB_UNFROZEN;
 		sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
+		wake_up_var(&sb->s_writers.frozen);
 		deactivate_locked_super(sb);
 		return ret;
 	}
@@ -1760,6 +1786,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
 				"VFS:Filesystem freeze failed\n");
 			sb->s_writers.frozen = SB_UNFROZEN;
 			sb_freeze_unlock(sb, SB_FREEZE_FS);
+			wake_up_var(&sb->s_writers.frozen);
 			deactivate_locked_super(sb);
 			return ret;
 		}
@@ -1770,6 +1797,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
 	 */
 	sb->s_writers.freeze_holders |= who;
 	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
+	wake_up_var(&sb->s_writers.frozen);
 	lockdep_sb_freeze_release(sb);
 	up_write(&sb->s_umount);
 	return 0;
@@ -1810,6 +1838,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
 	if (sb_rdonly(sb)) {
 		sb->s_writers.freeze_holders &= ~who;
 		sb->s_writers.frozen = SB_UNFROZEN;
+		wake_up_var(&sb->s_writers.frozen);
 		goto out;
 	}
 
@@ -1828,6 +1857,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
 
 	sb->s_writers.freeze_holders &= ~who;
 	sb->s_writers.frozen = SB_UNFROZEN;
+	wake_up_var(&sb->s_writers.frozen);
 	sb_freeze_unlock(sb, SB_FREEZE_FS);
 out:
 	deactivate_locked_super(sb);

From ce6616724fb425d6043d0dad6af996cd7c79bcc4 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Wed, 5 Jul 2023 23:51:27 +0200
Subject: [PATCH 0246/2047] ubsan: Clarify Kconfig text for CONFIG_UBSAN_TRAP

Make it clearer in the one-line description and the verbose description
text that CONFIG_UBSAN_TRAP as currently implemented involves a tradeoff of
much less helpful oops messages in exchange for a smaller kernel image.
(With the additional effect of turning UBSAN warnings into crashes, which
may or may not be desired.)

Signed-off-by: Jann Horn <jannh@google.com>
Link: https://lore.kernel.org/r/20230705215128.486054-1-jannh@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 lib/Kconfig.ubsan | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index efae7e011956..59e21bfec188 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -13,7 +13,7 @@ menuconfig UBSAN
 if UBSAN
 
 config UBSAN_TRAP
-	bool "On Sanitizer warnings, abort the running kernel code"
+	bool "Abort on Sanitizer warnings (smaller kernel but less verbose)"
 	depends on !COMPILE_TEST
 	help
 	  Building kernels with Sanitizer features enabled tends to grow
@@ -26,6 +26,14 @@ config UBSAN_TRAP
 	  the system. For some system builders this is an acceptable
 	  trade-off.
 
+	  Also note that selecting Y will cause your kernel to Oops
+	  with an "illegal instruction" error with no further details
+	  when a UBSAN violation occurs. (Except on arm64, which will
+	  report which Sanitizer failed.) This may make it hard to
+	  determine whether an Oops was caused by UBSAN or to figure
+	  out the details of a UBSAN violation. It makes the kernel log
+	  output less useful for bug reports.
+
 config CC_HAS_UBSAN_BOUNDS_STRICT
 	def_bool $(cc-option,-fsanitize=bounds-strict)
 	help

From 8453e7924a1a9130f2b4d2c507de2cdc3892a5b5 Mon Sep 17 00:00:00 2001
From: Azeem Shaikh <azeemshaikh38@gmail.com>
Date: Tue, 23 May 2023 02:14:25 +0000
Subject: [PATCH 0247/2047] soc: fsl: qe: Replace all non-returning strlcpy
 with strscpy

strlcpy() reads the entire source buffer first.
This read may exceed the destination size limit.
This is both inefficient and can lead to linear read
overflows if a source string is not NUL-terminated [1].
In an effort to remove strlcpy() completely [2], replace
strlcpy() here with strscpy().
No return values were used, so direct replacement is safe.

[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy
[2] https://github.com/KSPP/linux/issues/89

Signed-off-by: Azeem Shaikh <azeemshaikh38@gmail.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20230523021425.2406309-1-azeemshaikh38@gmail.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/soc/fsl/qe/qe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index b3c226eb5292..58746e570d14 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -524,7 +524,7 @@ int qe_upload_firmware(const struct qe_firmware *firmware)
 	 * saved microcode information and put in the new.
 	 */
 	memset(&qe_firmware_info, 0, sizeof(qe_firmware_info));
-	strlcpy(qe_firmware_info.id, firmware->id, sizeof(qe_firmware_info.id));
+	strscpy(qe_firmware_info.id, firmware->id, sizeof(qe_firmware_info.id));
 	qe_firmware_info.extended_modes = be64_to_cpu(firmware->extended_modes);
 	memcpy(qe_firmware_info.vtraps, firmware->vtraps,
 		sizeof(firmware->vtraps));
@@ -599,7 +599,7 @@ struct qe_firmware_info *qe_get_firmware_info(void)
 	/* Copy the data into qe_firmware_info*/
 	sprop = of_get_property(fw, "id", NULL);
 	if (sprop)
-		strlcpy(qe_firmware_info.id, sprop,
+		strscpy(qe_firmware_info.id, sprop,
 			sizeof(qe_firmware_info.id));
 
 	of_property_read_u64(fw, "extended-modes",

From 4943b66df18a0e8aedd006792ed73257cd2da8f8 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@google.com>
Date: Tue, 7 Mar 2023 23:31:56 -0800
Subject: [PATCH 0248/2047] seccomp: don't use semaphore and wait_queue
 together

The main reason is to use new wake_up helpers that will be added in the
following patches. But here are a few other reasons:

* if we use two different ways, we always need to call them both. This
  patch fixes seccomp_notify_recv where we forgot to call wake_up_poll
  in the error path.

* If we use one primitive, we can control how many waiters are woken up
  for each request. Our goal is to wake up just one that will handle a
  request. Right now, wake_up_poll can wake up one waiter and
  up(&match->notif->request) can wake up one more.

Signed-off-by: Andrei Vagin <avagin@google.com>
Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230308073201.3102738-2-avagin@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/seccomp.c | 41 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d3e584065c7f..1386dcedda1a 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -145,7 +145,7 @@ struct seccomp_kaddfd {
  * @notifications: A list of struct seccomp_knotif elements.
  */
 struct notification {
-	struct semaphore request;
+	atomic_t requests;
 	u64 next_id;
 	struct list_head notifications;
 };
@@ -1116,7 +1116,7 @@ static int seccomp_do_user_notification(int this_syscall,
 	list_add_tail(&n.list, &match->notif->notifications);
 	INIT_LIST_HEAD(&n.addfd);
 
-	up(&match->notif->request);
+	atomic_inc(&match->notif->requests);
 	wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
 
 	/*
@@ -1450,6 +1450,37 @@ find_notification(struct seccomp_filter *filter, u64 id)
 	return NULL;
 }
 
+static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
+				  void *key)
+{
+	/* Avoid a wakeup if event not interesting for us. */
+	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
+		return 0;
+	return autoremove_wake_function(wait, mode, sync, key);
+}
+
+static int recv_wait_event(struct seccomp_filter *filter)
+{
+	DEFINE_WAIT_FUNC(wait, recv_wake_function);
+	int ret;
+
+	if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
+		return 0;
+
+	for (;;) {
+		ret = prepare_to_wait_event(&filter->wqh, &wait, TASK_INTERRUPTIBLE);
+
+		if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
+			break;
+
+		if (ret)
+			return ret;
+
+		schedule();
+	}
+	finish_wait(&filter->wqh, &wait);
+	return 0;
+}
 
 static long seccomp_notify_recv(struct seccomp_filter *filter,
 				void __user *buf)
@@ -1467,7 +1498,7 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
 
 	memset(&unotif, 0, sizeof(unotif));
 
-	ret = down_interruptible(&filter->notif->request);
+	ret = recv_wait_event(filter);
 	if (ret < 0)
 		return ret;
 
@@ -1515,7 +1546,8 @@ out:
 			if (should_sleep_killable(filter, knotif))
 				complete(&knotif->ready);
 			knotif->state = SECCOMP_NOTIFY_INIT;
-			up(&filter->notif->request);
+			atomic_inc(&filter->notif->requests);
+			wake_up_poll(&filter->wqh, EPOLLIN | EPOLLRDNORM);
 		}
 		mutex_unlock(&filter->notify_lock);
 	}
@@ -1777,7 +1809,6 @@ static struct file *init_listener(struct seccomp_filter *filter)
 	if (!filter->notif)
 		goto out;
 
-	sema_init(&filter->notif->request, 0);
 	filter->notif->next_id = get_random_u64();
 	INIT_LIST_HEAD(&filter->notif->notifications);
 

From ab83f455f04df5b2f7c6d4de03b6d2eaeaa27b8a Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Tue, 7 Mar 2023 23:31:57 -0800
Subject: [PATCH 0249/2047] sched: add WF_CURRENT_CPU and externise ttwu

Add WF_CURRENT_CPU wake flag that advices the scheduler to
move the wakee to the current CPU. This is useful for fast on-CPU
context switching use cases.

In addition, make ttwu external rather than static so that
the flag could be passed to it from outside of sched/core.c.

Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Andrei Vagin <avagin@google.com>
Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230308073201.3102738-3-avagin@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/sched/core.c  |  3 +--
 kernel/sched/fair.c  |  4 ++++
 kernel/sched/sched.h | 13 ++++++++-----
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c52c2eba7c73..1574989627b0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4193,8 +4193,7 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
  * Return: %true if @p->state changes (an actual wakeup was done),
  *	   %false otherwise.
  */
-static int
-try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
+int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 {
 	unsigned long flags;
 	int cpu, success = 0;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b3e25be58e2b..ceb5d4c4738e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7741,6 +7741,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 	if (wake_flags & WF_TTWU) {
 		record_wakee(p);
 
+		if ((wake_flags & WF_CURRENT_CPU) &&
+		    cpumask_test_cpu(cpu, p->cpus_ptr))
+			return cpu;
+
 		if (sched_energy_enabled()) {
 			new_cpu = find_energy_efficient_cpu(p, prev_cpu);
 			if (new_cpu >= 0)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e93e006a942b..48d0be005f08 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2131,12 +2131,13 @@ static inline int task_on_rq_migrating(struct task_struct *p)
 }
 
 /* Wake flags. The first three directly map to some SD flag value */
-#define WF_EXEC     0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
-#define WF_FORK     0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
-#define WF_TTWU     0x08 /* Wakeup;            maps to SD_BALANCE_WAKE */
+#define WF_EXEC         0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
+#define WF_FORK         0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
+#define WF_TTWU         0x08 /* Wakeup;            maps to SD_BALANCE_WAKE */
 
-#define WF_SYNC     0x10 /* Waker goes to sleep after wakeup */
-#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
+#define WF_SYNC         0x10 /* Waker goes to sleep after wakeup */
+#define WF_MIGRATED     0x20 /* Internal use, task got migrated */
+#define WF_CURRENT_CPU  0x40 /* Prefer to move the wakee to the current CPU. */
 
 #ifdef CONFIG_SMP
 static_assert(WF_EXEC == SD_BALANCE_EXEC);
@@ -3229,6 +3230,8 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
 extern void swake_up_all_locked(struct swait_queue_head *q);
 extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
 
+extern int try_to_wake_up(struct task_struct *tsk, unsigned int state, int wake_flags);
+
 #ifdef CONFIG_PREEMPT_DYNAMIC
 extern int preempt_dynamic_mode;
 extern int sched_dynamic_mode(const char *str);

From 6f63904c8f3edb65bd85c1be01d69214ff8ca4c5 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@google.com>
Date: Tue, 7 Mar 2023 23:31:58 -0800
Subject: [PATCH 0250/2047] sched: add a few helpers to wake up tasks on the
 current cpu

Add complete_on_current_cpu, wake_up_poll_on_current_cpu helpers to wake
up tasks on the current CPU.

These two helpers are useful when the task needs to make a synchronous context
switch to another task. In this context, synchronous means it wakes up the
target task and falls asleep right after that.

One example of such workloads is seccomp user notifies. This mechanism allows
the  supervisor process handles system calls on behalf of a target process.
While the supervisor is handling an intercepted system call, the target process
will be blocked in the kernel, waiting for a response to come back.

On-CPU context switches are much faster than regular ones.

Signed-off-by: Andrei Vagin <avagin@google.com>
Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230308073201.3102738-4-avagin@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/completion.h |  1 +
 include/linux/swait.h      |  2 +-
 include/linux/wait.h       |  3 +++
 kernel/sched/completion.c  | 26 ++++++++++++++++++--------
 kernel/sched/core.c        |  2 +-
 kernel/sched/swait.c       |  8 ++++----
 kernel/sched/wait.c        |  5 +++++
 7 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 62b32b19e0a8..fb2915676574 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -116,6 +116,7 @@ extern bool try_wait_for_completion(struct completion *x);
 extern bool completion_done(struct completion *x);
 
 extern void complete(struct completion *);
+extern void complete_on_current_cpu(struct completion *x);
 extern void complete_all(struct completion *);
 
 #endif
diff --git a/include/linux/swait.h b/include/linux/swait.h
index 6a8c22b8c2a5..d324419482a0 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -146,7 +146,7 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)
 
 extern void swake_up_one(struct swait_queue_head *q);
 extern void swake_up_all(struct swait_queue_head *q);
-extern void swake_up_locked(struct swait_queue_head *q);
+extern void swake_up_locked(struct swait_queue_head *q, int wake_flags);
 
 extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
 extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a0307b516b09..5ec7739400f4 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -210,6 +210,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
 }
 
 int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
 		unsigned int mode, void *key, wait_queue_entry_t *bookmark);
@@ -237,6 +238,8 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
 #define key_to_poll(m) ((__force __poll_t)(uintptr_t)(void *)(m))
 #define wake_up_poll(x, m)							\
 	__wake_up(x, TASK_NORMAL, 1, poll_to_key(m))
+#define wake_up_poll_on_current_cpu(x, m)					\
+	__wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m))
 #define wake_up_locked_poll(x, m)						\
 	__wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m))
 #define wake_up_interruptible_poll(x, m)					\
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index d57a5c1c1cd9..3561ab533dd4 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -13,6 +13,23 @@
  * Waiting for completion is a typically sync point, but not an exclusion point.
  */
 
+static void complete_with_flags(struct completion *x, int wake_flags)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&x->wait.lock, flags);
+
+	if (x->done != UINT_MAX)
+		x->done++;
+	swake_up_locked(&x->wait, wake_flags);
+	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+}
+
+void complete_on_current_cpu(struct completion *x)
+{
+	return complete_with_flags(x, WF_CURRENT_CPU);
+}
+
 /**
  * complete: - signals a single thread waiting on this completion
  * @x:  holds the state of this particular completion
@@ -27,14 +44,7 @@
  */
 void complete(struct completion *x)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&x->wait.lock, flags);
-
-	if (x->done != UINT_MAX)
-		x->done++;
-	swake_up_locked(&x->wait);
-	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+	complete_with_flags(x, 0);
 }
 EXPORT_SYMBOL(complete);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1574989627b0..4d63e063608a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7029,7 +7029,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC);
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC|WF_CURRENT_CPU));
 	return try_to_wake_up(curr->private, mode, wake_flags);
 }
 EXPORT_SYMBOL(default_wake_function);
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index 76b9b796e695..72505cd3b60a 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(__init_swait_queue_head);
  * If for some reason it would return 0, that means the previously waiting
  * task is already running, so it will observe condition true (or has already).
  */
-void swake_up_locked(struct swait_queue_head *q)
+void swake_up_locked(struct swait_queue_head *q, int wake_flags)
 {
 	struct swait_queue *curr;
 
@@ -26,7 +26,7 @@ void swake_up_locked(struct swait_queue_head *q)
 		return;
 
 	curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
-	wake_up_process(curr->task);
+	try_to_wake_up(curr->task, TASK_NORMAL, wake_flags);
 	list_del_init(&curr->task_list);
 }
 EXPORT_SYMBOL(swake_up_locked);
@@ -41,7 +41,7 @@ EXPORT_SYMBOL(swake_up_locked);
 void swake_up_all_locked(struct swait_queue_head *q)
 {
 	while (!list_empty(&q->task_list))
-		swake_up_locked(q);
+		swake_up_locked(q, 0);
 }
 
 void swake_up_one(struct swait_queue_head *q)
@@ -49,7 +49,7 @@ void swake_up_one(struct swait_queue_head *q)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&q->lock, flags);
-	swake_up_locked(q);
+	swake_up_locked(q, 0);
 	raw_spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(swake_up_one);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 48c53e4739ea..802d98cf2de3 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -161,6 +161,11 @@ int __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
 }
 EXPORT_SYMBOL(__wake_up);
 
+void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key)
+{
+	__wake_up_common_lock(wq_head, mode, 1, WF_CURRENT_CPU, key);
+}
+
 /*
  * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
  */

From 48a1084a8b7423642b5f17ca6202f6f277c5392b Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@google.com>
Date: Tue, 7 Mar 2023 23:31:59 -0800
Subject: [PATCH 0251/2047] seccomp: add the synchronous mode for
 seccomp_unotify
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

seccomp_unotify allows more privileged processes do actions on behalf
of less privileged processes.

In many cases, the workflow is fully synchronous. It means a target
process triggers a system call and passes controls to a supervisor
process that handles the system call and returns controls to the target
process. In this context, "synchronous" means that only one process is
running and another one is waiting.

There is the WF_CURRENT_CPU flag that is used to advise the scheduler to
move the wakee to the current CPU. For such synchronous workflows, it
makes context switches a few times faster.

Right now, each interaction takes 12µs. With this patch, it takes about
3µs.

This change introduce the SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP flag that
it used to enable the sync mode.

Signed-off-by: Andrei Vagin <avagin@google.com>
Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230308073201.3102738-5-avagin@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h |  4 ++++
 kernel/seccomp.c             | 31 +++++++++++++++++++++++++++++--
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 0fdc6ef02b94..dbfc9b37fcae 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -115,6 +115,8 @@ struct seccomp_notif_resp {
 	__u32 flags;
 };
 
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+
 /* valid flags for seccomp_notif_addfd */
 #define SECCOMP_ADDFD_FLAG_SETFD	(1UL << 0) /* Specify remote fd */
 #define SECCOMP_ADDFD_FLAG_SEND		(1UL << 1) /* Addfd and return it, atomically */
@@ -150,4 +152,6 @@ struct seccomp_notif_addfd {
 #define SECCOMP_IOCTL_NOTIF_ADDFD	SECCOMP_IOW(3, \
 						struct seccomp_notif_addfd)
 
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS	SECCOMP_IOW(4, __u64)
+
 #endif /* _UAPI_LINUX_SECCOMP_H */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 1386dcedda1a..d3fdc0086168 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -143,9 +143,12 @@ struct seccomp_kaddfd {
  *           filter->notify_lock.
  * @next_id: The id of the next request.
  * @notifications: A list of struct seccomp_knotif elements.
+ * @flags: A set of SECCOMP_USER_NOTIF_FD_* flags.
  */
+
 struct notification {
 	atomic_t requests;
+	u32 flags;
 	u64 next_id;
 	struct list_head notifications;
 };
@@ -1117,7 +1120,10 @@ static int seccomp_do_user_notification(int this_syscall,
 	INIT_LIST_HEAD(&n.addfd);
 
 	atomic_inc(&match->notif->requests);
-	wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
+	if (match->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+		wake_up_poll_on_current_cpu(&match->wqh, EPOLLIN | EPOLLRDNORM);
+	else
+		wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
 
 	/*
 	 * This is where we wait for a reply from userspace.
@@ -1593,7 +1599,10 @@ static long seccomp_notify_send(struct seccomp_filter *filter,
 	knotif->error = resp.error;
 	knotif->val = resp.val;
 	knotif->flags = resp.flags;
-	complete(&knotif->ready);
+	if (filter->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+		complete_on_current_cpu(&knotif->ready);
+	else
+		complete(&knotif->ready);
 out:
 	mutex_unlock(&filter->notify_lock);
 	return ret;
@@ -1623,6 +1632,22 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter,
 	return ret;
 }
 
+static long seccomp_notify_set_flags(struct seccomp_filter *filter,
+				    unsigned long flags)
+{
+	long ret;
+
+	if (flags & ~SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+		return -EINVAL;
+
+	ret = mutex_lock_interruptible(&filter->notify_lock);
+	if (ret < 0)
+		return ret;
+	filter->notif->flags = flags;
+	mutex_unlock(&filter->notify_lock);
+	return 0;
+}
+
 static long seccomp_notify_addfd(struct seccomp_filter *filter,
 				 struct seccomp_notif_addfd __user *uaddfd,
 				 unsigned int size)
@@ -1752,6 +1777,8 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
 	case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
 	case SECCOMP_IOCTL_NOTIF_ID_VALID:
 		return seccomp_notify_id_valid(filter, buf);
+	case SECCOMP_IOCTL_NOTIF_SET_FLAGS:
+		return seccomp_notify_set_flags(filter, arg);
 	}
 
 	/* Extensible Argument ioctls */

From 8feae5adec17ec190118ad84da84a6ad9fa3e30b Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@google.com>
Date: Tue, 7 Mar 2023 23:32:00 -0800
Subject: [PATCH 0252/2047] selftest/seccomp: add a new test for the sync mode
 of seccomp_user_notify

Test output:
 #  RUN           global.user_notification_sync ...
 #            OK  global.user_notification_sync
 ok 51 global.user_notification_sync

Signed-off-by: Andrei Vagin <avagin@google.com>
Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230308073201.3102738-6-avagin@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 55 +++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 43ec36b179dc..f6a04d88e02f 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -4255,6 +4255,61 @@ TEST(user_notification_addfd_rlimit)
 	close(memfd);
 }
 
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS  SECCOMP_IOW(4, __u64)
+#endif
+
+TEST(user_notification_sync)
+{
+	struct seccomp_notif req = {};
+	struct seccomp_notif_resp resp = {};
+	int status, listener;
+	pid_t pid;
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	listener = user_notif_syscall(__NR_getppid,
+				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
+	ASSERT_GE(listener, 0);
+
+	/* Try to set invalid flags. */
+	EXPECT_SYSCALL_RETURN(-EINVAL,
+		ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0));
+
+	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+			SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0) {
+		ret = syscall(__NR_getppid);
+		ASSERT_EQ(ret, USER_NOTIF_MAGIC) {
+			_exit(1);
+		}
+		_exit(0);
+	}
+
+	req.pid = 0;
+	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+	ASSERT_EQ(req.data.nr,  __NR_getppid);
+
+	resp.id = req.id;
+	resp.error = 0;
+	resp.val = USER_NOTIF_MAGIC;
+	resp.flags = 0;
+	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_EQ(status, 0);
+}
+
+
 /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
 FIXTURE(O_SUSPEND_SECCOMP) {
 	pid_t pid;

From 7d5cb68af638fc370e89df4c2d8d8c4708600e67 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@google.com>
Date: Tue, 7 Mar 2023 23:32:01 -0800
Subject: [PATCH 0253/2047] perf/benchmark: add a new benchmark for
 seccom_unotify

The benchmark is similar to the pipe benchmark. It creates two processes,
one is calling syscalls, and another process is handling them via seccomp
user notifications. It measures the time required to run a specified number
of interations.

 $ ./perf bench sched  seccomp-notify --sync-mode --loop 1000000
 # Running 'sched/seccomp-notify' benchmark:
 # Executed 1000000 system calls

     Total time: 2.769 [sec]

       2.769629 usecs/op
         361059 ops/sec

 $ ./perf bench sched  seccomp-notify
 # Running 'sched/seccomp-notify' benchmark:
 # Executed 1000000 system calls

     Total time: 8.571 [sec]

       8.571119 usecs/op
         116670 ops/sec

Signed-off-by: Andrei Vagin <avagin@google.com>
Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230308073201.3102738-7-avagin@google.com
Link: https://lore.kernel.org/r/20230630051953.454638-1-avagin@gmail.com
[kees: Added PRIu64 format string]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/arch/x86/include/uapi/asm/unistd_32.h |   3 +
 tools/arch/x86/include/uapi/asm/unistd_64.h |   3 +
 tools/perf/bench/Build                      |   1 +
 tools/perf/bench/bench.h                    |   1 +
 tools/perf/bench/sched-seccomp-notify.c     | 178 ++++++++++++++++++++
 tools/perf/builtin-bench.c                  |   1 +
 6 files changed, 187 insertions(+)
 create mode 100644 tools/perf/bench/sched-seccomp-notify.c

diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h
index bc48a4dabe5d..4798f9d18fe8 100644
--- a/tools/arch/x86/include/uapi/asm/unistd_32.h
+++ b/tools/arch/x86/include/uapi/asm/unistd_32.h
@@ -26,3 +26,6 @@
 #ifndef __NR_setns
 #define __NR_setns 346
 #endif
+#ifdef __NR_seccomp
+#define __NR_seccomp 354
+#endif
diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h
index f70d2cada256..d0f2043d7132 100644
--- a/tools/arch/x86/include/uapi/asm/unistd_64.h
+++ b/tools/arch/x86/include/uapi/asm/unistd_64.h
@@ -26,3 +26,6 @@
 #ifndef __NR_getcpu
 #define __NR_getcpu 309
 #endif
+#ifndef __NR_seccomp
+#define __NR_seccomp 317
+#endif
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 0f158dc8139b..07bbc449329e 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -1,5 +1,6 @@
 perf-y += sched-messaging.o
 perf-y += sched-pipe.o
+perf-y += sched-seccomp-notify.o
 perf-y += syscall.o
 perf-y += mem-functions.o
 perf-y += futex-hash.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 0d2b65976212..a0625c77bea3 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -21,6 +21,7 @@ extern struct timeval bench__start, bench__end, bench__runtime;
 int bench_numa(int argc, const char **argv);
 int bench_sched_messaging(int argc, const char **argv);
 int bench_sched_pipe(int argc, const char **argv);
+int bench_sched_seccomp_notify(int argc, const char **argv);
 int bench_syscall_basic(int argc, const char **argv);
 int bench_syscall_getpgid(int argc, const char **argv);
 int bench_syscall_fork(int argc, const char **argv);
diff --git a/tools/perf/bench/sched-seccomp-notify.c b/tools/perf/bench/sched-seccomp-notify.c
new file mode 100644
index 000000000000..b04ebcde4036
--- /dev/null
+++ b/tools/perf/bench/sched-seccomp-notify.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <subcmd/parse-options.h>
+#include "bench.h"
+
+#include <uapi/linux/filter.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <linux/unistd.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <linux/time64.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+
+#include <unistd.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <string.h>
+#include <errno.h>
+#include <err.h>
+#include <inttypes.h>
+
+#define LOOPS_DEFAULT 1000000UL
+static uint64_t loops = LOOPS_DEFAULT;
+static bool sync_mode;
+
+static const struct option options[] = {
+	OPT_U64('l', "loop",	&loops,		"Specify number of loops"),
+	OPT_BOOLEAN('s', "sync-mode", &sync_mode,
+		    "Enable the synchronious mode for seccomp notifications"),
+	OPT_END()
+};
+
+static const char * const bench_seccomp_usage[] = {
+	"perf bench sched secccomp-notify <options>",
+	NULL
+};
+
+static int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+	return syscall(__NR_seccomp, op, flags, args);
+}
+
+static int user_notif_syscall(int nr, unsigned int flags)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
+}
+
+#define USER_NOTIF_MAGIC INT_MAX
+static void user_notification_sync_loop(int listener)
+{
+	struct seccomp_notif_resp resp;
+	struct seccomp_notif req;
+	uint64_t nr;
+
+	for (nr = 0; nr < loops; nr++) {
+		memset(&req, 0, sizeof(req));
+		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req))
+			err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_RECV failed");
+
+		if (req.data.nr != __NR_gettid)
+			errx(EXIT_FAILURE, "unexpected syscall: %d", req.data.nr);
+
+		resp.id = req.id;
+		resp.error = 0;
+		resp.val = USER_NOTIF_MAGIC;
+		resp.flags = 0;
+		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp))
+			err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_SEND failed");
+	}
+}
+
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS  SECCOMP_IOW(4, __u64)
+#endif
+int bench_sched_seccomp_notify(int argc, const char **argv)
+{
+	struct timeval start, stop, diff;
+	unsigned long long result_usec = 0;
+	int status, listener;
+	pid_t pid;
+	long ret;
+
+	argc = parse_options(argc, argv, options, bench_seccomp_usage, 0);
+
+	gettimeofday(&start, NULL);
+
+	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	listener = user_notif_syscall(__NR_gettid,
+				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
+	if (listener < 0)
+		err(EXIT_FAILURE, "can't create a notification descriptor");
+
+	pid = fork();
+	if (pid < 0)
+		err(EXIT_FAILURE, "fork");
+	if (pid == 0) {
+		if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0))
+			err(EXIT_FAILURE, "can't set the parent death signal");
+		while (1) {
+			ret = syscall(__NR_gettid);
+			if (ret == USER_NOTIF_MAGIC)
+				continue;
+			break;
+		}
+		_exit(1);
+	}
+
+	if (sync_mode) {
+		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+			     SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0))
+			err(EXIT_FAILURE,
+			    "can't set SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP");
+	}
+	user_notification_sync_loop(listener);
+
+	kill(pid, SIGKILL);
+	if (waitpid(pid, &status, 0) != pid)
+		err(EXIT_FAILURE, "waitpid(%d) failed", pid);
+	if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGKILL)
+		errx(EXIT_FAILURE, "unexpected exit code: %d", status);
+
+	gettimeofday(&stop, NULL);
+	timersub(&stop, &start, &diff);
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		printf("# Executed %" PRIu64 " system calls\n\n",
+			loops);
+
+		result_usec = diff.tv_sec * USEC_PER_SEC;
+		result_usec += diff.tv_usec;
+
+		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+		       (unsigned long) diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+
+		printf(" %14lf usecs/op\n",
+		       (double)result_usec / (double)loops);
+		printf(" %14d ops/sec\n",
+		       (int)((double)loops /
+			     ((double)result_usec / (double)USEC_PER_SEC)));
+		break;
+
+	case BENCH_FORMAT_SIMPLE:
+		printf("%lu.%03lu\n",
+		       (unsigned long) diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+		break;
+
+	default:
+		/* reaching here is something disaster */
+		fprintf(stderr, "Unknown format:%d\n", bench_format);
+		exit(1);
+		break;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index db435b791a09..5033e8bab276 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -47,6 +47,7 @@ static struct bench numa_benchmarks[] = {
 static struct bench sched_benchmarks[] = {
 	{ "messaging",	"Benchmark for scheduling and IPC",		bench_sched_messaging	},
 	{ "pipe",	"Benchmark for pipe() between two processes",	bench_sched_pipe	},
+	{ "seccomp-notify",	"Benchmark for seccomp user notify",	bench_sched_seccomp_notify},
 	{ "all",	"Run all scheduler benchmarks",		NULL			},
 	{ NULL,		NULL,						NULL			}
 };

From 1756ddea6916669125933a8625120c84b57f6559 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Wed, 12 Jul 2023 18:23:31 +0200
Subject: [PATCH 0254/2047] pstore: Remove worst-case compression size logic

The worst case compression size used by pstore gives an upper bound for
how much the data might inadvertently *grow* due to encapsulation
overhead if the input is not compressible at all.

Given that pstore records have individual 'compressed' flags, we can
simply store the uncompressed data if compressing it would end up using
more space, making the worst case identical to the uncompressed case.

This means we can just drop all the elaborate logic that reasons about
upper bounds for each individual compression algorithm, and just store
the uncompressed data directly if compression fails for any reason.

Co-developed-by: Kees Cook <keescook@chromium.org>
Tested-by: "Guilherme G. Piccoli" <gpiccoli@igalia.com> # Steam Deck
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Link: https://lore.kernel.org/r/20230712162332.2670437-2-ardb@kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/platform.c | 206 ++++---------------------------------------
 1 file changed, 19 insertions(+), 187 deletions(-)

diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index cbc0b468c1ab..13fd1a297846 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -16,15 +16,6 @@
 #include <linux/console.h>
 #include <linux/module.h>
 #include <linux/pstore.h>
-#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS)
-#include <linux/lzo.h>
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) || IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
-#include <linux/lz4.h>
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
-#include <linux/zstd.h>
-#endif
 #include <linux/crypto.h>
 #include <linux/string.h>
 #include <linux/timer.h>
@@ -97,13 +88,7 @@ MODULE_PARM_DESC(kmsg_bytes, "amount of kernel log to snapshot (in bytes)");
 /* Compression parameters */
 static struct crypto_comp *tfm;
 
-struct pstore_zbackend {
-	int (*zbufsize)(size_t size);
-	const char *name;
-};
-
 static char *big_oops_buf;
-static size_t big_oops_buf_sz;
 
 void pstore_set_kmsg_bytes(int bytes)
 {
@@ -168,105 +153,6 @@ static bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
 	}
 }
 
-#if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS)
-static int zbufsize_deflate(size_t size)
-{
-	size_t cmpr;
-
-	switch (size) {
-	/* buffer range for efivars */
-	case 1000 ... 2000:
-		cmpr = 56;
-		break;
-	case 2001 ... 3000:
-		cmpr = 54;
-		break;
-	case 3001 ... 3999:
-		cmpr = 52;
-		break;
-	/* buffer range for nvram, erst */
-	case 4000 ... 10000:
-		cmpr = 45;
-		break;
-	default:
-		cmpr = 60;
-		break;
-	}
-
-	return (size * 100) / cmpr;
-}
-#endif
-
-#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS)
-static int zbufsize_lzo(size_t size)
-{
-	return lzo1x_worst_compress(size);
-}
-#endif
-
-#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) || IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
-static int zbufsize_lz4(size_t size)
-{
-	return LZ4_compressBound(size);
-}
-#endif
-
-#if IS_ENABLED(CONFIG_PSTORE_842_COMPRESS)
-static int zbufsize_842(size_t size)
-{
-	return size;
-}
-#endif
-
-#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
-static int zbufsize_zstd(size_t size)
-{
-	return zstd_compress_bound(size);
-}
-#endif
-
-static const struct pstore_zbackend *zbackend __ro_after_init;
-
-static const struct pstore_zbackend zbackends[] = {
-#if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS)
-	{
-		.zbufsize	= zbufsize_deflate,
-		.name		= "deflate",
-	},
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS)
-	{
-		.zbufsize	= zbufsize_lzo,
-		.name		= "lzo",
-	},
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS)
-	{
-		.zbufsize	= zbufsize_lz4,
-		.name		= "lz4",
-	},
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
-	{
-		.zbufsize	= zbufsize_lz4,
-		.name		= "lz4hc",
-	},
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_842_COMPRESS)
-	{
-		.zbufsize	= zbufsize_842,
-		.name		= "842",
-	},
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
-	{
-		.zbufsize	= zbufsize_zstd,
-		.name		= "zstd",
-	},
-#endif
-	{ }
-};
-
 static int pstore_compress(const void *in, void *out,
 			   unsigned int inlen, unsigned int outlen)
 {
@@ -287,50 +173,46 @@ static int pstore_compress(const void *in, void *out,
 static void allocate_buf_for_compression(void)
 {
 	struct crypto_comp *ctx;
-	int size;
 	char *buf;
 
 	/* Skip if not built-in or compression backend not selected yet. */
-	if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !zbackend)
+	if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !compress)
 		return;
 
 	/* Skip if no pstore backend yet or compression init already done. */
 	if (!psinfo || tfm)
 		return;
 
-	if (!crypto_has_comp(zbackend->name, 0, 0)) {
-		pr_err("Unknown compression: %s\n", zbackend->name);
+	if (!crypto_has_comp(compress, 0, 0)) {
+		pr_err("Unknown compression: %s\n", compress);
 		return;
 	}
 
-	size = zbackend->zbufsize(psinfo->bufsize);
-	if (size <= 0) {
-		pr_err("Invalid compression size for %s: %d\n",
-		       zbackend->name, size);
-		return;
-	}
-
-	buf = kmalloc(size, GFP_KERNEL);
+	/*
+	 * The compression buffer only needs to be as large as the maximum
+	 * uncompressed record size, since any record that would be expanded by
+	 * compression is just stored uncompressed.
+	 */
+	buf = kmalloc(psinfo->bufsize, GFP_KERNEL);
 	if (!buf) {
-		pr_err("Failed %d byte compression buffer allocation for: %s\n",
-		       size, zbackend->name);
+		pr_err("Failed %zu byte compression buffer allocation for: %s\n",
+		       psinfo->bufsize, compress);
 		return;
 	}
 
-	ctx = crypto_alloc_comp(zbackend->name, 0, 0);
+	ctx = crypto_alloc_comp(compress, 0, 0);
 	if (IS_ERR_OR_NULL(ctx)) {
 		kfree(buf);
-		pr_err("crypto_alloc_comp('%s') failed: %ld\n", zbackend->name,
+		pr_err("crypto_alloc_comp('%s') failed: %ld\n", compress,
 		       PTR_ERR(ctx));
 		return;
 	}
 
 	/* A non-NULL big_oops_buf indicates compression is available. */
 	tfm = ctx;
-	big_oops_buf_sz = size;
 	big_oops_buf = buf;
 
-	pr_info("Using crash dump compression: %s\n", zbackend->name);
+	pr_info("Using crash dump compression: %s\n", compress);
 }
 
 static void free_buf_for_compression(void)
@@ -341,33 +223,6 @@ static void free_buf_for_compression(void)
 	}
 	kfree(big_oops_buf);
 	big_oops_buf = NULL;
-	big_oops_buf_sz = 0;
-}
-
-/*
- * Called when compression fails, since the printk buffer
- * would be fetched for compression calling it again when
- * compression fails would have moved the iterator of
- * printk buffer which results in fetching old contents.
- * Copy the recent messages from big_oops_buf to psinfo->buf
- */
-static size_t copy_kmsg_to_buffer(int hsize, size_t len)
-{
-	size_t total_len;
-	size_t diff;
-
-	total_len = hsize + len;
-
-	if (total_len > psinfo->bufsize) {
-		diff = total_len - psinfo->bufsize + hsize;
-		memcpy(psinfo->buf, big_oops_buf, hsize);
-		memcpy(psinfo->buf + hsize, big_oops_buf + diff,
-					psinfo->bufsize - hsize);
-		total_len = psinfo->bufsize;
-	} else
-		memcpy(psinfo->buf, big_oops_buf, total_len);
-
-	return total_len;
 }
 
 void pstore_record_init(struct pstore_record *record,
@@ -426,13 +281,8 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 		record.part = part;
 		record.buf = psinfo->buf;
 
-		if (big_oops_buf) {
-			dst = big_oops_buf;
-			dst_size = big_oops_buf_sz;
-		} else {
-			dst = psinfo->buf;
-			dst_size = psinfo->bufsize;
-		}
+		dst = big_oops_buf ?: psinfo->buf;
+		dst_size = psinfo->bufsize;
 
 		/* Write dump header. */
 		header_size = snprintf(dst, dst_size, "%s#%d Part%u\n", why,
@@ -453,8 +303,8 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 				record.compressed = true;
 				record.size = zipped_len;
 			} else {
-				record.size = copy_kmsg_to_buffer(header_size,
-								  dump_size);
+				record.size = header_size + dump_size;
+				memcpy(psinfo->buf, dst, record.size);
 			}
 		} else {
 			record.size = header_size + dump_size;
@@ -703,8 +553,7 @@ static void decompress_record(struct pstore_record *record)
 	}
 
 	/* Allocate enough space to hold max decompression and ECC. */
-	unzipped_len = big_oops_buf_sz;
-	workspace = kmalloc(unzipped_len + record->ecc_notice_size,
+	workspace = kmalloc(psinfo->bufsize + record->ecc_notice_size,
 			    GFP_KERNEL);
 	if (!workspace)
 		return;
@@ -818,27 +667,10 @@ static void pstore_timefunc(struct timer_list *unused)
 	pstore_timer_kick();
 }
 
-static void __init pstore_choose_compression(void)
-{
-	const struct pstore_zbackend *step;
-
-	if (!compress)
-		return;
-
-	for (step = zbackends; step->name; step++) {
-		if (!strcmp(compress, step->name)) {
-			zbackend = step;
-			return;
-		}
-	}
-}
-
 static int __init pstore_init(void)
 {
 	int ret;
 
-	pstore_choose_compression();
-
 	/*
 	 * Check if any pstore backends registered earlier but did not
 	 * initialize compression because crypto was not ready. If so,

From 438b805003a07606f9a9f222a7ddb7dcdb87aeaa Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Wed, 12 Jul 2023 18:23:32 +0200
Subject: [PATCH 0255/2047] pstore: Replace crypto API compression with
 zlib_deflate library calls

Pstore supports compression using a variety of algorithms exposed by the
crypto API. This uses the deprecated comp (as opposed to scomp/acomp)
API, and so we should stop using that, and either move to the new API,
or switch to a different approach entirely.

Given that we only compress ASCII text in pstore, and considering that
this happens when the system is likely to be in an unstable state, the
flexibility that the complex crypto API provides does not outweigh its
impact on the risk that we might encounter additional problems when
trying to commit the kernel log contents to the pstore backend.

So let's switch [back] to the zlib deflate library API, and remove all
the complexity that really has no place in a low-level diagnostic
facility. Note that, while more modern compression algorithms have been
added to the kernel in recent years, the code size of zlib deflate is
substantially smaller than, e.g., zstd, while its performance in terms
of compression ratio is comparable for ASCII text, and speed is deemed
irrelevant in this context.

Note that this means that compressed pstore records may no longer be
accessible after a kernel upgrade, but this has never been part of the
contract. (The choice of compression algorithm is not stored in the
pstore records either)

Tested-by: "Guilherme G. Piccoli" <gpiccoli@igalia.com> # Steam Deck
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Link: https://lore.kernel.org/r/20230712162332.2670437-3-ardb@kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/Kconfig    | 100 +++----------------------------
 fs/pstore/platform.c | 136 ++++++++++++++++++++++++++++---------------
 2 files changed, 97 insertions(+), 139 deletions(-)

diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index c49d554cc9ae..3acc38600cd1 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config PSTORE
 	tristate "Persistent store support"
-	select CRYPTO if PSTORE_COMPRESS
 	default n
 	help
 	   This option enables generic access to platform level
@@ -22,99 +21,18 @@ config PSTORE_DEFAULT_KMSG_BYTES
 	  Defines default size of pstore kernel log storage.
 	  Can be enlarged if needed, not recommended to shrink it.
 
-config PSTORE_DEFLATE_COMPRESS
-	tristate "DEFLATE (ZLIB) compression"
-	default y
-	depends on PSTORE
-	select CRYPTO_DEFLATE
-	help
-	  This option enables DEFLATE (also known as ZLIB) compression
-	  algorithm support.
-
-config PSTORE_LZO_COMPRESS
-	tristate "LZO compression"
-	depends on PSTORE
-	select CRYPTO_LZO
-	help
-	  This option enables LZO compression algorithm support.
-
-config PSTORE_LZ4_COMPRESS
-	tristate "LZ4 compression"
-	depends on PSTORE
-	select CRYPTO_LZ4
-	help
-	  This option enables LZ4 compression algorithm support.
-
-config PSTORE_LZ4HC_COMPRESS
-	tristate "LZ4HC compression"
-	depends on PSTORE
-	select CRYPTO_LZ4HC
-	help
-	  This option enables LZ4HC (high compression) mode algorithm.
-
-config PSTORE_842_COMPRESS
-	bool "842 compression"
-	depends on PSTORE
-	select CRYPTO_842
-	help
-	  This option enables 842 compression algorithm support.
-
-config PSTORE_ZSTD_COMPRESS
-	bool "zstd compression"
-	depends on PSTORE
-	select CRYPTO_ZSTD
-	help
-	  This option enables zstd compression algorithm support.
-
 config PSTORE_COMPRESS
-	def_bool y
+	bool "Pstore compression (deflate)"
 	depends on PSTORE
-	depends on PSTORE_DEFLATE_COMPRESS || PSTORE_LZO_COMPRESS ||	\
-		   PSTORE_LZ4_COMPRESS || PSTORE_LZ4HC_COMPRESS ||	\
-		   PSTORE_842_COMPRESS || PSTORE_ZSTD_COMPRESS
-
-choice
-	prompt "Default pstore compression algorithm"
-	depends on PSTORE_COMPRESS
+	select ZLIB_INFLATE
+	select ZLIB_DEFLATE
+	default y
 	help
-	  This option chooses the default active compression algorithm.
-	  This change be changed at boot with "pstore.compress=..." on
-	  the kernel command line.
-
-	  Currently, pstore has support for 6 compression algorithms:
-	  deflate, lzo, lz4, lz4hc, 842 and zstd.
-
-	  The default compression algorithm is deflate.
-
-	config PSTORE_DEFLATE_COMPRESS_DEFAULT
-		bool "deflate" if PSTORE_DEFLATE_COMPRESS
-
-	config PSTORE_LZO_COMPRESS_DEFAULT
-		bool "lzo" if PSTORE_LZO_COMPRESS
-
-	config PSTORE_LZ4_COMPRESS_DEFAULT
-		bool "lz4" if PSTORE_LZ4_COMPRESS
-
-	config PSTORE_LZ4HC_COMPRESS_DEFAULT
-		bool "lz4hc" if PSTORE_LZ4HC_COMPRESS
-
-	config PSTORE_842_COMPRESS_DEFAULT
-		bool "842" if PSTORE_842_COMPRESS
-
-	config PSTORE_ZSTD_COMPRESS_DEFAULT
-		bool "zstd" if PSTORE_ZSTD_COMPRESS
-
-endchoice
-
-config PSTORE_COMPRESS_DEFAULT
-	string
-	depends on PSTORE_COMPRESS
-	default "deflate" if PSTORE_DEFLATE_COMPRESS_DEFAULT
-	default "lzo" if PSTORE_LZO_COMPRESS_DEFAULT
-	default "lz4" if PSTORE_LZ4_COMPRESS_DEFAULT
-	default "lz4hc" if PSTORE_LZ4HC_COMPRESS_DEFAULT
-	default "842" if PSTORE_842_COMPRESS_DEFAULT
-	default "zstd" if PSTORE_ZSTD_COMPRESS_DEFAULT
+	  Whether pstore records should be compressed before being written to
+	  the backing store. This is implemented using the zlib 'deflate'
+	  algorithm, using the library implementation instead of using the full
+	  blown crypto API. This reduces the risk of secondary oopses or other
+	  problems while pstore is recording panic metadata.
 
 config PSTORE_CONSOLE
 	bool "Log kernel console messages"
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 13fd1a297846..aaa5e4e66db4 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -16,13 +16,14 @@
 #include <linux/console.h>
 #include <linux/module.h>
 #include <linux/pstore.h>
-#include <linux/crypto.h>
 #include <linux/string.h>
 #include <linux/timer.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/jiffies.h>
+#include <linux/vmalloc.h>
 #include <linux/workqueue.h>
+#include <linux/zlib.h>
 
 #include "internal.h"
 
@@ -71,12 +72,21 @@ static char *backend;
 module_param(backend, charp, 0444);
 MODULE_PARM_DESC(backend, "specific backend to use");
 
-static char *compress =
-#ifdef CONFIG_PSTORE_COMPRESS_DEFAULT
-		CONFIG_PSTORE_COMPRESS_DEFAULT;
-#else
-		NULL;
-#endif
+/*
+ * pstore no longer implements compression via the crypto API, and only
+ * supports zlib deflate compression implemented using the zlib library
+ * interface. This removes additional complexity which is hard to justify for a
+ * diagnostic facility that has to operate in conditions where the system may
+ * have become unstable. Zlib deflate is comparatively small in terms of code
+ * size, and compresses ASCII text comparatively well. In terms of compression
+ * speed, deflate is not the best performer but for recording the log output on
+ * a kernel panic, this is not considered critical.
+ *
+ * The only remaining arguments supported by the compress= module parameter are
+ * 'deflate' and 'none'. To retain compatibility with existing installations,
+ * all other values are logged and replaced with 'deflate'.
+ */
+static char *compress = "deflate";
 module_param(compress, charp, 0444);
 MODULE_PARM_DESC(compress, "compression to use");
 
@@ -85,8 +95,7 @@ unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES;
 module_param(kmsg_bytes, ulong, 0444);
 MODULE_PARM_DESC(kmsg_bytes, "amount of kernel log to snapshot (in bytes)");
 
-/* Compression parameters */
-static struct crypto_comp *tfm;
+static void *compress_workspace;
 
 static char *big_oops_buf;
 
@@ -156,36 +165,49 @@ static bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
 static int pstore_compress(const void *in, void *out,
 			   unsigned int inlen, unsigned int outlen)
 {
+	struct z_stream_s zstream = {
+		.next_in	= in,
+		.avail_in	= inlen,
+		.next_out	= out,
+		.avail_out	= outlen,
+		.workspace	= compress_workspace,
+	};
 	int ret;
 
 	if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS))
 		return -EINVAL;
 
-	ret = crypto_comp_compress(tfm, in, inlen, out, &outlen);
-	if (ret) {
-		pr_err("crypto_comp_compress failed, ret = %d!\n", ret);
-		return ret;
-	}
+	ret = zlib_deflateInit2(&zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+				-MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+	if (ret != Z_OK)
+		return -EINVAL;
 
-	return outlen;
+	ret = zlib_deflate(&zstream, Z_FINISH);
+	if (ret != Z_STREAM_END)
+		return -EINVAL;
+
+	ret = zlib_deflateEnd(&zstream);
+	if (ret != Z_OK)
+		pr_warn_once("zlib_deflateEnd() failed: %d\n", ret);
+
+	return zstream.total_out;
 }
 
 static void allocate_buf_for_compression(void)
 {
-	struct crypto_comp *ctx;
 	char *buf;
 
-	/* Skip if not built-in or compression backend not selected yet. */
-	if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !compress)
+	/* Skip if not built-in or compression disabled. */
+	if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !compress ||
+	    !strcmp(compress, "none")) {
+		compress = NULL;
 		return;
+	}
 
-	/* Skip if no pstore backend yet or compression init already done. */
-	if (!psinfo || tfm)
-		return;
-
-	if (!crypto_has_comp(compress, 0, 0)) {
-		pr_err("Unknown compression: %s\n", compress);
-		return;
+	if (strcmp(compress, "deflate")) {
+		pr_err("Unsupported compression '%s', falling back to deflate\n",
+		       compress);
+		compress = "deflate";
 	}
 
 	/*
@@ -200,16 +222,15 @@ static void allocate_buf_for_compression(void)
 		return;
 	}
 
-	ctx = crypto_alloc_comp(compress, 0, 0);
-	if (IS_ERR_OR_NULL(ctx)) {
+	compress_workspace =
+		vmalloc(zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL));
+	if (!compress_workspace) {
+		pr_err("Failed to allocate zlib deflate workspace\n");
 		kfree(buf);
-		pr_err("crypto_alloc_comp('%s') failed: %ld\n", compress,
-		       PTR_ERR(ctx));
 		return;
 	}
 
 	/* A non-NULL big_oops_buf indicates compression is available. */
-	tfm = ctx;
 	big_oops_buf = buf;
 
 	pr_info("Using crash dump compression: %s\n", compress);
@@ -217,10 +238,11 @@ static void allocate_buf_for_compression(void)
 
 static void free_buf_for_compression(void)
 {
-	if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && tfm) {
-		crypto_free_comp(tfm);
-		tfm = NULL;
+	if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && compress_workspace) {
+		vfree(compress_workspace);
+		compress_workspace = NULL;
 	}
+
 	kfree(big_oops_buf);
 	big_oops_buf = NULL;
 }
@@ -531,7 +553,8 @@ void pstore_unregister(struct pstore_info *psi)
 }
 EXPORT_SYMBOL_GPL(pstore_unregister);
 
-static void decompress_record(struct pstore_record *record)
+static void decompress_record(struct pstore_record *record,
+			      struct z_stream_s *zstream)
 {
 	int ret;
 	int unzipped_len;
@@ -547,26 +570,37 @@ static void decompress_record(struct pstore_record *record)
 	}
 
 	/* Missing compression buffer means compression was not initialized. */
-	if (!big_oops_buf) {
+	if (!zstream->workspace) {
 		pr_warn("no decompression method initialized!\n");
 		return;
 	}
 
+	ret = zlib_inflateReset(zstream);
+	if (ret != Z_OK) {
+		pr_err("zlib_inflateReset() failed, ret = %d!\n", ret);
+		return;
+	}
+
 	/* Allocate enough space to hold max decompression and ECC. */
 	workspace = kmalloc(psinfo->bufsize + record->ecc_notice_size,
 			    GFP_KERNEL);
 	if (!workspace)
 		return;
 
-	/* After decompression "unzipped_len" is almost certainly smaller. */
-	ret = crypto_comp_decompress(tfm, record->buf, record->size,
-					  workspace, &unzipped_len);
-	if (ret) {
-		pr_err("crypto_comp_decompress failed, ret = %d!\n", ret);
+	zstream->next_in	= record->buf;
+	zstream->avail_in	= record->size;
+	zstream->next_out	= workspace;
+	zstream->avail_out	= psinfo->bufsize;
+
+	ret = zlib_inflate(zstream, Z_FINISH);
+	if (ret != Z_STREAM_END) {
+		pr_err("zlib_inflate() failed, ret = %d!\n", ret);
 		kfree(workspace);
 		return;
 	}
 
+	unzipped_len = zstream->total_out;
+
 	/* Append ECC notice to decompressed buffer. */
 	memcpy(workspace + unzipped_len, record->buf + record->size,
 	       record->ecc_notice_size);
@@ -596,10 +630,17 @@ void pstore_get_backend_records(struct pstore_info *psi,
 {
 	int failed = 0;
 	unsigned int stop_loop = 65536;
+	struct z_stream_s zstream = {};
 
 	if (!psi || !root)
 		return;
 
+	if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && compress) {
+		zstream.workspace = kvmalloc(zlib_inflate_workspacesize(),
+					     GFP_KERNEL);
+		zlib_inflateInit2(&zstream, -DEF_WBITS);
+	}
+
 	mutex_lock(&psi->read_mutex);
 	if (psi->open && psi->open(psi))
 		goto out;
@@ -628,7 +669,7 @@ void pstore_get_backend_records(struct pstore_info *psi,
 			break;
 		}
 
-		decompress_record(record);
+		decompress_record(record, &zstream);
 		rc = pstore_mkfile(root, record);
 		if (rc) {
 			/* pstore_mkfile() did not take record, so free it. */
@@ -644,6 +685,12 @@ void pstore_get_backend_records(struct pstore_info *psi,
 out:
 	mutex_unlock(&psi->read_mutex);
 
+	if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && compress) {
+		if (zlib_inflateEnd(&zstream) != Z_OK)
+			pr_warn("zlib_inflateEnd() failed\n");
+		kvfree(zstream.workspace);
+	}
+
 	if (failed)
 		pr_warn("failed to create %d record(s) from '%s'\n",
 			failed, psi->name);
@@ -671,13 +718,6 @@ static int __init pstore_init(void)
 {
 	int ret;
 
-	/*
-	 * Check if any pstore backends registered earlier but did not
-	 * initialize compression because crypto was not ready. If so,
-	 * initialize compression now.
-	 */
-	allocate_buf_for_compression();
-
 	ret = pstore_init_fs();
 	if (ret)
 		free_buf_for_compression();

From dfc07ee62c03eaaaf806fccc50bd52fcbc9f828f Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@ew.tq-group.com>
Date: Mon, 17 Jul 2023 10:59:33 +0200
Subject: [PATCH 0256/2047] spi: spi-fsl-lpspi: Move controller initialization
 further down

This is a preparation for reading number of chip-selects from hardware.
This needs IO resources mapped and peripheral clocking enabled.
No functional changes intended.

Signed-off-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Link: https://lore.kernel.org/r/20230717085934.409476-2-alexander.stein@ew.tq-group.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-lpspi.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index 0a0677f11a3d..f3b7bb1e52eb 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -846,22 +846,6 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
 	fsl_lpspi->is_slave = is_slave;
 	fsl_lpspi->is_only_cs1 = of_property_read_bool((&pdev->dev)->of_node,
 						"fsl,spi-only-use-cs1-sel");
-	if (of_property_read_u32((&pdev->dev)->of_node, "num-cs",
-				 &num_cs))
-		num_cs = 1;
-
-	controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32);
-	controller->transfer_one = fsl_lpspi_transfer_one;
-	controller->prepare_transfer_hardware = lpspi_prepare_xfer_hardware;
-	controller->unprepare_transfer_hardware = lpspi_unprepare_xfer_hardware;
-	controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
-	controller->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX;
-	controller->dev.of_node = pdev->dev.of_node;
-	controller->bus_num = pdev->id;
-	controller->num_chipselect = num_cs;
-	controller->slave_abort = fsl_lpspi_slave_abort;
-	if (!fsl_lpspi->is_slave)
-		controller->use_gpio_descriptors = true;
 
 	init_completion(&fsl_lpspi->xfer_done);
 
@@ -911,6 +895,22 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
 	temp = readl(fsl_lpspi->base + IMX7ULP_PARAM);
 	fsl_lpspi->txfifosize = 1 << (temp & 0x0f);
 	fsl_lpspi->rxfifosize = 1 << ((temp >> 8) & 0x0f);
+	if (of_property_read_u32((&pdev->dev)->of_node, "num-cs",
+				 &num_cs))
+		num_cs = 1;
+
+	controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32);
+	controller->transfer_one = fsl_lpspi_transfer_one;
+	controller->prepare_transfer_hardware = lpspi_prepare_xfer_hardware;
+	controller->unprepare_transfer_hardware = lpspi_unprepare_xfer_hardware;
+	controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	controller->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX;
+	controller->dev.of_node = pdev->dev.of_node;
+	controller->bus_num = pdev->id;
+	controller->num_chipselect = num_cs;
+	controller->slave_abort = fsl_lpspi_slave_abort;
+	if (!fsl_lpspi->is_slave)
+		controller->use_gpio_descriptors = true;
 
 	ret = fsl_lpspi_dma_init(&pdev->dev, fsl_lpspi, controller);
 	if (ret == -EPROBE_DEFER)

From f46b06e62c86a688ebe691b004f3056818c8ed15 Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@ew.tq-group.com>
Date: Mon, 17 Jul 2023 10:59:34 +0200
Subject: [PATCH 0257/2047] spi: spi-fsl-lpspi: Read chip-select amount from
 hardware for i.MX93

PARAM.PCSNUM register on i.MX93 indicates the number of supported
(hw) chip-selects. LPSPI4 has 3 while others have only 2.
Still allow overwriting from DT.

Signed-off-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Link: https://lore.kernel.org/r/20230717085934.409476-3-alexander.stein@ew.tq-group.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-lpspi.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index f3b7bb1e52eb..e32e8cab5aa8 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -896,8 +896,12 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
 	fsl_lpspi->txfifosize = 1 << (temp & 0x0f);
 	fsl_lpspi->rxfifosize = 1 << ((temp >> 8) & 0x0f);
 	if (of_property_read_u32((&pdev->dev)->of_node, "num-cs",
-				 &num_cs))
-		num_cs = 1;
+				 &num_cs)) {
+		if (of_device_is_compatible(pdev->dev.of_node, "fsl,imx93-spi"))
+			num_cs = ((temp >> 16) & 0xf);
+		else
+			num_cs = 1;
+	}
 
 	controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32);
 	controller->transfer_one = fsl_lpspi_transfer_one;

From 78908f45ccf1dc2f4d5fb395c460fdbbf7e9ac3a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 17 Jul 2023 21:33:03 +0100
Subject: [PATCH 0258/2047] regmap: Let users check if a register is cached

The HDA driver has a use case for checking if a register is cached which
it bodges in awkwardly and unclearly. Provide an API which allows it to
directly do what it's trying to do.

Reviewed-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230717-regmap-cache-check-v1-1-73ef688afae3@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regcache.c | 23 +++++++++++++++++++++++
 include/linux/regmap.h         |  1 +
 2 files changed, 24 insertions(+)

diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 156490ab7f34..8accf6292fe1 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -561,6 +561,29 @@ void regcache_cache_bypass(struct regmap *map, bool enable)
 }
 EXPORT_SYMBOL_GPL(regcache_cache_bypass);
 
+/**
+ * regcache_reg_cached - Check if a register is cached
+ *
+ * @map: map to check
+ * @reg: register to check
+ *
+ * Reports if a register is cached.
+ */
+bool regcache_reg_cached(struct regmap *map, unsigned int reg)
+{
+	unsigned int val;
+	int ret;
+
+	map->lock(map->lock_arg);
+
+	ret = regcache_read(map, reg, &val);
+
+	map->unlock(map->lock_arg);
+
+	return ret == 0;
+}
+EXPORT_SYMBOL_GPL(regcache_reg_cached);
+
 void regcache_set_val(struct regmap *map, void *base, unsigned int idx,
 		      unsigned int val)
 {
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 8fc0b3ebce44..c9182a47736e 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -1287,6 +1287,7 @@ int regcache_drop_region(struct regmap *map, unsigned int min,
 void regcache_cache_only(struct regmap *map, bool enable);
 void regcache_cache_bypass(struct regmap *map, bool enable);
 void regcache_mark_dirty(struct regmap *map);
+bool regcache_reg_cached(struct regmap *map, unsigned int reg);
 
 bool regmap_check_range_table(struct regmap *map, unsigned int reg,
 			      const struct regmap_access_table *table);

From d881ee5a872fd539a8c693e4c8656b9343c9aae0 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 17 Jul 2023 21:33:04 +0100
Subject: [PATCH 0259/2047] regmap: Provide test for regcache_reg_present()

Provide a KUnit test for the newly added API.

Reviewed-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230717-regmap-cache-check-v1-2-73ef688afae3@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-kunit.c | 40 ++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/drivers/base/regmap/regmap-kunit.c b/drivers/base/regmap/regmap-kunit.c
index 24257aa9004d..8dd96a55d976 100644
--- a/drivers/base/regmap/regmap-kunit.c
+++ b/drivers/base/regmap/regmap-kunit.c
@@ -833,6 +833,45 @@ static void cache_drop(struct kunit *test)
 	regmap_exit(map);
 }
 
+static void cache_present(struct kunit *test)
+{
+	struct regcache_types *t = (struct regcache_types *)test->param_value;
+	struct regmap *map;
+	struct regmap_config config;
+	struct regmap_ram_data *data;
+	unsigned int val;
+	int i;
+
+	config = test_regmap_config;
+	config.cache_type = t->type;
+
+	map = gen_regmap(&config, &data);
+	KUNIT_ASSERT_FALSE(test, IS_ERR(map));
+	if (IS_ERR(map))
+		return;
+
+	for (i = 0; i < BLOCK_TEST_SIZE; i++)
+		data->read[i] = false;
+
+	/* No defaults so no registers cached. */
+	for (i = 0; i < BLOCK_TEST_SIZE; i++)
+		KUNIT_ASSERT_FALSE(test, regcache_reg_cached(map, i));
+
+	/* We didn't trigger any reads */
+	for (i = 0; i < BLOCK_TEST_SIZE; i++)
+		KUNIT_ASSERT_FALSE(test, data->read[i]);
+
+	/* Fill the cache */
+	for (i = 0; i < BLOCK_TEST_SIZE; i++)
+		KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &val));
+
+	/* Now everything should be cached */
+	for (i = 0; i < BLOCK_TEST_SIZE; i++)
+		KUNIT_ASSERT_TRUE(test, regcache_reg_cached(map, i));
+
+	regmap_exit(map);
+}
+
 struct raw_test_types {
 	const char *name;
 
@@ -1172,6 +1211,7 @@ static struct kunit_case regmap_test_cases[] = {
 	KUNIT_CASE_PARAM(cache_sync_readonly, real_cache_types_gen_params),
 	KUNIT_CASE_PARAM(cache_sync_patch, real_cache_types_gen_params),
 	KUNIT_CASE_PARAM(cache_drop, sparse_cache_types_gen_params),
+	KUNIT_CASE_PARAM(cache_present, sparse_cache_types_gen_params),
 
 	KUNIT_CASE_PARAM(raw_read_defaults_single, raw_test_types_gen_params),
 	KUNIT_CASE_PARAM(raw_read_defaults, raw_test_types_gen_params),

From 99aae70551f99536936438bbcfc562df69eeb79c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 17 Jul 2023 21:33:05 +0100
Subject: [PATCH 0260/2047] ALSA: hda: Use regcache_reg_cached() rather than
 open coding

The HDA driver intentionally drops repeated writes to registers in some
circumstances, beyond the suppression of noop writes that regmap does in
regmap_update_bits(). It does this by checking if the register is cached
before doing a regmap_update_bits(), now we have an API for querying this
directly use it directly rather than trying a read in cache only mode
making the code a little clearer.

Reviewed-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230717-regmap-cache-check-v1-3-73ef688afae3@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/hda/hdac_regmap.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c
index 9b1bcabd8414..97cee096a286 100644
--- a/sound/hda/hdac_regmap.c
+++ b/sound/hda/hdac_regmap.c
@@ -556,17 +556,14 @@ EXPORT_SYMBOL_GPL(snd_hdac_regmap_update_raw);
 static int reg_raw_update_once(struct hdac_device *codec, unsigned int reg,
 			       unsigned int mask, unsigned int val)
 {
-	unsigned int orig;
-	int err;
+	int err = 0;
 
 	if (!codec->regmap)
 		return reg_raw_update(codec, reg, mask, val);
 
 	mutex_lock(&codec->regmap_lock);
-	regcache_cache_only(codec->regmap, true);
-	err = regmap_read(codec->regmap, reg, &orig);
-	regcache_cache_only(codec->regmap, false);
-	if (err < 0)
+	/* Discard any updates to already initialised registers. */
+	if (!regcache_reg_cached(codec->regmap, reg))
 		err = regmap_update_bits(codec->regmap, reg, mask, val);
 	mutex_unlock(&codec->regmap_lock);
 	return err;

From 6023fffc3e276d1ab0d1262ea9be3b720325fede Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Tue, 18 Jul 2023 08:32:55 +0800
Subject: [PATCH 0261/2047] regulator: Remove duplicated include in
 mt6359-regulator.c

./drivers/regulator/mt6359-regulator.c: linux/platform_device.h is included more than once.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=5901
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230718003255.124594-1-yang.lee@linux.alibaba.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mt6359-regulator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/mt6359-regulator.c b/drivers/regulator/mt6359-regulator.c
index 5cf6448fb05f..c8a788858824 100644
--- a/drivers/regulator/mt6359-regulator.c
+++ b/drivers/regulator/mt6359-regulator.c
@@ -2,7 +2,6 @@
 //
 // Copyright (c) 2021 MediaTek Inc.
 
-#include <linux/platform_device.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/mt6359/registers.h>
 #include <linux/mfd/mt6359p/registers.h>

From 55b2395e4e92adc492c6b30ac109eb78250dcd9d Mon Sep 17 00:00:00 2001
From: Asmaa Mnebhi <asmaa@nvidia.com>
Date: Tue, 11 Jul 2023 11:12:48 -0400
Subject: [PATCH 0262/2047] gpio: mmio: handle "ngpios" properly in
 bgpio_init()

bgpio_init() uses "sz" argument to populate ngpio, which is not
accurate. Instead, read the "ngpios" property from the DT and if it
doesn't exist, use the "sz" argument. With this change, drivers no
longer need to overwrite the ngpio variable after calling bgpio_init().

If the "ngpios" property is specified, bgpio_bits is calculated
as the round up value of ngpio. At the moment, the only requirement
specified is that the round up value must be a multiple of 8 but
it should also be a power of 2 because we provide accessors based
on the bank size in bgpio_setup_accessors().

Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-mmio.c |  9 +++++-
 drivers/gpio/gpiolib.c   | 68 ++++++++++++++++++++++------------------
 drivers/gpio/gpiolib.h   |  1 +
 3 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c
index d9dff3dc92ae..74fdf0d87b2c 100644
--- a/drivers/gpio/gpio-mmio.c
+++ b/drivers/gpio/gpio-mmio.c
@@ -60,6 +60,8 @@ o        `                     ~~~~\___/~~~~    ` controller in FPGA is ,.`
 #include <linux/of.h>
 #include <linux/of_device.h>
 
+#include "gpiolib.h"
+
 static void bgpio_write8(void __iomem *reg, unsigned long data)
 {
 	writeb(data, reg);
@@ -614,10 +616,15 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev,
 	gc->parent = dev;
 	gc->label = dev_name(dev);
 	gc->base = -1;
-	gc->ngpio = gc->bgpio_bits;
 	gc->request = bgpio_request;
 	gc->be_bits = !!(flags & BGPIOF_BIG_ENDIAN);
 
+	ret = gpiochip_get_ngpios(gc, dev);
+	if (ret)
+		gc->ngpio = gc->bgpio_bits;
+	else
+		gc->bgpio_bits = roundup_pow_of_two(round_up(gc->ngpio, 8));
+
 	ret = bgpio_setup_io(gc, dat, set, clr, flags);
 	if (ret)
 		return ret;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 62199ec96db2..edab00c9cb3c 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -700,6 +700,40 @@ void *gpiochip_get_data(struct gpio_chip *gc)
 }
 EXPORT_SYMBOL_GPL(gpiochip_get_data);
 
+int gpiochip_get_ngpios(struct gpio_chip *gc, struct device *dev)
+{
+	u32 ngpios = gc->ngpio;
+	int ret;
+
+	if (ngpios == 0) {
+		ret = device_property_read_u32(dev, "ngpios", &ngpios);
+		if (ret == -ENODATA)
+			/*
+			 * -ENODATA means that there is no property found and
+			 * we want to issue the error message to the user.
+			 * Besides that, we want to return different error code
+			 * to state that supplied value is not valid.
+			 */
+			ngpios = 0;
+		else if (ret)
+			return ret;
+
+		gc->ngpio = ngpios;
+	}
+
+	if (gc->ngpio == 0) {
+		chip_err(gc, "tried to insert a GPIO chip with zero lines\n");
+		return -EINVAL;
+	}
+
+	if (gc->ngpio > FASTPATH_NGPIO)
+		chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n",
+			gc->ngpio, FASTPATH_NGPIO);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gpiochip_get_ngpios);
+
 int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 			       struct lock_class_key *lock_key,
 			       struct lock_class_key *request_key)
@@ -707,7 +741,6 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 	struct gpio_device *gdev;
 	unsigned long flags;
 	unsigned int i;
-	u32 ngpios = 0;
 	int base = 0;
 	int ret = 0;
 
@@ -753,36 +786,9 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 	else
 		gdev->owner = THIS_MODULE;
 
-	/*
-	 * Try the device properties if the driver didn't supply the number
-	 * of GPIO lines.
-	 */
-	ngpios = gc->ngpio;
-	if (ngpios == 0) {
-		ret = device_property_read_u32(&gdev->dev, "ngpios", &ngpios);
-		if (ret == -ENODATA)
-			/*
-			 * -ENODATA means that there is no property found and
-			 * we want to issue the error message to the user.
-			 * Besides that, we want to return different error code
-			 * to state that supplied value is not valid.
-			 */
-			ngpios = 0;
-		else if (ret)
-			goto err_free_dev_name;
-
-		gc->ngpio = ngpios;
-	}
-
-	if (gc->ngpio == 0) {
-		chip_err(gc, "tried to insert a GPIO chip with zero lines\n");
-		ret = -EINVAL;
+	ret = gpiochip_get_ngpios(gc, &gdev->dev);
+	if (ret)
 		goto err_free_dev_name;
-	}
-
-	if (gc->ngpio > FASTPATH_NGPIO)
-		chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n",
-			  gc->ngpio, FASTPATH_NGPIO);
 
 	gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL);
 	if (!gdev->descs) {
@@ -947,7 +953,7 @@ err_print_message:
 	/* failures here can mean systems won't boot... */
 	if (ret != -EPROBE_DEFER) {
 		pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__,
-		       base, base + (int)ngpios - 1,
+		       base, base + (int)gc->ngpio - 1,
 		       gc->label ? : "generic", ret);
 	}
 	return ret;
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 7c562fbb9fb0..08e8e8274883 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -218,6 +218,7 @@ int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
 int gpio_set_debounce_timeout(struct gpio_desc *desc, unsigned int debounce);
 int gpiod_hog(struct gpio_desc *desc, const char *name,
 		unsigned long lflags, enum gpiod_flags dflags);
+int gpiochip_get_ngpios(struct gpio_chip *gc, struct device *dev);
 
 /*
  * Return the GPIO number of the passed descriptor relative to its chip

From 1b0975ee3bdd3eb19a47371c26fd7ef8f7f6b599 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 12 Jul 2023 21:23:17 +0100
Subject: [PATCH 0263/2047] kunit: qemu_configs: Enable all architectural
 features for arm64

While it probably doesn't make a huge difference given the current KUnit
coverage we will get the best coverage of arm64 architecture features if
we specify -cpu=max rather than picking a specific CPU, this will include
all architecture features that qemu supports including many which have not
yet made it into physical implementations.

Due to performance issues emulating the architected pointer authentication
algorithm it is recommended to use the implementation defined algorithm
that qemu has instead, this should make no meaningful difference to the
coverage and will run the tests faster.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/qemu_configs/arm64.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/kunit/qemu_configs/arm64.py b/tools/testing/kunit/qemu_configs/arm64.py
index 67d04064f785..d3ff27024755 100644
--- a/tools/testing/kunit/qemu_configs/arm64.py
+++ b/tools/testing/kunit/qemu_configs/arm64.py
@@ -9,4 +9,4 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y''',
 			   qemu_arch='aarch64',
 			   kernel_path='arch/arm64/boot/Image.gz',
 			   kernel_command_line='console=ttyAMA0',
-			   extra_qemu_params=['-machine', 'virt', '-cpu', 'cortex-a57'])
+			   extra_qemu_params=['-machine', 'virt', '-cpu', 'max,pauth-impdef=on'])

From df3cb7ac0edcc9c850799ae1c7a9cb33e95f834d Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Tue, 18 Jul 2023 07:27:46 +0200
Subject: [PATCH 0264/2047] kunit: test-bug.h: include `stddef.h` for `NULL`

The header uses `NULL` in both `CONFIG_KUNIT=y` and `=n` cases,
but does not include it explicitly.

When `CONFIG_KUNIT=y`, the header is already getting included via
the other headers, so it is not a problem for users.

However, when `CONFIG_KUNIT=n`, it is not, and thus a user could hit
a build error when including `kunit/test-bug.h`, like we are doing
later in this series [1].

Thus include `linux/stddef.h`, and do so outside the `#if`, since it
is used in both cases.

Reported-by: Boqun Feng <boqun.feng@gmail.com>
Closes: https://lore.kernel.org/rust-for-linux/ZJ8cNUW3oR2p+gL1@boqun-archlinux/ [1]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/test-bug.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/kunit/test-bug.h b/include/kunit/test-bug.h
index 30ca541b6ff2..47aa8f21ccce 100644
--- a/include/kunit/test-bug.h
+++ b/include/kunit/test-bug.h
@@ -9,6 +9,8 @@
 #ifndef _KUNIT_TEST_BUG_H
 #define _KUNIT_TEST_BUG_H
 
+#include <linux/stddef.h> /* for NULL */
+
 #if IS_ENABLED(CONFIG_KUNIT)
 
 #include <linux/jump_label.h> /* For static branch */

From a30e94c2967308d45dcecfd1eede50fc3f6495eb Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Tue, 18 Jul 2023 07:27:47 +0200
Subject: [PATCH 0265/2047] rust: init: make doctests compilable/testable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rust documentation tests are going to be build/run-tested
with the KUnit integration added in a future patch, thus
update them to make them compilable/testable so that we
may start enforcing it.

Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Vincenzo Palazzo <vincenzopalazzodev@gmail.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Benno Lossin <benno.lossin@proton.me>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Björn Roy Baron <bjorn3_gh@protonmail.com>
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 rust/kernel/init.rs | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index b4332a4ec1f4..e05563aad2ed 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -120,14 +120,24 @@
 //!   `slot` gets called.
 //!
 //! ```rust
-//! use kernel::{prelude::*, init};
+//! # #![allow(unreachable_pub, clippy::disallowed_names)]
+//! use kernel::{prelude::*, init, types::Opaque};
 //! use core::{ptr::addr_of_mut, marker::PhantomPinned, pin::Pin};
 //! # mod bindings {
+//! #     #![allow(non_camel_case_types)]
 //! #     pub struct foo;
 //! #     pub unsafe fn init_foo(_ptr: *mut foo) {}
 //! #     pub unsafe fn destroy_foo(_ptr: *mut foo) {}
 //! #     pub unsafe fn enable_foo(_ptr: *mut foo, _flags: u32) -> i32 { 0 }
 //! # }
+//! # // `Error::from_errno` is `pub(crate)` in the `kernel` crate, thus provide a workaround.
+//! # trait FromErrno {
+//! #     fn from_errno(errno: core::ffi::c_int) -> Error {
+//! #         // Dummy error that can be constructed outside the `kernel` crate.
+//! #         Error::from(core::fmt::Error)
+//! #     }
+//! # }
+//! # impl FromErrno for Error {}
 //! /// # Invariants
 //! ///
 //! /// `foo` is always initialized
@@ -158,7 +168,7 @@
 //!                 if err != 0 {
 //!                     // Enabling has failed, first clean up the foo and then return the error.
 //!                     bindings::destroy_foo(Opaque::raw_get(foo));
-//!                     return Err(Error::from_kernel_errno(err));
+//!                     return Err(Error::from_errno(err));
 //!                 }
 //!
 //!                 // All fields of `RawFoo` have been initialized, since `_p` is a ZST.
@@ -226,8 +236,7 @@ pub mod macros;
 ///
 /// ```rust
 /// # #![allow(clippy::disallowed_names, clippy::new_ret_no_self)]
-/// # use kernel::{init, pin_init, stack_pin_init, init::*, sync::Mutex, new_mutex};
-/// # use macros::pin_data;
+/// # use kernel::{init, macros::pin_data, pin_init, stack_pin_init, init::*, sync::Mutex, new_mutex};
 /// # use core::pin::Pin;
 /// #[pin_data]
 /// struct Foo {
@@ -277,7 +286,7 @@ macro_rules! stack_pin_init {
 ///
 /// # Examples
 ///
-/// ```rust
+/// ```rust,ignore
 /// # #![allow(clippy::disallowed_names, clippy::new_ret_no_self)]
 /// # use kernel::{init, pin_init, stack_try_pin_init, init::*, sync::Mutex, new_mutex};
 /// # use macros::pin_data;
@@ -303,7 +312,7 @@ macro_rules! stack_pin_init {
 /// pr_info!("a: {}", &*foo.a.lock());
 /// ```
 ///
-/// ```rust
+/// ```rust,ignore
 /// # #![allow(clippy::disallowed_names, clippy::new_ret_no_self)]
 /// # use kernel::{init, pin_init, stack_try_pin_init, init::*, sync::Mutex, new_mutex};
 /// # use macros::pin_data;
@@ -513,8 +522,7 @@ macro_rules! stack_try_pin_init {
 /// For instance:
 ///
 /// ```rust
-/// # use kernel::pin_init;
-/// # use macros::pin_data;
+/// # use kernel::{macros::pin_data, pin_init};
 /// # use core::{ptr::addr_of_mut, marker::PhantomPinned};
 /// #[pin_data]
 /// struct Buf {
@@ -841,7 +849,7 @@ macro_rules! init {
 /// # Examples
 ///
 /// ```rust
-/// use kernel::{init::PinInit, error::Error, InPlaceInit};
+/// use kernel::{init::{PinInit, zeroed}, error::Error};
 /// struct BigBuf {
 ///     big: Box<[u8; 1024 * 1024 * 1024]>,
 ///     small: [u8; 1024 * 1024],

From a1cf97c2d43ae51f3de091d51adff6b70a5cd55c Mon Sep 17 00:00:00 2001
From: Wyes Karny <wyes.karny@amd.com>
Date: Mon, 19 Jun 2023 19:04:59 +0000
Subject: [PATCH 0266/2047] cpupower: Recognise amd-pstate active mode driver

amd-pstate active mode driver name is "amd-pstate-epp". Use common
prefix for string matching condition to recognise amd-pstate active mode
driver.

Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Wyes Karny <wyes.karny@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Tested-by: Perry Yuan <Perry.Yuan@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/helpers/misc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index 9547b29254a7..0c56fc77f93b 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -95,7 +95,7 @@ bool cpupower_amd_pstate_enabled(void)
 	if (!driver)
 		return ret;
 
-	if (!strcmp(driver, "amd-pstate"))
+	if (!strncmp(driver, "amd", 3))
 		ret = true;
 
 	cpufreq_put_driver(driver);

From 1ce5ab7c1dbfa82f0d382a48cccb51178cbf5416 Mon Sep 17 00:00:00 2001
From: Wyes Karny <wyes.karny@amd.com>
Date: Mon, 19 Jun 2023 19:05:00 +0000
Subject: [PATCH 0267/2047] cpupower: Add is_valid_path API

Add is_valid_path API to check whether the sysfs file is present or not.

Suggested-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Wyes Karny <wyes.karny@amd.com>
Tested-by: Perry Yuan <Perry.Yuan@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/lib/cpupower.c        | 7 +++++++
 tools/power/cpupower/lib/cpupower_intern.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
index 3f7d0c0c5067..7a2ef691b20e 100644
--- a/tools/power/cpupower/lib/cpupower.c
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -14,6 +14,13 @@
 #include "cpupower.h"
 #include "cpupower_intern.h"
 
+int is_valid_path(const char *path)
+{
+	if (access(path, F_OK) == -1)
+		return 0;
+	return 1;
+}
+
 unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen)
 {
 	ssize_t numread;
diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h
index ac1112b956ec..5fdb8620d41b 100644
--- a/tools/power/cpupower/lib/cpupower_intern.h
+++ b/tools/power/cpupower/lib/cpupower_intern.h
@@ -7,5 +7,6 @@
 
 #define SYSFS_PATH_MAX 255
 
+int is_valid_path(const char *path);
 unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen);
 unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen);

From f2ab5557119a5ccd0ceaf7ecdca00ab782cd76c5 Mon Sep 17 00:00:00 2001
From: Wyes Karny <wyes.karny@amd.com>
Date: Mon, 19 Jun 2023 19:05:01 +0000
Subject: [PATCH 0268/2047] cpupower: Add EPP value change support

amd_pstate and intel_pstate active mode drivers support energy
performance preference feature. Through this user can convey it's
energy/performance preference to platform. Add this value change
capability to cpupower.

To change the EPP value use below command:
cpupower set --epp performance

Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Wyes Karny <wyes.karny@amd.com>
Tested-by: Perry Yuan <Perry.Yuan@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/cpupower-set.c    | 23 +++++++++++++++++++-
 tools/power/cpupower/utils/helpers/helpers.h |  5 +++++
 tools/power/cpupower/utils/helpers/misc.c    | 19 ++++++++++++++++
 3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index 180d5ba877e6..a789b123dbd4 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -18,6 +18,7 @@
 
 static struct option set_opts[] = {
 	{"perf-bias", required_argument, NULL, 'b'},
+	{"epp", required_argument, NULL, 'e'},
 	{ },
 };
 
@@ -37,11 +38,13 @@ int cmd_set(int argc, char **argv)
 	union {
 		struct {
 			int perf_bias:1;
+			int epp:1;
 		};
 		int params;
 	} params;
 	int perf_bias = 0;
 	int ret = 0;
+	char epp[30];
 
 	ret = uname(&uts);
 	if (!ret && (!strcmp(uts.machine, "ppc64le") ||
@@ -55,7 +58,7 @@ int cmd_set(int argc, char **argv)
 
 	params.params = 0;
 	/* parameter parsing */
-	while ((ret = getopt_long(argc, argv, "b:",
+	while ((ret = getopt_long(argc, argv, "b:e:",
 						set_opts, NULL)) != -1) {
 		switch (ret) {
 		case 'b':
@@ -69,6 +72,15 @@ int cmd_set(int argc, char **argv)
 			}
 			params.perf_bias = 1;
 			break;
+		case 'e':
+			if (params.epp)
+				print_wrong_arg_exit();
+			if (sscanf(optarg, "%29s", epp) != 1) {
+				print_wrong_arg_exit();
+				return -EINVAL;
+			}
+			params.epp = 1;
+			break;
 		default:
 			print_wrong_arg_exit();
 		}
@@ -102,6 +114,15 @@ int cmd_set(int argc, char **argv)
 				break;
 			}
 		}
+
+		if (params.epp) {
+			ret = cpupower_set_epp(cpu, epp);
+			if (ret) {
+				fprintf(stderr,
+					"Error setting epp value on CPU %d\n", cpu);
+				break;
+			}
+		}
 	}
 	return ret;
 }
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 96e4bede078b..5d998de2d291 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -116,6 +116,8 @@ extern int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val);
 extern int cpupower_intel_get_perf_bias(unsigned int cpu);
 extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu);
 
+extern int cpupower_set_epp(unsigned int cpu, char *epp);
+
 /* Read/Write msr ****************************/
 
 /* PCI stuff ****************************/
@@ -173,6 +175,9 @@ static inline int cpupower_intel_get_perf_bias(unsigned int cpu)
 static inline unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu)
 { return 0; };
 
+static inline int cpupower_set_epp(unsigned int cpu, char *epp)
+{ return -1; };
+
 /* Read/Write msr ****************************/
 
 static inline int cpufreq_has_boost_support(unsigned int cpu, int *support,
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index 0c56fc77f93b..583df38ab13c 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -87,6 +87,25 @@ int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val)
 	return 0;
 }
 
+int cpupower_set_epp(unsigned int cpu, char *epp)
+{
+	char path[SYSFS_PATH_MAX];
+	char linebuf[30] = {};
+
+	snprintf(path, sizeof(path),
+		PATH_TO_CPU "cpu%u/cpufreq/energy_performance_preference", cpu);
+
+	if (!is_valid_path(path))
+		return -1;
+
+	snprintf(linebuf, sizeof(linebuf), "%s", epp);
+
+	if (cpupower_write_sysfs(path, linebuf, 30) <= 0)
+		return -1;
+
+	return 0;
+}
+
 bool cpupower_amd_pstate_enabled(void)
 {
 	char *driver = cpufreq_get_driver(0);

From df8776b03689987e3239f3c010b6dc6ab4185d30 Mon Sep 17 00:00:00 2001
From: Wyes Karny <wyes.karny@amd.com>
Date: Mon, 19 Jun 2023 19:05:02 +0000
Subject: [PATCH 0269/2047] cpupower: Add support for amd_pstate mode change

amd_pstate supports changing of its mode dynamically via `status` sysfs
file. Add the same capability in cpupower. To change the mode to active
mode use below command:

cpupower set --amd-pstate-mode active

Acked-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Wyes Karny <wyes.karny@amd.com>
Tested-by: Perry Yuan <Perry.Yuan@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/cpupower-set.c    | 24 ++++++++++++++++++--
 tools/power/cpupower/utils/helpers/helpers.h |  3 +++
 tools/power/cpupower/utils/helpers/misc.c    | 18 +++++++++++++++
 3 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index a789b123dbd4..c2ba69b7ea54 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -19,6 +19,7 @@
 static struct option set_opts[] = {
 	{"perf-bias", required_argument, NULL, 'b'},
 	{"epp", required_argument, NULL, 'e'},
+	{"amd-pstate-mode", required_argument, NULL, 'm'},
 	{ },
 };
 
@@ -39,12 +40,13 @@ int cmd_set(int argc, char **argv)
 		struct {
 			int perf_bias:1;
 			int epp:1;
+			int mode:1;
 		};
 		int params;
 	} params;
 	int perf_bias = 0;
 	int ret = 0;
-	char epp[30];
+	char epp[30], mode[20];
 
 	ret = uname(&uts);
 	if (!ret && (!strcmp(uts.machine, "ppc64le") ||
@@ -58,7 +60,7 @@ int cmd_set(int argc, char **argv)
 
 	params.params = 0;
 	/* parameter parsing */
-	while ((ret = getopt_long(argc, argv, "b:e:",
+	while ((ret = getopt_long(argc, argv, "b:e:m:",
 						set_opts, NULL)) != -1) {
 		switch (ret) {
 		case 'b':
@@ -81,6 +83,17 @@ int cmd_set(int argc, char **argv)
 			}
 			params.epp = 1;
 			break;
+		case 'm':
+			if (cpupower_cpu_info.vendor != X86_VENDOR_AMD)
+				print_wrong_arg_exit();
+			if (params.mode)
+				print_wrong_arg_exit();
+			if (sscanf(optarg, "%19s", mode) != 1) {
+				print_wrong_arg_exit();
+				return -EINVAL;
+			}
+			params.mode = 1;
+			break;
 		default:
 			print_wrong_arg_exit();
 		}
@@ -89,6 +102,12 @@ int cmd_set(int argc, char **argv)
 	if (!params.params)
 		print_wrong_arg_exit();
 
+	if (params.mode) {
+		ret = cpupower_set_amd_pstate_mode(mode);
+		if (ret)
+			fprintf(stderr, "Error setting mode\n");
+	}
+
 	/* Default is: set all CPUs */
 	if (bitmask_isallclear(cpus_chosen))
 		bitmask_setall(cpus_chosen);
@@ -123,6 +142,7 @@ int cmd_set(int argc, char **argv)
 				break;
 			}
 		}
+
 	}
 	return ret;
 }
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 5d998de2d291..d35596631eef 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -117,6 +117,7 @@ extern int cpupower_intel_get_perf_bias(unsigned int cpu);
 extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu);
 
 extern int cpupower_set_epp(unsigned int cpu, char *epp);
+extern int cpupower_set_amd_pstate_mode(char *mode);
 
 /* Read/Write msr ****************************/
 
@@ -177,6 +178,8 @@ static inline unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu)
 
 static inline int cpupower_set_epp(unsigned int cpu, char *epp)
 { return -1; };
+static inline int cpupower_set_amd_pstate_mode(char *mode)
+{ return -1; };
 
 /* Read/Write msr ****************************/
 
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index 583df38ab13c..075c136a100c 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -106,6 +106,24 @@ int cpupower_set_epp(unsigned int cpu, char *epp)
 	return 0;
 }
 
+int cpupower_set_amd_pstate_mode(char *mode)
+{
+	char path[SYSFS_PATH_MAX];
+	char linebuf[20] = {};
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "amd_pstate/status");
+
+	if (!is_valid_path(path))
+		return -1;
+
+	snprintf(linebuf, sizeof(linebuf), "%s\n", mode);
+
+	if (cpupower_write_sysfs(path, linebuf, 20) <= 0)
+		return -1;
+
+	return 0;
+}
+
 bool cpupower_amd_pstate_enabled(void)
 {
 	char *driver = cpufreq_get_driver(0);

From eb426fc6bdd6b731cbc0900f24a8c5fba10a7631 Mon Sep 17 00:00:00 2001
From: Wyes Karny <wyes.karny@amd.com>
Date: Mon, 19 Jun 2023 19:05:03 +0000
Subject: [PATCH 0270/2047] cpupower: Add turbo-boost support in cpupower

If boost sysfs (/sys/devices/system/cpu/cpufreq/boost) file is present
turbo-boost is feature is supported in the hardware. By default this
feature should be enabled. But to disable/enable it write to the sysfs
file. Use the same to control this feature via cpupower.

To enable:
cpupower set --turbo-boost 1

To disable:
cpupower set --turbo-boost 0

Acked-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Wyes Karny <wyes.karny@amd.com>
Tested-by: Perry Yuan <Perry.Yuan@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/cpupower-set.c    | 22 +++++++++++++++++++-
 tools/power/cpupower/utils/helpers/helpers.h |  3 +++
 tools/power/cpupower/utils/helpers/misc.c    | 18 ++++++++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index c2ba69b7ea54..0677b58374ab 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -20,6 +20,7 @@ static struct option set_opts[] = {
 	{"perf-bias", required_argument, NULL, 'b'},
 	{"epp", required_argument, NULL, 'e'},
 	{"amd-pstate-mode", required_argument, NULL, 'm'},
+	{"turbo-boost", required_argument, NULL, 't'},
 	{ },
 };
 
@@ -41,10 +42,11 @@ int cmd_set(int argc, char **argv)
 			int perf_bias:1;
 			int epp:1;
 			int mode:1;
+			int turbo_boost:1;
 		};
 		int params;
 	} params;
-	int perf_bias = 0;
+	int perf_bias = 0, turbo_boost = 1;
 	int ret = 0;
 	char epp[30], mode[20];
 
@@ -94,6 +96,18 @@ int cmd_set(int argc, char **argv)
 			}
 			params.mode = 1;
 			break;
+		case 't':
+			if (params.turbo_boost)
+				print_wrong_arg_exit();
+			turbo_boost = atoi(optarg);
+			if (turbo_boost < 0 || turbo_boost > 1) {
+				printf("--turbo-boost param out of range [0-1]\n");
+				print_wrong_arg_exit();
+			}
+			params.turbo_boost = 1;
+			break;
+
+
 		default:
 			print_wrong_arg_exit();
 		}
@@ -108,6 +122,12 @@ int cmd_set(int argc, char **argv)
 			fprintf(stderr, "Error setting mode\n");
 	}
 
+	if (params.turbo_boost) {
+		ret = cpupower_set_turbo_boost(turbo_boost);
+		if (ret)
+			fprintf(stderr, "Error setting turbo-boost\n");
+	}
+
 	/* Default is: set all CPUs */
 	if (bitmask_isallclear(cpus_chosen))
 		bitmask_setall(cpus_chosen);
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index d35596631eef..95749b8ee475 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -118,6 +118,7 @@ extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu);
 
 extern int cpupower_set_epp(unsigned int cpu, char *epp);
 extern int cpupower_set_amd_pstate_mode(char *mode);
+extern int cpupower_set_turbo_boost(int turbo_boost);
 
 /* Read/Write msr ****************************/
 
@@ -180,6 +181,8 @@ static inline int cpupower_set_epp(unsigned int cpu, char *epp)
 { return -1; };
 static inline int cpupower_set_amd_pstate_mode(char *mode)
 { return -1; };
+static inline int cpupower_set_turbo_boost(int turbo_boost)
+{ return -1; };
 
 /* Read/Write msr ****************************/
 
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index 075c136a100c..76e461ff4f74 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -124,6 +124,24 @@ int cpupower_set_amd_pstate_mode(char *mode)
 	return 0;
 }
 
+int cpupower_set_turbo_boost(int turbo_boost)
+{
+	char path[SYSFS_PATH_MAX];
+	char linebuf[2] = {};
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost");
+
+	if (!is_valid_path(path))
+		return -1;
+
+	snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost);
+
+	if (cpupower_write_sysfs(path, linebuf, 2) <= 0)
+		return -1;
+
+	return 0;
+}
+
 bool cpupower_amd_pstate_enabled(void)
 {
 	char *driver = cpufreq_get_driver(0);

From 99481d2195bfd13a663904e6014887abf46b57c7 Mon Sep 17 00:00:00 2001
From: Likhitha Korrapati <likhitha@linux.ibm.com>
Date: Fri, 14 Jul 2023 15:10:21 +0530
Subject: [PATCH 0271/2047] cpupower: Fix cpuidle_set to accept only numeric
 values for idle-set operation.

For both the d and e options in 'cpupower idle_set' command, an
atoi() conversion is done without checking if the input argument
is all numeric. So, an atoi conversion is done on any character
provided as input and the CPU idle_set operation continues with
that integer value, which may not be what is intended or entirely
correct.

The output of cpuidle-set before patch is as follows:

[root@xxx cpupower]# cpupower idle-set -e 1$
Idlestate 1 enabled on CPU 0
[snip]
Idlestate 1 enabled on CPU 47

[root@xxx cpupower]# cpupower idle-set -e 11
Idlestate 11 not available on CPU 0
[snip]
Idlestate 11 not available on CPU 47

[root@xxx cpupower]# cpupower idle-set -d 12
Idlestate 12 not available on CPU 0
[snip]
Idlestate 12 not available on CPU 47

[root@xxx cpupower]# cpupower idle-set -d qw
Idlestate 0 disabled on CPU 0
[snip]
Idlestate 0 disabled on CPU 47

This patch adds a check for both d and e options in cpuidle-set.c
to see that the idle_set value is all numeric before doing a
string-to-int conversion using strtol().

The output of cpuidle-set after the patch is as below:

[root@xxx cpupower]# ./cpupower idle-set -e 1$
Bad idle_set value: 1$. Integer expected

[root@xxx cpupower]# ./cpupower idle-set -e 11
Idlestate 11 not available on CPU 0
[snip]
Idlestate 11 not available on CPU 47

[root@xxx cpupower]# ./cpupower idle-set -d 12
Idlestate 12 not available on CPU 0
[snip]
Idlestate 12 not available on CPU 47

[root@xxx cpupower]# ./cpupower idle-set -d qw
Bad idle_set value: qw. Integer expected

Signed-off-by: Brahadambal Srinivasan <latha@linux.vnet.ibm.com>
Signed-off-by: Likhitha Korrapati <likhitha@linux.ibm.com>
Tested-by: Pavithra Prakash <pavrampu@linux.vnet.ibm.com>
Reviewed-by: Rick Lindsley <ricklind@linux.vnet.ibm.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/cpuidle-set.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c
index 46158928f9ad..a551d1d4ac51 100644
--- a/tools/power/cpupower/utils/cpuidle-set.c
+++ b/tools/power/cpupower/utils/cpuidle-set.c
@@ -41,14 +41,6 @@ int cmd_idle_set(int argc, char **argv)
 			cont = 0;
 			break;
 		case 'd':
-			if (param) {
-				param = -1;
-				cont = 0;
-				break;
-			}
-			param = ret;
-			idlestate = atoi(optarg);
-			break;
 		case 'e':
 			if (param) {
 				param = -1;
@@ -56,7 +48,13 @@ int cmd_idle_set(int argc, char **argv)
 				break;
 			}
 			param = ret;
-			idlestate = atoi(optarg);
+			strtol(optarg, &endptr, 10);
+			if (*endptr != '\0') {
+				printf(_("Bad value: %s, Integer expected\n"), optarg);
+				exit(EXIT_FAILURE);
+			} else {
+				idlestate = atoi(optarg);
+			}
 			break;
 		case 'D':
 			if (param) {

From 48b5583719cdfbdee238f9549a6a1a47af2b0469 Mon Sep 17 00:00:00 2001
From: Chin Yik Ming <yikming2222@gmail.com>
Date: Mon, 17 Jul 2023 14:49:52 +0800
Subject: [PATCH 0272/2047] sched/headers: Rename task_struct::state to
 task_struct::__state in the comments too

The rename in 2f064a59a11f ("sched: Change task_struct::state") missed the
comments.

[ mingo: Improved the changelog. ]

Signed-off-by: Chin Yik Ming <yikming2222@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Link: https://lore.kernel.org/r/20230717064952.2804-1-yikming2222@gmail.com
---
 include/linux/sched.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index efc9f4bdc4ca..2aab7be46f7e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -75,14 +75,14 @@ struct user_event_mm;
  * Task state bitmask. NOTE! These bits are also
  * encoded in fs/proc/array.c: get_task_state().
  *
- * We have two separate sets of flags: task->state
+ * We have two separate sets of flags: task->__state
  * is about runnability, while task->exit_state are
  * about the task exiting. Confusing, but this way
  * modifying one set can't modify the other one by
  * mistake.
  */
 
-/* Used in tsk->state: */
+/* Used in tsk->__state: */
 #define TASK_RUNNING			0x00000000
 #define TASK_INTERRUPTIBLE		0x00000001
 #define TASK_UNINTERRUPTIBLE		0x00000002
@@ -92,7 +92,7 @@ struct user_event_mm;
 #define EXIT_DEAD			0x00000010
 #define EXIT_ZOMBIE			0x00000020
 #define EXIT_TRACE			(EXIT_ZOMBIE | EXIT_DEAD)
-/* Used in tsk->state again: */
+/* Used in tsk->__state again: */
 #define TASK_PARKED			0x00000040
 #define TASK_DEAD			0x00000080
 #define TASK_WAKEKILL			0x00000100
@@ -173,7 +173,7 @@ struct user_event_mm;
 #endif
 
 /*
- * set_current_state() includes a barrier so that the write of current->state
+ * set_current_state() includes a barrier so that the write of current->__state
  * is correctly serialised wrt the caller's subsequent test of whether to
  * actually sleep:
  *
@@ -196,9 +196,9 @@ struct user_event_mm;
  *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
  *
  * where wake_up_state()/try_to_wake_up() executes a full memory barrier before
- * accessing p->state.
+ * accessing p->__state.
  *
- * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
+ * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is,
  * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
  * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
  *

From af4cf40470c22efa3987200fd19478199e08e103 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:40 +0200
Subject: [PATCH 0273/2047] sched/fair: Add cfs_rq::avg_vruntime

In order to move to an eligibility based scheduling policy, we need
to have a better approximation of the ideal scheduler.

Specifically, for a virtual time weighted fair queueing based
scheduler the ideal scheduler will be the weighted average of the
individual virtual runtimes (math in the comment).

As such, compute the weighted average to approximate the ideal
scheduler -- note that the approximation is in the individual task
behaviour, which isn't strictly conformant.

Specifically consider adding a task with a vruntime left of center, in
this case the average will move backwards in time -- something the
ideal scheduler would of course never do.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230531124603.654144274@infradead.org
---
 kernel/sched/debug.c |  32 +++++-----
 kernel/sched/fair.c  | 137 ++++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/sched.h |   5 ++
 3 files changed, 154 insertions(+), 20 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index aeeba46a096b..e48d2b2db7bc 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -627,10 +627,9 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
-	s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
-		spread, rq0_min_vruntime, spread0;
+	s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, spread;
+	struct sched_entity *last, *first;
 	struct rq *rq = cpu_rq(cpu);
-	struct sched_entity *last;
 	unsigned long flags;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -644,26 +643,25 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 			SPLIT_NS(cfs_rq->exec_clock));
 
 	raw_spin_rq_lock_irqsave(rq, flags);
-	if (rb_first_cached(&cfs_rq->tasks_timeline))
-		MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
+	first = __pick_first_entity(cfs_rq);
+	if (first)
+		left_vruntime = first->vruntime;
 	last = __pick_last_entity(cfs_rq);
 	if (last)
-		max_vruntime = last->vruntime;
+		right_vruntime = last->vruntime;
 	min_vruntime = cfs_rq->min_vruntime;
-	rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
 	raw_spin_rq_unlock_irqrestore(rq, flags);
-	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
-			SPLIT_NS(MIN_vruntime));
+
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_vruntime",
+			SPLIT_NS(left_vruntime));
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
 			SPLIT_NS(min_vruntime));
-	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
-			SPLIT_NS(max_vruntime));
-	spread = max_vruntime - MIN_vruntime;
-	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
-			SPLIT_NS(spread));
-	spread0 = min_vruntime - rq0_min_vruntime;
-	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
-			SPLIT_NS(spread0));
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "avg_vruntime",
+			SPLIT_NS(avg_vruntime(cfs_rq)));
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "right_vruntime",
+			SPLIT_NS(right_vruntime));
+	spread = right_vruntime - left_vruntime;
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread));
 	SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
 			cfs_rq->nr_spread_over);
 	SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d3df5b1642a6..bb5460682ae2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -601,9 +601,134 @@ static inline bool entity_before(const struct sched_entity *a,
 	return (s64)(a->vruntime - b->vruntime) < 0;
 }
 
+static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	return (s64)(se->vruntime - cfs_rq->min_vruntime);
+}
+
 #define __node_2_se(node) \
 	rb_entry((node), struct sched_entity, run_node)
 
+/*
+ * Compute virtual time from the per-task service numbers:
+ *
+ * Fair schedulers conserve lag:
+ *
+ *   \Sum lag_i = 0
+ *
+ * Where lag_i is given by:
+ *
+ *   lag_i = S - s_i = w_i * (V - v_i)
+ *
+ * Where S is the ideal service time and V is it's virtual time counterpart.
+ * Therefore:
+ *
+ *   \Sum lag_i = 0
+ *   \Sum w_i * (V - v_i) = 0
+ *   \Sum w_i * V - w_i * v_i = 0
+ *
+ * From which we can solve an expression for V in v_i (which we have in
+ * se->vruntime):
+ *
+ *       \Sum v_i * w_i   \Sum v_i * w_i
+ *   V = -------------- = --------------
+ *          \Sum w_i            W
+ *
+ * Specifically, this is the weighted average of all entity virtual runtimes.
+ *
+ * [[ NOTE: this is only equal to the ideal scheduler under the condition
+ *          that join/leave operations happen at lag_i = 0, otherwise the
+ *          virtual time has non-continguous motion equivalent to:
+ *
+ *	      V +-= lag_i / W
+ *
+ *	    Also see the comment in place_entity() that deals with this. ]]
+ *
+ * However, since v_i is u64, and the multiplcation could easily overflow
+ * transform it into a relative form that uses smaller quantities:
+ *
+ * Substitute: v_i == (v_i - v0) + v0
+ *
+ *     \Sum ((v_i - v0) + v0) * w_i   \Sum (v_i - v0) * w_i
+ * V = ---------------------------- = --------------------- + v0
+ *                  W                            W
+ *
+ * Which we track using:
+ *
+ *                    v0 := cfs_rq->min_vruntime
+ * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
+ *              \Sum w_i := cfs_rq->avg_load
+ *
+ * Since min_vruntime is a monotonic increasing variable that closely tracks
+ * the per-task service, these deltas: (v_i - v), will be in the order of the
+ * maximal (virtual) lag induced in the system due to quantisation.
+ *
+ * Also, we use scale_load_down() to reduce the size.
+ *
+ * As measured, the max (key * weight) value was ~44 bits for a kernel build.
+ */
+static void
+avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	unsigned long weight = scale_load_down(se->load.weight);
+	s64 key = entity_key(cfs_rq, se);
+
+	cfs_rq->avg_vruntime += key * weight;
+	cfs_rq->avg_load += weight;
+}
+
+static void
+avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	unsigned long weight = scale_load_down(se->load.weight);
+	s64 key = entity_key(cfs_rq, se);
+
+	cfs_rq->avg_vruntime -= key * weight;
+	cfs_rq->avg_load -= weight;
+}
+
+static inline
+void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+{
+	/*
+	 * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
+	 */
+	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+}
+
+u64 avg_vruntime(struct cfs_rq *cfs_rq)
+{
+	struct sched_entity *curr = cfs_rq->curr;
+	s64 avg = cfs_rq->avg_vruntime;
+	long load = cfs_rq->avg_load;
+
+	if (curr && curr->on_rq) {
+		unsigned long weight = scale_load_down(curr->load.weight);
+
+		avg += entity_key(cfs_rq, curr) * weight;
+		load += weight;
+	}
+
+	if (load)
+		avg = div_s64(avg, load);
+
+	return cfs_rq->min_vruntime + avg;
+}
+
+static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
+{
+	u64 min_vruntime = cfs_rq->min_vruntime;
+	/*
+	 * open coded max_vruntime() to allow updating avg_vruntime
+	 */
+	s64 delta = (s64)(vruntime - min_vruntime);
+	if (delta > 0) {
+		avg_vruntime_update(cfs_rq, delta);
+		min_vruntime = vruntime;
+	}
+	return min_vruntime;
+}
+
 static void update_min_vruntime(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
@@ -629,7 +754,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
 
 	/* ensure we never gain time by being placed backwards. */
 	u64_u32_store(cfs_rq->min_vruntime,
-		      max_vruntime(cfs_rq->min_vruntime, vruntime));
+		      __update_min_vruntime(cfs_rq, vruntime));
 }
 
 static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
@@ -642,12 +767,14 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
  */
 static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+	avg_vruntime_add(cfs_rq, se);
 	rb_add_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less);
 }
 
 static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline);
+	avg_vruntime_sub(cfs_rq, se);
 }
 
 struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
@@ -3379,6 +3506,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		/* commit outstanding execution time */
 		if (cfs_rq->curr == se)
 			update_curr(cfs_rq);
+		else
+			avg_vruntime_sub(cfs_rq, se);
 		update_load_sub(&cfs_rq->load, se->load.weight);
 	}
 	dequeue_load_avg(cfs_rq, se);
@@ -3394,9 +3523,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 #endif
 
 	enqueue_load_avg(cfs_rq, se);
-	if (se->on_rq)
+	if (se->on_rq) {
 		update_load_add(&cfs_rq->load, se->load.weight);
-
+		if (cfs_rq->curr != se)
+			avg_vruntime_add(cfs_rq, se);
+	}
 }
 
 void reweight_task(struct task_struct *p, int prio)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9baeb1a2dfdd..52a0a4bde193 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -548,6 +548,9 @@ struct cfs_rq {
 	unsigned int		idle_nr_running;   /* SCHED_IDLE */
 	unsigned int		idle_h_nr_running; /* SCHED_IDLE */
 
+	s64			avg_vruntime;
+	u64			avg_load;
+
 	u64			exec_clock;
 	u64			min_vruntime;
 #ifdef CONFIG_SCHED_CORE
@@ -3483,4 +3486,6 @@ static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
 static inline void init_sched_mm_cid(struct task_struct *t) { }
 #endif
 
+extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
+
 #endif /* _KERNEL_SCHED_SCHED_H */

From e0c2ff903c320d3fd3c2c604dc401b3b7c0a1d13 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:41 +0200
Subject: [PATCH 0274/2047] sched/fair: Remove sched_feat(START_DEBIT)

With the introduction of avg_vruntime() there is no need to use worse
approximations. Take the 0-lag point as starting point for inserting
new tasks.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230531124603.722361178@infradead.org
---
 kernel/sched/fair.c     | 21 +--------------------
 kernel/sched/features.h |  6 ------
 2 files changed, 1 insertion(+), 26 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bb5460682ae2..fc43482c13e9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -906,16 +906,6 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return slice;
 }
 
-/*
- * We calculate the vruntime slice of a to-be-inserted task.
- *
- * vs = s/w
- */
-static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	return calc_delta_fair(sched_slice(cfs_rq, se), se);
-}
-
 #include "pelt.h"
 #ifdef CONFIG_SMP
 
@@ -4862,16 +4852,7 @@ static inline bool entity_is_long_sleeper(struct sched_entity *se)
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
-	u64 vruntime = cfs_rq->min_vruntime;
-
-	/*
-	 * The 'current' period is already promised to the current tasks,
-	 * however the extra weight of the new task will slow them down a
-	 * little, place the new task so that it fits in the slot that
-	 * stays open at the end.
-	 */
-	if (initial && sched_feat(START_DEBIT))
-		vruntime += sched_vslice(cfs_rq, se);
+	u64 vruntime = avg_vruntime(cfs_rq);
 
 	/* sleeps up to a single latency don't count. */
 	if (!initial) {
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index ee7f23c76bd3..fa828b36533d 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -6,12 +6,6 @@
  */
 SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
 
-/*
- * Place new tasks ahead so that they do not starve already running
- * tasks
- */
-SCHED_FEAT(START_DEBIT, true)
-
 /*
  * Prefer to schedule the task we woke last (assuming it failed
  * wakeup-preemption), since its likely going to consume data we

From 86bfbb7ce4f67a88df2639198169b685668e7349 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:42 +0200
Subject: [PATCH 0275/2047] sched/fair: Add lag based placement

With the introduction of avg_vruntime, it is possible to approximate
lag (the entire purpose of introducing it in fact). Use this to do lag
based placement over sleep+wake.

Specifically, the FAIR_SLEEPERS thing places things too far to the
left and messes up the deadline aspect of EEVDF.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230531124603.794929315@infradead.org
---
 include/linux/sched.h   |   3 +-
 kernel/sched/core.c     |   1 +
 kernel/sched/fair.c     | 172 ++++++++++++++++++++++++++++++----------
 kernel/sched/features.h |   8 ++
 4 files changed, 143 insertions(+), 41 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2aab7be46f7e..ba1828b2a6a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -554,8 +554,9 @@ struct sched_entity {
 
 	u64				exec_start;
 	u64				sum_exec_runtime;
-	u64				vruntime;
 	u64				prev_sum_exec_runtime;
+	u64				vruntime;
+	s64				vlag;
 
 	u64				nr_migrations;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 83e36547af17..84b0d47ed9b8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4501,6 +4501,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
+	p->se.vlag			= 0;
 	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fc43482c13e9..dd12ada69b12 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -715,6 +715,15 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
 	return cfs_rq->min_vruntime + avg;
 }
 
+/*
+ * lag_i = S - s_i = w_i * (V - v_i)
+ */
+void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	SCHED_WARN_ON(!se->on_rq);
+	se->vlag = avg_vruntime(cfs_rq) - se->vruntime;
+}
+
 static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
 {
 	u64 min_vruntime = cfs_rq->min_vruntime;
@@ -3492,6 +3501,8 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
 static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 			    unsigned long weight)
 {
+	unsigned long old_weight = se->load.weight;
+
 	if (se->on_rq) {
 		/* commit outstanding execution time */
 		if (cfs_rq->curr == se)
@@ -3504,6 +3515,14 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 
 	update_load_set(&se->load, weight);
 
+	if (!se->on_rq) {
+		/*
+		 * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
+		 * we need to scale se->vlag when w_i changes.
+		 */
+		se->vlag = div_s64(se->vlag * old_weight, weight);
+	}
+
 #ifdef CONFIG_SMP
 	do {
 		u32 divider = get_pelt_divider(&se->avg);
@@ -4853,49 +4872,119 @@ static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
 	u64 vruntime = avg_vruntime(cfs_rq);
-
-	/* sleeps up to a single latency don't count. */
-	if (!initial) {
-		unsigned long thresh;
-
-		if (se_is_idle(se))
-			thresh = sysctl_sched_min_granularity;
-		else
-			thresh = sysctl_sched_latency;
-
-		/*
-		 * Halve their sleep time's effect, to allow
-		 * for a gentler effect of sleepers:
-		 */
-		if (sched_feat(GENTLE_FAIR_SLEEPERS))
-			thresh >>= 1;
-
-		vruntime -= thresh;
-	}
+	s64 lag = 0;
 
 	/*
-	 * Pull vruntime of the entity being placed to the base level of
-	 * cfs_rq, to prevent boosting it if placed backwards.
-	 * However, min_vruntime can advance much faster than real time, with
-	 * the extreme being when an entity with the minimal weight always runs
-	 * on the cfs_rq. If the waking entity slept for a long time, its
-	 * vruntime difference from min_vruntime may overflow s64 and their
-	 * comparison may get inversed, so ignore the entity's original
-	 * vruntime in that case.
-	 * The maximal vruntime speedup is given by the ratio of normal to
-	 * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
-	 * When placing a migrated waking entity, its exec_start has been set
-	 * from a different rq. In order to take into account a possible
-	 * divergence between new and prev rq's clocks task because of irq and
-	 * stolen time, we take an additional margin.
-	 * So, cutting off on the sleep time of
-	 *     2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
-	 * should be safe.
+	 * Due to how V is constructed as the weighted average of entities,
+	 * adding tasks with positive lag, or removing tasks with negative lag
+	 * will move 'time' backwards, this can screw around with the lag of
+	 * other tasks.
+	 *
+	 * EEVDF: placement strategy #1 / #2
 	 */
-	if (entity_is_long_sleeper(se))
-		se->vruntime = vruntime;
-	else
-		se->vruntime = max_vruntime(se->vruntime, vruntime);
+	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running > 1) {
+		struct sched_entity *curr = cfs_rq->curr;
+		unsigned long load;
+
+		lag = se->vlag;
+
+		/*
+		 * If we want to place a task and preserve lag, we have to
+		 * consider the effect of the new entity on the weighted
+		 * average and compensate for this, otherwise lag can quickly
+		 * evaporate.
+		 *
+		 * Lag is defined as:
+		 *
+		 *   lag_i = S - s_i = w_i * (V - v_i)
+		 *
+		 * To avoid the 'w_i' term all over the place, we only track
+		 * the virtual lag:
+		 *
+		 *   vl_i = V - v_i <=> v_i = V - vl_i
+		 *
+		 * And we take V to be the weighted average of all v:
+		 *
+		 *   V = (\Sum w_j*v_j) / W
+		 *
+		 * Where W is: \Sum w_j
+		 *
+		 * Then, the weighted average after adding an entity with lag
+		 * vl_i is given by:
+		 *
+		 *   V' = (\Sum w_j*v_j + w_i*v_i) / (W + w_i)
+		 *      = (W*V + w_i*(V - vl_i)) / (W + w_i)
+		 *      = (W*V + w_i*V - w_i*vl_i) / (W + w_i)
+		 *      = (V*(W + w_i) - w_i*l) / (W + w_i)
+		 *      = V - w_i*vl_i / (W + w_i)
+		 *
+		 * And the actual lag after adding an entity with vl_i is:
+		 *
+		 *   vl'_i = V' - v_i
+		 *         = V - w_i*vl_i / (W + w_i) - (V - vl_i)
+		 *         = vl_i - w_i*vl_i / (W + w_i)
+		 *
+		 * Which is strictly less than vl_i. So in order to preserve lag
+		 * we should inflate the lag before placement such that the
+		 * effective lag after placement comes out right.
+		 *
+		 * As such, invert the above relation for vl'_i to get the vl_i
+		 * we need to use such that the lag after placement is the lag
+		 * we computed before dequeue.
+		 *
+		 *   vl'_i = vl_i - w_i*vl_i / (W + w_i)
+		 *         = ((W + w_i)*vl_i - w_i*vl_i) / (W + w_i)
+		 *
+		 *   (W + w_i)*vl'_i = (W + w_i)*vl_i - w_i*vl_i
+		 *                   = W*vl_i
+		 *
+		 *   vl_i = (W + w_i)*vl'_i / W
+		 */
+		load = cfs_rq->avg_load;
+		if (curr && curr->on_rq)
+			load += curr->load.weight;
+
+		lag *= load + se->load.weight;
+		if (WARN_ON_ONCE(!load))
+			load = 1;
+		lag = div_s64(lag, load);
+
+		vruntime -= lag;
+	}
+
+	if (sched_feat(FAIR_SLEEPERS)) {
+
+		/* sleeps up to a single latency don't count. */
+		if (!initial) {
+			unsigned long thresh;
+
+			if (se_is_idle(se))
+				thresh = sysctl_sched_min_granularity;
+			else
+				thresh = sysctl_sched_latency;
+
+			/*
+			 * Halve their sleep time's effect, to allow
+			 * for a gentler effect of sleepers:
+			 */
+			if (sched_feat(GENTLE_FAIR_SLEEPERS))
+				thresh >>= 1;
+
+			vruntime -= thresh;
+		}
+
+		/*
+		 * Pull vruntime of the entity being placed to the base level of
+		 * cfs_rq, to prevent boosting it if placed backwards.  If the entity
+		 * slept for a long time, don't even try to compare its vruntime with
+		 * the base as it may be too far off and the comparison may get
+		 * inversed due to s64 overflow.
+		 */
+		if (!entity_is_long_sleeper(se))
+			vruntime = max_vruntime(se->vruntime, vruntime);
+	}
+
+	se->vruntime = vruntime;
 }
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
@@ -5077,6 +5166,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	clear_buddies(cfs_rq, se);
 
+	if (flags & DEQUEUE_SLEEP)
+		update_entity_lag(cfs_rq, se);
+
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	se->on_rq = 0;
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index fa828b36533d..7958a10fe23b 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -1,11 +1,19 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+
 /*
  * Only give sleepers 50% of their service deficit. This allows
  * them to run sooner, but does not allow tons of sleepers to
  * rip the spread apart.
  */
+SCHED_FEAT(FAIR_SLEEPERS, false)
 SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
 
+/*
+ * Using the avg_vruntime, do the right thing and preserve lag across
+ * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
+ */
+SCHED_FEAT(PLACE_LAG, true)
+
 /*
  * Prefer to schedule the task we woke last (assuming it failed
  * wakeup-preemption), since its likely going to consume data we

From 99d4d26551b56f4e523dd04e4970b94aa796a64e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:43 +0200
Subject: [PATCH 0276/2047] rbtree: Add rb_add_augmented_cached() helper

While slightly sub-optimal, updating the augmented data while going
down the tree during lookup would be faster -- alas the augment
interface does not currently allow for that, provide a generic helper
to add a node to an augmented cached tree.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230531124603.862983648@infradead.org
---
 include/linux/rbtree_augmented.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 7ee7ed5de722..6dbc5a1bf6a8 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -60,6 +60,32 @@ rb_insert_augmented_cached(struct rb_node *node,
 	rb_insert_augmented(node, &root->rb_root, augment);
 }
 
+static __always_inline struct rb_node *
+rb_add_augmented_cached(struct rb_node *node, struct rb_root_cached *tree,
+			bool (*less)(struct rb_node *, const struct rb_node *),
+			const struct rb_augment_callbacks *augment)
+{
+	struct rb_node **link = &tree->rb_root.rb_node;
+	struct rb_node *parent = NULL;
+	bool leftmost = true;
+
+	while (*link) {
+		parent = *link;
+		if (less(node, parent)) {
+			link = &parent->rb_left;
+		} else {
+			link = &parent->rb_right;
+			leftmost = false;
+		}
+	}
+
+	rb_link_node(node, parent, link);
+	augment->propagate(parent, NULL); /* suboptimal */
+	rb_insert_augmented_cached(node, tree, leftmost, augment);
+
+	return leftmost ? node : NULL;
+}
+
 /*
  * Template for declaring augmented rbtree callbacks (generic case)
  *

From 147f3efaa24182a21706bca15eab2f3f4630b5fe Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:44 +0200
Subject: [PATCH 0277/2047] sched/fair: Implement an EEVDF-like scheduling
 policy

Where CFS is currently a WFQ based scheduler with only a single knob,
the weight. The addition of a second, latency oriented parameter,
makes something like WF2Q or EEVDF based a much better fit.

Specifically, EEVDF does EDF like scheduling in the left half of the
tree -- those entities that are owed service. Except because this is a
virtual time scheduler, the deadlines are in virtual time as well,
which is what allows over-subscription.

EEVDF has two parameters:

 - weight, or time-slope: which is mapped to nice just as before

 - request size, or slice length: which is used to compute
   the virtual deadline as: vd_i = ve_i + r_i/w_i

Basically, by setting a smaller slice, the deadline will be earlier
and the task will be more eligible and ran earlier.

Tick driven preemption is driven by request/slice completion; while
wakeup preemption is driven by the deadline.

Because the tree is now effectively an interval tree, and the
selection is no longer 'leftmost', over-scheduling is less of a
problem.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230531124603.931005524@infradead.org
---
 include/linux/sched.h   |   4 +
 kernel/sched/core.c     |   1 +
 kernel/sched/debug.c    |   6 +-
 kernel/sched/fair.c     | 338 ++++++++++++++++++++++++++++++++++------
 kernel/sched/features.h |   3 +
 kernel/sched/sched.h    |   4 +-
 6 files changed, 308 insertions(+), 48 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ba1828b2a6a5..177b3f3676ef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -549,6 +549,9 @@ struct sched_entity {
 	/* For load-balancing: */
 	struct load_weight		load;
 	struct rb_node			run_node;
+	u64				deadline;
+	u64				min_deadline;
+
 	struct list_head		group_node;
 	unsigned int			on_rq;
 
@@ -557,6 +560,7 @@ struct sched_entity {
 	u64				prev_sum_exec_runtime;
 	u64				vruntime;
 	s64				vlag;
+	u64				slice;
 
 	u64				nr_migrations;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84b0d47ed9b8..e85a2fd258e2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4502,6 +4502,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
 	p->se.vlag			= 0;
+	p->se.slice			= sysctl_sched_min_granularity;
 	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index e48d2b2db7bc..18efc6d0cc5a 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -582,9 +582,13 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 	else
 		SEQ_printf(m, " %c", task_state_to_char(p));
 
-	SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ",
+	SEQ_printf(m, "%15s %5d %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld.%06ld %9Ld %5d ",
 		p->comm, task_pid_nr(p),
 		SPLIT_NS(p->se.vruntime),
+		entity_eligible(cfs_rq_of(&p->se), &p->se) ? 'E' : 'N',
+		SPLIT_NS(p->se.deadline),
+		SPLIT_NS(p->se.slice),
+		SPLIT_NS(p->se.sum_exec_runtime),
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dd12ada69b12..4d3505dba476 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -47,6 +47,7 @@
 #include <linux/psi.h>
 #include <linux/ratelimit.h>
 #include <linux/task_work.h>
+#include <linux/rbtree_augmented.h>
 
 #include <asm/switch_to.h>
 
@@ -347,6 +348,16 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight
 	return mul_u64_u32_shr(delta_exec, fact, shift);
 }
 
+/*
+ * delta /= w
+ */
+static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
+{
+	if (unlikely(se->load.weight != NICE_0_LOAD))
+		delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
+
+	return delta;
+}
 
 const struct sched_class fair_sched_class;
 
@@ -717,11 +728,62 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
 
 /*
  * lag_i = S - s_i = w_i * (V - v_i)
+ *
+ * However, since V is approximated by the weighted average of all entities it
+ * is possible -- by addition/removal/reweight to the tree -- to move V around
+ * and end up with a larger lag than we started with.
+ *
+ * Limit this to either double the slice length with a minimum of TICK_NSEC
+ * since that is the timing granularity.
+ *
+ * EEVDF gives the following limit for a steady state system:
+ *
+ *   -r_max < lag < max(r_max, q)
+ *
+ * XXX could add max_slice to the augmented data to track this.
  */
 void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+	s64 lag, limit;
+
 	SCHED_WARN_ON(!se->on_rq);
-	se->vlag = avg_vruntime(cfs_rq) - se->vruntime;
+	lag = avg_vruntime(cfs_rq) - se->vruntime;
+
+	limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
+	se->vlag = clamp(lag, -limit, limit);
+}
+
+/*
+ * Entity is eligible once it received less service than it ought to have,
+ * eg. lag >= 0.
+ *
+ * lag_i = S - s_i = w_i*(V - v_i)
+ *
+ * lag_i >= 0 -> V >= v_i
+ *
+ *     \Sum (v_i - v)*w_i
+ * V = ------------------ + v
+ *          \Sum w_i
+ *
+ * lag_i >= 0 -> \Sum (v_i - v)*w_i >= (v_i - v)*(\Sum w_i)
+ *
+ * Note: using 'avg_vruntime() > se->vruntime' is inacurate due
+ *       to the loss in precision caused by the division.
+ */
+int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	struct sched_entity *curr = cfs_rq->curr;
+	s64 avg = cfs_rq->avg_vruntime;
+	long load = cfs_rq->avg_load;
+
+	if (curr && curr->on_rq) {
+		unsigned long weight = scale_load_down(curr->load.weight);
+
+		avg += entity_key(cfs_rq, curr) * weight;
+		load += weight;
+	}
+
+	return avg >= entity_key(cfs_rq, se) * load;
 }
 
 static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
@@ -740,8 +802,8 @@ static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
 
 static void update_min_vruntime(struct cfs_rq *cfs_rq)
 {
+	struct sched_entity *se = __pick_first_entity(cfs_rq);
 	struct sched_entity *curr = cfs_rq->curr;
-	struct rb_node *leftmost = rb_first_cached(&cfs_rq->tasks_timeline);
 
 	u64 vruntime = cfs_rq->min_vruntime;
 
@@ -752,9 +814,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
 			curr = NULL;
 	}
 
-	if (leftmost) { /* non-empty tree */
-		struct sched_entity *se = __node_2_se(leftmost);
-
+	if (se) {
 		if (!curr)
 			vruntime = se->vruntime;
 		else
@@ -771,18 +831,50 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
 	return entity_before(__node_2_se(a), __node_2_se(b));
 }
 
+#define deadline_gt(field, lse, rse) ({ (s64)((lse)->field - (rse)->field) > 0; })
+
+static inline void __update_min_deadline(struct sched_entity *se, struct rb_node *node)
+{
+	if (node) {
+		struct sched_entity *rse = __node_2_se(node);
+		if (deadline_gt(min_deadline, se, rse))
+			se->min_deadline = rse->min_deadline;
+	}
+}
+
+/*
+ * se->min_deadline = min(se->deadline, left->min_deadline, right->min_deadline)
+ */
+static inline bool min_deadline_update(struct sched_entity *se, bool exit)
+{
+	u64 old_min_deadline = se->min_deadline;
+	struct rb_node *node = &se->run_node;
+
+	se->min_deadline = se->deadline;
+	__update_min_deadline(se, node->rb_right);
+	__update_min_deadline(se, node->rb_left);
+
+	return se->min_deadline == old_min_deadline;
+}
+
+RB_DECLARE_CALLBACKS(static, min_deadline_cb, struct sched_entity,
+		     run_node, min_deadline, min_deadline_update);
+
 /*
  * Enqueue an entity into the rb-tree:
  */
 static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	avg_vruntime_add(cfs_rq, se);
-	rb_add_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less);
+	se->min_deadline = se->deadline;
+	rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
+				__entity_less, &min_deadline_cb);
 }
 
 static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline);
+	rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
+				  &min_deadline_cb);
 	avg_vruntime_sub(cfs_rq, se);
 }
 
@@ -806,6 +898,97 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se)
 	return __node_2_se(next);
 }
 
+static struct sched_entity *pick_cfs(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+{
+	struct sched_entity *left = __pick_first_entity(cfs_rq);
+
+	/*
+	 * If curr is set we have to see if its left of the leftmost entity
+	 * still in the tree, provided there was anything in the tree at all.
+	 */
+	if (!left || (curr && entity_before(curr, left)))
+		left = curr;
+
+	return left;
+}
+
+/*
+ * Earliest Eligible Virtual Deadline First
+ *
+ * In order to provide latency guarantees for different request sizes
+ * EEVDF selects the best runnable task from two criteria:
+ *
+ *  1) the task must be eligible (must be owed service)
+ *
+ *  2) from those tasks that meet 1), we select the one
+ *     with the earliest virtual deadline.
+ *
+ * We can do this in O(log n) time due to an augmented RB-tree. The
+ * tree keeps the entries sorted on service, but also functions as a
+ * heap based on the deadline by keeping:
+ *
+ *  se->min_deadline = min(se->deadline, se->{left,right}->min_deadline)
+ *
+ * Which allows an EDF like search on (sub)trees.
+ */
+static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
+{
+	struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
+	struct sched_entity *curr = cfs_rq->curr;
+	struct sched_entity *best = NULL;
+
+	if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
+		curr = NULL;
+
+	while (node) {
+		struct sched_entity *se = __node_2_se(node);
+
+		/*
+		 * If this entity is not eligible, try the left subtree.
+		 */
+		if (!entity_eligible(cfs_rq, se)) {
+			node = node->rb_left;
+			continue;
+		}
+
+		/*
+		 * If this entity has an earlier deadline than the previous
+		 * best, take this one. If it also has the earliest deadline
+		 * of its subtree, we're done.
+		 */
+		if (!best || deadline_gt(deadline, best, se)) {
+			best = se;
+			if (best->deadline == best->min_deadline)
+				break;
+		}
+
+		/*
+		 * If the earlest deadline in this subtree is in the fully
+		 * eligible left half of our space, go there.
+		 */
+		if (node->rb_left &&
+		    __node_2_se(node->rb_left)->min_deadline == se->min_deadline) {
+			node = node->rb_left;
+			continue;
+		}
+
+		node = node->rb_right;
+	}
+
+	if (!best || (curr && deadline_gt(deadline, best, curr)))
+		best = curr;
+
+	if (unlikely(!best)) {
+		struct sched_entity *left = __pick_first_entity(cfs_rq);
+		if (left) {
+			pr_err("EEVDF scheduling fail, picking leftmost\n");
+			return left;
+		}
+	}
+
+	return best;
+}
+
 #ifdef CONFIG_SCHED_DEBUG
 struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 {
@@ -839,17 +1022,6 @@ int sched_update_scaling(void)
 }
 #endif
 
-/*
- * delta /= w
- */
-static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
-{
-	if (unlikely(se->load.weight != NICE_0_LOAD))
-		delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
-
-	return delta;
-}
-
 /*
  * The idea is to set a period in which each task runs once.
  *
@@ -915,6 +1087,48 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return slice;
 }
 
+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
+
+/*
+ * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
+ * this is probably good enough.
+ */
+static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if ((s64)(se->vruntime - se->deadline) < 0)
+		return;
+
+	if (sched_feat(EEVDF)) {
+		/*
+		 * For EEVDF the virtual time slope is determined by w_i (iow.
+		 * nice) while the request time r_i is determined by
+		 * sysctl_sched_min_granularity.
+		 */
+		se->slice = sysctl_sched_min_granularity;
+
+		/*
+		 * The task has consumed its request, reschedule.
+		 */
+		if (cfs_rq->nr_running > 1) {
+			resched_curr(rq_of(cfs_rq));
+			clear_buddies(cfs_rq, se);
+		}
+	} else {
+		/*
+		 * When many tasks blow up the sched_period; it is possible
+		 * that sched_slice() reports unusually large results (when
+		 * many tasks are very light for example). Therefore impose a
+		 * maximum.
+		 */
+		se->slice = min_t(u64, sched_slice(cfs_rq, se), sysctl_sched_latency);
+	}
+
+	/*
+	 * EEVDF: vd_i = ve_i + r_i / w_i
+	 */
+	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
+}
+
 #include "pelt.h"
 #ifdef CONFIG_SMP
 
@@ -1047,6 +1261,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	schedstat_add(cfs_rq->exec_clock, delta_exec);
 
 	curr->vruntime += calc_delta_fair(delta_exec, curr);
+	update_deadline(cfs_rq, curr);
 	update_min_vruntime(cfs_rq);
 
 	if (entity_is_task(curr)) {
@@ -3521,6 +3736,14 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		 * we need to scale se->vlag when w_i changes.
 		 */
 		se->vlag = div_s64(se->vlag * old_weight, weight);
+	} else {
+		s64 deadline = se->deadline - se->vruntime;
+		/*
+		 * When the weight changes, the virtual time slope changes and
+		 * we should adjust the relative virtual deadline accordingly.
+		 */
+		deadline = div_s64(deadline * old_weight, weight);
+		se->deadline = se->vruntime + deadline;
 	}
 
 #ifdef CONFIG_SMP
@@ -4871,6 +5094,7 @@ static inline bool entity_is_long_sleeper(struct sched_entity *se)
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
+	u64 vslice = calc_delta_fair(se->slice, se);
 	u64 vruntime = avg_vruntime(cfs_rq);
 	s64 lag = 0;
 
@@ -4942,9 +5166,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 		 */
 		load = cfs_rq->avg_load;
 		if (curr && curr->on_rq)
-			load += curr->load.weight;
+			load += scale_load_down(curr->load.weight);
 
-		lag *= load + se->load.weight;
+		lag *= load + scale_load_down(se->load.weight);
 		if (WARN_ON_ONCE(!load))
 			load = 1;
 		lag = div_s64(lag, load);
@@ -4985,6 +5209,19 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	}
 
 	se->vruntime = vruntime;
+
+	/*
+	 * When joining the competition; the exisiting tasks will be,
+	 * on average, halfway through their slice, as such start tasks
+	 * off with half a slice to ease into the competition.
+	 */
+	if (sched_feat(PLACE_DEADLINE_INITIAL) && initial)
+		vslice /= 2;
+
+	/*
+	 * EEVDF: vd_i = ve_i + r_i/w_i
+	 */
+	se->deadline = se->vruntime + vslice;
 }
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
@@ -5207,19 +5444,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 static void
 check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-	unsigned long ideal_runtime, delta_exec;
+	unsigned long delta_exec;
 	struct sched_entity *se;
 	s64 delta;
 
-	/*
-	 * When many tasks blow up the sched_period; it is possible that
-	 * sched_slice() reports unusually large results (when many tasks are
-	 * very light for example). Therefore impose a maximum.
-	 */
-	ideal_runtime = min_t(u64, sched_slice(cfs_rq, curr), sysctl_sched_latency);
-
 	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
-	if (delta_exec > ideal_runtime) {
+	if (delta_exec > curr->slice) {
 		resched_curr(rq_of(cfs_rq));
 		/*
 		 * The current task ran long enough, ensure it doesn't get
@@ -5243,7 +5473,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 	if (delta < 0)
 		return;
 
-	if (delta > ideal_runtime)
+	if (delta > curr->slice)
 		resched_curr(rq_of(cfs_rq));
 }
 
@@ -5298,17 +5528,20 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
 static struct sched_entity *
 pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-	struct sched_entity *left = __pick_first_entity(cfs_rq);
-	struct sched_entity *se;
+	struct sched_entity *left, *se;
 
-	/*
-	 * If curr is set we have to see if its left of the leftmost entity
-	 * still in the tree, provided there was anything in the tree at all.
-	 */
-	if (!left || (curr && entity_before(curr, left)))
-		left = curr;
+	if (sched_feat(EEVDF)) {
+		/*
+		 * Enabling NEXT_BUDDY will affect latency but not fairness.
+		 */
+		if (sched_feat(NEXT_BUDDY) &&
+		    cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next))
+			return cfs_rq->next;
 
-	se = left; /* ideally we run the leftmost entity */
+		return pick_eevdf(cfs_rq);
+	}
+
+	se = left = pick_cfs(cfs_rq, curr);
 
 	/*
 	 * Avoid running the skip buddy, if running something else can
@@ -5401,7 +5634,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 		return;
 #endif
 
-	if (cfs_rq->nr_running > 1)
+	if (!sched_feat(EEVDF) && cfs_rq->nr_running > 1)
 		check_preempt_tick(cfs_rq, curr);
 }
 
@@ -6445,13 +6678,12 @@ static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
 static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
 	SCHED_WARN_ON(task_rq(p) != rq);
 
 	if (rq->cfs.h_nr_running > 1) {
-		u64 slice = sched_slice(cfs_rq, se);
 		u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+		u64 slice = se->slice;
 		s64 delta = slice - ran;
 
 		if (delta < 0) {
@@ -8228,7 +8460,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (cse_is_idle != pse_is_idle)
 		return;
 
-	update_curr(cfs_rq_of(se));
+	cfs_rq = cfs_rq_of(se);
+	update_curr(cfs_rq);
+
+	if (sched_feat(EEVDF)) {
+		/*
+		 * XXX pick_eevdf(cfs_rq) != se ?
+		 */
+		if (pick_eevdf(cfs_rq) == pse)
+			goto preempt;
+
+		return;
+	}
+
 	if (wakeup_preempt_entity(se, pse) == 1) {
 		/*
 		 * Bias pick_next to pick the sched entity that is
@@ -8474,7 +8718,7 @@ static void yield_task_fair(struct rq *rq)
 
 	clear_buddies(cfs_rq, se);
 
-	if (curr->policy != SCHED_BATCH) {
+	if (sched_feat(EEVDF) || curr->policy != SCHED_BATCH) {
 		update_rq_clock(rq);
 		/*
 		 * Update run-time statistics of the 'current'.
@@ -8487,6 +8731,8 @@ static void yield_task_fair(struct rq *rq)
 		 */
 		rq_clock_skip_update(rq);
 	}
+	if (sched_feat(EEVDF))
+		se->deadline += calc_delta_fair(se->slice, se);
 
 	set_skip_buddy(se);
 }
@@ -12363,8 +12609,8 @@ static void rq_offline_fair(struct rq *rq)
 static inline bool
 __entity_slice_used(struct sched_entity *se, int min_nr_tasks)
 {
-	u64 slice = sched_slice(cfs_rq_of(se), se);
 	u64 rtime = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+	u64 slice = se->slice;
 
 	return (rtime * min_nr_tasks > slice);
 }
@@ -13059,7 +13305,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
 	 * idle runqueue:
 	 */
 	if (rq->cfs.load.weight)
-		rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se));
+		rr_interval = NS_TO_JIFFIES(se->slice);
 
 	return rr_interval;
 }
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 7958a10fe23b..60cce1e6f37b 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -13,6 +13,7 @@ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
  * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
  */
 SCHED_FEAT(PLACE_LAG, true)
+SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
 
 /*
  * Prefer to schedule the task we woke last (assuming it failed
@@ -103,3 +104,5 @@ SCHED_FEAT(LATENCY_WARN, false)
 
 SCHED_FEAT(ALT_PERIOD, true)
 SCHED_FEAT(BASE_SLICE, true)
+
+SCHED_FEAT(EEVDF, true)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 52a0a4bde193..aa5b293ca4ed 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2505,9 +2505,10 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
 
+extern unsigned int sysctl_sched_min_granularity;
+
 #ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_idle_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern int sysctl_resched_latency_warn_ms;
@@ -3487,5 +3488,6 @@ static inline void init_sched_mm_cid(struct task_struct *t) { }
 #endif
 
 extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
+extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se);
 
 #endif /* _KERNEL_SCHED_SCHED_H */

From 76cae9dbe185b82aeb0640aa2b73da4a8e0088ce Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:45 +0200
Subject: [PATCH 0278/2047] sched/fair: Commit to lag based placement

Removes the FAIR_SLEEPERS code in favour of the new LAG based
placement.

Specifically, the whole FAIR_SLEEPER thing was a very crude
approximation to make up for the lack of lag based placement,
specifically the 'service owed' part. This is important for things
like 'starve' and 'hackbench'.

One side effect of FAIR_SLEEPER is that it caused 'small' unfairness,
specifically, by always ignoring up-to 'thresh' sleeptime it would
have a 50%/50% time distribution for a 50% sleeper vs a 100% runner,
while strictly speaking this should (of course) result in a 33%/67%
split (as CFS will also do if the sleep period exceeds 'thresh').

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230531124604.000198861@infradead.org
---
 kernel/sched/fair.c     | 59 +----------------------------------------
 kernel/sched/features.h |  8 ------
 2 files changed, 1 insertion(+), 66 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4d3505dba476..58798dae11b6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5068,29 +5068,6 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 #endif
 }
 
-static inline bool entity_is_long_sleeper(struct sched_entity *se)
-{
-	struct cfs_rq *cfs_rq;
-	u64 sleep_time;
-
-	if (se->exec_start == 0)
-		return false;
-
-	cfs_rq = cfs_rq_of(se);
-
-	sleep_time = rq_clock_task(rq_of(cfs_rq));
-
-	/* Happen while migrating because of clock task divergence */
-	if (sleep_time <= se->exec_start)
-		return false;
-
-	sleep_time -= se->exec_start;
-	if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
-		return true;
-
-	return false;
-}
-
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
@@ -5172,43 +5149,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 		if (WARN_ON_ONCE(!load))
 			load = 1;
 		lag = div_s64(lag, load);
-
-		vruntime -= lag;
 	}
 
-	if (sched_feat(FAIR_SLEEPERS)) {
-
-		/* sleeps up to a single latency don't count. */
-		if (!initial) {
-			unsigned long thresh;
-
-			if (se_is_idle(se))
-				thresh = sysctl_sched_min_granularity;
-			else
-				thresh = sysctl_sched_latency;
-
-			/*
-			 * Halve their sleep time's effect, to allow
-			 * for a gentler effect of sleepers:
-			 */
-			if (sched_feat(GENTLE_FAIR_SLEEPERS))
-				thresh >>= 1;
-
-			vruntime -= thresh;
-		}
-
-		/*
-		 * Pull vruntime of the entity being placed to the base level of
-		 * cfs_rq, to prevent boosting it if placed backwards.  If the entity
-		 * slept for a long time, don't even try to compare its vruntime with
-		 * the base as it may be too far off and the comparison may get
-		 * inversed due to s64 overflow.
-		 */
-		if (!entity_is_long_sleeper(se))
-			vruntime = max_vruntime(se->vruntime, vruntime);
-	}
-
-	se->vruntime = vruntime;
+	se->vruntime = vruntime - lag;
 
 	/*
 	 * When joining the competition; the exisiting tasks will be,
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 60cce1e6f37b..2a830eccda3e 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -1,13 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
-/*
- * Only give sleepers 50% of their service deficit. This allows
- * them to run sooner, but does not allow tons of sleepers to
- * rip the spread apart.
- */
-SCHED_FEAT(FAIR_SLEEPERS, false)
-SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
-
 /*
  * Using the avg_vruntime, do the right thing and preserve lag across
  * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.

From e8f331bcc270354a803c2127c486190d33eac441 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:46 +0200
Subject: [PATCH 0279/2047] sched/smp: Use lag to simplify cross-runqueue
 placement

Using lag is both more correct and simpler when moving between
runqueues.

Notable, min_vruntime() was invented as a cheap approximation of
avg_vruntime() for this very purpose (SMP migration). Since we now
have the real thing; use it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230531124604.068911180@infradead.org
---
 kernel/sched/fair.c | 145 ++++++--------------------------------------
 1 file changed, 19 insertions(+), 126 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 58798dae11b6..57e8bc14b06e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5083,7 +5083,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	 *
 	 * EEVDF: placement strategy #1 / #2
 	 */
-	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running > 1) {
+	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
 		struct sched_entity *curr = cfs_rq->curr;
 		unsigned long load;
 
@@ -5172,60 +5172,20 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
 
 static inline bool cfs_bandwidth_used(void);
 
-/*
- * MIGRATION
- *
- *	dequeue
- *	  update_curr()
- *	    update_min_vruntime()
- *	  vruntime -= min_vruntime
- *
- *	enqueue
- *	  update_curr()
- *	    update_min_vruntime()
- *	  vruntime += min_vruntime
- *
- * this way the vruntime transition between RQs is done when both
- * min_vruntime are up-to-date.
- *
- * WAKEUP (remote)
- *
- *	->migrate_task_rq_fair() (p->state == TASK_WAKING)
- *	  vruntime -= min_vruntime
- *
- *	enqueue
- *	  update_curr()
- *	    update_min_vruntime()
- *	  vruntime += min_vruntime
- *
- * this way we don't have the most up-to-date min_vruntime on the originating
- * CPU and an up-to-date min_vruntime on the destination CPU.
- */
-
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
 	bool curr = cfs_rq->curr == se;
 
 	/*
 	 * If we're the current task, we must renormalise before calling
 	 * update_curr().
 	 */
-	if (renorm && curr)
-		se->vruntime += cfs_rq->min_vruntime;
+	if (curr)
+		place_entity(cfs_rq, se, 0);
 
 	update_curr(cfs_rq);
 
-	/*
-	 * Otherwise, renormalise after, such that we're placed at the current
-	 * moment in time, instead of some random moment in the past. Being
-	 * placed in the past could significantly boost this task to the
-	 * fairness detriment of existing tasks.
-	 */
-	if (renorm && !curr)
-		se->vruntime += cfs_rq->min_vruntime;
-
 	/*
 	 * When enqueuing a sched_entity, we must:
 	 *   - Update loads to have both entity and cfs_rq synced with now.
@@ -5237,11 +5197,22 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 */
 	update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH);
 	se_update_runnable(se);
+	/*
+	 * XXX update_load_avg() above will have attached us to the pelt sum;
+	 * but update_cfs_group() here will re-adjust the weight and have to
+	 * undo/redo all that. Seems wasteful.
+	 */
 	update_cfs_group(se);
+
+	/*
+	 * XXX now that the entity has been re-weighted, and it's lag adjusted,
+	 * we can place the entity.
+	 */
+	if (!curr)
+		place_entity(cfs_rq, se, 0);
+
 	account_entity_enqueue(cfs_rq, se);
 
-	if (flags & ENQUEUE_WAKEUP)
-		place_entity(cfs_rq, se, 0);
 	/* Entity has migrated, no longer consider this task hot */
 	if (flags & ENQUEUE_MIGRATED)
 		se->exec_start = 0;
@@ -5346,23 +5317,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	clear_buddies(cfs_rq, se);
 
-	if (flags & DEQUEUE_SLEEP)
-		update_entity_lag(cfs_rq, se);
-
+	update_entity_lag(cfs_rq, se);
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	se->on_rq = 0;
 	account_entity_dequeue(cfs_rq, se);
 
-	/*
-	 * Normalize after update_curr(); which will also have moved
-	 * min_vruntime if @se is the one holding it back. But before doing
-	 * update_min_vruntime() again, which will discount @se's position and
-	 * can move min_vruntime forward still more.
-	 */
-	if (!(flags & DEQUEUE_SLEEP))
-		se->vruntime -= cfs_rq->min_vruntime;
-
 	/* return excess runtime on last dequeue */
 	return_cfs_rq_runtime(cfs_rq);
 
@@ -8208,18 +8168,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 {
 	struct sched_entity *se = &p->se;
 
-	/*
-	 * As blocked tasks retain absolute vruntime the migration needs to
-	 * deal with this by subtracting the old and adding the new
-	 * min_vruntime -- the latter is done by enqueue_entity() when placing
-	 * the task on the new runqueue.
-	 */
-	if (READ_ONCE(p->__state) == TASK_WAKING) {
-		struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
-		se->vruntime -= u64_u32_load(cfs_rq->min_vruntime);
-	}
-
 	if (!task_on_rq_migrating(p)) {
 		remove_entity_load_avg(se);
 
@@ -12709,8 +12657,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
  */
 static void task_fork_fair(struct task_struct *p)
 {
-	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se, *curr;
+	struct cfs_rq *cfs_rq;
 	struct rq *rq = this_rq();
 	struct rq_flags rf;
 
@@ -12719,22 +12667,9 @@ static void task_fork_fair(struct task_struct *p)
 
 	cfs_rq = task_cfs_rq(current);
 	curr = cfs_rq->curr;
-	if (curr) {
+	if (curr)
 		update_curr(cfs_rq);
-		se->vruntime = curr->vruntime;
-	}
 	place_entity(cfs_rq, se, 1);
-
-	if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
-		/*
-		 * Upon rescheduling, sched_class::put_prev_task() will place
-		 * 'current' within the tree based on its new key value.
-		 */
-		swap(curr->vruntime, se->vruntime);
-		resched_curr(rq);
-	}
-
-	se->vruntime -= cfs_rq->min_vruntime;
 	rq_unlock(rq, &rf);
 }
 
@@ -12763,34 +12698,6 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
 		check_preempt_curr(rq, p, 0);
 }
 
-static inline bool vruntime_normalized(struct task_struct *p)
-{
-	struct sched_entity *se = &p->se;
-
-	/*
-	 * In both the TASK_ON_RQ_QUEUED and TASK_ON_RQ_MIGRATING cases,
-	 * the dequeue_entity(.flags=0) will already have normalized the
-	 * vruntime.
-	 */
-	if (p->on_rq)
-		return true;
-
-	/*
-	 * When !on_rq, vruntime of the task has usually NOT been normalized.
-	 * But there are some cases where it has already been normalized:
-	 *
-	 * - A forked child which is waiting for being woken up by
-	 *   wake_up_new_task().
-	 * - A task which has been woken up by try_to_wake_up() and
-	 *   waiting for actually being woken up by sched_ttwu_pending().
-	 */
-	if (!se->sum_exec_runtime ||
-	    (READ_ONCE(p->__state) == TASK_WAKING && p->sched_remote_wakeup))
-		return true;
-
-	return false;
-}
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * Propagate the changes of the sched_entity across the tg tree to make it
@@ -12861,16 +12768,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
 static void detach_task_cfs_rq(struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
-	if (!vruntime_normalized(p)) {
-		/*
-		 * Fix up our vruntime so that the current sleep doesn't
-		 * cause 'unlimited' sleep bonus.
-		 */
-		place_entity(cfs_rq, se, 0);
-		se->vruntime -= cfs_rq->min_vruntime;
-	}
 
 	detach_entity_cfs_rq(se);
 }
@@ -12878,12 +12775,8 @@ static void detach_task_cfs_rq(struct task_struct *p)
 static void attach_task_cfs_rq(struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
 	attach_entity_cfs_rq(se);
-
-	if (!vruntime_normalized(p))
-		se->vruntime += cfs_rq->min_vruntime;
 }
 
 static void switched_from_fair(struct rq *rq, struct task_struct *p)

From 5e963f2bd4654a202a8a05aa3a86cb0300b10e6c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:47 +0200
Subject: [PATCH 0280/2047] sched/fair: Commit to EEVDF

EEVDF is a better defined scheduling policy, as a result it has less
heuristics/tunables. There is no compelling reason to keep CFS around.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230531124604.137187212@infradead.org
---
 kernel/sched/debug.c    |   6 -
 kernel/sched/fair.c     | 465 ++++------------------------------------
 kernel/sched/features.h |  12 --
 kernel/sched/sched.h    |   5 -
 4 files changed, 38 insertions(+), 450 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 18efc6d0cc5a..f8d190c7c8c0 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -347,10 +347,7 @@ static __init int sched_init_debug(void)
 	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
 #endif
 
-	debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency);
 	debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity);
-	debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity);
-	debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity);
 
 	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
 	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
@@ -866,10 +863,7 @@ static void sched_debug_header(struct seq_file *m)
 	SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
 #define PN(x) \
 	SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
-	PN(sysctl_sched_latency);
 	PN(sysctl_sched_min_granularity);
-	PN(sysctl_sched_idle_min_granularity);
-	PN(sysctl_sched_wakeup_granularity);
 	P(sysctl_sched_child_runs_first);
 	P(sysctl_sched_features);
 #undef PN
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 57e8bc14b06e..0605eb45c58a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -57,22 +57,6 @@
 #include "stats.h"
 #include "autogroup.h"
 
-/*
- * Targeted preemption latency for CPU-bound tasks:
- *
- * NOTE: this latency value is not the same as the concept of
- * 'timeslice length' - timeslices in CFS are of variable length
- * and have no persistent notion like in traditional, time-slice
- * based scheduling concepts.
- *
- * (to see the precise effective timeslice length of your workload,
- *  run vmstat and monitor the context-switches (cs) field)
- *
- * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
- */
-unsigned int sysctl_sched_latency			= 6000000ULL;
-static unsigned int normalized_sysctl_sched_latency	= 6000000ULL;
-
 /*
  * The initial- and re-scaling of tunables is configurable
  *
@@ -94,37 +78,12 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
 unsigned int sysctl_sched_min_granularity			= 750000ULL;
 static unsigned int normalized_sysctl_sched_min_granularity	= 750000ULL;
 
-/*
- * Minimal preemption granularity for CPU-bound SCHED_IDLE tasks.
- * Applies only when SCHED_IDLE tasks compete with normal tasks.
- *
- * (default: 0.75 msec)
- */
-unsigned int sysctl_sched_idle_min_granularity			= 750000ULL;
-
-/*
- * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
- */
-static unsigned int sched_nr_latency = 8;
-
 /*
  * After fork, child runs first. If set to 0 (default) then
  * parent will (try to) run first.
  */
 unsigned int sysctl_sched_child_runs_first __read_mostly;
 
-/*
- * SCHED_OTHER wake-up granularity.
- *
- * This option delays the preemption effects of decoupled workloads
- * and reduces their over-scheduling. Synchronous workloads will still
- * have immediate wakeup/sleep latencies.
- *
- * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
- */
-unsigned int sysctl_sched_wakeup_granularity			= 1000000UL;
-static unsigned int normalized_sysctl_sched_wakeup_granularity	= 1000000UL;
-
 const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
 
 int sched_thermal_decay_shift;
@@ -279,8 +238,6 @@ static void update_sysctl(void)
 #define SET_SYSCTL(name) \
 	(sysctl_##name = (factor) * normalized_sysctl_##name)
 	SET_SYSCTL(sched_min_granularity);
-	SET_SYSCTL(sched_latency);
-	SET_SYSCTL(sched_wakeup_granularity);
 #undef SET_SYSCTL
 }
 
@@ -888,30 +845,6 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
 	return __node_2_se(left);
 }
 
-static struct sched_entity *__pick_next_entity(struct sched_entity *se)
-{
-	struct rb_node *next = rb_next(&se->run_node);
-
-	if (!next)
-		return NULL;
-
-	return __node_2_se(next);
-}
-
-static struct sched_entity *pick_cfs(struct cfs_rq *cfs_rq, struct sched_entity *curr)
-{
-	struct sched_entity *left = __pick_first_entity(cfs_rq);
-
-	/*
-	 * If curr is set we have to see if its left of the leftmost entity
-	 * still in the tree, provided there was anything in the tree at all.
-	 */
-	if (!left || (curr && entity_before(curr, left)))
-		left = curr;
-
-	return left;
-}
-
 /*
  * Earliest Eligible Virtual Deadline First
  *
@@ -1008,85 +941,15 @@ int sched_update_scaling(void)
 {
 	unsigned int factor = get_update_sysctl_factor();
 
-	sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
-					sysctl_sched_min_granularity);
-
 #define WRT_SYSCTL(name) \
 	(normalized_sysctl_##name = sysctl_##name / (factor))
 	WRT_SYSCTL(sched_min_granularity);
-	WRT_SYSCTL(sched_latency);
-	WRT_SYSCTL(sched_wakeup_granularity);
 #undef WRT_SYSCTL
 
 	return 0;
 }
 #endif
 
-/*
- * The idea is to set a period in which each task runs once.
- *
- * When there are too many tasks (sched_nr_latency) we have to stretch
- * this period because otherwise the slices get too small.
- *
- * p = (nr <= nl) ? l : l*nr/nl
- */
-static u64 __sched_period(unsigned long nr_running)
-{
-	if (unlikely(nr_running > sched_nr_latency))
-		return nr_running * sysctl_sched_min_granularity;
-	else
-		return sysctl_sched_latency;
-}
-
-static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq);
-
-/*
- * We calculate the wall-time slice from the period by taking a part
- * proportional to the weight.
- *
- * s = p*P[w/rw]
- */
-static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	unsigned int nr_running = cfs_rq->nr_running;
-	struct sched_entity *init_se = se;
-	unsigned int min_gran;
-	u64 slice;
-
-	if (sched_feat(ALT_PERIOD))
-		nr_running = rq_of(cfs_rq)->cfs.h_nr_running;
-
-	slice = __sched_period(nr_running + !se->on_rq);
-
-	for_each_sched_entity(se) {
-		struct load_weight *load;
-		struct load_weight lw;
-		struct cfs_rq *qcfs_rq;
-
-		qcfs_rq = cfs_rq_of(se);
-		load = &qcfs_rq->load;
-
-		if (unlikely(!se->on_rq)) {
-			lw = qcfs_rq->load;
-
-			update_load_add(&lw, se->load.weight);
-			load = &lw;
-		}
-		slice = __calc_delta(slice, se->load.weight, load);
-	}
-
-	if (sched_feat(BASE_SLICE)) {
-		if (se_is_idle(init_se) && !sched_idle_cfs_rq(cfs_rq))
-			min_gran = sysctl_sched_idle_min_granularity;
-		else
-			min_gran = sysctl_sched_min_granularity;
-
-		slice = max_t(u64, slice, min_gran);
-	}
-
-	return slice;
-}
-
 static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
 
 /*
@@ -1098,35 +961,25 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	if ((s64)(se->vruntime - se->deadline) < 0)
 		return;
 
-	if (sched_feat(EEVDF)) {
-		/*
-		 * For EEVDF the virtual time slope is determined by w_i (iow.
-		 * nice) while the request time r_i is determined by
-		 * sysctl_sched_min_granularity.
-		 */
-		se->slice = sysctl_sched_min_granularity;
-
-		/*
-		 * The task has consumed its request, reschedule.
-		 */
-		if (cfs_rq->nr_running > 1) {
-			resched_curr(rq_of(cfs_rq));
-			clear_buddies(cfs_rq, se);
-		}
-	} else {
-		/*
-		 * When many tasks blow up the sched_period; it is possible
-		 * that sched_slice() reports unusually large results (when
-		 * many tasks are very light for example). Therefore impose a
-		 * maximum.
-		 */
-		se->slice = min_t(u64, sched_slice(cfs_rq, se), sysctl_sched_latency);
-	}
+	/*
+	 * For EEVDF the virtual time slope is determined by w_i (iow.
+	 * nice) while the request time r_i is determined by
+	 * sysctl_sched_min_granularity.
+	 */
+	se->slice = sysctl_sched_min_granularity;
 
 	/*
 	 * EEVDF: vd_i = ve_i + r_i / w_i
 	 */
 	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
+
+	/*
+	 * The task has consumed its request, reschedule.
+	 */
+	if (cfs_rq->nr_running > 1) {
+		resched_curr(rq_of(cfs_rq));
+		clear_buddies(cfs_rq, se);
+	}
 }
 
 #include "pelt.h"
@@ -5055,19 +4908,6 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
 
 #endif /* CONFIG_SMP */
 
-static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-#ifdef CONFIG_SCHED_DEBUG
-	s64 d = se->vruntime - cfs_rq->min_vruntime;
-
-	if (d < 0)
-		d = -d;
-
-	if (d > 3*sysctl_sched_latency)
-		schedstat_inc(cfs_rq->nr_spread_over);
-#endif
-}
-
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
@@ -5219,7 +5059,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	check_schedstat_required();
 	update_stats_enqueue_fair(cfs_rq, se, flags);
-	check_spread(cfs_rq, se);
 	if (!curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
@@ -5241,17 +5080,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	}
 }
 
-static void __clear_buddies_last(struct sched_entity *se)
-{
-	for_each_sched_entity(se) {
-		struct cfs_rq *cfs_rq = cfs_rq_of(se);
-		if (cfs_rq->last != se)
-			break;
-
-		cfs_rq->last = NULL;
-	}
-}
-
 static void __clear_buddies_next(struct sched_entity *se)
 {
 	for_each_sched_entity(se) {
@@ -5263,27 +5091,10 @@ static void __clear_buddies_next(struct sched_entity *se)
 	}
 }
 
-static void __clear_buddies_skip(struct sched_entity *se)
-{
-	for_each_sched_entity(se) {
-		struct cfs_rq *cfs_rq = cfs_rq_of(se);
-		if (cfs_rq->skip != se)
-			break;
-
-		cfs_rq->skip = NULL;
-	}
-}
-
 static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	if (cfs_rq->last == se)
-		__clear_buddies_last(se);
-
 	if (cfs_rq->next == se)
 		__clear_buddies_next(se);
-
-	if (cfs_rq->skip == se)
-		__clear_buddies_skip(se);
 }
 
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
@@ -5341,45 +5152,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		update_idle_cfs_rq_clock_pelt(cfs_rq);
 }
 
-/*
- * Preempt the current task with a newly woken task if needed:
- */
-static void
-check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
-{
-	unsigned long delta_exec;
-	struct sched_entity *se;
-	s64 delta;
-
-	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
-	if (delta_exec > curr->slice) {
-		resched_curr(rq_of(cfs_rq));
-		/*
-		 * The current task ran long enough, ensure it doesn't get
-		 * re-elected due to buddy favours.
-		 */
-		clear_buddies(cfs_rq, curr);
-		return;
-	}
-
-	/*
-	 * Ensure that a task that missed wakeup preemption by a
-	 * narrow margin doesn't have to wait for a full slice.
-	 * This also mitigates buddy induced latencies under load.
-	 */
-	if (delta_exec < sysctl_sched_min_granularity)
-		return;
-
-	se = __pick_first_entity(cfs_rq);
-	delta = curr->vruntime - se->vruntime;
-
-	if (delta < 0)
-		return;
-
-	if (delta > curr->slice)
-		resched_curr(rq_of(cfs_rq));
-}
-
 static void
 set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
@@ -5418,9 +5190,6 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
-static int
-wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
-
 /*
  * Pick the next process, keeping these things in mind, in this order:
  * 1) keep things fair between processes/task groups
@@ -5431,53 +5200,14 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
 static struct sched_entity *
 pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-	struct sched_entity *left, *se;
-
-	if (sched_feat(EEVDF)) {
-		/*
-		 * Enabling NEXT_BUDDY will affect latency but not fairness.
-		 */
-		if (sched_feat(NEXT_BUDDY) &&
-		    cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next))
-			return cfs_rq->next;
-
-		return pick_eevdf(cfs_rq);
-	}
-
-	se = left = pick_cfs(cfs_rq, curr);
-
 	/*
-	 * Avoid running the skip buddy, if running something else can
-	 * be done without getting too unfair.
+	 * Enabling NEXT_BUDDY will affect latency but not fairness.
 	 */
-	if (cfs_rq->skip && cfs_rq->skip == se) {
-		struct sched_entity *second;
+	if (sched_feat(NEXT_BUDDY) &&
+	    cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next))
+		return cfs_rq->next;
 
-		if (se == curr) {
-			second = __pick_first_entity(cfs_rq);
-		} else {
-			second = __pick_next_entity(se);
-			if (!second || (curr && entity_before(curr, second)))
-				second = curr;
-		}
-
-		if (second && wakeup_preempt_entity(second, left) < 1)
-			se = second;
-	}
-
-	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) {
-		/*
-		 * Someone really wants this to run. If it's not unfair, run it.
-		 */
-		se = cfs_rq->next;
-	} else if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) {
-		/*
-		 * Prefer last buddy, try to return the CPU to a preempted task.
-		 */
-		se = cfs_rq->last;
-	}
-
-	return se;
+	return pick_eevdf(cfs_rq);
 }
 
 static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
@@ -5494,8 +5224,6 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 	/* throttle cfs_rqs exceeding runtime */
 	check_cfs_rq_runtime(cfs_rq);
 
-	check_spread(cfs_rq, prev);
-
 	if (prev->on_rq) {
 		update_stats_wait_start_fair(cfs_rq, prev);
 		/* Put 'current' back into the tree. */
@@ -5536,9 +5264,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 			hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
 		return;
 #endif
-
-	if (!sched_feat(EEVDF) && cfs_rq->nr_running > 1)
-		check_preempt_tick(cfs_rq, curr);
 }
 
 
@@ -6610,8 +6335,7 @@ static void hrtick_update(struct rq *rq)
 	if (!hrtick_enabled_fair(rq) || curr->sched_class != &fair_sched_class)
 		return;
 
-	if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
-		hrtick_start_fair(rq, curr);
+	hrtick_start_fair(rq, curr);
 }
 #else /* !CONFIG_SCHED_HRTICK */
 static inline void
@@ -6652,17 +6376,6 @@ static int sched_idle_rq(struct rq *rq)
 			rq->nr_running);
 }
 
-/*
- * Returns true if cfs_rq only has SCHED_IDLE entities enqueued. Note the use
- * of idle_nr_running, which does not consider idle descendants of normal
- * entities.
- */
-static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->nr_running &&
-		cfs_rq->nr_running == cfs_rq->idle_nr_running;
-}
-
 #ifdef CONFIG_SMP
 static int sched_idle_cpu(int cpu)
 {
@@ -8205,66 +7918,6 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 }
 #endif /* CONFIG_SMP */
 
-static unsigned long wakeup_gran(struct sched_entity *se)
-{
-	unsigned long gran = sysctl_sched_wakeup_granularity;
-
-	/*
-	 * Since its curr running now, convert the gran from real-time
-	 * to virtual-time in his units.
-	 *
-	 * By using 'se' instead of 'curr' we penalize light tasks, so
-	 * they get preempted easier. That is, if 'se' < 'curr' then
-	 * the resulting gran will be larger, therefore penalizing the
-	 * lighter, if otoh 'se' > 'curr' then the resulting gran will
-	 * be smaller, again penalizing the lighter task.
-	 *
-	 * This is especially important for buddies when the leftmost
-	 * task is higher priority than the buddy.
-	 */
-	return calc_delta_fair(gran, se);
-}
-
-/*
- * Should 'se' preempt 'curr'.
- *
- *             |s1
- *        |s2
- *   |s3
- *         g
- *      |<--->|c
- *
- *  w(c, s1) = -1
- *  w(c, s2) =  0
- *  w(c, s3) =  1
- *
- */
-static int
-wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
-{
-	s64 gran, vdiff = curr->vruntime - se->vruntime;
-
-	if (vdiff <= 0)
-		return -1;
-
-	gran = wakeup_gran(se);
-	if (vdiff > gran)
-		return 1;
-
-	return 0;
-}
-
-static void set_last_buddy(struct sched_entity *se)
-{
-	for_each_sched_entity(se) {
-		if (SCHED_WARN_ON(!se->on_rq))
-			return;
-		if (se_is_idle(se))
-			return;
-		cfs_rq_of(se)->last = se;
-	}
-}
-
 static void set_next_buddy(struct sched_entity *se)
 {
 	for_each_sched_entity(se) {
@@ -8276,12 +7929,6 @@ static void set_next_buddy(struct sched_entity *se)
 	}
 }
 
-static void set_skip_buddy(struct sched_entity *se)
-{
-	for_each_sched_entity(se)
-		cfs_rq_of(se)->skip = se;
-}
-
 /*
  * Preempt the current task with a newly woken task if needed:
  */
@@ -8290,7 +7937,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	struct task_struct *curr = rq->curr;
 	struct sched_entity *se = &curr->se, *pse = &p->se;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
-	int scale = cfs_rq->nr_running >= sched_nr_latency;
 	int next_buddy_marked = 0;
 	int cse_is_idle, pse_is_idle;
 
@@ -8306,7 +7952,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
 		return;
 
-	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) {
+	if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) {
 		set_next_buddy(pse);
 		next_buddy_marked = 1;
 	}
@@ -8354,44 +8000,16 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	cfs_rq = cfs_rq_of(se);
 	update_curr(cfs_rq);
 
-	if (sched_feat(EEVDF)) {
-		/*
-		 * XXX pick_eevdf(cfs_rq) != se ?
-		 */
-		if (pick_eevdf(cfs_rq) == pse)
-			goto preempt;
-
-		return;
-	}
-
-	if (wakeup_preempt_entity(se, pse) == 1) {
-		/*
-		 * Bias pick_next to pick the sched entity that is
-		 * triggering this preemption.
-		 */
-		if (!next_buddy_marked)
-			set_next_buddy(pse);
+	/*
+	 * XXX pick_eevdf(cfs_rq) != se ?
+	 */
+	if (pick_eevdf(cfs_rq) == pse)
 		goto preempt;
-	}
 
 	return;
 
 preempt:
 	resched_curr(rq);
-	/*
-	 * Only set the backward buddy when the current task is still
-	 * on the rq. This can happen when a wakeup gets interleaved
-	 * with schedule on the ->pre_schedule() or idle_balance()
-	 * point, either of which can * drop the rq lock.
-	 *
-	 * Also, during early boot the idle thread is in the fair class,
-	 * for obvious reasons its a bad idea to schedule back to it.
-	 */
-	if (unlikely(!se->on_rq || curr == rq->idle))
-		return;
-
-	if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
-		set_last_buddy(se);
 }
 
 #ifdef CONFIG_SMP
@@ -8592,8 +8210,6 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
 
 /*
  * sched_yield() is very simple
- *
- * The magic of dealing with the ->skip buddy is in pick_next_entity.
  */
 static void yield_task_fair(struct rq *rq)
 {
@@ -8609,23 +8225,19 @@ static void yield_task_fair(struct rq *rq)
 
 	clear_buddies(cfs_rq, se);
 
-	if (sched_feat(EEVDF) || curr->policy != SCHED_BATCH) {
-		update_rq_clock(rq);
-		/*
-		 * Update run-time statistics of the 'current'.
-		 */
-		update_curr(cfs_rq);
-		/*
-		 * Tell update_rq_clock() that we've just updated,
-		 * so we don't do microscopic update in schedule()
-		 * and double the fastpath cost.
-		 */
-		rq_clock_skip_update(rq);
-	}
-	if (sched_feat(EEVDF))
-		se->deadline += calc_delta_fair(se->slice, se);
+	update_rq_clock(rq);
+	/*
+	 * Update run-time statistics of the 'current'.
+	 */
+	update_curr(cfs_rq);
+	/*
+	 * Tell update_rq_clock() that we've just updated,
+	 * so we don't do microscopic update in schedule()
+	 * and double the fastpath cost.
+	 */
+	rq_clock_skip_update(rq);
 
-	set_skip_buddy(se);
+	se->deadline += calc_delta_fair(se->slice, se);
 }
 
 static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
@@ -8873,8 +8485,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 	 * Buddy candidates are cache hot:
 	 */
 	if (sched_feat(CACHE_HOT_BUDDY) && env->dst_rq->nr_running &&
-			(&p->se == cfs_rq_of(&p->se)->next ||
-			 &p->se == cfs_rq_of(&p->se)->last))
+	    (&p->se == cfs_rq_of(&p->se)->next))
 		return 1;
 
 	if (sysctl_sched_migration_cost == -1)
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 2a830eccda3e..54334ca5c5c6 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -14,13 +14,6 @@ SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
  */
 SCHED_FEAT(NEXT_BUDDY, false)
 
-/*
- * Prefer to schedule the task that ran last (when we did
- * wake-preempt) as that likely will touch the same data, increases
- * cache locality.
- */
-SCHED_FEAT(LAST_BUDDY, true)
-
 /*
  * Consider buddies to be cache hot, decreases the likeliness of a
  * cache buddy being migrated away, increases cache locality.
@@ -93,8 +86,3 @@ SCHED_FEAT(UTIL_EST, true)
 SCHED_FEAT(UTIL_EST_FASTUP, true)
 
 SCHED_FEAT(LATENCY_WARN, false)
-
-SCHED_FEAT(ALT_PERIOD, true)
-SCHED_FEAT(BASE_SLICE, true)
-
-SCHED_FEAT(EEVDF, true)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index aa5b293ca4ed..f814bb731235 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -570,8 +570,6 @@ struct cfs_rq {
 	 */
 	struct sched_entity	*curr;
 	struct sched_entity	*next;
-	struct sched_entity	*last;
-	struct sched_entity	*skip;
 
 #ifdef	CONFIG_SCHED_DEBUG
 	unsigned int		nr_spread_over;
@@ -2508,9 +2506,6 @@ extern const_debug unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_min_granularity;
 
 #ifdef CONFIG_SCHED_DEBUG
-extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_idle_min_granularity;
-extern unsigned int sysctl_sched_wakeup_granularity;
 extern int sysctl_resched_latency_warn_ms;
 extern int sysctl_resched_latency_warn_once;
 

From e4ec3318a17f5dcf11bc23b2d2c1da4c1c5bb507 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:48 +0200
Subject: [PATCH 0281/2047] sched/debug: Rename sysctl_sched_min_granularity to
 sysctl_sched_base_slice

EEVDF uses this tunable as the base request/slice -- make sure the
name reflects this.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230531124604.205287511@infradead.org
---
 kernel/sched/core.c  |  2 +-
 kernel/sched/debug.c |  4 ++--
 kernel/sched/fair.c  | 12 ++++++------
 kernel/sched/sched.h |  2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e85a2fd258e2..a5d3422f7d0d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4502,7 +4502,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
 	p->se.vlag			= 0;
-	p->se.slice			= sysctl_sched_min_granularity;
+	p->se.slice			= sysctl_sched_base_slice;
 	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index f8d190c7c8c0..4c3d0d9f3db6 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -347,7 +347,7 @@ static __init int sched_init_debug(void)
 	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
 #endif
 
-	debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity);
+	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
 
 	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
 	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
@@ -863,7 +863,7 @@ static void sched_debug_header(struct seq_file *m)
 	SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
 #define PN(x) \
 	SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
-	PN(sysctl_sched_min_granularity);
+	PN(sysctl_sched_base_slice);
 	P(sysctl_sched_child_runs_first);
 	P(sysctl_sched_features);
 #undef PN
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0605eb45c58a..61747a25d06d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -75,8 +75,8 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
  *
  * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
-unsigned int sysctl_sched_min_granularity			= 750000ULL;
-static unsigned int normalized_sysctl_sched_min_granularity	= 750000ULL;
+unsigned int sysctl_sched_base_slice			= 750000ULL;
+static unsigned int normalized_sysctl_sched_base_slice	= 750000ULL;
 
 /*
  * After fork, child runs first. If set to 0 (default) then
@@ -237,7 +237,7 @@ static void update_sysctl(void)
 
 #define SET_SYSCTL(name) \
 	(sysctl_##name = (factor) * normalized_sysctl_##name)
-	SET_SYSCTL(sched_min_granularity);
+	SET_SYSCTL(sched_base_slice);
 #undef SET_SYSCTL
 }
 
@@ -943,7 +943,7 @@ int sched_update_scaling(void)
 
 #define WRT_SYSCTL(name) \
 	(normalized_sysctl_##name = sysctl_##name / (factor))
-	WRT_SYSCTL(sched_min_granularity);
+	WRT_SYSCTL(sched_base_slice);
 #undef WRT_SYSCTL
 
 	return 0;
@@ -964,9 +964,9 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	/*
 	 * For EEVDF the virtual time slope is determined by w_i (iow.
 	 * nice) while the request time r_i is determined by
-	 * sysctl_sched_min_granularity.
+	 * sysctl_sched_base_slice.
 	 */
-	se->slice = sysctl_sched_min_granularity;
+	se->slice = sysctl_sched_base_slice;
 
 	/*
 	 * EEVDF: vd_i = ve_i + r_i / w_i
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f814bb731235..7ff9965570e6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2503,7 +2503,7 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
 
-extern unsigned int sysctl_sched_min_granularity;
+extern unsigned int sysctl_sched_base_slice;
 
 #ifdef CONFIG_SCHED_DEBUG
 extern int sysctl_resched_latency_warn_ms;

From d07f09a1f99cabbc86bc5c97d962eb8a466106b5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 31 May 2023 13:58:49 +0200
Subject: [PATCH 0282/2047] sched/fair: Propagate enqueue flags into
 place_entity()

This allows place_entity() to consider ENQUEUE_WAKEUP and
ENQUEUE_MIGRATED.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230531124604.274010996@infradead.org
---
 kernel/sched/fair.c  | 10 +++++-----
 kernel/sched/sched.h |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 61747a25d06d..5c8c9f7d8496 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4909,7 +4909,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
 #endif /* CONFIG_SMP */
 
 static void
-place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	u64 vslice = calc_delta_fair(se->slice, se);
 	u64 vruntime = avg_vruntime(cfs_rq);
@@ -4998,7 +4998,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	 * on average, halfway through their slice, as such start tasks
 	 * off with half a slice to ease into the competition.
 	 */
-	if (sched_feat(PLACE_DEADLINE_INITIAL) && initial)
+	if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL))
 		vslice /= 2;
 
 	/*
@@ -5022,7 +5022,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * update_curr().
 	 */
 	if (curr)
-		place_entity(cfs_rq, se, 0);
+		place_entity(cfs_rq, se, flags);
 
 	update_curr(cfs_rq);
 
@@ -5049,7 +5049,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * we can place the entity.
 	 */
 	if (!curr)
-		place_entity(cfs_rq, se, 0);
+		place_entity(cfs_rq, se, flags);
 
 	account_entity_enqueue(cfs_rq, se);
 
@@ -12280,7 +12280,7 @@ static void task_fork_fair(struct task_struct *p)
 	curr = cfs_rq->curr;
 	if (curr)
 		update_curr(cfs_rq);
-	place_entity(cfs_rq, se, 1);
+	place_entity(cfs_rq, se, ENQUEUE_INITIAL);
 	rq_unlock(rq, &rf);
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7ff9965570e6..db5853761b1f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2199,6 +2199,7 @@ extern const u32		sched_prio_to_wmult[40];
 #else
 #define ENQUEUE_MIGRATED	0x00
 #endif
+#define ENQUEUE_INITIAL		0x80
 
 #define RETRY_TASK		((void *)-1UL)
 

From e91d0f05e66afba9a3804bccf3d0a4310bb30024 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 14 Jul 2023 11:44:58 -0600
Subject: [PATCH 0283/2047] gpio: Explicitly include correct DT includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Acked-by: Romain Perier <romain.perier@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-ath79.c         | 3 ++-
 drivers/gpio/gpio-brcmstb.c       | 4 ++--
 drivers/gpio/gpio-creg-snps.c     | 2 +-
 drivers/gpio/gpio-eic-sprd.c      | 2 +-
 drivers/gpio/gpio-ge.c            | 2 +-
 drivers/gpio/gpio-grgpio.c        | 2 +-
 drivers/gpio/gpio-hlwd.c          | 2 +-
 drivers/gpio/gpio-logicvc.c       | 1 -
 drivers/gpio/gpio-mb86s7x.c       | 2 +-
 drivers/gpio/gpio-mpc5200.c       | 2 +-
 drivers/gpio/gpio-mpc8xxx.c       | 4 +---
 drivers/gpio/gpio-msc313.c        | 1 -
 drivers/gpio/gpio-mxc.c           | 1 -
 drivers/gpio/gpio-mxs.c           | 1 -
 drivers/gpio/gpio-omap.c          | 1 -
 drivers/gpio/gpio-palmas.c        | 1 -
 drivers/gpio/gpio-pmic-eic-sprd.c | 2 +-
 drivers/gpio/gpio-pxa.c           | 1 -
 drivers/gpio/gpio-rcar.c          | 1 -
 drivers/gpio/gpio-rockchip.c      | 2 +-
 drivers/gpio/gpio-sprd.c          | 2 +-
 drivers/gpio/gpio-stp-xway.c      | 3 ++-
 drivers/gpio/gpio-syscon.c        | 1 -
 drivers/gpio/gpio-tegra.c         | 2 +-
 drivers/gpio/gpio-tegra186.c      | 2 +-
 drivers/gpio/gpio-tps6586x.c      | 2 +-
 drivers/gpio/gpio-tps65910.c      | 2 +-
 drivers/gpio/gpio-ts4800.c        | 3 +--
 drivers/gpio/gpio-ts4900.c        | 2 +-
 drivers/gpio/gpio-uniphier.c      | 1 -
 drivers/gpio/gpio-vf610.c         | 1 -
 drivers/gpio/gpio-wcd934x.c       | 3 ++-
 drivers/gpio/gpio-xilinx.c        | 4 ++--
 drivers/gpio/gpio-xra1403.c       | 2 +-
 34 files changed, 28 insertions(+), 39 deletions(-)

diff --git a/drivers/gpio/gpio-ath79.c b/drivers/gpio/gpio-ath79.c
index aa0a954b8392..f0c0c0f77eb0 100644
--- a/drivers/gpio/gpio-ath79.c
+++ b/drivers/gpio/gpio-ath79.c
@@ -9,8 +9,9 @@
  */
 
 #include <linux/gpio/driver.h>
+#include <linux/platform_device.h>
 #include <linux/platform_data/gpio-ath79.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/irq.h>
diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c
index 6566517fe0d8..bccdbfd5ec80 100644
--- a/drivers/gpio/gpio-brcmstb.c
+++ b/drivers/gpio/gpio-brcmstb.c
@@ -3,12 +3,12 @@
 
 #include <linux/bitops.h>
 #include <linux/gpio/driver.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
 #include <linux/module.h>
 #include <linux/irqdomain.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/interrupt.h>
+#include <linux/platform_device.h>
 
 enum gio_reg_index {
 	GIO_REG_ODEN = 0,
diff --git a/drivers/gpio/gpio-creg-snps.c b/drivers/gpio/gpio-creg-snps.c
index 789384c6e178..4968232f70f2 100644
--- a/drivers/gpio/gpio-creg-snps.c
+++ b/drivers/gpio/gpio-creg-snps.c
@@ -8,7 +8,7 @@
 #include <linux/gpio/driver.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 #define MAX_GPIO	32
 
diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c
index 84352a6f4973..67b1e09e8985 100644
--- a/drivers/gpio/gpio-eic-sprd.c
+++ b/drivers/gpio/gpio-eic-sprd.c
@@ -9,7 +9,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
 
diff --git a/drivers/gpio/gpio-ge.c b/drivers/gpio/gpio-ge.c
index 7bd4c2a4cc11..4eecbc862abc 100644
--- a/drivers/gpio/gpio-ge.c
+++ b/drivers/gpio/gpio-ge.c
@@ -20,7 +20,7 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/slab.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/module.h>
 #include <linux/gpio/driver.h>
diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c
index bea0e32c195d..0163c95f6dd7 100644
--- a/drivers/gpio/gpio-grgpio.c
+++ b/drivers/gpio/gpio-grgpio.c
@@ -19,10 +19,10 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/gpio/driver.h>
 #include <linux/slab.h>
 #include <linux/err.h>
diff --git a/drivers/gpio/gpio-hlwd.c b/drivers/gpio/gpio-hlwd.c
index c208ac1c54a6..1bcfc1835dae 100644
--- a/drivers/gpio/gpio-hlwd.c
+++ b/drivers/gpio/gpio-hlwd.c
@@ -10,7 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 
diff --git a/drivers/gpio/gpio-logicvc.c b/drivers/gpio/gpio-logicvc.c
index 992cc958a43f..2b9876bc1383 100644
--- a/drivers/gpio/gpio-logicvc.c
+++ b/drivers/gpio/gpio-logicvc.c
@@ -8,7 +8,6 @@
 #include <linux/gpio/driver.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
diff --git a/drivers/gpio/gpio-mb86s7x.c b/drivers/gpio/gpio-mb86s7x.c
index 37c5363e391e..ca7eb5e8bfaa 100644
--- a/drivers/gpio/gpio-mb86s7x.c
+++ b/drivers/gpio/gpio-mb86s7x.c
@@ -10,11 +10,11 @@
 #include <linux/io.h>
 #include <linux/init.h>
 #include <linux/clk.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
-#include <linux/of_device.h>
 #include <linux/gpio/driver.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
diff --git a/drivers/gpio/gpio-mpc5200.c b/drivers/gpio/gpio-mpc5200.c
index 3b0bfff8c778..b49e3ca64015 100644
--- a/drivers/gpio/gpio-mpc5200.c
+++ b/drivers/gpio/gpio-mpc5200.c
@@ -10,7 +10,7 @@
 #include <linux/slab.h>
 #include <linux/gpio/legacy-of-mm-gpiochip.h>
 #include <linux/io.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/module.h>
 
 #include <asm/mpc52xx.h>
diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
index 5979a36bf754..ebf2f511df59 100644
--- a/drivers/gpio/gpio-mpc8xxx.c
+++ b/drivers/gpio/gpio-mpc8xxx.c
@@ -9,12 +9,10 @@
 #include <linux/acpi.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/property.h>
 #include <linux/mod_devicetable.h>
 #include <linux/slab.h>
diff --git a/drivers/gpio/gpio-msc313.c b/drivers/gpio/gpio-msc313.c
index 036ad2324892..2f448eb23abb 100644
--- a/drivers/gpio/gpio-msc313.c
+++ b/drivers/gpio/gpio-msc313.c
@@ -6,7 +6,6 @@
 #include <linux/types.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/gpio/driver.h>
 #include <linux/module.h>
diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index a9fb6bd9aa6f..443f3e4d9aff 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -23,7 +23,6 @@
 #include <linux/syscore_ops.h>
 #include <linux/gpio/driver.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/bug.h>
 
 #define IMX_SCU_WAKEUP_OFF		0
diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c
index 390e619a2831..8e04c9c4b5a2 100644
--- a/drivers/gpio/gpio-mxs.c
+++ b/drivers/gpio/gpio-mxs.c
@@ -14,7 +14,6 @@
 #include <linux/irqdomain.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/gpio/driver.h>
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index a08be5bf6808..2b78fde74e30 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -22,7 +22,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/pm.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/gpio/driver.h>
 #include <linux/bitops.h>
 #include <linux/platform_data/gpio-omap.h>
diff --git a/drivers/gpio/gpio-palmas.c b/drivers/gpio/gpio-palmas.c
index bac10c2faf56..6140e87c6754 100644
--- a/drivers/gpio/gpio-palmas.c
+++ b/drivers/gpio/gpio-palmas.c
@@ -12,7 +12,6 @@
 #include <linux/init.h>
 #include <linux/mfd/palmas.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 
 struct palmas_gpio {
diff --git a/drivers/gpio/gpio-pmic-eic-sprd.c b/drivers/gpio/gpio-pmic-eic-sprd.c
index c3e4d90f6b18..dabb0da3bd33 100644
--- a/drivers/gpio/gpio-pmic-eic-sprd.c
+++ b/drivers/gpio/gpio-pmic-eic-sprd.c
@@ -8,7 +8,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c
index a1630ed4b741..7e9f7a32d3ee 100644
--- a/drivers/gpio/gpio-pxa.c
+++ b/drivers/gpio/gpio-pxa.c
@@ -20,7 +20,6 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/syscore_ops.h>
diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index 2525adb52f4f..86e69cde04da 100644
--- a/drivers/gpio/gpio-rcar.c
+++ b/drivers/gpio/gpio-rcar.c
@@ -15,7 +15,6 @@
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c
index e5de15a2ab9a..b35b9604413f 100644
--- a/drivers/gpio/gpio-rockchip.c
+++ b/drivers/gpio/gpio-rockchip.c
@@ -17,10 +17,10 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/pinctrl/pinconf-generic.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 
 #include "../pinctrl/core.h"
diff --git a/drivers/gpio/gpio-sprd.c b/drivers/gpio/gpio-sprd.c
index 072b4e653216..c117c11bfb29 100644
--- a/drivers/gpio/gpio-sprd.c
+++ b/drivers/gpio/gpio-sprd.c
@@ -7,8 +7,8 @@
 #include <linux/bitops.h>
 #include <linux/gpio/driver.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
 
diff --git a/drivers/gpio/gpio-stp-xway.c b/drivers/gpio/gpio-stp-xway.c
index 4750ea34204c..053d616f2e02 100644
--- a/drivers/gpio/gpio-stp-xway.c
+++ b/drivers/gpio/gpio-stp-xway.c
@@ -4,11 +4,12 @@
  *  Copyright (C) 2012 John Crispin <john@phrozen.org>
  */
 
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/mutex.h>
 #include <linux/gpio/driver.h>
 #include <linux/io.h>
diff --git a/drivers/gpio/gpio-syscon.c b/drivers/gpio/gpio-syscon.c
index 6076937b18e7..41bf47c8c983 100644
--- a/drivers/gpio/gpio-syscon.c
+++ b/drivers/gpio/gpio-syscon.c
@@ -9,7 +9,6 @@
 #include <linux/gpio/driver.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index 5b265a6fd3c1..ea715582bcf3 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -15,7 +15,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/gpio/driver.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
index 80d08ddde40e..d87dd06db40d 100644
--- a/drivers/gpio/gpio-tegra186.c
+++ b/drivers/gpio/gpio-tegra186.c
@@ -11,7 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/seq_file.h>
 
diff --git a/drivers/gpio/gpio-tps6586x.c b/drivers/gpio/gpio-tps6586x.c
index c5713524b581..d277aa951143 100644
--- a/drivers/gpio/gpio-tps6586x.c
+++ b/drivers/gpio/gpio-tps6586x.c
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/mfd/tps6586x.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 
 /* GPIO control registers */
diff --git a/drivers/gpio/gpio-tps65910.c b/drivers/gpio/gpio-tps65910.c
index 321e6945f0be..187d21580573 100644
--- a/drivers/gpio/gpio-tps65910.c
+++ b/drivers/gpio/gpio-tps65910.c
@@ -15,7 +15,7 @@
 #include <linux/i2c.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/tps65910.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 struct tps65910_gpio {
 	struct gpio_chip gpio_chip;
diff --git a/drivers/gpio/gpio-ts4800.c b/drivers/gpio/gpio-ts4800.c
index 95d80ba14bee..4748e3d47106 100644
--- a/drivers/gpio/gpio-ts4800.c
+++ b/drivers/gpio/gpio-ts4800.c
@@ -7,8 +7,7 @@
 
 #include <linux/gpio/driver.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 
 #define DEFAULT_PIN_NUMBER      16
diff --git a/drivers/gpio/gpio-ts4900.c b/drivers/gpio/gpio-ts4900.c
index eba96319dac2..0f6397b77c9d 100644
--- a/drivers/gpio/gpio-ts4900.c
+++ b/drivers/gpio/gpio-ts4900.c
@@ -8,7 +8,7 @@
 
 #include <linux/gpio/driver.h>
 #include <linux/i2c.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/module.h>
 #include <linux/regmap.h>
 
diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c
index 19ce6675cbc0..9725b7aa18a7 100644
--- a/drivers/gpio/gpio-uniphier.c
+++ b/drivers/gpio/gpio-uniphier.c
@@ -9,7 +9,6 @@
 #include <linux/irqdomain.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c
index d3f3a69d4907..54e7c51f48c8 100644
--- a/drivers/gpio/gpio-vf610.c
+++ b/drivers/gpio/gpio-vf610.c
@@ -17,7 +17,6 @@
 #include <linux/irq.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/pinctrl/consumer.h>
 
diff --git a/drivers/gpio/gpio-wcd934x.c b/drivers/gpio/gpio-wcd934x.c
index 817750e4e033..2bba27b13947 100644
--- a/drivers/gpio/gpio-wcd934x.c
+++ b/drivers/gpio/gpio-wcd934x.c
@@ -1,11 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2019, Linaro Limited
 
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/gpio/driver.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
-#include <linux/of_device.h>
 
 #define WCD_PIN_MASK(p) BIT(p)
 #define WCD_REG_DIR_CTL_OFFSET 0x42
diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c
index 1fa66f2a667f..a16945e8319e 100644
--- a/drivers/gpio/gpio-xilinx.c
+++ b/drivers/gpio/gpio-xilinx.c
@@ -15,8 +15,8 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 
diff --git a/drivers/gpio/gpio-xra1403.c b/drivers/gpio/gpio-xra1403.c
index bbc06cdd9634..eed8a1684830 100644
--- a/drivers/gpio/gpio-xra1403.c
+++ b/drivers/gpio/gpio-xra1403.c
@@ -8,9 +8,9 @@
 #include <linux/bitops.h>
 #include <linux/gpio/driver.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
 #include <linux/seq_file.h>
 #include <linux/spi/spi.h>
 #include <linux/regmap.h>

From c9ab610e5cd98f7fe0213b382870437368f11457 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 19 Jul 2023 01:30:53 +0300
Subject: [PATCH 0284/2047] gpio: bcm-kona: Make driver OF-independent

There is nothing in the driver that requires OF APIs,
make the driver OF independent.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig         |  2 +-
 drivers/gpio/gpio-bcm-kona.c | 21 ++++++++-------------
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index e382dfebad7c..1cf267a3ed8b 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -191,7 +191,7 @@ config GPIO_RASPBERRYPI_EXP
 
 config GPIO_BCM_KONA
 	bool "Broadcom Kona GPIO"
-	depends on OF_GPIO && (ARCH_BCM_MOBILE || COMPILE_TEST)
+	depends on ARCH_BCM_MOBILE || COMPILE_TEST
 	help
 	  Turn on GPIO support for Broadcom "Kona" chips.
 
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index 70770429ba48..c977144eff10 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -8,12 +8,14 @@
 
 #include <linux/bitops.h>
 #include <linux/err.h>
-#include <linux/io.h>
 #include <linux/gpio/driver.h>
-#include <linux/of_device.h>
 #include <linux/init.h>
+#include <linux/io.h>
 #include <linux/irqdomain.h>
 #include <linux/irqchip/chained_irq.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
 
 #define BCM_GPIO_PASSWD				0x00a5a501
 #define GPIO_PER_BANK				32
@@ -556,19 +558,12 @@ static void bcm_kona_gpio_reset(struct bcm_kona_gpio *kona_gpio)
 static int bcm_kona_gpio_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	const struct of_device_id *match;
 	struct bcm_kona_gpio_bank *bank;
 	struct bcm_kona_gpio *kona_gpio;
 	struct gpio_chip *chip;
 	int ret;
 	int i;
 
-	match = of_match_device(bcm_kona_gpio_of_match, dev);
-	if (!match) {
-		dev_err(dev, "Failed to find gpio controller\n");
-		return -ENODEV;
-	}
-
 	kona_gpio = devm_kzalloc(dev, sizeof(*kona_gpio), GFP_KERNEL);
 	if (!kona_gpio)
 		return -ENOMEM;
@@ -601,10 +596,10 @@ static int bcm_kona_gpio_probe(struct platform_device *pdev)
 	chip->parent = dev;
 	chip->ngpio = kona_gpio->num_bank * GPIO_PER_BANK;
 
-	kona_gpio->irq_domain = irq_domain_add_linear(dev->of_node,
-						      chip->ngpio,
-						      &bcm_kona_irq_ops,
-						      kona_gpio);
+	kona_gpio->irq_domain = irq_domain_create_linear(dev_fwnode(dev),
+							 chip->ngpio,
+							 &bcm_kona_irq_ops,
+							 kona_gpio);
 	if (!kona_gpio->irq_domain) {
 		dev_err(dev, "Couldn't allocate IRQ domain\n");
 		return -ENXIO;

From ec72293cc2099859f79485861fbced41ae3fde60 Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Wed, 19 Jul 2023 01:30:54 +0300
Subject: [PATCH 0285/2047] gpio: bcm-kona: remove unneeded
 platform_set_drvdata() call

The platform_set_drvdata() call was never used, ever since the driver was
originally added.
It looks like this copy+paste left-over. Possibly the author copied from a
driver that had this line, but also had a remove hook.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-bcm-kona.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index c977144eff10..f67c0f76a196 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -592,7 +592,6 @@ static int bcm_kona_gpio_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	kona_gpio->pdev = pdev;
-	platform_set_drvdata(pdev, kona_gpio);
 	chip->parent = dev;
 	chip->ngpio = kona_gpio->num_bank * GPIO_PER_BANK;
 

From 91093b57ee3b2080a1f30895f862690f8d794161 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 19 Jul 2023 01:30:55 +0300
Subject: [PATCH 0286/2047] gpio: bcm-kona: Drop unused pdev member in private
 data structure

The pdev member is assigned and not used, drop it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-bcm-kona.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index f67c0f76a196..5321ef98f442 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -64,7 +64,6 @@ struct bcm_kona_gpio {
 	struct gpio_chip gpio_chip;
 	struct irq_domain *irq_domain;
 	struct bcm_kona_gpio_bank *banks;
-	struct platform_device *pdev;
 };
 
 struct bcm_kona_gpio_bank {
@@ -591,7 +590,6 @@ static int bcm_kona_gpio_probe(struct platform_device *pdev)
 	if (!kona_gpio->banks)
 		return -ENOMEM;
 
-	kona_gpio->pdev = pdev;
 	chip->parent = dev;
 	chip->ngpio = kona_gpio->num_bank * GPIO_PER_BANK;
 

From 2920e08bef609c8b59f9996fd6852a7b97119d75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Tue, 18 Jul 2023 22:14:53 +0200
Subject: [PATCH 0287/2047] regulator: max77857: Switch back to use struct
 i2c_driver's .probe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After commit b8a1a4cd5a98 ("i2c: Provide a temporary .probe_new()
call-back type"), all drivers being converted to .probe_new() and then
commit 03c835f498b5 ("i2c: Switch .probe() to not take an id parameter")
convert back to (the new) .probe() to be able to eventually drop
.probe_new() from struct i2c_driver.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20230718201453.3953602-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max77857-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/max77857-regulator.c b/drivers/regulator/max77857-regulator.c
index c5482ffd606e..d0fcb080f825 100644
--- a/drivers/regulator/max77857-regulator.c
+++ b/drivers/regulator/max77857-regulator.c
@@ -449,7 +449,7 @@ struct i2c_driver max77857_driver = {
 		.of_match_table = max77857_of_id,
 	},
 	.id_table = max77857_id,
-	.probe_new = max77857_probe,
+	.probe = max77857_probe,
 };
 module_i2c_driver(max77857_driver);
 

From 541e75954cadde0355ce7bebed5675625b2943a8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 18 Jul 2023 21:39:31 +0200
Subject: [PATCH 0288/2047] regulator: max77857: mark more functions static

A few functions in the new driver are global but only used in this file:

drivers/regulator/max77857-regulator.c:209:5: error: no previous prototype for 'max77859_get_voltage_sel' [-Werror=missing-prototypes]
drivers/regulator/max77857-regulator.c:221:5: error: no previous prototype for 'max77859_set_current_limit' [-Werror=missing-prototypes]
drivers/regulator/max77857-regulator.c:235:5: error: no previous prototype for 'max77859_get_current_limit' [-Werror=missing-prototypes]

Mark them static, which produces potentially better code and avoids the warning.

Fixes: af71cccadeced ("regulator: max77857: Add ADI MAX77857/59/MAX77831 Regulator Support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20230718193938.3593118-1-arnd@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max77857-regulator.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/regulator/max77857-regulator.c b/drivers/regulator/max77857-regulator.c
index d0fcb080f825..58f825d8d2b0 100644
--- a/drivers/regulator/max77857-regulator.c
+++ b/drivers/regulator/max77857-regulator.c
@@ -206,7 +206,7 @@ static int max77859_set_voltage_sel(struct regulator_dev *rdev,
 			       MAX77859_CONT3_DVS_START);
 }
 
-int max77859_get_voltage_sel(struct regulator_dev *rdev)
+static int max77859_get_voltage_sel(struct regulator_dev *rdev)
 {
 	__be16 reg;
 	int ret;
@@ -218,7 +218,7 @@ int max77859_get_voltage_sel(struct regulator_dev *rdev)
 	return FIELD_GET(MAX77859_VOLTAGE_SEL_MASK, __be16_to_cpu(reg));
 }
 
-int max77859_set_current_limit(struct regulator_dev *rdev, int min_uA, int max_uA)
+static int max77859_set_current_limit(struct regulator_dev *rdev, int min_uA, int max_uA)
 {
 	u32 selector;
 
@@ -232,7 +232,7 @@ int max77859_set_current_limit(struct regulator_dev *rdev, int min_uA, int max_u
 	return regmap_write(rdev->regmap, MAX77859_REG_CONT5, selector);
 }
 
-int max77859_get_current_limit(struct regulator_dev *rdev)
+static int max77859_get_current_limit(struct regulator_dev *rdev)
 {
 	u32 selector;
 	int ret;

From d5737d12779a171e76ad07635d1ed06a22009da7 Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Date: Tue, 18 Jul 2023 20:24:50 +0100
Subject: [PATCH 0289/2047] spi: rzv2m-csi: Squash timing settings into one
 statement

Register CLKSEL hosts the configuration for both clock polarity
and data phase, and both values can be set in one write operation.

Squash the clock polarity and data phase register writes into
one statement, for efficiency.

Signed-off-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Suggested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20230718192453.543549-2-fabrizio.castro.jz@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rzv2m-csi.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-rzv2m-csi.c b/drivers/spi/spi-rzv2m-csi.c
index 038f1486b7d7..faf5898bc3e0 100644
--- a/drivers/spi/spi-rzv2m-csi.c
+++ b/drivers/spi/spi-rzv2m-csi.c
@@ -38,6 +38,7 @@
 /* CSI_CLKSEL */
 #define CSI_CLKSEL_CKP		BIT(17)
 #define CSI_CLKSEL_DAP		BIT(16)
+#define CSI_CLKSEL_MODE		(CSI_CLKSEL_CKP|CSI_CLKSEL_DAP)
 #define CSI_CLKSEL_SLAVE	BIT(15)
 #define CSI_CLKSEL_CKS		GENMASK(14, 1)
 
@@ -408,10 +409,8 @@ static int rzv2m_csi_setup(struct spi_device *spi)
 	writel(CSI_MODE_SETUP, csi->base + CSI_MODE);
 
 	/* Setup clock polarity and phase timing */
-	rzv2m_csi_reg_write_bit(csi, CSI_CLKSEL, CSI_CLKSEL_CKP,
-				!(spi->mode & SPI_CPOL));
-	rzv2m_csi_reg_write_bit(csi, CSI_CLKSEL, CSI_CLKSEL_DAP,
-				!(spi->mode & SPI_CPHA));
+	rzv2m_csi_reg_write_bit(csi, CSI_CLKSEL, CSI_CLKSEL_MODE,
+				~spi->mode & SPI_MODE_X_MASK);
 
 	/* Setup serial data order */
 	rzv2m_csi_reg_write_bit(csi, CSI_MODE, CSI_MODE_DIR,

From 8dc4038a026a79b6222a43ccf7adf070c4ba54ea Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Date: Tue, 18 Jul 2023 20:24:51 +0100
Subject: [PATCH 0290/2047] spi: rzv2m-csi: Improve data types, casting and
 alignment

"unsigned int" is more appropriate than "int" for the members
of "struct rzv2m_csi_priv".
Using void* rather than u8* for txbuf and rxbuf allows for
the removal of some type casting.
Remove the unnecessary casting of "data" to "struct rzv2m_csi_priv*"
in function "rzv2m_csi_irq_handler".
Also, members "bytes_per_word" and "errors" introduce gaps
in the structure.
Adjust "struct rzv2m_csi_priv" and its members usage accordingly.

Signed-off-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Suggested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230718192453.543549-3-fabrizio.castro.jz@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rzv2m-csi.c | 46 ++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/spi/spi-rzv2m-csi.c b/drivers/spi/spi-rzv2m-csi.c
index faf5898bc3e0..4dbb8c185a8a 100644
--- a/drivers/spi/spi-rzv2m-csi.c
+++ b/drivers/spi/spi-rzv2m-csi.c
@@ -63,8 +63,8 @@
 /* CSI_FIFOTRG */
 #define CSI_FIFOTRG_R_TRG       GENMASK(2, 0)
 
-#define CSI_FIFO_SIZE_BYTES	32
-#define CSI_FIFO_HALF_SIZE	16
+#define CSI_FIFO_SIZE_BYTES	32U
+#define CSI_FIFO_HALF_SIZE	16U
 #define CSI_EN_DIS_TIMEOUT_US	100
 /*
  * Clock "csiclk" gets divided by 2 * CSI_CLKSEL_CKS in order to generate the
@@ -86,16 +86,16 @@ struct rzv2m_csi_priv {
 	struct clk *pclk;
 	struct device *dev;
 	struct spi_controller *controller;
-	const u8 *txbuf;
-	u8 *rxbuf;
-	int buffer_len;
-	int bytes_sent;
-	int bytes_received;
-	int bytes_to_transfer;
-	int words_to_transfer;
-	unsigned char bytes_per_word;
+	const void *txbuf;
+	void *rxbuf;
+	unsigned int buffer_len;
+	unsigned int bytes_sent;
+	unsigned int bytes_received;
+	unsigned int bytes_to_transfer;
+	unsigned int words_to_transfer;
+	unsigned int bytes_per_word;
 	wait_queue_head_t wait;
-	u8 errors;
+	u32 errors;
 	u32 status;
 };
 
@@ -157,18 +157,18 @@ static int rzv2m_csi_start_stop_operation(const struct rzv2m_csi_priv *csi,
 
 static int rzv2m_csi_fill_txfifo(struct rzv2m_csi_priv *csi)
 {
-	int i;
+	unsigned int i;
 
 	if (readl(csi->base + CSI_OFIFOL))
 		return -EIO;
 
 	if (csi->bytes_per_word == 2) {
-		u16 *buf = (u16 *)csi->txbuf;
+		const u16 *buf = csi->txbuf;
 
 		for (i = 0; i < csi->words_to_transfer; i++)
 			writel(buf[i], csi->base + CSI_OFIFO);
 	} else {
-		u8 *buf = (u8 *)csi->txbuf;
+		const u8 *buf = csi->txbuf;
 
 		for (i = 0; i < csi->words_to_transfer; i++)
 			writel(buf[i], csi->base + CSI_OFIFO);
@@ -182,18 +182,18 @@ static int rzv2m_csi_fill_txfifo(struct rzv2m_csi_priv *csi)
 
 static int rzv2m_csi_read_rxfifo(struct rzv2m_csi_priv *csi)
 {
-	int i;
+	unsigned int i;
 
 	if (readl(csi->base + CSI_IFIFOL) != csi->bytes_to_transfer)
 		return -EIO;
 
 	if (csi->bytes_per_word == 2) {
-		u16 *buf = (u16 *)csi->rxbuf;
+		u16 *buf = csi->rxbuf;
 
 		for (i = 0; i < csi->words_to_transfer; i++)
 			buf[i] = (u16)readl(csi->base + CSI_IFIFO);
 	} else {
-		u8 *buf = (u8 *)csi->rxbuf;
+		u8 *buf = csi->rxbuf;
 
 		for (i = 0; i < csi->words_to_transfer; i++)
 			buf[i] = (u8)readl(csi->base + CSI_IFIFO);
@@ -207,9 +207,9 @@ static int rzv2m_csi_read_rxfifo(struct rzv2m_csi_priv *csi)
 
 static inline void rzv2m_csi_calc_current_transfer(struct rzv2m_csi_priv *csi)
 {
-	int bytes_transferred = max_t(int, csi->bytes_received, csi->bytes_sent);
-	int bytes_remaining = csi->buffer_len - bytes_transferred;
-	int to_transfer;
+	unsigned int bytes_transferred = max(csi->bytes_received, csi->bytes_sent);
+	unsigned int bytes_remaining = csi->buffer_len - bytes_transferred;
+	unsigned int to_transfer;
 
 	if (csi->txbuf)
 		/*
@@ -217,9 +217,9 @@ static inline void rzv2m_csi_calc_current_transfer(struct rzv2m_csi_priv *csi)
 		 * hard to raise an overflow error (which is only possible
 		 * when IP transmits and receives at the same time).
 		 */
-		to_transfer = min_t(int, CSI_FIFO_HALF_SIZE, bytes_remaining);
+		to_transfer = min(CSI_FIFO_HALF_SIZE, bytes_remaining);
 	else
-		to_transfer = min_t(int, CSI_FIFO_SIZE_BYTES, bytes_remaining);
+		to_transfer = min(CSI_FIFO_SIZE_BYTES, bytes_remaining);
 
 	if (csi->bytes_per_word == 2)
 		to_transfer >>= 1;
@@ -339,7 +339,7 @@ static inline int rzv2m_csi_wait_for_rx_ready(struct rzv2m_csi_priv *csi)
 
 static irqreturn_t rzv2m_csi_irq_handler(int irq, void *data)
 {
-	struct rzv2m_csi_priv *csi = (struct rzv2m_csi_priv *)data;
+	struct rzv2m_csi_priv *csi = data;
 
 	csi->status = readl(csi->base + CSI_INT);
 	rzv2m_csi_disable_irqs(csi, csi->status);

From 7b63568fce9cb34cb0ad4caf9231555eb768c8e6 Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Date: Tue, 18 Jul 2023 20:24:52 +0100
Subject: [PATCH 0291/2047] spi: rzv2m-csi: Get rid of the x_trg{_words} tables

Table x_trg can be replaced with ilog2(), and table x_trg_words
can be replaced with rounddown_pow_of_two().
Replace the tables usage with the corresponding macros.
While at it, remove a couple of unnecessary empty lines.

Signed-off-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Suggested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230718192453.543549-4-fabrizio.castro.jz@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rzv2m-csi.c | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/drivers/spi/spi-rzv2m-csi.c b/drivers/spi/spi-rzv2m-csi.c
index 4dbb8c185a8a..62575a61608a 100644
--- a/drivers/spi/spi-rzv2m-csi.c
+++ b/drivers/spi/spi-rzv2m-csi.c
@@ -10,6 +10,7 @@
 #include <linux/count_zeros.h>
 #include <linux/interrupt.h>
 #include <linux/iopoll.h>
+#include <linux/log2.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/spi/spi.h>
@@ -99,20 +100,6 @@ struct rzv2m_csi_priv {
 	u32 status;
 };
 
-static const unsigned char x_trg[] = {
-	0, 1, 1, 2, 2, 2, 2, 3,
-	3, 3, 3, 3, 3, 3, 3, 4,
-	4, 4, 4, 4, 4, 4, 4, 4,
-	4, 4, 4, 4, 4, 4, 4, 5
-};
-
-static const unsigned char x_trg_words[] = {
-	1,  2,  2,  4,  4,  4,  4,  8,
-	8,  8,  8,  8,  8,  8,  8,  16,
-	16, 16, 16, 16, 16, 16, 16, 16,
-	16, 16, 16, 16, 16, 16, 16, 32
-};
-
 static void rzv2m_csi_reg_write_bit(const struct rzv2m_csi_priv *csi,
 				    int reg_offs, int bit_mask, u32 value)
 {
@@ -230,7 +217,7 @@ static inline void rzv2m_csi_calc_current_transfer(struct rzv2m_csi_priv *csi)
 	 * less than or equal to the number of bytes we need to transfer.
 	 * This may result in multiple smaller transfers.
 	 */
-	csi->words_to_transfer = x_trg_words[to_transfer - 1];
+	csi->words_to_transfer = rounddown_pow_of_two(to_transfer);
 
 	if (csi->bytes_per_word == 2)
 		csi->bytes_to_transfer = csi->words_to_transfer << 1;
@@ -241,7 +228,7 @@ static inline void rzv2m_csi_calc_current_transfer(struct rzv2m_csi_priv *csi)
 static inline void rzv2m_csi_set_rx_fifo_trigger_level(struct rzv2m_csi_priv *csi)
 {
 	rzv2m_csi_reg_write_bit(csi, CSI_FIFOTRG, CSI_FIFOTRG_R_TRG,
-				x_trg[csi->words_to_transfer - 1]);
+				ilog2(csi->words_to_transfer));
 }
 
 static inline void rzv2m_csi_enable_rx_trigger(struct rzv2m_csi_priv *csi,
@@ -314,7 +301,6 @@ static int rzv2m_csi_wait_for_tx_empty(struct rzv2m_csi_priv *csi)
 		return 0;
 
 	ret = rzv2m_csi_wait_for_interrupt(csi, CSI_INT_TREND, CSI_CNT_TREND_E);
-
 	if (ret == -ETIMEDOUT)
 		csi->errors |= TX_TIMEOUT_ERROR;
 
@@ -330,7 +316,6 @@ static inline int rzv2m_csi_wait_for_rx_ready(struct rzv2m_csi_priv *csi)
 
 	ret = rzv2m_csi_wait_for_interrupt(csi, CSI_INT_R_TRGR,
 					   CSI_CNT_R_TRGR_E);
-
 	if (ret == -ETIMEDOUT)
 		csi->errors |= RX_TIMEOUT_ERROR;
 

From c5a7b66811d22a4901bd358447e59160dbda8f65 Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Date: Tue, 18 Jul 2023 20:24:53 +0100
Subject: [PATCH 0292/2047] spi: rzv2m-csi: Make use of device_set_node

Use device_set_node instead of assigning controller->dev.of_node
directly because it also sets the firmware node.

Signed-off-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Suggested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20230718192453.543549-5-fabrizio.castro.jz@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rzv2m-csi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-rzv2m-csi.c b/drivers/spi/spi-rzv2m-csi.c
index 62575a61608a..d098aefa370d 100644
--- a/drivers/spi/spi-rzv2m-csi.c
+++ b/drivers/spi/spi-rzv2m-csi.c
@@ -12,6 +12,7 @@
 #include <linux/iopoll.h>
 #include <linux/log2.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/reset.h>
 #include <linux/spi/spi.h>
 #include <linux/units.h>
@@ -589,12 +590,13 @@ static int rzv2m_csi_probe(struct platform_device *pdev)
 	init_waitqueue_head(&csi->wait);
 
 	controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
-	controller->dev.of_node = pdev->dev.of_node;
 	controller->bits_per_word_mask = SPI_BPW_MASK(16) | SPI_BPW_MASK(8);
 	controller->setup = rzv2m_csi_setup;
 	controller->transfer_one = rzv2m_csi_transfer_one;
 	controller->use_gpio_descriptors = true;
 
+	device_set_node(&controller->dev, dev_fwnode(dev));
+
 	ret = devm_request_irq(dev, irq, rzv2m_csi_irq_handler, 0,
 			       dev_name(dev), csi);
 	if (ret)

From cf36a495f07059d23d62640a3f7b6b157da1bfa0 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Tue, 18 Jul 2023 07:27:48 +0200
Subject: [PATCH 0293/2047] rust: str: make doctests compilable/testable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rust documentation tests are going to be build/run-tested
with the KUnit integration added in a future patch, thus
update them to make them compilable/testable so that we
may start enforcing it.

Reviewed-by: Björn Roy Baron <bjorn3_gh@protonmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Vincenzo Palazzo <vincenzopalazzodev@gmail.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 rust/kernel/str.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index c9dd3bf59e34..c41607b2e4fe 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -213,6 +213,7 @@ impl fmt::Display for CStr {
     ///
     /// ```
     /// # use kernel::c_str;
+    /// # use kernel::fmt;
     /// # use kernel::str::CStr;
     /// # use kernel::str::CString;
     /// let penguin = c_str!("🐧");
@@ -241,6 +242,7 @@ impl fmt::Debug for CStr {
     ///
     /// ```
     /// # use kernel::c_str;
+    /// # use kernel::fmt;
     /// # use kernel::str::CStr;
     /// # use kernel::str::CString;
     /// let penguin = c_str!("🐧");
@@ -529,7 +531,7 @@ impl fmt::Write for Formatter {
 /// # Examples
 ///
 /// ```
-/// use kernel::str::CString;
+/// use kernel::{str::CString, fmt};
 ///
 /// let s = CString::try_from_fmt(fmt!("{}{}{}", "abc", 10, 20)).unwrap();
 /// assert_eq!(s.as_bytes_with_nul(), "abc1020\0".as_bytes());

From bfa7dff036f0cbb2ccb0385c8b666204d6b8eb3a Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Tue, 18 Jul 2023 07:27:49 +0200
Subject: [PATCH 0294/2047] rust: sync: make doctests compilable/testable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rust documentation tests are going to be build/run-tested
with the KUnit integration added in a future patch, thus
update them to make them compilable/testable so that we
may start enforcing it.

Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Björn Roy Baron <bjorn3_gh@protonmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 rust/kernel/sync/arc.rs           | 9 +++++++--
 rust/kernel/sync/lock/mutex.rs    | 1 +
 rust/kernel/sync/lock/spinlock.rs | 1 +
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs
index a89843cacaad..1ecb2efab51e 100644
--- a/rust/kernel/sync/arc.rs
+++ b/rust/kernel/sync/arc.rs
@@ -73,6 +73,7 @@ mod std_vendor;
 /// assert_eq!(cloned.b, 20);
 ///
 /// // The refcount drops to zero when `cloned` goes out of scope, and the memory is freed.
+/// # Ok::<(), Error>(())
 /// ```
 ///
 /// Using `Arc<T>` as the type of `self`:
@@ -98,6 +99,7 @@ mod std_vendor;
 /// let obj = Arc::try_new(Example { a: 10, b: 20 })?;
 /// obj.use_reference();
 /// obj.take_over();
+/// # Ok::<(), Error>(())
 /// ```
 ///
 /// Coercion from `Arc<Example>` to `Arc<dyn MyTrait>`:
@@ -121,6 +123,7 @@ mod std_vendor;
 ///
 /// // `coerced` has type `Arc<dyn MyTrait>`.
 /// let coerced: Arc<dyn MyTrait> = obj;
+/// # Ok::<(), Error>(())
 /// ```
 pub struct Arc<T: ?Sized> {
     ptr: NonNull<ArcInner<T>>,
@@ -337,7 +340,7 @@ impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> {
 /// # Example
 ///
 /// ```
-/// use crate::sync::{Arc, ArcBorrow};
+/// use kernel::sync::{Arc, ArcBorrow};
 ///
 /// struct Example;
 ///
@@ -350,12 +353,13 @@ impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> {
 ///
 /// // Assert that both `obj` and `cloned` point to the same underlying object.
 /// assert!(core::ptr::eq(&*obj, &*cloned));
+/// # Ok::<(), Error>(())
 /// ```
 ///
 /// Using `ArcBorrow<T>` as the type of `self`:
 ///
 /// ```
-/// use crate::sync::{Arc, ArcBorrow};
+/// use kernel::sync::{Arc, ArcBorrow};
 ///
 /// struct Example {
 ///     a: u32,
@@ -370,6 +374,7 @@ impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> {
 ///
 /// let obj = Arc::try_new(Example { a: 10, b: 20 })?;
 /// obj.as_arc_borrow().use_reference();
+/// # Ok::<(), Error>(())
 /// ```
 pub struct ArcBorrow<'a, T: ?Sized + 'a> {
     inner: NonNull<ArcInner<T>>,
diff --git a/rust/kernel/sync/lock/mutex.rs b/rust/kernel/sync/lock/mutex.rs
index 923472f04af4..09276fedc091 100644
--- a/rust/kernel/sync/lock/mutex.rs
+++ b/rust/kernel/sync/lock/mutex.rs
@@ -63,6 +63,7 @@ macro_rules! new_mutex {
 /// assert_eq!(e.c, 10);
 /// assert_eq!(e.d.lock().a, 20);
 /// assert_eq!(e.d.lock().b, 30);
+/// # Ok::<(), Error>(())
 /// ```
 ///
 /// The following example shows how to use interior mutability to modify the contents of a struct
diff --git a/rust/kernel/sync/lock/spinlock.rs b/rust/kernel/sync/lock/spinlock.rs
index 979b56464a4e..91eb2c9e9123 100644
--- a/rust/kernel/sync/lock/spinlock.rs
+++ b/rust/kernel/sync/lock/spinlock.rs
@@ -61,6 +61,7 @@ macro_rules! new_spinlock {
 /// assert_eq!(e.c, 10);
 /// assert_eq!(e.d.lock().a, 20);
 /// assert_eq!(e.d.lock().b, 30);
+/// # Ok::<(), Error>(())
 /// ```
 ///
 /// The following example shows how to use interior mutability to modify the contents of a struct

From ed615fb8ee6d166166b6cdff5ddacef6f6ef7dab Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Tue, 18 Jul 2023 07:27:50 +0200
Subject: [PATCH 0295/2047] rust: types: make doctests compilable/testable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rust documentation tests are going to be build/run-tested
with the KUnit integration added in a future patch, thus
update them to make them compilable/testable so that we
may start enforcing it.

Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Björn Roy Baron <bjorn3_gh@protonmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 rust/kernel/types.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index 1e5380b16ed5..696d6c5a3b9d 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -109,7 +109,7 @@ impl ForeignOwnable for () {
 /// In the example below, we have multiple exit paths and we want to log regardless of which one is
 /// taken:
 /// ```
-/// # use kernel::ScopeGuard;
+/// # use kernel::types::ScopeGuard;
 /// fn example1(arg: bool) {
 ///     let _log = ScopeGuard::new(|| pr_info!("example1 completed\n"));
 ///
@@ -127,7 +127,7 @@ impl ForeignOwnable for () {
 /// In the example below, we want to log the same message on all early exits but a different one on
 /// the main exit path:
 /// ```
-/// # use kernel::ScopeGuard;
+/// # use kernel::types::ScopeGuard;
 /// fn example2(arg: bool) {
 ///     let log = ScopeGuard::new(|| pr_info!("example2 returned early\n"));
 ///
@@ -148,7 +148,7 @@ impl ForeignOwnable for () {
 /// In the example below, we need a mutable object (the vector) to be accessible within the log
 /// function, so we wrap it in the [`ScopeGuard`]:
 /// ```
-/// # use kernel::ScopeGuard;
+/// # use kernel::types::ScopeGuard;
 /// fn example3(arg: bool) -> Result {
 ///     let mut vec =
 ///         ScopeGuard::new_with_data(Vec::new(), |v| pr_info!("vec had {} elements\n", v.len()));

From a66d733da8010c732979041cd602cfceab7f587b Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Tue, 18 Jul 2023 07:27:51 +0200
Subject: [PATCH 0296/2047] rust: support running Rust documentation tests as
 KUnit ones

Rust has documentation tests: these are typically examples of
usage of any item (e.g. function, struct, module...).

They are very convenient because they are just written
alongside the documentation. For instance:

    /// Sums two numbers.
    ///
    /// ```
    /// assert_eq!(mymod::f(10, 20), 30);
    /// ```
    pub fn f(a: i32, b: i32) -> i32 {
        a + b
    }

In userspace, the tests are collected and run via `rustdoc`.
Using the tool as-is would be useful already, since it allows
to compile-test most tests (thus enforcing they are kept
in sync with the code they document) and run those that do not
depend on in-kernel APIs.

However, by transforming the tests into a KUnit test suite,
they can also be run inside the kernel. Moreover, the tests
get to be compiled as other Rust kernel objects instead of
targeting userspace.

On top of that, the integration with KUnit means the Rust
support gets to reuse the existing testing facilities. For
instance, the kernel log would look like:

    KTAP version 1
    1..1
        KTAP version 1
        # Subtest: rust_doctests_kernel
        1..59
        # rust_doctest_kernel_build_assert_rs_0.location: rust/kernel/build_assert.rs:13
        ok 1 rust_doctest_kernel_build_assert_rs_0
        # rust_doctest_kernel_build_assert_rs_1.location: rust/kernel/build_assert.rs:56
        ok 2 rust_doctest_kernel_build_assert_rs_1
        # rust_doctest_kernel_init_rs_0.location: rust/kernel/init.rs:122
        ok 3 rust_doctest_kernel_init_rs_0
        ...
        # rust_doctest_kernel_types_rs_2.location: rust/kernel/types.rs:150
        ok 59 rust_doctest_kernel_types_rs_2
    # rust_doctests_kernel: pass:59 fail:0 skip:0 total:59
    # Totals: pass:59 fail:0 skip:0 total:59
    ok 1 rust_doctests_kernel

Therefore, add support for running Rust documentation tests
in KUnit. Some other notes about the current implementation
and support follow.

The transformation is performed by a couple scripts written
as Rust hostprogs.

Tests using the `?` operator are also supported as usual, e.g.:

    /// ```
    /// # use kernel::{spawn_work_item, workqueue};
    /// spawn_work_item!(workqueue::system(), || pr_info!("x"))?;
    /// # Ok::<(), Error>(())
    /// ```

The tests are also compiled with Clippy under `CLIPPY=1`, just
like normal code, thus also benefitting from extra linting.

The names of the tests are currently automatically generated.
This allows to reduce the burden for documentation writers,
while keeping them fairly stable for bisection. This is an
improvement over the `rustdoc`-generated names, which include
the line number; but ideally we would like to get `rustdoc` to
provide the Rust item path and a number (for multiple examples
in a single documented Rust item).

In order for developers to easily see from which original line
a failed doctests came from, a KTAP diagnostic line is printed
to the log, containing the location (file and line) of the
original test (i.e. instead of the location in the generated
Rust file):

    # rust_doctest_kernel_types_rs_2.location: rust/kernel/types.rs:150

This line follows the syntax for declaring test metadata in the
proposed KTAP v2 spec [1], which may be used for the proposed
KUnit test attributes API [2]. Thus hopefully this will make
migration easier later on (suggested by David [3]).

The original line in that test attribute is figured out by
providing an anchor (suggested by Boqun [4]). The original file
is found by walking the filesystem, checking directory prefixes
to reduce the amount of combinations to check, and it is only
done once per file. Ambiguities are detected and reported.

A notable difference from KUnit C tests is that the Rust tests
appear to assert using the usual `assert!` and `assert_eq!`
macros from the Rust standard library (`core`). We provide
a custom version that forwards the call to KUnit instead.
Importantly, these macros do not require passing context,
unlike the KUnit C ones (i.e. `struct kunit *`). This makes
them easier to use, and readers of the documentation do not need
to care about which testing framework is used. In addition, it
may allow us to test third-party code more easily in the future.

However, a current limitation is that KUnit does not support
assertions in other tasks. Thus we presently simply print an
error to the kernel log if an assertion actually failed. This
should be revisited to properly fail the test, perhaps saving
the context somewhere else, or letting KUnit handle it.

Link: https://lore.kernel.org/lkml/20230420205734.1288498-1-rmoar@google.com/ [1]
Link: https://lore.kernel.org/linux-kselftest/20230707210947.1208717-1-rmoar@google.com/ [2]
Link: https://lore.kernel.org/rust-for-linux/CABVgOSkOLO-8v6kdAGpmYnZUb+LKOX0CtYCo-Bge7r_2YTuXDQ@mail.gmail.com/ [3]
Link: https://lore.kernel.org/rust-for-linux/ZIps86MbJF%2FiGIzd@boqun-archlinux/ [4]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 lib/Kconfig.debug               |  13 ++
 rust/.gitignore                 |   2 +
 rust/Makefile                   |  29 ++++
 rust/bindings/bindings_helper.h |   1 +
 rust/helpers.c                  |   7 +
 rust/kernel/kunit.rs            | 163 ++++++++++++++++++++
 rust/kernel/lib.rs              |   2 +
 scripts/.gitignore              |   2 +
 scripts/Makefile                |   4 +
 scripts/rustdoc_test_builder.rs |  72 +++++++++
 scripts/rustdoc_test_gen.rs     | 260 ++++++++++++++++++++++++++++++++
 11 files changed, 555 insertions(+)
 create mode 100644 rust/kernel/kunit.rs
 create mode 100644 scripts/rustdoc_test_builder.rs
 create mode 100644 scripts/rustdoc_test_gen.rs

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index fbc89baf7de6..550cb967b668 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -3010,6 +3010,19 @@ config RUST_BUILD_ASSERT_ALLOW
 
 	  If unsure, say N.
 
+config RUST_KERNEL_DOCTESTS
+	bool "Doctests for the `kernel` crate" if !KUNIT_ALL_TESTS
+	depends on RUST && KUNIT=y
+	default KUNIT_ALL_TESTS
+	help
+	  This builds the documentation tests of the `kernel` crate
+	  as KUnit tests.
+
+	  For more information on KUnit and unit tests in general,
+	  please refer to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+	  If unsure, say N.
+
 endmenu # "Rust"
 
 endmenu # Kernel hacking
diff --git a/rust/.gitignore b/rust/.gitignore
index 21552992b401..d3829ffab80b 100644
--- a/rust/.gitignore
+++ b/rust/.gitignore
@@ -2,6 +2,8 @@
 
 bindings_generated.rs
 bindings_helpers_generated.rs
+doctests_kernel_generated.rs
+doctests_kernel_generated_kunit.c
 uapi_generated.rs
 exports_*_generated.h
 doc/
diff --git a/rust/Makefile b/rust/Makefile
index 7c9d9f11aec5..92482144c0bb 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -27,6 +27,12 @@ endif
 
 obj-$(CONFIG_RUST) += exports.o
 
+always-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated.rs
+always-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated_kunit.c
+
+obj-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated.o
+obj-$(CONFIG_RUST_KERNEL_DOCTESTS) += doctests_kernel_generated_kunit.o
+
 # Avoids running `$(RUSTC)` for the sysroot when it may not be available.
 ifdef CONFIG_RUST
 
@@ -39,9 +45,11 @@ ifeq ($(quiet),silent_)
 cargo_quiet=-q
 rust_test_quiet=-q
 rustdoc_test_quiet=--test-args -q
+rustdoc_test_kernel_quiet=>/dev/null
 else ifeq ($(quiet),quiet_)
 rust_test_quiet=-q
 rustdoc_test_quiet=--test-args -q
+rustdoc_test_kernel_quiet=>/dev/null
 else
 cargo_quiet=--verbose
 endif
@@ -157,6 +165,27 @@ quiet_cmd_rustdoc_test = RUSTDOC T $<
 		-L$(objtree)/$(obj)/test --output $(objtree)/$(obj)/doc \
 		--crate-name $(subst rusttest-,,$@) $<
 
+quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $<
+      cmd_rustdoc_test_kernel = \
+	rm -rf $(objtree)/$(obj)/test/doctests/kernel; \
+	mkdir -p $(objtree)/$(obj)/test/doctests/kernel; \
+	OBJTREE=$(abspath $(objtree)) \
+	$(RUSTDOC) --test $(rust_flags) \
+		@$(objtree)/include/generated/rustc_cfg \
+		-L$(objtree)/$(obj) --extern alloc --extern kernel \
+		--extern build_error --extern macros \
+		--extern bindings --extern uapi \
+		--no-run --crate-name kernel -Zunstable-options \
+		--test-builder $(objtree)/scripts/rustdoc_test_builder \
+		$< $(rustdoc_test_kernel_quiet); \
+	$(objtree)/scripts/rustdoc_test_gen
+
+%/doctests_kernel_generated.rs %/doctests_kernel_generated_kunit.c: \
+    $(src)/kernel/lib.rs $(obj)/kernel.o \
+    $(objtree)/scripts/rustdoc_test_builder \
+    $(objtree)/scripts/rustdoc_test_gen FORCE
+	$(call if_changed,rustdoc_test_kernel)
+
 # We cannot use `-Zpanic-abort-tests` because some tests are dynamic,
 # so for the moment we skip `-Cpanic=abort`.
 quiet_cmd_rustc_test = RUSTC T  $<
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index 3e601ce2548d..0f8d37c31ac2 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -6,6 +6,7 @@
  * Sorted alphabetically.
  */
 
+#include <kunit/test.h>
 #include <linux/errname.h>
 #include <linux/slab.h>
 #include <linux/refcount.h>
diff --git a/rust/helpers.c b/rust/helpers.c
index bb594da56137..49a5e1a4f0ae 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -18,6 +18,7 @@
  * accidentally exposed.
  */
 
+#include <kunit/test-bug.h>
 #include <linux/bug.h>
 #include <linux/build_bug.h>
 #include <linux/err.h>
@@ -135,6 +136,12 @@ void rust_helper_put_task_struct(struct task_struct *t)
 }
 EXPORT_SYMBOL_GPL(rust_helper_put_task_struct);
 
+struct kunit *rust_helper_kunit_get_current_test(void)
+{
+	return kunit_get_current_test();
+}
+EXPORT_SYMBOL_GPL(rust_helper_kunit_get_current_test);
+
 /*
  * We use `bindgen`'s `--size_t-is-usize` option to bind the C `size_t` type
  * as the Rust `usize` type, so we can use it in contexts where Rust
diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs
new file mode 100644
index 000000000000..722655b2d62d
--- /dev/null
+++ b/rust/kernel/kunit.rs
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! KUnit-based macros for Rust unit tests.
+//!
+//! C header: [`include/kunit/test.h`](../../../../../include/kunit/test.h)
+//!
+//! Reference: <https://docs.kernel.org/dev-tools/kunit/index.html>
+
+use core::{ffi::c_void, fmt};
+
+/// Prints a KUnit error-level message.
+///
+/// Public but hidden since it should only be used from KUnit generated code.
+#[doc(hidden)]
+pub fn err(args: fmt::Arguments<'_>) {
+    // SAFETY: The format string is null-terminated and the `%pA` specifier matches the argument we
+    // are passing.
+    #[cfg(CONFIG_PRINTK)]
+    unsafe {
+        bindings::_printk(
+            b"\x013%pA\0".as_ptr() as _,
+            &args as *const _ as *const c_void,
+        );
+    }
+}
+
+/// Prints a KUnit info-level message.
+///
+/// Public but hidden since it should only be used from KUnit generated code.
+#[doc(hidden)]
+pub fn info(args: fmt::Arguments<'_>) {
+    // SAFETY: The format string is null-terminated and the `%pA` specifier matches the argument we
+    // are passing.
+    #[cfg(CONFIG_PRINTK)]
+    unsafe {
+        bindings::_printk(
+            b"\x016%pA\0".as_ptr() as _,
+            &args as *const _ as *const c_void,
+        );
+    }
+}
+
+/// Asserts that a boolean expression is `true` at runtime.
+///
+/// Public but hidden since it should only be used from generated tests.
+///
+/// Unlike the one in `core`, this one does not panic; instead, it is mapped to the KUnit
+/// facilities. See [`assert!`] for more details.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! kunit_assert {
+    ($name:literal, $file:literal, $diff:expr, $condition:expr $(,)?) => {
+        'out: {
+            // Do nothing if the condition is `true`.
+            if $condition {
+                break 'out;
+            }
+
+            static FILE: &'static $crate::str::CStr = $crate::c_str!($file);
+            static LINE: i32 = core::line!() as i32 - $diff;
+            static CONDITION: &'static $crate::str::CStr = $crate::c_str!(stringify!($condition));
+
+            // SAFETY: FFI call without safety requirements.
+            let kunit_test = unsafe { $crate::bindings::kunit_get_current_test() };
+            if kunit_test.is_null() {
+                // The assertion failed but this task is not running a KUnit test, so we cannot call
+                // KUnit, but at least print an error to the kernel log. This may happen if this
+                // macro is called from an spawned thread in a test (see
+                // `scripts/rustdoc_test_gen.rs`) or if some non-test code calls this macro by
+                // mistake (it is hidden to prevent that).
+                //
+                // This mimics KUnit's failed assertion format.
+                $crate::kunit::err(format_args!(
+                    "    # {}: ASSERTION FAILED at {FILE}:{LINE}\n",
+                    $name
+                ));
+                $crate::kunit::err(format_args!(
+                    "    Expected {CONDITION} to be true, but is false\n"
+                ));
+                $crate::kunit::err(format_args!(
+                    "    Failure not reported to KUnit since this is a non-KUnit task\n"
+                ));
+                break 'out;
+            }
+
+            #[repr(transparent)]
+            struct Location($crate::bindings::kunit_loc);
+
+            #[repr(transparent)]
+            struct UnaryAssert($crate::bindings::kunit_unary_assert);
+
+            // SAFETY: There is only a static instance and in that one the pointer field points to
+            // an immutable C string.
+            unsafe impl Sync for Location {}
+
+            // SAFETY: There is only a static instance and in that one the pointer field points to
+            // an immutable C string.
+            unsafe impl Sync for UnaryAssert {}
+
+            static LOCATION: Location = Location($crate::bindings::kunit_loc {
+                file: FILE.as_char_ptr(),
+                line: LINE,
+            });
+            static ASSERTION: UnaryAssert = UnaryAssert($crate::bindings::kunit_unary_assert {
+                assert: $crate::bindings::kunit_assert {},
+                condition: CONDITION.as_char_ptr(),
+                expected_true: true,
+            });
+
+            // SAFETY:
+            //   - FFI call.
+            //   - The `kunit_test` pointer is valid because we got it from
+            //     `kunit_get_current_test()` and it was not null. This means we are in a KUnit
+            //     test, and that the pointer can be passed to KUnit functions and assertions.
+            //   - The string pointers (`file` and `condition` above) point to null-terminated
+            //     strings since they are `CStr`s.
+            //   - The function pointer (`format`) points to the proper function.
+            //   - The pointers passed will remain valid since they point to `static`s.
+            //   - The format string is allowed to be null.
+            //   - There are, however, problems with this: first of all, this will end up stopping
+            //     the thread, without running destructors. While that is problematic in itself,
+            //     it is considered UB to have what is effectively a forced foreign unwind
+            //     with `extern "C"` ABI. One could observe the stack that is now gone from
+            //     another thread. We should avoid pinning stack variables to prevent library UB,
+            //     too. For the moment, given that test failures are reported immediately before the
+            //     next test runs, that test failures should be fixed and that KUnit is explicitly
+            //     documented as not suitable for production environments, we feel it is reasonable.
+            unsafe {
+                $crate::bindings::__kunit_do_failed_assertion(
+                    kunit_test,
+                    core::ptr::addr_of!(LOCATION.0),
+                    $crate::bindings::kunit_assert_type_KUNIT_ASSERTION,
+                    core::ptr::addr_of!(ASSERTION.0.assert),
+                    Some($crate::bindings::kunit_unary_assert_format),
+                    core::ptr::null(),
+                );
+            }
+
+            // SAFETY: FFI call; the `test` pointer is valid because this hidden macro should only
+            // be called by the generated documentation tests which forward the test pointer given
+            // by KUnit.
+            unsafe {
+                $crate::bindings::__kunit_abort(kunit_test);
+            }
+        }
+    };
+}
+
+/// Asserts that two expressions are equal to each other (using [`PartialEq`]).
+///
+/// Public but hidden since it should only be used from generated tests.
+///
+/// Unlike the one in `core`, this one does not panic; instead, it is mapped to the KUnit
+/// facilities. See [`assert!`] for more details.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! kunit_assert_eq {
+    ($name:literal, $file:literal, $diff:expr, $left:expr, $right:expr $(,)?) => {{
+        // For the moment, we just forward to the expression assert because, for binary asserts,
+        // KUnit supports only a few types (e.g. integers).
+        $crate::kunit_assert!($name, $file, $diff, $left == $right);
+    }};
+}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 85b261209977..3642cadc34b1 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -34,6 +34,8 @@ mod build_assert;
 pub mod error;
 pub mod init;
 pub mod ioctl;
+#[cfg(CONFIG_KUNIT)]
+pub mod kunit;
 pub mod prelude;
 pub mod print;
 mod static_assert;
diff --git a/scripts/.gitignore b/scripts/.gitignore
index 6e9ce6720a05..3dbb8bb2457b 100644
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -5,6 +5,8 @@
 /kallsyms
 /module.lds
 /recordmcount
+/rustdoc_test_builder
+/rustdoc_test_gen
 /sign-file
 /sorttable
 /target.json
diff --git a/scripts/Makefile b/scripts/Makefile
index 32b6ba722728..576cf64be667 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -9,6 +9,8 @@ hostprogs-always-$(CONFIG_BUILDTIME_TABLE_SORT)		+= sorttable
 hostprogs-always-$(CONFIG_ASN1)				+= asn1_compiler
 hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT)		+= sign-file
 hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE)	+= insert-sys-cert
+hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS)		+= rustdoc_test_builder
+hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS)		+= rustdoc_test_gen
 always-$(CONFIG_RUST)					+= target.json
 
 filechk_rust_target = $< < include/config/auto.conf
@@ -18,6 +20,8 @@ $(obj)/target.json: scripts/generate_rust_target include/config/auto.conf FORCE
 
 hostprogs += generate_rust_target
 generate_rust_target-rust := y
+rustdoc_test_builder-rust := y
+rustdoc_test_gen-rust := y
 
 HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include
 HOSTLDLIBS_sorttable = -lpthread
diff --git a/scripts/rustdoc_test_builder.rs b/scripts/rustdoc_test_builder.rs
new file mode 100644
index 000000000000..e5894652f12c
--- /dev/null
+++ b/scripts/rustdoc_test_builder.rs
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Test builder for `rustdoc`-generated tests.
+//!
+//! This script is a hack to extract the test from `rustdoc`'s output. Ideally, `rustdoc` would
+//! have an option to generate this information instead, e.g. as JSON output.
+//!
+//! The `rustdoc`-generated test names look like `{file}_{line}_{number}`, e.g.
+//! `...path_rust_kernel_sync_arc_rs_42_0`. `number` is the "test number", needed in cases like
+//! a macro that expands into items with doctests is invoked several times within the same line.
+//!
+//! However, since these names are used for bisection in CI, the line number makes it not stable
+//! at all. In the future, we would like `rustdoc` to give us the Rust item path associated with
+//! the test, plus a "test number" (for cases with several examples per item) and generate a name
+//! from that. For the moment, we generate ourselves a new name, `{file}_{number}` instead, in
+//! the `gen` script (done there since we need to be aware of all the tests in a given file).
+
+use std::io::Read;
+
+fn main() {
+    let mut stdin = std::io::stdin().lock();
+    let mut body = String::new();
+    stdin.read_to_string(&mut body).unwrap();
+
+    // Find the generated function name looking for the inner function inside `main()`.
+    //
+    // The line we are looking for looks like one of the following:
+    //
+    // ```
+    // fn main() { #[allow(non_snake_case)] fn _doctest_main_rust_kernel_file_rs_28_0() {
+    // fn main() { #[allow(non_snake_case)] fn _doctest_main_rust_kernel_file_rs_37_0() -> Result<(), impl core::fmt::Debug> {
+    // ```
+    //
+    // It should be unlikely that doctest code matches such lines (when code is formatted properly).
+    let rustdoc_function_name = body
+        .lines()
+        .find_map(|line| {
+            Some(
+                line.split_once("fn main() {")?
+                    .1
+                    .split_once("fn ")?
+                    .1
+                    .split_once("()")?
+                    .0,
+            )
+            .filter(|x| x.chars().all(|c| c.is_alphanumeric() || c == '_'))
+        })
+        .expect("No test function found in `rustdoc`'s output.");
+
+    // Qualify `Result` to avoid the collision with our own `Result` coming from the prelude.
+    let body = body.replace(
+        &format!("{rustdoc_function_name}() -> Result<(), impl core::fmt::Debug> {{"),
+        &format!("{rustdoc_function_name}() -> core::result::Result<(), impl core::fmt::Debug> {{"),
+    );
+
+    // For tests that get generated with `Result`, like above, `rustdoc` generates an `unwrap()` on
+    // the return value to check there were no returned errors. Instead, we use our assert macro
+    // since we want to just fail the test, not panic the kernel.
+    //
+    // We save the result in a variable so that the failed assertion message looks nicer.
+    let body = body.replace(
+        &format!("}} {rustdoc_function_name}().unwrap() }}"),
+        &format!("}} let test_return_value = {rustdoc_function_name}(); assert!(test_return_value.is_ok()); }}"),
+    );
+
+    // Figure out a smaller test name based on the generated function name.
+    let name = rustdoc_function_name.split_once("_rust_kernel_").unwrap().1;
+
+    let path = format!("rust/test/doctests/kernel/{name}");
+
+    std::fs::write(path, body.as_bytes()).unwrap();
+}
diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs
new file mode 100644
index 000000000000..5ebd42ae4a3f
--- /dev/null
+++ b/scripts/rustdoc_test_gen.rs
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Generates KUnit tests from saved `rustdoc`-generated tests.
+//!
+//! KUnit passes a context (`struct kunit *`) to each test, which should be forwarded to the other
+//! KUnit functions and macros.
+//!
+//! However, we want to keep this as an implementation detail because:
+//!
+//!   - Test code should not care about the implementation.
+//!
+//!   - Documentation looks worse if it needs to carry extra details unrelated to the piece
+//!     being described.
+//!
+//!   - Test code should be able to define functions and call them, without having to carry
+//!     the context.
+//!
+//!   - Later on, we may want to be able to test non-kernel code (e.g. `core`, `alloc` or
+//!     third-party crates) which likely use the standard library `assert*!` macros.
+//!
+//! For this reason, instead of the passed context, `kunit_get_current_test()` is used instead
+//! (i.e. `current->kunit_test`).
+//!
+//! Note that this means other threads/tasks potentially spawned by a given test, if failing, will
+//! report the failure in the kernel log but will not fail the actual test. Saving the pointer in
+//! e.g. a `static` per test does not fully solve the issue either, because currently KUnit does
+//! not support assertions (only expectations) from other tasks. Thus leave that feature for
+//! the future, which simplifies the code here too. We could also simply not allow `assert`s in
+//! other tasks, but that seems overly constraining, and we do want to support them, eventually.
+
+use std::{
+    fs,
+    fs::File,
+    io::{BufWriter, Read, Write},
+    path::{Path, PathBuf},
+};
+
+/// Find the real path to the original file based on the `file` portion of the test name.
+///
+/// `rustdoc` generated `file`s look like `sync_locked_by_rs`. Underscores (except the last one)
+/// may represent an actual underscore in a directory/file, or a path separator. Thus the actual
+/// file might be `sync_locked_by.rs`, `sync/locked_by.rs`, `sync_locked/by.rs` or
+/// `sync/locked/by.rs`. This function walks the file system to determine which is the real one.
+///
+/// This does require that ambiguities do not exist, but that seems fair, especially since this is
+/// all supposed to be temporary until `rustdoc` gives us proper metadata to build this. If such
+/// ambiguities are detected, they are diagnosed and the script panics.
+fn find_real_path<'a>(srctree: &Path, valid_paths: &'a mut Vec<PathBuf>, file: &str) -> &'a str {
+    valid_paths.clear();
+
+    let potential_components: Vec<&str> = file.strip_suffix("_rs").unwrap().split('_').collect();
+
+    find_candidates(srctree, valid_paths, Path::new(""), &potential_components);
+    fn find_candidates(
+        srctree: &Path,
+        valid_paths: &mut Vec<PathBuf>,
+        prefix: &Path,
+        potential_components: &[&str],
+    ) {
+        // The base case: check whether all the potential components left, joined by underscores,
+        // is a file.
+        let joined_potential_components = potential_components.join("_") + ".rs";
+        if srctree
+            .join("rust/kernel")
+            .join(prefix)
+            .join(&joined_potential_components)
+            .is_file()
+        {
+            // Avoid `srctree` here in order to keep paths relative to it in the KTAP output.
+            valid_paths.push(
+                Path::new("rust/kernel")
+                    .join(prefix)
+                    .join(joined_potential_components),
+            );
+        }
+
+        // In addition, check whether each component prefix, joined by underscores, is a directory.
+        // If not, there is no need to check for combinations with that prefix.
+        for i in 1..potential_components.len() {
+            let (components_prefix, components_rest) = potential_components.split_at(i);
+            let prefix = prefix.join(components_prefix.join("_"));
+            if srctree.join("rust/kernel").join(&prefix).is_dir() {
+                find_candidates(srctree, valid_paths, &prefix, components_rest);
+            }
+        }
+    }
+
+    assert!(
+        valid_paths.len() > 0,
+        "No path candidates found. This is likely a bug in the build system, or some files went \
+        away while compiling."
+    );
+
+    if valid_paths.len() > 1 {
+        eprintln!("Several path candidates found:");
+        for path in valid_paths {
+            eprintln!("    {path:?}");
+        }
+        panic!(
+            "Several path candidates found, please resolve the ambiguity by renaming a file or \
+            folder."
+        );
+    }
+
+    valid_paths[0].to_str().unwrap()
+}
+
+fn main() {
+    let srctree = std::env::var("srctree").unwrap();
+    let srctree = Path::new(&srctree);
+
+    let mut paths = fs::read_dir("rust/test/doctests/kernel")
+        .unwrap()
+        .map(|entry| entry.unwrap().path())
+        .collect::<Vec<_>>();
+
+    // Sort paths.
+    paths.sort();
+
+    let mut rust_tests = String::new();
+    let mut c_test_declarations = String::new();
+    let mut c_test_cases = String::new();
+    let mut body = String::new();
+    let mut last_file = String::new();
+    let mut number = 0;
+    let mut valid_paths: Vec<PathBuf> = Vec::new();
+    let mut real_path: &str = "";
+    for path in paths {
+        // The `name` follows the `{file}_{line}_{number}` pattern (see description in
+        // `scripts/rustdoc_test_builder.rs`). Discard the `number`.
+        let name = path.file_name().unwrap().to_str().unwrap().to_string();
+
+        // Extract the `file` and the `line`, discarding the `number`.
+        let (file, line) = name.rsplit_once('_').unwrap().0.rsplit_once('_').unwrap();
+
+        // Generate an ID sequence ("test number") for each one in the file.
+        if file == last_file {
+            number += 1;
+        } else {
+            number = 0;
+            last_file = file.to_string();
+
+            // Figure out the real path, only once per file.
+            real_path = find_real_path(srctree, &mut valid_paths, file);
+        }
+
+        // Generate a KUnit name (i.e. test name and C symbol) for this test.
+        //
+        // We avoid the line number, like `rustdoc` does, to make things slightly more stable for
+        // bisection purposes. However, to aid developers in mapping back what test failed, we will
+        // print a diagnostics line in the KTAP report.
+        let kunit_name = format!("rust_doctest_kernel_{file}_{number}");
+
+        // Read the test's text contents to dump it below.
+        body.clear();
+        File::open(path).unwrap().read_to_string(&mut body).unwrap();
+
+        // Calculate how many lines before `main` function (including the `main` function line).
+        let body_offset = body
+            .lines()
+            .take_while(|line| !line.contains("fn main() {"))
+            .count()
+            + 1;
+
+        use std::fmt::Write;
+        write!(
+            rust_tests,
+            r#"/// Generated `{name}` KUnit test case from a Rust documentation test.
+#[no_mangle]
+pub extern "C" fn {kunit_name}(__kunit_test: *mut kernel::bindings::kunit) {{
+    /// Overrides the usual [`assert!`] macro with one that calls KUnit instead.
+    #[allow(unused)]
+    macro_rules! assert {{
+        ($cond:expr $(,)?) => {{{{
+            kernel::kunit_assert!("{kunit_name}", "{real_path}", __DOCTEST_ANCHOR - {line}, $cond);
+        }}}}
+    }}
+
+    /// Overrides the usual [`assert_eq!`] macro with one that calls KUnit instead.
+    #[allow(unused)]
+    macro_rules! assert_eq {{
+        ($left:expr, $right:expr $(,)?) => {{{{
+            kernel::kunit_assert_eq!("{kunit_name}", "{real_path}", __DOCTEST_ANCHOR - {line}, $left, $right);
+        }}}}
+    }}
+
+    // Many tests need the prelude, so provide it by default.
+    #[allow(unused)]
+    use kernel::prelude::*;
+
+    // Unconditionally print the location of the original doctest (i.e. rather than the location in
+    // the generated file) so that developers can easily map the test back to the source code.
+    //
+    // This information is also printed when assertions fail, but this helps in the successful cases
+    // when the user is running KUnit manually, or when passing `--raw_output` to `kunit.py`.
+    //
+    // This follows the syntax for declaring test metadata in the proposed KTAP v2 spec, which may
+    // be used for the proposed KUnit test attributes API. Thus hopefully this will make migration
+    // easier later on.
+    kernel::kunit::info(format_args!("    # {kunit_name}.location: {real_path}:{line}\n"));
+
+    /// The anchor where the test code body starts.
+    #[allow(unused)]
+    static __DOCTEST_ANCHOR: i32 = core::line!() as i32 + {body_offset} + 1;
+    {{
+        {body}
+        main();
+    }}
+}}
+
+"#
+        )
+        .unwrap();
+
+        write!(c_test_declarations, "void {kunit_name}(struct kunit *);\n").unwrap();
+        write!(c_test_cases, "    KUNIT_CASE({kunit_name}),\n").unwrap();
+    }
+
+    let rust_tests = rust_tests.trim();
+    let c_test_declarations = c_test_declarations.trim();
+    let c_test_cases = c_test_cases.trim();
+
+    write!(
+        BufWriter::new(File::create("rust/doctests_kernel_generated.rs").unwrap()),
+        r#"//! `kernel` crate documentation tests.
+
+const __LOG_PREFIX: &[u8] = b"rust_doctests_kernel\0";
+
+{rust_tests}
+"#
+    )
+    .unwrap();
+
+    write!(
+        BufWriter::new(File::create("rust/doctests_kernel_generated_kunit.c").unwrap()),
+        r#"/*
+ * `kernel` crate documentation tests.
+ */
+
+#include <kunit/test.h>
+
+{c_test_declarations}
+
+static struct kunit_case test_cases[] = {{
+    {c_test_cases}
+    {{ }}
+}};
+
+static struct kunit_suite test_suite = {{
+    .name = "rust_doctests_kernel",
+    .test_cases = test_cases,
+}};
+
+kunit_test_suite(test_suite);
+
+MODULE_LICENSE("GPL");
+"#
+    )
+    .unwrap();
+}

From 64bd4641310c41a1ecf07c13c67bc0ed61045dfd Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Tue, 18 Jul 2023 07:27:52 +0200
Subject: [PATCH 0297/2047] MAINTAINERS: add Rust KUnit files to the KUnit
 entry

The KUnit maintainers would like to maintain these files on their
side too (thanks!), so add them to their entry.

With this in place, `scripts/get_maintainer.pl` prints both sets
of maintainers/reviewers (i.e. KUnit and Rust) for those files,
which is the behavior we are looking for.

Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Vincenzo Palazzo <vincenzopalazzodev@gmail.com>
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3be1bdfe8ecc..2a942fe59144 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11372,6 +11372,8 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git k
 F:	Documentation/dev-tools/kunit/
 F:	include/kunit/
 F:	lib/kunit/
+F:	rust/kernel/kunit.rs
+F:	scripts/rustdoc_test_*
 F:	tools/testing/kunit/
 
 KERNEL USERMODE HELPER

From 6716f4d39c17febf7aa4fa5f5923da67a8d10e85 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 16 May 2023 03:41:37 -0700
Subject: [PATCH 0298/2047] rcu: Update synchronize_rcu_mult() comment for
 call_rcu_hurry()

Those who have worked with RCU for some time will naturally think in
terms of the long-standing call_rcu() API rather than the much newer
call_rcu_hurry() API.  But it is call_rcu_hurry() that you should normally
pass to synchronize_rcu_mult().  This commit therefore updates the header
comment to point this out.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 include/linux/rcupdate_wait.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
index 699b938358bf..5e0f74f2f8ca 100644
--- a/include/linux/rcupdate_wait.h
+++ b/include/linux/rcupdate_wait.h
@@ -42,6 +42,11 @@ do {									\
  * call_srcu() function, with this wrapper supplying the pointer to the
  * corresponding srcu_struct.
  *
+ * Note that call_rcu_hurry() should be used instead of call_rcu()
+ * because in kernels built with CONFIG_RCU_LAZY=y the delay between the
+ * invocation of call_rcu() and that of the corresponding RCU callback
+ * can be multiple seconds.
+ *
  * The first argument tells Tiny RCU's _wait_rcu_gp() not to
  * bother waiting for RCU.  The reason for this is because anywhere
  * synchronize_rcu_mult() can be called is automatically already a full

From c924bf5a43e47d5591d527d39b474f8ca9e63c0e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 23 May 2023 02:22:10 -0700
Subject: [PATCH 0299/2047] rcu: Clarify rcu_is_watching() kernel-doc comment

Make it clear that this function always returns either true or false
without other planned failure modes.

Reported-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/rcu/tree.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1449cb69a0e0..f788981272f8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -677,12 +677,16 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
 }
 
 /**
- * rcu_is_watching - see if RCU thinks that the current CPU is not idle
+ * rcu_is_watching - RCU read-side critical sections permitted on current CPU?
  *
- * Return true if RCU is watching the running CPU, which means that this
- * CPU can safely enter RCU read-side critical sections.  In other words,
- * if the current CPU is not in its idle loop or is in an interrupt or
- * NMI handler, return true.
+ * Return @true if RCU is watching the running CPU and @false otherwise.
+ * An @true return means that this CPU can safely enter RCU read-side
+ * critical sections.
+ *
+ * Although calls to rcu_is_watching() from most parts of the kernel
+ * will return @true, there are important exceptions.  For example, if the
+ * current CPU is deep within its idle loop, in kernel entry/exit code,
+ * or offline, rcu_is_watching() will return @false.
  *
  * Make notrace because it can be called by the internal functions of
  * ftrace, and making this notrace removes unnecessary recursion calls.

From 2ce8284c31156c432df60d4497ec68cca04e128f Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jul 2023 08:30:43 -0600
Subject: [PATCH 0300/2047] power: Explicitly include correct DT includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Acked-by: David Lechner <david@lechnology.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/reset/as3722-poweroff.c            | 1 -
 drivers/power/reset/brcm-kona-reset.c            | 4 ++--
 drivers/power/reset/gpio-poweroff.c              | 3 ++-
 drivers/power/reset/gpio-restart.c               | 2 +-
 drivers/power/reset/keystone-reset.c             | 3 ++-
 drivers/power/reset/ocelot-reset.c               | 4 ++--
 drivers/power/reset/odroid-go-ultra-poweroff.c   | 3 ++-
 drivers/power/reset/oxnas-restart.c              | 2 --
 drivers/power/reset/st-poweroff.c                | 2 +-
 drivers/power/reset/syscon-poweroff.c            | 3 +--
 drivers/power/reset/syscon-reboot.c              | 3 +--
 drivers/power/reset/xgene-reboot.c               | 2 +-
 drivers/power/supply/axp20x_ac_power.c           | 1 -
 drivers/power/supply/axp20x_battery.c            | 1 -
 drivers/power/supply/axp20x_usb_power.c          | 1 -
 drivers/power/supply/cpcap-battery.c             | 2 +-
 drivers/power/supply/da9150-charger.c            | 2 --
 drivers/power/supply/da9150-fg.c                 | 1 -
 drivers/power/supply/lego_ev3_battery.c          | 2 +-
 drivers/power/supply/ltc2941-battery-gauge.c     | 2 +-
 drivers/power/supply/ltc4162-l-charger.c         | 2 +-
 drivers/power/supply/max14656_charger_detector.c | 2 +-
 drivers/power/supply/max17040_battery.c          | 2 +-
 drivers/power/supply/max8903_charger.c           | 1 -
 drivers/power/supply/rn5t618_power.c             | 1 -
 drivers/power/supply/rt5033_charger.c            | 2 +-
 drivers/power/supply/rt9455_charger.c            | 3 +--
 drivers/power/supply/sbs-battery.c               | 2 +-
 drivers/power/supply/tps65090-charger.c          | 2 +-
 drivers/power/supply/tps65217_charger.c          | 1 -
 drivers/power/supply/twl4030_charger.c           | 1 +
 31 files changed, 26 insertions(+), 37 deletions(-)

diff --git a/drivers/power/reset/as3722-poweroff.c b/drivers/power/reset/as3722-poweroff.c
index 80edff1a556f..829e0dba2fda 100644
--- a/drivers/power/reset/as3722-poweroff.c
+++ b/drivers/power/reset/as3722-poweroff.c
@@ -10,7 +10,6 @@
 #include <linux/mfd/as3722.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
diff --git a/drivers/power/reset/brcm-kona-reset.c b/drivers/power/reset/brcm-kona-reset.c
index 3de024e3ceb7..64e241c77f83 100644
--- a/drivers/power/reset/brcm-kona-reset.c
+++ b/drivers/power/reset/brcm-kona-reset.c
@@ -2,8 +2,8 @@
 // Copyright (C) 2016 Broadcom
 
 #include <linux/io.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 #include <linux/reboot.h>
 
 #define RSTMGR_REG_WR_ACCESS_OFFSET	0
diff --git a/drivers/power/reset/gpio-poweroff.c b/drivers/power/reset/gpio-poweroff.c
index 84b3c3528afa..b28f24da1b3c 100644
--- a/drivers/power/reset/gpio-poweroff.c
+++ b/drivers/power/reset/gpio-poweroff.c
@@ -11,8 +11,9 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/gpio/consumer.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 
 #define DEFAULT_TIMEOUT_MS 3000
diff --git a/drivers/power/reset/gpio-restart.c b/drivers/power/reset/gpio-restart.c
index a479d3536eb1..3aa19765772d 100644
--- a/drivers/power/reset/gpio-restart.c
+++ b/drivers/power/reset/gpio-restart.c
@@ -12,8 +12,8 @@
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/gpio/consumer.h>
-#include <linux/of_platform.h>
 #include <linux/module.h>
+#include <linux/of.h>
 
 struct gpio_restart {
 	struct gpio_desc *reset_gpio;
diff --git a/drivers/power/reset/keystone-reset.c b/drivers/power/reset/keystone-reset.c
index 83a4e1c9bf94..dbc4ff61cd74 100644
--- a/drivers/power/reset/keystone-reset.c
+++ b/drivers/power/reset/keystone-reset.c
@@ -10,10 +10,11 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
+#include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 
 #define RSTYPE_RG			0x0
 #define RSCTRL_RG			0x4
diff --git a/drivers/power/reset/ocelot-reset.c b/drivers/power/reset/ocelot-reset.c
index 8caa90cb58fc..44c7730086e4 100644
--- a/drivers/power/reset/ocelot-reset.c
+++ b/drivers/power/reset/ocelot-reset.c
@@ -8,10 +8,10 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/notifier.h>
+#include <linux/mod_devicetable.h>
 #include <linux/mfd/syscon.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/reboot.h>
 #include <linux/regmap.h>
 
diff --git a/drivers/power/reset/odroid-go-ultra-poweroff.c b/drivers/power/reset/odroid-go-ultra-poweroff.c
index f46271da4e8e..9cac7aef77f0 100644
--- a/drivers/power/reset/odroid-go-ultra-poweroff.c
+++ b/drivers/power/reset/odroid-go-ultra-poweroff.c
@@ -4,7 +4,8 @@
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/mfd/rk808.h>
 #include <linux/regmap.h>
 #include <linux/module.h>
diff --git a/drivers/power/reset/oxnas-restart.c b/drivers/power/reset/oxnas-restart.c
index 13090bec058a..dd321caa28b6 100644
--- a/drivers/power/reset/oxnas-restart.c
+++ b/drivers/power/reset/oxnas-restart.c
@@ -13,8 +13,6 @@
 #include <linux/io.h>
 #include <linux/notifier.h>
 #include <linux/mfd/syscon.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/regmap.h>
diff --git a/drivers/power/reset/st-poweroff.c b/drivers/power/reset/st-poweroff.c
index 5ccaacffab54..56ba21873882 100644
--- a/drivers/power/reset/st-poweroff.c
+++ b/drivers/power/reset/st-poweroff.c
@@ -9,7 +9,7 @@
 
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/syscon.h>
 #include <linux/reboot.h>
diff --git a/drivers/power/reset/syscon-poweroff.c b/drivers/power/reset/syscon-poweroff.c
index ed58bdf41e27..430d440d55c6 100644
--- a/drivers/power/reset/syscon-poweroff.c
+++ b/drivers/power/reset/syscon-poweroff.c
@@ -10,8 +10,7 @@
 #include <linux/io.h>
 #include <linux/notifier.h>
 #include <linux/mfd/syscon.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/regmap.h>
diff --git a/drivers/power/reset/syscon-reboot.c b/drivers/power/reset/syscon-reboot.c
index 45e34e6885f7..4d622c19bc48 100644
--- a/drivers/power/reset/syscon-reboot.c
+++ b/drivers/power/reset/syscon-reboot.c
@@ -9,8 +9,7 @@
 #include <linux/io.h>
 #include <linux/notifier.h>
 #include <linux/mfd/syscon.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/regmap.h>
diff --git a/drivers/power/reset/xgene-reboot.c b/drivers/power/reset/xgene-reboot.c
index 0b0d2fd2bd0c..3260bd93158e 100644
--- a/drivers/power/reset/xgene-reboot.c
+++ b/drivers/power/reset/xgene-reboot.c
@@ -13,7 +13,7 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/notifier.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
diff --git a/drivers/power/supply/axp20x_ac_power.c b/drivers/power/supply/axp20x_ac_power.c
index 57e50208d537..19a118633115 100644
--- a/drivers/power/supply/axp20x_ac_power.c
+++ b/drivers/power/supply/axp20x_ac_power.c
@@ -13,7 +13,6 @@
 #include <linux/mfd/axp20x.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/power_supply.h>
diff --git a/drivers/power/supply/axp20x_battery.c b/drivers/power/supply/axp20x_battery.c
index 9106077c0dbb..6ac5c80cfda2 100644
--- a/drivers/power/supply/axp20x_battery.c
+++ b/drivers/power/supply/axp20x_battery.c
@@ -22,7 +22,6 @@
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/regmap.h>
diff --git a/drivers/power/supply/axp20x_usb_power.c b/drivers/power/supply/axp20x_usb_power.c
index 51c3f9b6458d..bde17406c130 100644
--- a/drivers/power/supply/axp20x_usb_power.c
+++ b/drivers/power/supply/axp20x_usb_power.c
@@ -15,7 +15,6 @@
 #include <linux/mfd/axp20x.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/power_supply.h>
diff --git a/drivers/power/supply/cpcap-battery.c b/drivers/power/supply/cpcap-battery.c
index d98d9244e394..5dd76c0ac98d 100644
--- a/drivers/power/supply/cpcap-battery.c
+++ b/drivers/power/supply/cpcap-battery.c
@@ -15,7 +15,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/reboot.h>
diff --git a/drivers/power/supply/da9150-charger.c b/drivers/power/supply/da9150-charger.c
index a87aeaea38e1..27f897067aa3 100644
--- a/drivers/power/supply/da9150-charger.c
+++ b/drivers/power/supply/da9150-charger.c
@@ -11,8 +11,6 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/interrupt.h>
 #include <linux/power_supply.h>
 #include <linux/notifier.h>
diff --git a/drivers/power/supply/da9150-fg.c b/drivers/power/supply/da9150-fg.c
index 8c5e2c49d6c1..652c1f213af1 100644
--- a/drivers/power/supply/da9150-fg.c
+++ b/drivers/power/supply/da9150-fg.c
@@ -11,7 +11,6 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
diff --git a/drivers/power/supply/lego_ev3_battery.c b/drivers/power/supply/lego_ev3_battery.c
index ccb00be38e2c..9085de0ae1b2 100644
--- a/drivers/power/supply/lego_ev3_battery.c
+++ b/drivers/power/supply/lego_ev3_battery.c
@@ -20,7 +20,7 @@
 #include <linux/iio/types.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 
diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c
index cad39a8f829d..a1ddc4b060ce 100644
--- a/drivers/power/supply/ltc2941-battery-gauge.c
+++ b/drivers/power/supply/ltc2941-battery-gauge.c
@@ -11,7 +11,7 @@
 #include <linux/devm-helpers.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/swab.h>
diff --git a/drivers/power/supply/ltc4162-l-charger.c b/drivers/power/supply/ltc4162-l-charger.c
index ec5b4a20ad43..f0eace731480 100644
--- a/drivers/power/supply/ltc4162-l-charger.c
+++ b/drivers/power/supply/ltc4162-l-charger.c
@@ -6,7 +6,7 @@
 
 #include <linux/module.h>
 #include <linux/delay.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/pm_runtime.h>
 #include <linux/power_supply.h>
 #include <linux/i2c.h>
diff --git a/drivers/power/supply/max14656_charger_detector.c b/drivers/power/supply/max14656_charger_detector.c
index fbdf12cf64eb..89f2af72dfcd 100644
--- a/drivers/power/supply/max14656_charger_detector.c
+++ b/drivers/power/supply/max14656_charger_detector.c
@@ -14,8 +14,8 @@
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
 #include <linux/slab.h>
-#include <linux/of_device.h>
 #include <linux/workqueue.h>
 #include <linux/power_supply.h>
 #include <linux/devm-helpers.h>
diff --git a/drivers/power/supply/max17040_battery.c b/drivers/power/supply/max17040_battery.c
index 22ea7de47a53..ff42db672899 100644
--- a/drivers/power/supply/max17040_battery.c
+++ b/drivers/power/supply/max17040_battery.c
@@ -15,7 +15,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/power_supply.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
 
diff --git a/drivers/power/supply/max8903_charger.c b/drivers/power/supply/max8903_charger.c
index 54d50b55fbae..e65d0141f260 100644
--- a/drivers/power/supply/max8903_charger.c
+++ b/drivers/power/supply/max8903_charger.c
@@ -10,7 +10,6 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/slab.h>
 #include <linux/power_supply.h>
 #include <linux/platform_device.h>
diff --git a/drivers/power/supply/rn5t618_power.c b/drivers/power/supply/rn5t618_power.c
index a5e09ac78a50..ebea3522a2ac 100644
--- a/drivers/power/supply/rn5t618_power.c
+++ b/drivers/power/supply/rn5t618_power.c
@@ -14,7 +14,6 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/mfd/rn5t618.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/regmap.h>
diff --git a/drivers/power/supply/rt5033_charger.c b/drivers/power/supply/rt5033_charger.c
index 5218dfbf5e1b..c0c516f22c66 100644
--- a/drivers/power/supply/rt5033_charger.c
+++ b/drivers/power/supply/rt5033_charger.c
@@ -6,8 +6,8 @@
  * Author: Beomho Seo <beomho.seo@samsung.com>
  */
 
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/regmap.h>
diff --git a/drivers/power/supply/rt9455_charger.c b/drivers/power/supply/rt9455_charger.c
index ff4dcf77c788..c345a77f9f78 100644
--- a/drivers/power/supply/rt9455_charger.c
+++ b/drivers/power/supply/rt9455_charger.c
@@ -8,8 +8,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
-#include <linux/of_irq.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/pm_runtime.h>
 #include <linux/power_supply.h>
 #include <linux/i2c.h>
diff --git a/drivers/power/supply/sbs-battery.c b/drivers/power/supply/sbs-battery.c
index 62a354e01204..cdfc8466d129 100644
--- a/drivers/power/supply/sbs-battery.c
+++ b/drivers/power/supply/sbs-battery.c
@@ -16,7 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/property.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/power/sbs-battery.h>
 #include <linux/power_supply.h>
 #include <linux/slab.h>
diff --git a/drivers/power/supply/tps65090-charger.c b/drivers/power/supply/tps65090-charger.c
index 0990b2fa6cd8..f96c705e0a9f 100644
--- a/drivers/power/supply/tps65090-charger.c
+++ b/drivers/power/supply/tps65090-charger.c
@@ -13,7 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/slab.h>
diff --git a/drivers/power/supply/tps65217_charger.c b/drivers/power/supply/tps65217_charger.c
index a4bc9f2a10bc..96341cbde4fa 100644
--- a/drivers/power/supply/tps65217_charger.c
+++ b/drivers/power/supply/tps65217_charger.c
@@ -17,7 +17,6 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/power_supply.h>
 
 #include <linux/mfd/core.h>
diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c
index 7adfd69fe649..e78d061d8d32 100644
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/mfd/twl.h>

From da28e18d6a31448a5f67e59e835669e930f8c044 Mon Sep 17 00:00:00 2001
From: Ricardo Rivera-Matos <rriveram@opensource.cirrus.com>
Date: Fri, 14 Jul 2023 12:45:29 -0500
Subject: [PATCH 0301/2047] power: supply: bq24190: Considers FORCE_20PCT when
 getting IPRECHG

Adds a check of the FORCE_20PCT bit when getting the precharge
current value.

According to the bit description for the FORCE_20PCT bit, when
FORCE_20PCT is true, the precharge current target is 50% of
what is configured in the IPRECHG bit field.

Signed-off-by: Ricardo Rivera-Matos <rriveram@opensource.cirrus.com>
Reviewed-by: David Rhodes <drhodes@opensource.cirrus.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq24190_charger.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c
index ef8235848f56..3f99cb9590ba 100644
--- a/drivers/power/supply/bq24190_charger.c
+++ b/drivers/power/supply/bq24190_charger.c
@@ -965,7 +965,7 @@ static int bq24190_charger_get_precharge(struct bq24190_dev_info *bdi,
 		union power_supply_propval *val)
 {
 	u8 v;
-	int ret;
+	int curr, ret;
 
 	ret = bq24190_read_mask(bdi, BQ24190_REG_PCTCC,
 			BQ24190_REG_PCTCC_IPRECHG_MASK,
@@ -973,7 +973,20 @@ static int bq24190_charger_get_precharge(struct bq24190_dev_info *bdi,
 	if (ret < 0)
 		return ret;
 
-	val->intval = ++v * 128 * 1000;
+	curr = ++v * 128 * 1000;
+
+	ret = bq24190_read_mask(bdi, BQ24190_REG_CCC,
+			BQ24190_REG_CCC_FORCE_20PCT_MASK,
+			BQ24190_REG_CCC_FORCE_20PCT_SHIFT, &v);
+	if (ret < 0)
+		return ret;
+
+	/* If FORCE_20PCT is enabled, then current is 50% of IPRECHG value */
+	if (v)
+		curr /= 2;
+
+	val->intval = curr;
+
 	return 0;
 }
 

From 7b9fc309c0b57c5b1230bf3e0a8bb9e937eb6529 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 27 Jun 2023 13:17:07 +0100
Subject: [PATCH 0302/2047] power: supply: bd99954: make read-only array
 sub_status_reg

Don't populate the read-only array on the stack, instead make it
static const.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: Matti Vaittinen <mazziesaccount@gmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bd99954-charger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/supply/bd99954-charger.c b/drivers/power/supply/bd99954-charger.c
index 086dcf4033c1..1ed1d9f99fb3 100644
--- a/drivers/power/supply/bd99954-charger.c
+++ b/drivers/power/supply/bd99954-charger.c
@@ -536,7 +536,7 @@ static irqreturn_t bd9995x_irq_handler_thread(int irq, void *private)
 
 	for_each_set_bit(i, &tmp, 7) {
 		int sub_status, sub_mask;
-		int sub_status_reg[] = {
+		static const int sub_status_reg[] = {
 			INT1_STATUS, INT2_STATUS, INT3_STATUS, INT4_STATUS,
 			INT5_STATUS, INT6_STATUS, INT7_STATUS,
 		};

From d40befed9a581740f6ceb0e5998ec2f59bfbc559 Mon Sep 17 00:00:00 2001
From: Kamel Bouhara <kamel.bouhara@bootlin.com>
Date: Tue, 20 Jun 2023 08:26:57 +0200
Subject: [PATCH 0303/2047] power: reset: at91-reset: add sysfs interface to
 the power on reason

Introduce a list of generic reset sources and use them to export the
power on reason through sysfs. Update the ABI documentation to describe
this new interface.

Signed-off-by: Kamel Bouhara <kamel.bouhara@bootlin.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
[Miquel Raynal: Follow-up on Kamel's work, 4 years later]
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../testing/sysfs-platform-power-on-reason    | 12 ++++++
 drivers/power/reset/at91-reset.c              | 37 ++++++++++++++-----
 include/linux/power/power_on_reason.h         | 19 ++++++++++
 3 files changed, 58 insertions(+), 10 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-platform-power-on-reason
 create mode 100644 include/linux/power/power_on_reason.h

diff --git a/Documentation/ABI/testing/sysfs-platform-power-on-reason b/Documentation/ABI/testing/sysfs-platform-power-on-reason
new file mode 100644
index 000000000000..c3b29dbc64bf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-power-on-reason
@@ -0,0 +1,12 @@
+What:		/sys/devices/platform/.../power_on_reason
+Date:		June 2023
+KernelVersion:	6.5
+Contact:	Kamel Bouhara <kamel.bouhara@bootlin.com>
+Description:	Shows system power on reason. The following strings/reasons can
+		be read (the list can be extended):
+		"regular power-up", "RTC wakeup", "watchdog timeout",
+		"software reset", "reset button action", "CPU clock failure",
+		"crystal oscillator failure", "brown-out reset",
+		"unknown reason".
+
+		The file is read only.
diff --git a/drivers/power/reset/at91-reset.c b/drivers/power/reset/at91-reset.c
index d6884841a6dc..aa9b012d3d00 100644
--- a/drivers/power/reset/at91-reset.c
+++ b/drivers/power/reset/at91-reset.c
@@ -18,6 +18,7 @@
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/reset-controller.h>
+#include <linux/power/power_on_reason.h>
 
 #include <soc/at91/at91sam9_ddrsdr.h>
 #include <soc/at91/at91sam9_sdramc.h>
@@ -149,44 +150,54 @@ static int at91_reset(struct notifier_block *this, unsigned long mode,
 	return NOTIFY_DONE;
 }
 
-static const char * __init at91_reset_reason(struct at91_reset *reset)
+static const char *at91_reset_reason(struct at91_reset *reset)
 {
 	u32 reg = readl(reset->rstc_base + AT91_RSTC_SR);
 	const char *reason;
 
 	switch ((reg & AT91_RSTC_RSTTYP) >> 8) {
 	case RESET_TYPE_GENERAL:
-		reason = "general reset";
+		reason = POWER_ON_REASON_REGULAR;
 		break;
 	case RESET_TYPE_WAKEUP:
-		reason = "wakeup";
+		reason = POWER_ON_REASON_RTC;
 		break;
 	case RESET_TYPE_WATCHDOG:
-		reason = "watchdog reset";
+		reason = POWER_ON_REASON_WATCHDOG;
 		break;
 	case RESET_TYPE_SOFTWARE:
-		reason = "software reset";
+		reason = POWER_ON_REASON_SOFTWARE;
 		break;
 	case RESET_TYPE_USER:
-		reason = "user reset";
+		reason = POWER_ON_REASON_RST_BTN;
 		break;
 	case RESET_TYPE_CPU_FAIL:
-		reason = "CPU clock failure detection";
+		reason = POWER_ON_REASON_CPU_CLK_FAIL;
 		break;
 	case RESET_TYPE_XTAL_FAIL:
-		reason = "32.768 kHz crystal failure detection";
+		reason = POWER_ON_REASON_XTAL_FAIL;
 		break;
 	case RESET_TYPE_ULP2:
-		reason = "ULP2 reset";
+		reason = POWER_ON_REASON_BROWN_OUT;
 		break;
 	default:
-		reason = "unknown reset";
+		reason = POWER_ON_REASON_UNKNOWN;
 		break;
 	}
 
 	return reason;
 }
 
+static ssize_t power_on_reason_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct at91_reset *reset = platform_get_drvdata(pdev);
+
+	return sprintf(buf, "%s\n", at91_reset_reason(reset));
+}
+static DEVICE_ATTR_RO(power_on_reason);
+
 static const struct of_device_id at91_ramc_of_match[] = {
 	{
 		.compatible = "atmel,at91sam9260-sdramc",
@@ -391,6 +402,12 @@ static int __init at91_reset_probe(struct platform_device *pdev)
 	if (ret)
 		goto disable_clk;
 
+	ret = device_create_file(&pdev->dev, &dev_attr_power_on_reason);
+	if (ret) {
+		dev_err(&pdev->dev, "Could not create sysfs entry\n");
+		return ret;
+	}
+
 	dev_info(&pdev->dev, "Starting after %s\n", at91_reset_reason(reset));
 
 	return 0;
diff --git a/include/linux/power/power_on_reason.h b/include/linux/power/power_on_reason.h
new file mode 100644
index 000000000000..95a1ec0c403c
--- /dev/null
+++ b/include/linux/power/power_on_reason.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Author: Kamel Bouhra <kamel.bouhara@bootlin.com>
+ */
+
+#ifndef POWER_ON_REASON_H
+#define POWER_ON_REASON_H
+
+#define POWER_ON_REASON_REGULAR "regular power-up"
+#define POWER_ON_REASON_RTC "RTC wakeup"
+#define POWER_ON_REASON_WATCHDOG "watchdog timeout"
+#define POWER_ON_REASON_SOFTWARE "software reset"
+#define POWER_ON_REASON_RST_BTN "reset button action"
+#define POWER_ON_REASON_CPU_CLK_FAIL "CPU clock failure"
+#define POWER_ON_REASON_XTAL_FAIL "crystal oscillator failure"
+#define POWER_ON_REASON_BROWN_OUT "brown-out reset"
+#define POWER_ON_REASON_UNKNOWN "unknown reason"
+
+#endif /* POWER_ON_REASON_H */

From 0b64a0e53cc7875c2410b26a20c1269d72cd1428 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 21:03:03 +0800
Subject: [PATCH 0304/2047] power: reset: at91-poweroff: Convert to
 devm_platform_ioremap_resource()

Use devm_platform_ioremap_resource() to simplify code.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/reset/at91-poweroff.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/power/reset/at91-poweroff.c b/drivers/power/reset/at91-poweroff.c
index 9e74e131c675..dd5399785b69 100644
--- a/drivers/power/reset/at91-poweroff.c
+++ b/drivers/power/reset/at91-poweroff.c
@@ -151,13 +151,11 @@ static void at91_poweroff_dt_set_wakeup_mode(struct platform_device *pdev)
 
 static int __init at91_poweroff_probe(struct platform_device *pdev)
 {
-	struct resource *res;
 	struct device_node *np;
 	u32 ddr_type;
 	int ret;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	at91_shdwc.shdwc_base = devm_ioremap_resource(&pdev->dev, res);
+	at91_shdwc.shdwc_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(at91_shdwc.shdwc_base))
 		return PTR_ERR(at91_shdwc.shdwc_base);
 

From 5d002da7296c50d3fcc50af29bf1e507585703e0 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 21:03:04 +0800
Subject: [PATCH 0305/2047] power: reset: at91-sama5d2_shdwc: Convert to
 devm_platform_ioremap_resource()

Use devm_platform_ioremap_resource() to simplify code.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/reset/at91-sama5d2_shdwc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/power/reset/at91-sama5d2_shdwc.c b/drivers/power/reset/at91-sama5d2_shdwc.c
index d8ecffe72f16..e76b102b57b1 100644
--- a/drivers/power/reset/at91-sama5d2_shdwc.c
+++ b/drivers/power/reset/at91-sama5d2_shdwc.c
@@ -331,7 +331,6 @@ static const struct of_device_id at91_pmc_ids[] = {
 
 static int __init at91_shdwc_probe(struct platform_device *pdev)
 {
-	struct resource *res;
 	const struct of_device_id *match;
 	struct device_node *np;
 	u32 ddr_type;
@@ -349,8 +348,7 @@ static int __init at91_shdwc_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, at91_shdwc);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	at91_shdwc->shdwc_base = devm_ioremap_resource(&pdev->dev, res);
+	at91_shdwc->shdwc_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(at91_shdwc->shdwc_base))
 		return PTR_ERR(at91_shdwc->shdwc_base);
 

From 79d2a09d953fd1939b16e4baf83ab60d2b305e59 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 21:03:05 +0800
Subject: [PATCH 0306/2047] power: reset: brcm-kona: Convert to
 devm_platform_ioremap_resource()

Use devm_platform_ioremap_resource() to simplify code.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Florian Fainelli <florian.fainelli@broadcom.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/reset/brcm-kona-reset.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/power/reset/brcm-kona-reset.c b/drivers/power/reset/brcm-kona-reset.c
index 64e241c77f83..d05728b1db09 100644
--- a/drivers/power/reset/brcm-kona-reset.c
+++ b/drivers/power/reset/brcm-kona-reset.c
@@ -38,9 +38,7 @@ static struct notifier_block kona_reset_nb = {
 
 static int kona_reset_probe(struct platform_device *pdev)
 {
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-	kona_reset_base = devm_ioremap_resource(&pdev->dev, res);
+	kona_reset_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(kona_reset_base))
 		return PTR_ERR(kona_reset_base);
 

From 808e964e302f4e7b2a84d9f87c7cfd2bdd62a4e5 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 21:03:06 +0800
Subject: [PATCH 0307/2047] power: reset: gemini-poweroff: Convert to
 devm_platform_ioremap_resource()

Use devm_platform_ioremap_resource() to simplify code.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/reset/gemini-poweroff.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/power/reset/gemini-poweroff.c b/drivers/power/reset/gemini-poweroff.c
index b7f7a8225f22..d309b610142c 100644
--- a/drivers/power/reset/gemini-poweroff.c
+++ b/drivers/power/reset/gemini-poweroff.c
@@ -91,7 +91,6 @@ static void gemini_poweroff(void)
 static int gemini_poweroff_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct resource *res;
 	struct gemini_powercon *gpw;
 	u32 val;
 	int irq;
@@ -101,8 +100,7 @@ static int gemini_poweroff_probe(struct platform_device *pdev)
 	if (!gpw)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	gpw->base = devm_ioremap_resource(dev, res);
+	gpw->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(gpw->base))
 		return PTR_ERR(gpw->base);
 

From 19223ffa3d84f66ccb09a154dfa26cd40034595c Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 21:03:07 +0800
Subject: [PATCH 0308/2047] power: reset: msm: Convert to
 devm_platform_ioremap_resource()

Use devm_platform_ioremap_resource() to simplify code.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/reset/msm-poweroff.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/power/reset/msm-poweroff.c b/drivers/power/reset/msm-poweroff.c
index 0c439f83bf65..b9a401bd280b 100644
--- a/drivers/power/reset/msm-poweroff.c
+++ b/drivers/power/reset/msm-poweroff.c
@@ -35,11 +35,7 @@ static void do_msm_poweroff(void)
 
 static int msm_restart_probe(struct platform_device *pdev)
 {
-	struct device *dev = &pdev->dev;
-	struct resource *mem;
-
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	msm_ps_hold = devm_ioremap_resource(dev, mem);
+	msm_ps_hold = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(msm_ps_hold))
 		return PTR_ERR(msm_ps_hold);
 

From 1a80588314caa933fbc1b295d16723086c7b797a Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 21:03:08 +0800
Subject: [PATCH 0309/2047] power: reset: ocelot: Convert to
 devm_platform_ioremap_resource()

Use devm_platform_ioremap_resource() to simplify code.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/reset/ocelot-reset.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/power/reset/ocelot-reset.c b/drivers/power/reset/ocelot-reset.c
index 44c7730086e4..56be64decf54 100644
--- a/drivers/power/reset/ocelot-reset.c
+++ b/drivers/power/reset/ocelot-reset.c
@@ -69,8 +69,6 @@ static int ocelot_restart_handle(struct notifier_block *this,
 static int ocelot_reset_probe(struct platform_device *pdev)
 {
 	struct ocelot_reset_context *ctx;
-	struct resource *res;
-
 	struct device *dev = &pdev->dev;
 	int err;
 
@@ -78,8 +76,7 @@ static int ocelot_reset_probe(struct platform_device *pdev)
 	if (!ctx)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ctx->base = devm_ioremap_resource(dev, res);
+	ctx->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(ctx->base))
 		return PTR_ERR(ctx->base);
 

From b52d51d88432dd7cf7bc57ea11d3ef1efad8cb96 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst@chromium.org>
Date: Tue, 13 Jun 2023 17:07:23 +0800
Subject: [PATCH 0310/2047] power: supply: core: Avoid duplicate hwmon device
 from thermal framework

When the power supply device being registered supports a temperature
readout, the core registers a thermal zone for it. The thermal core
would register a hwmon device for that unless told otherwise.

When CONFIG_POWER_SUPPLY_HWMON is enabled, the power supply core creates
a hwmon device. This results in a second entry, one which has a better
name than the one registered through the thermal framework. It could
potentially have readouts other than temperature.

To simplify the result, tell the thermal framework to not register a
hwmon device if CONFIG_POWER_SUPPLY_HWMON is enabled. The result is
one hwmon device with all the readings the device supports.

Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/power_supply_core.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index 3791aec69ddc..4aa466c945e2 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -1305,8 +1305,12 @@ static int psy_register_thermal(struct power_supply *psy)
 
 	/* Register battery zone device psy reports temperature */
 	if (psy_has_property(psy->desc, POWER_SUPPLY_PROP_TEMP)) {
+		/* Prefer our hwmon device and avoid duplicates */
+		struct thermal_zone_params tzp = {
+			.no_hwmon = IS_ENABLED(CONFIG_POWER_SUPPLY_HWMON)
+		};
 		psy->tzd = thermal_zone_device_register(psy->desc->name,
-				0, 0, psy, &psy_tzd_ops, NULL, 0, 0);
+				0, 0, psy, &psy_tzd_ops, &tzp, 0, 0);
 		if (IS_ERR(psy->tzd))
 			return PTR_ERR(psy->tzd);
 		ret = thermal_zone_device_enable(psy->tzd);

From af73fd32b4f2d23bb61c37f533ddae49e10ef3b0 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Fri, 30 Jun 2023 18:58:37 +0200
Subject: [PATCH 0311/2047] power: reset: oxnas-restart: remove obsolete
 restart driver

Due to lack of maintenance and stall of development for a few years now,
and since no new features will ever be added upstream, remove support
for OX810 and OX820 restart feature.

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Daniel Golle <daniel@makrotopia.org>
Acked-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/reset/Kconfig         |   7 -
 drivers/power/reset/Makefile        |   1 -
 drivers/power/reset/oxnas-restart.c | 231 ----------------------------
 3 files changed, 239 deletions(-)
 delete mode 100644 drivers/power/reset/oxnas-restart.c

diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
index fff07b2bd77b..59e1ebb7842e 100644
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -148,13 +148,6 @@ config POWER_RESET_ODROID_GO_ULTRA_POWEROFF
 	help
 	  This driver supports Power off for Odroid Go Ultra device.
 
-config POWER_RESET_OXNAS
-	bool "OXNAS SoC restart driver"
-	depends on ARCH_OXNAS
-	default MACH_OX820
-	help
-	  Restart support for OXNAS/PLXTECH OX820 SoC.
-
 config POWER_RESET_PIIX4_POWEROFF
 	tristate "Intel PIIX4 power-off driver"
 	depends on PCI
diff --git a/drivers/power/reset/Makefile b/drivers/power/reset/Makefile
index d763e6735ee3..a95d1bd275d1 100644
--- a/drivers/power/reset/Makefile
+++ b/drivers/power/reset/Makefile
@@ -14,7 +14,6 @@ obj-$(CONFIG_POWER_RESET_HISI) += hisi-reboot.o
 obj-$(CONFIG_POWER_RESET_LINKSTATION) += linkstation-poweroff.o
 obj-$(CONFIG_POWER_RESET_MSM) += msm-poweroff.o
 obj-$(CONFIG_POWER_RESET_MT6323) += mt6323-poweroff.o
-obj-$(CONFIG_POWER_RESET_OXNAS) += oxnas-restart.o
 obj-$(CONFIG_POWER_RESET_QCOM_PON) += qcom-pon.o
 obj-$(CONFIG_POWER_RESET_OCELOT_RESET) += ocelot-reset.o
 obj-$(CONFIG_POWER_RESET_ODROID_GO_ULTRA_POWEROFF) += odroid-go-ultra-poweroff.o
diff --git a/drivers/power/reset/oxnas-restart.c b/drivers/power/reset/oxnas-restart.c
deleted file mode 100644
index dd321caa28b6..000000000000
--- a/drivers/power/reset/oxnas-restart.c
+++ /dev/null
@@ -1,231 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0)
-/*
- * oxnas SoC reset driver
- * based on:
- * Microsemi MIPS SoC reset driver
- * and ox820_assert_system_reset() written by Ma Hajun <mahaijuns@gmail.com>
- *
- * Copyright (c) 2013 Ma Hajun <mahaijuns@gmail.com>
- * Copyright (c) 2017 Microsemi Corporation
- * Copyright (c) 2020 Daniel Golle <daniel@makrotopia.org>
- */
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/notifier.h>
-#include <linux/mfd/syscon.h>
-#include <linux/platform_device.h>
-#include <linux/reboot.h>
-#include <linux/regmap.h>
-
-/* bit numbers of reset control register */
-#define OX820_SYS_CTRL_RST_SCU                0
-#define OX820_SYS_CTRL_RST_COPRO              1
-#define OX820_SYS_CTRL_RST_ARM0               2
-#define OX820_SYS_CTRL_RST_ARM1               3
-#define OX820_SYS_CTRL_RST_USBHS              4
-#define OX820_SYS_CTRL_RST_USBHSPHYA          5
-#define OX820_SYS_CTRL_RST_MACA               6
-#define OX820_SYS_CTRL_RST_MAC                OX820_SYS_CTRL_RST_MACA
-#define OX820_SYS_CTRL_RST_PCIEA              7
-#define OX820_SYS_CTRL_RST_SGDMA              8
-#define OX820_SYS_CTRL_RST_CIPHER             9
-#define OX820_SYS_CTRL_RST_DDR                10
-#define OX820_SYS_CTRL_RST_SATA               11
-#define OX820_SYS_CTRL_RST_SATA_LINK          12
-#define OX820_SYS_CTRL_RST_SATA_PHY           13
-#define OX820_SYS_CTRL_RST_PCIEPHY            14
-#define OX820_SYS_CTRL_RST_STATIC             15
-#define OX820_SYS_CTRL_RST_GPIO               16
-#define OX820_SYS_CTRL_RST_UART1              17
-#define OX820_SYS_CTRL_RST_UART2              18
-#define OX820_SYS_CTRL_RST_MISC               19
-#define OX820_SYS_CTRL_RST_I2S                20
-#define OX820_SYS_CTRL_RST_SD                 21
-#define OX820_SYS_CTRL_RST_MACB               22
-#define OX820_SYS_CTRL_RST_PCIEB              23
-#define OX820_SYS_CTRL_RST_VIDEO              24
-#define OX820_SYS_CTRL_RST_DDR_PHY            25
-#define OX820_SYS_CTRL_RST_USBHSPHYB          26
-#define OX820_SYS_CTRL_RST_USBDEV             27
-#define OX820_SYS_CTRL_RST_ARMDBG             29
-#define OX820_SYS_CTRL_RST_PLLA               30
-#define OX820_SYS_CTRL_RST_PLLB               31
-
-/* bit numbers of clock control register */
-#define OX820_SYS_CTRL_CLK_COPRO              0
-#define OX820_SYS_CTRL_CLK_DMA                1
-#define OX820_SYS_CTRL_CLK_CIPHER             2
-#define OX820_SYS_CTRL_CLK_SD                 3
-#define OX820_SYS_CTRL_CLK_SATA               4
-#define OX820_SYS_CTRL_CLK_I2S                5
-#define OX820_SYS_CTRL_CLK_USBHS              6
-#define OX820_SYS_CTRL_CLK_MACA               7
-#define OX820_SYS_CTRL_CLK_MAC                OX820_SYS_CTRL_CLK_MACA
-#define OX820_SYS_CTRL_CLK_PCIEA              8
-#define OX820_SYS_CTRL_CLK_STATIC             9
-#define OX820_SYS_CTRL_CLK_MACB               10
-#define OX820_SYS_CTRL_CLK_PCIEB              11
-#define OX820_SYS_CTRL_CLK_REF600             12
-#define OX820_SYS_CTRL_CLK_USBDEV             13
-#define OX820_SYS_CTRL_CLK_DDR                14
-#define OX820_SYS_CTRL_CLK_DDRPHY             15
-#define OX820_SYS_CTRL_CLK_DDRCK              16
-
-/* Regmap offsets */
-#define OX820_CLK_SET_REGOFFSET               0x2c
-#define OX820_CLK_CLR_REGOFFSET               0x30
-#define OX820_RST_SET_REGOFFSET               0x34
-#define OX820_RST_CLR_REGOFFSET               0x38
-#define OX820_SECONDARY_SEL_REGOFFSET         0x14
-#define OX820_TERTIARY_SEL_REGOFFSET          0x8c
-#define OX820_QUATERNARY_SEL_REGOFFSET        0x94
-#define OX820_DEBUG_SEL_REGOFFSET             0x9c
-#define OX820_ALTERNATIVE_SEL_REGOFFSET       0xa4
-#define OX820_PULLUP_SEL_REGOFFSET            0xac
-#define OX820_SEC_SECONDARY_SEL_REGOFFSET     0x100014
-#define OX820_SEC_TERTIARY_SEL_REGOFFSET      0x10008c
-#define OX820_SEC_QUATERNARY_SEL_REGOFFSET    0x100094
-#define OX820_SEC_DEBUG_SEL_REGOFFSET         0x10009c
-#define OX820_SEC_ALTERNATIVE_SEL_REGOFFSET   0x1000a4
-#define OX820_SEC_PULLUP_SEL_REGOFFSET        0x1000ac
-
-struct oxnas_restart_context {
-	struct regmap *sys_ctrl;
-	struct notifier_block restart_handler;
-};
-
-static int ox820_restart_handle(struct notifier_block *this,
-				 unsigned long mode, void *cmd)
-{
-	struct oxnas_restart_context *ctx = container_of(this, struct
-							oxnas_restart_context,
-							restart_handler);
-	u32 value;
-
-	/*
-	 * Assert reset to cores as per power on defaults
-	 * Don't touch the DDR interface as things will come to an impromptu
-	 * stop NB Possibly should be asserting reset for PLLB, but there are
-	 * timing concerns here according to the docs
-	 */
-	value = BIT(OX820_SYS_CTRL_RST_COPRO)		|
-		BIT(OX820_SYS_CTRL_RST_USBHS)		|
-		BIT(OX820_SYS_CTRL_RST_USBHSPHYA)	|
-		BIT(OX820_SYS_CTRL_RST_MACA)		|
-		BIT(OX820_SYS_CTRL_RST_PCIEA)		|
-		BIT(OX820_SYS_CTRL_RST_SGDMA)		|
-		BIT(OX820_SYS_CTRL_RST_CIPHER)		|
-		BIT(OX820_SYS_CTRL_RST_SATA)		|
-		BIT(OX820_SYS_CTRL_RST_SATA_LINK)	|
-		BIT(OX820_SYS_CTRL_RST_SATA_PHY)	|
-		BIT(OX820_SYS_CTRL_RST_PCIEPHY)		|
-		BIT(OX820_SYS_CTRL_RST_STATIC)		|
-		BIT(OX820_SYS_CTRL_RST_UART1)		|
-		BIT(OX820_SYS_CTRL_RST_UART2)		|
-		BIT(OX820_SYS_CTRL_RST_MISC)		|
-		BIT(OX820_SYS_CTRL_RST_I2S)		|
-		BIT(OX820_SYS_CTRL_RST_SD)		|
-		BIT(OX820_SYS_CTRL_RST_MACB)		|
-		BIT(OX820_SYS_CTRL_RST_PCIEB)		|
-		BIT(OX820_SYS_CTRL_RST_VIDEO)		|
-		BIT(OX820_SYS_CTRL_RST_USBHSPHYB)	|
-		BIT(OX820_SYS_CTRL_RST_USBDEV);
-
-	regmap_write(ctx->sys_ctrl, OX820_RST_SET_REGOFFSET, value);
-
-	/* Release reset to cores as per power on defaults */
-	regmap_write(ctx->sys_ctrl, OX820_RST_CLR_REGOFFSET,
-			BIT(OX820_SYS_CTRL_RST_GPIO));
-
-	/*
-	 * Disable clocks to cores as per power-on defaults - must leave DDR
-	 * related clocks enabled otherwise we'll stop rather abruptly.
-	 */
-	value = BIT(OX820_SYS_CTRL_CLK_COPRO)		|
-		BIT(OX820_SYS_CTRL_CLK_DMA)		|
-		BIT(OX820_SYS_CTRL_CLK_CIPHER)		|
-		BIT(OX820_SYS_CTRL_CLK_SD)		|
-		BIT(OX820_SYS_CTRL_CLK_SATA)		|
-		BIT(OX820_SYS_CTRL_CLK_I2S)		|
-		BIT(OX820_SYS_CTRL_CLK_USBHS)		|
-		BIT(OX820_SYS_CTRL_CLK_MAC)		|
-		BIT(OX820_SYS_CTRL_CLK_PCIEA)		|
-		BIT(OX820_SYS_CTRL_CLK_STATIC)		|
-		BIT(OX820_SYS_CTRL_CLK_MACB)		|
-		BIT(OX820_SYS_CTRL_CLK_PCIEB)		|
-		BIT(OX820_SYS_CTRL_CLK_REF600)		|
-		BIT(OX820_SYS_CTRL_CLK_USBDEV);
-
-	regmap_write(ctx->sys_ctrl, OX820_CLK_CLR_REGOFFSET, value);
-
-	/* Enable clocks to cores as per power-on defaults */
-
-	/* Set sys-control pin mux'ing as per power-on defaults */
-	regmap_write(ctx->sys_ctrl, OX820_SECONDARY_SEL_REGOFFSET, 0);
-	regmap_write(ctx->sys_ctrl, OX820_TERTIARY_SEL_REGOFFSET, 0);
-	regmap_write(ctx->sys_ctrl, OX820_QUATERNARY_SEL_REGOFFSET, 0);
-	regmap_write(ctx->sys_ctrl, OX820_DEBUG_SEL_REGOFFSET, 0);
-	regmap_write(ctx->sys_ctrl, OX820_ALTERNATIVE_SEL_REGOFFSET, 0);
-	regmap_write(ctx->sys_ctrl, OX820_PULLUP_SEL_REGOFFSET, 0);
-
-	regmap_write(ctx->sys_ctrl, OX820_SEC_SECONDARY_SEL_REGOFFSET, 0);
-	regmap_write(ctx->sys_ctrl, OX820_SEC_TERTIARY_SEL_REGOFFSET, 0);
-	regmap_write(ctx->sys_ctrl, OX820_SEC_QUATERNARY_SEL_REGOFFSET, 0);
-	regmap_write(ctx->sys_ctrl, OX820_SEC_DEBUG_SEL_REGOFFSET, 0);
-	regmap_write(ctx->sys_ctrl, OX820_SEC_ALTERNATIVE_SEL_REGOFFSET, 0);
-	regmap_write(ctx->sys_ctrl, OX820_SEC_PULLUP_SEL_REGOFFSET, 0);
-
-	/*
-	 * No need to save any state, as the ROM loader can determine whether
-	 * reset is due to power cycling or programatic action, just hit the
-	 * (self-clearing) CPU reset bit of the block reset register
-	 */
-	value =
-		BIT(OX820_SYS_CTRL_RST_SCU) |
-		BIT(OX820_SYS_CTRL_RST_ARM0) |
-		BIT(OX820_SYS_CTRL_RST_ARM1);
-
-	regmap_write(ctx->sys_ctrl, OX820_RST_SET_REGOFFSET, value);
-
-	pr_emerg("Unable to restart system\n");
-	return NOTIFY_DONE;
-}
-
-static int ox820_restart_probe(struct platform_device *pdev)
-{
-	struct oxnas_restart_context *ctx;
-	struct regmap *sys_ctrl;
-	struct device *dev = &pdev->dev;
-	int err = 0;
-
-	sys_ctrl = syscon_node_to_regmap(pdev->dev.of_node);
-	if (IS_ERR(sys_ctrl))
-		return PTR_ERR(sys_ctrl);
-
-	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
-	if (!ctx)
-		return -ENOMEM;
-
-	ctx->sys_ctrl = sys_ctrl;
-	ctx->restart_handler.notifier_call = ox820_restart_handle;
-	ctx->restart_handler.priority = 192;
-	err = register_restart_handler(&ctx->restart_handler);
-	if (err)
-		dev_err(dev, "can't register restart notifier (err=%d)\n", err);
-
-	return err;
-}
-
-static const struct of_device_id ox820_restart_of_match[] = {
-	{ .compatible = "oxsemi,ox820-sys-ctrl" },
-	{}
-};
-
-static struct platform_driver ox820_restart_driver = {
-	.probe = ox820_restart_probe,
-	.driver = {
-		.name = "ox820-chip-reset",
-		.of_match_table = ox820_restart_of_match,
-	},
-};
-builtin_platform_driver(ox820_restart_driver);

From b582763721828f6c3fcf94d371623127f0198f3f Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 23 Jun 2023 08:49:51 -0500
Subject: [PATCH 0312/2047] crypto: ccp - Rename macro for security attributes

The attribute_show() macro is only valid for determining the availability
of security related sysfs entries. Rename the macro to better show this
relationship.

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/sp-pci.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index b603ad9b8341..c8f075a7f49f 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -32,7 +32,7 @@ struct sp_pci {
 };
 static struct sp_device *sp_dev_master;
 
-#define attribute_show(name, def)						\
+#define security_attribute_show(name, def)					\
 static ssize_t name##_show(struct device *d, struct device_attribute *attr,	\
 			   char *buf)						\
 {										\
@@ -42,24 +42,24 @@ static ssize_t name##_show(struct device *d, struct device_attribute *attr,	\
 	return sysfs_emit(buf, "%d\n", (psp->capability & bit) > 0);		\
 }
 
-attribute_show(fused_part, FUSED_PART)
+security_attribute_show(fused_part, FUSED_PART)
 static DEVICE_ATTR_RO(fused_part);
-attribute_show(debug_lock_on, DEBUG_LOCK_ON)
+security_attribute_show(debug_lock_on, DEBUG_LOCK_ON)
 static DEVICE_ATTR_RO(debug_lock_on);
-attribute_show(tsme_status, TSME_STATUS)
+security_attribute_show(tsme_status, TSME_STATUS)
 static DEVICE_ATTR_RO(tsme_status);
-attribute_show(anti_rollback_status, ANTI_ROLLBACK_STATUS)
+security_attribute_show(anti_rollback_status, ANTI_ROLLBACK_STATUS)
 static DEVICE_ATTR_RO(anti_rollback_status);
-attribute_show(rpmc_production_enabled, RPMC_PRODUCTION_ENABLED)
+security_attribute_show(rpmc_production_enabled, RPMC_PRODUCTION_ENABLED)
 static DEVICE_ATTR_RO(rpmc_production_enabled);
-attribute_show(rpmc_spirom_available, RPMC_SPIROM_AVAILABLE)
+security_attribute_show(rpmc_spirom_available, RPMC_SPIROM_AVAILABLE)
 static DEVICE_ATTR_RO(rpmc_spirom_available);
-attribute_show(hsp_tpm_available, HSP_TPM_AVAILABLE)
+security_attribute_show(hsp_tpm_available, HSP_TPM_AVAILABLE)
 static DEVICE_ATTR_RO(hsp_tpm_available);
-attribute_show(rom_armor_enforced, ROM_ARMOR_ENFORCED)
+security_attribute_show(rom_armor_enforced, ROM_ARMOR_ENFORCED)
 static DEVICE_ATTR_RO(rom_armor_enforced);
 
-static struct attribute *psp_attrs[] = {
+static struct attribute *psp_security_attrs[] = {
 	&dev_attr_fused_part.attr,
 	&dev_attr_debug_lock_on.attr,
 	&dev_attr_tsme_status.attr,
@@ -83,13 +83,13 @@ static umode_t psp_security_is_visible(struct kobject *kobj, struct attribute *a
 	return 0;
 }
 
-static struct attribute_group psp_attr_group = {
-	.attrs = psp_attrs,
+static struct attribute_group psp_security_attr_group = {
+	.attrs = psp_security_attrs,
 	.is_visible = psp_security_is_visible,
 };
 
 static const struct attribute_group *psp_groups[] = {
-	&psp_attr_group,
+	&psp_security_attr_group,
 	NULL,
 };
 

From 2e424c33d8e748b65b683988f80e711cd6a7f619 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 23 Jun 2023 08:49:52 -0500
Subject: [PATCH 0313/2047] crypto: ccp - Add support for displaying PSP
 firmware versions

As it's not always obvious what PSP bootloader or TEE version are
present in OEM systems, add the ability to get this information from
sysfs for supported platforms.

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/ABI/testing/sysfs-driver-ccp | 18 ++++++
 drivers/crypto/ccp/sp-dev.h                |  2 +
 drivers/crypto/ccp/sp-pci.c                | 64 ++++++++++++++++++++++
 3 files changed, 84 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-driver-ccp b/Documentation/ABI/testing/sysfs-driver-ccp
index 7aded9b75553..ee6b787eee7a 100644
--- a/Documentation/ABI/testing/sysfs-driver-ccp
+++ b/Documentation/ABI/testing/sysfs-driver-ccp
@@ -85,3 +85,21 @@ Description:
 		Possible values:
 		0: Not enforced
 		1: Enforced
+
+What:		/sys/bus/pci/devices/<BDF>/bootloader_version
+Date:		June 2023
+KernelVersion:	6.4
+Contact:	mario.limonciello@amd.com
+Description:
+		The /sys/bus/pci/devices/<BDF>/bootloader_version
+		file reports the firmware version of the AMD AGESA
+		bootloader.
+
+What:		/sys/bus/pci/devices/<BDF>/tee_version
+Date:		June 2023
+KernelVersion:	6.4
+Contact:	mario.limonciello@amd.com
+Description:
+		The /sys/bus/pci/devices/<BDF>/tee_version
+		file reports the firmware version of the AMD Trusted
+		Execution Environment (TEE).
diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h
index 1253a0217985..76c32ee6bd65 100644
--- a/drivers/crypto/ccp/sp-dev.h
+++ b/drivers/crypto/ccp/sp-dev.h
@@ -51,6 +51,7 @@ struct tee_vdata {
 	const unsigned int cmdbuff_addr_hi_reg;
 	const unsigned int ring_wptr_reg;
 	const unsigned int ring_rptr_reg;
+	const unsigned int info_reg;
 };
 
 struct platform_access_vdata {
@@ -69,6 +70,7 @@ struct psp_vdata {
 	const unsigned int feature_reg;
 	const unsigned int inten_reg;
 	const unsigned int intsts_reg;
+	const unsigned int bootloader_info_reg;
 };
 
 /* Structure to hold SP device data */
diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index c8f075a7f49f..6c93577950c7 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -8,6 +8,7 @@
  * Author: Gary R Hook <gary.hook@amd.com>
  */
 
+#include <linux/bitfield.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
@@ -24,6 +25,12 @@
 #include "ccp-dev.h"
 #include "psp-dev.h"
 
+/* used for version string AA.BB.CC.DD */
+#define AA				GENMASK(31, 24)
+#define BB				GENMASK(23, 16)
+#define CC				GENMASK(15, 8)
+#define DD				GENMASK(7, 0)
+
 #define MSIX_VECTORS			2
 
 struct sp_pci {
@@ -88,8 +95,65 @@ static struct attribute_group psp_security_attr_group = {
 	.is_visible = psp_security_is_visible,
 };
 
+#define version_attribute_show(name, _offset)					\
+static ssize_t name##_show(struct device *d, struct device_attribute *attr,	\
+			   char *buf)						\
+{										\
+	struct sp_device *sp = dev_get_drvdata(d);				\
+	struct psp_device *psp = sp->psp_data;					\
+	unsigned int val = ioread32(psp->io_regs + _offset);			\
+	return sysfs_emit(buf, "%02lx.%02lx.%02lx.%02lx\n",			\
+			  FIELD_GET(AA, val),			\
+			  FIELD_GET(BB, val),			\
+			  FIELD_GET(CC, val),			\
+			  FIELD_GET(DD, val));			\
+}
+
+version_attribute_show(bootloader_version, psp->vdata->bootloader_info_reg)
+static DEVICE_ATTR_RO(bootloader_version);
+version_attribute_show(tee_version, psp->vdata->tee->info_reg)
+static DEVICE_ATTR_RO(tee_version);
+
+static struct attribute *psp_firmware_attrs[] = {
+	&dev_attr_bootloader_version.attr,
+	&dev_attr_tee_version.attr,
+	NULL,
+};
+
+static umode_t psp_firmware_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct sp_device *sp = dev_get_drvdata(dev);
+	struct psp_device *psp = sp->psp_data;
+	unsigned int val = 0xffffffff;
+
+	if (!psp)
+		return 0;
+
+	if (attr == &dev_attr_bootloader_version.attr &&
+	    psp->vdata->bootloader_info_reg)
+		val = ioread32(psp->io_regs + psp->vdata->bootloader_info_reg);
+
+	if (attr == &dev_attr_tee_version.attr &&
+	    psp->capability & PSP_CAPABILITY_TEE &&
+	    psp->vdata->tee->info_reg)
+		val = ioread32(psp->io_regs + psp->vdata->tee->info_reg);
+
+	/* If platform disallows accessing this register it will be all f's */
+	if (val != 0xffffffff)
+		return 0444;
+
+	return 0;
+}
+
+static struct attribute_group psp_firmware_attr_group = {
+	.attrs = psp_firmware_attrs,
+	.is_visible = psp_firmware_is_visible,
+};
+
 static const struct attribute_group *psp_groups[] = {
 	&psp_security_attr_group,
+	&psp_firmware_attr_group,
 	NULL,
 };
 

From e938b08ad8cd7b757b7b37e8fb1c20897dd3ec09 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 23 Jun 2023 08:49:53 -0500
Subject: [PATCH 0314/2047] crypto: ccp - Add bootloader and TEE version
 offsets

The bootloader and TEE versions are stored in registers that can be
accessed from sysfs.  This exports the information for recent client
and datacenter parts.

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/sp-pci.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index 6c93577950c7..205b93d229a9 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -423,6 +423,7 @@ static const struct tee_vdata teev1 = {
 	.cmdbuff_addr_hi_reg	= 0x1054c,	/* C2PMSG_19 */
 	.ring_wptr_reg          = 0x10550,	/* C2PMSG_20 */
 	.ring_rptr_reg          = 0x10554,	/* C2PMSG_21 */
+	.info_reg		= 0x109e8,	/* C2PMSG_58 */
 };
 
 static const struct tee_vdata teev2 = {
@@ -448,6 +449,7 @@ static const struct platform_access_vdata pa_v2 = {
 
 static const struct psp_vdata pspv1 = {
 	.sev			= &sevv1,
+	.bootloader_info_reg	= 0x105ec,	/* C2PMSG_59 */
 	.feature_reg		= 0x105fc,	/* C2PMSG_63 */
 	.inten_reg		= 0x10610,	/* P2CMSG_INTEN */
 	.intsts_reg		= 0x10614,	/* P2CMSG_INTSTS */
@@ -455,6 +457,7 @@ static const struct psp_vdata pspv1 = {
 
 static const struct psp_vdata pspv2 = {
 	.sev			= &sevv2,
+	.bootloader_info_reg	= 0x109ec,	/* C2PMSG_59 */
 	.feature_reg		= 0x109fc,	/* C2PMSG_63 */
 	.inten_reg		= 0x10690,	/* P2CMSG_INTEN */
 	.intsts_reg		= 0x10694,	/* P2CMSG_INTSTS */
@@ -463,6 +466,7 @@ static const struct psp_vdata pspv2 = {
 static const struct psp_vdata pspv3 = {
 	.tee			= &teev1,
 	.platform_access	= &pa_v1,
+	.bootloader_info_reg	= 0x109ec,	/* C2PMSG_59 */
 	.feature_reg		= 0x109fc,	/* C2PMSG_63 */
 	.inten_reg		= 0x10690,	/* P2CMSG_INTEN */
 	.intsts_reg		= 0x10694,	/* P2CMSG_INTSTS */
@@ -471,6 +475,7 @@ static const struct psp_vdata pspv3 = {
 static const struct psp_vdata pspv4 = {
 	.sev			= &sevv2,
 	.tee			= &teev1,
+	.bootloader_info_reg	= 0x109ec,	/* C2PMSG_59 */
 	.feature_reg		= 0x109fc,	/* C2PMSG_63 */
 	.inten_reg		= 0x10690,	/* P2CMSG_INTEN */
 	.intsts_reg		= 0x10694,	/* P2CMSG_INTSTS */

From b8440d55f7d4ad2b669902301c87c482faf9a8f4 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 23 Jun 2023 08:49:54 -0500
Subject: [PATCH 0315/2047] crypto: ccp - move setting PSP master to earlier in
 the init

Dynamic boost control needs to use platform access symbols
that look for the PSP master as part of initialization.

So move the PSP master before psp_init() so that dynamic boost
control can be initialized properly.

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/psp-dev.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index e3d6955d3265..3390f0bd6408 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -173,13 +173,14 @@ int psp_dev_init(struct sp_device *sp)
 		goto e_err;
 	}
 
+	/* master device must be set for platform access */
+	if (psp->sp->set_psp_master_device)
+		psp->sp->set_psp_master_device(psp->sp);
+
 	ret = psp_init(psp);
 	if (ret)
 		goto e_irq;
 
-	if (sp->set_psp_master_device)
-		sp->set_psp_master_device(sp);
-
 	/* Enable interrupt */
 	iowrite32(-1, psp->io_regs + psp->vdata->inten_reg);
 
@@ -188,6 +189,9 @@ int psp_dev_init(struct sp_device *sp)
 	return 0;
 
 e_irq:
+	if (sp->clear_psp_master_device)
+		sp->clear_psp_master_device(sp);
+
 	sp_free_psp_irq(psp->sp, psp);
 e_err:
 	sp->psp_data = NULL;

From c04cf9e14f109ebcc425c1efd2c01294c52a4d62 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 23 Jun 2023 08:49:55 -0500
Subject: [PATCH 0316/2047] crypto: ccp - Add support for fetching a nonce for
 dynamic boost control

Dynamic Boost Control is a feature offered on AMD client platforms that
allows software to request and set power or frequency limits.

Only software that has authenticated with the PSP can retrieve or set
these limits.

Create a character device and ioctl for fetching the nonce. This ioctl
supports optionally passing authentication information which will influence
how many calls the nonce is valid for.

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/Makefile         |   3 +-
 drivers/crypto/ccp/dbc.c            | 191 ++++++++++++++++++++++++++++
 drivers/crypto/ccp/dbc.h            |  44 +++++++
 drivers/crypto/ccp/psp-dev.c        |   9 ++
 drivers/crypto/ccp/psp-dev.h        |   1 +
 drivers/crypto/ccp/sp-dev.h         |   5 +
 drivers/crypto/ccp/sp-pci.c         |   1 +
 include/linux/psp-platform-access.h |   1 +
 include/uapi/linux/psp-dbc.h        |  67 ++++++++++
 9 files changed, 321 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/ccp/dbc.c
 create mode 100644 drivers/crypto/ccp/dbc.h
 create mode 100644 include/uapi/linux/psp-dbc.h

diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index f6196495e862..aa0ba2d17e1e 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -11,7 +11,8 @@ ccp-$(CONFIG_PCI) += sp-pci.o
 ccp-$(CONFIG_CRYPTO_DEV_SP_PSP) += psp-dev.o \
                                    sev-dev.o \
                                    tee-dev.o \
-                                   platform-access.o
+                                   platform-access.o \
+                                   dbc.o
 
 obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
 ccp-crypto-objs := ccp-crypto-main.o \
diff --git a/drivers/crypto/ccp/dbc.c b/drivers/crypto/ccp/dbc.c
new file mode 100644
index 000000000000..f65e93a81e53
--- /dev/null
+++ b/drivers/crypto/ccp/dbc.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Secure Processor Dynamic Boost Control interface
+ *
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Author: Mario Limonciello <mario.limonciello@amd.com>
+ */
+
+#include "dbc.h"
+
+struct error_map {
+	u32 psp;
+	int ret;
+};
+
+#define DBC_ERROR_ACCESS_DENIED		0x0001
+#define DBC_ERROR_EXCESS_DATA		0x0004
+#define DBC_ERROR_BAD_PARAMETERS	0x0006
+#define DBC_ERROR_BAD_STATE		0x0007
+#define DBC_ERROR_NOT_IMPLEMENTED	0x0009
+#define DBC_ERROR_BUSY			0x000D
+#define DBC_ERROR_MESSAGE_FAILURE	0x0307
+#define DBC_ERROR_OVERFLOW		0x300F
+#define DBC_ERROR_SIGNATURE_INVALID	0x3072
+
+static struct error_map error_codes[] = {
+	{DBC_ERROR_ACCESS_DENIED,	-EACCES},
+	{DBC_ERROR_EXCESS_DATA,		-E2BIG},
+	{DBC_ERROR_BAD_PARAMETERS,	-EINVAL},
+	{DBC_ERROR_BAD_STATE,		-EAGAIN},
+	{DBC_ERROR_MESSAGE_FAILURE,	-ENOENT},
+	{DBC_ERROR_NOT_IMPLEMENTED,	-ENOENT},
+	{DBC_ERROR_BUSY,		-EBUSY},
+	{DBC_ERROR_OVERFLOW,		-ENFILE},
+	{DBC_ERROR_SIGNATURE_INVALID,	-EPERM},
+	{0x0,	0x0},
+};
+
+static int send_dbc_cmd(struct psp_dbc_device *dbc_dev,
+			enum psp_platform_access_msg msg)
+{
+	int ret;
+
+	dbc_dev->mbox->req.header.status = 0;
+	ret = psp_send_platform_access_msg(msg, (struct psp_request *)dbc_dev->mbox);
+	if (ret == -EIO) {
+		int i;
+
+		dev_dbg(dbc_dev->dev,
+			 "msg 0x%x failed with PSP error: 0x%x\n",
+			 msg, dbc_dev->mbox->req.header.status);
+
+		for (i = 0; error_codes[i].psp; i++) {
+			if (dbc_dev->mbox->req.header.status == error_codes[i].psp)
+				return error_codes[i].ret;
+		}
+	}
+
+	return ret;
+}
+
+static int send_dbc_nonce(struct psp_dbc_device *dbc_dev)
+{
+	int ret;
+
+	dbc_dev->mbox->req.header.payload_size = sizeof(dbc_dev->mbox->dbc_nonce);
+	ret = send_dbc_cmd(dbc_dev, PSP_DYNAMIC_BOOST_GET_NONCE);
+	if (ret == -EAGAIN) {
+		dev_dbg(dbc_dev->dev, "retrying get nonce\n");
+		ret = send_dbc_cmd(dbc_dev, PSP_DYNAMIC_BOOST_GET_NONCE);
+	}
+
+	return ret;
+}
+
+void dbc_dev_destroy(struct psp_device *psp)
+{
+	struct psp_dbc_device *dbc_dev = psp->dbc_data;
+
+	if (!dbc_dev)
+		return;
+
+	misc_deregister(&dbc_dev->char_dev);
+	mutex_destroy(&dbc_dev->ioctl_mutex);
+	psp->dbc_data = NULL;
+}
+
+static long dbc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct psp_device *psp_master = psp_get_master_device();
+	void __user *argp = (void __user *)arg;
+	struct psp_dbc_device *dbc_dev;
+	int ret;
+
+	if (!psp_master || !psp_master->dbc_data)
+		return -ENODEV;
+	dbc_dev = psp_master->dbc_data;
+
+	mutex_lock(&dbc_dev->ioctl_mutex);
+
+	switch (cmd) {
+	case DBCIOCNONCE:
+		if (copy_from_user(&dbc_dev->mbox->dbc_nonce.user, argp,
+				   sizeof(struct dbc_user_nonce))) {
+			ret = -EFAULT;
+			goto unlock;
+		}
+
+		ret = send_dbc_nonce(dbc_dev);
+		if (ret)
+			goto unlock;
+
+		if (copy_to_user(argp, &dbc_dev->mbox->dbc_nonce.user,
+				 sizeof(struct dbc_user_nonce))) {
+			ret = -EFAULT;
+			goto unlock;
+		}
+		break;
+	default:
+		ret = -EINVAL;
+
+	}
+unlock:
+	mutex_unlock(&dbc_dev->ioctl_mutex);
+
+	return ret;
+}
+
+static const struct file_operations dbc_fops = {
+	.owner	= THIS_MODULE,
+	.unlocked_ioctl = dbc_ioctl,
+};
+
+int dbc_dev_init(struct psp_device *psp)
+{
+	struct device *dev = psp->dev;
+	struct psp_dbc_device *dbc_dev;
+	int ret;
+
+	if (!PSP_FEATURE(psp, DBC))
+		return 0;
+
+	dbc_dev = devm_kzalloc(dev, sizeof(*dbc_dev), GFP_KERNEL);
+	if (!dbc_dev)
+		return -ENOMEM;
+
+	BUILD_BUG_ON(sizeof(union dbc_buffer) > PAGE_SIZE);
+	dbc_dev->mbox = (void *)devm_get_free_pages(dev, GFP_KERNEL, 0);
+	if (!dbc_dev->mbox) {
+		ret = -ENOMEM;
+		goto cleanup_dev;
+	}
+
+	psp->dbc_data = dbc_dev;
+	dbc_dev->dev = dev;
+
+	ret = send_dbc_nonce(dbc_dev);
+	if (ret == -EACCES) {
+		dev_dbg(dbc_dev->dev,
+			"dynamic boost control was previously authenticated\n");
+		ret = 0;
+	}
+	dev_dbg(dbc_dev->dev, "dynamic boost control is %savailable\n",
+		ret ? "un" : "");
+	if (ret) {
+		ret = 0;
+		goto cleanup_mbox;
+	}
+
+	dbc_dev->char_dev.minor = MISC_DYNAMIC_MINOR;
+	dbc_dev->char_dev.name = "dbc";
+	dbc_dev->char_dev.fops = &dbc_fops;
+	dbc_dev->char_dev.mode = 0600;
+	ret = misc_register(&dbc_dev->char_dev);
+	if (ret)
+		goto cleanup_mbox;
+
+	mutex_init(&dbc_dev->ioctl_mutex);
+
+	return 0;
+
+cleanup_mbox:
+	devm_free_pages(dev, (unsigned long)dbc_dev->mbox);
+
+cleanup_dev:
+	psp->dbc_data = NULL;
+	devm_kfree(dev, dbc_dev);
+
+	return ret;
+}
diff --git a/drivers/crypto/ccp/dbc.h b/drivers/crypto/ccp/dbc.h
new file mode 100644
index 000000000000..1c3a0a078d15
--- /dev/null
+++ b/drivers/crypto/ccp/dbc.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AMD Platform Security Processor (PSP) Dynamic Boost Control support
+ *
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Author: Mario Limonciello <mario.limonciello@amd.com>
+ */
+
+#ifndef __DBC_H__
+#define __DBC_H__
+
+#include <uapi/linux/psp-dbc.h>
+
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/psp-platform-access.h>
+
+#include "psp-dev.h"
+
+struct psp_dbc_device {
+	struct device *dev;
+
+	union dbc_buffer *mbox;
+
+	struct mutex ioctl_mutex;
+
+	struct miscdevice char_dev;
+};
+
+struct dbc_nonce {
+	struct psp_req_buffer_hdr	header;
+	struct dbc_user_nonce		user;
+} __packed;
+
+union dbc_buffer {
+	struct psp_request		req;
+	struct dbc_nonce		dbc_nonce;
+};
+
+void dbc_dev_destroy(struct psp_device *psp);
+int dbc_dev_init(struct psp_device *psp);
+
+#endif /* __DBC_H */
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index 3390f0bd6408..d42d7bc62352 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -15,6 +15,7 @@
 #include "sev-dev.h"
 #include "tee-dev.h"
 #include "platform-access.h"
+#include "dbc.h"
 
 struct psp_device *psp_master;
 
@@ -112,6 +113,12 @@ static void psp_init_platform_access(struct psp_device *psp)
 		dev_warn(psp->dev, "platform access init failed: %d\n", ret);
 		return;
 	}
+
+	/* dbc must come after platform access as it tests the feature */
+	ret = dbc_dev_init(psp);
+	if (ret)
+		dev_warn(psp->dev, "failed to init dynamic boost control: %d\n",
+			 ret);
 }
 
 static int psp_init(struct psp_device *psp)
@@ -217,6 +224,8 @@ void psp_dev_destroy(struct sp_device *sp)
 
 	tee_dev_destroy(psp);
 
+	dbc_dev_destroy(psp);
+
 	platform_access_dev_destroy(psp);
 
 	sp_free_psp_irq(sp, psp);
diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h
index 505e4bdeaca8..8a4de69399c5 100644
--- a/drivers/crypto/ccp/psp-dev.h
+++ b/drivers/crypto/ccp/psp-dev.h
@@ -40,6 +40,7 @@ struct psp_device {
 	void *sev_data;
 	void *tee_data;
 	void *platform_access_data;
+	void *dbc_data;
 
 	unsigned int capability;
 };
diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h
index 76c32ee6bd65..2329ad524b49 100644
--- a/drivers/crypto/ccp/sp-dev.h
+++ b/drivers/crypto/ccp/sp-dev.h
@@ -28,6 +28,10 @@
 #define CACHE_NONE			0x00
 #define CACHE_WB_NO_ALLOC		0xb7
 
+#define PLATFORM_FEATURE_DBC		0x1
+
+#define PSP_FEATURE(psp, feat)	(psp->vdata && psp->vdata->platform_features & PLATFORM_FEATURE_##feat)
+
 /* Structure to hold CCP device data */
 struct ccp_device;
 struct ccp_vdata {
@@ -71,6 +75,7 @@ struct psp_vdata {
 	const unsigned int inten_reg;
 	const unsigned int intsts_reg;
 	const unsigned int bootloader_info_reg;
+	const unsigned int platform_features;
 };
 
 /* Structure to hold SP device data */
diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index 205b93d229a9..b6ab56abeb68 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -470,6 +470,7 @@ static const struct psp_vdata pspv3 = {
 	.feature_reg		= 0x109fc,	/* C2PMSG_63 */
 	.inten_reg		= 0x10690,	/* P2CMSG_INTEN */
 	.intsts_reg		= 0x10694,	/* P2CMSG_INTSTS */
+	.platform_features	= PLATFORM_FEATURE_DBC,
 };
 
 static const struct psp_vdata pspv4 = {
diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h
index 75da8f5f7ad8..53b4a1df5180 100644
--- a/include/linux/psp-platform-access.h
+++ b/include/linux/psp-platform-access.h
@@ -8,6 +8,7 @@
 enum psp_platform_access_msg {
 	PSP_CMD_NONE = 0x0,
 	PSP_I2C_REQ_BUS_CMD = 0x64,
+	PSP_DYNAMIC_BOOST_GET_NONCE,
 };
 
 struct psp_req_buffer_hdr {
diff --git a/include/uapi/linux/psp-dbc.h b/include/uapi/linux/psp-dbc.h
new file mode 100644
index 000000000000..d032f78934e2
--- /dev/null
+++ b/include/uapi/linux/psp-dbc.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Userspace interface for AMD Dynamic Boost Control (DBC)
+ *
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Author: Mario Limonciello <mario.limonciello@amd.com>
+ */
+
+#ifndef __PSP_DBC_USER_H__
+#define __PSP_DBC_USER_H__
+
+#include <linux/types.h>
+
+/**
+ * DOC: AMD Dynamic Boost Control (DBC) interface
+ */
+
+#define DBC_NONCE_SIZE		16
+#define DBC_SIG_SIZE		32
+
+/**
+ * struct dbc_user_nonce - Nonce exchange structure (input/output).
+ * @auth_needed: Whether the PSP should authenticate this request (input).
+ *               0: no authentication, PSP will return single use nonce.
+ *               1: authentication: PSP will return multi-use nonce.
+ * @nonce:       8 byte value used for future authentication (output).
+ * @signature:   Optional 32 byte signature created by software using a
+ *               previous nonce (input).
+ */
+struct dbc_user_nonce {
+	__u32	auth_needed;
+	__u8	nonce[DBC_NONCE_SIZE];
+	__u8	signature[DBC_SIG_SIZE];
+} __packed;
+
+/**
+ * Dynamic Boost Control (DBC) IOC
+ *
+ * possible return codes for all DBC IOCTLs:
+ *  0:          success
+ *  -EINVAL:    invalid input
+ *  -E2BIG:     excess data passed
+ *  -EFAULT:    failed to copy to/from userspace
+ *  -EBUSY:     mailbox in recovery or in use
+ *  -ENODEV:    driver not bound with PSP device
+ *  -EACCES:    request isn't authorized
+ *  -EINVAL:    invalid parameter
+ *  -ETIMEDOUT: request timed out
+ *  -EAGAIN:    invalid request for state machine
+ *  -ENOENT:    not implemented
+ *  -ENFILE:    overflow
+ *  -EPERM:     invalid signature
+ *  -EIO:       unknown error
+ */
+#define DBC_IOC_TYPE	'D'
+
+/**
+ * DBCIOCNONCE - Fetch a nonce from the PSP for authenticating commands.
+ *               If a nonce is fetched without authentication it can only
+ *               be utilized for one command.
+ *               If a nonce is fetched with authentication it can be used
+ *               for multiple requests.
+ */
+#define DBCIOCNONCE	_IOWR(DBC_IOC_TYPE, 0x1, struct dbc_user_nonce)
+
+#endif /* __PSP_DBC_USER_H__ */

From d9408716d2126439fbc46f6c40e72792069b8411 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 23 Jun 2023 08:49:56 -0500
Subject: [PATCH 0317/2047] crypto: ccp - Add support for setting user ID for
 dynamic boost control

As part of the authentication flow for Dynamic Boost Control, the calling
software will need to send a uid used in all of its future
communications.

Add support for another IOCTL call to let userspace software set this up.

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/dbc.c            | 18 ++++++++++++++++++
 drivers/crypto/ccp/dbc.h            |  6 ++++++
 include/linux/psp-platform-access.h |  1 +
 include/uapi/linux/psp-dbc.h        | 20 ++++++++++++++++++++
 4 files changed, 45 insertions(+)

diff --git a/drivers/crypto/ccp/dbc.c b/drivers/crypto/ccp/dbc.c
index f65e93a81e53..c6f5fb3658ca 100644
--- a/drivers/crypto/ccp/dbc.c
+++ b/drivers/crypto/ccp/dbc.c
@@ -117,6 +117,24 @@ static long dbc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			goto unlock;
 		}
 		break;
+	case DBCIOCUID:
+		dbc_dev->mbox->req.header.payload_size = sizeof(dbc_dev->mbox->dbc_set_uid);
+		if (copy_from_user(&dbc_dev->mbox->dbc_set_uid.user, argp,
+				   sizeof(struct dbc_user_setuid))) {
+			ret = -EFAULT;
+			goto unlock;
+		}
+
+		ret = send_dbc_cmd(dbc_dev, PSP_DYNAMIC_BOOST_SET_UID);
+		if (ret)
+			goto unlock;
+
+		if (copy_to_user(argp, &dbc_dev->mbox->dbc_set_uid.user,
+				 sizeof(struct dbc_user_setuid))) {
+			ret = -EFAULT;
+			goto unlock;
+		}
+		break;
 	default:
 		ret = -EINVAL;
 
diff --git a/drivers/crypto/ccp/dbc.h b/drivers/crypto/ccp/dbc.h
index 1c3a0a078d15..156435100076 100644
--- a/drivers/crypto/ccp/dbc.h
+++ b/drivers/crypto/ccp/dbc.h
@@ -33,9 +33,15 @@ struct dbc_nonce {
 	struct dbc_user_nonce		user;
 } __packed;
 
+struct dbc_set_uid {
+	struct psp_req_buffer_hdr	header;
+	struct dbc_user_setuid		user;
+} __packed;
+
 union dbc_buffer {
 	struct psp_request		req;
 	struct dbc_nonce		dbc_nonce;
+	struct dbc_set_uid		dbc_set_uid;
 };
 
 void dbc_dev_destroy(struct psp_device *psp);
diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h
index 53b4a1df5180..18b9e0f0cb03 100644
--- a/include/linux/psp-platform-access.h
+++ b/include/linux/psp-platform-access.h
@@ -9,6 +9,7 @@ enum psp_platform_access_msg {
 	PSP_CMD_NONE = 0x0,
 	PSP_I2C_REQ_BUS_CMD = 0x64,
 	PSP_DYNAMIC_BOOST_GET_NONCE,
+	PSP_DYNAMIC_BOOST_SET_UID,
 };
 
 struct psp_req_buffer_hdr {
diff --git a/include/uapi/linux/psp-dbc.h b/include/uapi/linux/psp-dbc.h
index d032f78934e2..7443c78ede19 100644
--- a/include/uapi/linux/psp-dbc.h
+++ b/include/uapi/linux/psp-dbc.h
@@ -18,6 +18,7 @@
 
 #define DBC_NONCE_SIZE		16
 #define DBC_SIG_SIZE		32
+#define DBC_UID_SIZE		16
 
 /**
  * struct dbc_user_nonce - Nonce exchange structure (input/output).
@@ -34,6 +35,16 @@ struct dbc_user_nonce {
 	__u8	signature[DBC_SIG_SIZE];
 } __packed;
 
+/**
+ * struct dbc_user_setuid - UID exchange structure (input).
+ * @uid:       16 byte value representing software identity
+ * @signature: 32 byte signature created by software using a previous nonce
+ */
+struct dbc_user_setuid {
+	__u8	uid[DBC_UID_SIZE];
+	__u8	signature[DBC_SIG_SIZE];
+} __packed;
+
 /**
  * Dynamic Boost Control (DBC) IOC
  *
@@ -64,4 +75,13 @@ struct dbc_user_nonce {
  */
 #define DBCIOCNONCE	_IOWR(DBC_IOC_TYPE, 0x1, struct dbc_user_nonce)
 
+/**
+ * DBCIOCUID - Set the user ID (UID) of a calling process.
+ *             The user ID is 8 bytes long. It must be programmed using a
+ *             32 byte signature built using the nonce fetched from
+ *             DBCIOCNONCE.
+ *             The UID can only be set once until the system is rebooted.
+ */
+#define DBCIOCUID	_IOW(DBC_IOC_TYPE, 0x2, struct dbc_user_setuid)
+
 #endif /* __PSP_DBC_USER_H__ */

From e2cfe05e9277b5a7abbbc186fec1ad37348dd956 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 23 Jun 2023 08:49:57 -0500
Subject: [PATCH 0318/2047] crypto: ccp - Add support for getting and setting
 DBC parameters

After software has authenticated a dynamic boost control request,
it can fetch and set supported parameters using a selection of messages.

Add support for these messages and export the ability to do this to
userspace.

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/dbc.c            | 41 ++++++++++++++++++++
 drivers/crypto/ccp/dbc.h            |  6 +++
 include/linux/psp-platform-access.h |  2 +
 include/uapi/linux/psp-dbc.h        | 60 +++++++++++++++++++++++++++++
 4 files changed, 109 insertions(+)

diff --git a/drivers/crypto/ccp/dbc.c b/drivers/crypto/ccp/dbc.c
index c6f5fb3658ca..839ea14b9a85 100644
--- a/drivers/crypto/ccp/dbc.c
+++ b/drivers/crypto/ccp/dbc.c
@@ -74,6 +74,30 @@ static int send_dbc_nonce(struct psp_dbc_device *dbc_dev)
 	return ret;
 }
 
+static int send_dbc_parameter(struct psp_dbc_device *dbc_dev)
+{
+	dbc_dev->mbox->req.header.payload_size = sizeof(dbc_dev->mbox->dbc_param);
+
+	switch (dbc_dev->mbox->dbc_param.user.msg_index) {
+	case PARAM_SET_FMAX_CAP:
+	case PARAM_SET_PWR_CAP:
+	case PARAM_SET_GFX_MODE:
+		return send_dbc_cmd(dbc_dev, PSP_DYNAMIC_BOOST_SET_PARAMETER);
+	case PARAM_GET_FMAX_CAP:
+	case PARAM_GET_PWR_CAP:
+	case PARAM_GET_CURR_TEMP:
+	case PARAM_GET_FMAX_MAX:
+	case PARAM_GET_FMAX_MIN:
+	case PARAM_GET_SOC_PWR_MAX:
+	case PARAM_GET_SOC_PWR_MIN:
+	case PARAM_GET_SOC_PWR_CUR:
+	case PARAM_GET_GFX_MODE:
+		return send_dbc_cmd(dbc_dev, PSP_DYNAMIC_BOOST_GET_PARAMETER);
+	}
+
+	return -EINVAL;
+}
+
 void dbc_dev_destroy(struct psp_device *psp)
 {
 	struct psp_dbc_device *dbc_dev = psp->dbc_data;
@@ -135,6 +159,23 @@ static long dbc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			goto unlock;
 		}
 		break;
+	case DBCIOCPARAM:
+		if (copy_from_user(&dbc_dev->mbox->dbc_param.user, argp,
+				   sizeof(struct dbc_user_param))) {
+			ret = -EFAULT;
+			goto unlock;
+		}
+
+		ret = send_dbc_parameter(dbc_dev);
+		if (ret)
+			goto unlock;
+
+		if (copy_to_user(argp, &dbc_dev->mbox->dbc_param.user,
+				 sizeof(struct dbc_user_param)))  {
+			ret = -EFAULT;
+			goto unlock;
+		}
+		break;
 	default:
 		ret = -EINVAL;
 
diff --git a/drivers/crypto/ccp/dbc.h b/drivers/crypto/ccp/dbc.h
index 156435100076..e963099ca38e 100644
--- a/drivers/crypto/ccp/dbc.h
+++ b/drivers/crypto/ccp/dbc.h
@@ -38,10 +38,16 @@ struct dbc_set_uid {
 	struct dbc_user_setuid		user;
 } __packed;
 
+struct dbc_param {
+	struct psp_req_buffer_hdr	header;
+	struct dbc_user_param		user;
+} __packed;
+
 union dbc_buffer {
 	struct psp_request		req;
 	struct dbc_nonce		dbc_nonce;
 	struct dbc_set_uid		dbc_set_uid;
+	struct dbc_param		dbc_param;
 };
 
 void dbc_dev_destroy(struct psp_device *psp);
diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h
index 18b9e0f0cb03..c1dc87fc536b 100644
--- a/include/linux/psp-platform-access.h
+++ b/include/linux/psp-platform-access.h
@@ -10,6 +10,8 @@ enum psp_platform_access_msg {
 	PSP_I2C_REQ_BUS_CMD = 0x64,
 	PSP_DYNAMIC_BOOST_GET_NONCE,
 	PSP_DYNAMIC_BOOST_SET_UID,
+	PSP_DYNAMIC_BOOST_GET_PARAMETER,
+	PSP_DYNAMIC_BOOST_SET_PARAMETER,
 };
 
 struct psp_req_buffer_hdr {
diff --git a/include/uapi/linux/psp-dbc.h b/include/uapi/linux/psp-dbc.h
index 7443c78ede19..b3845a9ff5fd 100644
--- a/include/uapi/linux/psp-dbc.h
+++ b/include/uapi/linux/psp-dbc.h
@@ -45,6 +45,23 @@ struct dbc_user_setuid {
 	__u8	signature[DBC_SIG_SIZE];
 } __packed;
 
+/**
+ * struct dbc_user_param - Parameter exchange structure (input/output).
+ * @msg_index: Message indicating what parameter to set or get (input)
+ * @param:     4 byte parameter, units are message specific. (input/output)
+ * @signature: 32 byte signature.
+ *             - When sending a message this is to be created by software
+ *               using a previous nonce (input)
+ *             - For interpreting results, this signature is updated by the
+ *               PSP to allow software to validate the authenticity of the
+ *               results.
+ */
+struct dbc_user_param {
+	__u32	msg_index;
+	__u32	param;
+	__u8	signature[DBC_SIG_SIZE];
+} __packed;
+
 /**
  * Dynamic Boost Control (DBC) IOC
  *
@@ -84,4 +101,47 @@ struct dbc_user_setuid {
  */
 #define DBCIOCUID	_IOW(DBC_IOC_TYPE, 0x2, struct dbc_user_setuid)
 
+/**
+ * DBCIOCPARAM - Set or get a parameter from the PSP.
+ *               This request will only work after DBCIOCUID has successfully
+ *               set the UID of the calling process.
+ *               Whether the parameter is set or get is controlled by the
+ *               message ID in the request.
+ *               This command must be sent using a 32 byte signature built
+ *               using the nonce fetched from DBCIOCNONCE.
+ *               When the command succeeds, the 32 byte signature will be
+ *               updated by the PSP for software to authenticate the results.
+ */
+#define DBCIOCPARAM	_IOWR(DBC_IOC_TYPE, 0x3, struct dbc_user_param)
+
+/**
+ * enum dbc_cmd_msg - Messages utilized by DBCIOCPARAM
+ * @PARAM_GET_FMAX_CAP:		Get frequency cap (MHz)
+ * @PARAM_SET_FMAX_CAP:		Set frequency cap (MHz)
+ * @PARAM_GET_PWR_CAP:		Get socket power cap (mW)
+ * @PARAM_SET_PWR_CAP:		Set socket power cap (mW)
+ * @PARAM_GET_GFX_MODE:		Get graphics mode (0/1)
+ * @PARAM_SET_GFX_MODE:		Set graphics mode (0/1)
+ * @PARAM_GET_CURR_TEMP:	Get current temperature (degrees C)
+ * @PARAM_GET_FMAX_MAX:		Get maximum allowed value for frequency (MHz)
+ * @PARAM_GET_FMAX_MIN:		Get minimum allowed value for frequency (MHz)
+ * @PARAM_GET_SOC_PWR_MAX:	Get maximum allowed value for SoC power (mw)
+ * @PARAM_GET_SOC_PWR_MIN:	Get minimum allowed value for SoC power (mw)
+ * @PARAM_GET_SOC_PWR_CUR:	Get current value for SoC Power (mW)
+ */
+enum dbc_cmd_msg {
+	PARAM_GET_FMAX_CAP	= 0x3,
+	PARAM_SET_FMAX_CAP	= 0x4,
+	PARAM_GET_PWR_CAP	= 0x5,
+	PARAM_SET_PWR_CAP	= 0x6,
+	PARAM_GET_GFX_MODE	= 0x7,
+	PARAM_SET_GFX_MODE	= 0x8,
+	PARAM_GET_CURR_TEMP	= 0x9,
+	PARAM_GET_FMAX_MAX	= 0xA,
+	PARAM_GET_FMAX_MIN	= 0xB,
+	PARAM_GET_SOC_PWR_MAX	= 0xC,
+	PARAM_GET_SOC_PWR_MIN	= 0xD,
+	PARAM_GET_SOC_PWR_CUR	= 0xE,
+};
+
 #endif /* __PSP_DBC_USER_H__ */

From febe3ed3222f92672d3e0471893aa8ab23275c28 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 23 Jun 2023 08:49:58 -0500
Subject: [PATCH 0319/2047] crypto: ccp - Add a sample library for ioctl use

Add a small shared library that demonstrates the usage of the
IOCTL interface.  This library can be linked to but, is
intended to be loaded and used by higher level languages

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 tools/crypto/ccp/Makefile | 13 +++++++
 tools/crypto/ccp/dbc.c    | 72 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 tools/crypto/ccp/Makefile
 create mode 100644 tools/crypto/ccp/dbc.c

diff --git a/tools/crypto/ccp/Makefile b/tools/crypto/ccp/Makefile
new file mode 100644
index 000000000000..ae4a66d1558a
--- /dev/null
+++ b/tools/crypto/ccp/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only
+CFLAGS += -D__EXPORTED_HEADERS__ -I../../../include/uapi -I../../../include
+
+TARGET = dbc_library.so
+
+all: $(TARGET)
+
+dbc_library.so: dbc.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o $@ $<
+	chmod -x $@
+
+clean:
+	$(RM) $(TARGET)
diff --git a/tools/crypto/ccp/dbc.c b/tools/crypto/ccp/dbc.c
new file mode 100644
index 000000000000..37e813175642
--- /dev/null
+++ b/tools/crypto/ccp/dbc.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Secure Processor Dynamic Boost Control sample library
+ *
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Author: Mario Limonciello <mario.limonciello@amd.com>
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+/* if uapi header isn't installed, this might not yet exist */
+#ifndef __packed
+#define __packed __attribute__((packed))
+#endif
+#include <linux/psp-dbc.h>
+
+int get_nonce(int fd, void *nonce_out, void *signature)
+{
+	struct dbc_user_nonce tmp = {
+		.auth_needed = !!signature,
+	};
+	int ret;
+
+	assert(nonce_out);
+
+	if (signature)
+		memcpy(tmp.signature, signature, sizeof(tmp.signature));
+
+	ret = ioctl(fd, DBCIOCNONCE, &tmp);
+	if (ret)
+		return ret;
+	memcpy(nonce_out, tmp.nonce, sizeof(tmp.nonce));
+
+	return 0;
+}
+
+int set_uid(int fd, __u8 *uid, __u8 *signature)
+{
+	struct dbc_user_setuid tmp;
+
+	assert(uid);
+	assert(signature);
+
+	memcpy(tmp.uid, uid, sizeof(tmp.uid));
+	memcpy(tmp.signature, signature, sizeof(tmp.signature));
+
+	return ioctl(fd, DBCIOCUID, &tmp);
+}
+
+int process_param(int fd, int msg_index, __u8 *signature, int *data)
+{
+	struct dbc_user_param tmp = {
+		.msg_index = msg_index,
+		.param = *data,
+	};
+	int ret;
+
+	assert(signature);
+	assert(data);
+
+	memcpy(tmp.signature, signature, sizeof(tmp.signature));
+
+	ret = ioctl(fd, DBCIOCPARAM, &tmp);
+	if (ret)
+		return ret;
+
+	*data = tmp.param;
+	return 0;
+}

From f40d42f116cf965a9e37e2991f19ca1b5b156210 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 23 Jun 2023 08:49:59 -0500
Subject: [PATCH 0320/2047] crypto: ccp - Add a sample python script for
 Dynamic Boost Control

Dynamic Boost Control commands are triggered by userspace with
an IOCTL interface that userspace will prepare proper buffers
for a request.

To allow prototyping and testing this interface, add a python3
command line script that loads the dbc_library.so for utilizing
the IOCTLs.

The signature to use and UID are passed as arguments to this script.

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 tools/crypto/ccp/.gitignore |   1 +
 tools/crypto/ccp/dbc.py     |  64 +++++++++++++++++
 tools/crypto/ccp/dbc_cli.py | 134 ++++++++++++++++++++++++++++++++++++
 3 files changed, 199 insertions(+)
 create mode 100644 tools/crypto/ccp/.gitignore
 create mode 100644 tools/crypto/ccp/dbc.py
 create mode 100755 tools/crypto/ccp/dbc_cli.py

diff --git a/tools/crypto/ccp/.gitignore b/tools/crypto/ccp/.gitignore
new file mode 100644
index 000000000000..bee8a64b79a9
--- /dev/null
+++ b/tools/crypto/ccp/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/tools/crypto/ccp/dbc.py b/tools/crypto/ccp/dbc.py
new file mode 100644
index 000000000000..3f6a825ffc9e
--- /dev/null
+++ b/tools/crypto/ccp/dbc.py
@@ -0,0 +1,64 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0
+
+import ctypes
+import os
+
+DBC_UID_SIZE = 16
+DBC_NONCE_SIZE = 16
+DBC_SIG_SIZE = 32
+
+PARAM_GET_FMAX_CAP = (0x3,)
+PARAM_SET_FMAX_CAP = (0x4,)
+PARAM_GET_PWR_CAP = (0x5,)
+PARAM_SET_PWR_CAP = (0x6,)
+PARAM_GET_GFX_MODE = (0x7,)
+PARAM_SET_GFX_MODE = (0x8,)
+PARAM_GET_CURR_TEMP = (0x9,)
+PARAM_GET_FMAX_MAX = (0xA,)
+PARAM_GET_FMAX_MIN = (0xB,)
+PARAM_GET_SOC_PWR_MAX = (0xC,)
+PARAM_GET_SOC_PWR_MIN = (0xD,)
+PARAM_GET_SOC_PWR_CUR = (0xE,)
+
+DEVICE_NODE = "/dev/dbc"
+
+lib = ctypes.CDLL("./dbc_library.so", mode=ctypes.RTLD_GLOBAL)
+
+
+def handle_error(code):
+    val = code * -1
+    raise OSError(val, os.strerror(val))
+
+
+def get_nonce(device, signature):
+    if not device:
+        raise ValueError("Device required")
+    buf = ctypes.create_string_buffer(DBC_NONCE_SIZE)
+    ret = lib.get_nonce(device.fileno(), ctypes.byref(buf), signature)
+    if ret:
+        handle_error(ret)
+    return buf.value
+
+
+def set_uid(device, new_uid, signature):
+    if not signature:
+        raise ValueError("Signature required")
+    if not new_uid:
+        raise ValueError("UID required")
+    ret = lib.set_uid(device.fileno(), new_uid, signature)
+    if ret:
+        handle_error(ret)
+    return True
+
+
+def process_param(device, message, signature, data=None):
+    if not signature:
+        raise ValueError("Signature required")
+    if type(message) != tuple:
+        raise ValueError("Expected message tuple")
+    arg = ctypes.c_int(data if data else 0)
+    ret = lib.process_param(device.fileno(), message[0], signature, ctypes.pointer(arg))
+    if ret:
+        handle_error(ret)
+    return arg, signature
diff --git a/tools/crypto/ccp/dbc_cli.py b/tools/crypto/ccp/dbc_cli.py
new file mode 100755
index 000000000000..bf52233fd038
--- /dev/null
+++ b/tools/crypto/ccp/dbc_cli.py
@@ -0,0 +1,134 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0
+import argparse
+import binascii
+import os
+import errno
+from dbc import *
+
+ERRORS = {
+    errno.EACCES: "Access is denied",
+    errno.E2BIG: "Excess data provided",
+    errno.EINVAL: "Bad parameters",
+    errno.EAGAIN: "Bad state",
+    errno.ENOENT: "Not implemented or message failure",
+    errno.EBUSY: "Busy",
+    errno.ENFILE: "Overflow",
+    errno.EPERM: "Signature invalid",
+}
+
+messages = {
+    "get-fmax-cap": PARAM_GET_FMAX_CAP,
+    "set-fmax-cap": PARAM_SET_FMAX_CAP,
+    "get-power-cap": PARAM_GET_PWR_CAP,
+    "set-power-cap": PARAM_SET_PWR_CAP,
+    "get-graphics-mode": PARAM_GET_GFX_MODE,
+    "set-graphics-mode": PARAM_SET_GFX_MODE,
+    "get-current-temp": PARAM_GET_CURR_TEMP,
+    "get-fmax-max": PARAM_GET_FMAX_MAX,
+    "get-fmax-min": PARAM_GET_FMAX_MIN,
+    "get-soc-power-max": PARAM_GET_SOC_PWR_MAX,
+    "get-soc-power-min": PARAM_GET_SOC_PWR_MIN,
+    "get-soc-power-cur": PARAM_GET_SOC_PWR_CUR,
+}
+
+
+def _pretty_buffer(ba):
+    return str(binascii.hexlify(ba, " "))
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Dynamic Boost control command line interface"
+    )
+    parser.add_argument(
+        "command",
+        choices=["get-nonce", "get-param", "set-param", "set-uid"],
+        help="Command to send",
+    )
+    parser.add_argument("--device", default="/dev/dbc", help="Device to operate")
+    parser.add_argument("--signature", help="File containing signature for command")
+    parser.add_argument("--message", choices=messages.keys(), help="Message index")
+    parser.add_argument("--data", help="Argument to pass to message")
+    parser.add_argument("--uid", help="File containing UID to pass")
+    return parser.parse_args()
+
+
+def pretty_error(code):
+    if code in ERRORS:
+        print(ERRORS[code])
+    else:
+        print("failed with return code %d" % code)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    data = 0
+    sig = None
+    uid = None
+    if not os.path.exists(args.device):
+        raise IOError("Missing device {device}".format(device=args.device))
+    if args.signature:
+        if not os.path.exists(args.signature):
+            raise ValueError("Invalid signature file %s" % args.signature)
+        with open(args.signature, "rb") as f:
+            sig = f.read()
+        if len(sig) != DBC_SIG_SIZE:
+            raise ValueError(
+                "Invalid signature length %d (expected %d)" % (len(sig), DBC_SIG_SIZE)
+            )
+    if args.uid:
+        if not os.path.exists(args.uid):
+            raise ValueError("Invalid uid file %s" % args.uid)
+        with open(args.uid, "rb") as f:
+            uid = f.read()
+        if len(uid) != DBC_UID_SIZE:
+            raise ValueError(
+                "Invalid UID length %d (expected %d)" % (len(uid), DBC_UID_SIZE)
+            )
+    if args.data:
+        try:
+            data = int(args.data, 10)
+        except ValueError:
+            data = int(args.data, 16)
+
+    with open(args.device) as d:
+        if args.command == "get-nonce":
+            try:
+                nonce = get_nonce(d, sig)
+                print("Nonce: %s" % _pretty_buffer(bytes(nonce)))
+            except OSError as e:
+                pretty_error(e.errno)
+        elif args.command == "set-uid":
+            try:
+                result = set_uid(d, uid, sig)
+                if result:
+                    print("Set UID")
+            except OSError as e:
+                pretty_error(e.errno)
+        elif args.command == "get-param":
+            if not args.message or args.message.startswith("set"):
+                raise ValueError("Invalid message %s" % args.message)
+            try:
+                param, signature = process_param(d, messages[args.message], sig)
+                print(
+                    "Parameter: {par}, response signature {sig}".format(
+                        par=param,
+                        sig=_pretty_buffer(bytes(signature)),
+                    )
+                )
+            except OSError as e:
+                pretty_error(e.errno)
+        elif args.command == "set-param":
+            if not args.message or args.message.startswith("get"):
+                raise ValueError("Invalid message %s" % args.message)
+            try:
+                param, signature = process_param(d, messages[args.message], sig, data)
+                print(
+                    "Parameter: {par}, response signature {sig}".format(
+                        par=param,
+                        sig=_pretty_buffer(bytes(signature)),
+                    )
+                )
+            except OSError as e:
+                pretty_error(e.errno)

From 15f8aa7bb3e550278aa561e81b2e72ffa8e442b7 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 23 Jun 2023 08:50:00 -0500
Subject: [PATCH 0321/2047] crypto: ccp - Add unit tests for dynamic boost
 control

Interacting with dynamic boost control messages requires the caller
to supply a signature. To allow validation of individual dynamic
boost control components, introduce a set of tests that can be run.

The tests can be run in 3 distinct different environments, and so
certain tests will be skipped depending on the environment.

1. Systems that do not support DBC.
2. Production systems that support DBC but are secured silicon.
3. Pre-production systems that support DBC but are unsecured silicon.

Unsecured silicon does not validate the signature, and so this allows
testing more of the state machine and functionality.

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 tools/crypto/ccp/test_dbc.py | 266 +++++++++++++++++++++++++++++++++++
 1 file changed, 266 insertions(+)
 create mode 100755 tools/crypto/ccp/test_dbc.py

diff --git a/tools/crypto/ccp/test_dbc.py b/tools/crypto/ccp/test_dbc.py
new file mode 100755
index 000000000000..998bb3e3cd04
--- /dev/null
+++ b/tools/crypto/ccp/test_dbc.py
@@ -0,0 +1,266 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0
+import unittest
+import os
+import time
+import glob
+from dbc import *
+
+# Artificial delay between set commands
+SET_DELAY = 0.5
+
+
+class invalid_param(ctypes.Structure):
+    _fields_ = [
+        ("data", ctypes.c_uint8),
+    ]
+
+
+def system_is_secured() -> bool:
+    fused_part = glob.glob("/sys/bus/pci/drivers/ccp/**/fused_part")[0]
+    if os.path.exists(fused_part):
+        with open(fused_part, "r") as r:
+            return int(r.read()) == 1
+    return True
+
+
+class DynamicBoostControlTest(unittest.TestCase):
+    def __init__(self, data) -> None:
+        self.d = None
+        self.signature = "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+        self.uid = "1111111111111111"
+        super().__init__(data)
+
+    def setUp(self) -> None:
+        self.d = open(DEVICE_NODE)
+        return super().setUp()
+
+    def tearDown(self) -> None:
+        if self.d:
+            self.d.close()
+        return super().tearDown()
+
+
+class TestUnsupportedSystem(DynamicBoostControlTest):
+    def setUp(self) -> None:
+        if os.path.exists(DEVICE_NODE):
+            self.skipTest("system is supported")
+        with self.assertRaises(FileNotFoundError) as error:
+            super().setUp()
+        self.assertEqual(error.exception.errno, 2)
+
+    def test_unauthenticated_nonce(self) -> None:
+        """fetch unauthenticated nonce"""
+        with self.assertRaises(ValueError) as error:
+            get_nonce(self.d, None)
+
+
+class TestInvalidIoctls(DynamicBoostControlTest):
+    def __init__(self, data) -> None:
+        self.data = invalid_param()
+        self.data.data = 1
+        super().__init__(data)
+
+    def setUp(self) -> None:
+        if not os.path.exists(DEVICE_NODE):
+            self.skipTest("system is unsupported")
+        return super().setUp()
+
+    def test_invalid_nonce_ioctl(self) -> None:
+        """tries to call get_nonce ioctl with invalid data structures"""
+
+        # 0x1 (get nonce), and invalid data
+        INVALID1 = IOWR(ord("D"), 0x01, invalid_param)
+        with self.assertRaises(OSError) as error:
+            fcntl.ioctl(self.d, INVALID1, self.data, True)
+        self.assertEqual(error.exception.errno, 22)
+
+    def test_invalid_setuid_ioctl(self) -> None:
+        """tries to call set_uid ioctl with invalid data structures"""
+
+        # 0x2 (set uid), and invalid data
+        INVALID2 = IOW(ord("D"), 0x02, invalid_param)
+        with self.assertRaises(OSError) as error:
+            fcntl.ioctl(self.d, INVALID2, self.data, True)
+        self.assertEqual(error.exception.errno, 22)
+
+    def test_invalid_setuid_rw_ioctl(self) -> None:
+        """tries to call set_uid ioctl with invalid data structures"""
+
+        # 0x2 as RW (set uid), and invalid data
+        INVALID3 = IOWR(ord("D"), 0x02, invalid_param)
+        with self.assertRaises(OSError) as error:
+            fcntl.ioctl(self.d, INVALID3, self.data, True)
+        self.assertEqual(error.exception.errno, 22)
+
+    def test_invalid_param_ioctl(self) -> None:
+        """tries to call param ioctl with invalid data structures"""
+        # 0x3 (param), and invalid data
+        INVALID4 = IOWR(ord("D"), 0x03, invalid_param)
+        with self.assertRaises(OSError) as error:
+            fcntl.ioctl(self.d, INVALID4, self.data, True)
+        self.assertEqual(error.exception.errno, 22)
+
+    def test_invalid_call_ioctl(self) -> None:
+        """tries to call the DBC ioctl with invalid data structures"""
+        # 0x4, and invalid data
+        INVALID5 = IOWR(ord("D"), 0x04, invalid_param)
+        with self.assertRaises(OSError) as error:
+            fcntl.ioctl(self.d, INVALID5, self.data, True)
+        self.assertEqual(error.exception.errno, 22)
+
+
+class TestInvalidSignature(DynamicBoostControlTest):
+    def setUp(self) -> None:
+        if not os.path.exists(DEVICE_NODE):
+            self.skipTest("system is unsupported")
+        if not system_is_secured():
+            self.skipTest("system is unfused")
+        return super().setUp()
+
+    def test_unauthenticated_nonce(self) -> None:
+        """fetch unauthenticated nonce"""
+        get_nonce(self.d, None)
+
+    def test_multiple_unauthenticated_nonce(self) -> None:
+        """ensure state machine always returns nonce"""
+        for count in range(0, 2):
+            get_nonce(self.d, None)
+
+    def test_authenticated_nonce(self) -> None:
+        """fetch authenticated nonce"""
+        with self.assertRaises(OSError) as error:
+            get_nonce(self.d, self.signature)
+        self.assertEqual(error.exception.errno, 1)
+
+    def test_set_uid(self) -> None:
+        """set uid"""
+        with self.assertRaises(OSError) as error:
+            set_uid(self.d, self.uid, self.signature)
+        self.assertEqual(error.exception.errno, 1)
+
+    def test_get_param(self) -> None:
+        """fetch a parameter"""
+        with self.assertRaises(OSError) as error:
+            process_param(self.d, PARAM_GET_SOC_PWR_CUR, self.signature)
+        self.assertEqual(error.exception.errno, 1)
+
+    def test_set_param(self) -> None:
+        """set a parameter"""
+        with self.assertRaises(OSError) as error:
+            process_param(self.d, PARAM_SET_PWR_CAP, self.signature, 1000)
+        self.assertEqual(error.exception.errno, 1)
+
+
+class TestUnFusedSystem(DynamicBoostControlTest):
+    def setup_identity(self) -> None:
+        """sets up the identity of the caller"""
+        # if already authenticated these may fail
+        try:
+            get_nonce(self.d, None)
+        except PermissionError:
+            pass
+        try:
+            set_uid(self.d, self.uid, self.signature)
+        except BlockingIOError:
+            pass
+        try:
+            get_nonce(self.d, self.signature)
+        except PermissionError:
+            pass
+
+    def setUp(self) -> None:
+        if not os.path.exists(DEVICE_NODE):
+            self.skipTest("system is unsupported")
+        if system_is_secured():
+            self.skipTest("system is fused")
+        super().setUp()
+        self.setup_identity()
+        time.sleep(SET_DELAY)
+
+    def test_get_valid_param(self) -> None:
+        """fetch all possible parameters"""
+        # SOC power
+        soc_power_max = process_param(self.d, PARAM_GET_SOC_PWR_MAX, self.signature)
+        soc_power_min = process_param(self.d, PARAM_GET_SOC_PWR_MIN, self.signature)
+        self.assertGreater(soc_power_max.parameter, soc_power_min.parameter)
+
+        # fmax
+        fmax_max = process_param(self.d, PARAM_GET_FMAX_MAX, self.signature)
+        fmax_min = process_param(self.d, PARAM_GET_FMAX_MIN, self.signature)
+        self.assertGreater(fmax_max.parameter, fmax_min.parameter)
+
+        # cap values
+        keys = {
+            "fmax-cap": PARAM_GET_FMAX_CAP,
+            "power-cap": PARAM_GET_PWR_CAP,
+            "current-temp": PARAM_GET_CURR_TEMP,
+            "soc-power-cur": PARAM_GET_SOC_PWR_CUR,
+        }
+        for k in keys:
+            result = process_param(self.d, keys[k], self.signature)
+            self.assertGreater(result.parameter, 0)
+
+    def test_get_invalid_param(self) -> None:
+        """fetch an invalid parameter"""
+        try:
+            set_uid(self.d, self.uid, self.signature)
+        except OSError:
+            pass
+        with self.assertRaises(OSError) as error:
+            process_param(self.d, (0xF,), self.signature)
+        self.assertEqual(error.exception.errno, 22)
+
+    def test_set_fmax(self) -> None:
+        """get/set fmax limit"""
+        # fetch current
+        original = process_param(self.d, PARAM_GET_FMAX_CAP, self.signature)
+
+        # set the fmax
+        target = original.parameter - 100
+        process_param(self.d, PARAM_SET_FMAX_CAP, self.signature, target)
+        time.sleep(SET_DELAY)
+        new = process_param(self.d, PARAM_GET_FMAX_CAP, self.signature)
+        self.assertEqual(new.parameter, target)
+
+        # revert back to current
+        process_param(self.d, PARAM_SET_FMAX_CAP, self.signature, original.parameter)
+        time.sleep(SET_DELAY)
+        cur = process_param(self.d, PARAM_GET_FMAX_CAP, self.signature)
+        self.assertEqual(cur.parameter, original.parameter)
+
+    def test_set_power_cap(self) -> None:
+        """get/set power cap limit"""
+        # fetch current
+        original = process_param(self.d, PARAM_GET_PWR_CAP, self.signature)
+
+        # set the fmax
+        target = original.parameter - 10
+        process_param(self.d, PARAM_SET_PWR_CAP, self.signature, target)
+        time.sleep(SET_DELAY)
+        new = process_param(self.d, PARAM_GET_PWR_CAP, self.signature)
+        self.assertEqual(new.parameter, target)
+
+        # revert back to current
+        process_param(self.d, PARAM_SET_PWR_CAP, self.signature, original.parameter)
+        time.sleep(SET_DELAY)
+        cur = process_param(self.d, PARAM_GET_PWR_CAP, self.signature)
+        self.assertEqual(cur.parameter, original.parameter)
+
+    def test_set_3d_graphics_mode(self) -> None:
+        """set/get 3d graphics mode"""
+        # these aren't currently implemented but may be some day
+        # they are *expected* to fail
+        with self.assertRaises(OSError) as error:
+            process_param(self.d, PARAM_GET_GFX_MODE, self.signature)
+        self.assertEqual(error.exception.errno, 2)
+
+        time.sleep(SET_DELAY)
+
+        with self.assertRaises(OSError) as error:
+            process_param(self.d, PARAM_SET_GFX_MODE, self.signature, 1)
+        self.assertEqual(error.exception.errno, 2)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 4b97d282235dd4d9bff1b6aa327350101bc79471 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 23 Jun 2023 08:50:01 -0500
Subject: [PATCH 0322/2047] crypto: ccp - Add Mario to MAINTAINERS

I will maintain the platform access interface and dynamic boost
control support.

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 MAINTAINERS | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3be1bdfe8ecc..64602d4169d0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -915,6 +915,18 @@ S:	Supported
 F:	drivers/crypto/ccp/sev*
 F:	include/uapi/linux/psp-sev.h
 
+AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER - DBC SUPPORT
+M:	Mario Limonciello <mario.limonciello@amd.com>
+L:	linux-crypto@vger.kernel.org
+S:	Supported
+F:	drivers/crypto/ccp/dbc.c
+F:	drivers/crypto/ccp/dbc.h
+F:	drivers/crypto/ccp/platform-access.c
+F:	drivers/crypto/ccp/platform-access.h
+F:	include/uapi/linux/psp-dbc.h
+F:	tools/crypto/ccp/*.c
+F:	tools/crypto/ccp/*.py
+
 AMD DISPLAY CORE
 M:	Harry Wentland <harry.wentland@amd.com>
 M:	Leo Li <sunpeng.li@amd.com>

From 20508b751b4bef2cda999fe0268071b0fc335ac7 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 30 Jun 2023 15:54:24 +0800
Subject: [PATCH 0323/2047] crypto: sig - Remove some unused functions

These functions are defined in the sig.c file, but not called elsewhere,
so delete these unused functions.

crypto/sig.c:24:34: warning: unused function '__crypto_sig_tfm'.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=5701
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/sig.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/crypto/sig.c b/crypto/sig.c
index b48c18ec65cd..224c47019297 100644
--- a/crypto/sig.c
+++ b/crypto/sig.c
@@ -21,11 +21,6 @@
 
 static const struct crypto_type crypto_sig_type;
 
-static inline struct crypto_sig *__crypto_sig_tfm(struct crypto_tfm *tfm)
-{
-	return container_of(tfm, struct crypto_sig, base);
-}
-
 static int crypto_sig_init_tfm(struct crypto_tfm *tfm)
 {
 	if (tfm->__crt_alg->cra_type != &crypto_sig_type)

From 865b50fe6ea85b41354967df03c317eb1627cc13 Mon Sep 17 00:00:00 2001
From: Lucas Segarra Fernandez <lucas.segarra.fernandez@intel.com>
Date: Fri, 30 Jun 2023 17:32:07 +0200
Subject: [PATCH 0324/2047] crypto: qat - add fw_counters debugfs file

Expose FW counters statistics by providing the "fw_counters" file
under debugfs. Currently the statistics include the number of
requests sent to the FW and the number of responses received
from the FW for each Acceleration Engine, for all the QAT product
line.

This patch is based on earlier work done by Marco Chiappero.

Co-developed-by: Adam Guerin <adam.guerin@intel.com>
Signed-off-by: Adam Guerin <adam.guerin@intel.com>
Signed-off-by: Lucas Segarra Fernandez <lucas.segarra.fernandez@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/ABI/testing/debugfs-driver-qat  |  10 +
 drivers/crypto/intel/qat/qat_common/Makefile  |   1 +
 .../intel/qat/qat_common/adf_accel_devices.h  |   1 +
 .../crypto/intel/qat/qat_common/adf_admin.c   |  18 ++
 .../intel/qat/qat_common/adf_common_drv.h     |   1 +
 .../crypto/intel/qat/qat_common/adf_dbgfs.c   |   7 +
 .../intel/qat/qat_common/adf_fw_counters.c    | 264 ++++++++++++++++++
 .../intel/qat/qat_common/adf_fw_counters.h    |  11 +
 8 files changed, 313 insertions(+)
 create mode 100644 Documentation/ABI/testing/debugfs-driver-qat
 create mode 100644 drivers/crypto/intel/qat/qat_common/adf_fw_counters.c
 create mode 100644 drivers/crypto/intel/qat/qat_common/adf_fw_counters.h

diff --git a/Documentation/ABI/testing/debugfs-driver-qat b/Documentation/ABI/testing/debugfs-driver-qat
new file mode 100644
index 000000000000..22d39c0ca1b2
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-driver-qat
@@ -0,0 +1,10 @@
+What:		/sys/kernel/debug/qat_<device>_<BDF>/qat/fw_counters
+Date:		November 2023
+KernelVersion:	6.6
+Contact:	qat-linux@intel.com
+Description:	(RO) Read returns the number of requests sent to the FW and the number of responses
+		received from the FW for each Acceleration Engine
+		Reported firmware counters::
+
+			<N>: Number of requests sent from Acceleration Engine N to FW and responses
+			     Acceleration Engine N received from FW
diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile
index 38de3aba6e8c..f541046cdf9a 100644
--- a/drivers/crypto/intel/qat/qat_common/Makefile
+++ b/drivers/crypto/intel/qat/qat_common/Makefile
@@ -28,6 +28,7 @@ intel_qat-objs := adf_cfg.o \
 	qat_bl.o
 
 intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \
+				adf_fw_counters.o \
 				adf_dbgfs.o
 
 intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \
diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
index 0399417b91fc..a54a994b5e27 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
@@ -292,6 +292,7 @@ struct adf_accel_dev {
 	unsigned long status;
 	atomic_t ref_count;
 	struct dentry *debugfs_dir;
+	struct dentry *fw_cntr_dbgfile;
 	struct list_head list;
 	struct module *owner;
 	struct adf_accel_pci accel_pci_dev;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_admin.c b/drivers/crypto/intel/qat/qat_common/adf_admin.c
index 118775ee02f2..4716b82d48ec 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_admin.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_admin.c
@@ -223,6 +223,24 @@ static int adf_get_dc_capabilities(struct adf_accel_dev *accel_dev,
 	return 0;
 }
 
+int adf_get_ae_fw_counters(struct adf_accel_dev *accel_dev, u16 ae, u64 *reqs, u64 *resps)
+{
+	struct icp_qat_fw_init_admin_resp resp = { };
+	struct icp_qat_fw_init_admin_req req = { };
+	int ret;
+
+	req.cmd_id = ICP_QAT_FW_COUNTERS_GET;
+
+	ret = adf_put_admin_msg_sync(accel_dev, ae, &req, &resp);
+	if (ret || resp.status)
+		return -EFAULT;
+
+	*reqs = resp.req_rec_count;
+	*resps = resp.resp_sent_count;
+
+	return 0;
+}
+
 /**
  * adf_send_admin_init() - Function sends init message to FW
  * @accel_dev: Pointer to acceleration device.
diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
index b8132eb9bc2a..4682be073c99 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
@@ -94,6 +94,7 @@ void adf_exit_aer(void);
 int adf_init_admin_comms(struct adf_accel_dev *accel_dev);
 void adf_exit_admin_comms(struct adf_accel_dev *accel_dev);
 int adf_send_admin_init(struct adf_accel_dev *accel_dev);
+int adf_get_ae_fw_counters(struct adf_accel_dev *accel_dev, u16 ae, u64 *reqs, u64 *resps);
 int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay);
 int adf_init_arb(struct adf_accel_dev *accel_dev);
 void adf_exit_arb(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_dbgfs.c b/drivers/crypto/intel/qat/qat_common/adf_dbgfs.c
index d0a2f892e6eb..5080ecffab03 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_dbgfs.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_dbgfs.c
@@ -6,6 +6,7 @@
 #include "adf_cfg.h"
 #include "adf_common_drv.h"
 #include "adf_dbgfs.h"
+#include "adf_fw_counters.h"
 
 /**
  * adf_dbgfs_init() - add persistent debugfs entries
@@ -56,6 +57,9 @@ void adf_dbgfs_add(struct adf_accel_dev *accel_dev)
 {
 	if (!accel_dev->debugfs_dir)
 		return;
+
+	if (!accel_dev->is_vf)
+		adf_fw_counters_dbgfs_add(accel_dev);
 }
 
 /**
@@ -66,4 +70,7 @@ void adf_dbgfs_rm(struct adf_accel_dev *accel_dev)
 {
 	if (!accel_dev->debugfs_dir)
 		return;
+
+	if (!accel_dev->is_vf)
+		adf_fw_counters_dbgfs_rm(accel_dev);
 }
diff --git a/drivers/crypto/intel/qat/qat_common/adf_fw_counters.c b/drivers/crypto/intel/qat/qat_common/adf_fw_counters.c
new file mode 100644
index 000000000000..cb6e09ef5c9f
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_fw_counters.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Intel Corporation */
+#include <linux/bitops.h>
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/types.h>
+
+#include "adf_accel_devices.h"
+#include "adf_common_drv.h"
+#include "adf_fw_counters.h"
+
+#define ADF_FW_COUNTERS_MAX_PADDING 16
+
+enum adf_fw_counters_types {
+	ADF_FW_REQUESTS,
+	ADF_FW_RESPONSES,
+	ADF_FW_COUNTERS_COUNT
+};
+
+static const char * const adf_fw_counter_names[] = {
+	[ADF_FW_REQUESTS] = "Requests",
+	[ADF_FW_RESPONSES] = "Responses",
+};
+
+static_assert(ARRAY_SIZE(adf_fw_counter_names) == ADF_FW_COUNTERS_COUNT);
+
+struct adf_ae_counters {
+	u16 ae;
+	u64 values[ADF_FW_COUNTERS_COUNT];
+};
+
+struct adf_fw_counters {
+	u16 ae_count;
+	struct adf_ae_counters ae_counters[];
+};
+
+static void adf_fw_counters_parse_ae_values(struct adf_ae_counters *ae_counters, u32 ae,
+					    u64 req_count, u64 resp_count)
+{
+	ae_counters->ae = ae;
+	ae_counters->values[ADF_FW_REQUESTS] = req_count;
+	ae_counters->values[ADF_FW_RESPONSES] = resp_count;
+}
+
+static int adf_fw_counters_load_from_device(struct adf_accel_dev *accel_dev,
+					    struct adf_fw_counters *fw_counters)
+{
+	struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+	unsigned long ae_mask;
+	unsigned int i;
+	unsigned long ae;
+
+	/* Ignore the admin AEs */
+	ae_mask = hw_data->ae_mask & ~hw_data->admin_ae_mask;
+
+	if (hweight_long(ae_mask) > fw_counters->ae_count)
+		return -EINVAL;
+
+	i = 0;
+	for_each_set_bit(ae, &ae_mask, GET_MAX_ACCELENGINES(accel_dev)) {
+		u64 req_count, resp_count;
+		int ret;
+
+		ret = adf_get_ae_fw_counters(accel_dev, ae, &req_count, &resp_count);
+		if (ret)
+			return ret;
+
+		adf_fw_counters_parse_ae_values(&fw_counters->ae_counters[i++], ae,
+						req_count, resp_count);
+	}
+
+	return 0;
+}
+
+static struct adf_fw_counters *adf_fw_counters_allocate(unsigned long ae_count)
+{
+	struct adf_fw_counters *fw_counters;
+
+	if (unlikely(!ae_count))
+		return ERR_PTR(-EINVAL);
+
+	fw_counters = kmalloc(struct_size(fw_counters, ae_counters, ae_count), GFP_KERNEL);
+	if (!fw_counters)
+		return ERR_PTR(-ENOMEM);
+
+	fw_counters->ae_count = ae_count;
+
+	return fw_counters;
+}
+
+/**
+ * adf_fw_counters_get() - Return FW counters for the provided device.
+ * @accel_dev: Pointer to a QAT acceleration device
+ *
+ * Allocates and returns a table of counters containing execution statistics
+ * for each non-admin AE available through the supplied acceleration device.
+ * The caller becomes the owner of such memory and is responsible for
+ * the deallocation through a call to kfree().
+ *
+ * Returns: a pointer to a dynamically allocated struct adf_fw_counters
+ *          on success, or a negative value on error.
+ */
+static struct adf_fw_counters *adf_fw_counters_get(struct adf_accel_dev *accel_dev)
+{
+	struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+	struct adf_fw_counters *fw_counters;
+	unsigned long ae_count;
+	int ret;
+
+	if (!adf_dev_started(accel_dev)) {
+		dev_err(&GET_DEV(accel_dev), "QAT Device not started\n");
+		return ERR_PTR(-EFAULT);
+	}
+
+	/* Ignore the admin AEs */
+	ae_count = hweight_long(hw_data->ae_mask & ~hw_data->admin_ae_mask);
+
+	fw_counters = adf_fw_counters_allocate(ae_count);
+	if (IS_ERR(fw_counters))
+		return fw_counters;
+
+	ret = adf_fw_counters_load_from_device(accel_dev, fw_counters);
+	if (ret) {
+		kfree(fw_counters);
+		dev_err(&GET_DEV(accel_dev),
+			"Failed to create QAT fw_counters file table [%d].\n", ret);
+		return ERR_PTR(ret);
+	}
+
+	return fw_counters;
+}
+
+static void *qat_fw_counters_seq_start(struct seq_file *sfile, loff_t *pos)
+{
+	struct adf_fw_counters *fw_counters = sfile->private;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	if (*pos > fw_counters->ae_count)
+		return NULL;
+
+	return &fw_counters->ae_counters[*pos - 1];
+}
+
+static void *qat_fw_counters_seq_next(struct seq_file *sfile, void *v, loff_t *pos)
+{
+	struct adf_fw_counters *fw_counters = sfile->private;
+
+	(*pos)++;
+
+	if (*pos > fw_counters->ae_count)
+		return NULL;
+
+	return &fw_counters->ae_counters[*pos - 1];
+}
+
+static void qat_fw_counters_seq_stop(struct seq_file *sfile, void *v) {}
+
+static int qat_fw_counters_seq_show(struct seq_file *sfile, void *v)
+{
+	int i;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(sfile, "AE ");
+		for (i = 0; i < ADF_FW_COUNTERS_COUNT; ++i)
+			seq_printf(sfile, " %*s", ADF_FW_COUNTERS_MAX_PADDING,
+				   adf_fw_counter_names[i]);
+	} else {
+		struct adf_ae_counters *ae_counters = (struct adf_ae_counters *)v;
+
+		seq_printf(sfile, "%2d:", ae_counters->ae);
+		for (i = 0; i < ADF_FW_COUNTERS_COUNT; ++i)
+			seq_printf(sfile, " %*llu", ADF_FW_COUNTERS_MAX_PADDING,
+				   ae_counters->values[i]);
+	}
+	seq_putc(sfile, '\n');
+
+	return 0;
+}
+
+static const struct seq_operations qat_fw_counters_sops = {
+	.start = qat_fw_counters_seq_start,
+	.next = qat_fw_counters_seq_next,
+	.stop = qat_fw_counters_seq_stop,
+	.show = qat_fw_counters_seq_show,
+};
+
+static int qat_fw_counters_file_open(struct inode *inode, struct file *file)
+{
+	struct adf_accel_dev *accel_dev = inode->i_private;
+	struct seq_file *fw_counters_seq_file;
+	struct adf_fw_counters *fw_counters;
+	int ret;
+
+	fw_counters = adf_fw_counters_get(accel_dev);
+	if (IS_ERR(fw_counters))
+		return PTR_ERR(fw_counters);
+
+	ret = seq_open(file, &qat_fw_counters_sops);
+	if (unlikely(ret)) {
+		kfree(fw_counters);
+		return ret;
+	}
+
+	fw_counters_seq_file = file->private_data;
+	fw_counters_seq_file->private = fw_counters;
+	return ret;
+}
+
+static int qat_fw_counters_file_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+
+	kfree(seq->private);
+	seq->private = NULL;
+
+	return seq_release(inode, file); }
+
+static const struct file_operations qat_fw_counters_fops = {
+	.owner = THIS_MODULE,
+	.open = qat_fw_counters_file_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = qat_fw_counters_file_release,
+};
+
+/**
+ * adf_fw_counters_dbgfs_add() - Create a debugfs file containing FW
+ * execution counters.
+ * @accel_dev:  Pointer to a QAT acceleration device
+ *
+ * Function creates a file to display a table with statistics for the given
+ * QAT acceleration device. The table stores device specific execution values
+ * for each AE, such as the number of requests sent to the FW and responses
+ * received from the FW.
+ *
+ * Return: void
+ */
+void adf_fw_counters_dbgfs_add(struct adf_accel_dev *accel_dev)
+{
+	accel_dev->fw_cntr_dbgfile = debugfs_create_file("fw_counters", 0400,
+							 accel_dev->debugfs_dir,
+							 accel_dev,
+							 &qat_fw_counters_fops);
+}
+
+/**
+ * adf_fw_counters_dbgfs_rm() - Remove the debugfs file containing FW counters.
+ * @accel_dev:  Pointer to a QAT acceleration device.
+ *
+ * Function removes the file providing the table of statistics for the given
+ * QAT acceleration device.
+ *
+ * Return: void
+ */
+void adf_fw_counters_dbgfs_rm(struct adf_accel_dev *accel_dev)
+{
+	debugfs_remove(accel_dev->fw_cntr_dbgfile);
+	accel_dev->fw_cntr_dbgfile = NULL;
+}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_fw_counters.h b/drivers/crypto/intel/qat/qat_common/adf_fw_counters.h
new file mode 100644
index 000000000000..91b3b6a95f1f
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_fw_counters.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2023 Intel Corporation */
+#ifndef ADF_FW_COUNTERS_H
+#define ADF_FW_COUNTERS_H
+
+struct adf_accel_dev;
+
+void adf_fw_counters_dbgfs_add(struct adf_accel_dev *accel_dev);
+void adf_fw_counters_dbgfs_rm(struct adf_accel_dev *accel_dev);
+
+#endif

From fd77d8da1fa85a7549ce3cd50222f9f795cc5f5d Mon Sep 17 00:00:00 2001
From: Damian Muszynski <damian.muszynski@intel.com>
Date: Fri, 30 Jun 2023 19:03:54 +0200
Subject: [PATCH 0325/2047] crypto: qat - add internal timer for qat 4xxx

The power management feature in QAT 4xxx devices can disable clock
sources used to implement timers. Because of that, the firmware needs to
get an external reliable source of time.

Add a kernel delayed work that periodically sends an event to the
firmware. This is triggered every 200ms. At each execution, the driver
sends a sync request to the firmware reporting the current timestamp
counter value.

This is a pre-requisite for enabling the heartbeat, telemetry and
rate limiting features.

Signed-off-by: Damian Muszynski <damian.muszynski@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../intel/qat/qat_4xxx/adf_4xxx_hw_data.c     |  3 +
 drivers/crypto/intel/qat/qat_common/Makefile  |  1 +
 .../intel/qat/qat_common/adf_accel_devices.h  |  3 +
 .../crypto/intel/qat/qat_common/adf_admin.c   | 12 ++++
 .../intel/qat/qat_common/adf_common_drv.h     |  3 +
 .../intel/qat/qat_common/adf_gen4_timer.c     | 70 +++++++++++++++++++
 .../intel/qat/qat_common/adf_gen4_timer.h     | 21 ++++++
 .../crypto/intel/qat/qat_common/adf_init.c    | 13 ++++
 drivers/crypto/intel/qat/qat_common/adf_isr.c |  6 ++
 .../qat/qat_common/icp_qat_fw_init_admin.h    |  5 ++
 10 files changed, 137 insertions(+)
 create mode 100644 drivers/crypto/intel/qat/qat_common/adf_gen4_timer.c
 create mode 100644 drivers/crypto/intel/qat/qat_common/adf_gen4_timer.h

diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
index e543a9e24a06..831d460bc503 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -8,6 +8,7 @@
 #include <adf_gen4_hw_data.h>
 #include <adf_gen4_pfvf.h>
 #include <adf_gen4_pm.h>
+#include <adf_gen4_timer.h>
 #include "adf_4xxx_hw_data.h"
 #include "icp_qat_hw.h"
 
@@ -508,6 +509,8 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id)
 	hw_data->enable_pm = adf_gen4_enable_pm;
 	hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt;
 	hw_data->dev_config = adf_gen4_dev_config;
+	hw_data->start_timer = adf_gen4_timer_start;
+	hw_data->stop_timer = adf_gen4_timer_stop;
 
 	adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
 	adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile
index f541046cdf9a..6a82ef8df733 100644
--- a/drivers/crypto/intel/qat/qat_common/Makefile
+++ b/drivers/crypto/intel/qat/qat_common/Makefile
@@ -17,6 +17,7 @@ intel_qat-objs := adf_cfg.o \
 	adf_gen4_pm.o \
 	adf_gen2_dc.o \
 	adf_gen4_dc.o \
+	adf_gen4_timer.o \
 	qat_crypto.o \
 	qat_compression.o \
 	qat_comp_algs.o \
diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
index a54a994b5e27..2198b410b029 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
@@ -188,6 +188,8 @@ struct adf_hw_device_data {
 	int (*init_admin_comms)(struct adf_accel_dev *accel_dev);
 	void (*exit_admin_comms)(struct adf_accel_dev *accel_dev);
 	int (*send_admin_init)(struct adf_accel_dev *accel_dev);
+	int (*start_timer)(struct adf_accel_dev *accel_dev);
+	void (*stop_timer)(struct adf_accel_dev *accel_dev);
 	int (*init_arb)(struct adf_accel_dev *accel_dev);
 	void (*exit_arb)(struct adf_accel_dev *accel_dev);
 	const u32 *(*get_arb_mapping)(struct adf_accel_dev *accel_dev);
@@ -296,6 +298,7 @@ struct adf_accel_dev {
 	struct list_head list;
 	struct module *owner;
 	struct adf_accel_pci accel_pci_dev;
+	struct adf_timer *timer;
 	union {
 		struct {
 			/* protects VF2PF interrupts access */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_admin.c b/drivers/crypto/intel/qat/qat_common/adf_admin.c
index 4716b82d48ec..d6cb7f50bb5a 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_admin.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_admin.c
@@ -241,6 +241,18 @@ int adf_get_ae_fw_counters(struct adf_accel_dev *accel_dev, u16 ae, u64 *reqs, u
 	return 0;
 }
 
+int adf_send_admin_tim_sync(struct adf_accel_dev *accel_dev, u32 cnt)
+{
+	u32 ae_mask = accel_dev->hw_device->ae_mask;
+	struct icp_qat_fw_init_admin_req req = { };
+	struct icp_qat_fw_init_admin_resp resp = { };
+
+	req.cmd_id = ICP_QAT_FW_SYNC;
+	req.int_timer_ticks = cnt;
+
+	return adf_send_admin(accel_dev, &req, &resp, ae_mask);
+}
+
 /**
  * adf_send_admin_init() - Function sends init message to FW
  * @accel_dev: Pointer to acceleration device.
diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
index 4682be073c99..e0d472071354 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
@@ -96,6 +96,7 @@ void adf_exit_admin_comms(struct adf_accel_dev *accel_dev);
 int adf_send_admin_init(struct adf_accel_dev *accel_dev);
 int adf_get_ae_fw_counters(struct adf_accel_dev *accel_dev, u16 ae, u64 *reqs, u64 *resps);
 int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay);
+int adf_send_admin_tim_sync(struct adf_accel_dev *accel_dev, u32 cnt);
 int adf_init_arb(struct adf_accel_dev *accel_dev);
 void adf_exit_arb(struct adf_accel_dev *accel_dev);
 void adf_update_ring_arb(struct adf_etr_ring_data *ring);
@@ -194,6 +195,8 @@ int qat_uclo_set_cfg_ae_mask(struct icp_qat_fw_loader_handle *handle,
 int adf_init_misc_wq(void);
 void adf_exit_misc_wq(void);
 bool adf_misc_wq_queue_work(struct work_struct *work);
+bool adf_misc_wq_queue_delayed_work(struct delayed_work *work,
+				    unsigned long delay);
 #if defined(CONFIG_PCI_IOV)
 int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
 void adf_disable_sriov(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_timer.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_timer.c
new file mode 100644
index 000000000000..646c57922fcd
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_timer.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Intel Corporation */
+
+#include <linux/container_of.h>
+#include <linux/dev_printk.h>
+#include <linux/export.h>
+#include <linux/jiffies.h>
+#include <linux/ktime.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#include "adf_accel_devices.h"
+#include "adf_common_drv.h"
+#include "adf_gen4_timer.h"
+
+#define ADF_GEN4_TIMER_PERIOD_MS 200
+
+/* This periodic update is used to trigger HB, RL & TL fw events */
+static void work_handler(struct work_struct *work)
+{
+	struct adf_accel_dev *accel_dev;
+	struct adf_timer *timer_ctx;
+	u32 time_periods;
+
+	timer_ctx = container_of(to_delayed_work(work), struct adf_timer, work_ctx);
+	accel_dev = timer_ctx->accel_dev;
+
+	adf_misc_wq_queue_delayed_work(&timer_ctx->work_ctx,
+				       msecs_to_jiffies(ADF_GEN4_TIMER_PERIOD_MS));
+
+	time_periods = div_u64(ktime_ms_delta(ktime_get_real(), timer_ctx->initial_ktime),
+			       ADF_GEN4_TIMER_PERIOD_MS);
+
+	if (adf_send_admin_tim_sync(accel_dev, time_periods))
+		dev_err(&GET_DEV(accel_dev), "Failed to synchronize qat timer\n");
+}
+
+int adf_gen4_timer_start(struct adf_accel_dev *accel_dev)
+{
+	struct adf_timer *timer_ctx;
+
+	timer_ctx = kzalloc(sizeof(*timer_ctx), GFP_KERNEL);
+	if (!timer_ctx)
+		return -ENOMEM;
+
+	timer_ctx->accel_dev = accel_dev;
+	accel_dev->timer = timer_ctx;
+	timer_ctx->initial_ktime = ktime_get_real();
+
+	INIT_DELAYED_WORK(&timer_ctx->work_ctx, work_handler);
+	adf_misc_wq_queue_delayed_work(&timer_ctx->work_ctx,
+				       msecs_to_jiffies(ADF_GEN4_TIMER_PERIOD_MS));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_timer_start);
+
+void adf_gen4_timer_stop(struct adf_accel_dev *accel_dev)
+{
+	struct adf_timer *timer_ctx = accel_dev->timer;
+
+	if (!timer_ctx)
+		return;
+
+	cancel_delayed_work_sync(&timer_ctx->work_ctx);
+
+	kfree(timer_ctx);
+	accel_dev->timer = NULL;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_timer_stop);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_timer.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_timer.h
new file mode 100644
index 000000000000..66a709e7b358
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_timer.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2023 Intel Corporation */
+
+#ifndef ADF_GEN4_TIMER_H_
+#define ADF_GEN4_TIMER_H_
+
+#include <linux/ktime.h>
+#include <linux/workqueue.h>
+
+struct adf_accel_dev;
+
+struct adf_timer {
+	struct adf_accel_dev *accel_dev;
+	struct delayed_work work_ctx;
+	ktime_t initial_ktime;
+};
+
+int adf_gen4_timer_start(struct adf_accel_dev *accel_dev);
+void adf_gen4_timer_stop(struct adf_accel_dev *accel_dev);
+
+#endif /* ADF_GEN4_TIMER_H_ */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_init.c b/drivers/crypto/intel/qat/qat_common/adf_init.c
index 826179c98524..0acba0f988da 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_init.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_init.c
@@ -163,6 +163,7 @@ static int adf_dev_start(struct adf_accel_dev *accel_dev)
 	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
 	struct service_hndl *service;
 	struct list_head *list_itr;
+	int ret;
 
 	set_bit(ADF_STATUS_STARTING, &accel_dev->status);
 
@@ -187,6 +188,14 @@ static int adf_dev_start(struct adf_accel_dev *accel_dev)
 		return -EFAULT;
 	}
 
+	if (hw_data->start_timer) {
+		ret = hw_data->start_timer(accel_dev);
+		if (ret) {
+			dev_err(&GET_DEV(accel_dev), "Failed to start internal sync timer\n");
+			return ret;
+		}
+	}
+
 	list_for_each(list_itr, &service_table) {
 		service = list_entry(list_itr, struct service_hndl, list);
 		if (service->event_hld(accel_dev, ADF_EVENT_START)) {
@@ -235,6 +244,7 @@ static int adf_dev_start(struct adf_accel_dev *accel_dev)
  */
 static void adf_dev_stop(struct adf_accel_dev *accel_dev)
 {
+	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
 	struct service_hndl *service;
 	struct list_head *list_itr;
 	bool wait = false;
@@ -270,6 +280,9 @@ static void adf_dev_stop(struct adf_accel_dev *accel_dev)
 		}
 	}
 
+	if (hw_data->stop_timer)
+		hw_data->stop_timer(accel_dev);
+
 	if (wait)
 		msleep(100);
 
diff --git a/drivers/crypto/intel/qat/qat_common/adf_isr.c b/drivers/crypto/intel/qat/qat_common/adf_isr.c
index ad9e135b8560..2aba194a7c29 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_isr.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_isr.c
@@ -380,3 +380,9 @@ bool adf_misc_wq_queue_work(struct work_struct *work)
 {
 	return queue_work(adf_misc_wq, work);
 }
+
+bool adf_misc_wq_queue_delayed_work(struct delayed_work *work,
+				    unsigned long delay)
+{
+	return queue_delayed_work(adf_misc_wq, work, delay);
+}
diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h
index 56cb827f93ea..d853c9242acf 100644
--- a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h
@@ -37,6 +37,9 @@ struct icp_qat_fw_init_admin_req {
 			__u16 ibuf_size_in_kb;
 			__u16 resrvd3;
 		};
+		struct {
+			__u32 int_timer_ticks;
+		};
 		__u32 idle_filter;
 	};
 
@@ -97,6 +100,8 @@ struct icp_qat_fw_init_admin_resp {
 	};
 } __packed;
 
+#define ICP_QAT_FW_SYNC ICP_QAT_FW_HEARTBEAT_SYNC
+
 #define ICP_QAT_FW_COMN_HEARTBEAT_OK 0
 #define ICP_QAT_FW_COMN_HEARTBEAT_BLOCKED 1
 #define ICP_QAT_FW_COMN_HEARTBEAT_FLAG_BITPOS 0

From 7f77b6797b34862e6164335c894f587387bff082 Mon Sep 17 00:00:00 2001
From: Damian Muszynski <damian.muszynski@intel.com>
Date: Fri, 30 Jun 2023 19:03:55 +0200
Subject: [PATCH 0326/2047] crypto: qat - drop obsolete heartbeat interface

Drop legacy heartbeat interface from FW API as it is no longer used.

Signed-off-by: Damian Muszynski <damian.muszynski@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../intel/qat/qat_common/icp_qat_fw_init_admin.h  | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h
index d853c9242acf..6ea19b4fb0ce 100644
--- a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h
@@ -102,19 +102,4 @@ struct icp_qat_fw_init_admin_resp {
 
 #define ICP_QAT_FW_SYNC ICP_QAT_FW_HEARTBEAT_SYNC
 
-#define ICP_QAT_FW_COMN_HEARTBEAT_OK 0
-#define ICP_QAT_FW_COMN_HEARTBEAT_BLOCKED 1
-#define ICP_QAT_FW_COMN_HEARTBEAT_FLAG_BITPOS 0
-#define ICP_QAT_FW_COMN_HEARTBEAT_FLAG_MASK 0x1
-#define ICP_QAT_FW_COMN_STATUS_RESRVD_FLD_MASK 0xFE
-#define ICP_QAT_FW_COMN_HEARTBEAT_HDR_FLAG_GET(hdr_t) \
-	ICP_QAT_FW_COMN_HEARTBEAT_FLAG_GET(hdr_t.flags)
-
-#define ICP_QAT_FW_COMN_HEARTBEAT_HDR_FLAG_SET(hdr_t, val) \
-	ICP_QAT_FW_COMN_HEARTBEAT_FLAG_SET(hdr_t, val)
-
-#define ICP_QAT_FW_COMN_HEARTBEAT_FLAG_GET(flags) \
-	QAT_FIELD_GET(flags, \
-		 ICP_QAT_FW_COMN_HEARTBEAT_FLAG_BITPOS, \
-		 ICP_QAT_FW_COMN_HEARTBEAT_FLAG_MASK)
 #endif

From e2980ba57e797e58a5476fbc4296f40551fb3404 Mon Sep 17 00:00:00 2001
From: Damian Muszynski <damian.muszynski@intel.com>
Date: Fri, 30 Jun 2023 19:03:56 +0200
Subject: [PATCH 0327/2047] crypto: qat - add measure clock frequency

The QAT hardware does not expose a mechanism to report its clock
frequency. This is required to implement the Heartbeat feature.

Add a clock measuring algorithm that estimates the frequency by
comparing the internal timestamp counter incremented by the firmware
with the time measured by the kernel.
The frequency value is only used internally and not exposed to
the user.

Signed-off-by: Damian Muszynski <damian.muszynski@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../intel/qat/qat_4xxx/adf_4xxx_hw_data.c     |  10 ++
 .../intel/qat/qat_4xxx/adf_4xxx_hw_data.h     |   4 +
 .../intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c   |  25 ++++
 .../intel/qat/qat_c3xxx/adf_c3xxx_hw_data.h   |   7 +
 .../intel/qat/qat_c62x/adf_c62x_hw_data.c     |  25 ++++
 .../intel/qat/qat_c62x/adf_c62x_hw_data.h     |   7 +
 drivers/crypto/intel/qat/qat_common/Makefile  |   1 +
 .../intel/qat/qat_common/adf_accel_devices.h  |   2 +
 .../crypto/intel/qat/qat_common/adf_admin.c   |  17 +++
 .../crypto/intel/qat/qat_common/adf_clock.c   | 131 ++++++++++++++++++
 .../crypto/intel/qat/qat_common/adf_clock.h   |  14 ++
 .../intel/qat/qat_common/adf_common_drv.h     |   1 +
 .../crypto/intel/qat/qat_common/adf_init.c    |   8 ++
 .../qat/qat_common/icp_qat_fw_init_admin.h    |   1 +
 .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c   |  10 ++
 .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.h   |   5 +
 16 files changed, 268 insertions(+)
 create mode 100644 drivers/crypto/intel/qat/qat_common/adf_clock.c
 create mode 100644 drivers/crypto/intel/qat/qat_common/adf_clock.h

diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
index 831d460bc503..9cda0c17992a 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -3,6 +3,7 @@
 #include <linux/iopoll.h>
 #include <adf_accel_devices.h>
 #include <adf_cfg.h>
+#include <adf_clock.h>
 #include <adf_common_drv.h>
 #include <adf_gen4_dc.h>
 #include <adf_gen4_hw_data.h>
@@ -318,6 +319,14 @@ static void get_admin_info(struct admin_info *admin_csrs_info)
 	admin_csrs_info->admin_msg_lr = ADF_4XXX_ADMINMSGLR_OFFSET;
 }
 
+static u32 get_heartbeat_clock(struct adf_hw_device_data *self)
+{
+	/*
+	 * 4XXX uses KPT counter for HB
+	 */
+	return ADF_4XXX_KPT_COUNTER_FREQ;
+}
+
 static void adf_enable_error_correction(struct adf_accel_dev *accel_dev)
 {
 	struct adf_bar *misc_bar = &GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR];
@@ -511,6 +520,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id)
 	hw_data->dev_config = adf_gen4_dev_config;
 	hw_data->start_timer = adf_gen4_timer_start;
 	hw_data->stop_timer = adf_gen4_timer_stop;
+	hw_data->get_hb_clock = get_heartbeat_clock;
 
 	adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
 	adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops);
diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.h
index e5b314d2b60e..bb3d95a8fb21 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.h
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.h
@@ -3,6 +3,7 @@
 #ifndef ADF_4XXX_HW_DATA_H_
 #define ADF_4XXX_HW_DATA_H_
 
+#include <linux/units.h>
 #include <adf_accel_devices.h>
 
 /* PCIe configuration space */
@@ -64,6 +65,9 @@
 #define ADF_402XX_ASYM_OBJ	"qat_402xx_asym.bin"
 #define ADF_402XX_ADMIN_OBJ	"qat_402xx_admin.bin"
 
+/* Clocks frequency */
+#define ADF_4XXX_KPT_COUNTER_FREQ (100 * HZ_PER_MHZ)
+
 /* qat_4xxx fuse bits are different from old GENs, redefine them */
 enum icp_qat_4xxx_slice_mask {
 	ICP_ACCEL_4XXX_MASK_CIPHER_SLICE = BIT(0),
diff --git a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
index 475643654e64..2c6a1dd9780c 100644
--- a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
 /* Copyright(c) 2014 - 2021 Intel Corporation */
 #include <adf_accel_devices.h>
+#include <adf_clock.h>
 #include <adf_common_drv.h>
 #include <adf_gen2_config.h>
 #include <adf_gen2_dc.h>
@@ -50,6 +51,28 @@ static u32 get_ae_mask(struct adf_hw_device_data *self)
 	return ~(fuses | straps) & ADF_C3XXX_ACCELENGINES_MASK;
 }
 
+static u32 get_ts_clock(struct adf_hw_device_data *self)
+{
+	/*
+	 * Timestamp update interval is 16 AE clock ticks for c3xxx.
+	 */
+	return self->clock_frequency / 16;
+}
+
+static int measure_clock(struct adf_accel_dev *accel_dev)
+{
+	u32 frequency;
+	int ret;
+
+	ret = adf_dev_measure_clock(accel_dev, &frequency, ADF_C3XXX_MIN_AE_FREQ,
+				    ADF_C3XXX_MAX_AE_FREQ);
+	if (ret)
+		return ret;
+
+	accel_dev->hw_device->clock_frequency = frequency;
+	return 0;
+}
+
 static u32 get_misc_bar_id(struct adf_hw_device_data *self)
 {
 	return ADF_C3XXX_PMISC_BAR;
@@ -127,6 +150,8 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
 	hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
 	hw_data->disable_iov = adf_disable_sriov;
 	hw_data->dev_config = adf_gen2_dev_config;
+	hw_data->measure_clock = measure_clock;
+	hw_data->get_hb_clock = get_ts_clock;
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.h b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.h
index 336a06f11dbd..690c6a1aa172 100644
--- a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.h
+++ b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.h
@@ -3,6 +3,8 @@
 #ifndef ADF_C3XXX_HW_DATA_H_
 #define ADF_C3XXX_HW_DATA_H_
 
+#include <linux/units.h>
+
 /* PCIe configuration space */
 #define ADF_C3XXX_PMISC_BAR 0
 #define ADF_C3XXX_ETR_BAR 1
@@ -19,6 +21,11 @@
 #define ADF_C3XXX_AE2FUNC_MAP_GRP_A_NUM_REGS 48
 #define ADF_C3XXX_AE2FUNC_MAP_GRP_B_NUM_REGS 6
 
+/* Clocks frequency */
+#define ADF_C3XXX_AE_FREQ (685 * HZ_PER_MHZ)
+#define ADF_C3XXX_MIN_AE_FREQ (533 * HZ_PER_MHZ)
+#define ADF_C3XXX_MAX_AE_FREQ (685 * HZ_PER_MHZ)
+
 /* Firmware Binary */
 #define ADF_C3XXX_FW "qat_c3xxx.bin"
 #define ADF_C3XXX_MMP "qat_c3xxx_mmp.bin"
diff --git a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
index e14270703670..081702be839e 100644
--- a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
 /* Copyright(c) 2014 - 2021 Intel Corporation */
 #include <adf_accel_devices.h>
+#include <adf_clock.h>
 #include <adf_common_drv.h>
 #include <adf_gen2_config.h>
 #include <adf_gen2_dc.h>
@@ -50,6 +51,28 @@ static u32 get_ae_mask(struct adf_hw_device_data *self)
 	return ~(fuses | straps) & ADF_C62X_ACCELENGINES_MASK;
 }
 
+static u32 get_ts_clock(struct adf_hw_device_data *self)
+{
+	/*
+	 * Timestamp update interval is 16 AE clock ticks for c62x.
+	 */
+	return self->clock_frequency / 16;
+}
+
+static int measure_clock(struct adf_accel_dev *accel_dev)
+{
+	u32 frequency;
+	int ret;
+
+	ret = adf_dev_measure_clock(accel_dev, &frequency, ADF_C62X_MIN_AE_FREQ,
+				    ADF_C62X_MAX_AE_FREQ);
+	if (ret)
+		return ret;
+
+	accel_dev->hw_device->clock_frequency = frequency;
+	return 0;
+}
+
 static u32 get_misc_bar_id(struct adf_hw_device_data *self)
 {
 	return ADF_C62X_PMISC_BAR;
@@ -129,6 +152,8 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
 	hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
 	hw_data->disable_iov = adf_disable_sriov;
 	hw_data->dev_config = adf_gen2_dev_config;
+	hw_data->measure_clock = measure_clock;
+	hw_data->get_hb_clock = get_ts_clock;
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.h b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.h
index 008c0a3a9769..13e6ebf6fd91 100644
--- a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.h
+++ b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.h
@@ -3,6 +3,8 @@
 #ifndef ADF_C62X_HW_DATA_H_
 #define ADF_C62X_HW_DATA_H_
 
+#include <linux/units.h>
+
 /* PCIe configuration space */
 #define ADF_C62X_SRAM_BAR 0
 #define ADF_C62X_PMISC_BAR 1
@@ -19,6 +21,11 @@
 #define ADF_C62X_AE2FUNC_MAP_GRP_A_NUM_REGS 80
 #define ADF_C62X_AE2FUNC_MAP_GRP_B_NUM_REGS 10
 
+/* Clocks frequency */
+#define ADF_C62X_AE_FREQ (685 * HZ_PER_MHZ)
+#define ADF_C62X_MIN_AE_FREQ (533 * HZ_PER_MHZ)
+#define ADF_C62X_MAX_AE_FREQ (800 * HZ_PER_MHZ)
+
 /* Firmware Binary */
 #define ADF_C62X_FW "qat_c62x.bin"
 #define ADF_C62X_MMP "qat_c62x_mmp.bin"
diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile
index 6a82ef8df733..b3e32d2abf45 100644
--- a/drivers/crypto/intel/qat/qat_common/Makefile
+++ b/drivers/crypto/intel/qat/qat_common/Makefile
@@ -18,6 +18,7 @@ intel_qat-objs := adf_cfg.o \
 	adf_gen2_dc.o \
 	adf_gen4_dc.o \
 	adf_gen4_timer.o \
+	adf_clock.o \
 	qat_crypto.o \
 	qat_compression.o \
 	qat_comp_algs.o \
diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
index 2198b410b029..c96f59cb7de9 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
@@ -190,6 +190,8 @@ struct adf_hw_device_data {
 	int (*send_admin_init)(struct adf_accel_dev *accel_dev);
 	int (*start_timer)(struct adf_accel_dev *accel_dev);
 	void (*stop_timer)(struct adf_accel_dev *accel_dev);
+	uint32_t (*get_hb_clock)(struct adf_hw_device_data *self);
+	int (*measure_clock)(struct adf_accel_dev *accel_dev);
 	int (*init_arb)(struct adf_accel_dev *accel_dev);
 	void (*exit_arb)(struct adf_accel_dev *accel_dev);
 	const u32 *(*get_arb_mapping)(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_admin.c b/drivers/crypto/intel/qat/qat_common/adf_admin.c
index d6cb7f50bb5a..966e7ea271a2 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_admin.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_admin.c
@@ -15,6 +15,7 @@
 #define ADF_CONST_TABLE_SIZE 1024
 #define ADF_ADMIN_POLL_DELAY_US 20
 #define ADF_ADMIN_POLL_TIMEOUT_US (5 * USEC_PER_SEC)
+#define ADF_ONE_AE 1
 
 static const u8 const_tab[1024] __aligned(1024) = {
 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -194,6 +195,22 @@ static int adf_set_fw_constants(struct adf_accel_dev *accel_dev)
 	return adf_send_admin(accel_dev, &req, &resp, ae_mask);
 }
 
+int adf_get_fw_timestamp(struct adf_accel_dev *accel_dev, u64 *timestamp)
+{
+	struct icp_qat_fw_init_admin_req req = { };
+	struct icp_qat_fw_init_admin_resp resp;
+	unsigned int ae_mask = ADF_ONE_AE;
+	int ret;
+
+	req.cmd_id = ICP_QAT_FW_TIMER_GET;
+	ret = adf_send_admin(accel_dev, &req, &resp, ae_mask);
+	if (ret)
+		return ret;
+
+	*timestamp = resp.timestamp;
+	return 0;
+}
+
 static int adf_get_dc_capabilities(struct adf_accel_dev *accel_dev,
 				   u32 *capabilities)
 {
diff --git a/drivers/crypto/intel/qat/qat_common/adf_clock.c b/drivers/crypto/intel/qat/qat_common/adf_clock.c
new file mode 100644
index 000000000000..dc0778691eb0
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_clock.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Intel Corporation */
+
+#include <linux/delay.h>
+#include <linux/dev_printk.h>
+#include <linux/export.h>
+#include <linux/math.h>
+#include <linux/minmax.h>
+#include <linux/time64.h>
+#include <linux/types.h>
+#include <linux/units.h>
+#include <asm/errno.h>
+#include "adf_accel_devices.h"
+#include "adf_clock.h"
+#include "adf_common_drv.h"
+
+#define MEASURE_CLOCK_RETRIES 10
+#define MEASURE_CLOCK_DELAY_US 10000
+#define ME_CLK_DIVIDER 16
+#define MEASURE_CLOCK_DELTA_THRESHOLD_US 100
+
+static inline u64 timespec_to_us(const struct timespec64 *ts)
+{
+	return (u64)DIV_ROUND_CLOSEST_ULL(timespec64_to_ns(ts), NSEC_PER_USEC);
+}
+
+static inline u64 timespec_to_ms(const struct timespec64 *ts)
+{
+	return (u64)DIV_ROUND_CLOSEST_ULL(timespec64_to_ns(ts), NSEC_PER_MSEC);
+}
+
+u64 adf_clock_get_current_time(void)
+{
+	struct timespec64 ts;
+
+	ktime_get_real_ts64(&ts);
+	return timespec_to_ms(&ts);
+}
+
+static int measure_clock(struct adf_accel_dev *accel_dev, u32 *frequency)
+{
+	struct timespec64 ts1, ts2, ts3, ts4;
+	u64 timestamp1, timestamp2, temp;
+	u32 delta_us, tries;
+	int ret;
+
+	tries = MEASURE_CLOCK_RETRIES;
+	do {
+		ktime_get_real_ts64(&ts1);
+		ret = adf_get_fw_timestamp(accel_dev, &timestamp1);
+		if (ret) {
+			dev_err(&GET_DEV(accel_dev),
+				"Failed to get fw timestamp\n");
+			return ret;
+		}
+		ktime_get_real_ts64(&ts2);
+		delta_us = timespec_to_us(&ts2) - timespec_to_us(&ts1);
+	} while (delta_us > MEASURE_CLOCK_DELTA_THRESHOLD_US && --tries);
+
+	if (!tries) {
+		dev_err(&GET_DEV(accel_dev), "Excessive clock measure delay\n");
+		return -ETIMEDOUT;
+	}
+
+	fsleep(MEASURE_CLOCK_DELAY_US);
+
+	tries = MEASURE_CLOCK_RETRIES;
+	do {
+		ktime_get_real_ts64(&ts3);
+		if (adf_get_fw_timestamp(accel_dev, &timestamp2)) {
+			dev_err(&GET_DEV(accel_dev),
+				"Failed to get fw timestamp\n");
+			return -EIO;
+		}
+		ktime_get_real_ts64(&ts4);
+		delta_us = timespec_to_us(&ts4) - timespec_to_us(&ts3);
+	} while (delta_us > MEASURE_CLOCK_DELTA_THRESHOLD_US && --tries);
+
+	if (!tries) {
+		dev_err(&GET_DEV(accel_dev), "Excessive clock measure delay\n");
+		return -ETIMEDOUT;
+	}
+
+	delta_us = timespec_to_us(&ts3) - timespec_to_us(&ts1);
+	temp = (timestamp2 - timestamp1) * ME_CLK_DIVIDER * 10;
+	temp = DIV_ROUND_CLOSEST_ULL(temp, delta_us);
+	/*
+	 * Enclose the division to allow the preprocessor to precalculate it,
+	 * and avoid promoting r-value to 64-bit before division.
+	 */
+	*frequency = temp * (HZ_PER_MHZ / 10);
+
+	return 0;
+}
+
+/**
+ * adf_dev_measure_clock() - measures device clock frequency
+ * @accel_dev: Pointer to acceleration device.
+ * @frequency: Pointer to variable where result will be stored
+ * @min: Minimal allowed frequency value
+ * @max: Maximal allowed frequency value
+ *
+ * If the measurement result will go beyond the min/max thresholds the value
+ * will take the value of the crossed threshold.
+ *
+ * This algorithm compares the device firmware timestamp with the kernel
+ * timestamp. So we can't expect too high accuracy from this measurement.
+ *
+ * Return:
+ * * 0 - measurement succeed
+ * * -ETIMEDOUT - measurement failed
+ */
+int adf_dev_measure_clock(struct adf_accel_dev *accel_dev,
+			  u32 *frequency, u32 min, u32 max)
+{
+	int ret;
+	u32 freq;
+
+	ret = measure_clock(accel_dev, &freq);
+	if (ret)
+		return ret;
+
+	*frequency = clamp(freq, min, max);
+
+	if (*frequency != freq)
+		dev_warn(&GET_DEV(accel_dev),
+			 "Measured clock %d Hz is out of range, assuming %d\n",
+			 freq, *frequency);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adf_dev_measure_clock);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_clock.h b/drivers/crypto/intel/qat/qat_common/adf_clock.h
new file mode 100644
index 000000000000..e309bc0dc35c
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_clock.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2023 Intel Corporation */
+#ifndef ADF_CLOCK_H
+#define ADF_CLOCK_H
+
+#include <linux/types.h>
+
+struct adf_accel_dev;
+
+int adf_dev_measure_clock(struct adf_accel_dev *accel_dev, u32 *frequency,
+			  u32 min, u32 max);
+u64 adf_clock_get_current_time(void);
+
+#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
index e0d472071354..a9552832858f 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
@@ -97,6 +97,7 @@ int adf_send_admin_init(struct adf_accel_dev *accel_dev);
 int adf_get_ae_fw_counters(struct adf_accel_dev *accel_dev, u16 ae, u64 *reqs, u64 *resps);
 int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay);
 int adf_send_admin_tim_sync(struct adf_accel_dev *accel_dev, u32 cnt);
+int adf_get_fw_timestamp(struct adf_accel_dev *accel_dev, u64 *timestamp);
 int adf_init_arb(struct adf_accel_dev *accel_dev);
 void adf_exit_arb(struct adf_accel_dev *accel_dev);
 void adf_update_ring_arb(struct adf_etr_ring_data *ring);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_init.c b/drivers/crypto/intel/qat/qat_common/adf_init.c
index 0acba0f988da..53fca6a7e2af 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_init.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_init.c
@@ -178,6 +178,14 @@ static int adf_dev_start(struct adf_accel_dev *accel_dev)
 		return -EFAULT;
 	}
 
+	if (hw_data->measure_clock) {
+		ret = hw_data->measure_clock(accel_dev);
+		if (ret) {
+			dev_err(&GET_DEV(accel_dev), "Failed measure device clock\n");
+			return ret;
+		}
+	}
+
 	/* Set ssm watch dog timer */
 	if (hw_data->set_ssm_wdtimer)
 		hw_data->set_ssm_wdtimer(accel_dev);
diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h
index 6ea19b4fb0ce..e836fbaeec62 100644
--- a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h
@@ -16,6 +16,7 @@ enum icp_qat_fw_init_admin_cmd_id {
 	ICP_QAT_FW_HEARTBEAT_SYNC = 7,
 	ICP_QAT_FW_HEARTBEAT_GET = 8,
 	ICP_QAT_FW_COMP_CAPABILITY_GET = 9,
+	ICP_QAT_FW_TIMER_GET = 19,
 	ICP_QAT_FW_PM_STATE_CONFIG = 128,
 };
 
diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index 1ebe0b351fae..c57403a6fbac 100644
--- a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -44,6 +44,14 @@ static u32 get_misc_bar_id(struct adf_hw_device_data *self)
 	return ADF_DH895XCC_PMISC_BAR;
 }
 
+static u32 get_ts_clock(struct adf_hw_device_data *self)
+{
+	/*
+	 * Timestamp update interval is 16 AE clock ticks for dh895xcc.
+	 */
+	return self->clock_frequency / 16;
+}
+
 static u32 get_etr_bar_id(struct adf_hw_device_data *self)
 {
 	return ADF_DH895XCC_ETR_BAR;
@@ -237,6 +245,8 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
 	hw_data->reset_device = adf_reset_sbr;
 	hw_data->disable_iov = adf_disable_sriov;
 	hw_data->dev_config = adf_gen2_dev_config;
+	hw_data->clock_frequency = ADF_DH895X_AE_FREQ;
+	hw_data->get_hb_clock = get_ts_clock;
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	hw_data->pfvf_ops.enable_vf2pf_interrupts = enable_vf2pf_interrupts;
diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
index 7b674bbe4192..cd3a21985455 100644
--- a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
+++ b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
@@ -3,6 +3,8 @@
 #ifndef ADF_DH895x_HW_DATA_H_
 #define ADF_DH895x_HW_DATA_H_
 
+#include <linux/units.h>
+
 /* PCIe configuration space */
 #define ADF_DH895XCC_SRAM_BAR 0
 #define ADF_DH895XCC_PMISC_BAR 1
@@ -30,6 +32,9 @@
 #define ADF_DH895XCC_AE2FUNC_MAP_GRP_A_NUM_REGS 96
 #define ADF_DH895XCC_AE2FUNC_MAP_GRP_B_NUM_REGS 12
 
+/* Clocks frequency */
+#define ADF_DH895X_AE_FREQ (933 * HZ_PER_MHZ)
+
 /* FW names */
 #define ADF_DH895XCC_FW "qat_895xcc.bin"
 #define ADF_DH895XCC_MMP "qat_895xcc_mmp.bin"

From 359b84f8db942ef46d24de8aa397790c3fae22e0 Mon Sep 17 00:00:00 2001
From: Damian Muszynski <damian.muszynski@intel.com>
Date: Fri, 30 Jun 2023 19:03:57 +0200
Subject: [PATCH 0328/2047] crypto: qat - add heartbeat feature

Under some circumstances, firmware in the QAT devices could become
unresponsive. The Heartbeat feature provides a mechanism to detect
unresponsive devices.

The QAT FW periodically writes to memory a set of counters that allow
to detect the liveness of a device. This patch adds logic to enable
the reporting of those counters, analyze them and report if a device
is alive or not.

In particular this adds
  (1) heartbeat enabling, reading and detection logic
  (2) reporting of heartbeat status and configuration via debugfs
  (3) documentation for the newly created sysfs entries
  (4) configuration of FW settings related to heartbeat, e.g. tick period
  (5) logic to convert time in ms (provided by the user) to clock ticks

This patch introduces a new folder in debugfs called heartbeat with the
following attributes:
 - status
 - queries_sent
 - queries_failed
 - config

All attributes except config are reading only. In particular:
 - `status` file returns 0 when device is operational and -1 otherwise.
 - `queries_sent` returns the total number of heartbeat queries sent.
 - `queries_failed` returns the total number of heartbeat queries failed.
 - `config` allows to adjust the frequency at which the firmware writes
   counters to memory. This period is given in milliseconds and it is
   fixed for GEN4 devices.

Signed-off-by: Damian Muszynski <damian.muszynski@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/ABI/testing/debugfs-driver-qat  |  51 ++++
 .../intel/qat/qat_4xxx/adf_4xxx_hw_data.c     |   1 +
 drivers/crypto/intel/qat/qat_4xxx/adf_drv.c   |   3 +
 .../intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c   |   1 +
 .../intel/qat/qat_c62x/adf_c62x_hw_data.c     |   1 +
 drivers/crypto/intel/qat/qat_common/Makefile  |   2 +
 .../intel/qat/qat_common/adf_accel_devices.h  |   7 +
 .../crypto/intel/qat/qat_common/adf_admin.c   |  14 +
 .../intel/qat/qat_common/adf_cfg_strings.h    |   2 +
 .../intel/qat/qat_common/adf_common_drv.h     |   1 +
 .../crypto/intel/qat/qat_common/adf_dbgfs.c   |   9 +-
 .../intel/qat/qat_common/adf_gen2_config.c    |   7 +
 .../intel/qat/qat_common/adf_gen2_hw_data.h   |   3 +
 .../intel/qat/qat_common/adf_gen4_hw_data.h   |   3 +
 .../intel/qat/qat_common/adf_heartbeat.c      | 268 ++++++++++++++++++
 .../intel/qat/qat_common/adf_heartbeat.h      |  73 +++++
 .../qat/qat_common/adf_heartbeat_dbgfs.c      | 194 +++++++++++++
 .../qat/qat_common/adf_heartbeat_dbgfs.h      |  12 +
 .../crypto/intel/qat/qat_common/adf_init.c    |   7 +
 .../qat/qat_common/icp_qat_fw_init_admin.h    |   4 +
 .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c   |   1 +
 21 files changed, 662 insertions(+), 2 deletions(-)
 create mode 100644 drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
 create mode 100644 drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
 create mode 100644 drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c
 create mode 100644 drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.h

diff --git a/Documentation/ABI/testing/debugfs-driver-qat b/Documentation/ABI/testing/debugfs-driver-qat
index 22d39c0ca1b2..6731ffacc5f0 100644
--- a/Documentation/ABI/testing/debugfs-driver-qat
+++ b/Documentation/ABI/testing/debugfs-driver-qat
@@ -8,3 +8,54 @@ Description:	(RO) Read returns the number of requests sent to the FW and the num
 
 			<N>: Number of requests sent from Acceleration Engine N to FW and responses
 			     Acceleration Engine N received from FW
+
+What:		/sys/kernel/debug/qat_<device>_<BDF>/heartbeat/config
+Date:		November 2023
+KernelVersion:	6.6
+Contact:	qat-linux@intel.com
+Description:	(RW) Read returns value of the Heartbeat update period.
+		Write to the file changes this period value.
+
+		This period should reflect planned polling interval of device
+		health status. High frequency Heartbeat monitoring wastes CPU cycles
+		but minimizes the customer’s system downtime. Also, if there are
+		large service requests that take some time to complete, high frequency
+		Heartbeat monitoring could result in false reports of unresponsiveness
+		and in those cases, period needs to be increased.
+
+		This parameter is effective only for c3xxx, c62x, dh895xcc devices.
+		4xxx has this value internally fixed to 200ms.
+
+		Default value is set to 500. Minimal allowed value is 200.
+		All values are expressed in milliseconds.
+
+What:		/sys/kernel/debug/qat_<device>_<BDF>/heartbeat/queries_failed
+Date:		November 2023
+KernelVersion:	6.6
+Contact:	qat-linux@intel.com
+Description:	(RO) Read returns the number of times the device became unresponsive.
+
+		Attribute returns value of the counter which is incremented when
+		status query results negative.
+
+What:		/sys/kernel/debug/qat_<device>_<BDF>/heartbeat/queries_sent
+Date:		November 2023
+KernelVersion:	6.6
+Contact:	qat-linux@intel.com
+Description:	(RO) Read returns the number of times the control process checked
+		if the device is responsive.
+
+		Attribute returns value of the counter which is incremented on
+		every status query.
+
+What:		/sys/kernel/debug/qat_<device>_<BDF>/heartbeat/status
+Date:		November 2023
+KernelVersion:	6.6
+Contact:	qat-linux@intel.com
+Description:	(RO) Read returns the device health status.
+
+		Returns 0 when device is healthy or -1 when is unresponsive
+		or the query failed to send.
+
+		The driver does not monitor for Heartbeat. It is left for a user
+		to poll the status periodically.
diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
index 9cda0c17992a..268a1f7694fc 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -521,6 +521,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id)
 	hw_data->start_timer = adf_gen4_timer_start;
 	hw_data->stop_timer = adf_gen4_timer_stop;
 	hw_data->get_hb_clock = get_heartbeat_clock;
+	hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE;
 
 	adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
 	adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops);
diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c
index 1a15600361d0..6d4e2e139ffa 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c
@@ -8,6 +8,7 @@
 #include <adf_cfg.h>
 #include <adf_common_drv.h>
 #include <adf_dbgfs.h>
+#include <adf_heartbeat.h>
 
 #include "adf_4xxx_hw_data.h"
 #include "qat_compression.h"
@@ -77,6 +78,8 @@ static int adf_cfg_dev_init(struct adf_accel_dev *accel_dev)
 	if (ret)
 		return ret;
 
+	adf_heartbeat_save_cfg_param(accel_dev, ADF_CFG_HB_TIMER_MIN_MS);
+
 	return 0;
 }
 
diff --git a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
index 2c6a1dd9780c..e81d11409426 100644
--- a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
@@ -152,6 +152,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
 	hw_data->dev_config = adf_gen2_dev_config;
 	hw_data->measure_clock = measure_clock;
 	hw_data->get_hb_clock = get_ts_clock;
+	hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE;
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
index 081702be839e..1a8c8e3a48e9 100644
--- a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
@@ -154,6 +154,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
 	hw_data->dev_config = adf_gen2_dev_config;
 	hw_data->measure_clock = measure_clock;
 	hw_data->get_hb_clock = get_ts_clock;
+	hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE;
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile
index b3e32d2abf45..43622c7fca71 100644
--- a/drivers/crypto/intel/qat/qat_common/Makefile
+++ b/drivers/crypto/intel/qat/qat_common/Makefile
@@ -31,6 +31,8 @@ intel_qat-objs := adf_cfg.o \
 
 intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \
 				adf_fw_counters.o \
+				adf_heartbeat.o \
+				adf_heartbeat_dbgfs.o \
 				adf_dbgfs.o
 
 intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \
diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
index c96f59cb7de9..ab897e1717e0 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
@@ -233,6 +233,7 @@ struct adf_hw_device_data {
 	u8 num_accel;
 	u8 num_logical_accel;
 	u8 num_engines;
+	u32 num_hb_ctrs;
 };
 
 /* CSR write macro */
@@ -245,6 +246,11 @@ struct adf_hw_device_data {
 #define ADF_CFG_NUM_SERVICES	4
 #define ADF_SRV_TYPE_BIT_LEN	3
 #define ADF_SRV_TYPE_MASK	0x7
+#define ADF_AE_ADMIN_THREAD	7
+#define ADF_NUM_THREADS_PER_AE	8
+#define ADF_NUM_PKE_STRAND	2
+#define ADF_AE_STRAND0_THREAD	8
+#define ADF_AE_STRAND1_THREAD	9
 
 #define GET_DEV(accel_dev) ((accel_dev)->accel_pci_dev.pci_dev->dev)
 #define GET_BARS(accel_dev) ((accel_dev)->accel_pci_dev.pci_bars)
@@ -301,6 +307,7 @@ struct adf_accel_dev {
 	struct module *owner;
 	struct adf_accel_pci accel_pci_dev;
 	struct adf_timer *timer;
+	struct adf_heartbeat *heartbeat;
 	union {
 		struct {
 			/* protects VF2PF interrupts access */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_admin.c b/drivers/crypto/intel/qat/qat_common/adf_admin.c
index 966e7ea271a2..ff790823b868 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_admin.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_admin.c
@@ -8,6 +8,7 @@
 #include <linux/dma-mapping.h>
 #include "adf_accel_devices.h"
 #include "adf_common_drv.h"
+#include "adf_heartbeat.h"
 #include "icp_qat_fw_init_admin.h"
 
 #define ADF_ADMIN_MAILBOX_STRIDE 0x1000
@@ -270,6 +271,19 @@ int adf_send_admin_tim_sync(struct adf_accel_dev *accel_dev, u32 cnt)
 	return adf_send_admin(accel_dev, &req, &resp, ae_mask);
 }
 
+int adf_send_admin_hb_timer(struct adf_accel_dev *accel_dev, uint32_t ticks)
+{
+	u32 ae_mask = accel_dev->hw_device->ae_mask;
+	struct icp_qat_fw_init_admin_req req = { };
+	struct icp_qat_fw_init_admin_resp resp;
+
+	req.cmd_id = ICP_QAT_FW_HEARTBEAT_TIMER_SET;
+	req.init_cfg_ptr = accel_dev->heartbeat->dma.phy_addr;
+	req.heartbeat_ticks = ticks;
+
+	return adf_send_admin(accel_dev, &req, &resp, ae_mask);
+}
+
 /**
  * adf_send_admin_init() - Function sends init message to FW
  * @accel_dev: Pointer to acceleration device.
diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h
index 3ae1e5caee0e..6066dc637352 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h
@@ -47,4 +47,6 @@
 #define ADF_ETRMGR_CORE_AFFINITY_FORMAT \
 	ADF_ETRMGR_BANK "%d" ADF_ETRMGR_CORE_AFFINITY
 #define ADF_ACCEL_STR "Accelerator%d"
+#define ADF_HEARTBEAT_TIMER  "HeartbeatTimer"
+
 #endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
index a9552832858f..799a2193d3e5 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
@@ -97,6 +97,7 @@ int adf_send_admin_init(struct adf_accel_dev *accel_dev);
 int adf_get_ae_fw_counters(struct adf_accel_dev *accel_dev, u16 ae, u64 *reqs, u64 *resps);
 int adf_init_admin_pm(struct adf_accel_dev *accel_dev, u32 idle_delay);
 int adf_send_admin_tim_sync(struct adf_accel_dev *accel_dev, u32 cnt);
+int adf_send_admin_hb_timer(struct adf_accel_dev *accel_dev, uint32_t ticks);
 int adf_get_fw_timestamp(struct adf_accel_dev *accel_dev, u64 *timestamp);
 int adf_init_arb(struct adf_accel_dev *accel_dev);
 void adf_exit_arb(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_dbgfs.c b/drivers/crypto/intel/qat/qat_common/adf_dbgfs.c
index 5080ecffab03..04845f8d72be 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_dbgfs.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_dbgfs.c
@@ -7,6 +7,7 @@
 #include "adf_common_drv.h"
 #include "adf_dbgfs.h"
 #include "adf_fw_counters.h"
+#include "adf_heartbeat_dbgfs.h"
 
 /**
  * adf_dbgfs_init() - add persistent debugfs entries
@@ -58,8 +59,10 @@ void adf_dbgfs_add(struct adf_accel_dev *accel_dev)
 	if (!accel_dev->debugfs_dir)
 		return;
 
-	if (!accel_dev->is_vf)
+	if (!accel_dev->is_vf) {
 		adf_fw_counters_dbgfs_add(accel_dev);
+		adf_heartbeat_dbgfs_add(accel_dev);
+	}
 }
 
 /**
@@ -71,6 +74,8 @@ void adf_dbgfs_rm(struct adf_accel_dev *accel_dev)
 	if (!accel_dev->debugfs_dir)
 		return;
 
-	if (!accel_dev->is_vf)
+	if (!accel_dev->is_vf) {
+		adf_heartbeat_dbgfs_rm(accel_dev);
 		adf_fw_counters_dbgfs_rm(accel_dev);
+	}
 }
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_config.c b/drivers/crypto/intel/qat/qat_common/adf_gen2_config.c
index eeb30da7587a..c27ff6d18e11 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen2_config.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_config.c
@@ -7,6 +7,7 @@
 #include "adf_common_drv.h"
 #include "qat_crypto.h"
 #include "qat_compression.h"
+#include "adf_heartbeat.h"
 #include "adf_transport_access_macros.h"
 
 static int adf_gen2_crypto_dev_config(struct adf_accel_dev *accel_dev)
@@ -195,6 +196,12 @@ int adf_gen2_dev_config(struct adf_accel_dev *accel_dev)
 	if (ret)
 		goto err;
 
+	ret = adf_cfg_section_add(accel_dev, ADF_GENERAL_SEC);
+	if (ret)
+		goto err;
+
+	adf_heartbeat_save_cfg_param(accel_dev, ADF_CFG_HB_TIMER_DEFAULT_MS);
+
 	set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
 
 	return ret;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h
index e4bc07529be4..6bd341061de4 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h
@@ -145,6 +145,9 @@ do { \
 #define ADF_GEN2_CERRSSMSH(i)		((i) * 0x4000 + 0x10)
 #define ADF_GEN2_ERRSSMSH_EN		BIT(3)
 
+/* Number of heartbeat counter pairs */
+#define ADF_NUM_HB_CNT_PER_AE ADF_NUM_THREADS_PER_AE
+
 /* Interrupts */
 #define ADF_GEN2_SMIAPF0_MASK_OFFSET    (0x3A000 + 0x28)
 #define ADF_GEN2_SMIAPF1_MASK_OFFSET    (0x3A000 + 0x30)
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
index 4fb4b3df5a18..02d7a019ebf8 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
@@ -136,6 +136,9 @@ do { \
 
 #define ADF_GEN4_VFLNOTIFY	BIT(7)
 
+/* Number of heartbeat counter pairs */
+#define ADF_NUM_HB_CNT_PER_AE ADF_NUM_THREADS_PER_AE
+
 void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev);
 void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops);
 int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
new file mode 100644
index 000000000000..7358aac8e56d
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Intel Corporation */
+
+#include <linux/dev_printk.h>
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/kstrtox.h>
+#include <linux/overflow.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <asm/errno.h>
+#include "adf_accel_devices.h"
+#include "adf_cfg.h"
+#include "adf_cfg_strings.h"
+#include "adf_clock.h"
+#include "adf_common_drv.h"
+#include "adf_heartbeat.h"
+#include "adf_transport_internal.h"
+#include "icp_qat_fw_init_admin.h"
+
+/* Heartbeat counter pair */
+struct hb_cnt_pair {
+	__u16 resp_heartbeat_cnt;
+	__u16 req_heartbeat_cnt;
+};
+
+static int adf_hb_check_polling_freq(struct adf_accel_dev *accel_dev)
+{
+	u64 curr_time = adf_clock_get_current_time();
+	u64 polling_time = curr_time - accel_dev->heartbeat->last_hb_check_time;
+
+	if (polling_time < accel_dev->heartbeat->hb_timer) {
+		dev_warn(&GET_DEV(accel_dev),
+			 "HB polling too frequent. Configured HB timer %d ms\n",
+			 accel_dev->heartbeat->hb_timer);
+		return -EINVAL;
+	}
+
+	accel_dev->heartbeat->last_hb_check_time = curr_time;
+	return 0;
+}
+
+static int get_timer_ticks(struct adf_accel_dev *accel_dev, unsigned int *value)
+{
+	char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { };
+	u32 timer_ms = ADF_CFG_HB_TIMER_DEFAULT_MS;
+	int cfg_read_status;
+	u32 ticks;
+	int ret;
+
+	cfg_read_status = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
+						  ADF_HEARTBEAT_TIMER, timer_str);
+	if (cfg_read_status == 0) {
+		if (kstrtouint(timer_str, 10, &timer_ms))
+			dev_dbg(&GET_DEV(accel_dev),
+				"kstrtouint failed to parse the %s, param value",
+				ADF_HEARTBEAT_TIMER);
+	}
+
+	if (timer_ms < ADF_CFG_HB_TIMER_MIN_MS) {
+		dev_err(&GET_DEV(accel_dev), "Timer cannot be less than %u\n",
+			ADF_CFG_HB_TIMER_MIN_MS);
+		return -EINVAL;
+	}
+
+	/*
+	 * On 4xxx devices adf_timer is responsible for HB updates and
+	 * its period is fixed to 200ms
+	 */
+	if (accel_dev->timer)
+		timer_ms = ADF_CFG_HB_TIMER_MIN_MS;
+
+	ret = adf_heartbeat_ms_to_ticks(accel_dev, timer_ms, &ticks);
+	if (ret)
+		return ret;
+
+	adf_heartbeat_save_cfg_param(accel_dev, timer_ms);
+
+	accel_dev->heartbeat->hb_timer = timer_ms;
+	*value = ticks;
+
+	return 0;
+}
+
+static int check_ae(struct hb_cnt_pair *curr, struct hb_cnt_pair *prev,
+		    u16 *count, const size_t hb_ctrs)
+{
+	size_t thr;
+
+	/* loop through all threads in AE */
+	for (thr = 0; thr < hb_ctrs; thr++) {
+		u16 req = curr[thr].req_heartbeat_cnt;
+		u16 resp = curr[thr].resp_heartbeat_cnt;
+		u16 last = prev[thr].resp_heartbeat_cnt;
+
+		if ((thr == ADF_AE_ADMIN_THREAD || req != resp) && resp == last) {
+			u16 retry = ++count[thr];
+
+			if (retry >= ADF_CFG_HB_COUNT_THRESHOLD)
+				return -EIO;
+
+		} else {
+			count[thr] = 0;
+		}
+	}
+	return 0;
+}
+
+static int adf_hb_get_status(struct adf_accel_dev *accel_dev)
+{
+	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+	struct hb_cnt_pair *live_stats, *last_stats, *curr_stats;
+	const size_t hb_ctrs = hw_device->num_hb_ctrs;
+	const unsigned long ae_mask = hw_device->ae_mask;
+	const size_t max_aes = hw_device->num_engines;
+	const size_t dev_ctrs = size_mul(max_aes, hb_ctrs);
+	const size_t stats_size = size_mul(dev_ctrs, sizeof(*curr_stats));
+	struct hb_cnt_pair *ae_curr_p, *ae_prev_p;
+	u16 *count_fails, *ae_count_p;
+	size_t ae_offset;
+	size_t ae = 0;
+	int ret = 0;
+
+	live_stats = accel_dev->heartbeat->dma.virt_addr;
+	last_stats = live_stats + dev_ctrs;
+	count_fails = (u16 *)(last_stats + dev_ctrs);
+
+	curr_stats = kmemdup(live_stats, stats_size, GFP_KERNEL);
+	if (!curr_stats)
+		return -ENOMEM;
+
+	/* loop through active AEs */
+	for_each_set_bit(ae, &ae_mask, max_aes) {
+		ae_offset = size_mul(ae, hb_ctrs);
+		ae_curr_p = curr_stats + ae_offset;
+		ae_prev_p = last_stats + ae_offset;
+		ae_count_p = count_fails + ae_offset;
+
+		ret = check_ae(ae_curr_p, ae_prev_p, ae_count_p, hb_ctrs);
+		if (ret)
+			break;
+	}
+
+	/* Copy current stats for the next iteration */
+	memcpy(last_stats, curr_stats, stats_size);
+	kfree(curr_stats);
+
+	return ret;
+}
+
+void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
+			  enum adf_device_heartbeat_status *hb_status)
+{
+	struct adf_heartbeat *hb;
+
+	if (!adf_dev_started(accel_dev) ||
+	    test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) {
+		*hb_status = HB_DEV_UNRESPONSIVE;
+		return;
+	}
+
+	if (adf_hb_check_polling_freq(accel_dev) == -EINVAL) {
+		*hb_status = HB_DEV_UNSUPPORTED;
+		return;
+	}
+
+	hb = accel_dev->heartbeat;
+	hb->hb_sent_counter++;
+
+	if (adf_hb_get_status(accel_dev)) {
+		dev_err(&GET_DEV(accel_dev),
+			"Heartbeat ERROR: QAT is not responding.\n");
+		*hb_status = HB_DEV_UNRESPONSIVE;
+		hb->hb_failed_counter++;
+		return;
+	}
+
+	*hb_status = HB_DEV_ALIVE;
+}
+
+int adf_heartbeat_ms_to_ticks(struct adf_accel_dev *accel_dev, unsigned int time_ms,
+			      u32 *value)
+{
+	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+	u32 clk_per_sec;
+
+	/* HB clock may be different than AE clock */
+	if (!hw_data->get_hb_clock)
+		return -EINVAL;
+
+	clk_per_sec = hw_data->get_hb_clock(hw_data);
+	*value = time_ms * (clk_per_sec / MSEC_PER_SEC);
+
+	return 0;
+}
+
+int adf_heartbeat_save_cfg_param(struct adf_accel_dev *accel_dev,
+				 unsigned int timer_ms)
+{
+	char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
+
+	snprintf(timer_str, sizeof(timer_str), "%u", timer_ms);
+	return adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC,
+					  ADF_HEARTBEAT_TIMER, timer_str,
+					  ADF_STR);
+}
+EXPORT_SYMBOL_GPL(adf_heartbeat_save_cfg_param);
+
+int adf_heartbeat_init(struct adf_accel_dev *accel_dev)
+{
+	struct adf_heartbeat *hb;
+
+	hb = kzalloc(sizeof(*hb), GFP_KERNEL);
+	if (!hb)
+		goto err_ret;
+
+	hb->dma.virt_addr = dma_alloc_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
+					       &hb->dma.phy_addr, GFP_KERNEL);
+	if (!hb->dma.virt_addr)
+		goto err_free;
+
+	accel_dev->heartbeat = hb;
+
+	return 0;
+
+err_free:
+	kfree(hb);
+err_ret:
+	return -ENOMEM;
+}
+
+int adf_heartbeat_start(struct adf_accel_dev *accel_dev)
+{
+	unsigned int timer_ticks;
+	int ret;
+
+	if (!accel_dev->heartbeat) {
+		dev_warn(&GET_DEV(accel_dev), "Heartbeat instance not found!");
+		return -EFAULT;
+	}
+
+	ret = get_timer_ticks(accel_dev, &timer_ticks);
+	if (ret)
+		return ret;
+
+	ret = adf_send_admin_hb_timer(accel_dev, timer_ticks);
+	if (ret)
+		dev_warn(&GET_DEV(accel_dev), "Heartbeat not supported!");
+
+	return ret;
+}
+
+void adf_heartbeat_shutdown(struct adf_accel_dev *accel_dev)
+{
+	struct adf_heartbeat *hb = accel_dev->heartbeat;
+
+	if (!hb)
+		return;
+
+	if (hb->dma.virt_addr)
+		dma_free_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
+				  hb->dma.virt_addr, hb->dma.phy_addr);
+
+	kfree(hb);
+	accel_dev->heartbeat = NULL;
+}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
new file mode 100644
index 000000000000..297147f44150
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2023 Intel Corporation */
+
+#ifndef ADF_HEARTBEAT_H_
+#define ADF_HEARTBEAT_H_
+
+#include <linux/types.h>
+
+struct adf_accel_dev;
+struct dentry;
+
+#define ADF_CFG_HB_TIMER_MIN_MS 200
+#define ADF_CFG_HB_TIMER_DEFAULT_MS 500
+#define ADF_CFG_HB_COUNT_THRESHOLD 3
+
+enum adf_device_heartbeat_status {
+	HB_DEV_UNRESPONSIVE = 0,
+	HB_DEV_ALIVE,
+	HB_DEV_UNSUPPORTED,
+};
+
+struct adf_heartbeat {
+	unsigned int hb_sent_counter;
+	unsigned int hb_failed_counter;
+	unsigned int hb_timer;
+	u64 last_hb_check_time;
+	struct hb_dma_addr {
+		dma_addr_t phy_addr;
+		void *virt_addr;
+	} dma;
+	struct {
+		struct dentry *base_dir;
+		struct dentry *status;
+		struct dentry *cfg;
+		struct dentry *sent;
+		struct dentry *failed;
+	} dbgfs;
+};
+
+#ifdef CONFIG_DEBUG_FS
+int adf_heartbeat_init(struct adf_accel_dev *accel_dev);
+int adf_heartbeat_start(struct adf_accel_dev *accel_dev);
+void adf_heartbeat_shutdown(struct adf_accel_dev *accel_dev);
+
+int adf_heartbeat_ms_to_ticks(struct adf_accel_dev *accel_dev, unsigned int time_ms,
+			      uint32_t *value);
+int adf_heartbeat_save_cfg_param(struct adf_accel_dev *accel_dev,
+				 unsigned int timer_ms);
+void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
+			  enum adf_device_heartbeat_status *hb_status);
+
+#else
+static inline int adf_heartbeat_init(struct adf_accel_dev *accel_dev)
+{
+	return 0;
+}
+
+static inline int adf_heartbeat_start(struct adf_accel_dev *accel_dev)
+{
+	return 0;
+}
+
+static inline void adf_heartbeat_shutdown(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline int adf_heartbeat_save_cfg_param(struct adf_accel_dev *accel_dev,
+					       unsigned int timer_ms)
+{
+	return 0;
+}
+#endif
+#endif /* ADF_HEARTBEAT_H_ */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c
new file mode 100644
index 000000000000..803cbfd838f0
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Intel Corporation */
+
+#include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/kstrtox.h>
+#include <linux/types.h>
+#include "adf_cfg.h"
+#include "adf_common_drv.h"
+#include "adf_heartbeat.h"
+#include "adf_heartbeat_dbgfs.h"
+
+#define HB_OK 0
+#define HB_ERROR -1
+#define HB_STATUS_MAX_STRLEN 4
+#define HB_STATS_MAX_STRLEN 16
+
+static ssize_t adf_hb_stats_read(struct file *file, char __user *user_buffer,
+				 size_t count, loff_t *ppos)
+{
+	char buf[HB_STATS_MAX_STRLEN];
+	unsigned int *value;
+	int len;
+
+	if (*ppos > 0)
+		return 0;
+
+	value = file->private_data;
+	len = scnprintf(buf, sizeof(buf), "%u\n", *value);
+
+	return simple_read_from_buffer(user_buffer, count, ppos, buf, len + 1);
+}
+
+static const struct file_operations adf_hb_stats_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = adf_hb_stats_read,
+};
+
+static ssize_t adf_hb_status_read(struct file *file, char __user *user_buf,
+				  size_t count, loff_t *ppos)
+{
+	enum adf_device_heartbeat_status hb_status;
+	char ret_str[HB_STATUS_MAX_STRLEN];
+	struct adf_accel_dev *accel_dev;
+	int ret_code;
+	size_t len;
+
+	if (*ppos > 0)
+		return 0;
+
+	accel_dev = file->private_data;
+	ret_code = HB_OK;
+
+	adf_heartbeat_status(accel_dev, &hb_status);
+
+	if (hb_status != HB_DEV_ALIVE)
+		ret_code = HB_ERROR;
+
+	len = scnprintf(ret_str, sizeof(ret_str), "%d\n", ret_code);
+
+	return simple_read_from_buffer(user_buf, count, ppos, ret_str, len + 1);
+}
+
+static const struct file_operations adf_hb_status_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = adf_hb_status_read,
+};
+
+static ssize_t adf_hb_cfg_read(struct file *file, char __user *user_buf,
+			       size_t count, loff_t *ppos)
+{
+	char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
+	struct adf_accel_dev *accel_dev;
+	unsigned int timer_ms;
+	int len;
+
+	if (*ppos > 0)
+		return 0;
+
+	accel_dev = file->private_data;
+	timer_ms = accel_dev->heartbeat->hb_timer;
+	len = scnprintf(timer_str, sizeof(timer_str), "%u\n", timer_ms);
+
+	return simple_read_from_buffer(user_buf, count, ppos, timer_str,
+				       len + 1);
+}
+
+static ssize_t adf_hb_cfg_write(struct file *file, const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	char input_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { };
+	struct adf_accel_dev *accel_dev;
+	int ret, written_chars;
+	unsigned int timer_ms;
+	u32 ticks;
+
+	accel_dev = file->private_data;
+	timer_ms = ADF_CFG_HB_TIMER_DEFAULT_MS;
+
+	/* last byte left as string termination */
+	if (count > sizeof(input_str) - 1)
+		return -EINVAL;
+
+	written_chars = simple_write_to_buffer(input_str, sizeof(input_str) - 1,
+					       ppos, user_buf, count);
+	if (written_chars > 0) {
+		ret = kstrtouint(input_str, 10, &timer_ms);
+		if (ret) {
+			dev_err(&GET_DEV(accel_dev),
+				"heartbeat_cfg: Invalid value\n");
+			return ret;
+		}
+
+		if (timer_ms < ADF_CFG_HB_TIMER_MIN_MS) {
+			dev_err(&GET_DEV(accel_dev),
+				"heartbeat_cfg: Invalid value\n");
+			return -EINVAL;
+		}
+
+		/*
+		 * On 4xxx devices adf_timer is responsible for HB updates and
+		 * its period is fixed to 200ms
+		 */
+		if (accel_dev->timer)
+			timer_ms = ADF_CFG_HB_TIMER_MIN_MS;
+
+		ret = adf_heartbeat_save_cfg_param(accel_dev, timer_ms);
+		if (ret)
+			return ret;
+
+		ret = adf_heartbeat_ms_to_ticks(accel_dev, timer_ms, &ticks);
+		if (ret)
+			return ret;
+
+		ret = adf_send_admin_hb_timer(accel_dev, ticks);
+		if (ret)
+			return ret;
+
+		accel_dev->heartbeat->hb_timer = timer_ms;
+	}
+
+	return written_chars;
+}
+
+static const struct file_operations adf_hb_cfg_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = adf_hb_cfg_read,
+	.write = adf_hb_cfg_write,
+};
+
+void adf_heartbeat_dbgfs_add(struct adf_accel_dev *accel_dev)
+{
+	struct adf_heartbeat *hb = accel_dev->heartbeat;
+
+	if (!hb)
+		return;
+
+	hb->dbgfs.base_dir = debugfs_create_dir("heartbeat", accel_dev->debugfs_dir);
+	hb->dbgfs.status = debugfs_create_file("status", 0400, hb->dbgfs.base_dir,
+					       accel_dev, &adf_hb_status_fops);
+	hb->dbgfs.sent = debugfs_create_file("queries_sent", 0400, hb->dbgfs.base_dir,
+					     &hb->hb_sent_counter, &adf_hb_stats_fops);
+	hb->dbgfs.failed = debugfs_create_file("queries_failed", 0400, hb->dbgfs.base_dir,
+					       &hb->hb_failed_counter, &adf_hb_stats_fops);
+	hb->dbgfs.cfg = debugfs_create_file("config", 0600, hb->dbgfs.base_dir,
+					    accel_dev, &adf_hb_cfg_fops);
+}
+EXPORT_SYMBOL_GPL(adf_heartbeat_dbgfs_add);
+
+void adf_heartbeat_dbgfs_rm(struct adf_accel_dev *accel_dev)
+{
+	struct adf_heartbeat *hb = accel_dev->heartbeat;
+
+	if (!hb)
+		return;
+
+	debugfs_remove(hb->dbgfs.status);
+	hb->dbgfs.status = NULL;
+	debugfs_remove(hb->dbgfs.sent);
+	hb->dbgfs.sent = NULL;
+	debugfs_remove(hb->dbgfs.failed);
+	hb->dbgfs.failed = NULL;
+	debugfs_remove(hb->dbgfs.cfg);
+	hb->dbgfs.cfg = NULL;
+	debugfs_remove(hb->dbgfs.base_dir);
+	hb->dbgfs.base_dir = NULL;
+}
+EXPORT_SYMBOL_GPL(adf_heartbeat_dbgfs_rm);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.h b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.h
new file mode 100644
index 000000000000..84dd29ea6454
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2023 Intel Corporation */
+
+#ifndef ADF_HEARTBEAT_DBGFS_H_
+#define ADF_HEARTBEAT_DBGFS_H_
+
+struct adf_accel_dev;
+
+void adf_heartbeat_dbgfs_add(struct adf_accel_dev *accel_dev);
+void adf_heartbeat_dbgfs_rm(struct adf_accel_dev *accel_dev);
+
+#endif /* ADF_HEARTBEAT_DBGFS_H_ */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_init.c b/drivers/crypto/intel/qat/qat_common/adf_init.c
index 53fca6a7e2af..89001fe92e76 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_init.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_init.c
@@ -8,6 +8,7 @@
 #include "adf_cfg.h"
 #include "adf_common_drv.h"
 #include "adf_dbgfs.h"
+#include "adf_heartbeat.h"
 
 static LIST_HEAD(service_table);
 static DEFINE_MUTEX(service_lock);
@@ -129,6 +130,8 @@ static int adf_dev_init(struct adf_accel_dev *accel_dev)
 			return -EFAULT;
 	}
 
+	adf_heartbeat_init(accel_dev);
+
 	/*
 	 * Subservice initialisation is divided into two stages: init and start.
 	 * This is to facilitate any ordering dependencies between services
@@ -204,6 +207,8 @@ static int adf_dev_start(struct adf_accel_dev *accel_dev)
 		}
 	}
 
+	adf_heartbeat_start(accel_dev);
+
 	list_for_each(list_itr, &service_table) {
 		service = list_entry(list_itr, struct service_hndl, list);
 		if (service->event_hld(accel_dev, ADF_EVENT_START)) {
@@ -347,6 +352,8 @@ static void adf_dev_shutdown(struct adf_accel_dev *accel_dev)
 			clear_bit(accel_dev->accel_id, service->init_status);
 	}
 
+	adf_heartbeat_shutdown(accel_dev);
+
 	hw_data->disable_iov(accel_dev);
 
 	if (test_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status)) {
diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h
index e836fbaeec62..3e968a4bcc9c 100644
--- a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h
@@ -16,6 +16,7 @@ enum icp_qat_fw_init_admin_cmd_id {
 	ICP_QAT_FW_HEARTBEAT_SYNC = 7,
 	ICP_QAT_FW_HEARTBEAT_GET = 8,
 	ICP_QAT_FW_COMP_CAPABILITY_GET = 9,
+	ICP_QAT_FW_HEARTBEAT_TIMER_SET = 13,
 	ICP_QAT_FW_TIMER_GET = 19,
 	ICP_QAT_FW_PM_STATE_CONFIG = 128,
 };
@@ -41,6 +42,9 @@ struct icp_qat_fw_init_admin_req {
 		struct {
 			__u32 int_timer_ticks;
 		};
+		struct {
+			__u32 heartbeat_ticks;
+		};
 		__u32 idle_filter;
 	};
 
diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index c57403a6fbac..8fbab905c5cc 100644
--- a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -247,6 +247,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
 	hw_data->dev_config = adf_gen2_dev_config;
 	hw_data->clock_frequency = ADF_DH895X_AE_FREQ;
 	hw_data->get_hb_clock = get_ts_clock;
+	hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE;
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	hw_data->pfvf_ops.enable_vf2pf_interrupts = enable_vf2pf_interrupts;

From bec61a294dbec1c5929aa6f138cddcab68a62311 Mon Sep 17 00:00:00 2001
From: Damian Muszynski <damian.muszynski@intel.com>
Date: Fri, 30 Jun 2023 19:03:58 +0200
Subject: [PATCH 0329/2047] crypto: qat - add heartbeat counters check

A firmware update for QAT GEN2 changed the format of a data
structure used to report the heartbeat counters.

To support all firmware versions, extend the heartbeat logic
with an algorithm that detects the number of counters returned
by firmware. The algorithm detects the number of counters to
be used (and size of the corresponding data structure) by the
comparison the expected size of the data in memory, with the data
which was written by the firmware.

Firmware detection is done one time during the first read of heartbeat
debugfs file to avoid increasing the time needed to load the module.

Signed-off-by: Damian Muszynski <damian.muszynski@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c   |  2 +
 .../intel/qat/qat_c62x/adf_c62x_hw_data.c     |  2 +
 .../intel/qat/qat_common/adf_accel_devices.h  |  1 +
 .../intel/qat/qat_common/adf_heartbeat.c      | 68 +++++++++++++++++++
 .../intel/qat/qat_common/adf_heartbeat.h      |  6 ++
 .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c   |  2 +
 6 files changed, 81 insertions(+)

diff --git a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
index e81d11409426..9c00c441b602 100644
--- a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
@@ -8,6 +8,7 @@
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include "adf_c3xxx_hw_data.h"
+#include "adf_heartbeat.h"
 #include "icp_qat_hw.h"
 
 /* Worker thread to service arbiter mappings */
@@ -153,6 +154,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
 	hw_data->measure_clock = measure_clock;
 	hw_data->get_hb_clock = get_ts_clock;
 	hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE;
+	hw_data->check_hb_ctrs = adf_heartbeat_check_ctrs;
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
index 1a8c8e3a48e9..355a781693eb 100644
--- a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
@@ -8,6 +8,7 @@
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include "adf_c62x_hw_data.h"
+#include "adf_heartbeat.h"
 #include "icp_qat_hw.h"
 
 /* Worker thread to service arbiter mappings */
@@ -155,6 +156,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
 	hw_data->measure_clock = measure_clock;
 	hw_data->get_hb_clock = get_ts_clock;
 	hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE;
+	hw_data->check_hb_ctrs = adf_heartbeat_check_ctrs;
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
index ab897e1717e0..e57abde66f4f 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
@@ -190,6 +190,7 @@ struct adf_hw_device_data {
 	int (*send_admin_init)(struct adf_accel_dev *accel_dev);
 	int (*start_timer)(struct adf_accel_dev *accel_dev);
 	void (*stop_timer)(struct adf_accel_dev *accel_dev);
+	void (*check_hb_ctrs)(struct adf_accel_dev *accel_dev);
 	uint32_t (*get_hb_clock)(struct adf_hw_device_data *self);
 	int (*measure_clock)(struct adf_accel_dev *accel_dev);
 	int (*init_arb)(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
index 7358aac8e56d..beef9a5f6c75 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
@@ -20,6 +20,8 @@
 #include "adf_transport_internal.h"
 #include "icp_qat_fw_init_admin.h"
 
+#define ADF_HB_EMPTY_SIG 0xA5A5A5A5
+
 /* Heartbeat counter pair */
 struct hb_cnt_pair {
 	__u16 resp_heartbeat_cnt;
@@ -42,6 +44,57 @@ static int adf_hb_check_polling_freq(struct adf_accel_dev *accel_dev)
 	return 0;
 }
 
+/**
+ * validate_hb_ctrs_cnt() - checks if the number of heartbeat counters should
+ * be updated by one to support the currently loaded firmware.
+ * @accel_dev: Pointer to acceleration device.
+ *
+ * Return:
+ * * true - hb_ctrs must increased by ADF_NUM_PKE_STRAND
+ * * false - no changes needed
+ */
+static bool validate_hb_ctrs_cnt(struct adf_accel_dev *accel_dev)
+{
+	const size_t hb_ctrs = accel_dev->hw_device->num_hb_ctrs;
+	const size_t max_aes = accel_dev->hw_device->num_engines;
+	const size_t hb_struct_size = sizeof(struct hb_cnt_pair);
+	const size_t exp_diff_size = array3_size(ADF_NUM_PKE_STRAND, max_aes,
+						 hb_struct_size);
+	const size_t dev_ctrs = size_mul(max_aes, hb_ctrs);
+	const size_t stats_size = size_mul(dev_ctrs, hb_struct_size);
+	const u32 exp_diff_cnt = exp_diff_size / sizeof(u32);
+	const u32 stats_el_cnt = stats_size / sizeof(u32);
+	struct hb_cnt_pair *hb_stats = accel_dev->heartbeat->dma.virt_addr;
+	const u32 *mem_to_chk = (u32 *)(hb_stats + dev_ctrs);
+	u32 el_diff_cnt = 0;
+	int i;
+
+	/* count how many bytes are different from pattern */
+	for (i = 0; i < stats_el_cnt; i++) {
+		if (mem_to_chk[i] == ADF_HB_EMPTY_SIG)
+			break;
+
+		el_diff_cnt++;
+	}
+
+	return el_diff_cnt && el_diff_cnt == exp_diff_cnt;
+}
+
+void adf_heartbeat_check_ctrs(struct adf_accel_dev *accel_dev)
+{
+	struct hb_cnt_pair *hb_stats = accel_dev->heartbeat->dma.virt_addr;
+	const size_t hb_ctrs = accel_dev->hw_device->num_hb_ctrs;
+	const size_t max_aes = accel_dev->hw_device->num_engines;
+	const size_t dev_ctrs = size_mul(max_aes, hb_ctrs);
+	const size_t stats_size = size_mul(dev_ctrs, sizeof(struct hb_cnt_pair));
+	const size_t mem_items_to_fill = size_mul(stats_size, 2) / sizeof(u32);
+
+	/* fill hb stats memory with pattern */
+	memset32((uint32_t *)hb_stats, ADF_HB_EMPTY_SIG, mem_items_to_fill);
+	accel_dev->heartbeat->ctrs_cnt_checked = false;
+}
+EXPORT_SYMBOL_GPL(adf_heartbeat_check_ctrs);
+
 static int get_timer_ticks(struct adf_accel_dev *accel_dev, unsigned int *value)
 {
 	char timer_str[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = { };
@@ -123,6 +176,13 @@ static int adf_hb_get_status(struct adf_accel_dev *accel_dev)
 	size_t ae = 0;
 	int ret = 0;
 
+	if (!accel_dev->heartbeat->ctrs_cnt_checked) {
+		if (validate_hb_ctrs_cnt(accel_dev))
+			hw_device->num_hb_ctrs += ADF_NUM_PKE_STRAND;
+
+		accel_dev->heartbeat->ctrs_cnt_checked = true;
+	}
+
 	live_stats = accel_dev->heartbeat->dma.virt_addr;
 	last_stats = live_stats + dev_ctrs;
 	count_fails = (u16 *)(last_stats + dev_ctrs);
@@ -221,6 +281,11 @@ int adf_heartbeat_init(struct adf_accel_dev *accel_dev)
 	if (!hb->dma.virt_addr)
 		goto err_free;
 
+	/*
+	 * Default set this flag as true to avoid unnecessary checks,
+	 * it will be reset on platforms that need such a check
+	 */
+	hb->ctrs_cnt_checked = true;
 	accel_dev->heartbeat = hb;
 
 	return 0;
@@ -241,6 +306,9 @@ int adf_heartbeat_start(struct adf_accel_dev *accel_dev)
 		return -EFAULT;
 	}
 
+	if (accel_dev->hw_device->check_hb_ctrs)
+		accel_dev->hw_device->check_hb_ctrs(accel_dev);
+
 	ret = get_timer_ticks(accel_dev, &timer_ticks);
 	if (ret)
 		return ret;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
index 297147f44150..b22e3cb29798 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
@@ -24,6 +24,7 @@ struct adf_heartbeat {
 	unsigned int hb_failed_counter;
 	unsigned int hb_timer;
 	u64 last_hb_check_time;
+	bool ctrs_cnt_checked;
 	struct hb_dma_addr {
 		dma_addr_t phy_addr;
 		void *virt_addr;
@@ -48,6 +49,7 @@ int adf_heartbeat_save_cfg_param(struct adf_accel_dev *accel_dev,
 				 unsigned int timer_ms);
 void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
 			  enum adf_device_heartbeat_status *hb_status);
+void adf_heartbeat_check_ctrs(struct adf_accel_dev *accel_dev);
 
 #else
 static inline int adf_heartbeat_init(struct adf_accel_dev *accel_dev)
@@ -69,5 +71,9 @@ static inline int adf_heartbeat_save_cfg_param(struct adf_accel_dev *accel_dev,
 {
 	return 0;
 }
+
+static inline void adf_heartbeat_check_ctrs(struct adf_accel_dev *accel_dev)
+{
+}
 #endif
 #endif /* ADF_HEARTBEAT_H_ */
diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index 8fbab905c5cc..09551f949126 100644
--- a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -7,6 +7,7 @@
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include "adf_dh895xcc_hw_data.h"
+#include "adf_heartbeat.h"
 #include "icp_qat_hw.h"
 
 #define ADF_DH895XCC_VF_MSK	0xFFFFFFFF
@@ -248,6 +249,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
 	hw_data->clock_frequency = ADF_DH895X_AE_FREQ;
 	hw_data->get_hb_clock = get_ts_clock;
 	hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE;
+	hw_data->check_hb_ctrs = adf_heartbeat_check_ctrs;
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	hw_data->pfvf_ops.enable_vf2pf_interrupts = enable_vf2pf_interrupts;

From 039980de89dc9dd757418d6f296e4126cc3f86c3 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 2 Jul 2023 19:35:02 +0200
Subject: [PATCH 0330/2047] hwrng: nomadik - keep clock enabled while hwrng is
 registered

The nomadik driver uses devres to register itself with the hwrng core,
the driver will be unregistered from hwrng when its device goes out of
scope. This happens after the driver's remove function is called.

However, nomadik's clock is disabled in the remove function. There's a
short timeframe where nomadik is still registered with the hwrng core
although its clock is disabled. I suppose the clock must be active to
access the hardware and serve requests from the hwrng core.

Switch to devm_clk_get_enabled and let devres disable the clock and
unregister the hwrng. This avoids the race condition.

Fixes: 3e75241be808 ("hwrng: drivers - Use device-managed registration API")
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/nomadik-rng.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c
index e8f9621e7954..3774adf903a8 100644
--- a/drivers/char/hw_random/nomadik-rng.c
+++ b/drivers/char/hw_random/nomadik-rng.c
@@ -13,8 +13,6 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 
-static struct clk *rng_clk;
-
 static int nmk_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 {
 	void __iomem *base = (void __iomem *)rng->priv;
@@ -36,21 +34,20 @@ static struct hwrng nmk_rng = {
 
 static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id)
 {
+	struct clk *rng_clk;
 	void __iomem *base;
 	int ret;
 
-	rng_clk = devm_clk_get(&dev->dev, NULL);
+	rng_clk = devm_clk_get_enabled(&dev->dev, NULL);
 	if (IS_ERR(rng_clk)) {
 		dev_err(&dev->dev, "could not get rng clock\n");
 		ret = PTR_ERR(rng_clk);
 		return ret;
 	}
 
-	clk_prepare_enable(rng_clk);
-
 	ret = amba_request_regions(dev, dev->dev.init_name);
 	if (ret)
-		goto out_clk;
+		return ret;
 	ret = -ENOMEM;
 	base = devm_ioremap(&dev->dev, dev->res.start,
 			    resource_size(&dev->res));
@@ -64,15 +61,12 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id)
 
 out_release:
 	amba_release_regions(dev);
-out_clk:
-	clk_disable_unprepare(rng_clk);
 	return ret;
 }
 
 static void nmk_rng_remove(struct amba_device *dev)
 {
 	amba_release_regions(dev);
-	clk_disable_unprepare(rng_clk);
 }
 
 static const struct amba_id nmk_rng_ids[] = {

From 6a52ee38c79825ef469ab3ed8588d17e90d2049c Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 2 Jul 2023 19:35:03 +0200
Subject: [PATCH 0331/2047] hwrng: nomadik - use dev_err_probe

Use dev_err_probe to print a message and return an error. This makes the
code a tiny bit shorter.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/nomadik-rng.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c
index 3774adf903a8..8c6a40d6ce3d 100644
--- a/drivers/char/hw_random/nomadik-rng.c
+++ b/drivers/char/hw_random/nomadik-rng.c
@@ -39,11 +39,8 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id)
 	int ret;
 
 	rng_clk = devm_clk_get_enabled(&dev->dev, NULL);
-	if (IS_ERR(rng_clk)) {
-		dev_err(&dev->dev, "could not get rng clock\n");
-		ret = PTR_ERR(rng_clk);
-		return ret;
-	}
+	if (IS_ERR(rng_clk))
+		return dev_err_probe(&dev->dev, PTR_ERR(rng_clk), "could not get rng clock\n");
 
 	ret = amba_request_regions(dev, dev->dev.init_name);
 	if (ret)

From 8690b09c30b274691bc5bb7b1332a27a7cbe5db7 Mon Sep 17 00:00:00 2001
From: You Kangren <youkangren@vivo.com>
Date: Tue, 4 Jul 2023 20:45:32 +0800
Subject: [PATCH 0332/2047] crypto: qat - replace the if statement with min()

Mark UWORD_CPYBUF_SIZE with U suffix to make its type the same
with words_num. Then replace the if statement with min() in
qat_uclo_wr_uimage_raw_page() to make code shorter.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: You Kangren <youkangren@vivo.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/intel/qat/qat_common/qat_uclo.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/intel/qat/qat_common/qat_uclo.c b/drivers/crypto/intel/qat/qat_common/qat_uclo.c
index ce837bcc1cab..4bd150d1441a 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_uclo.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_uclo.c
@@ -11,7 +11,7 @@
 #include "icp_qat_hal.h"
 #include "icp_qat_fw_loader_handle.h"
 
-#define UWORD_CPYBUF_SIZE 1024
+#define UWORD_CPYBUF_SIZE 1024U
 #define INVLD_UWORD 0xffffffffffull
 #define PID_MINOR_REV 0xf
 #define PID_MAJOR_REV (0xf << 4)
@@ -1986,10 +1986,7 @@ static void qat_uclo_wr_uimage_raw_page(struct icp_qat_fw_loader_handle *handle,
 	uw_relative_addr = 0;
 	words_num = encap_page->micro_words_num;
 	while (words_num) {
-		if (words_num < UWORD_CPYBUF_SIZE)
-			cpylen = words_num;
-		else
-			cpylen = UWORD_CPYBUF_SIZE;
+		cpylen = min(words_num, UWORD_CPYBUF_SIZE);
 
 		/* load the buffer */
 		for (i = 0; i < cpylen; i++)

From ea6084559285f4436434630a0e0d9e82709a04f2 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Tue, 4 Jul 2023 19:06:44 +0200
Subject: [PATCH 0333/2047] hwrng: imx-rngc - use dev_err_probe

Simplify the code by calling dev_err_probe instead of dev_err and return.

While at it, use the same device for all error messages.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Reviewed-by: Rouven Czerwinski <r.czerwinskis@pengutronix.de>
Reviewed-by: Rouven Czerwinski <r.czerwinski@pengutronix.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/imx-rngc.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c
index bf07f17f78c8..e4b385b01b11 100644
--- a/drivers/char/hw_random/imx-rngc.c
+++ b/drivers/char/hw_random/imx-rngc.c
@@ -239,10 +239,8 @@ static int __init imx_rngc_probe(struct platform_device *pdev)
 		return PTR_ERR(rngc->base);
 
 	rngc->clk = devm_clk_get_enabled(&pdev->dev, NULL);
-	if (IS_ERR(rngc->clk)) {
-		dev_err(&pdev->dev, "Can not get rng_clk\n");
-		return PTR_ERR(rngc->clk);
-	}
+	if (IS_ERR(rngc->clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(rngc->clk), "Cannot get rng_clk\n");
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
@@ -272,24 +270,18 @@ static int __init imx_rngc_probe(struct platform_device *pdev)
 
 	ret = devm_request_irq(&pdev->dev,
 			irq, imx_rngc_irq, 0, pdev->name, (void *)rngc);
-	if (ret) {
-		dev_err(rngc->dev, "Can't get interrupt working.\n");
-		return ret;
-	}
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret, "Can't get interrupt working.\n");
 
 	if (self_test) {
 		ret = imx_rngc_self_test(rngc);
-		if (ret) {
-			dev_err(rngc->dev, "self test failed\n");
-			return ret;
-		}
+		if (ret)
+			return dev_err_probe(&pdev->dev, ret, "self test failed\n");
 	}
 
 	ret = devm_hwrng_register(&pdev->dev, &rngc->rng);
-	if (ret) {
-		dev_err(&pdev->dev, "hwrng registration failed\n");
-		return ret;
-	}
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret, "hwrng registration failed\n");
 
 	dev_info(&pdev->dev,
 		"Freescale RNG%c registered (HW revision %d.%02d)\n",

From b4198a9a538c330cdff7239f8aba0792d31fab42 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Tue, 4 Jul 2023 19:10:51 +0200
Subject: [PATCH 0334/2047] hwrng: exynos - switch to DEFINE_SIMPLE_DEV_PM_OPS

SIMPLE_DEV_PM_OPS is deprecated, replace it with DEFINE_SIMPLE_DEV_PM_OPS
and use pm_sleep_ptr for setting the driver's pm routines. We can now
remove the __maybe_unused qualifier in the suspend and resume functions.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/exynos-trng.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/char/hw_random/exynos-trng.c b/drivers/char/hw_random/exynos-trng.c
index 9cc3d542dd0f..30207b7ac5f4 100644
--- a/drivers/char/hw_random/exynos-trng.c
+++ b/drivers/char/hw_random/exynos-trng.c
@@ -185,14 +185,14 @@ static int exynos_trng_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static int __maybe_unused exynos_trng_suspend(struct device *dev)
+static int exynos_trng_suspend(struct device *dev)
 {
 	pm_runtime_put_sync(dev);
 
 	return 0;
 }
 
-static int __maybe_unused exynos_trng_resume(struct device *dev)
+static int exynos_trng_resume(struct device *dev)
 {
 	int ret;
 
@@ -205,7 +205,7 @@ static int __maybe_unused exynos_trng_resume(struct device *dev)
 	return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(exynos_trng_pm_ops, exynos_trng_suspend,
+static DEFINE_SIMPLE_DEV_PM_OPS(exynos_trng_pm_ops, exynos_trng_suspend,
 			 exynos_trng_resume);
 
 static const struct of_device_id exynos_trng_dt_match[] = {
@@ -219,7 +219,7 @@ MODULE_DEVICE_TABLE(of, exynos_trng_dt_match);
 static struct platform_driver exynos_trng_driver = {
 	.driver = {
 		.name = "exynos-trng",
-		.pm = &exynos_trng_pm_ops,
+		.pm = pm_sleep_ptr(&exynos_trng_pm_ops),
 		.of_match_table = exynos_trng_dt_match,
 	},
 	.probe = exynos_trng_probe,

From b157d50bd2de501023391bab81f6198153398b2d Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Tue, 4 Jul 2023 19:32:00 +0200
Subject: [PATCH 0335/2047] hwrng: pic32 - enable compile-testing

Enable compile testing for the pic32 driver.

Remove the dependency on HW_RANDOM. The pic32 config section is under
"if HW_RANDOM".

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index e0b3786ca51b..1aeba12391a1 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -385,7 +385,7 @@ config HW_RANDOM_STM32
 
 config HW_RANDOM_PIC32
 	tristate "Microchip PIC32 Random Number Generator support"
-	depends on HW_RANDOM && MACH_PIC32
+	depends on MACH_PIC32 || COMPILE_TEST
 	default y
 	help
 	  This driver provides kernel-side support for the Random Number

From 6755ad74aac0fb1c79b14724feb81b2f6ff25847 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Tue, 4 Jul 2023 19:32:01 +0200
Subject: [PATCH 0336/2047] hwrng: pic32 - use devm_clk_get_enabled

Use devm_clk_get_enabled in the pic32 driver. Ensure that the clock is
enabled as long as the driver is registered with the hwrng core.

Fixes: 7ea39973d1e5 ("hwrng: pic32 - Use device-managed registration API")
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/pic32-rng.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/drivers/char/hw_random/pic32-rng.c b/drivers/char/hw_random/pic32-rng.c
index 99c8bd0859a1..e04a054e8930 100644
--- a/drivers/char/hw_random/pic32-rng.c
+++ b/drivers/char/hw_random/pic32-rng.c
@@ -36,7 +36,6 @@
 struct pic32_rng {
 	void __iomem	*base;
 	struct hwrng	rng;
-	struct clk	*clk;
 };
 
 /*
@@ -70,6 +69,7 @@ static int pic32_rng_read(struct hwrng *rng, void *buf, size_t max,
 static int pic32_rng_probe(struct platform_device *pdev)
 {
 	struct pic32_rng *priv;
+	struct clk *clk;
 	u32 v;
 	int ret;
 
@@ -81,13 +81,9 @@ static int pic32_rng_probe(struct platform_device *pdev)
 	if (IS_ERR(priv->base))
 		return PTR_ERR(priv->base);
 
-	priv->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(priv->clk))
-		return PTR_ERR(priv->clk);
-
-	ret = clk_prepare_enable(priv->clk);
-	if (ret)
-		return ret;
+	clk = devm_clk_get_enabled(&pdev->dev, NULL);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
 
 	/* enable TRNG in enhanced mode */
 	v = TRNGEN | TRNGMOD;
@@ -98,15 +94,11 @@ static int pic32_rng_probe(struct platform_device *pdev)
 
 	ret = devm_hwrng_register(&pdev->dev, &priv->rng);
 	if (ret)
-		goto err_register;
+		return ret;
 
 	platform_set_drvdata(pdev, priv);
 
 	return 0;
-
-err_register:
-	clk_disable_unprepare(priv->clk);
-	return ret;
 }
 
 static int pic32_rng_remove(struct platform_device *pdev)
@@ -114,7 +106,6 @@ static int pic32_rng_remove(struct platform_device *pdev)
 	struct pic32_rng *rng = platform_get_drvdata(pdev);
 
 	writel(0, rng->base + RNGCON);
-	clk_disable_unprepare(rng->clk);
 	return 0;
 }
 

From 97c63a9dea2929fc4fd42f1d4446d97b6c011a1f Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Tue, 4 Jul 2023 19:32:02 +0200
Subject: [PATCH 0337/2047] hwrng: pic32 - remove unused defines

Remove some unused defines and fix the indentation.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/pic32-rng.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/char/hw_random/pic32-rng.c b/drivers/char/hw_random/pic32-rng.c
index e04a054e8930..c1b3f5915f03 100644
--- a/drivers/char/hw_random/pic32-rng.c
+++ b/drivers/char/hw_random/pic32-rng.c
@@ -19,19 +19,12 @@
 #include <linux/slab.h>
 
 #define RNGCON		0x04
-#define  TRNGEN		BIT(8)
-#define  PRNGEN		BIT(9)
-#define  PRNGCONT	BIT(10)
-#define  TRNGMOD	BIT(11)
-#define  SEEDLOAD	BIT(12)
-#define RNGPOLY1	0x08
-#define RNGPOLY2	0x0C
-#define RNGNUMGEN1	0x10
-#define RNGNUMGEN2	0x14
+#define TRNGEN		BIT(8)
+#define TRNGMOD		BIT(11)
 #define RNGSEED1	0x18
 #define RNGSEED2	0x1C
 #define RNGRCNT		0x20
-#define  RCNT_MASK	0x7F
+#define RCNT_MASK	0x7F
 
 struct pic32_rng {
 	void __iomem	*base;

From ac0042fa5aa5ac7929d95f4bf9185429ccdb70cb Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Tue, 4 Jul 2023 19:32:03 +0200
Subject: [PATCH 0338/2047] hwrng: pic32 - enable TRNG only while it's used

The probe function enables the TRNG hardware before registering the
driver. If registration fails, probe returns an error, but the TRNG
remains enabled.

Define init and cleanup functions, enable and disable the hardware there.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/pic32-rng.c | 41 ++++++++++++++----------------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/drivers/char/hw_random/pic32-rng.c b/drivers/char/hw_random/pic32-rng.c
index c1b3f5915f03..1902f4389a3f 100644
--- a/drivers/char/hw_random/pic32-rng.c
+++ b/drivers/char/hw_random/pic32-rng.c
@@ -38,6 +38,15 @@ struct pic32_rng {
  */
 #define RNG_TIMEOUT 500
 
+static int pic32_rng_init(struct hwrng *rng)
+{
+	struct pic32_rng *priv = container_of(rng, struct pic32_rng, rng);
+
+	/* enable TRNG in enhanced mode */
+	writel(TRNGEN | TRNGMOD, priv->base + RNGCON);
+	return 0;
+}
+
 static int pic32_rng_read(struct hwrng *rng, void *buf, size_t max,
 			  bool wait)
 {
@@ -59,12 +68,17 @@ static int pic32_rng_read(struct hwrng *rng, void *buf, size_t max,
 	return -EIO;
 }
 
+static void pic32_rng_cleanup(struct hwrng *rng)
+{
+	struct pic32_rng *priv = container_of(rng, struct pic32_rng, rng);
+
+	writel(0, priv->base + RNGCON);
+}
+
 static int pic32_rng_probe(struct platform_device *pdev)
 {
 	struct pic32_rng *priv;
 	struct clk *clk;
-	u32 v;
-	int ret;
 
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -78,28 +92,12 @@ static int pic32_rng_probe(struct platform_device *pdev)
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
-	/* enable TRNG in enhanced mode */
-	v = TRNGEN | TRNGMOD;
-	writel(v, priv->base + RNGCON);
-
 	priv->rng.name = pdev->name;
+	priv->rng.init = pic32_rng_init;
 	priv->rng.read = pic32_rng_read;
+	priv->rng.cleanup = pic32_rng_cleanup;
 
-	ret = devm_hwrng_register(&pdev->dev, &priv->rng);
-	if (ret)
-		return ret;
-
-	platform_set_drvdata(pdev, priv);
-
-	return 0;
-}
-
-static int pic32_rng_remove(struct platform_device *pdev)
-{
-	struct pic32_rng *rng = platform_get_drvdata(pdev);
-
-	writel(0, rng->base + RNGCON);
-	return 0;
+	return devm_hwrng_register(&pdev->dev, &priv->rng);
 }
 
 static const struct of_device_id pic32_rng_of_match[] __maybe_unused = {
@@ -110,7 +108,6 @@ MODULE_DEVICE_TABLE(of, pic32_rng_of_match);
 
 static struct platform_driver pic32_rng_driver = {
 	.probe		= pic32_rng_probe,
-	.remove		= pic32_rng_remove,
 	.driver		= {
 		.name	= "pic32-rng",
 		.of_match_table = of_match_ptr(pic32_rng_of_match),

From aa4b2f9ea53e9fc93f9d30746c32af5ac25f2a46 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 5 Jul 2023 15:21:54 +0800
Subject: [PATCH 0339/2047] crypto: omap-des - Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/omap-des.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index f783769ea110..371a51094e34 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -971,18 +971,12 @@ static int omap_des_probe(struct platform_device *pdev)
 	dd->dev = dev;
 	platform_set_drvdata(pdev, dd);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(dev, "no MEM resource info\n");
-		goto err_res;
-	}
-
 	err = (dev->of_node) ? omap_des_get_of(dd, pdev) :
 			       omap_des_get_pdev(dd, pdev);
 	if (err)
 		goto err_res;
 
-	dd->io_base = devm_ioremap_resource(dev, res);
+	dd->io_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(dd->io_base)) {
 		err = PTR_ERR(dd->io_base);
 		goto err_res;

From 1c5ff2fc35ac2c6da3509f7afa97b91f328dcfaa Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 5 Jul 2023 15:21:55 +0800
Subject: [PATCH 0340/2047] crypto: keembay - Convert to
 devm_platform_ioremap_resource()

Use devm_platform_ioremap_resource() to simplify code.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c b/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c
index d4bcbed1f546..51a6de6294cb 100644
--- a/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c
+++ b/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c
@@ -1170,7 +1170,6 @@ static int kmb_ocs_hcu_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct ocs_hcu_dev *hcu_dev;
-	struct resource *hcu_mem;
 	int rc;
 
 	hcu_dev = devm_kzalloc(dev, sizeof(*hcu_dev), GFP_KERNEL);
@@ -1184,14 +1183,7 @@ static int kmb_ocs_hcu_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;
 
-	/* Get the memory address and remap. */
-	hcu_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!hcu_mem) {
-		dev_err(dev, "Could not retrieve io mem resource.\n");
-		return -ENODEV;
-	}
-
-	hcu_dev->io_base = devm_ioremap_resource(dev, hcu_mem);
+	hcu_dev->io_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(hcu_dev->io_base))
 		return PTR_ERR(hcu_dev->io_base);
 

From 3aaafe054b7140418c9355512bc1a7a4980943c3 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 5 Jul 2023 15:21:56 +0800
Subject: [PATCH 0341/2047] crypto: atmel-aes - Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-aes.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 143d33fbb316..9692254faad9 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -2566,11 +2566,9 @@ static int atmel_aes_probe(struct platform_device *pdev)
 
 	crypto_init_queue(&aes_dd->queue, ATMEL_AES_QUEUE_LENGTH);
 
-	/* Get the base address */
-	aes_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!aes_res) {
-		dev_err(dev, "no MEM resource info\n");
-		err = -ENODEV;
+	aes_dd->io_base = devm_platform_get_and_ioremap_resource(pdev, 0, &aes_res);
+	if (IS_ERR(aes_dd->io_base)) {
+		err = PTR_ERR(aes_dd->io_base);
 		goto err_tasklet_kill;
 	}
 	aes_dd->phys_base = aes_res->start;
@@ -2597,13 +2595,6 @@ static int atmel_aes_probe(struct platform_device *pdev)
 		goto err_tasklet_kill;
 	}
 
-	aes_dd->io_base = devm_ioremap_resource(&pdev->dev, aes_res);
-	if (IS_ERR(aes_dd->io_base)) {
-		dev_err(dev, "can't ioremap\n");
-		err = PTR_ERR(aes_dd->io_base);
-		goto err_tasklet_kill;
-	}
-
 	err = clk_prepare(aes_dd->iclk);
 	if (err)
 		goto err_tasklet_kill;

From f069fa9d789d908e21350fcfb6a34271a3f2d9b6 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 5 Jul 2023 15:21:57 +0800
Subject: [PATCH 0342/2047] crypto: atmel-sha - Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-sha.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 6bef634d3c86..f2031f934be9 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -2604,11 +2604,9 @@ static int atmel_sha_probe(struct platform_device *pdev)
 
 	crypto_init_queue(&sha_dd->queue, ATMEL_SHA_QUEUE_LENGTH);
 
-	/* Get the base address */
-	sha_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!sha_res) {
-		dev_err(dev, "no MEM resource info\n");
-		err = -ENODEV;
+	sha_dd->io_base = devm_platform_get_and_ioremap_resource(pdev, 0, &sha_res);
+	if (IS_ERR(sha_dd->io_base)) {
+		err = PTR_ERR(sha_dd->io_base);
 		goto err_tasklet_kill;
 	}
 	sha_dd->phys_base = sha_res->start;
@@ -2635,13 +2633,6 @@ static int atmel_sha_probe(struct platform_device *pdev)
 		goto err_tasklet_kill;
 	}
 
-	sha_dd->io_base = devm_ioremap_resource(&pdev->dev, sha_res);
-	if (IS_ERR(sha_dd->io_base)) {
-		dev_err(dev, "can't ioremap\n");
-		err = PTR_ERR(sha_dd->io_base);
-		goto err_tasklet_kill;
-	}
-
 	err = clk_prepare(sha_dd->iclk);
 	if (err)
 		goto err_tasklet_kill;

From 32f91bb3df785a6334aa5acd46ac85dabff7368b Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 5 Jul 2023 15:21:58 +0800
Subject: [PATCH 0343/2047] crypto: atmel-tdes - Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-tdes.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index c9ded8be9c39..ba8981f326cf 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -1172,11 +1172,9 @@ static int atmel_tdes_probe(struct platform_device *pdev)
 
 	crypto_init_queue(&tdes_dd->queue, ATMEL_TDES_QUEUE_LENGTH);
 
-	/* Get the base address */
-	tdes_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!tdes_res) {
-		dev_err(dev, "no MEM resource info\n");
-		err = -ENODEV;
+	tdes_dd->io_base = devm_platform_get_and_ioremap_resource(pdev, 0, &tdes_res);
+	if (IS_ERR(tdes_dd->io_base)) {
+		err = PTR_ERR(tdes_dd->io_base);
 		goto err_tasklet_kill;
 	}
 	tdes_dd->phys_base = tdes_res->start;
@@ -1203,12 +1201,6 @@ static int atmel_tdes_probe(struct platform_device *pdev)
 		goto err_tasklet_kill;
 	}
 
-	tdes_dd->io_base = devm_ioremap_resource(&pdev->dev, tdes_res);
-	if (IS_ERR(tdes_dd->io_base)) {
-		err = PTR_ERR(tdes_dd->io_base);
-		goto err_tasklet_kill;
-	}
-
 	err = atmel_tdes_hw_version_init(tdes_dd);
 	if (err)
 		goto err_tasklet_kill;

From 66c7b6473e2d6838973216a853ed1cc8dd193dc7 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 5 Jul 2023 19:52:42 +0800
Subject: [PATCH 0344/2047] hwrng: timeriomem - Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/timeriomem-rng.c | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index 26f322d19a88..3db9d868efb1 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -113,16 +113,6 @@ static int timeriomem_rng_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENXIO;
-
-	if (res->start % 4 != 0 || resource_size(res) < 4) {
-		dev_err(&pdev->dev,
-			"address must be at least four bytes wide and 32-bit aligned\n");
-		return -EINVAL;
-	}
-
 	/* Allocate memory for the device structure (and zero it) */
 	priv = devm_kzalloc(&pdev->dev,
 			sizeof(struct timeriomem_rng_private), GFP_KERNEL);
@@ -131,6 +121,16 @@ static int timeriomem_rng_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, priv);
 
+	priv->io_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(priv->io_base))
+		return PTR_ERR(priv->io_base);
+
+	if (res->start % 4 != 0 || resource_size(res) < 4) {
+		dev_err(&pdev->dev,
+			"address must be at least four bytes wide and 32-bit aligned\n");
+		return -EINVAL;
+	}
+
 	if (pdev->dev.of_node) {
 		int i;
 
@@ -158,11 +158,6 @@ static int timeriomem_rng_probe(struct platform_device *pdev)
 	priv->rng_ops.name = dev_name(&pdev->dev);
 	priv->rng_ops.read = timeriomem_rng_read;
 
-	priv->io_base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(priv->io_base)) {
-		return PTR_ERR(priv->io_base);
-	}
-
 	/* Assume random data is already available. */
 	priv->present = 1;
 	complete(&priv->completion);

From b3882fa2a10e28b7642d0c11fcc26caab90e0fa4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 7 Jul 2023 09:07:53 +0200
Subject: [PATCH 0345/2047] crypto: starfive - Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is ignored (apart
from emitting a warning) and this typically results in resource leaks.
To improve here there is a quest to make the remove callback return
void. In the first step of this quest all drivers are converted to
.remove_new() which already returns void. Eventually after all drivers
are converted, .remove_new() is renamed to .remove().

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/starfive/jh7110-cryp.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/starfive/jh7110-cryp.c b/drivers/crypto/starfive/jh7110-cryp.c
index cc43556b6c80..e573c097a4a0 100644
--- a/drivers/crypto/starfive/jh7110-cryp.c
+++ b/drivers/crypto/starfive/jh7110-cryp.c
@@ -212,7 +212,7 @@ err_probe_defer:
 	return ret;
 }
 
-static int starfive_cryp_remove(struct platform_device *pdev)
+static void starfive_cryp_remove(struct platform_device *pdev)
 {
 	struct starfive_cryp_dev *cryp = platform_get_drvdata(pdev);
 
@@ -233,8 +233,6 @@ static int starfive_cryp_remove(struct platform_device *pdev)
 	clk_disable_unprepare(cryp->hclk);
 	clk_disable_unprepare(cryp->ahb);
 	reset_control_assert(cryp->rst);
-
-	return 0;
 }
 
 static const struct of_device_id starfive_dt_ids[] __maybe_unused = {
@@ -245,7 +243,7 @@ MODULE_DEVICE_TABLE(of, starfive_dt_ids);
 
 static struct platform_driver starfive_cryp_driver = {
 	.probe  = starfive_cryp_probe,
-	.remove = starfive_cryp_remove,
+	.remove_new = starfive_cryp_remove,
 	.driver = {
 		.name           = DRIVER_NAME,
 		.of_match_table = starfive_dt_ids,

From b0ab0797f7ab74bd6e78ddce7d2ea580e588c60d Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Fri, 7 Jul 2023 21:18:19 +0800
Subject: [PATCH 0346/2047] crypto: hisilicon/hpre - ensure private key less
 than n

The private key of the curve key size generated by stdrng, which maybe
not less than n. Therefore, the private key with the curve key size
minus 1 is generated to ensure that the private key is less than n.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_crypto.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index 8ede77310dc5..9a1c61be32cc 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -1392,9 +1392,9 @@ static int hpre_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
 				unsigned int len)
 {
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+	unsigned int sz, sz_shift, curve_sz;
 	struct device *dev = ctx->dev;
 	char key[HPRE_ECC_MAX_KSZ];
-	unsigned int sz, sz_shift;
 	struct ecdh params;
 	int ret;
 
@@ -1406,7 +1406,13 @@ static int hpre_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
 	/* Use stdrng to generate private key */
 	if (!params.key || !params.key_size) {
 		params.key = key;
-		params.key_size = hpre_ecdh_get_curvesz(ctx->curve_id);
+		curve_sz = hpre_ecdh_get_curvesz(ctx->curve_id);
+		if (!curve_sz) {
+			dev_err(dev, "Invalid curve size!\n");
+			return -EINVAL;
+		}
+
+		params.key_size = curve_sz - 1;
 		ret = ecdh_gen_privkey(ctx, &params);
 		if (ret)
 			return ret;

From a70eb93a2477371638ef481aaae7bb7b760d3004 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 14 Jul 2023 11:44:13 -0600
Subject: [PATCH 0347/2047] cpufreq: Explicitly include correct DT includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/armada-37xx-cpufreq.c  | 4 +---
 drivers/cpufreq/mediatek-cpufreq-hw.c  | 3 ++-
 drivers/cpufreq/ppc_cbe_cpufreq.c      | 2 +-
 drivers/cpufreq/ppc_cbe_cpufreq_pmi.c  | 1 -
 drivers/cpufreq/qcom-cpufreq-nvmem.c   | 1 -
 drivers/cpufreq/scpi-cpufreq.c         | 2 +-
 drivers/cpufreq/sti-cpufreq.c          | 2 +-
 drivers/cpufreq/ti-cpufreq.c           | 2 +-
 drivers/cpufreq/vexpress-spc-cpufreq.c | 1 -
 9 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
index b74289a95a17..bea41ccabf1f 100644
--- a/drivers/cpufreq/armada-37xx-cpufreq.c
+++ b/drivers/cpufreq/armada-37xx-cpufreq.c
@@ -14,10 +14,8 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/mfd/syscon.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/regmap.h>
diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
index b22f5cc8a463..c93e14eb6221 100644
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -10,8 +10,9 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 
 #define LUT_MAX_ENTRIES			32U
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c
index e3313ce63b38..88afc49941b7 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.c
@@ -9,7 +9,7 @@
 
 #include <linux/cpufreq.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 
 #include <asm/machdep.h>
 #include <asm/cell-regs.h>
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
index 4fba3637b115..6f0c32592416 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
@@ -11,7 +11,6 @@
 #include <linux/types.h>
 #include <linux/timer.h>
 #include <linux/init.h>
-#include <linux/of_platform.h>
 #include <linux/pm_qos.h>
 #include <linux/slab.h>
 
diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c
index a88b6fe5db50..4590c2570086 100644
--- a/drivers/cpufreq/qcom-cpufreq-nvmem.c
+++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c
@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/nvmem-consumer.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_opp.h>
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index fd2c16821d54..ac719aca49b7 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -14,7 +14,7 @@
 #include <linux/cpumask.h>
 #include <linux/export.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/scpi_protocol.h>
 #include <linux/slab.h>
diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c
index 1a63aeea8711..9c542e723a15 100644
--- a/drivers/cpufreq/sti-cpufreq.c
+++ b/drivers/cpufreq/sti-cpufreq.c
@@ -13,7 +13,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/regmap.h>
 
diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
index d5cd2fd25cad..3c37d7899660 100644
--- a/drivers/cpufreq/ti-cpufreq.c
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -12,7 +12,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c
index d295f405c4bb..865e50164803 100644
--- a/drivers/cpufreq/vexpress-spc-cpufreq.c
+++ b/drivers/cpufreq/vexpress-spc-cpufreq.c
@@ -18,7 +18,6 @@
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/slab.h>

From a7fb1727537253460466b840df0ce0f860f29947 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:04 +0800
Subject: [PATCH 0348/2047] cpufreq: sun50i: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Jernej Škrabec <jernej.skrabec@gmail.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/sun50i-cpufreq-nvmem.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
index 4321d7bbe769..32a9c88f8ff6 100644
--- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c
+++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c
@@ -137,7 +137,7 @@ free_opp:
 	return ret;
 }
 
-static int sun50i_cpufreq_nvmem_remove(struct platform_device *pdev)
+static void sun50i_cpufreq_nvmem_remove(struct platform_device *pdev)
 {
 	int *opp_tokens = platform_get_drvdata(pdev);
 	unsigned int cpu;
@@ -148,13 +148,11 @@ static int sun50i_cpufreq_nvmem_remove(struct platform_device *pdev)
 		dev_pm_opp_put_prop_name(opp_tokens[cpu]);
 
 	kfree(opp_tokens);
-
-	return 0;
 }
 
 static struct platform_driver sun50i_cpufreq_driver = {
 	.probe = sun50i_cpufreq_nvmem_probe,
-	.remove = sun50i_cpufreq_nvmem_remove,
+	.remove_new = sun50i_cpufreq_nvmem_remove,
 	.driver = {
 		.name = "sun50i-cpufreq-nvmem",
 	},

From 18da417686b3db681b1b77a925e4b60d73612e17 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:05 +0800
Subject: [PATCH 0349/2047] cpufreq: dt: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 4aec4b2a5225..8bd6e5e8f121 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -349,11 +349,10 @@ err:
 	return ret;
 }
 
-static int dt_cpufreq_remove(struct platform_device *pdev)
+static void dt_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&dt_cpufreq_driver);
 	dt_cpufreq_release();
-	return 0;
 }
 
 static struct platform_driver dt_cpufreq_platdrv = {
@@ -361,7 +360,7 @@ static struct platform_driver dt_cpufreq_platdrv = {
 		.name	= "cpufreq-dt",
 	},
 	.probe		= dt_cpufreq_probe,
-	.remove		= dt_cpufreq_remove,
+	.remove_new	= dt_cpufreq_remove,
 };
 module_platform_driver(dt_cpufreq_platdrv);
 

From 573d54dba2825bf7b5447fbaef4d95a3bf3582f4 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:06 +0800
Subject: [PATCH 0350/2047] cpufreq: qcom-cpufreq-hw: Convert to platform
 remove callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/qcom-cpufreq-hw.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index f2830371d25f..f24cf2eddf1e 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -730,16 +730,14 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int qcom_cpufreq_hw_driver_remove(struct platform_device *pdev)
+static void qcom_cpufreq_hw_driver_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&cpufreq_qcom_hw_driver);
-
-	return 0;
 }
 
 static struct platform_driver qcom_cpufreq_hw_driver = {
 	.probe = qcom_cpufreq_hw_driver_probe,
-	.remove = qcom_cpufreq_hw_driver_remove,
+	.remove_new = qcom_cpufreq_hw_driver_remove,
 	.driver = {
 		.name = "qcom-cpufreq-hw",
 		.of_match_table = qcom_cpufreq_hw_match,

From 9ffb053dc51058cef3d49aca9881380282257797 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:07 +0800
Subject: [PATCH 0351/2047] cpufreq: vexpress: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/vexpress-spc-cpufreq.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c
index 865e50164803..9ac4ea50b874 100644
--- a/drivers/cpufreq/vexpress-spc-cpufreq.c
+++ b/drivers/cpufreq/vexpress-spc-cpufreq.c
@@ -551,7 +551,7 @@ static int ve_spc_cpufreq_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int ve_spc_cpufreq_remove(struct platform_device *pdev)
+static void ve_spc_cpufreq_remove(struct platform_device *pdev)
 {
 	bL_switcher_get_enabled();
 	__bLs_unregister_notifier();
@@ -559,7 +559,6 @@ static int ve_spc_cpufreq_remove(struct platform_device *pdev)
 	bL_switcher_put_enabled();
 	pr_info("%s: Un-registered platform driver: %s\n", __func__,
 		ve_spc_cpufreq_driver.name);
-	return 0;
 }
 
 static struct platform_driver ve_spc_cpufreq_platdrv = {
@@ -567,7 +566,7 @@ static struct platform_driver ve_spc_cpufreq_platdrv = {
 		.name	= "vexpress-spc-cpufreq",
 	},
 	.probe		= ve_spc_cpufreq_probe,
-	.remove		= ve_spc_cpufreq_remove,
+	.remove_new	= ve_spc_cpufreq_remove,
 };
 module_platform_driver(ve_spc_cpufreq_platdrv);
 

From 1d61b32e500de0223eb42bf5642718a8801d9bc1 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:08 +0800
Subject: [PATCH 0352/2047] cpufreq: imx6q: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/imx6q-cpufreq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 9fb1501033bb..494d044b9e72 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -519,7 +519,7 @@ put_node:
 	return ret;
 }
 
-static int imx6q_cpufreq_remove(struct platform_device *pdev)
+static void imx6q_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&imx6q_cpufreq_driver);
 	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
@@ -530,8 +530,6 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev)
 	regulator_put(soc_reg);
 
 	clk_bulk_put(num_clks, clks);
-
-	return 0;
 }
 
 static struct platform_driver imx6q_cpufreq_platdrv = {
@@ -539,7 +537,7 @@ static struct platform_driver imx6q_cpufreq_platdrv = {
 		.name	= "imx6q-cpufreq",
 	},
 	.probe		= imx6q_cpufreq_probe,
-	.remove		= imx6q_cpufreq_remove,
+	.remove_new	= imx6q_cpufreq_remove,
 };
 module_platform_driver(imx6q_cpufreq_platdrv);
 

From b68ea4c2fb89f690c6d5b34002e3ea40cbecb8a2 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:09 +0800
Subject: [PATCH 0353/2047] cpufreq: mediatek-hw: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/mediatek-cpufreq-hw.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
index c93e14eb6221..d46afb3c0092 100644
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -316,11 +316,9 @@ static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int mtk_cpufreq_hw_driver_remove(struct platform_device *pdev)
+static void mtk_cpufreq_hw_driver_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&cpufreq_mtk_hw_driver);
-
-	return 0;
 }
 
 static const struct of_device_id mtk_cpufreq_hw_match[] = {
@@ -331,7 +329,7 @@ MODULE_DEVICE_TABLE(of, mtk_cpufreq_hw_match);
 
 static struct platform_driver mtk_cpufreq_hw_driver = {
 	.probe = mtk_cpufreq_hw_driver_probe,
-	.remove = mtk_cpufreq_hw_driver_remove,
+	.remove_new = mtk_cpufreq_hw_driver_remove,
 	.driver = {
 		.name = "mtk-cpufreq-hw",
 		.of_match_table = mtk_cpufreq_hw_match,

From d5aa35fcf3ee0dc7319d12132ed14e61f38007ed Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:10 +0800
Subject: [PATCH 0354/2047] cpufreq: scpi: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/scpi-cpufreq.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index ac719aca49b7..d33be56983ed 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -208,11 +208,10 @@ static int scpi_cpufreq_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int scpi_cpufreq_remove(struct platform_device *pdev)
+static void scpi_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&scpi_cpufreq_driver);
 	scpi_ops = NULL;
-	return 0;
 }
 
 static struct platform_driver scpi_cpufreq_platdrv = {
@@ -220,7 +219,7 @@ static struct platform_driver scpi_cpufreq_platdrv = {
 		.name	= "scpi-cpufreq",
 	},
 	.probe		= scpi_cpufreq_probe,
-	.remove		= scpi_cpufreq_remove,
+	.remove_new	= scpi_cpufreq_remove,
 };
 module_platform_driver(scpi_cpufreq_platdrv);
 

From 552f8df83e685c666d0df59559253fe02679f966 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:11 +0800
Subject: [PATCH 0355/2047] cpufreq: tegra194: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/tegra194-cpufreq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
index 36dad5ea5947..c90b30469165 100644
--- a/drivers/cpufreq/tegra194-cpufreq.c
+++ b/drivers/cpufreq/tegra194-cpufreq.c
@@ -708,12 +708,10 @@ put_bpmp:
 	return err;
 }
 
-static int tegra194_cpufreq_remove(struct platform_device *pdev)
+static void tegra194_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&tegra194_cpufreq_driver);
 	tegra194_cpufreq_free_resources();
-
-	return 0;
 }
 
 static const struct of_device_id tegra194_cpufreq_of_match[] = {
@@ -730,7 +728,7 @@ static struct platform_driver tegra194_ccplex_driver = {
 		.of_match_table = tegra194_cpufreq_of_match,
 	},
 	.probe = tegra194_cpufreq_probe,
-	.remove = tegra194_cpufreq_remove,
+	.remove_new = tegra194_cpufreq_remove,
 };
 module_platform_driver(tegra194_ccplex_driver);
 

From f1154d65d12feda9ddba36bad4b91984e6a8db36 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:12 +0800
Subject: [PATCH 0356/2047] cpufreq: brcmstb-avs-cpufreq: Convert to platform
 remove callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/brcmstb-avs-cpufreq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c
index ffea6402189d..1bdd513bcd19 100644
--- a/drivers/cpufreq/brcmstb-avs-cpufreq.c
+++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c
@@ -749,13 +749,11 @@ static int brcm_avs_cpufreq_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int brcm_avs_cpufreq_remove(struct platform_device *pdev)
+static void brcm_avs_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&brcm_avs_driver);
 
 	brcm_avs_prepare_uninit(pdev);
-
-	return 0;
 }
 
 static const struct of_device_id brcm_avs_cpufreq_match[] = {
@@ -770,7 +768,7 @@ static struct platform_driver brcm_avs_cpufreq_platdrv = {
 		.of_match_table = brcm_avs_cpufreq_match,
 	},
 	.probe		= brcm_avs_cpufreq_probe,
-	.remove		= brcm_avs_cpufreq_remove,
+	.remove_new	= brcm_avs_cpufreq_remove,
 };
 module_platform_driver(brcm_avs_cpufreq_platdrv);
 

From 8d09c46a494af814e059c77f4a1a3319776594e0 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:13 +0800
Subject: [PATCH 0357/2047] cpufreq: imx-cpufreq-dt: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/imx-cpufreq-dt.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c
index 535867a7dfdd..577bb9e2f112 100644
--- a/drivers/cpufreq/imx-cpufreq-dt.c
+++ b/drivers/cpufreq/imx-cpufreq-dt.c
@@ -172,20 +172,18 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int imx_cpufreq_dt_remove(struct platform_device *pdev)
+static void imx_cpufreq_dt_remove(struct platform_device *pdev)
 {
 	platform_device_unregister(cpufreq_dt_pdev);
 	if (!of_machine_is_compatible("fsl,imx7ulp"))
 		dev_pm_opp_put_supported_hw(cpufreq_opp_token);
 	else
 		clk_bulk_put(ARRAY_SIZE(imx7ulp_clks), imx7ulp_clks);
-
-	return 0;
 }
 
 static struct platform_driver imx_cpufreq_dt_driver = {
 	.probe = imx_cpufreq_dt_probe,
-	.remove = imx_cpufreq_dt_remove,
+	.remove_new = imx_cpufreq_dt_remove,
 	.driver = {
 		.name = "imx-cpufreq-dt",
 	},

From 463ff6d4d42d5672b0835e10a8144ba1832c497f Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:14 +0800
Subject: [PATCH 0358/2047] cpufreq: davinci: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/davinci-cpufreq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c
index ebb3a8102681..7d2754411d8c 100644
--- a/drivers/cpufreq/davinci-cpufreq.c
+++ b/drivers/cpufreq/davinci-cpufreq.c
@@ -131,7 +131,7 @@ static int __init davinci_cpufreq_probe(struct platform_device *pdev)
 	return cpufreq_register_driver(&davinci_driver);
 }
 
-static int __exit davinci_cpufreq_remove(struct platform_device *pdev)
+static void __exit davinci_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&davinci_driver);
 
@@ -139,15 +139,13 @@ static int __exit davinci_cpufreq_remove(struct platform_device *pdev)
 
 	if (cpufreq.asyncclk)
 		clk_put(cpufreq.asyncclk);
-
-	return 0;
 }
 
 static struct platform_driver davinci_cpufreq_driver = {
 	.driver = {
 		.name	 = "cpufreq-davinci",
 	},
-	.remove = __exit_p(davinci_cpufreq_remove),
+	.remove_new = __exit_p(davinci_cpufreq_remove),
 };
 
 int __init davinci_cpufreq_init(void)

From 46ebd4d865f4383ce3c45dc97d581614f84a6ac2 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:15 +0800
Subject: [PATCH 0359/2047] cpufreq: raspberrypi: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/raspberrypi-cpufreq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/raspberrypi-cpufreq.c b/drivers/cpufreq/raspberrypi-cpufreq.c
index 2bc7d9734272..e0705cc9a57d 100644
--- a/drivers/cpufreq/raspberrypi-cpufreq.c
+++ b/drivers/cpufreq/raspberrypi-cpufreq.c
@@ -65,7 +65,7 @@ remove_opp:
 	return ret;
 }
 
-static int raspberrypi_cpufreq_remove(struct platform_device *pdev)
+static void raspberrypi_cpufreq_remove(struct platform_device *pdev)
 {
 	struct device *cpu_dev;
 
@@ -74,8 +74,6 @@ static int raspberrypi_cpufreq_remove(struct platform_device *pdev)
 		dev_pm_opp_remove_all_dynamic(cpu_dev);
 
 	platform_device_unregister(cpufreq_dt);
-
-	return 0;
 }
 
 /*
@@ -87,7 +85,7 @@ static struct platform_driver raspberrypi_cpufreq_driver = {
 		.name = "raspberrypi-cpufreq",
 	},
 	.probe          = raspberrypi_cpufreq_probe,
-	.remove		= raspberrypi_cpufreq_remove,
+	.remove_new	= raspberrypi_cpufreq_remove,
 };
 module_platform_driver(raspberrypi_cpufreq_driver);
 

From d0988eaaa31d29c6bff1d079b42ea6c2ef9cab5e Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:16 +0800
Subject: [PATCH 0360/2047] cpufreq: pcc-cpufreq: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/pcc-cpufreq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index 73efbcf5513b..84fe37def0f1 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -608,22 +608,20 @@ static int __init pcc_cpufreq_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int pcc_cpufreq_remove(struct platform_device *pdev)
+static void pcc_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&pcc_cpufreq_driver);
 
 	pcc_clear_mapping();
 
 	free_percpu(pcc_cpu_info);
-
-	return 0;
 }
 
 static struct platform_driver pcc_cpufreq_platdrv = {
 	.driver = {
 		.name	= "pcc-cpufreq",
 	},
-	.remove		= pcc_cpufreq_remove,
+	.remove_new	= pcc_cpufreq_remove,
 };
 
 static int __init pcc_cpufreq_init(void)

From cc35f433543f3db25da37e6a663f07f970a03caf Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:17 +0800
Subject: [PATCH 0361/2047] cpufreq: kirkwood: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/kirkwood-cpufreq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c
index 95588101efbd..fd20b986d1f2 100644
--- a/drivers/cpufreq/kirkwood-cpufreq.c
+++ b/drivers/cpufreq/kirkwood-cpufreq.c
@@ -178,20 +178,18 @@ out_node:
 	return err;
 }
 
-static int kirkwood_cpufreq_remove(struct platform_device *pdev)
+static void kirkwood_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&kirkwood_cpufreq_driver);
 
 	clk_disable_unprepare(priv.powersave_clk);
 	clk_disable_unprepare(priv.ddr_clk);
 	clk_disable_unprepare(priv.cpu_clk);
-
-	return 0;
 }
 
 static struct platform_driver kirkwood_cpufreq_platform_driver = {
 	.probe = kirkwood_cpufreq_probe,
-	.remove = kirkwood_cpufreq_remove,
+	.remove_new = kirkwood_cpufreq_remove,
 	.driver = {
 		.name = "kirkwood-cpufreq",
 	},

From 402732324b17a31f7e5dce9659d1b1f049fd65d3 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:18 +0800
Subject: [PATCH 0362/2047] cpufreq: qcom-nvmem: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/qcom-cpufreq-nvmem.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c
index 4590c2570086..84d7033e5efe 100644
--- a/drivers/cpufreq/qcom-cpufreq-nvmem.c
+++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c
@@ -333,7 +333,7 @@ free_drv:
 	return ret;
 }
 
-static int qcom_cpufreq_remove(struct platform_device *pdev)
+static void qcom_cpufreq_remove(struct platform_device *pdev)
 {
 	struct qcom_cpufreq_drv *drv = platform_get_drvdata(pdev);
 	unsigned int cpu;
@@ -345,13 +345,11 @@ static int qcom_cpufreq_remove(struct platform_device *pdev)
 
 	kfree(drv->opp_tokens);
 	kfree(drv);
-
-	return 0;
 }
 
 static struct platform_driver qcom_cpufreq_driver = {
 	.probe = qcom_cpufreq_probe,
-	.remove = qcom_cpufreq_remove,
+	.remove_new = qcom_cpufreq_remove,
 	.driver = {
 		.name = "qcom-cpufreq-nvmem",
 	},

From b196622358c2dcdd70e0984eacf7105615a245be Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:19 +0800
Subject: [PATCH 0363/2047] cpufreq: tegra186: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/tegra186-cpufreq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c
index f98f53bf1011..7b8fcfa55038 100644
--- a/drivers/cpufreq/tegra186-cpufreq.c
+++ b/drivers/cpufreq/tegra186-cpufreq.c
@@ -259,11 +259,9 @@ put_bpmp:
 	return err;
 }
 
-static int tegra186_cpufreq_remove(struct platform_device *pdev)
+static void tegra186_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&tegra186_cpufreq_driver);
-
-	return 0;
 }
 
 static const struct of_device_id tegra186_cpufreq_of_match[] = {
@@ -278,7 +276,7 @@ static struct platform_driver tegra186_cpufreq_platform_driver = {
 		.of_match_table = tegra186_cpufreq_of_match,
 	},
 	.probe = tegra186_cpufreq_probe,
-	.remove = tegra186_cpufreq_remove,
+	.remove_new = tegra186_cpufreq_remove,
 };
 module_platform_driver(tegra186_cpufreq_platform_driver);
 

From 1cd04adf97e9e7da5872d4535d25b6ec04c19d4f Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:20 +0800
Subject: [PATCH 0364/2047] cpufreq: acpi: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/acpi-cpufreq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index b2f05d27167e..37f1cdf46d29 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -1011,22 +1011,20 @@ static int __init acpi_cpufreq_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int acpi_cpufreq_remove(struct platform_device *pdev)
+static void acpi_cpufreq_remove(struct platform_device *pdev)
 {
 	pr_debug("%s\n", __func__);
 
 	cpufreq_unregister_driver(&acpi_cpufreq_driver);
 
 	free_acpi_perf_data();
-
-	return 0;
 }
 
 static struct platform_driver acpi_cpufreq_platdrv = {
 	.driver = {
 		.name	= "acpi-cpufreq",
 	},
-	.remove		= acpi_cpufreq_remove,
+	.remove_new	= acpi_cpufreq_remove,
 };
 
 static int __init acpi_cpufreq_init(void)

From a8cf9284a794abdaadc91e409e9f233621ea07f8 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:21 +0800
Subject: [PATCH 0365/2047] cpufreq: qoriq: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/qoriq-cpufreq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index 573b417e1483..0aecaecbb0e6 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -288,11 +288,9 @@ static int qoriq_cpufreq_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int qoriq_cpufreq_remove(struct platform_device *pdev)
+static void qoriq_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&qoriq_cpufreq_driver);
-
-	return 0;
 }
 
 static struct platform_driver qoriq_cpufreq_platform_driver = {
@@ -300,7 +298,7 @@ static struct platform_driver qoriq_cpufreq_platform_driver = {
 		.name = "qoriq-cpufreq",
 	},
 	.probe = qoriq_cpufreq_probe,
-	.remove = qoriq_cpufreq_remove,
+	.remove_new = qoriq_cpufreq_remove,
 };
 module_platform_driver(qoriq_cpufreq_platform_driver);
 

From fd459406b99d4a0c641a02aaaf56b585571566cf Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Wed, 12 Jul 2023 17:33:22 +0800
Subject: [PATCH 0366/2047] cpufreq: omap: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/omap-cpufreq.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index 81649a1969b6..895690856665 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -182,11 +182,9 @@ static int omap_cpufreq_probe(struct platform_device *pdev)
 	return cpufreq_register_driver(&omap_driver);
 }
 
-static int omap_cpufreq_remove(struct platform_device *pdev)
+static void omap_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&omap_driver);
-
-	return 0;
 }
 
 static struct platform_driver omap_cpufreq_platdrv = {
@@ -194,7 +192,7 @@ static struct platform_driver omap_cpufreq_platdrv = {
 		.name	= "omap-cpufreq",
 	},
 	.probe		= omap_cpufreq_probe,
-	.remove		= omap_cpufreq_remove,
+	.remove_new	= omap_cpufreq_remove,
 };
 module_platform_driver(omap_cpufreq_platdrv);
 

From ee43f5bb23340c27603c3ad8ef94f677ad7cb9ad Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 19 Jul 2023 20:28:48 -0700
Subject: [PATCH 0367/2047] regmap: Reject fast_io regmap configurations with
 RBTREE and MAPLE caches

REGCACHE_RBTREE and REGCACHE_MAPLE dynamically allocate memory for regmap
operations. This is incompatible with spinlock based locking which is used
for fast_io operations. Reject affected configurations.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20230720032848.1306349-2-linux@roeck-us.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 8b37451fa931..6c637b18d6b0 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -723,6 +723,15 @@ struct regmap *__regmap_init(struct device *dev,
 	} else {
 		if ((bus && bus->fast_io) ||
 		    config->fast_io) {
+			/*
+			 * fast_io is incompatible with REGCACHE_RBTREE and REGCACHE_MAPLE
+			 * since both need to dynamically allocate memory.
+			 */
+			if (config->cache_type == REGCACHE_RBTREE ||
+			    config->cache_type == REGCACHE_MAPLE) {
+				ret = -EINVAL;
+				goto err_name;
+			}
 			if (config->use_raw_spinlock) {
 				raw_spin_lock_init(&map->raw_spinlock);
 				map->lock = regmap_lock_raw_spinlock;

From 1e7dae68510aa90a022fefed392fa794b16bc68b Mon Sep 17 00:00:00 2001
From: Yuanjun Gong <ruc_gongyuanjun@163.com>
Date: Thu, 20 Jul 2023 22:08:59 +0800
Subject: [PATCH 0368/2047] spi: fix return value check in bcm2835_spi_probe()

in bcm2835_spi_probe(), clk_prepare_enable() may fail, therefore,
the return value of clk_prepare_enable() should be checked, and
the function should return error if clk_prepare_enable() fails.

Signed-off-by: Yuanjun Gong <ruc_gongyuanjun@163.com>
Link: https://lore.kernel.org/r/20230720140859.33883-1-ruc_gongyuanjun@163.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm2835.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index b247ee887562..83fd062fc491 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -1363,7 +1363,9 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
 	if (bs->irq <= 0)
 		return bs->irq ? bs->irq : -ENODEV;
 
-	clk_prepare_enable(bs->clk);
+	err = clk_prepare_enable(bs->clk);
+	if (err)
+		return err;
 	bs->clk_hz = clk_get_rate(bs->clk);
 
 	err = bcm2835_dma_init(ctlr, &pdev->dev, bs);

From a90a987ebe008c941338285764e5afc0d663f54d Mon Sep 17 00:00:00 2001
From: Yuanjun Gong <ruc_gongyuanjun@163.com>
Date: Thu, 20 Jul 2023 22:09:09 +0800
Subject: [PATCH 0369/2047] spi: use devm_clk_get_enabled() in mcfqspi_probe()

in mcfqspi_probe(), the return value of function
clk_prepare_enable() should be checked, since it may fail.
using devm_clk_get_enabled() instead of devm_clk_get() and
clk_prepare_enable() can avoid this problem.

Signed-off-by: Yuanjun Gong <ruc_gongyuanjun@163.com>
Link: https://lore.kernel.org/r/20230720140909.34084-1-ruc_gongyuanjun@163.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-coldfire-qspi.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/spi/spi-coldfire-qspi.c b/drivers/spi/spi-coldfire-qspi.c
index b1bd8a6b5bf9..31174e7ca7a6 100644
--- a/drivers/spi/spi-coldfire-qspi.c
+++ b/drivers/spi/spi-coldfire-qspi.c
@@ -381,13 +381,12 @@ static int mcfqspi_probe(struct platform_device *pdev)
 		goto fail0;
 	}
 
-	mcfqspi->clk = devm_clk_get(&pdev->dev, "qspi_clk");
+	mcfqspi->clk = devm_clk_get_enabled(&pdev->dev, "qspi_clk");
 	if (IS_ERR(mcfqspi->clk)) {
 		dev_dbg(&pdev->dev, "clk_get failed\n");
 		status = PTR_ERR(mcfqspi->clk);
 		goto fail0;
 	}
-	clk_prepare_enable(mcfqspi->clk);
 
 	master->bus_num = pdata->bus_num;
 	master->num_chipselect = pdata->num_chipselect;
@@ -396,7 +395,7 @@ static int mcfqspi_probe(struct platform_device *pdev)
 	status = mcfqspi_cs_setup(mcfqspi);
 	if (status) {
 		dev_dbg(&pdev->dev, "error initializing cs_control\n");
-		goto fail1;
+		goto fail0;
 	}
 
 	init_waitqueue_head(&mcfqspi->waitq);
@@ -414,18 +413,16 @@ static int mcfqspi_probe(struct platform_device *pdev)
 	status = devm_spi_register_master(&pdev->dev, master);
 	if (status) {
 		dev_dbg(&pdev->dev, "spi_register_master failed\n");
-		goto fail2;
+		goto fail1;
 	}
 
 	dev_info(&pdev->dev, "Coldfire QSPI bus driver\n");
 
 	return 0;
 
-fail2:
+fail1:
 	pm_runtime_disable(&pdev->dev);
 	mcfqspi_cs_teardown(mcfqspi);
-fail1:
-	clk_disable_unprepare(mcfqspi->clk);
 fail0:
 	spi_master_put(master);
 

From 212892b89d50cf4fd750f999a0c1b167708d45cd Mon Sep 17 00:00:00 2001
From: Andrew Davis <afd@ti.com>
Date: Thu, 13 Jul 2023 15:20:46 -0500
Subject: [PATCH 0370/2047] gpio: pisosr: Use devm_gpiochip_add_data() to
 simplify remove path

Use devm version of gpiochip add function to handle removal for us.

While here update copyright and module author.

Signed-off-by: Andrew Davis <afd@ti.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-pisosr.c | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/drivers/gpio/gpio-pisosr.c b/drivers/gpio/gpio-pisosr.c
index 67071bea08c2..e3013e778e15 100644
--- a/drivers/gpio/gpio-pisosr.c
+++ b/drivers/gpio/gpio-pisosr.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/
- *	Andrew F. Davis <afd@ti.com>
+ * Copyright (C) 2015-2023 Texas Instruments Incorporated - https://www.ti.com/
+ *	Andrew Davis <afd@ti.com>
  */
 
 #include <linux/bitmap.h>
@@ -116,6 +116,11 @@ static const struct gpio_chip template_chip = {
 	.can_sleep		= true,
 };
 
+static void pisosr_mutex_destroy(void *lock)
+{
+	mutex_destroy(lock);
+}
+
 static int pisosr_gpio_probe(struct spi_device *spi)
 {
 	struct device *dev = &spi->dev;
@@ -126,8 +131,6 @@ static int pisosr_gpio_probe(struct spi_device *spi)
 	if (!gpio)
 		return -ENOMEM;
 
-	spi_set_drvdata(spi, gpio);
-
 	gpio->chip = template_chip;
 	gpio->chip.parent = dev;
 	of_property_read_u16(dev->of_node, "ngpios", &gpio->chip.ngpio);
@@ -145,8 +148,11 @@ static int pisosr_gpio_probe(struct spi_device *spi)
 				     "Unable to allocate load GPIO\n");
 
 	mutex_init(&gpio->lock);
+	ret = devm_add_action_or_reset(dev, pisosr_mutex_destroy, &gpio->lock);
+	if (ret)
+		return ret;
 
-	ret = gpiochip_add_data(&gpio->chip, gpio);
+	ret = devm_gpiochip_add_data(dev, &gpio->chip, gpio);
 	if (ret < 0) {
 		dev_err(dev, "Unable to register gpiochip\n");
 		return ret;
@@ -155,15 +161,6 @@ static int pisosr_gpio_probe(struct spi_device *spi)
 	return 0;
 }
 
-static void pisosr_gpio_remove(struct spi_device *spi)
-{
-	struct pisosr_gpio *gpio = spi_get_drvdata(spi);
-
-	gpiochip_remove(&gpio->chip);
-
-	mutex_destroy(&gpio->lock);
-}
-
 static const struct spi_device_id pisosr_gpio_id_table[] = {
 	{ "pisosr-gpio", },
 	{ /* sentinel */ }
@@ -182,11 +179,10 @@ static struct spi_driver pisosr_gpio_driver = {
 		.of_match_table = pisosr_gpio_of_match_table,
 	},
 	.probe = pisosr_gpio_probe,
-	.remove = pisosr_gpio_remove,
 	.id_table = pisosr_gpio_id_table,
 };
 module_spi_driver(pisosr_gpio_driver);
 
-MODULE_AUTHOR("Andrew F. Davis <afd@ti.com>");
+MODULE_AUTHOR("Andrew Davis <afd@ti.com>");
 MODULE_DESCRIPTION("SPI Compatible PISO Shift Register GPIO Driver");
 MODULE_LICENSE("GPL v2");

From 320630c45e6668c7b9b59489068cd1f85637a6ff Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Wed, 12 Jul 2023 09:45:53 +0200
Subject: [PATCH 0371/2047] dt-bindings: gpio: snps,dw-apb: allow
 gpio-line-names

Allow the GPIO controller subnode to define GPIO names.  Already used in
at least on DTS:

  bitmain/bm1880-sophon-edge.dtb: gpio@50027000: gpio-controller@0: 'gpio-line-names' does not match any of the regexes: 'pinctrl-[0-9]+'

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml b/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml
index b391cc1b4590..209f03bba0a7 100644
--- a/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml
@@ -61,6 +61,10 @@ patternProperties:
       '#gpio-cells':
         const: 2
 
+      gpio-line-names:
+        minItems: 1
+        maxItems: 32
+
       ngpios:
         default: 32
         minimum: 1

From 3e767d6850f867cc33ac16ca097350a1d2417982 Mon Sep 17 00:00:00 2001
From: Cristian Marussi <cristian.marussi@arm.com>
Date: Tue, 18 Jul 2023 11:17:26 +0100
Subject: [PATCH 0372/2047] powercap: arm_scmi: Remove recursion while parsing
 zones

Powercap zones can be defined as arranged in a hierarchy of trees and when
registering a zone with powercap_register_zone(), the kernel powercap
subsystem expects this to happen starting from the root zones down to the
leaves; on the other side, de-registration by powercap_deregister_zone()
must begin from the leaf zones.

Available SCMI powercap zones are retrieved dynamically from the platform
at probe time and, while any defined hierarchy between the zones is
described properly in the zones descriptor, the platform returns the
availables zones with no particular well-defined order: as a consequence,
the trees possibly composing the hierarchy of zones have to be somehow
walked properly to register the retrieved zones from the root.

Currently the ARM SCMI Powercap driver walks the zones using a recursive
algorithm; this approach, even though correct and tested can lead to kernel
stack overflow when processing a returned hierarchy of zones composed by
particularly high trees.

Avoid possible kernel stack overflow by substituting the recursive approach
with an iterative one supported by a dynamically allocated stack-like data
structure.

Fixes: b55eef5226b7 ("powercap: arm_scmi: Add SCMI Powercap based driver")
Signed-off-by: Cristian Marussi <cristian.marussi@arm.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/arm_scmi_powercap.c | 159 ++++++++++++++++-----------
 1 file changed, 92 insertions(+), 67 deletions(-)

diff --git a/drivers/powercap/arm_scmi_powercap.c b/drivers/powercap/arm_scmi_powercap.c
index 5231f6d52ae3..a081f177e702 100644
--- a/drivers/powercap/arm_scmi_powercap.c
+++ b/drivers/powercap/arm_scmi_powercap.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/powercap.h>
 #include <linux/scmi_protocol.h>
+#include <linux/slab.h>
 
 #define to_scmi_powercap_zone(z)		\
 	container_of(z, struct scmi_powercap_zone, zone)
@@ -19,6 +20,8 @@
 static const struct scmi_powercap_proto_ops *powercap_ops;
 
 struct scmi_powercap_zone {
+	bool registered;
+	bool invalid;
 	unsigned int height;
 	struct device *dev;
 	struct scmi_protocol_handle *ph;
@@ -32,6 +35,7 @@ struct scmi_powercap_root {
 	unsigned int num_zones;
 	struct scmi_powercap_zone *spzones;
 	struct list_head *registered_zones;
+	struct list_head scmi_zones;
 };
 
 static struct powercap_control_type *scmi_top_pcntrl;
@@ -271,12 +275,6 @@ static void scmi_powercap_unregister_all_zones(struct scmi_powercap_root *pr)
 	}
 }
 
-static inline bool
-scmi_powercap_is_zone_registered(struct scmi_powercap_zone *spz)
-{
-	return !list_empty(&spz->node);
-}
-
 static inline unsigned int
 scmi_powercap_get_zone_height(struct scmi_powercap_zone *spz)
 {
@@ -295,11 +293,46 @@ scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz)
 	return &spz->spzones[spz->info->parent_id];
 }
 
+static int scmi_powercap_register_zone(struct scmi_powercap_root *pr,
+				       struct scmi_powercap_zone *spz,
+				       struct scmi_powercap_zone *parent)
+{
+	int ret = 0;
+	struct powercap_zone *z;
+
+	if (spz->invalid) {
+		list_del(&spz->node);
+		return -EINVAL;
+	}
+
+	z = powercap_register_zone(&spz->zone, scmi_top_pcntrl, spz->info->name,
+				   parent ? &parent->zone : NULL,
+				   &zone_ops, 1, &constraint_ops);
+	if (!IS_ERR(z)) {
+		spz->height = scmi_powercap_get_zone_height(spz);
+		spz->registered = true;
+		list_move(&spz->node, &pr->registered_zones[spz->height]);
+		dev_dbg(spz->dev, "Registered node %s - parent %s - height:%d\n",
+			spz->info->name, parent ? parent->info->name : "ROOT",
+			spz->height);
+	} else {
+		list_del(&spz->node);
+		ret = PTR_ERR(z);
+		dev_err(spz->dev,
+			"Error registering node:%s - parent:%s - h:%d - ret:%d\n",
+			spz->info->name,
+			parent ? parent->info->name : "ROOT",
+			spz->height, ret);
+	}
+
+	return ret;
+}
+
 /**
- * scmi_powercap_register_zone  - Register an SCMI powercap zone recursively
+ * scmi_zones_register- Register SCMI powercap zones starting from parent zones
  *
+ * @dev: A reference to the SCMI device
  * @pr: A reference to the root powercap zones descriptors
- * @spz: A reference to the SCMI powercap zone to register
  *
  * When registering SCMI powercap zones with the powercap framework we should
  * take care to always register zones starting from the root ones and to
@@ -309,10 +342,10 @@ scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz)
  * zones provided by the SCMI platform firmware is built to comply with such
  * requirement.
  *
- * This function, given an SCMI powercap zone to register, takes care to walk
- * the SCMI powercap zones tree up to the root looking recursively for
- * unregistered parent zones before registering the provided zone; at the same
- * time each registered zone height in such a tree is accounted for and each
+ * This function, given the set of SCMI powercap zones to register, takes care
+ * to walk the SCMI powercap zones trees up to the root registering any
+ * unregistered parent zone before registering the child zones; at the same
+ * time each registered-zone height in such a tree is accounted for and each
  * zone, once registered, is stored in the @registered_zones array that is
  * indexed by zone height: this way will be trivial, at unregister time, to walk
  * the @registered_zones array backward and unregister all the zones starting
@@ -330,57 +363,55 @@ scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz)
  *
  * Return: 0 on Success
  */
-static int scmi_powercap_register_zone(struct scmi_powercap_root *pr,
-				       struct scmi_powercap_zone *spz)
+static int scmi_zones_register(struct device *dev,
+			       struct scmi_powercap_root *pr)
 {
 	int ret = 0;
-	struct scmi_powercap_zone *parent;
+	unsigned int sp = 0, reg_zones = 0;
+	struct scmi_powercap_zone *spz, **zones_stack;
 
-	if (!spz->info)
-		return ret;
+	zones_stack = kcalloc(pr->num_zones, sizeof(spz), GFP_KERNEL);
+	if (!zones_stack)
+		return -ENOMEM;
 
-	parent = scmi_powercap_get_parent_zone(spz);
-	if (parent && !scmi_powercap_is_zone_registered(parent)) {
-		/*
-		 * Bail out if a parent domain was marked as unsupported:
-		 * only domains participating as leaves can be skipped.
-		 */
-		if (!parent->info)
-			return -ENODEV;
+	spz = list_first_entry_or_null(&pr->scmi_zones,
+				       struct scmi_powercap_zone, node);
+	while (spz) {
+		struct scmi_powercap_zone *parent;
 
-		ret = scmi_powercap_register_zone(pr, parent);
-		if (ret)
-			return ret;
-	}
-
-	if (!scmi_powercap_is_zone_registered(spz)) {
-		struct powercap_zone *z;
-
-		z = powercap_register_zone(&spz->zone,
-					   scmi_top_pcntrl,
-					   spz->info->name,
-					   parent ? &parent->zone : NULL,
-					   &zone_ops, 1, &constraint_ops);
-		if (!IS_ERR(z)) {
-			spz->height = scmi_powercap_get_zone_height(spz);
-			list_add(&spz->node,
-				 &pr->registered_zones[spz->height]);
-			dev_dbg(spz->dev,
-				"Registered node %s - parent %s - height:%d\n",
-				spz->info->name,
-				parent ? parent->info->name : "ROOT",
-				spz->height);
-			ret = 0;
+		parent = scmi_powercap_get_parent_zone(spz);
+		if (parent && !parent->registered) {
+			zones_stack[sp++] = spz;
+			spz = parent;
 		} else {
-			ret = PTR_ERR(z);
-			dev_err(spz->dev,
-				"Error registering node:%s - parent:%s - h:%d - ret:%d\n",
-				 spz->info->name,
-				 parent ? parent->info->name : "ROOT",
-				 spz->height, ret);
+			ret = scmi_powercap_register_zone(pr, spz, parent);
+			if (!ret) {
+				reg_zones++;
+			} else if (sp) {
+				/* Failed to register a non-leaf zone.
+				 * Bail-out.
+				 */
+				dev_err(dev,
+					"Failed to register non-leaf zone - ret:%d\n",
+					ret);
+				scmi_powercap_unregister_all_zones(pr);
+				reg_zones = 0;
+				goto out;
+			}
+			/* Pick next zone to process */
+			if (sp)
+				spz = zones_stack[--sp];
+			else
+				spz = list_first_entry_or_null(&pr->scmi_zones,
+							       struct scmi_powercap_zone,
+							       node);
 		}
 	}
 
+out:
+	kfree(zones_stack);
+	dev_info(dev, "Registered %d SCMI Powercap domains !\n", reg_zones);
+
 	return ret;
 }
 
@@ -424,6 +455,8 @@ static int scmi_powercap_probe(struct scmi_device *sdev)
 	if (!pr->registered_zones)
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&pr->scmi_zones);
+
 	for (i = 0, spz = pr->spzones; i < pr->num_zones; i++, spz++) {
 		/*
 		 * Powercap domains are validate by the protocol layer, i.e.
@@ -438,6 +471,7 @@ static int scmi_powercap_probe(struct scmi_device *sdev)
 		INIT_LIST_HEAD(&spz->node);
 		INIT_LIST_HEAD(&pr->registered_zones[i]);
 
+		list_add_tail(&spz->node, &pr->scmi_zones);
 		/*
 		 * Forcibly skip powercap domains using an abstract scale.
 		 * Note that only leaves domains can be skipped, so this could
@@ -448,7 +482,7 @@ static int scmi_powercap_probe(struct scmi_device *sdev)
 			dev_warn(dev,
 				 "Abstract power scale not supported. Skip %s.\n",
 				 spz->info->name);
-			spz->info = NULL;
+			spz->invalid = true;
 			continue;
 		}
 	}
@@ -457,21 +491,12 @@ static int scmi_powercap_probe(struct scmi_device *sdev)
 	 * Scan array of retrieved SCMI powercap domains and register them
 	 * recursively starting from the root domains.
 	 */
-	for (i = 0, spz = pr->spzones; i < pr->num_zones; i++, spz++) {
-		ret = scmi_powercap_register_zone(pr, spz);
-		if (ret) {
-			dev_err(dev,
-				"Failed to register powercap zone %s - ret:%d\n",
-				spz->info->name, ret);
-			scmi_powercap_unregister_all_zones(pr);
-			return ret;
-		}
-	}
+	ret = scmi_zones_register(dev, pr);
+	if (ret)
+		return ret;
 
 	dev_set_drvdata(dev, pr);
 
-	dev_info(dev, "Registered %d SCMI Powercap domains !\n", pr->num_zones);
-
 	return ret;
 }
 

From 003e0694fcd3d51ee2d55dafb2def501f9965cbd Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 19 Jul 2023 18:02:44 +0200
Subject: [PATCH 0373/2047] ACPI: processor: Refine messages in
 acpi_early_processor_control_setup()

The source and meaning of the messages printed by
acpi_early_processor_control_setup() is unclear, so add a pr_fmt()
definition to acpi_processor.c and expand the messages to make it
clear that they are about CPUs.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Michal Wilczynski <michal.wilczynski@intel.com>
---
 drivers/acpi/acpi_processor.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 9e35b8d8d334..c711db8a9c33 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -9,6 +9,7 @@
  * Copyright (C) 2013, Intel Corporation
  *                     Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  */
+#define pr_fmt(fmt) "ACPI: " fmt
 
 #include <linux/acpi.h>
 #include <linux/device.h>
@@ -611,9 +612,9 @@ static bool __init acpi_early_processor_osc(void)
 void __init acpi_early_processor_control_setup(void)
 {
 	if (acpi_early_processor_osc()) {
-		pr_info("_OSC evaluated successfully\n");
+		pr_info("_OSC evaluated successfully for all CPUs\n");
 	} else {
-		pr_info("_OSC evaluation failed, trying _PDC\n");
+		pr_info("_OSC evaluation for CPUs failed, trying _PDC\n");
 		acpi_early_processor_set_pdc();
 	}
 }

From b0393e1fe40e962574613a5cdc4a470d6c1de023 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 20 Jul 2023 10:20:21 -0700
Subject: [PATCH 0374/2047] regmap: maple: Use alloc_flags for memory
 allocations

REGCACHE_MAPLE needs to allocate memory for regmap operations.
This results in lockdep splats if used with fast_io since fast_io uses
spinlocks for locking.

BUG: sleeping function called from invalid context at include/linux/sched/mm.h:306
in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 167, name: kunit_try_catch
preempt_count: 1, expected: 0
1 lock held by kunit_try_catch/167:
 #0: 838e9c10 (regmap_kunit:86:(config)->lock){....}-{2:2}, at: regmap_lock_spinlock+0x14/0x1c
irq event stamp: 146
hardirqs last  enabled at (145): [<8078bfa8>] crng_make_state+0x1a0/0x294
hardirqs last disabled at (146): [<80c5f62c>] _raw_spin_lock_irqsave+0x7c/0x80
softirqs last  enabled at (0): [<80110cc4>] copy_process+0x810/0x216c
softirqs last disabled at (0): [<00000000>] 0x0
CPU: 0 PID: 167 Comm: kunit_try_catch Tainted: G                 N 6.5.0-rc1-00028-gc4be22597a36-dirty #6
Hardware name: Generic DT based system
 unwind_backtrace from show_stack+0x18/0x1c
 show_stack from dump_stack_lvl+0x38/0x5c
 dump_stack_lvl from __might_resched+0x188/0x2d0
 __might_resched from __kmem_cache_alloc_node+0x1f4/0x258
 __kmem_cache_alloc_node from __kmalloc+0x48/0x170
 __kmalloc from regcache_maple_write+0x194/0x248
 regcache_maple_write from _regmap_write+0x88/0x140
 _regmap_write from regmap_write+0x44/0x68
 regmap_write from basic_read_write+0x8c/0x27c
 basic_read_write from kunit_generic_run_threadfn_adapter+0x1c/0x28
 kunit_generic_run_threadfn_adapter from kthread+0xf8/0x120
 kthread from ret_from_fork+0x14/0x3c
Exception stack(0x881a5fb0 to 0x881a5ff8)
5fa0:                                     00000000 00000000 00000000 00000000
5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
5fe0: 00000000 00000000 00000000 00000000 00000013 00000000

Use map->alloc_flags instead of GFP_KERNEL for memory allocations to fix
the problem.

Fixes: f033c26de5a5 ("regmap: Add maple tree based register cache")
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20230720172021.2617326-1-linux@roeck-us.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regcache-maple.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c
index 283c2e02a298..41edd6a430eb 100644
--- a/drivers/base/regmap/regcache-maple.c
+++ b/drivers/base/regmap/regcache-maple.c
@@ -74,7 +74,7 @@ static int regcache_maple_write(struct regmap *map, unsigned int reg,
 	rcu_read_unlock();
 
 	entry = kmalloc((last - index + 1) * sizeof(unsigned long),
-			GFP_KERNEL);
+			map->alloc_flags);
 	if (!entry)
 		return -ENOMEM;
 
@@ -92,7 +92,7 @@ static int regcache_maple_write(struct regmap *map, unsigned int reg,
 	mas_lock(&mas);
 
 	mas_set_range(&mas, index, last);
-	ret = mas_store_gfp(&mas, entry, GFP_KERNEL);
+	ret = mas_store_gfp(&mas, entry, map->alloc_flags);
 
 	mas_unlock(&mas);
 
@@ -134,7 +134,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min,
 
 			lower = kmemdup(entry, ((min - mas.index) *
 						sizeof(unsigned long)),
-					GFP_KERNEL);
+					map->alloc_flags);
 			if (!lower) {
 				ret = -ENOMEM;
 				goto out_unlocked;
@@ -148,7 +148,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min,
 			upper = kmemdup(&entry[max + 1],
 					((mas.last - max) *
 					 sizeof(unsigned long)),
-					GFP_KERNEL);
+					map->alloc_flags);
 			if (!upper) {
 				ret = -ENOMEM;
 				goto out_unlocked;
@@ -162,7 +162,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min,
 		/* Insert new nodes with the saved data */
 		if (lower) {
 			mas_set_range(&mas, lower_index, lower_last);
-			ret = mas_store_gfp(&mas, lower, GFP_KERNEL);
+			ret = mas_store_gfp(&mas, lower, map->alloc_flags);
 			if (ret != 0)
 				goto out;
 			lower = NULL;
@@ -170,7 +170,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min,
 
 		if (upper) {
 			mas_set_range(&mas, upper_index, upper_last);
-			ret = mas_store_gfp(&mas, upper, GFP_KERNEL);
+			ret = mas_store_gfp(&mas, upper, map->alloc_flags);
 			if (ret != 0)
 				goto out;
 			upper = NULL;
@@ -320,7 +320,7 @@ static int regcache_maple_insert_block(struct regmap *map, int first,
 	unsigned long *entry;
 	int i, ret;
 
-	entry = kcalloc(last - first + 1, sizeof(unsigned long), GFP_KERNEL);
+	entry = kcalloc(last - first + 1, sizeof(unsigned long), map->alloc_flags);
 	if (!entry)
 		return -ENOMEM;
 
@@ -331,7 +331,7 @@ static int regcache_maple_insert_block(struct regmap *map, int first,
 
 	mas_set_range(&mas, map->reg_defaults[first].reg,
 		      map->reg_defaults[last].reg);
-	ret = mas_store_gfp(&mas, entry, GFP_KERNEL);
+	ret = mas_store_gfp(&mas, entry, map->alloc_flags);
 
 	mas_unlock(&mas);
 

From db5dc3502dfac007088ab823a267d9cd7e2c5604 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 10 Jul 2023 17:34:45 -0700
Subject: [PATCH 0375/2047] torture: Add srcu_lockdep.sh to torture.sh

This commit adds srcu_lockdep.sh to torture.sh, thus exercizing the
extended SRCU-aware lockdep-RCU functionality on a regular basis.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/bin/torture.sh       | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index d36b839bf586..bc63344ce4cd 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -56,6 +56,7 @@ do_kcsan=no
 do_clocksourcewd=yes
 do_rt=yes
 do_rcutasksflavors=yes
+do_srcu_lockdep=yes
 
 # doyesno - Helper function for yes/no arguments
 function doyesno () {
@@ -87,6 +88,7 @@ usage () {
 	echo "       --do-refscale / --do-no-refscale / --no-refscale"
 	echo "       --do-rt / --do-no-rt / --no-rt"
 	echo "       --do-scftorture / --do-no-scftorture / --no-scftorture"
+	echo "       --do-srcu-lockdep / --do-no-srcu-lockdep / --no-srcu-lockdep"
 	echo "       --duration [ <minutes> | <hours>h | <days>d ]"
 	echo "       --kcsan-kmake-arg kernel-make-arguments"
 	exit 1
@@ -128,6 +130,7 @@ do
 		do_kasan=yes
 		do_kcsan=yes
 		do_clocksourcewd=yes
+		do_srcu_lockdep=yes
 		;;
 	--do-allmodconfig|--do-no-allmodconfig|--no-allmodconfig)
 		do_allmodconfig=`doyesno "$1" --do-allmodconfig`
@@ -160,6 +163,7 @@ do
 		do_kasan=no
 		do_kcsan=no
 		do_clocksourcewd=no
+		do_srcu_lockdep=no
 		;;
 	--do-rcuscale|--do-no-rcuscale|--no-rcuscale)
 		do_rcuscale=`doyesno "$1" --do-rcuscale`
@@ -179,6 +183,9 @@ do
 	--do-scftorture|--do-no-scftorture|--no-scftorture)
 		do_scftorture=`doyesno "$1" --do-scftorture`
 		;;
+	--do-srcu-lockdep|--do-no-srcu-lockdep|--no-srcu-lockdep)
+		do_srcu_lockdep=`doyesno "$1" --do-srcu-lockdep`
+		;;
 	--duration)
 		checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(m\|h\|d\|\)$' '^error'
 		mult=1
@@ -432,6 +439,23 @@ then
 	torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
 fi
 
+if test "$do_srcu_lockdep" = "yes"
+then
+	echo " --- do-srcu-lockdep:" Start `date` | tee -a $T/log
+	tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh --datestamp "$ds/results-srcu-lockdep" > $T/srcu_lockdep.sh.out 2>&1
+	retcode=$?
+	cp $T/srcu_lockdep.sh.out "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log"
+	if test "$retcode" -eq 0
+	then
+		echo "srcu_lockdep($retcode)" "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep" >> $T/successes
+		echo Success >> "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log"
+	else
+		echo "srcu_lockdep($retcode)" "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep" >> $T/failures
+		echo " --- srcu_lockdep Test Summary:" >> "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log"
+		echo " --- Summary: Exit code $retcode from srcu_lockdep.sh, see ds/results-srcu-lockdep" >> "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log"
+	fi
+fi
+
 if test "$do_refscale" = yes
 then
 	primlist="`grep '\.name[ 	]*=' kernel/rcu/refscale.c | sed -e 's/^[^"]*"//' -e 's/".*$//'`"

From 10f84c2cfb5045e37d78cb5d4c8e8321e06ae18f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 11 Jul 2023 11:02:09 -0700
Subject: [PATCH 0376/2047] torture: Avoid torture-test reboot loops

Currently, the various torture tests sometimes react to an early-boot
bug by rebooting.  This is almost always counterproductive, needlessly
consuming CPU time and bloating the console log.  This commit therefore
adds the "-no-reboot" argument to qemu so that reboot requests will
cause qemu to exit.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 1e7566997e51..3bc0140438ef 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -9,9 +9,10 @@
 #
 # Usage: kvm-test-1-run.sh config resdir seconds qemu-args boot_args_in
 #
-# qemu-args defaults to "-enable-kvm -nographic", along with arguments
-#			specifying the number of CPUs and other options
-#			generated from the underlying CPU architecture.
+# qemu-args defaults to "-enable-kvm -nographic -no-reboot", along with
+#			arguments specifying the number of CPUs and
+#			other options generated from the underlying
+#			CPU architecture.
 # boot_args_in defaults to value returned by the per_version_boot_params
 #			shell function.
 #
@@ -139,7 +140,7 @@ then
 fi
 
 # Generate -smp qemu argument.
-qemu_args="-enable-kvm -nographic $qemu_args"
+qemu_args="-enable-kvm -nographic -no-reboot $qemu_args"
 cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment`
 cpu_count=`configfrag_boot_cpus "$boot_args_in" "$config_template" "$cpu_count"`
 if test "$cpu_count" -gt "$TORTURE_ALLOTED_CPUS"

From ecf671cf76b026dc27e6febb31a87b6b15d2a7f7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 13 Jul 2023 16:38:14 -0700
Subject: [PATCH 0377/2047] torture: Add init-program support for loongarch

This commit adds the __loongarch__, __loongarch_lp64, and
__loongarch_double_float targets to rcutorture's mkinitrd.sh
script in order to allow nolibc init programs for loongarch.

[ paulmck: Apply feedback from Feiyang Chen. ]

Cc: Feiyang Chen <chenfeiyang@loongson.cn>
Cc: Huacai Chen <chenhuacai@loongson.cn>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/mkinitrd.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
index 3af3a86a0091..435ae6db5cc5 100755
--- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
+++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
@@ -63,7 +63,7 @@ ___EOF___
 # build using nolibc on supported archs (smaller executable) and fall
 # back to regular glibc on other ones.
 if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \
-           "||__ARM_EABI__||__aarch64__||__s390x__\nyes\n#endif" \
+	   "||__ARM_EABI__||__aarch64__||__s390x__||__loongarch__\nyes\n#endif" \
    | ${CROSS_COMPILE}gcc -E -nostdlib -xc - \
    | grep -q '^yes'; then
 	# architecture supported by nolibc

From 93a556b8b3c8cae25230fac8f1376284a697c489 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 13 Jul 2023 17:39:02 -0700
Subject: [PATCH 0378/2047] torture: Switch qemu from -nographic to -display
 none

This commit switches the qemu argument "-nographic" to "-display none",
aligning with the nolibc tests.

Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 3bc0140438ef..b33cd8753689 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -9,9 +9,9 @@
 #
 # Usage: kvm-test-1-run.sh config resdir seconds qemu-args boot_args_in
 #
-# qemu-args defaults to "-enable-kvm -nographic -no-reboot", along with
-#			arguments specifying the number of CPUs and
-#			other options generated from the underlying
+# qemu-args defaults to "-enable-kvm -display none -no-reboot", along
+#			with arguments specifying the number of CPUs
+#			and other options generated from the underlying
 #			CPU architecture.
 # boot_args_in defaults to value returned by the per_version_boot_params
 #			shell function.
@@ -140,7 +140,7 @@ then
 fi
 
 # Generate -smp qemu argument.
-qemu_args="-enable-kvm -nographic -no-reboot $qemu_args"
+qemu_args="-enable-kvm -display none -no-reboot $qemu_args"
 cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment`
 cpu_count=`configfrag_boot_cpus "$boot_args_in" "$config_template" "$cpu_count"`
 if test "$cpu_count" -gt "$TORTURE_ALLOTED_CPUS"

From 451d2a52f652446f002f289431c171c2064a06fb Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 13 Jul 2023 18:22:07 -0700
Subject: [PATCH 0379/2047] torture: Make init program dump command-line
 arguments

This commit causes the init program generated by mkinitrd.sh dump out
its parameters.  Although this is in some sense redundant given that
the kernel already dumps them out, confirmation can be a good thing.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/mkinitrd.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
index 435ae6db5cc5..558f5a669dbd 100755
--- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
+++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
@@ -33,12 +33,16 @@ cat > init.c << '___EOF___'
 
 volatile unsigned long delaycount;
 
-int main(int argc, int argv[])
+int main(int argc, char *argv[])
 {
 	int i;
 	struct timeval tv;
 	struct timeval tvb;
 
+	printf("Torture-test rudimentary init program started, command line:\n");
+	for (i = 0; i < argc; i++)
+		printf(" %s", argv[i]);
+	printf("\n");
 	for (;;) {
 		sleep(1);
 		/* Need some userspace time. */

From 7c25ee819f39a6c84d5fbe87fee4a7f78dd204a1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 13 Jul 2023 18:29:36 -0700
Subject: [PATCH 0380/2047] torture: Cause mkinitrd.sh to indicate failure on
 compile errors

Currently, if the C program created by mkinitrd.sh has compile errors,
the errors are printed, but kvm.sh soldiers on, building kernels that
have init-less initrd setups.  The kernels then fail on boot when they
attempt to mount non-existent root filesystems.

This commit therefore improves user friendliness by making mkinitrd.sh
return non-zero exit status on compile errors, which in turn causes kvm.sh
to take an early exit, with the compile errors still clearly visible.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/mkinitrd.sh | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
index 558f5a669dbd..212c52ca90b5 100755
--- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
+++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
@@ -74,8 +74,16 @@ if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \
         ${CROSS_COMPILE}gcc -fno-asynchronous-unwind-tables -fno-ident \
 		-nostdlib -include ../../../../include/nolibc/nolibc.h \
 		-s -static -Os -o init init.c -lgcc
+	ret=$?
 else
 	${CROSS_COMPILE}gcc -s -static -Os -o init init.c
+	ret=$?
+fi
+
+if [ "$ret" -ne 0 ]
+then
+	echo "Failed to create a statically linked C-language initrd"
+	exit "$ret"
 fi
 
 rm init.c

From 754833b3194c30dad5af0145e25192a8e29521ab Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 21 Jul 2023 13:34:54 +0530
Subject: [PATCH 0381/2047] OPP: Rearrange entries in pm_opp.h

Rearrange the helper function declarations / definitions to keep them in
order of freq, level and then bw.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/pm_opp.h | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index dc1fb5890792..3821f50b9b89 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -121,17 +121,19 @@ unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev);
 struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 					      unsigned long freq,
 					      bool available);
+
 struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 					      unsigned long *freq);
 
-struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
-					       unsigned int level);
-struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev,
-					      unsigned int *level);
-
 struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 					     unsigned long *freq);
 
+struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
+					       unsigned int level);
+
+struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev,
+					      unsigned int *level);
+
 struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev,
 					   unsigned int *bw, int index);
 
@@ -247,18 +249,6 @@ static inline unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev)
 	return 0;
 }
 
-static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
-					unsigned int level)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev,
-					unsigned int *level)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
 static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 					unsigned long freq, bool available)
 {
@@ -277,6 +267,18 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
+					unsigned int level)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev,
+					unsigned int *level)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
 static inline struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev,
 					unsigned int *bw, int index)
 {

From 142e17c1c2b48e3fb4f024e62ab6dee18f268694 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Thu, 20 Jul 2023 11:10:52 +0530
Subject: [PATCH 0382/2047] OPP: Introduce
 dev_pm_opp_find_freq_{ceil/floor}_indexed() APIs

In the case of devices with multiple clocks, drivers need to specify the
clock index for the OPP framework to find the OPP corresponding to the
floor/ceil of the supplied frequency. So let's introduce the two new APIs
accepting the clock index as an argument.

These APIs use the exising _find_key_ceil() helper by supplying the clock
index to it.

Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
[ Viresh: Rearranged definitions in pm_opp.h ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c     | 56 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/pm_opp.h | 18 ++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 3f46e499d615..cb4f47b341f9 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -658,6 +658,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil);
 
+/**
+ * dev_pm_opp_find_freq_ceil_indexed() - Search for a rounded ceil freq for the
+ *					 clock corresponding to the index
+ * @dev:	Device for which we do this operation
+ * @freq:	Start frequency
+ * @index:	Clock index
+ *
+ * Search for the matching ceil *available* OPP for the clock corresponding to
+ * the specified index from a starting freq for a device.
+ *
+ * Return: matching *opp and refreshes *freq accordingly, else returns
+ * ERR_PTR in case of error and should be handled using IS_ERR. Error return
+ * values can be:
+ * EINVAL:	for bad pointer
+ * ERANGE:	no match found for search
+ * ENODEV:	if device not found in list of registered devices
+ *
+ * The callers are required to call dev_pm_opp_put() for the returned OPP after
+ * use.
+ */
+struct dev_pm_opp *
+dev_pm_opp_find_freq_ceil_indexed(struct device *dev, unsigned long *freq,
+				  u32 index)
+{
+	return _find_key_ceil(dev, freq, index, true, _read_freq, NULL);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil_indexed);
+
 /**
  * dev_pm_opp_find_freq_floor() - Search for a rounded floor freq
  * @dev:	device for which we do this operation
@@ -683,6 +711,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
 
+/**
+ * dev_pm_opp_find_freq_floor_indexed() - Search for a rounded floor freq for the
+ *					  clock corresponding to the index
+ * @dev:	Device for which we do this operation
+ * @freq:	Start frequency
+ * @index:	Clock index
+ *
+ * Search for the matching floor *available* OPP for the clock corresponding to
+ * the specified index from a starting freq for a device.
+ *
+ * Return: matching *opp and refreshes *freq accordingly, else returns
+ * ERR_PTR in case of error and should be handled using IS_ERR. Error return
+ * values can be:
+ * EINVAL:	for bad pointer
+ * ERANGE:	no match found for search
+ * ENODEV:	if device not found in list of registered devices
+ *
+ * The callers are required to call dev_pm_opp_put() for the returned OPP after
+ * use.
+ */
+struct dev_pm_opp *
+dev_pm_opp_find_freq_floor_indexed(struct device *dev, unsigned long *freq,
+				   u32 index)
+{
+	return _find_key_floor(dev, freq, index, true, _read_freq, NULL);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor_indexed);
+
 /**
  * dev_pm_opp_find_level_exact() - search for an exact level
  * @dev:		device for which we do this operation
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 3821f50b9b89..2617f2c51f29 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -125,9 +125,15 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 					      unsigned long *freq);
 
+struct dev_pm_opp *dev_pm_opp_find_freq_floor_indexed(struct device *dev,
+						      unsigned long *freq, u32 index);
+
 struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 					     unsigned long *freq);
 
+struct dev_pm_opp *dev_pm_opp_find_freq_ceil_indexed(struct device *dev,
+						     unsigned long *freq, u32 index);
+
 struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
 					       unsigned int level);
 
@@ -261,12 +267,24 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline struct dev_pm_opp *
+dev_pm_opp_find_freq_floor_indexed(struct device *dev, unsigned long *freq, u32 index)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
 static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 					unsigned long *freq)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline struct dev_pm_opp *
+dev_pm_opp_find_freq_ceil_indexed(struct device *dev, unsigned long *freq, u32 index)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
 static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
 					unsigned int level)
 {

From 813ebba3b100997a24984040673d35cb2dc9f418 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 21 Jul 2023 09:33:03 +0200
Subject: [PATCH 0383/2047] regulator: max8893: Drop "_new" from probe callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver was introduced when .probe_new was the right probe callback
to use for i2c drivers. Today .probe is the right one (again) and the
driver was already switched in commit 964e186547b2 ("regulator: Switch
i2c drivers back to use .probe()") but the name continued to include
"_new". To prevent code readers wondering about what might be new here,
drop that part of the name.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20230721073303.112597-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max8893.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/max8893.c b/drivers/regulator/max8893.c
index cb0e72948dd4..30592425e193 100644
--- a/drivers/regulator/max8893.c
+++ b/drivers/regulator/max8893.c
@@ -125,7 +125,7 @@ static const struct regmap_config max8893_regmap = {
 	.val_bits = 8,
 };
 
-static int max8893_probe_new(struct i2c_client *i2c)
+static int max8893_probe(struct i2c_client *i2c)
 {
 	int id, ret;
 	struct regulator_config config = {.dev = &i2c->dev};
@@ -168,7 +168,7 @@ static const struct i2c_device_id max8893_ids[] = {
 MODULE_DEVICE_TABLE(i2c, max8893_ids);
 
 static struct i2c_driver max8893_driver = {
-	.probe		= max8893_probe_new,
+	.probe		= max8893_probe,
 	.driver		= {
 		.name	= "max8893",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From 4fdef8553df58953f572f1cb46d357c735c683a9 Mon Sep 17 00:00:00 2001
From: Rohit Agarwal <quic_rohiagar@quicinc.com>
Date: Tue, 11 Jul 2023 13:30:12 +0530
Subject: [PATCH 0384/2047] regulator: dt-bindings: qcom,rpmh: Update PMX65
 entry

PMX65 does not have vdd-l18-supply separately and its
already part of vdd-l2-l18-supply property specified.
Updating this PMX65 dt-bindings entry accordingly.

Signed-off-by: Rohit Agarwal <quic_rohiagar@quicinc.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/1689062414-3654-2-git-send-email-quic_rohiagar@quicinc.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/regulator/qcom,rpmh-regulator.yaml      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
index b9498504ad79..12a1e0c35ab2 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
@@ -424,7 +424,7 @@ allOf:
         vdd-l11-l13-supply: true
       patternProperties:
         "^vdd-l[1347]-supply$": true
-        "^vdd-l1[0245789]-supply$": true
+        "^vdd-l1[024579]-supply$": true
         "^vdd-l2[01]-supply$": true
         "^vdd-s[1-8]-supply$": true
 

From 0ef3d931632e3fce51ed5510935238937d644c97 Mon Sep 17 00:00:00 2001
From: Rohit Agarwal <quic_rohiagar@quicinc.com>
Date: Tue, 11 Jul 2023 13:30:13 +0530
Subject: [PATCH 0385/2047] regulator: dt-bindings: qcom,rpmh: Add PMX75
 compatible

Add PMX75 compatibles for PMIC found in SDX75 platform.

Signed-off-by: Rohit Agarwal <quic_rohiagar@quicinc.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/1689062414-3654-3-git-send-email-quic_rohiagar@quicinc.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../regulator/qcom,rpmh-regulator.yaml        | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
index 12a1e0c35ab2..72b533c3761a 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
@@ -53,6 +53,7 @@ description: |
       For PMR735A, smps1 - smps3, ldo1 - ldo7
       For PMX55, smps1 - smps7, ldo1 - ldo16
       For PMX65, smps1 - smps8, ldo1 - ldo21
+      For PMX75, smps1 - smps10, ldo1 - ldo21
 
 properties:
   compatible:
@@ -84,6 +85,7 @@ properties:
       - qcom,pmr735a-rpmh-regulators
       - qcom,pmx55-rpmh-regulators
       - qcom,pmx65-rpmh-regulators
+      - qcom,pmx75-rpmh-regulators
 
   qcom,pmic-id:
     description: |
@@ -428,6 +430,24 @@ allOf:
         "^vdd-l2[01]-supply$": true
         "^vdd-s[1-8]-supply$": true
 
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,pmx75-rpmh-regulators
+    then:
+      properties:
+        vdd-l2-l18-supply: true
+        vdd-l4-l16-supply: true
+        vdd-l5-l6-supply: true
+        vdd-l8-l9-supply: true
+        vdd-l11-l13-supply: true
+        vdd-l20-l21-supply: true
+      patternProperties:
+        "^vdd-l[137]-supply$": true
+        "^vdd-l1[024579]-supply$": true
+        "^vdd-s([1-9]|10)-supply$": true
+
 unevaluatedProperties: false
 
 examples:

From 0b294ed669ead34a348d17d06b6d4d58712b14e2 Mon Sep 17 00:00:00 2001
From: Rohit Agarwal <quic_rohiagar@quicinc.com>
Date: Tue, 11 Jul 2023 13:30:14 +0530
Subject: [PATCH 0386/2047] regulator: qcom-rpmh: Add regulators support for
 PMX75

Add support from RPMH regulators found in PMX75 for SDX75 platform.

Signed-off-by: Rohit Agarwal <quic_rohiagar@quicinc.com>
Link: https://lore.kernel.org/r/1689062414-3654-4-git-send-email-quic_rohiagar@quicinc.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/qcom-rpmh-regulator.c | 38 +++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index ec1b50721ec6..88ddd6c54d04 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -1272,6 +1272,40 @@ static const struct rpmh_vreg_init_data pmx65_vreg_data[] = {
 	{}
 };
 
+static const struct rpmh_vreg_init_data pmx75_vreg_data[] = {
+	RPMH_VREG("smps1",   "smp%s1",    &pmic5_ftsmps525_lv, "vdd-s1"),
+	RPMH_VREG("smps2",   "smp%s2",    &pmic5_ftsmps525_lv, "vdd-s2"),
+	RPMH_VREG("smps3",   "smp%s3",    &pmic5_ftsmps525_lv, "vdd-s3"),
+	RPMH_VREG("smps4",   "smp%s4",    &pmic5_ftsmps525_mv, "vdd-s4"),
+	RPMH_VREG("smps5",   "smp%s5",    &pmic5_ftsmps525_lv, "vdd-s5"),
+	RPMH_VREG("smps6",   "smp%s6",    &pmic5_ftsmps525_lv, "vdd-s6"),
+	RPMH_VREG("smps7",   "smp%s7",    &pmic5_ftsmps525_lv, "vdd-s7"),
+	RPMH_VREG("smps8",   "smp%s8",    &pmic5_ftsmps525_lv, "vdd-s8"),
+	RPMH_VREG("smps9",   "smp%s9",    &pmic5_ftsmps525_lv, "vdd-s9"),
+	RPMH_VREG("smps10",  "smp%s10",   &pmic5_ftsmps525_lv, "vdd-s10"),
+	RPMH_VREG("ldo1",    "ldo%s1",    &pmic5_nldo515,   "vdd-l1"),
+	RPMH_VREG("ldo2",    "ldo%s2",    &pmic5_nldo515,   "vdd-l2-18"),
+	RPMH_VREG("ldo3",    "ldo%s3",    &pmic5_nldo515,   "vdd-l3"),
+	RPMH_VREG("ldo4",    "ldo%s4",    &pmic5_nldo515,   "vdd-l4-l16"),
+	RPMH_VREG("ldo5",    "ldo%s5",    &pmic5_pldo_lv,   "vdd-l5-l6"),
+	RPMH_VREG("ldo6",    "ldo%s6",    &pmic5_pldo_lv,   "vdd-l5-l6"),
+	RPMH_VREG("ldo7",    "ldo%s7",    &pmic5_nldo515,   "vdd-l7"),
+	RPMH_VREG("ldo8",    "ldo%s8",    &pmic5_nldo515,   "vdd-l8-l9"),
+	RPMH_VREG("ldo9",    "ldo%s9",    &pmic5_nldo515,   "vdd-l8-l9"),
+	RPMH_VREG("ldo10",   "ldo%s10",   &pmic5_pldo,      "vdd-l10"),
+	RPMH_VREG("ldo11",   "ldo%s11",   &pmic5_pldo,      "vdd-l11-l13"),
+	RPMH_VREG("ldo12",   "ldo%s12",   &pmic5_nldo515,   "vdd-l12"),
+	RPMH_VREG("ldo13",   "ldo%s13",   &pmic5_pldo,      "vdd-l11-l13"),
+	RPMH_VREG("ldo14",   "ldo%s14",   &pmic5_nldo515,   "vdd-l14"),
+	RPMH_VREG("ldo15",   "ldo%s15",   &pmic5_nldo515,   "vdd-l15"),
+	RPMH_VREG("ldo16",   "ldo%s16",   &pmic5_nldo515,   "vdd-l4-l16"),
+	RPMH_VREG("ldo17",   "ldo%s17",   &pmic5_nldo515,   "vdd-l17"),
+	/* ldo18 not configured */
+	RPMH_VREG("ldo19",   "ldo%s19",   &pmic5_nldo515,   "vdd-l19"),
+	RPMH_VREG("ldo20",   "ldo%s20",   &pmic5_nldo515,   "vdd-l20-l21"),
+	RPMH_VREG("ldo21",   "ldo%s21",   &pmic5_nldo515,   "vdd-l20-l21"),
+};
+
 static const struct rpmh_vreg_init_data pm7325_vreg_data[] = {
 	RPMH_VREG("smps1",  "smp%s1",  &pmic5_hfsmps510, "vdd-s1"),
 	RPMH_VREG("smps2",  "smp%s2",  &pmic5_ftsmps520, "vdd-s2"),
@@ -1493,6 +1527,10 @@ static const struct of_device_id __maybe_unused rpmh_regulator_match_table[] = {
 		.compatible = "qcom,pmx65-rpmh-regulators",
 		.data = pmx65_vreg_data,
 	},
+	{
+		.compatible = "qcom,pmx75-rpmh-regulators",
+		.data = pmx75_vreg_data,
+	},
 	{
 		.compatible = "qcom,pm7325-rpmh-regulators",
 		.data = pm7325_vreg_data,

From 0c8b0bf42c8cef56f7cd9cd876fbb7ece9217064 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Fri, 21 Jul 2023 17:55:33 +0300
Subject: [PATCH 0387/2047] regmap: rbtree: Use alloc_flags for memory
 allocations

The kunit tests discovered a sleeping in atomic bug.  The allocations
in the regcache-rbtree code should use the map->alloc_flags instead of
GFP_KERNEL.

[    5.005510] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:306
[    5.005960] in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 117, name: kunit_try_catch
[    5.006219] preempt_count: 1, expected: 0
[    5.006414] 1 lock held by kunit_try_catch/117:
[    5.006590]  #0: 833b9010 (regmap_kunit:86:(config)->lock){....}-{2:2}, at: regmap_lock_spinlock+0x14/0x1c
[    5.007493] irq event stamp: 162
[    5.007627] hardirqs last  enabled at (161): [<80786738>] crng_make_state+0x1a0/0x294
[    5.007871] hardirqs last disabled at (162): [<80c531ec>] _raw_spin_lock_irqsave+0x7c/0x80
[    5.008119] softirqs last  enabled at (0): [<801110ac>] copy_process+0x810/0x2138
[    5.008356] softirqs last disabled at (0): [<00000000>] 0x0
[    5.008688] CPU: 0 PID: 117 Comm: kunit_try_catch Tainted: G                 N 6.4.4-rc3-g0e8d2fdfb188 #1
[    5.009011] Hardware name: Generic DT based system
[    5.009277]  unwind_backtrace from show_stack+0x18/0x1c
[    5.009497]  show_stack from dump_stack_lvl+0x38/0x5c
[    5.009676]  dump_stack_lvl from __might_resched+0x188/0x2d0
[    5.009860]  __might_resched from __kmem_cache_alloc_node+0x1dc/0x25c
[    5.010061]  __kmem_cache_alloc_node from kmalloc_trace+0x30/0xc8
[    5.010254]  kmalloc_trace from regcache_rbtree_write+0x26c/0x468
[    5.010446]  regcache_rbtree_write from _regmap_write+0x88/0x140
[    5.010634]  _regmap_write from regmap_write+0x44/0x68
[    5.010803]  regmap_write from basic_read_write+0x8c/0x270
[    5.010980]  basic_read_write from kunit_try_run_case+0x48/0xa0

Fixes: 28644c809f44 ("regmap: Add the rbtree cache support")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Closes: https://lore.kernel.org/all/ee59d128-413c-48ad-a3aa-d9d350c80042@roeck-us.net/
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/58f12a07-5f4b-4a8f-ab84-0a42d1908cb9@moroto.mountain
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regcache-rbtree.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c
index fabf87058d80..ae6b8788d5f3 100644
--- a/drivers/base/regmap/regcache-rbtree.c
+++ b/drivers/base/regmap/regcache-rbtree.c
@@ -277,7 +277,7 @@ static int regcache_rbtree_insert_to_block(struct regmap *map,
 
 	blk = krealloc(rbnode->block,
 		       blklen * map->cache_word_size,
-		       GFP_KERNEL);
+		       map->alloc_flags);
 	if (!blk)
 		return -ENOMEM;
 
@@ -286,7 +286,7 @@ static int regcache_rbtree_insert_to_block(struct regmap *map,
 	if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) {
 		present = krealloc(rbnode->cache_present,
 				   BITS_TO_LONGS(blklen) * sizeof(*present),
-				   GFP_KERNEL);
+				   map->alloc_flags);
 		if (!present)
 			return -ENOMEM;
 
@@ -320,7 +320,7 @@ regcache_rbtree_node_alloc(struct regmap *map, unsigned int reg)
 	const struct regmap_range *range;
 	int i;
 
-	rbnode = kzalloc(sizeof(*rbnode), GFP_KERNEL);
+	rbnode = kzalloc(sizeof(*rbnode), map->alloc_flags);
 	if (!rbnode)
 		return NULL;
 
@@ -346,13 +346,13 @@ regcache_rbtree_node_alloc(struct regmap *map, unsigned int reg)
 	}
 
 	rbnode->block = kmalloc_array(rbnode->blklen, map->cache_word_size,
-				      GFP_KERNEL);
+				      map->alloc_flags);
 	if (!rbnode->block)
 		goto err_free;
 
 	rbnode->cache_present = kcalloc(BITS_TO_LONGS(rbnode->blklen),
 					sizeof(*rbnode->cache_present),
-					GFP_KERNEL);
+					map->alloc_flags);
 	if (!rbnode->cache_present)
 		goto err_free_block;
 

From e02a4ccbeced64aa10f4e99683c721ec43e993c1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 21 Jul 2023 17:31:36 +0100
Subject: [PATCH 0388/2047] regmap: Remove dynamic allocation warnings for
 rbtree and maple

Thanks to Dan and Guenter's very prompt updates of the rbtree and maple
caches to support GPF_ATOMIC allocations and since the update shook out
a bunch of users at least some of whom have been suitably careful about
ensuring that the cache is prepoulated so there are no dynamic
allocations after init let's revert the warnings.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230721-regmap-enable-kmalloc-v1-1-f78287e794d3@kernel.org
---
 drivers/base/regmap/regmap.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 6c637b18d6b0..8b37451fa931 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -723,15 +723,6 @@ struct regmap *__regmap_init(struct device *dev,
 	} else {
 		if ((bus && bus->fast_io) ||
 		    config->fast_io) {
-			/*
-			 * fast_io is incompatible with REGCACHE_RBTREE and REGCACHE_MAPLE
-			 * since both need to dynamically allocate memory.
-			 */
-			if (config->cache_type == REGCACHE_RBTREE ||
-			    config->cache_type == REGCACHE_MAPLE) {
-				ret = -EINVAL;
-				goto err_name;
-			}
 			if (config->use_raw_spinlock) {
 				raw_spin_lock_init(&map->raw_spinlock);
 				map->lock = regmap_lock_raw_spinlock;

From c3629dd7e67d6ec5705d33b0de0d142c972fe573 Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Date: Wed, 19 Jul 2023 14:19:50 +0200
Subject: [PATCH 0389/2047] x86/mce: Prevent duplicate error records

A legitimate use case of the MCA infrastructure is to have the firmware
log all uncorrectable errors and also, have the OS see all correctable
errors.

The uncorrectable, UCNA errors are usually configured to be reported
through an SMI. CMCI, which is the correctable error reporting
interrupt, uses SMI too and having both enabled, leads to unnecessary
overhead.

So what ends up happening is, people disable CMCI in the wild and leave
on only the UCNA SMI.

When CMCI is disabled, the MCA infrastructure resorts to polling the MCA
banks. If a MCA MSR is shared between the logical threads, one error
ends up getting logged multiple times as the polling runs on every
logical thread.

Therefore, introduce locking on the Intel side of the polling routine to
prevent such duplicate error records from appearing.

Based on a patch by Aristeu Rozanski <aris@ruivo.org>.

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Tested-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@ruivo.org>
Link: https://lore.kernel.org/r/20230515143225.GC4090740@cathedrallabs.org
---
 arch/x86/kernel/cpu/mce/core.c     |  9 ++++++++-
 arch/x86/kernel/cpu/mce/intel.c    | 19 ++++++++++++++++++-
 arch/x86/kernel/cpu/mce/internal.h |  1 +
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 89e2aab5d34d..b8ad5a5b4026 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1608,6 +1608,13 @@ static void __start_timer(struct timer_list *t, unsigned long interval)
 	local_irq_restore(flags);
 }
 
+static void mc_poll_banks_default(void)
+{
+	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+}
+
+void (*mc_poll_banks)(void) = mc_poll_banks_default;
+
 static void mce_timer_fn(struct timer_list *t)
 {
 	struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
@@ -1618,7 +1625,7 @@ static void mce_timer_fn(struct timer_list *t)
 	iv = __this_cpu_read(mce_next_interval);
 
 	if (mce_available(this_cpu_ptr(&cpu_info))) {
-		machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+		mc_poll_banks();
 
 		if (mce_intel_cmci_poll()) {
 			iv = mce_adjust_timer(iv);
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 95275a5e57e0..f5323551c1a9 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -56,6 +56,13 @@ static DEFINE_PER_CPU(int, cmci_backoff_cnt);
  */
 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
+/*
+ * On systems that do support CMCI but it's disabled, polling for MCEs can
+ * cause the same event to be reported multiple times because IA32_MCi_STATUS
+ * is shared by the same package.
+ */
+static DEFINE_SPINLOCK(cmci_poll_lock);
+
 #define CMCI_THRESHOLD		1
 #define CMCI_POLL_INTERVAL	(30 * HZ)
 #define CMCI_STORM_INTERVAL	(HZ)
@@ -426,12 +433,22 @@ void cmci_disable_bank(int bank)
 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
 }
 
+/* Bank polling function when CMCI is disabled. */
+static void cmci_mc_poll_banks(void)
+{
+	spin_lock(&cmci_poll_lock);
+	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+	spin_unlock(&cmci_poll_lock);
+}
+
 void intel_init_cmci(void)
 {
 	int banks;
 
-	if (!cmci_supported(&banks))
+	if (!cmci_supported(&banks)) {
+		mc_poll_banks = cmci_mc_poll_banks;
 		return;
+	}
 
 	mce_threshold_vector = intel_threshold_interrupt;
 	cmci_discover(banks);
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index d2412ce2d312..ed4a71c0f093 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -274,4 +274,5 @@ static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
 	return 0;
 }
 
+extern void (*mc_poll_banks)(void);
 #endif /* __X86_MCE_INTERNAL_H__ */

From d4211390d88902462a7642eb22fd83083588eaf7 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 10 Jul 2023 22:27:27 +0200
Subject: [PATCH 0390/2047] hwrng: ingenic - enable compile testing

Enable compile testing for the ingenic-trng driver.

Remove the dependency on HW_RANDOM. The ingenic-trng config section is
under "if HW_RANDOM".

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 1aeba12391a1..5d72b8da3c36 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -271,8 +271,7 @@ config HW_RANDOM_INGENIC_RNG
 
 config HW_RANDOM_INGENIC_TRNG
 	tristate "Ingenic True Random Number Generator support"
-	depends on HW_RANDOM
-	depends on MACH_X1830
+	depends on MACH_X1830 || COMPILE_TEST
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the True Random Number Generator

From 4cb9a7271f46eb1cc58099a3503d4b3f3969e37c Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 10 Jul 2023 22:27:28 +0200
Subject: [PATCH 0391/2047] hwrng: ingenic - remove two unused defines

Remove two defines which are not used in the ingenic-trng driver's code.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/ingenic-trng.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/char/hw_random/ingenic-trng.c b/drivers/char/hw_random/ingenic-trng.c
index 0eb80f786f4d..64286cbdf158 100644
--- a/drivers/char/hw_random/ingenic-trng.c
+++ b/drivers/char/hw_random/ingenic-trng.c
@@ -22,8 +22,6 @@
 #define TRNG_REG_STATUS_OFFSET		0x08
 
 /* bits within the CFG register */
-#define CFG_RDY_CLR					BIT(12)
-#define CFG_INT_MASK				BIT(11)
 #define CFG_GEN_EN					BIT(0)
 
 /* bits within the STATUS register */

From 099f236879068547a05d02b92fb2861694027440 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 10 Jul 2023 22:27:29 +0200
Subject: [PATCH 0392/2047] hwrng: ingenic - remove dead assignments

Don't assign a value to ret if we're about to return from the probe
function and ret's value is not used.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/ingenic-trng.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/char/hw_random/ingenic-trng.c b/drivers/char/hw_random/ingenic-trng.c
index 64286cbdf158..bfec28ceab00 100644
--- a/drivers/char/hw_random/ingenic-trng.c
+++ b/drivers/char/hw_random/ingenic-trng.c
@@ -86,13 +86,11 @@ static int ingenic_trng_probe(struct platform_device *pdev)
 	trng->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(trng->base)) {
 		pr_err("%s: Failed to map DTRNG registers\n", __func__);
-		ret = PTR_ERR(trng->base);
 		return PTR_ERR(trng->base);
 	}
 
 	trng->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(trng->clk)) {
-		ret = PTR_ERR(trng->clk);
 		pr_crit("%s: Cannot get DTRNG clock\n", __func__);
 		return PTR_ERR(trng->clk);
 	}

From e4ab6e72e84d4cfd6f8e5f477204f3053a79108f Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 10 Jul 2023 22:27:30 +0200
Subject: [PATCH 0393/2047] hwrng: ingenic - use devm_clk_get_enabled

Use devm_clk_get_enabled in the ingenic-trng driver. We don't have to
disable and unprepare the clock any more in error paths or in the remove
function.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/ingenic-trng.c | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/drivers/char/hw_random/ingenic-trng.c b/drivers/char/hw_random/ingenic-trng.c
index bfec28ceab00..906fa78de47e 100644
--- a/drivers/char/hw_random/ingenic-trng.c
+++ b/drivers/char/hw_random/ingenic-trng.c
@@ -29,7 +29,6 @@
 
 struct ingenic_trng {
 	void __iomem *base;
-	struct clk *clk;
 	struct hwrng rng;
 };
 
@@ -77,6 +76,7 @@ static int ingenic_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait
 static int ingenic_trng_probe(struct platform_device *pdev)
 {
 	struct ingenic_trng *trng;
+	struct clk *clk;
 	int ret;
 
 	trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
@@ -89,16 +89,10 @@ static int ingenic_trng_probe(struct platform_device *pdev)
 		return PTR_ERR(trng->base);
 	}
 
-	trng->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(trng->clk)) {
-		pr_crit("%s: Cannot get DTRNG clock\n", __func__);
-		return PTR_ERR(trng->clk);
-	}
-
-	ret = clk_prepare_enable(trng->clk);
-	if (ret) {
-		pr_crit("%s: Unable to enable DTRNG clock\n", __func__);
-		return ret;
+	clk = devm_clk_get_enabled(&pdev->dev, NULL);
+	if (IS_ERR(clk)) {
+		pr_crit("%s: Cannot get and enable DTRNG clock\n", __func__);
+		return PTR_ERR(clk);
 	}
 
 	trng->rng.name = pdev->name;
@@ -109,17 +103,13 @@ static int ingenic_trng_probe(struct platform_device *pdev)
 	ret = hwrng_register(&trng->rng);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to register hwrng\n");
-		goto err_unprepare_clk;
+		return ret;
 	}
 
 	platform_set_drvdata(pdev, trng);
 
 	dev_info(&pdev->dev, "Ingenic DTRNG driver registered\n");
 	return 0;
-
-err_unprepare_clk:
-	clk_disable_unprepare(trng->clk);
-	return ret;
 }
 
 static int ingenic_trng_remove(struct platform_device *pdev)
@@ -133,8 +123,6 @@ static int ingenic_trng_remove(struct platform_device *pdev)
 	ctrl &= ~CFG_GEN_EN;
 	writel(ctrl, trng->base + TRNG_REG_CFG_OFFSET);
 
-	clk_disable_unprepare(trng->clk);
-
 	return 0;
 }
 

From 6257490b95252b0c22e4b518570d438cedc78a0d Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 10 Jul 2023 22:27:31 +0200
Subject: [PATCH 0394/2047] hwrng: ingenic - use dev_err_probe in error paths

Use dev_err_probe in error paths to make the code a bit shorter.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/ingenic-trng.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/char/hw_random/ingenic-trng.c b/drivers/char/hw_random/ingenic-trng.c
index 906fa78de47e..cc88a941c929 100644
--- a/drivers/char/hw_random/ingenic-trng.c
+++ b/drivers/char/hw_random/ingenic-trng.c
@@ -84,16 +84,14 @@ static int ingenic_trng_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	trng->base = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(trng->base)) {
-		pr_err("%s: Failed to map DTRNG registers\n", __func__);
-		return PTR_ERR(trng->base);
-	}
+	if (IS_ERR(trng->base))
+		return dev_err_probe(&pdev->dev, PTR_ERR(trng->base),
+				     "%s: Failed to map DTRNG registers\n", __func__);
 
 	clk = devm_clk_get_enabled(&pdev->dev, NULL);
-	if (IS_ERR(clk)) {
-		pr_crit("%s: Cannot get and enable DTRNG clock\n", __func__);
-		return PTR_ERR(clk);
-	}
+	if (IS_ERR(clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(clk),
+				     "%s: Cannot get and enable DTRNG clock\n", __func__);
 
 	trng->rng.name = pdev->name;
 	trng->rng.init = ingenic_trng_init;
@@ -101,10 +99,8 @@ static int ingenic_trng_probe(struct platform_device *pdev)
 	trng->rng.read = ingenic_trng_read;
 
 	ret = hwrng_register(&trng->rng);
-	if (ret) {
-		dev_err(&pdev->dev, "Failed to register hwrng\n");
-		return ret;
-	}
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret, "Failed to register hwrng\n");
 
 	platform_set_drvdata(pdev, trng);
 

From 71839a641066881e032b6353516b16bdafbf606a Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 10 Jul 2023 22:27:32 +0200
Subject: [PATCH 0395/2047] hwrng: ingenic - don't disable the rng in
 ingenic_trng_remove

There's no need to disable the rng in ingenic_trng_remove.

The driver's init function sets the CFG_GEN_EN bit to enable the
rng. The cleanup function clears CFG_GEN_EN to revert this.

The remove function calls hwrng_unregister. If the ingenic-trng is not
the current rng at this point, CFG_GEN_EN has already been cleared. If
the ingenic-trng is the current rng, drop_current_rng will call the
cleanup function.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/ingenic-trng.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/char/hw_random/ingenic-trng.c b/drivers/char/hw_random/ingenic-trng.c
index cc88a941c929..9c54721f8399 100644
--- a/drivers/char/hw_random/ingenic-trng.c
+++ b/drivers/char/hw_random/ingenic-trng.c
@@ -111,14 +111,9 @@ static int ingenic_trng_probe(struct platform_device *pdev)
 static int ingenic_trng_remove(struct platform_device *pdev)
 {
 	struct ingenic_trng *trng = platform_get_drvdata(pdev);
-	unsigned int ctrl;
 
 	hwrng_unregister(&trng->rng);
 
-	ctrl = readl(trng->base + TRNG_REG_CFG_OFFSET);
-	ctrl &= ~CFG_GEN_EN;
-	writel(ctrl, trng->base + TRNG_REG_CFG_OFFSET);
-
 	return 0;
 }
 

From a40be5e89ff636533fc849f3db8964388c0ee8f1 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 10 Jul 2023 22:27:33 +0200
Subject: [PATCH 0396/2047] hwrng: ingenic - switch to device managed
 registration

Call devm_hwrng_register for device managed registration of the
ingenic-trng driver. ingenic_trng_remove can then be deleted.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/ingenic-trng.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/char/hw_random/ingenic-trng.c b/drivers/char/hw_random/ingenic-trng.c
index 9c54721f8399..3967a8dbe967 100644
--- a/drivers/char/hw_random/ingenic-trng.c
+++ b/drivers/char/hw_random/ingenic-trng.c
@@ -98,7 +98,7 @@ static int ingenic_trng_probe(struct platform_device *pdev)
 	trng->rng.cleanup = ingenic_trng_cleanup;
 	trng->rng.read = ingenic_trng_read;
 
-	ret = hwrng_register(&trng->rng);
+	ret = devm_hwrng_register(&pdev->dev, &trng->rng);
 	if (ret)
 		return dev_err_probe(&pdev->dev, ret, "Failed to register hwrng\n");
 
@@ -108,15 +108,6 @@ static int ingenic_trng_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int ingenic_trng_remove(struct platform_device *pdev)
-{
-	struct ingenic_trng *trng = platform_get_drvdata(pdev);
-
-	hwrng_unregister(&trng->rng);
-
-	return 0;
-}
-
 static const struct of_device_id ingenic_trng_of_match[] = {
 	{ .compatible = "ingenic,x1830-dtrng" },
 	{ /* sentinel */ }
@@ -125,7 +116,6 @@ MODULE_DEVICE_TABLE(of, ingenic_trng_of_match);
 
 static struct platform_driver ingenic_trng_driver = {
 	.probe		= ingenic_trng_probe,
-	.remove		= ingenic_trng_remove,
 	.driver		= {
 		.name	= "ingenic-trng",
 		.of_match_table = ingenic_trng_of_match,

From e8c1fdcc62d373da38f673e474564fbed9e249dd Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Thu, 13 Jul 2023 09:04:44 +0200
Subject: [PATCH 0397/2047] hwrng: ba431 - do not set drvdata

Do not set drvdata in the ba431 driver. Nobody is using it.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/ba431-rng.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/char/hw_random/ba431-rng.c b/drivers/char/hw_random/ba431-rng.c
index 5b7ca0416490..b1518dd52a24 100644
--- a/drivers/char/hw_random/ba431-rng.c
+++ b/drivers/char/hw_random/ba431-rng.c
@@ -189,8 +189,6 @@ static int ba431_trng_probe(struct platform_device *pdev)
 	ba431->rng.cleanup = ba431_trng_cleanup;
 	ba431->rng.read = ba431_trng_read;
 
-	platform_set_drvdata(pdev, ba431);
-
 	ret = devm_hwrng_register(&pdev->dev, &ba431->rng);
 	if (ret) {
 		dev_err(&pdev->dev, "BA431 registration failed (%d)\n", ret);

From 97b7aa77d1705ea25d484a77de44bf55d0a772a0 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Thu, 13 Jul 2023 09:04:45 +0200
Subject: [PATCH 0398/2047] hwrng: ba431 - don't init of_device_id's data

We have no device-specific data for silex-insight,ba431-rng. There's no
need to set .data = NULL, this is the default.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/ba431-rng.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/ba431-rng.c b/drivers/char/hw_random/ba431-rng.c
index b1518dd52a24..d2a9d16323a6 100644
--- a/drivers/char/hw_random/ba431-rng.c
+++ b/drivers/char/hw_random/ba431-rng.c
@@ -201,7 +201,7 @@ static int ba431_trng_probe(struct platform_device *pdev)
 }
 
 static const struct of_device_id ba431_trng_dt_ids[] = {
-	{ .compatible = "silex-insight,ba431-rng", .data = NULL },
+	{ .compatible = "silex-insight,ba431-rng" },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, ba431_trng_dt_ids);

From 1422e363516c8ac5183a72124bd860d796ebd9e7 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Thu, 13 Jul 2023 09:04:46 +0200
Subject: [PATCH 0399/2047] hwrng: ba431 - use dev_err_probe after failed
 registration

Use dev_err_probe to print the error message after a failed hwrng
registration.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/ba431-rng.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/char/hw_random/ba431-rng.c b/drivers/char/hw_random/ba431-rng.c
index d2a9d16323a6..9de7466e6896 100644
--- a/drivers/char/hw_random/ba431-rng.c
+++ b/drivers/char/hw_random/ba431-rng.c
@@ -190,10 +190,8 @@ static int ba431_trng_probe(struct platform_device *pdev)
 	ba431->rng.read = ba431_trng_read;
 
 	ret = devm_hwrng_register(&pdev->dev, &ba431->rng);
-	if (ret) {
-		dev_err(&pdev->dev, "BA431 registration failed (%d)\n", ret);
-		return ret;
-	}
+	if (ret)
+		return dev_err_probe(&pdev->dev, ret, "BA431 registration failed\n");
 
 	dev_info(&pdev->dev, "BA431 TRNG registered\n");
 

From 0d51794386325e90f2215fe9b2f4cb7390029260 Mon Sep 17 00:00:00 2001
From: Lionel Debieve <lionel.debieve@foss.st.com>
Date: Thu, 13 Jul 2023 17:15:12 +0200
Subject: [PATCH 0400/2047] dt-bindings: crypto: add new compatible for
 stm32-hash

Add a new compatible for stm32mp13 support.

Signed-off-by: Lionel Debieve <lionel.debieve@foss.st.com>
Signed-off-by: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
index b767ec72a999..ac480765cde0 100644
--- a/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
@@ -20,6 +20,7 @@ properties:
       - stericsson,ux500-hash
       - st,stm32f456-hash
       - st,stm32f756-hash
+      - st,stm32mp13-hash
 
   reg:
     maxItems: 1

From b6248fb8b8324cfde5eefe2ead1ba3650452d410 Mon Sep 17 00:00:00 2001
From: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Date: Thu, 13 Jul 2023 17:15:13 +0200
Subject: [PATCH 0401/2047] crypto: stm32 - add new algorithms support

Add the all SHA-2 (up to 512) and SHA-3 algorithm support.
Update compatible table to add stm32mp13.

Signed-off-by: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/Kconfig      |   2 +
 drivers/crypto/stm32/stm32-hash.c | 676 ++++++++++++++++++++++++------
 2 files changed, 548 insertions(+), 130 deletions(-)

diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig
index 4fc581e9e595..49dfd161e9b9 100644
--- a/drivers/crypto/stm32/Kconfig
+++ b/drivers/crypto/stm32/Kconfig
@@ -16,6 +16,8 @@ config CRYPTO_DEV_STM32_HASH
 	select CRYPTO_MD5
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
+	select CRYPTO_SHA512
+	select CRYPTO_SHA3
 	select CRYPTO_ENGINE
 	help
 	  This enables support for the HASH hw accelerator which can be found
diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index f0df32382719..30e42a402aa7 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -26,6 +26,7 @@
 #include <crypto/scatterwalk.h>
 #include <crypto/sha1.h>
 #include <crypto/sha2.h>
+#include <crypto/sha3.h>
 #include <crypto/internal/hash.h>
 
 #define HASH_CR				0x00
@@ -45,19 +46,11 @@
 #define HASH_CR_DMAE			BIT(3)
 #define HASH_CR_DATATYPE_POS		4
 #define HASH_CR_MODE			BIT(6)
+#define HASH_CR_ALGO_POS		7
 #define HASH_CR_MDMAT			BIT(13)
 #define HASH_CR_DMAA			BIT(14)
 #define HASH_CR_LKEY			BIT(16)
 
-#define HASH_CR_ALGO_SHA1		0x0
-#define HASH_CR_ALGO_MD5		0x80
-#define HASH_CR_ALGO_SHA224		0x40000
-#define HASH_CR_ALGO_SHA256		0x40080
-
-#define HASH_CR_UX500_EMPTYMSG		BIT(20)
-#define HASH_CR_UX500_ALGO_SHA1		BIT(7)
-#define HASH_CR_UX500_ALGO_SHA256	0x0
-
 /* Interrupt */
 #define HASH_DINIE			BIT(0)
 #define HASH_DCIE			BIT(1)
@@ -66,9 +59,6 @@
 #define HASH_MASK_CALC_COMPLETION	BIT(0)
 #define HASH_MASK_DATA_INPUT		BIT(1)
 
-/* Context swap register */
-#define HASH_CSR_REGISTER_NUMBER	54
-
 /* Status Flags */
 #define HASH_SR_DATA_INPUT_READY	BIT(0)
 #define HASH_SR_OUTPUT_READY		BIT(1)
@@ -79,6 +69,18 @@
 #define HASH_STR_NBLW_MASK		GENMASK(4, 0)
 #define HASH_STR_DCAL			BIT(8)
 
+/* HWCFGR Register */
+#define HASH_HWCFG_DMA_MASK		GENMASK(3, 0)
+
+/* Context swap register */
+#define HASH_CSR_NB_SHA256_HMAC		54
+#define HASH_CSR_NB_SHA256		38
+#define HASH_CSR_NB_SHA512_HMAC		103
+#define HASH_CSR_NB_SHA512		91
+#define HASH_CSR_NB_SHA3_HMAC		88
+#define HASH_CSR_NB_SHA3		72
+#define HASH_CSR_NB_MAX			HASH_CSR_NB_SHA512_HMAC
+
 #define HASH_FLAGS_INIT			BIT(0)
 #define HASH_FLAGS_OUTPUT_READY		BIT(1)
 #define HASH_FLAGS_CPU			BIT(2)
@@ -87,20 +89,20 @@
 #define HASH_FLAGS_HMAC_INIT		BIT(5)
 #define HASH_FLAGS_HMAC_FINAL		BIT(6)
 #define HASH_FLAGS_HMAC_KEY		BIT(7)
-
+#define HASH_FLAGS_SHA3_MODE		BIT(8)
 #define HASH_FLAGS_FINAL		BIT(15)
 #define HASH_FLAGS_FINUP		BIT(16)
-#define HASH_FLAGS_ALGO_MASK		GENMASK(21, 18)
-#define HASH_FLAGS_MD5			BIT(18)
-#define HASH_FLAGS_SHA1			BIT(19)
-#define HASH_FLAGS_SHA224		BIT(20)
-#define HASH_FLAGS_SHA256		BIT(21)
+#define HASH_FLAGS_ALGO_MASK		GENMASK(20, 17)
+#define HASH_FLAGS_ALGO_SHIFT		17
+#define HASH_FLAGS_ERRORS		BIT(21)
 #define HASH_FLAGS_EMPTY		BIT(22)
 #define HASH_FLAGS_HMAC			BIT(23)
 
 #define HASH_OP_UPDATE			1
 #define HASH_OP_FINAL			2
 
+#define HASH_BURST_LEVEL		4
+
 enum stm32_hash_data_format {
 	HASH_DATA_32_BITS		= 0x0,
 	HASH_DATA_16_BITS		= 0x1,
@@ -108,11 +110,26 @@ enum stm32_hash_data_format {
 	HASH_DATA_1_BIT			= 0x3
 };
 
-#define HASH_BUFLEN			256
-#define HASH_LONG_KEY			64
-#define HASH_MAX_KEY_SIZE		(SHA256_BLOCK_SIZE * 8)
-#define HASH_QUEUE_LENGTH		16
-#define HASH_DMA_THRESHOLD		50
+#define HASH_BUFLEN			(SHA3_224_BLOCK_SIZE + 4)
+#define HASH_MAX_KEY_SIZE		(SHA512_BLOCK_SIZE * 8)
+
+enum stm32_hash_algo {
+	HASH_SHA1			= 0,
+	HASH_MD5			= 1,
+	HASH_SHA224			= 2,
+	HASH_SHA256			= 3,
+	HASH_SHA3_224			= 4,
+	HASH_SHA3_256			= 5,
+	HASH_SHA3_384			= 6,
+	HASH_SHA3_512			= 7,
+	HASH_SHA384			= 12,
+	HASH_SHA512			= 15,
+};
+
+enum ux500_hash_algo {
+	HASH_SHA256_UX500		= 0,
+	HASH_SHA1_UX500			= 1,
+};
 
 #define HASH_AUTOSUSPEND_DELAY		50
 
@@ -130,19 +147,19 @@ struct stm32_hash_state {
 	u32			flags;
 
 	u16			bufcnt;
-	u16			buflen;
+	u16			blocklen;
 
 	u8 buffer[HASH_BUFLEN] __aligned(4);
 
 	/* hash state */
-	u32			hw_context[3 + HASH_CSR_REGISTER_NUMBER];
+	u32			hw_context[3 + HASH_CSR_NB_MAX];
 };
 
 struct stm32_hash_request_ctx {
 	struct stm32_hash_dev	*hdev;
 	unsigned long		op;
 
-	u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
+	u8 digest[SHA512_DIGEST_SIZE] __aligned(sizeof(u32));
 	size_t			digcnt;
 
 	/* DMA */
@@ -166,12 +183,13 @@ struct stm32_hash_algs_info {
 };
 
 struct stm32_hash_pdata {
-	struct stm32_hash_algs_info	*algs_info;
-	size_t				algs_info_size;
-	bool				has_sr;
-	bool				has_mdmat;
-	bool				broken_emptymsg;
-	bool				ux500;
+	const int				alg_shift;
+	const struct stm32_hash_algs_info	*algs_info;
+	size_t					algs_info_size;
+	bool					has_sr;
+	bool					has_mdmat;
+	bool					broken_emptymsg;
+	bool					ux500;
 };
 
 struct stm32_hash_dev {
@@ -182,7 +200,6 @@ struct stm32_hash_dev {
 	void __iomem		*io_base;
 	phys_addr_t		phys_base;
 	u32			dma_mode;
-	u32			dma_maxburst;
 	bool			polled;
 
 	struct ahash_request	*req;
@@ -275,31 +292,19 @@ static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev, int bufcnt)
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(hdev->req);
 	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm);
 	struct stm32_hash_state *state = &rctx->state;
+	u32 alg = (state->flags & HASH_FLAGS_ALGO_MASK) >> HASH_FLAGS_ALGO_SHIFT;
 
 	u32 reg = HASH_CR_INIT;
 
 	if (!(hdev->flags & HASH_FLAGS_INIT)) {
-		switch (state->flags & HASH_FLAGS_ALGO_MASK) {
-		case HASH_FLAGS_MD5:
-			reg |= HASH_CR_ALGO_MD5;
-			break;
-		case HASH_FLAGS_SHA1:
-			if (hdev->pdata->ux500)
-				reg |= HASH_CR_UX500_ALGO_SHA1;
+		if (hdev->pdata->ux500) {
+			reg |= ((alg & BIT(0)) << HASH_CR_ALGO_POS);
+		} else {
+			if (hdev->pdata->alg_shift == HASH_CR_ALGO_POS)
+				reg |= ((alg & BIT(1)) << 17) |
+				       ((alg & BIT(0)) << HASH_CR_ALGO_POS);
 			else
-				reg |= HASH_CR_ALGO_SHA1;
-			break;
-		case HASH_FLAGS_SHA224:
-			reg |= HASH_CR_ALGO_SHA224;
-			break;
-		case HASH_FLAGS_SHA256:
-			if (hdev->pdata->ux500)
-				reg |= HASH_CR_UX500_ALGO_SHA256;
-			else
-				reg |= HASH_CR_ALGO_SHA256;
-			break;
-		default:
-			reg |= HASH_CR_ALGO_MD5;
+				reg |= alg << hdev->pdata->alg_shift;
 		}
 
 		reg |= (rctx->data_type << HASH_CR_DATATYPE_POS);
@@ -307,7 +312,7 @@ static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev, int bufcnt)
 		if (state->flags & HASH_FLAGS_HMAC) {
 			hdev->flags |= HASH_FLAGS_HMAC;
 			reg |= HASH_CR_MODE;
-			if (ctx->keylen > HASH_LONG_KEY)
+			if (ctx->keylen > crypto_ahash_blocksize(tfm))
 				reg |= HASH_CR_LKEY;
 		}
 
@@ -318,6 +323,12 @@ static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev, int bufcnt)
 
 		hdev->flags |= HASH_FLAGS_INIT;
 
+		/*
+		 * After first block + 1 words are fill up,
+		 * we only need to fill 1 block to start partial computation
+		 */
+		rctx->state.blocklen -= sizeof(u32);
+
 		dev_dbg(hdev->dev, "Write Control %x\n", reg);
 	}
 }
@@ -327,9 +338,9 @@ static void stm32_hash_append_sg(struct stm32_hash_request_ctx *rctx)
 	struct stm32_hash_state *state = &rctx->state;
 	size_t count;
 
-	while ((state->bufcnt < state->buflen) && rctx->total) {
+	while ((state->bufcnt < state->blocklen) && rctx->total) {
 		count = min(rctx->sg->length - rctx->offset, rctx->total);
-		count = min_t(size_t, count, state->buflen - state->bufcnt);
+		count = min_t(size_t, count, state->blocklen - state->bufcnt);
 
 		if (count <= 0) {
 			if ((rctx->sg->length == 0) && !sg_is_last(rctx->sg)) {
@@ -419,20 +430,59 @@ static int stm32_hash_xmit_cpu(struct stm32_hash_dev *hdev,
 	return 0;
 }
 
+static int hash_swap_reg(struct stm32_hash_request_ctx *rctx)
+{
+	struct stm32_hash_state *state = &rctx->state;
+
+	switch ((state->flags & HASH_FLAGS_ALGO_MASK) >>
+		HASH_FLAGS_ALGO_SHIFT) {
+	case HASH_MD5:
+	case HASH_SHA1:
+	case HASH_SHA224:
+	case HASH_SHA256:
+		if (state->flags & HASH_FLAGS_HMAC)
+			return HASH_CSR_NB_SHA256_HMAC;
+		else
+			return HASH_CSR_NB_SHA256;
+		break;
+
+	case HASH_SHA384:
+	case HASH_SHA512:
+		if (state->flags & HASH_FLAGS_HMAC)
+			return HASH_CSR_NB_SHA512_HMAC;
+		else
+			return HASH_CSR_NB_SHA512;
+		break;
+
+	case HASH_SHA3_224:
+	case HASH_SHA3_256:
+	case HASH_SHA3_384:
+	case HASH_SHA3_512:
+		if (state->flags & HASH_FLAGS_HMAC)
+			return HASH_CSR_NB_SHA3_HMAC;
+		else
+			return HASH_CSR_NB_SHA3;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+}
+
 static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev)
 {
 	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req);
 	struct stm32_hash_state *state = &rctx->state;
 	u32 *preg = state->hw_context;
 	int bufcnt, err = 0, final;
-	int i;
+	int i, swap_reg;
 
 	dev_dbg(hdev->dev, "%s flags %x\n", __func__, state->flags);
 
 	final = state->flags & HASH_FLAGS_FINAL;
 
-	while ((rctx->total >= state->buflen) ||
-	       (state->bufcnt + rctx->total >= state->buflen)) {
+	while ((rctx->total >= state->blocklen) ||
+	       (state->bufcnt + rctx->total >= state->blocklen)) {
 		stm32_hash_append_sg(rctx);
 		bufcnt = state->bufcnt;
 		state->bufcnt = 0;
@@ -455,11 +505,13 @@ static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev)
 	if (stm32_hash_wait_busy(hdev))
 		return -ETIMEDOUT;
 
+	swap_reg = hash_swap_reg(rctx);
+
 	if (!hdev->pdata->ux500)
 		*preg++ = stm32_hash_read(hdev, HASH_IMR);
 	*preg++ = stm32_hash_read(hdev, HASH_STR);
 	*preg++ = stm32_hash_read(hdev, HASH_CR);
-	for (i = 0; i < HASH_CSR_REGISTER_NUMBER; i++)
+	for (i = 0; i < swap_reg; i++)
 		*preg++ = stm32_hash_read(hdev, HASH_CSR(i));
 
 	state->flags |= HASH_FLAGS_INIT;
@@ -544,7 +596,7 @@ static int stm32_hash_hmac_dma_send(struct stm32_hash_dev *hdev)
 	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm);
 	int err;
 
-	if (ctx->keylen < HASH_DMA_THRESHOLD || (hdev->dma_mode == 1)) {
+	if (ctx->keylen < rctx->state.blocklen || hdev->dma_mode == 1) {
 		err = stm32_hash_write_key(hdev);
 		if (stm32_hash_wait_busy(hdev))
 			return -ETIMEDOUT;
@@ -579,8 +631,8 @@ static int stm32_hash_dma_init(struct stm32_hash_dev *hdev)
 	dma_conf.direction = DMA_MEM_TO_DEV;
 	dma_conf.dst_addr = hdev->phys_base + HASH_DIN;
 	dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	dma_conf.src_maxburst = hdev->dma_maxburst;
-	dma_conf.dst_maxburst = hdev->dma_maxburst;
+	dma_conf.src_maxburst = HASH_BURST_LEVEL;
+	dma_conf.dst_maxburst = HASH_BURST_LEVEL;
 	dma_conf.device_fc = false;
 
 	chan = dma_request_chan(hdev->dev, "in");
@@ -614,7 +666,6 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
 	rctx->total = hdev->req->nbytes;
 
 	rctx->nents = sg_nents(rctx->sg);
-
 	if (rctx->nents < 0)
 		return -EINVAL;
 
@@ -718,11 +769,12 @@ static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx)
 static bool stm32_hash_dma_aligned_data(struct ahash_request *req)
 {
 	struct scatterlist *sg;
+	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
 	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
 	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
 	int i;
 
-	if (req->nbytes <= HASH_DMA_THRESHOLD)
+	if (!hdev->dma_lch || req->nbytes <= rctx->state.blocklen)
 		return false;
 
 	if (sg_nents(req->src) > 1) {
@@ -748,31 +800,64 @@ static int stm32_hash_init(struct ahash_request *req)
 	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
 	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
 	struct stm32_hash_state *state = &rctx->state;
+	bool sha3_mode = ctx->flags & HASH_FLAGS_SHA3_MODE;
 
 	rctx->hdev = hdev;
 
 	state->flags = HASH_FLAGS_CPU;
 
+	if (sha3_mode)
+		state->flags |= HASH_FLAGS_SHA3_MODE;
+
 	rctx->digcnt = crypto_ahash_digestsize(tfm);
 	switch (rctx->digcnt) {
 	case MD5_DIGEST_SIZE:
-		state->flags |= HASH_FLAGS_MD5;
+		state->flags |= HASH_MD5 << HASH_FLAGS_ALGO_SHIFT;
 		break;
 	case SHA1_DIGEST_SIZE:
-		state->flags |= HASH_FLAGS_SHA1;
+		if (hdev->pdata->ux500)
+			state->flags |= HASH_SHA1_UX500 << HASH_FLAGS_ALGO_SHIFT;
+		else
+			state->flags |= HASH_SHA1 << HASH_FLAGS_ALGO_SHIFT;
 		break;
 	case SHA224_DIGEST_SIZE:
-		state->flags |= HASH_FLAGS_SHA224;
+		if (sha3_mode)
+			state->flags |= HASH_SHA3_224 << HASH_FLAGS_ALGO_SHIFT;
+		else
+			state->flags |= HASH_SHA224 << HASH_FLAGS_ALGO_SHIFT;
 		break;
 	case SHA256_DIGEST_SIZE:
-		state->flags |= HASH_FLAGS_SHA256;
+		if (sha3_mode) {
+			state->flags |= HASH_SHA3_256 << HASH_FLAGS_ALGO_SHIFT;
+		} else {
+			if (hdev->pdata->ux500)
+				state->flags |= HASH_SHA256_UX500 << HASH_FLAGS_ALGO_SHIFT;
+			else
+				state->flags |= HASH_SHA256 << HASH_FLAGS_ALGO_SHIFT;
+		}
+		break;
+	case SHA384_DIGEST_SIZE:
+		if (sha3_mode)
+			state->flags |= HASH_SHA3_384 << HASH_FLAGS_ALGO_SHIFT;
+		else
+			state->flags |= HASH_SHA384 << HASH_FLAGS_ALGO_SHIFT;
+		break;
+	case SHA512_DIGEST_SIZE:
+		if (sha3_mode)
+			state->flags |= HASH_SHA3_512 << HASH_FLAGS_ALGO_SHIFT;
+		else
+			state->flags |= HASH_SHA512 << HASH_FLAGS_ALGO_SHIFT;
 		break;
 	default:
 		return -EINVAL;
 	}
 
 	rctx->state.bufcnt = 0;
-	rctx->state.buflen = HASH_BUFLEN;
+	rctx->state.blocklen = crypto_ahash_blocksize(tfm) + sizeof(u32);
+	if (rctx->state.blocklen > HASH_BUFLEN) {
+		dev_err(hdev->dev, "Error, block too large");
+		return -EINVAL;
+	}
 	rctx->total = 0;
 	rctx->offset = 0;
 	rctx->data_type = HASH_DATA_8_BITS;
@@ -842,6 +927,7 @@ static void stm32_hash_emptymsg_fallback(struct ahash_request *req)
 
 static void stm32_hash_copy_hash(struct ahash_request *req)
 {
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
 	struct stm32_hash_state *state = &rctx->state;
 	struct stm32_hash_dev *hdev = rctx->hdev;
@@ -851,22 +937,7 @@ static void stm32_hash_copy_hash(struct ahash_request *req)
 	if (hdev->pdata->broken_emptymsg && (state->flags & HASH_FLAGS_EMPTY))
 		return stm32_hash_emptymsg_fallback(req);
 
-	switch (state->flags & HASH_FLAGS_ALGO_MASK) {
-	case HASH_FLAGS_MD5:
-		hashsize = MD5_DIGEST_SIZE;
-		break;
-	case HASH_FLAGS_SHA1:
-		hashsize = SHA1_DIGEST_SIZE;
-		break;
-	case HASH_FLAGS_SHA224:
-		hashsize = SHA224_DIGEST_SIZE;
-		break;
-	case HASH_FLAGS_SHA256:
-		hashsize = SHA256_DIGEST_SIZE;
-		break;
-	default:
-		return;
-	}
+	hashsize = crypto_ahash_digestsize(tfm);
 
 	for (i = 0; i < hashsize / sizeof(u32); i++) {
 		if (hdev->pdata->ux500)
@@ -881,6 +952,11 @@ static void stm32_hash_copy_hash(struct ahash_request *req)
 static int stm32_hash_finish(struct ahash_request *req)
 {
 	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+	u32 reg;
+
+	reg = stm32_hash_read(rctx->hdev, HASH_SR);
+	reg &= ~HASH_SR_OUTPUT_READY;
+	stm32_hash_write(rctx->hdev, HASH_SR, reg);
 
 	if (!req->result)
 		return -EINVAL;
@@ -920,6 +996,7 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq)
 	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
 	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
 	struct stm32_hash_state *state = &rctx->state;
+	int swap_reg;
 	int err = 0;
 
 	if (!hdev)
@@ -932,6 +1009,7 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq)
 
 	hdev->req = req;
 	hdev->flags = 0;
+	swap_reg = hash_swap_reg(rctx);
 
 	if (state->flags & HASH_FLAGS_INIT) {
 		u32 *preg = rctx->state.hw_context;
@@ -945,7 +1023,7 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq)
 		reg = *preg++ | HASH_CR_INIT;
 		stm32_hash_write(hdev, HASH_CR, reg);
 
-		for (i = 0; i < HASH_CSR_REGISTER_NUMBER; i++)
+		for (i = 0; i < swap_reg; i++)
 			stm32_hash_write(hdev, HASH_CSR(i), *preg++);
 
 		hdev->flags |= HASH_FLAGS_INIT;
@@ -1000,7 +1078,7 @@ static int stm32_hash_update(struct ahash_request *req)
 	rctx->sg = req->src;
 	rctx->offset = 0;
 
-	if ((state->bufcnt + rctx->total < state->buflen)) {
+	if ((state->bufcnt + rctx->total < state->blocklen)) {
 		stm32_hash_append_sg(rctx);
 		return 0;
 	}
@@ -1102,8 +1180,7 @@ static int stm32_hash_init_fallback(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static int stm32_hash_cra_init_algs(struct crypto_tfm *tfm,
-				    const char *algs_hmac_name)
+static int stm32_hash_cra_init_algs(struct crypto_tfm *tfm, u32 algs_flags)
 {
 	struct stm32_hash_ctx *ctx = crypto_tfm_ctx(tfm);
 
@@ -1112,8 +1189,8 @@ static int stm32_hash_cra_init_algs(struct crypto_tfm *tfm,
 
 	ctx->keylen = 0;
 
-	if (algs_hmac_name)
-		ctx->flags |= HASH_FLAGS_HMAC;
+	if (algs_flags)
+		ctx->flags |= algs_flags;
 
 	ctx->enginectx.op.do_one_request = stm32_hash_one_request;
 
@@ -1122,28 +1199,25 @@ static int stm32_hash_cra_init_algs(struct crypto_tfm *tfm,
 
 static int stm32_hash_cra_init(struct crypto_tfm *tfm)
 {
-	return stm32_hash_cra_init_algs(tfm, NULL);
+	return stm32_hash_cra_init_algs(tfm, 0);
 }
 
-static int stm32_hash_cra_md5_init(struct crypto_tfm *tfm)
+static int stm32_hash_cra_hmac_init(struct crypto_tfm *tfm)
 {
-	return stm32_hash_cra_init_algs(tfm, "md5");
+	return stm32_hash_cra_init_algs(tfm, HASH_FLAGS_HMAC);
 }
 
-static int stm32_hash_cra_sha1_init(struct crypto_tfm *tfm)
+static int stm32_hash_cra_sha3_init(struct crypto_tfm *tfm)
 {
-	return stm32_hash_cra_init_algs(tfm, "sha1");
+	return stm32_hash_cra_init_algs(tfm, HASH_FLAGS_SHA3_MODE);
 }
 
-static int stm32_hash_cra_sha224_init(struct crypto_tfm *tfm)
+static int stm32_hash_cra_sha3_hmac_init(struct crypto_tfm *tfm)
 {
-	return stm32_hash_cra_init_algs(tfm, "sha224");
+	return stm32_hash_cra_init_algs(tfm, HASH_FLAGS_SHA3_MODE |
+					HASH_FLAGS_HMAC);
 }
 
-static int stm32_hash_cra_sha256_init(struct crypto_tfm *tfm)
-{
-	return stm32_hash_cra_init_algs(tfm, "sha256");
-}
 
 static void stm32_hash_cra_exit(struct crypto_tfm *tfm)
 {
@@ -1185,8 +1259,6 @@ static irqreturn_t stm32_hash_irq_handler(int irq, void *dev_id)
 
 	reg = stm32_hash_read(hdev, HASH_SR);
 	if (reg & HASH_SR_OUTPUT_READY) {
-		reg &= ~HASH_SR_OUTPUT_READY;
-		stm32_hash_write(hdev, HASH_SR, reg);
 		hdev->flags |= HASH_FLAGS_OUTPUT_READY;
 		/* Disable IT*/
 		stm32_hash_write(hdev, HASH_IMR, 0);
@@ -1244,12 +1316,12 @@ static struct ahash_alg algs_md5[] = {
 				.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
 				.cra_alignmask = 3,
-				.cra_init = stm32_hash_cra_md5_init,
+				.cra_init = stm32_hash_cra_hmac_init,
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
 		}
-	},
+	}
 };
 
 static struct ahash_alg algs_sha1[] = {
@@ -1300,7 +1372,7 @@ static struct ahash_alg algs_sha1[] = {
 				.cra_blocksize = SHA1_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
 				.cra_alignmask = 3,
-				.cra_init = stm32_hash_cra_sha1_init,
+				.cra_init = stm32_hash_cra_hmac_init,
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
@@ -1356,7 +1428,7 @@ static struct ahash_alg algs_sha224[] = {
 				.cra_blocksize = SHA224_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
 				.cra_alignmask = 3,
-				.cra_init = stm32_hash_cra_sha224_init,
+				.cra_init = stm32_hash_cra_hmac_init,
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
@@ -1412,7 +1484,7 @@ static struct ahash_alg algs_sha256[] = {
 				.cra_blocksize = SHA256_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
 				.cra_alignmask = 3,
-				.cra_init = stm32_hash_cra_sha256_init,
+				.cra_init = stm32_hash_cra_hmac_init,
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
@@ -1420,6 +1492,330 @@ static struct ahash_alg algs_sha256[] = {
 	},
 };
 
+static struct ahash_alg algs_sha384_sha512[] = {
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.halg = {
+			.digestsize = SHA384_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_state),
+			.base = {
+				.cra_name = "sha384",
+				.cra_driver_name = "stm32-sha384",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA384_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_init,
+				.cra_exit = stm32_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.setkey = stm32_hash_setkey,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.halg = {
+			.digestsize = SHA384_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_state),
+			.base = {
+				.cra_name = "hmac(sha384)",
+				.cra_driver_name = "stm32-hmac-sha384",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA384_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_hmac_init,
+				.cra_exit = stm32_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.halg = {
+			.digestsize = SHA512_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_state),
+			.base = {
+				.cra_name = "sha512",
+				.cra_driver_name = "stm32-sha512",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA512_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_init,
+				.cra_exit = stm32_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.setkey = stm32_hash_setkey,
+		.halg = {
+			.digestsize = SHA512_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_state),
+			.base = {
+				.cra_name = "hmac(sha512)",
+				.cra_driver_name = "stm32-hmac-sha512",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA512_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_hmac_init,
+				.cra_exit = stm32_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+};
+
+static struct ahash_alg algs_sha3[] = {
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.halg = {
+			.digestsize = SHA3_224_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_state),
+			.base = {
+				.cra_name = "sha3-224",
+				.cra_driver_name = "stm32-sha3-224",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA3_224_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_sha3_init,
+				.cra_exit = stm32_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.setkey = stm32_hash_setkey,
+		.halg = {
+			.digestsize = SHA3_224_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_state),
+			.base = {
+				.cra_name = "hmac(sha3-224)",
+				.cra_driver_name = "stm32-hmac-sha3-224",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA3_224_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_sha3_hmac_init,
+				.cra_exit = stm32_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+		{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.halg = {
+			.digestsize = SHA3_256_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_state),
+			.base = {
+				.cra_name = "sha3-256",
+				.cra_driver_name = "stm32-sha3-256",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA3_256_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_sha3_init,
+				.cra_exit = stm32_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.setkey = stm32_hash_setkey,
+		.halg = {
+			.digestsize = SHA3_256_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_state),
+			.base = {
+				.cra_name = "hmac(sha3-256)",
+				.cra_driver_name = "stm32-hmac-sha3-256",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA3_256_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_sha3_hmac_init,
+				.cra_exit = stm32_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.halg = {
+			.digestsize = SHA3_384_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_state),
+			.base = {
+				.cra_name = "sha3-384",
+				.cra_driver_name = "stm32-sha3-384",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA3_384_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_sha3_init,
+				.cra_exit = stm32_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.setkey = stm32_hash_setkey,
+		.halg = {
+			.digestsize = SHA3_384_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_state),
+			.base = {
+				.cra_name = "hmac(sha3-384)",
+				.cra_driver_name = "stm32-hmac-sha3-384",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA3_384_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_sha3_hmac_init,
+				.cra_exit = stm32_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.halg = {
+			.digestsize = SHA3_512_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_state),
+			.base = {
+				.cra_name = "sha3-512",
+				.cra_driver_name = "stm32-sha3-512",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA3_512_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_sha3_init,
+				.cra_exit = stm32_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = stm32_hash_init,
+		.update = stm32_hash_update,
+		.final = stm32_hash_final,
+		.finup = stm32_hash_finup,
+		.digest = stm32_hash_digest,
+		.export = stm32_hash_export,
+		.import = stm32_hash_import,
+		.setkey = stm32_hash_setkey,
+		.halg = {
+			.digestsize = SHA3_512_DIGEST_SIZE,
+			.statesize = sizeof(struct stm32_hash_state),
+			.base = {
+				.cra_name = "hmac(sha3-512)",
+				.cra_driver_name = "stm32-hmac-sha3-512",
+				.cra_priority = 200,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA3_512_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct stm32_hash_ctx),
+				.cra_alignmask = 3,
+				.cra_init = stm32_hash_cra_sha3_hmac_init,
+				.cra_exit = stm32_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	}
+};
+
 static int stm32_hash_register_algs(struct stm32_hash_dev *hdev)
 {
 	unsigned int i, j;
@@ -1471,6 +1867,7 @@ static struct stm32_hash_algs_info stm32_hash_algs_info_ux500[] = {
 };
 
 static const struct stm32_hash_pdata stm32_hash_pdata_ux500 = {
+	.alg_shift	= 7,
 	.algs_info	= stm32_hash_algs_info_ux500,
 	.algs_info_size	= ARRAY_SIZE(stm32_hash_algs_info_ux500),
 	.broken_emptymsg = true,
@@ -1489,6 +1886,7 @@ static struct stm32_hash_algs_info stm32_hash_algs_info_stm32f4[] = {
 };
 
 static const struct stm32_hash_pdata stm32_hash_pdata_stm32f4 = {
+	.alg_shift	= 7,
 	.algs_info	= stm32_hash_algs_info_stm32f4,
 	.algs_info_size	= ARRAY_SIZE(stm32_hash_algs_info_stm32f4),
 	.has_sr		= true,
@@ -1515,25 +1913,49 @@ static struct stm32_hash_algs_info stm32_hash_algs_info_stm32f7[] = {
 };
 
 static const struct stm32_hash_pdata stm32_hash_pdata_stm32f7 = {
+	.alg_shift	= 7,
 	.algs_info	= stm32_hash_algs_info_stm32f7,
 	.algs_info_size	= ARRAY_SIZE(stm32_hash_algs_info_stm32f7),
 	.has_sr		= true,
 	.has_mdmat	= true,
 };
 
+static struct stm32_hash_algs_info stm32_hash_algs_info_stm32mp13[] = {
+	{
+		.algs_list	= algs_sha1,
+		.size		= ARRAY_SIZE(algs_sha1),
+	},
+	{
+		.algs_list	= algs_sha224,
+		.size		= ARRAY_SIZE(algs_sha224),
+	},
+	{
+		.algs_list	= algs_sha256,
+		.size		= ARRAY_SIZE(algs_sha256),
+	},
+	{
+		.algs_list	= algs_sha384_sha512,
+		.size		= ARRAY_SIZE(algs_sha384_sha512),
+	},
+	{
+		.algs_list	= algs_sha3,
+		.size		= ARRAY_SIZE(algs_sha3),
+	},
+};
+
+static const struct stm32_hash_pdata stm32_hash_pdata_stm32mp13 = {
+	.alg_shift	= 17,
+	.algs_info	= stm32_hash_algs_info_stm32mp13,
+	.algs_info_size	= ARRAY_SIZE(stm32_hash_algs_info_stm32mp13),
+	.has_sr		= true,
+	.has_mdmat	= true,
+};
+
 static const struct of_device_id stm32_hash_of_match[] = {
-	{
-		.compatible = "stericsson,ux500-hash",
-		.data = &stm32_hash_pdata_ux500,
-	},
-	{
-		.compatible = "st,stm32f456-hash",
-		.data = &stm32_hash_pdata_stm32f4,
-	},
-	{
-		.compatible = "st,stm32f756-hash",
-		.data = &stm32_hash_pdata_stm32f7,
-	},
+	{ .compatible = "stericsson,ux500-hash", .data = &stm32_hash_pdata_ux500 },
+	{ .compatible = "st,stm32f456-hash", .data = &stm32_hash_pdata_stm32f4 },
+	{ .compatible = "st,stm32f756-hash", .data = &stm32_hash_pdata_stm32f7 },
+	{ .compatible = "st,stm32mp13-hash", .data = &stm32_hash_pdata_stm32mp13 },
 	{},
 };
 
@@ -1548,12 +1970,6 @@ static int stm32_hash_get_of_match(struct stm32_hash_dev *hdev,
 		return -EINVAL;
 	}
 
-	if (of_property_read_u32(dev->of_node, "dma-maxburst",
-				 &hdev->dma_maxburst)) {
-		dev_info(dev, "dma-maxburst not specified, using 0\n");
-		hdev->dma_maxburst = 0;
-	}
-
 	return 0;
 }
 
@@ -1663,7 +2079,7 @@ static int stm32_hash_probe(struct platform_device *pdev)
 		/* FIXME: implement DMA mode for Ux500 */
 		hdev->dma_mode = 0;
 	else
-		hdev->dma_mode = stm32_hash_read(hdev, HASH_HWCFGR);
+		hdev->dma_mode = stm32_hash_read(hdev, HASH_HWCFGR) & HASH_HWCFG_DMA_MASK;
 
 	/* Register algos */
 	ret = stm32_hash_register_algs(hdev);
@@ -1772,6 +2188,6 @@ static struct platform_driver stm32_hash_driver = {
 
 module_platform_driver(stm32_hash_driver);
 
-MODULE_DESCRIPTION("STM32 SHA1/224/256 & MD5 (HMAC) hw accelerator driver");
+MODULE_DESCRIPTION("STM32 SHA1/SHA2/SHA3 & MD5 (HMAC) hw accelerator driver");
 MODULE_AUTHOR("Lionel Debieve <lionel.debieve@st.com>");
 MODULE_LICENSE("GPL v2");

From 0e99d38ff6ad4baf0c24ec0aca8a01c522ef4dcd Mon Sep 17 00:00:00 2001
From: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Date: Thu, 13 Jul 2023 17:15:14 +0200
Subject: [PATCH 0402/2047] crypto: stm32 - remove bufcnt in
 stm32_hash_write_ctrl.

Commit "crypto: stm32 - Fix empty message processing" remove the use of
the argument bufcnt in stm32_hash_write_ctrl.
Hence, we can remove it from the function prototype and simplify the
function declaration.

Signed-off-by: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32-hash.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index 30e42a402aa7..c179a6c1a457 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -286,7 +286,7 @@ static int stm32_hash_write_key(struct stm32_hash_dev *hdev)
 	return 0;
 }
 
-static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev, int bufcnt)
+static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev)
 {
 	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(hdev->req);
@@ -395,7 +395,7 @@ static int stm32_hash_xmit_cpu(struct stm32_hash_dev *hdev,
 
 	hdev->flags |= HASH_FLAGS_CPU;
 
-	stm32_hash_write_ctrl(hdev, length);
+	stm32_hash_write_ctrl(hdev);
 
 	if (stm32_hash_wait_busy(hdev))
 		return -ETIMEDOUT;
@@ -669,7 +669,7 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
 	if (rctx->nents < 0)
 		return -EINVAL;
 
-	stm32_hash_write_ctrl(hdev, rctx->total);
+	stm32_hash_write_ctrl(hdev);
 
 	if (hdev->flags & HASH_FLAGS_HMAC) {
 		err = stm32_hash_hmac_dma_send(hdev);

From d9c83f71eeceed2cb54bb78be84f2d4055fd9a1f Mon Sep 17 00:00:00 2001
From: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Date: Thu, 13 Jul 2023 17:15:15 +0200
Subject: [PATCH 0403/2047] crypto: stm32 - fix loop iterating through
 scatterlist for DMA

We were reading the length of the scatterlist sg after copying value of
tsg inside.
So we are using the size of the previous scatterlist and for the first
one we are using an unitialised value.
Fix this by copying tsg in sg[0] before reading the size.

Fixes : 8a1012d3f2ab ("crypto: stm32 - Support for STM32 HASH module")
Cc: stable@vger.kernel.org
Signed-off-by: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32-hash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index c179a6c1a457..519fb716acee 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -678,9 +678,9 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
 	}
 
 	for_each_sg(rctx->sg, tsg, rctx->nents, i) {
+		sg[0] = *tsg;
 		len = sg->length;
 
-		sg[0] = *tsg;
 		if (sg_is_last(sg)) {
 			if (hdev->dma_mode == 1) {
 				len = (ALIGN(sg->length, 16) - 16);

From a10618f397062823a84cd4abedc51f46b9d4410f Mon Sep 17 00:00:00 2001
From: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Date: Thu, 13 Jul 2023 17:15:16 +0200
Subject: [PATCH 0404/2047] crypto: stm32 - check request size and scatterlist
 size when using DMA.

When we are sending the data to HASH with the DMA, we send all the data
provided in the scatterlists of the request.
But in some cases (ex : tcrypt performances tests), we should only send
req->nbytes
When iterating through the scatterlist we verify if it is the last
scatterlist or if the number of bytes sent plus the data of the current
scatterlist is superior of the total number of bytes to hash.

Signed-off-by: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32-hash.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index 519fb716acee..701995a72e57 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -659,8 +659,9 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
 	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req);
 	u32 *buffer = (void *)rctx->state.buffer;
 	struct scatterlist sg[1], *tsg;
-	int err = 0, len = 0, reg, ncp = 0;
-	unsigned int i;
+	int err = 0, reg, ncp = 0;
+	unsigned int i, len = 0, bufcnt = 0;
+	bool is_last = false;
 
 	rctx->sg = hdev->req->src;
 	rctx->total = hdev->req->nbytes;
@@ -681,7 +682,9 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
 		sg[0] = *tsg;
 		len = sg->length;
 
-		if (sg_is_last(sg)) {
+		if (sg_is_last(sg) || (bufcnt + sg[0].length) >= rctx->total) {
+			sg->length = rctx->total - bufcnt;
+			is_last = true;
 			if (hdev->dma_mode == 1) {
 				len = (ALIGN(sg->length, 16) - 16);
 
@@ -707,13 +710,15 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
 			return -ENOMEM;
 		}
 
-		err = stm32_hash_xmit_dma(hdev, sg, len,
-					  !sg_is_last(sg));
+		err = stm32_hash_xmit_dma(hdev, sg, len, !is_last);
 
+		bufcnt += sg[0].length;
 		dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE);
 
 		if (err == -ENOMEM)
 			return err;
+		if (is_last)
+			break;
 	}
 
 	if (hdev->dma_mode == 1) {

From a4adfbc2544933ac12e7fbd50708290265546dbc Mon Sep 17 00:00:00 2001
From: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Date: Thu, 13 Jul 2023 17:15:17 +0200
Subject: [PATCH 0405/2047] crypto: stm32 - fix MDMAT condition

If IP has MDMAT support, set or reset the bit MDMAT in Control Register.

Fixes: b56403a25af7 ("crypto: stm32/hash - Support Ux500 hash")
Cc: stable@vger.kernel.org
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32-hash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index 701995a72e57..a48e6a14da2e 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -544,7 +544,7 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev,
 
 	reg = stm32_hash_read(hdev, HASH_CR);
 
-	if (!hdev->pdata->has_mdmat) {
+	if (hdev->pdata->has_mdmat) {
 		if (mdma)
 			reg |= HASH_CR_MDMAT;
 		else

From 1e3b2e805587ab5aa3ecdfe6339ad120ceaef7d3 Mon Sep 17 00:00:00 2001
From: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Date: Thu, 13 Jul 2023 17:15:18 +0200
Subject: [PATCH 0406/2047] crypto: stm32 - remove flag HASH_FLAGS_DMA_READY

Remove flag HASH_FLAGS_DMA_READY as it can put the driver in a deadlock
state.
If the DMA automatically set the DCAL bit, the interrupt indicating the
end of a computation can be raised before the DMA complete sequence.

Signed-off-by: Thomas Bourgoin <thomas.bourgoin@foss.st.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32-hash.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index a48e6a14da2e..88a186c3dd78 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -84,12 +84,11 @@
 #define HASH_FLAGS_INIT			BIT(0)
 #define HASH_FLAGS_OUTPUT_READY		BIT(1)
 #define HASH_FLAGS_CPU			BIT(2)
-#define HASH_FLAGS_DMA_READY		BIT(3)
-#define HASH_FLAGS_DMA_ACTIVE		BIT(4)
-#define HASH_FLAGS_HMAC_INIT		BIT(5)
-#define HASH_FLAGS_HMAC_FINAL		BIT(6)
-#define HASH_FLAGS_HMAC_KEY		BIT(7)
-#define HASH_FLAGS_SHA3_MODE		BIT(8)
+#define HASH_FLAGS_DMA_ACTIVE		BIT(3)
+#define HASH_FLAGS_HMAC_INIT		BIT(4)
+#define HASH_FLAGS_HMAC_FINAL		BIT(5)
+#define HASH_FLAGS_HMAC_KEY		BIT(6)
+#define HASH_FLAGS_SHA3_MODE		BIT(7)
 #define HASH_FLAGS_FINAL		BIT(15)
 #define HASH_FLAGS_FINUP		BIT(16)
 #define HASH_FLAGS_ALGO_MASK		GENMASK(20, 17)
@@ -585,8 +584,6 @@ static void stm32_hash_dma_callback(void *param)
 	struct stm32_hash_dev *hdev = param;
 
 	complete(&hdev->dma_completion);
-
-	hdev->flags |= HASH_FLAGS_DMA_READY;
 }
 
 static int stm32_hash_hmac_dma_send(struct stm32_hash_dev *hdev)
@@ -1241,11 +1238,9 @@ static irqreturn_t stm32_hash_irq_thread(int irq, void *dev_id)
 			hdev->flags &= ~HASH_FLAGS_OUTPUT_READY;
 			goto finish;
 		}
-	} else if (HASH_FLAGS_DMA_READY & hdev->flags) {
-		if (HASH_FLAGS_DMA_ACTIVE & hdev->flags) {
-			hdev->flags &= ~HASH_FLAGS_DMA_ACTIVE;
-				goto finish;
-		}
+	} else if (HASH_FLAGS_DMA_ACTIVE & hdev->flags) {
+		hdev->flags &= ~HASH_FLAGS_DMA_ACTIVE;
+			goto finish;
 	}
 
 	return IRQ_HANDLED;

From 5cd4ed98cfb743cd8f363ccc674313c14c17acd5 Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Fri, 14 Jul 2023 19:41:35 +0800
Subject: [PATCH 0407/2047] crypto: hisilicon/qm - flush all work before driver
 removed

Before removing the driver, flush inter-function communication
work, and subsequent communication work is not processed.
This prevents communication threads from accessing released memory.

Fixes: ("crypto: hisilicon/qm - enable PF and VFs communication")
Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index edc6fd44e7ca..81c21ca40375 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -954,6 +954,11 @@ static irqreturn_t qm_mb_cmd_irq(int irq, void *data)
 	if (!val)
 		return IRQ_NONE;
 
+	if (test_bit(QM_DRIVER_REMOVING, &qm->misc_ctl)) {
+		dev_warn(&qm->pdev->dev, "Driver is down, message cannot be processed!\n");
+		return IRQ_HANDLED;
+	}
+
 	schedule_work(&qm->cmd_process);
 
 	return IRQ_HANDLED;
@@ -2743,6 +2748,9 @@ void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
 	       test_bit(QM_RESETTING, &qm->misc_ctl))
 		msleep(WAIT_PERIOD);
 
+	if (test_bit(QM_SUPPORT_MB_COMMAND, &qm->caps))
+		flush_work(&qm->cmd_process);
+
 	udelay(REMOVE_WAIT_DELAY);
 }
 EXPORT_SYMBOL_GPL(hisi_qm_wait_task_finish);

From 4b3ee3ff2dd66b45dd5ec374a95af06eb26d35ac Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Fri, 14 Jul 2023 19:41:36 +0800
Subject: [PATCH 0408/2047] crypto: hisilicon/qm - stop function and write data
 to memory

When the system is shut down, the process is killed, but the
accelerator device does not stop executing the tasks. If the
accelerator device still accesses the memory and writes back data
to the memory after the memory is reclaimed by the system,
an NFE error may occur. Therefore, before the system is shut
down, the driver needs to stop the device and write data back
to the memory.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 14 ++++++++------
 include/linux/hisi_acc_qm.h   |  2 +-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 81c21ca40375..fdff87c09aaf 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -1002,7 +1002,7 @@ static void qm_reset_function(struct hisi_qm *qm)
 		return;
 	}
 
-	ret = hisi_qm_stop(qm, QM_FLR);
+	ret = hisi_qm_stop(qm, QM_DOWN);
 	if (ret) {
 		dev_err(dev, "failed to stop qm when reset function\n");
 		goto clear_bit;
@@ -3251,7 +3251,7 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r)
 	}
 
 	if (qm->status.stop_reason == QM_SOFT_RESET ||
-	    qm->status.stop_reason == QM_FLR) {
+	    qm->status.stop_reason == QM_DOWN) {
 		hisi_qm_set_hw_reset(qm, QM_RESET_STOP_TX_OFFSET);
 		ret = qm_stop_started_qp(qm);
 		if (ret < 0) {
@@ -4547,11 +4547,11 @@ void hisi_qm_reset_prepare(struct pci_dev *pdev)
 	if (qm->fun_type == QM_HW_PF)
 		qm_cmd_uninit(qm);
 
-	ret = qm_try_stop_vfs(qm, QM_PF_FLR_PREPARE, QM_FLR);
+	ret = qm_try_stop_vfs(qm, QM_PF_FLR_PREPARE, QM_DOWN);
 	if (ret)
 		pci_err(pdev, "failed to stop vfs by pf in FLR.\n");
 
-	ret = hisi_qm_stop(qm, QM_FLR);
+	ret = hisi_qm_stop(qm, QM_DOWN);
 	if (ret) {
 		pci_err(pdev, "Failed to stop QM, ret = %d.\n", ret);
 		hisi_qm_set_hw_reset(qm, QM_RESET_STOP_TX_OFFSET);
@@ -4649,9 +4649,11 @@ void hisi_qm_dev_shutdown(struct pci_dev *pdev)
 	struct hisi_qm *qm = pci_get_drvdata(pdev);
 	int ret;
 
-	ret = hisi_qm_stop(qm, QM_NORMAL);
+	ret = hisi_qm_stop(qm, QM_DOWN);
 	if (ret)
 		dev_err(&pdev->dev, "Fail to stop qm in shutdown!\n");
+
+	hisi_qm_cache_wb(qm);
 }
 EXPORT_SYMBOL_GPL(hisi_qm_dev_shutdown);
 
@@ -4815,7 +4817,7 @@ static void qm_handle_cmd_msg(struct hisi_qm *qm, u32 fun_num)
 	cmd = msg & QM_MB_CMD_DATA_MASK;
 	switch (cmd) {
 	case QM_PF_FLR_PREPARE:
-		qm_pf_reset_vf_process(qm, QM_FLR);
+		qm_pf_reset_vf_process(qm, QM_DOWN);
 		break;
 	case QM_PF_SRST_PREPARE:
 		qm_pf_reset_vf_process(qm, QM_SOFT_RESET);
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index a7d54d4d41fd..39fbfb4be944 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -104,7 +104,7 @@
 enum qm_stop_reason {
 	QM_NORMAL,
 	QM_SOFT_RESET,
-	QM_FLR,
+	QM_DOWN,
 };
 
 enum qm_state {

From b925a0cc87a1b950bc87ebf869e6d2dff0839e5f Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Fri, 14 Jul 2023 19:41:37 +0800
Subject: [PATCH 0409/2047] crypto: hisilicon/qm - increase device doorbell
 timeout

When both the accelerator device and SMMU are busy,
the processing time of the doorbell may be prolonged.
As a result, the doorbell may timeout, especially in the sva
scenario. Therefore, the doorbell timeout is increased.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index fdff87c09aaf..a99fd589445c 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -88,6 +88,8 @@
 #define QM_DB_PRIORITY_SHIFT_V1		48
 #define QM_PAGE_SIZE			0x0034
 #define QM_QP_DB_INTERVAL		0x10000
+#define QM_DB_TIMEOUT_CFG		0x100074
+#define QM_DB_TIMEOUT_SET		0x1fffff
 
 #define QM_MEM_START_INIT		0x100040
 #define QM_MEM_INIT_DONE		0x100044
@@ -5381,6 +5383,8 @@ int hisi_qm_init(struct hisi_qm *qm)
 		goto err_pci_init;
 
 	if (qm->fun_type == QM_HW_PF) {
+		/* Set the doorbell timeout to QM_DB_TIMEOUT_CFG ns. */
+		writel(QM_DB_TIMEOUT_SET, qm->io_base + QM_DB_TIMEOUT_CFG);
 		qm_disable_clock_gate(qm);
 		ret = qm_dev_mem_reset(qm);
 		if (ret) {
@@ -5548,6 +5552,8 @@ static int qm_rebuild_for_resume(struct hisi_qm *qm)
 
 	qm_cmd_init(qm);
 	hisi_qm_dev_err_init(qm);
+	/* Set the doorbell timeout to QM_DB_TIMEOUT_CFG ns. */
+	writel(QM_DB_TIMEOUT_SET, qm->io_base + QM_DB_TIMEOUT_CFG);
 	qm_disable_clock_gate(qm);
 	ret = qm_dev_mem_reset(qm);
 	if (ret)

From 391dde6e48ff84687395a0a4e84f7e1540301e4e Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Fri, 14 Jul 2023 19:41:38 +0800
Subject: [PATCH 0410/2047] crypto: hisilicon/hpre - enable sva error interrupt
 event

Enable sva error interrupt event. When an error occurs on
the sva module, the device reports an abnormal interrupt to
the driver.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_main.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 5d0adfb54a34..39297ce70f44 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -209,7 +209,7 @@ static const struct hisi_qm_cap_info hpre_basic_info[] = {
 	{HPRE_QM_RESET_MASK_CAP, 0x3128, 0, GENMASK(31, 0), 0x0, 0xC37, 0x6C37},
 	{HPRE_QM_OOO_SHUTDOWN_MASK_CAP, 0x3128, 0, GENMASK(31, 0), 0x0, 0x4, 0x6C37},
 	{HPRE_QM_CE_MASK_CAP, 0x312C, 0, GENMASK(31, 0), 0x0, 0x8, 0x8},
-	{HPRE_NFE_MASK_CAP, 0x3130, 0, GENMASK(31, 0), 0x0, 0x3FFFFE, 0xFFFFFE},
+	{HPRE_NFE_MASK_CAP, 0x3130, 0, GENMASK(31, 0), 0x0, 0x3FFFFE, 0x1FFFFFE},
 	{HPRE_RESET_MASK_CAP, 0x3134, 0, GENMASK(31, 0), 0x0, 0x3FFFFE, 0xBFFFFE},
 	{HPRE_OOO_SHUTDOWN_MASK_CAP, 0x3134, 0, GENMASK(31, 0), 0x0, 0x22, 0xBFFFFE},
 	{HPRE_CE_MASK_CAP, 0x3138, 0, GENMASK(31, 0), 0x0, 0x1, 0x1},
@@ -275,6 +275,9 @@ static const struct hpre_hw_error hpre_hw_errors[] = {
 	}, {
 		.int_msk = BIT(23),
 		.msg = "sva_fsm_timeout_int_set"
+	}, {
+		.int_msk = BIT(24),
+		.msg = "sva_int_set"
 	}, {
 		/* sentinel */
 	}

From 28b776098379fb698702b972df159aeca30aa6e3 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 14 Jul 2023 22:01:04 -0700
Subject: [PATCH 0411/2047] crypto: x86/aesni - remove unused parameter to
 aes_set_key_common()

The 'tfm' parameter to aes_set_key_common() is never used, so remove it.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aesni-intel_glue.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index c4eea7e746e7..39d6a62ac627 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -229,8 +229,7 @@ static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
 	return (struct crypto_aes_ctx *)ALIGN(addr, align);
 }
 
-static int aes_set_key_common(struct crypto_tfm *tfm,
-			      struct crypto_aes_ctx *ctx,
+static int aes_set_key_common(struct crypto_aes_ctx *ctx,
 			      const u8 *in_key, unsigned int key_len)
 {
 	int err;
@@ -253,7 +252,8 @@ static int aes_set_key_common(struct crypto_tfm *tfm,
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		       unsigned int key_len)
 {
-	return aes_set_key_common(tfm, aes_ctx(crypto_tfm_ctx(tfm)), in_key, key_len);
+	return aes_set_key_common(aes_ctx(crypto_tfm_ctx(tfm)), in_key,
+				  key_len);
 }
 
 static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
@@ -285,8 +285,7 @@ static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			         unsigned int len)
 {
-	return aes_set_key_common(crypto_skcipher_tfm(tfm),
-				  aes_ctx(crypto_skcipher_ctx(tfm)), key, len);
+	return aes_set_key_common(aes_ctx(crypto_skcipher_ctx(tfm)), key, len);
 }
 
 static int ecb_encrypt(struct skcipher_request *req)
@@ -627,8 +626,7 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
 
 	memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce));
 
-	return aes_set_key_common(crypto_aead_tfm(aead),
-				  &ctx->aes_key_expanded, key, key_len) ?:
+	return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?:
 	       rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
 }
 
@@ -893,14 +891,13 @@ static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	keylen /= 2;
 
 	/* first half of xts-key is for crypt */
-	err = aes_set_key_common(crypto_skcipher_tfm(tfm), aes_ctx(ctx->raw_crypt_ctx),
-				 key, keylen);
+	err = aes_set_key_common(aes_ctx(ctx->raw_crypt_ctx), key, keylen);
 	if (err)
 		return err;
 
 	/* second half of xts-key is for tweak */
-	return aes_set_key_common(crypto_skcipher_tfm(tfm), aes_ctx(ctx->raw_tweak_ctx),
-				  key + keylen, keylen);
+	return aes_set_key_common(aes_ctx(ctx->raw_tweak_ctx), key + keylen,
+				  keylen);
 }
 
 static int xts_crypt(struct skcipher_request *req, bool encrypt)
@@ -1150,8 +1147,7 @@ static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
 {
 	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead);
 
-	return aes_set_key_common(crypto_aead_tfm(aead),
-				  &ctx->aes_key_expanded, key, key_len) ?:
+	return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?:
 	       rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
 }
 

From dd105461ad15ea930d88aec1e4fcfc1f3186da43 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 16 Jul 2023 16:11:12 +0200
Subject: [PATCH 0412/2047] hwrng: arm-smccc-trng - don't set drvdata

Don't set drvdata, there's nobody who reads it.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/arm_smccc_trng.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/char/hw_random/arm_smccc_trng.c b/drivers/char/hw_random/arm_smccc_trng.c
index e34c3ea692b6..7e954341b09f 100644
--- a/drivers/char/hw_random/arm_smccc_trng.c
+++ b/drivers/char/hw_random/arm_smccc_trng.c
@@ -105,8 +105,6 @@ static int smccc_trng_probe(struct platform_device *pdev)
 	trng->name = "smccc_trng";
 	trng->read = smccc_trng_read;
 
-	platform_set_drvdata(pdev, trng);
-
 	return devm_hwrng_register(&pdev->dev, trng);
 }
 

From 9027f2e79788ddee716674ad9af3cdc076de18cc Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 14 Jul 2023 11:44:25 -0600
Subject: [PATCH 0413/2047] PM / devfreq: Explicitly include correct DT
 includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/imx-bus.c         | 2 +-
 drivers/devfreq/imx8m-ddrc.c      | 2 +-
 drivers/devfreq/mtk-cci-devfreq.c | 1 -
 drivers/devfreq/tegra30-devfreq.c | 2 +-
 4 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/devfreq/imx-bus.c b/drivers/devfreq/imx-bus.c
index a727067980fb..86850b7dea09 100644
--- a/drivers/devfreq/imx-bus.c
+++ b/drivers/devfreq/imx-bus.c
@@ -7,7 +7,7 @@
 #include <linux/devfreq.h>
 #include <linux/device.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/pm_opp.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
diff --git a/drivers/devfreq/imx8m-ddrc.c b/drivers/devfreq/imx8m-ddrc.c
index 16636973eb10..e1348490c8aa 100644
--- a/drivers/devfreq/imx8m-ddrc.c
+++ b/drivers/devfreq/imx8m-ddrc.c
@@ -3,9 +3,9 @@
  * Copyright 2019 NXP
  */
 
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/device.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/devfreq.h>
 #include <linux/pm_opp.h>
diff --git a/drivers/devfreq/mtk-cci-devfreq.c b/drivers/devfreq/mtk-cci-devfreq.c
index 6354622eda65..83a73f0ccd80 100644
--- a/drivers/devfreq/mtk-cci-devfreq.c
+++ b/drivers/devfreq/mtk-cci-devfreq.c
@@ -8,7 +8,6 @@
 #include <linux/minmax.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/regulator/consumer.h>
diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
index 503376b894b6..4a4f0106ab9d 100644
--- a/drivers/devfreq/tegra30-devfreq.c
+++ b/drivers/devfreq/tegra30-devfreq.c
@@ -13,7 +13,7 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/reset.h>

From c46de2fb4ad9e2e5d9041953e87d71ee762fc160 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Thu, 20 Jul 2023 14:18:54 +0530
Subject: [PATCH 0414/2047] PM / devfreq: Reword the kernel-doc comment for
 devfreq_monitor_start() API

Current kernel-doc comment doesn't specify the default timer used for the
load monitoring. Also, it uses the term "default delayed work" which could
be misunderstood as "default delayer timer". So reword the comment to
clearly specify the default timer and also reword the last sentence to make
it more understandable.

Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/devfreq.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index e36cbb920ec8..db50c93c0ac3 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -472,10 +472,11 @@ static void devfreq_monitor(struct work_struct *work)
  * devfreq_monitor_start() - Start load monitoring of devfreq instance
  * @devfreq:	the devfreq instance.
  *
- * Helper function for starting devfreq device load monitoring. By
- * default delayed work based monitoring is supported. Function
- * to be called from governor in response to DEVFREQ_GOV_START
- * event when device is added to devfreq framework.
+ * Helper function for starting devfreq device load monitoring. By default,
+ * deferrable timer is used for load monitoring. But the users can change this
+ * behavior using the "timer" type in devfreq_dev_profile. This function will be
+ * called by devfreq governor in response to the DEVFREQ_GOV_START event
+ * generated while adding a device to the devfreq framework.
  */
 void devfreq_monitor_start(struct devfreq *devfreq)
 {

From ffb57ee9c0e2a9aae89016ee7b793a666c6aa4a5 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 21 Jul 2023 19:25:43 +0200
Subject: [PATCH 0415/2047] regmap: Fix the type used for a bitmap pointer

Bitmaps should be defined as 'unsigned long', not 'long'.
Fix the type of 'cache_present' is the 'struct regcache_rbtree_node'.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/5600df5003d23da10efcfafbda97ca55776d0d29.1689960321.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regcache-rbtree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c
index 06788965aa29..db716ffd083e 100644
--- a/drivers/base/regmap/regcache-rbtree.c
+++ b/drivers/base/regmap/regcache-rbtree.c
@@ -22,7 +22,7 @@ struct regcache_rbtree_node {
 	/* block of adjacent registers */
 	void *block;
 	/* Which registers are present */
-	long *cache_present;
+	unsigned long *cache_present;
 	/* base register handled by this block */
 	unsigned int base_reg;
 	/* number of registers available in the block */

From 005e8dddd4978f6243820d031987056a1a88a2dd Mon Sep 17 00:00:00 2001
From: Brian Geffon <bgeffon@google.com>
Date: Fri, 14 Jul 2023 13:55:05 -0400
Subject: [PATCH 0416/2047] PM: hibernate: don't store zero pages in the image
 file

On ChromeOS we've observed a considerable number of in-use pages filled with
zeros. Today with hibernate it's entirely possible that saveable pages are just
zero filled. Since we're already copying pages word-by-word in do_copy_page it
becomes almost free to determine if a page was completely filled with zeros.

This change introduces a new bitmap which will track these zero pages. If a page
is zero it will not be included in the saved image, instead to track these zero
pages in the image file we will introduce a new flag which we will set on the
packed PFN list. When reading back in the image file we will detect these zero
page PFNs and rebuild the zero page bitmap.

When the image is being loaded through calls to write_next_page if we encounter
a zero page we will silently memset it to 0 and then continue on to the next
page. Given the implementation in snapshot_read_next/snapshot_write_next this
change  will be transparent to non-compressed/compressed and swsusp modes of
operation.

To provide some concrete numbers from simple ad-hoc testing, on a device which
was lightly in use we saw that:

PM: hibernation: Image created (964408 pages copied, 548304 zero pages)

Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero filled
and could be tracked entirely within the packed PFN list. The savings would
obviously be much lower for lzo compressed images, but even in the case of
compression not copying pages across to the compression threads will still
speed things up. It's also possible that we would see better overall compression
ratios as larger regions of "real data" would improve the compressibility.

Finally, such an approach could dramatically improve swsusp performance
as each one of those zero pages requires a write syscall to reload, by
handling it as part of the packed PFN list we're able to fully avoid
that.

Signed-off-by: Brian Geffon <bgeffon@google.com>
[ rjw: Whitespace adjustments, removal of redundant parentheses ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/snapshot.c | 187 ++++++++++++++++++++++++++++++++--------
 1 file changed, 149 insertions(+), 38 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 0415d5ecb977..87e9f7e2bdc0 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -404,6 +404,7 @@ struct bm_position {
 	struct mem_zone_bm_rtree *zone;
 	struct rtree_node *node;
 	unsigned long node_pfn;
+	unsigned long cur_pfn;
 	int node_bit;
 };
 
@@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
 	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 				  struct rtree_node, list);
 	bm->cur.node_pfn = 0;
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	bm->cur.node_bit = 0;
 }
 
@@ -799,6 +801,7 @@ node_found:
 	bm->cur.zone = zone;
 	bm->cur.node = node;
 	bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
+	bm->cur.cur_pfn = pfn;
 
 	/* Set return values */
 	*addr = node->data;
@@ -850,6 +853,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
 	clear_bit(bit, bm->cur.node->data);
 }
 
+static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
+{
+	return bm->cur.cur_pfn;
+}
+
 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
 	void *addr;
@@ -929,10 +937,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 		if (bit < bits) {
 			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
 			bm->cur.node_bit = bit + 1;
+			bm->cur.cur_pfn = pfn;
 			return pfn;
 		}
 	} while (rtree_next_node(bm));
 
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	return BM_END_OF_MAP;
 }
 
@@ -1423,14 +1433,19 @@ static unsigned int count_data_pages(void)
 
 /*
  * This is needed, because copy_page and memcpy are not usable for copying
- * task structs.
+ * task structs. Returns true if the page was filled with only zeros,
+ * otherwise false.
  */
-static inline void do_copy_page(long *dst, long *src)
+static inline bool do_copy_page(long *dst, long *src)
 {
+	long z = 0;
 	int n;
 
-	for (n = PAGE_SIZE / sizeof(long); n; n--)
+	for (n = PAGE_SIZE / sizeof(long); n; n--) {
+		z |= *src;
 		*dst++ = *src++;
+	}
+	return !z;
 }
 
 /**
@@ -1439,17 +1454,21 @@ static inline void do_copy_page(long *dst, long *src)
  * Check if the page we are going to copy is marked as present in the kernel
  * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or
  * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
- * always returns 'true'.
+ * always returns 'true'. Returns true if the page was entirely composed of
+ * zeros, otherwise it will return false.
  */
-static void safe_copy_page(void *dst, struct page *s_page)
+static bool safe_copy_page(void *dst, struct page *s_page)
 {
+	bool zeros_only;
+
 	if (kernel_page_present(s_page)) {
-		do_copy_page(dst, page_address(s_page));
+		zeros_only = do_copy_page(dst, page_address(s_page));
 	} else {
 		hibernate_map_page(s_page);
-		do_copy_page(dst, page_address(s_page));
+		zeros_only = do_copy_page(dst, page_address(s_page));
 		hibernate_unmap_page(s_page);
 	}
+	return zeros_only;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1459,17 +1478,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
 		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
 }
 
-static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
 	struct page *s_page, *d_page;
 	void *src, *dst;
+	bool zeros_only;
 
 	s_page = pfn_to_page(src_pfn);
 	d_page = pfn_to_page(dst_pfn);
 	if (PageHighMem(s_page)) {
 		src = kmap_atomic(s_page);
 		dst = kmap_atomic(d_page);
-		do_copy_page(dst, src);
+		zeros_only = do_copy_page(dst, src);
 		kunmap_atomic(dst);
 		kunmap_atomic(src);
 	} else {
@@ -1478,30 +1498,39 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 			 * The page pointed to by src may contain some kernel
 			 * data modified by kmap_atomic()
 			 */
-			safe_copy_page(buffer, s_page);
+			zeros_only = safe_copy_page(buffer, s_page);
 			dst = kmap_atomic(d_page);
 			copy_page(dst, buffer);
 			kunmap_atomic(dst);
 		} else {
-			safe_copy_page(page_address(d_page), s_page);
+			zeros_only = safe_copy_page(page_address(d_page), s_page);
 		}
 	}
+	return zeros_only;
 }
 #else
 #define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)
 
-static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
-	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
+	return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
 				pfn_to_page(src_pfn));
 }
 #endif /* CONFIG_HIGHMEM */
 
-static void copy_data_pages(struct memory_bitmap *copy_bm,
-			    struct memory_bitmap *orig_bm)
+/*
+ * Copy data pages will copy all pages into pages pulled from the copy_bm.
+ * If a page was entirely filled with zeros it will be marked in the zero_bm.
+ *
+ * Returns the number of pages copied.
+ */
+static unsigned long copy_data_pages(struct memory_bitmap *copy_bm,
+			    struct memory_bitmap *orig_bm,
+			    struct memory_bitmap *zero_bm)
 {
+	unsigned long copied_pages = 0;
 	struct zone *zone;
-	unsigned long pfn;
+	unsigned long pfn, copy_pfn;
 
 	for_each_populated_zone(zone) {
 		unsigned long max_zone_pfn;
@@ -1514,18 +1543,29 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
+	copy_pfn = memory_bm_next_pfn(copy_bm);
 	for(;;) {
 		pfn = memory_bm_next_pfn(orig_bm);
 		if (unlikely(pfn == BM_END_OF_MAP))
 			break;
-		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
+		if (copy_data_page(copy_pfn, pfn)) {
+			memory_bm_set_bit(zero_bm, pfn);
+			/* Use this copy_pfn for a page that is not full of zeros */
+			continue;
+		}
+		copied_pages++;
+		copy_pfn = memory_bm_next_pfn(copy_bm);
 	}
+	return copied_pages;
 }
 
 /* Total number of image pages */
 static unsigned int nr_copy_pages;
 /* Number of pages needed for saving the original pfns of the image pages */
 static unsigned int nr_meta_pages;
+/* Number of zero pages */
+static unsigned int nr_zero_pages;
+
 /*
  * Numbers of normal and highmem page frames allocated for hibernation image
  * before suspending devices.
@@ -1546,6 +1586,9 @@ static struct memory_bitmap orig_bm;
  */
 static struct memory_bitmap copy_bm;
 
+/* Memory bitmap which tracks which saveable pages were zero filled. */
+static struct memory_bitmap zero_bm;
+
 /**
  * swsusp_free - Free pages allocated for hibernation image.
  *
@@ -1590,6 +1633,7 @@ loop:
 out:
 	nr_copy_pages = 0;
 	nr_meta_pages = 0;
+	nr_zero_pages = 0;
 	restore_pblist = NULL;
 	buffer = NULL;
 	alloc_normal = 0;
@@ -1808,8 +1852,15 @@ int hibernate_preallocate_memory(void)
 		goto err_out;
 	}
 
+	error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
+	if (error) {
+		pr_err("Cannot allocate zero bitmap\n");
+		goto err_out;
+	}
+
 	alloc_normal = 0;
 	alloc_highmem = 0;
+	nr_zero_pages = 0;
 
 	/* Count the number of saveable data pages. */
 	save_highmem = count_highmem_pages();
@@ -2089,19 +2140,19 @@ asmlinkage __visible int swsusp_save(void)
 	 * Kill them.
 	 */
 	drain_local_pages(NULL);
-	copy_data_pages(&copy_bm, &orig_bm);
+	nr_copy_pages = copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
 
 	/*
 	 * End of critical section. From now on, we can write to memory,
 	 * but we should not touch disk. This specially means we must _not_
 	 * touch swap space! Except we must write out our image of course.
 	 */
-
 	nr_pages += nr_highmem;
-	nr_copy_pages = nr_pages;
+	/* We don't actually copy the zero pages */
+	nr_zero_pages = nr_pages - nr_copy_pages;
 	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
 
-	pr_info("Image created (%d pages copied)\n", nr_pages);
+	pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
 
 	return 0;
 }
@@ -2146,15 +2197,22 @@ static int init_header(struct swsusp_info *info)
 	return init_header_complete(info);
 }
 
+#define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1))
+#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
+
 /**
  * pack_pfns - Prepare PFNs for saving.
  * @bm: Memory bitmap.
  * @buf: Memory buffer to store the PFNs in.
+ * @zero_bm: Memory bitmap containing PFNs of zero pages.
  *
  * PFNs corresponding to set bits in @bm are stored in the area of memory
- * pointed to by @buf (1 page at a time).
+ * pointed to by @buf (1 page at a time). Pages which were filled with only
+ * zeros will have the highest bit set in the packed format to distinguish
+ * them from PFNs which will be contained in the image file.
  */
-static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	int j;
 
@@ -2162,6 +2220,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 		buf[j] = memory_bm_next_pfn(bm);
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
+		if (memory_bm_test_bit(zero_bm, buf[j]))
+			buf[j] |= ENCODED_PFN_ZERO_FLAG;
 	}
 }
 
@@ -2203,7 +2263,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
 		memory_bm_position_reset(&copy_bm);
 	} else if (handle->cur <= nr_meta_pages) {
 		clear_page(buffer);
-		pack_pfns(buffer, &orig_bm);
+		pack_pfns(buffer, &orig_bm, &zero_bm);
 	} else {
 		struct page *page;
 
@@ -2299,24 +2359,35 @@ static int load_header(struct swsusp_info *info)
  * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
  * @bm: Memory bitmap.
  * @buf: Area of memory containing the PFNs.
+ * @zero_bm: Memory bitmap with the zero PFNs marked.
  *
  * For each element of the array pointed to by @buf (1 page at a time), set the
- * corresponding bit in @bm.
+ * corresponding bit in @bm. If the page was originally populated with only
+ * zeros then a corresponding bit will also be set in @zero_bm.
  */
-static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
+	unsigned long decoded_pfn;
+        bool zero;
 	int j;
 
 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
 
-		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
-			memory_bm_set_bit(bm, buf[j]);
+		zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
+		decoded_pfn = buf[j] & ENCODED_PFN_MASK;
+		if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
+			memory_bm_set_bit(bm, decoded_pfn);
+			if (zero) {
+				memory_bm_set_bit(zero_bm, decoded_pfn);
+				nr_zero_pages++;
+			}
 		} else {
-			if (!pfn_valid(buf[j]))
+			if (!pfn_valid(decoded_pfn))
 				pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
-				       (unsigned long long)PFN_PHYS(buf[j]));
+				       (unsigned long long)PFN_PHYS(decoded_pfn));
 			return -EFAULT;
 		}
 	}
@@ -2538,6 +2609,7 @@ static inline void free_highmem_data(void) {}
  * prepare_image - Make room for loading hibernation image.
  * @new_bm: Uninitialized memory bitmap structure.
  * @bm: Memory bitmap with unsafe pages marked.
+ * @zero_bm: Memory bitmap containing the zero pages.
  *
  * Use @bm to mark the pages that will be overwritten in the process of
  * restoring the system memory state from the suspend image ("unsafe" pages)
@@ -2548,10 +2620,15 @@ static inline void free_highmem_data(void) {}
  * pages will be used for just yet.  Instead, we mark them all as allocated and
  * create a lists of "safe" pages to be used later.  On systems with high
  * memory a list of "safe" highmem pages is created too.
+ *
+ * Because it was not known which pages were unsafe when @zero_bm was created,
+ * make a copy of it and recreate it within safe pages.
  */
-static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
+static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	unsigned int nr_pages, nr_highmem;
+	struct memory_bitmap tmp;
 	struct linked_page *lp;
 	int error;
 
@@ -2568,6 +2645,24 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 
 	duplicate_memory_bitmap(new_bm, bm);
 	memory_bm_free(bm, PG_UNSAFE_KEEP);
+
+	/* Make a copy of zero_bm so it can be created in safe pages */
+	error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY);
+	if (error)
+		goto Free;
+
+	duplicate_memory_bitmap(&tmp, zero_bm);
+	memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
+
+	/* Recreate zero_bm in safe pages */
+	error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
+	if (error)
+		goto Free;
+
+	duplicate_memory_bitmap(zero_bm, &tmp);
+	memory_bm_free(&tmp, PG_UNSAFE_KEEP);
+	/* At this point zero_bm is in safe pages and it can be used for restoring. */
+
 	if (nr_highmem > 0) {
 		error = prepare_highmem_image(bm, &nr_highmem);
 		if (error)
@@ -2582,7 +2677,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 	 *
 	 * nr_copy_pages cannot be less than allocated_unsafe_pages too.
 	 */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
 	while (nr_pages > 0) {
 		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
@@ -2595,7 +2690,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 		nr_pages--;
 	}
 	/* Preallocate memory for the image */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	while (nr_pages > 0) {
 		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
 		if (!lp) {
@@ -2683,8 +2778,9 @@ int snapshot_write_next(struct snapshot_handle *handle)
 	static struct chain_allocator ca;
 	int error = 0;
 
+next:
 	/* Check if we have already loaded the entire image */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages)
 		return 0;
 
 	handle->sync_read = 1;
@@ -2709,19 +2805,26 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		if (error)
 			return error;
 
+		error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
+		if (error)
+			return error;
+
+		nr_zero_pages = 0;
+
 		hibernate_restore_protection_begin();
 	} else if (handle->cur <= nr_meta_pages + 1) {
-		error = unpack_orig_pfns(buffer, &copy_bm);
+		error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
 		if (error)
 			return error;
 
 		if (handle->cur == nr_meta_pages + 1) {
-			error = prepare_image(&orig_bm, &copy_bm);
+			error = prepare_image(&orig_bm, &copy_bm, &zero_bm);
 			if (error)
 				return error;
 
 			chain_init(&ca, GFP_ATOMIC, PG_SAFE);
 			memory_bm_position_reset(&orig_bm);
+			memory_bm_position_reset(&zero_bm);
 			restore_pblist = NULL;
 			handle->buffer = get_buffer(&orig_bm, &ca);
 			handle->sync_read = 0;
@@ -2738,6 +2841,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
 			handle->sync_read = 0;
 	}
 	handle->cur++;
+
+	/* Zero pages were not included in the image, memset it and move on. */
+	if (handle->cur > nr_meta_pages + 1 &&
+	    memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
+		memset(handle->buffer, 0, PAGE_SIZE);
+		goto next;
+	}
+
 	return PAGE_SIZE;
 }
 
@@ -2754,7 +2865,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
 	copy_last_highmem_page();
 	hibernate_restore_protect_page(handle->buffer);
 	/* Do that only if we have loaded the image entirely */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) {
 		memory_bm_recycle(&orig_bm);
 		free_highmem_data();
 	}
@@ -2763,7 +2874,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
 int snapshot_image_loaded(struct snapshot_handle *handle)
 {
 	return !(!nr_copy_pages || !last_highmem_page_copied() ||
-			handle->cur <= nr_meta_pages + nr_copy_pages);
+			handle->cur <= nr_meta_pages + nr_copy_pages + nr_zero_pages);
 }
 
 #ifdef CONFIG_HIGHMEM

From 5f756d03e2c7db63c1df7148d7b1739f29ff1532 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Thu, 20 Jul 2023 11:10:53 +0530
Subject: [PATCH 0417/2047] OPP: Introduce dev_pm_opp_get_freq_indexed() API

In the case of devices with multiple clocks, drivers need to specify the
frequency index for the OPP framework to get the specific frequency within
the required OPP. So let's introduce the dev_pm_opp_get_freq_indexed() API
accepting the frequency index as an argument.

Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
[ Viresh: Fixed potential access to NULL opp pointer ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c     | 20 ++++++++++++++++++++
 include/linux/pm_opp.h |  7 +++++++
 2 files changed, 27 insertions(+)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index cb4f47b341f9..07341050d57f 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -197,6 +197,26 @@ unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq);
 
+/**
+ * dev_pm_opp_get_freq_indexed() - Gets the frequency corresponding to an
+ *				   available opp with specified index
+ * @opp: opp for which frequency has to be returned for
+ * @index: index of the frequency within the required opp
+ *
+ * Return: frequency in hertz corresponding to the opp with specified index,
+ * else return 0
+ */
+unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index)
+{
+	if (IS_ERR_OR_NULL(opp) || index >= opp->opp_table->clk_count) {
+		pr_err("%s: Invalid parameters\n", __func__);
+		return 0;
+	}
+
+	return opp->rates[index];
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq_indexed);
+
 /**
  * dev_pm_opp_get_level() - Gets the level corresponding to an available opp
  * @opp:	opp for which level value has to be returned for
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 2617f2c51f29..a13a1705df57 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -105,6 +105,8 @@ unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp);
 
 unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp);
 
+unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index);
+
 unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp);
 
 unsigned int dev_pm_opp_get_required_pstate(struct dev_pm_opp *opp,
@@ -213,6 +215,11 @@ static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
 	return 0;
 }
 
+static inline unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index)
+{
+	return 0;
+}
+
 static inline unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp)
 {
 	return 0;

From a5893928bb179d67ca1d44a8f66c990480ba541d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 21 Jul 2023 13:41:20 +0530
Subject: [PATCH 0418/2047] OPP: Add dev_pm_opp_find_freq_exact_indexed()

The indexed version of the API is added for other floor and ceil, add
the same for exact as well for completeness.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c     | 28 ++++++++++++++++++++++++++++
 include/linux/pm_opp.h | 11 +++++++++++
 2 files changed, 39 insertions(+)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 07341050d57f..2af958dc17ba 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -646,6 +646,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact);
 
+/**
+ * dev_pm_opp_find_freq_exact_indexed() - Search for an exact freq for the
+ *					 clock corresponding to the index
+ * @dev:	Device for which we do this operation
+ * @freq:	frequency to search for
+ * @index:	Clock index
+ * @available:	true/false - match for available opp
+ *
+ * Search for the matching exact OPP for the clock corresponding to the
+ * specified index from a starting freq for a device.
+ *
+ * Return: matching *opp , else returns ERR_PTR in case of error and should be
+ * handled using IS_ERR. Error return values can be:
+ * EINVAL:	for bad pointer
+ * ERANGE:	no match found for search
+ * ENODEV:	if device not found in list of registered devices
+ *
+ * The callers are required to call dev_pm_opp_put() for the returned OPP after
+ * use.
+ */
+struct dev_pm_opp *
+dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq,
+				   u32 index, bool available)
+{
+	return _find_key_exact(dev, freq, index, available, _read_freq, NULL);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact_indexed);
+
 static noinline struct dev_pm_opp *_find_freq_ceil(struct opp_table *opp_table,
 						   unsigned long *freq)
 {
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index a13a1705df57..23e4e4eaaa42 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -124,6 +124,10 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 					      unsigned long freq,
 					      bool available);
 
+struct dev_pm_opp *
+dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq,
+				   u32 index, bool available);
+
 struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 					      unsigned long *freq);
 
@@ -268,6 +272,13 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline struct dev_pm_opp *
+dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq,
+				   u32 index, bool available)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
 static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 					unsigned long *freq)
 {

From 034d6aac2160b732d58d82e34aaf2c058a8f72fa Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 21 Jul 2023 14:35:08 +0530
Subject: [PATCH 0419/2047] OPP: Update _read_freq() to return the correct
 frequency

Now that we support finding indexed frequencies, lets update
_read_freq() to return the right one.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/opp/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 2af958dc17ba..b83f651680e7 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -470,7 +470,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count);
 /* Helpers to read keys */
 static unsigned long _read_freq(struct dev_pm_opp *opp, int index)
 {
-	return opp->rates[0];
+	return opp->rates[index];
 }
 
 static unsigned long _read_level(struct dev_pm_opp *opp, int index)

From 746de8255076c9924ffa51baad9822adddccb94e Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 21 Jul 2023 14:55:07 +0530
Subject: [PATCH 0420/2047] OPP: Reuse dev_pm_opp_get_freq_indexed()

Reuse dev_pm_opp_get_freq_indexed() from dev_pm_opp_get_freq().

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/opp/core.c     | 21 ---------------------
 include/linux/pm_opp.h | 12 +++++-------
 2 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index b83f651680e7..0ad8664d2423 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -176,27 +176,6 @@ unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_power);
 
-/**
- * dev_pm_opp_get_freq() - Gets the frequency corresponding to an available opp
- * @opp:	opp for which frequency has to be returned for
- *
- * Return: frequency in hertz corresponding to the opp, else
- * return 0
- */
-unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
-{
-	if (IS_ERR_OR_NULL(opp)) {
-		pr_err("%s: Invalid parameters\n", __func__);
-		return 0;
-	}
-
-	if (!assert_single_clk(opp->opp_table))
-		return 0;
-
-	return opp->rates[0];
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq);
-
 /**
  * dev_pm_opp_get_freq_indexed() - Gets the frequency corresponding to an
  *				   available opp with specified index
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 23e4e4eaaa42..91f87d7e807c 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -103,8 +103,6 @@ int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *su
 
 unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp);
 
-unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp);
-
 unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index);
 
 unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp);
@@ -214,11 +212,6 @@ static inline unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp)
 	return 0;
 }
 
-static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
-{
-	return 0;
-}
-
 static inline unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index)
 {
 	return 0;
@@ -669,4 +662,9 @@ static inline void dev_pm_opp_put_prop_name(int token)
 	dev_pm_opp_clear_config(token);
 }
 
+static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
+{
+	return dev_pm_opp_get_freq_indexed(opp, 0);
+}
+
 #endif		/* __LINUX_OPP_H__ */

From 7ddd8deb1c3c0363a7e14fafb5df26e2089a69a5 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Fri, 21 Jul 2023 18:16:33 +0530
Subject: [PATCH 0421/2047] OPP: Fix potential null ptr dereference in
 dev_pm_opp_get_required_pstate()

"opp" pointer is dereferenced before the IS_ERR_OR_NULL() check. Fix it by
removing the dereference to cache opp_table and dereference it directly
where opp_table is used.

This fixes the following smatch warning:

drivers/opp/core.c:232 dev_pm_opp_get_required_pstate() warn: variable
dereferenced before IS_ERR check 'opp' (see line 230)

Fixes: 84cb7ff35fcf ("OPP: pstate is only valid for genpd OPP tables")
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 0ad8664d2423..991c2e894b1a 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -226,20 +226,18 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_level);
 unsigned int dev_pm_opp_get_required_pstate(struct dev_pm_opp *opp,
 					    unsigned int index)
 {
-	struct opp_table *opp_table = opp->opp_table;
-
 	if (IS_ERR_OR_NULL(opp) || !opp->available ||
-	    index >= opp_table->required_opp_count) {
+	    index >= opp->opp_table->required_opp_count) {
 		pr_err("%s: Invalid parameters\n", __func__);
 		return 0;
 	}
 
 	/* required-opps not fully initialized yet */
-	if (lazy_linking_pending(opp_table))
+	if (lazy_linking_pending(opp->opp_table))
 		return 0;
 
 	/* The required OPP table must belong to a genpd */
-	if (unlikely(!opp_table->required_opp_tables[index]->is_genpd)) {
+	if (unlikely(!opp->opp_table->required_opp_tables[index]->is_genpd)) {
 		pr_err("%s: Performance state is only valid for genpds.\n", __func__);
 		return 0;
 	}

From d920920f85a82c1c806a4143871a0e8f534732f2 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Fri, 21 Jul 2023 18:16:34 +0530
Subject: [PATCH 0422/2047] OPP: Fix passing 0 to PTR_ERR in
 _opp_attach_genpd()

If dev_pm_domain_attach_by_name() returns NULL, then 0 will be passed to
PTR_ERR() as reported by the smatch warning below:

drivers/opp/core.c:2456 _opp_attach_genpd() warn: passing zero to 'PTR_ERR'

Fix it by checking for the non-NULL virt_dev pointer before passing it to
PTR_ERR. Otherwise return -ENODEV.

Fixes: 4ea9496cbc95 ("opp: Fix error check in dev_pm_opp_attach_genpd()")
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 991c2e894b1a..919cc53bc02e 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -2460,7 +2460,7 @@ static int _opp_attach_genpd(struct opp_table *opp_table, struct device *dev,
 
 		virt_dev = dev_pm_domain_attach_by_name(dev, *name);
 		if (IS_ERR_OR_NULL(virt_dev)) {
-			ret = PTR_ERR(virt_dev) ? : -ENODEV;
+			ret = virt_dev ? PTR_ERR(virt_dev) : -ENODEV;
 			dev_err(dev, "Couldn't attach to pm_domain: %d\n", ret);
 			goto err;
 		}

From 1bc33893e79a79df93d44fbd25080dbf59410e87 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:07 -0400
Subject: [PATCH 0423/2047] ext4: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-40-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ext4/acl.c        |  2 +-
 fs/ext4/ext4.h       | 92 +++++++++++++++++++++++---------------------
 fs/ext4/extents.c    | 12 +++---
 fs/ext4/ialloc.c     |  2 +-
 fs/ext4/inline.c     |  4 +-
 fs/ext4/inode-test.c |  6 +--
 fs/ext4/inode.c      | 16 ++++----
 fs/ext4/ioctl.c      |  9 +++--
 fs/ext4/namei.c      | 26 ++++++-------
 fs/ext4/super.c      |  2 +-
 fs/ext4/xattr.c      |  6 +--
 11 files changed, 90 insertions(+), 87 deletions(-)

diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 27fcbddfb148..3bffe862f954 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -259,7 +259,7 @@ retry:
 	error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */);
 	if (!error && update_mode) {
 		inode->i_mode = mode;
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		error = ext4_mark_inode_dirty(handle, inode);
 	}
 out_stop:
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0a2d55faa095..1e2259d9967d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -868,64 +868,70 @@ struct ext4_inode {
  * affected filesystem before 2242.
  */
 
-static inline __le32 ext4_encode_extra_time(struct timespec64 *time)
+static inline __le32 ext4_encode_extra_time(struct timespec64 ts)
 {
-	u32 extra =((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK;
-	return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS));
+	u32 extra = ((ts.tv_sec - (s32)ts.tv_sec) >> 32) & EXT4_EPOCH_MASK;
+	return cpu_to_le32(extra | (ts.tv_nsec << EXT4_EPOCH_BITS));
 }
 
-static inline void ext4_decode_extra_time(struct timespec64 *time,
-					  __le32 extra)
+static inline struct timespec64 ext4_decode_extra_time(__le32 base,
+						       __le32 extra)
 {
+	struct timespec64 ts = { .tv_sec = (signed)le32_to_cpu(base) };
+
 	if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK)))
-		time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
-	time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
+		ts.tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
+	ts.tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
+	return ts;
 }
 
+#define EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, ts)			\
+do {										\
+	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {	\
+		(raw_inode)->xtime = cpu_to_le32((ts).tv_sec);			\
+		(raw_inode)->xtime ## _extra = ext4_encode_extra_time(ts);	\
+	} else									\
+		(raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (ts).tv_sec, S32_MIN, S32_MAX));	\
+} while (0)
+
 #define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode)				\
-do {										\
-	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))     {\
-		(raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec);	\
-		(raw_inode)->xtime ## _extra =					\
-				ext4_encode_extra_time(&(inode)->xtime);	\
-		}								\
-	else	\
-		(raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (inode)->xtime.tv_sec, S32_MIN, S32_MAX));	\
-} while (0)
+	EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, (inode)->xtime)
 
-#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode)			       \
-do {									       \
-	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))		       \
-		(raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec);      \
-	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))	       \
-		(raw_inode)->xtime ## _extra =				       \
-				ext4_encode_extra_time(&(einode)->xtime);      \
-} while (0)
+#define EXT4_INODE_SET_CTIME(inode, raw_inode)					\
+	EXT4_INODE_SET_XTIME_VAL(i_ctime, inode, raw_inode, inode_get_ctime(inode))
+
+#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode)				\
+	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))			\
+		EXT4_INODE_SET_XTIME_VAL(xtime, &((einode)->vfs_inode),		\
+					 raw_inode, (einode)->xtime)
+
+#define EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode)			\
+	(EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra) ?	\
+		ext4_decode_extra_time((raw_inode)->xtime,				\
+				       (raw_inode)->xtime ## _extra) :		\
+		(struct timespec64) {						\
+			.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime)	\
+		})
 
 #define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode)				\
 do {										\
-	(inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime);	\
-	if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {	\
-		ext4_decode_extra_time(&(inode)->xtime,				\
-				       raw_inode->xtime ## _extra);		\
-		}								\
-	else									\
-		(inode)->xtime.tv_nsec = 0;					\
+	(inode)->xtime = EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode);	\
 } while (0)
 
+#define EXT4_INODE_GET_CTIME(inode, raw_inode)					\
+do {										\
+	inode_set_ctime_to_ts(inode,						\
+		EXT4_INODE_GET_XTIME_VAL(i_ctime, inode, raw_inode));		\
+} while (0)
 
-#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode)			       \
-do {									       \
-	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))		       \
-		(einode)->xtime.tv_sec = 				       \
-			(signed)le32_to_cpu((raw_inode)->xtime);	       \
-	else								       \
-		(einode)->xtime.tv_sec = 0;				       \
-	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))	       \
-		ext4_decode_extra_time(&(einode)->xtime,		       \
-				       raw_inode->xtime ## _extra);	       \
-	else								       \
-		(einode)->xtime.tv_nsec = 0;				       \
+#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode)				\
+do {										\
+	if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) 			\
+		(einode)->xtime =						\
+			EXT4_INODE_GET_XTIME_VAL(xtime, &(einode->vfs_inode),	\
+						 raw_inode);			\
+	else									\
+		(einode)->xtime = (struct timespec64){0, 0};			\
 } while (0)
 
 #define i_disk_version osd1.linux1.l_i_version
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e4115d338f10..202c76996b62 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4476,12 +4476,12 @@ retry:
 		map.m_lblk += ret;
 		map.m_len = len = len - ret;
 		epos = (loff_t)map.m_lblk << inode->i_blkbits;
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		if (new_size) {
 			if (epos > new_size)
 				epos = new_size;
 			if (ext4_update_inode_size(inode, epos) & 0x1)
-				inode->i_mtime = inode->i_ctime;
+				inode->i_mtime = inode_get_ctime(inode);
 		}
 		ret2 = ext4_mark_inode_dirty(handle, inode);
 		ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -4617,7 +4617,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
 		/* Now release the pages and zero block aligned part of pages */
 		truncate_pagecache_range(inode, start, end - 1);
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 
 		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
 					     flags);
@@ -4642,7 +4642,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		goto out_mutex;
 	}
 
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	if (new_size)
 		ext4_update_inode_size(inode, new_size);
 	ret = ext4_mark_inode_dirty(handle, inode);
@@ -5378,7 +5378,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
 	up_write(&EXT4_I(inode)->i_data_sem);
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	ret = ext4_mark_inode_dirty(handle, inode);
 	ext4_update_inode_fsync_trans(handle, inode, 1);
 
@@ -5488,7 +5488,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
 	/* Expand file to avoid data loss if there is error while shifting */
 	inode->i_size += len;
 	EXT4_I(inode)->i_disksize += len;
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	ret = ext4_mark_inode_dirty(handle, inode);
 	if (ret)
 		goto out_stop;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 754f961cd9fd..48abef5f23e7 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1250,7 +1250,7 @@ got:
 	inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
 	/* This is the optimal IO size (for stat), not the fs block size */
 	inode->i_blocks = 0;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	ei->i_crtime = inode->i_mtime;
 
 	memset(ei->i_data, 0, sizeof(ei->i_data));
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index a4b7e4bc32d4..003861037374 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1037,7 +1037,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
 	 * happen is that the times are slightly out of date
 	 * and/or different from the directory change time.
 	 */
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	ext4_update_dx_flag(dir);
 	inode_inc_iversion(dir);
 	return 1;
@@ -1991,7 +1991,7 @@ out:
 		ext4_orphan_del(handle, inode);
 
 	if (err == 0) {
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 		err = ext4_mark_inode_dirty(handle, inode);
 		if (IS_SYNC(inode))
 			ext4_handle_sync(handle);
diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
index 7935ea6cf92c..f0c0fd507fbc 100644
--- a/fs/ext4/inode-test.c
+++ b/fs/ext4/inode-test.c
@@ -245,9 +245,9 @@ static void inode_test_xtimestamp_decoding(struct kunit *test)
 	struct timestamp_expectation *test_param =
 			(struct timestamp_expectation *)(test->param_value);
 
-	timestamp.tv_sec = get_32bit_time(test_param);
-	ext4_decode_extra_time(&timestamp,
-			       cpu_to_le32(test_param->extra_bits));
+	timestamp = ext4_decode_extra_time(
+				cpu_to_le32(get_32bit_time(test_param)),
+				cpu_to_le32(test_param->extra_bits));
 
 	KUNIT_EXPECT_EQ_MSG(test,
 			    test_param->expected.tv_sec,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 43775a6ca505..86696b40c58f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3986,7 +3986,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
 
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	ret2 = ext4_mark_inode_dirty(handle, inode);
 	if (unlikely(ret2))
 		ret = ret2;
@@ -4146,7 +4146,7 @@ out_stop:
 	if (inode->i_nlink)
 		ext4_orphan_del(handle, inode);
 
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	err2 = ext4_mark_inode_dirty(handle, inode);
 	if (unlikely(err2 && !err))
 		err = err2;
@@ -4249,7 +4249,7 @@ static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode
 	}
 	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
 
-	EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+	EXT4_INODE_SET_CTIME(inode, raw_inode);
 	EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
 	EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
 	EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
@@ -4858,7 +4858,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
 		}
 	}
 
-	EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
+	EXT4_INODE_GET_CTIME(inode, raw_inode);
 	EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
 	EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
 	EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
@@ -4981,7 +4981,7 @@ static void __ext4_update_other_inode_time(struct super_block *sb,
 		spin_unlock(&inode->i_lock);
 
 		spin_lock(&ei->i_raw_lock);
-		EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+		EXT4_INODE_SET_CTIME(inode, raw_inode);
 		EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
 		EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
 		ext4_inode_csum_set(inode, raw_inode, ei);
@@ -5376,10 +5376,8 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 			 * Update c/mtime on truncate up, ext4_truncate() will
 			 * update c/mtime in shrink case below
 			 */
-			if (!shrink) {
-				inode->i_mtime = current_time(inode);
-				inode->i_ctime = inode->i_mtime;
-			}
+			if (!shrink)
+				inode->i_mtime = inode_set_ctime_current(inode);
 
 			if (shrink)
 				ext4_fc_track_range(handle, inode,
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 331859511f80..b0349f451863 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -449,7 +449,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
 	diff = size - size_bl;
 	swap_inode_data(inode, inode_bl);
 
-	inode->i_ctime = inode_bl->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
+	inode_set_ctime_current(inode_bl);
 	inode_inc_iversion(inode);
 
 	inode->i_generation = get_random_u32();
@@ -663,7 +664,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
 
 	ext4_set_inode_flags(inode, false);
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	inode_inc_iversion(inode);
 
 	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
@@ -774,7 +775,7 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
 	}
 
 	EXT4_I(inode)->i_projid = kprojid;
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	inode_inc_iversion(inode);
 out_dirty:
 	rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
@@ -1266,7 +1267,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		}
 		err = ext4_reserve_inode_write(handle, inode, &iloc);
 		if (err == 0) {
-			inode->i_ctime = current_time(inode);
+			inode_set_ctime_current(inode);
 			inode_inc_iversion(inode);
 			inode->i_generation = generation;
 			err = ext4_mark_iloc_dirty(handle, inode, &iloc);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 0caf6c730ce3..933ad03f4f58 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2203,7 +2203,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
 	 * happen is that the times are slightly out of date
 	 * and/or different from the directory change time.
 	 */
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	ext4_update_dx_flag(dir);
 	inode_inc_iversion(dir);
 	err2 = ext4_mark_inode_dirty(handle, dir);
@@ -3197,7 +3197,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
 	 * recovery. */
 	inode->i_size = 0;
 	ext4_orphan_add(handle, inode);
-	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+	dir->i_mtime = inode_set_ctime_current(dir);
+	inode_set_ctime_current(inode);
 	retval = ext4_mark_inode_dirty(handle, inode);
 	if (retval)
 		goto end_rmdir;
@@ -3271,7 +3272,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
 		retval = ext4_delete_entry(handle, dir, de, bh);
 		if (retval)
 			goto out_handle;
-		dir->i_ctime = dir->i_mtime = current_time(dir);
+		dir->i_mtime = inode_set_ctime_current(dir);
 		ext4_update_dx_flag(dir);
 		retval = ext4_mark_inode_dirty(handle, dir);
 		if (retval)
@@ -3286,7 +3287,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
 		drop_nlink(inode);
 	if (!inode->i_nlink)
 		ext4_orphan_add(handle, inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	retval = ext4_mark_inode_dirty(handle, inode);
 	if (dentry && !retval)
 		ext4_fc_track_unlink(handle, dentry);
@@ -3463,7 +3464,7 @@ retry:
 	if (IS_DIRSYNC(dir))
 		ext4_handle_sync(handle);
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	ext4_inc_count(inode);
 	ihold(inode);
 
@@ -3641,8 +3642,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
 	if (ext4_has_feature_filetype(ent->dir->i_sb))
 		ent->de->file_type = file_type;
 	inode_inc_iversion(ent->dir);
-	ent->dir->i_ctime = ent->dir->i_mtime =
-		current_time(ent->dir);
+	ent->dir->i_mtime = inode_set_ctime_current(ent->dir);
 	retval = ext4_mark_inode_dirty(handle, ent->dir);
 	BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
 	if (!ent->inlined) {
@@ -3941,7 +3941,7 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 	 * Like most other Unix systems, set the ctime for inodes on a
 	 * rename.
 	 */
-	old.inode->i_ctime = current_time(old.inode);
+	inode_set_ctime_current(old.inode);
 	retval = ext4_mark_inode_dirty(handle, old.inode);
 	if (unlikely(retval))
 		goto end_rename;
@@ -3955,9 +3955,9 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 
 	if (new.inode) {
 		ext4_dec_count(new.inode);
-		new.inode->i_ctime = current_time(new.inode);
+		inode_set_ctime_current(new.inode);
 	}
-	old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir);
+	old.dir->i_mtime = inode_set_ctime_current(old.dir);
 	ext4_update_dx_flag(old.dir);
 	if (old.dir_bh) {
 		retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
@@ -4053,7 +4053,6 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 	};
 	u8 new_file_type;
 	int retval;
-	struct timespec64 ctime;
 
 	if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
 	     !projid_eq(EXT4_I(new_dir)->i_projid,
@@ -4147,9 +4146,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 * Like most other Unix systems, set the ctime for inodes on a
 	 * rename.
 	 */
-	ctime = current_time(old.inode);
-	old.inode->i_ctime = ctime;
-	new.inode->i_ctime = ctime;
+	inode_set_ctime_current(old.inode);
+	inode_set_ctime_current(new.inode);
 	retval = ext4_mark_inode_dirty(handle, old.inode);
 	if (unlikely(retval))
 		goto end_rename;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c94ebf704616..b54c70e1a74e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -7103,7 +7103,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
 	}
 	EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
 	inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	err = ext4_mark_inode_dirty(handle, inode);
 	ext4_journal_stop(handle);
 out_unlock:
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 321e3a888c20..dbe54cddda3d 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -356,13 +356,13 @@ ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size)
 
 static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode)
 {
-	return ((u64)ea_inode->i_ctime.tv_sec << 32) |
+	return ((u64) inode_get_ctime(ea_inode).tv_sec << 32) |
 		(u32) inode_peek_iversion_raw(ea_inode);
 }
 
 static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count)
 {
-	ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32);
+	inode_set_ctime(ea_inode, (u32)(ref_count >> 32), 0);
 	inode_set_iversion_raw(ea_inode, ref_count & 0xffffffff);
 }
 
@@ -2459,7 +2459,7 @@ retry_inode:
 	}
 	if (!error) {
 		ext4_xattr_update_super_block(handle, inode->i_sb);
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		inode_inc_iversion(inode);
 		if (!value)
 			no_expand = 0;

From c62ebd3501cc2c78d4cc28a9b4cbdb17d199c6d6 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:08 -0400
Subject: [PATCH 0424/2047] f2fs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-41-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/f2fs/dir.c      |  8 ++++----
 fs/f2fs/f2fs.h     |  4 +++-
 fs/f2fs/file.c     | 20 ++++++++++----------
 fs/f2fs/inline.c   |  2 +-
 fs/f2fs/inode.c    | 10 +++++-----
 fs/f2fs/namei.c    | 12 ++++++------
 fs/f2fs/recovery.c |  4 ++--
 fs/f2fs/super.c    |  2 +-
 fs/f2fs/xattr.c    |  2 +-
 9 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index d635c58cf5a3..8aa29fe2e87b 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -455,7 +455,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
 	de->file_type = fs_umode_to_ftype(inode->i_mode);
 	set_page_dirty(page);
 
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	f2fs_mark_inode_dirty_sync(dir, false);
 	f2fs_put_page(page, 1);
 }
@@ -609,7 +609,7 @@ void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode,
 			f2fs_i_links_write(dir, true);
 		clear_inode_flag(inode, FI_NEW_INODE);
 	}
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	f2fs_mark_inode_dirty_sync(dir, false);
 
 	if (F2FS_I(dir)->i_current_depth != current_depth)
@@ -858,7 +858,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
 
 	if (S_ISDIR(inode->i_mode))
 		f2fs_i_links_write(dir, false);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 
 	f2fs_i_links_write(inode, false);
 	if (S_ISDIR(inode->i_mode)) {
@@ -919,7 +919,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 	}
 	f2fs_put_page(page, 1);
 
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	f2fs_mark_inode_dirty_sync(dir, false);
 
 	if (inode)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c7cb2177b252..e18272ae3119 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3303,9 +3303,11 @@ static inline void clear_file(struct inode *inode, int type)
 
 static inline bool f2fs_is_time_consistent(struct inode *inode)
 {
+	struct timespec64 ctime = inode_get_ctime(inode);
+
 	if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime))
 		return false;
-	if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime))
+	if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &ctime))
 		return false;
 	if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime))
 		return false;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 093039dee992..b018800223c4 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -794,7 +794,7 @@ int f2fs_truncate(struct inode *inode)
 	if (err)
 		return err;
 
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	f2fs_mark_inode_dirty_sync(inode, false);
 	return 0;
 }
@@ -905,7 +905,7 @@ static void __setattr_copy(struct mnt_idmap *idmap,
 	if (ia_valid & ATTR_MTIME)
 		inode->i_mtime = attr->ia_mtime;
 	if (ia_valid & ATTR_CTIME)
-		inode->i_ctime = attr->ia_ctime;
+		inode_set_ctime_to_ts(inode, attr->ia_ctime);
 	if (ia_valid & ATTR_MODE) {
 		umode_t mode = attr->ia_mode;
 		vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
@@ -1008,7 +1008,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 			return err;
 
 		spin_lock(&F2FS_I(inode)->i_size_lock);
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 		F2FS_I(inode)->last_disk_size = i_size_read(inode);
 		spin_unlock(&F2FS_I(inode)->i_size_lock);
 	}
@@ -1835,7 +1835,7 @@ static long f2fs_fallocate(struct file *file, int mode,
 	}
 
 	if (!ret) {
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 		f2fs_mark_inode_dirty_sync(inode, false);
 		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
 	}
@@ -1937,7 +1937,7 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
 	else
 		clear_inode_flag(inode, FI_PROJ_INHERIT);
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	f2fs_set_inode_flags(inode);
 	f2fs_mark_inode_dirty_sync(inode, true);
 	return 0;
@@ -2874,10 +2874,10 @@ out_src:
 	if (ret)
 		goto out_unlock;
 
-	src->i_mtime = src->i_ctime = current_time(src);
+	src->i_mtime = inode_set_ctime_current(src);
 	f2fs_mark_inode_dirty_sync(src, false);
 	if (src != dst) {
-		dst->i_mtime = dst->i_ctime = current_time(dst);
+		dst->i_mtime = inode_set_ctime_current(dst);
 		f2fs_mark_inode_dirty_sync(dst, false);
 	}
 	f2fs_update_time(sbi, REQ_TIME);
@@ -3073,7 +3073,7 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
 		goto out_unlock;
 
 	fi->i_projid = kprojid;
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	f2fs_mark_inode_dirty_sync(inode, true);
 out_unlock:
 	f2fs_unlock_op(sbi);
@@ -3511,7 +3511,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
 	}
 
 	set_inode_flag(inode, FI_COMPRESS_RELEASED);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	f2fs_mark_inode_dirty_sync(inode, true);
 
 	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -3710,7 +3710,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 
 	if (ret >= 0) {
 		clear_inode_flag(inode, FI_COMPRESS_RELEASED);
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		f2fs_mark_inode_dirty_sync(inode, true);
 	}
 unlock_inode:
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 4638fee16a91..88fc9208ffa7 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -698,7 +698,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
 	set_page_dirty(page);
 	f2fs_put_page(page, 1);
 
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	f2fs_mark_inode_dirty_sync(dir, false);
 
 	if (inode)
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 09e986b050c6..c1c2ba9f28e5 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -403,7 +403,7 @@ static void init_idisk_time(struct inode *inode)
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 
 	fi->i_disk_time[0] = inode->i_atime;
-	fi->i_disk_time[1] = inode->i_ctime;
+	fi->i_disk_time[1] = inode_get_ctime(inode);
 	fi->i_disk_time[2] = inode->i_mtime;
 }
 
@@ -434,10 +434,10 @@ static int do_read_inode(struct inode *inode)
 	inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1);
 
 	inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
-	inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
+	inode_set_ctime(inode, le64_to_cpu(ri->i_ctime),
+			le32_to_cpu(ri->i_ctime_nsec));
 	inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime);
 	inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
-	inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
 	inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
 	inode->i_generation = le32_to_cpu(ri->i_generation);
 	if (S_ISDIR(inode->i_mode))
@@ -714,10 +714,10 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
 	set_raw_inline(inode, ri);
 
 	ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
-	ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+	ri->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
 	ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
 	ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
-	ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 	ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
 	if (S_ISDIR(inode->i_mode))
 		ri->i_current_depth =
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index bee0568888da..193b22a2d6bf 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -243,7 +243,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
 
 	inode->i_ino = ino;
 	inode->i_blocks = 0;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	F2FS_I(inode)->i_crtime = inode->i_mtime;
 	inode->i_generation = get_random_u32();
 
@@ -420,7 +420,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
 
 	f2fs_balance_fs(sbi, true);
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	ihold(inode);
 
 	set_inode_flag(inode, FI_INC_LINK);
@@ -1052,7 +1052,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 		f2fs_set_link(new_dir, new_entry, new_page, old_inode);
 		new_page = NULL;
 
-		new_inode->i_ctime = current_time(new_inode);
+		inode_set_ctime_current(new_inode);
 		f2fs_down_write(&F2FS_I(new_inode)->i_sem);
 		if (old_dir_entry)
 			f2fs_i_links_write(new_inode, false);
@@ -1086,7 +1086,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 		f2fs_i_pino_write(old_inode, new_dir->i_ino);
 	f2fs_up_write(&F2FS_I(old_inode)->i_sem);
 
-	old_inode->i_ctime = current_time(old_inode);
+	inode_set_ctime_current(old_inode);
 	f2fs_mark_inode_dirty_sync(old_inode, false);
 
 	f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
@@ -1251,7 +1251,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 		f2fs_i_pino_write(old_inode, new_dir->i_ino);
 	f2fs_up_write(&F2FS_I(old_inode)->i_sem);
 
-	old_dir->i_ctime = current_time(old_dir);
+	inode_set_ctime_current(old_dir);
 	if (old_nlink) {
 		f2fs_down_write(&F2FS_I(old_dir)->i_sem);
 		f2fs_i_links_write(old_dir, old_nlink > 0);
@@ -1270,7 +1270,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 		f2fs_i_pino_write(new_inode, old_dir->i_ino);
 	f2fs_up_write(&F2FS_I(new_inode)->i_sem);
 
-	new_dir->i_ctime = current_time(new_dir);
+	inode_set_ctime_current(new_dir);
 	if (new_nlink) {
 		f2fs_down_write(&F2FS_I(new_dir)->i_sem);
 		f2fs_i_links_write(new_dir, new_nlink > 0);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 4e7d4ceeb084..b8637e88d94f 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -321,10 +321,10 @@ static int recover_inode(struct inode *inode, struct page *page)
 
 	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
 	inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
-	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
+	inode_set_ctime(inode, le64_to_cpu(raw->i_ctime),
+			le32_to_cpu(raw->i_ctime_nsec));
 	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
 	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
-	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
 	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
 
 	F2FS_I(inode)->i_advise = raw->i_advise;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ca31163da00a..28aeffc0048c 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2703,7 +2703,7 @@ retry:
 
 	if (len == towrite)
 		return err;
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	f2fs_mark_inode_dirty_sync(inode, false);
 	return len - towrite;
 }
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 476b186b90a6..4ae93e1df421 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -764,7 +764,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
 same:
 	if (is_inode_flag_set(inode, FI_ACL_MODE)) {
 		inode->i_mode = F2FS_I(inode)->i_acl_mode;
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		clear_inode_flag(inode, FI_ACL_MODE);
 	}
 

From ea60635ec1f456b4cf8ecf1ead2e203c9226e0d6 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:09 -0400
Subject: [PATCH 0425/2047] fat: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-42-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fat/inode.c | 5 ++---
 fs/fat/misc.c  | 3 ++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index d99b8549ec8f..cdd39b6020f3 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -562,7 +562,7 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
 			   & ~((loff_t)sbi->cluster_size - 1)) >> 9;
 
 	fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
-	inode->i_ctime = inode->i_mtime;
+	inode_set_ctime_to_ts(inode, inode->i_mtime);
 	if (sbi->options.isvfat) {
 		fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
 		fat_time_fat2unix(sbi, &MSDOS_I(inode)->i_crtime, de->ctime,
@@ -1407,8 +1407,7 @@ static int fat_read_root(struct inode *inode)
 	MSDOS_I(inode)->mmu_private = inode->i_size;
 
 	fat_save_attrs(inode, ATTR_DIR);
-	inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
-	inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
+	inode->i_mtime = inode->i_atime = inode_set_ctime(inode, 0, 0);
 	set_nlink(inode, fat_subdirs(inode)+2);
 
 	return 0;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 7e5d6ae305f2..67006ea08db6 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -332,7 +332,8 @@ int fat_truncate_time(struct inode *inode, struct timespec64 *now, int flags)
 	 * but ctime updates are ignored.
 	 */
 	if (flags & S_MTIME)
-		inode->i_mtime = inode->i_ctime = fat_truncate_mtime(sbi, now);
+		inode->i_mtime = inode_set_ctime_to_ts(inode,
+						       fat_truncate_mtime(sbi, now));
 
 	return 0;
 }

From 529d8a290ac92e0ea095e1edabbcad41c056a854 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:10 -0400
Subject: [PATCH 0426/2047] freevxfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-43-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/freevxfs/vxfs_inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ceb6a12649ba..ac5d43b164b5 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -110,10 +110,9 @@ static inline void dip2vip_cpy(struct vxfs_sb_info *sbi,
 	inode->i_size = vip->vii_size;
 
 	inode->i_atime.tv_sec = vip->vii_atime;
-	inode->i_ctime.tv_sec = vip->vii_ctime;
+	inode_set_ctime(inode, vip->vii_ctime, 0);
 	inode->i_mtime.tv_sec = vip->vii_mtime;
 	inode->i_atime.tv_nsec = 0;
-	inode->i_ctime.tv_nsec = 0;
 	inode->i_mtime.tv_nsec = 0;
 
 	inode->i_blocks = vip->vii_blocks;

From ceb2d5e92f49c2497faea6a501262e8fe6d91f89 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:11 -0400
Subject: [PATCH 0427/2047] fuse: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-44-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fuse/control.c |  2 +-
 fs/fuse/dir.c     |  8 ++++----
 fs/fuse/inode.c   | 16 ++++++++--------
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 247ef4f76761..ab62e4624256 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -235,7 +235,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
 	inode->i_mode = mode;
 	inode->i_uid = fc->user_id;
 	inode->i_gid = fc->group_id;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	/* setting ->i_op to NULL is not allowed */
 	if (iop)
 		inode->i_op = iop;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 35bc174f9ba2..a210c231c7d3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -935,7 +935,7 @@ void fuse_flush_time_update(struct inode *inode)
 static void fuse_update_ctime_in_cache(struct inode *inode)
 {
 	if (!IS_NOCMTIME(inode)) {
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		mark_inode_dirty_sync(inode);
 		fuse_flush_time_update(inode);
 	}
@@ -1717,8 +1717,8 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
 	inarg.mtimensec = inode->i_mtime.tv_nsec;
 	if (fm->fc->minor >= 23) {
 		inarg.valid |= FATTR_CTIME;
-		inarg.ctime = inode->i_ctime.tv_sec;
-		inarg.ctimensec = inode->i_ctime.tv_nsec;
+		inarg.ctime = inode_get_ctime(inode).tv_sec;
+		inarg.ctimensec = inode_get_ctime(inode).tv_nsec;
 	}
 	if (ff) {
 		inarg.valid |= FATTR_FH;
@@ -1859,7 +1859,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
 		if (attr->ia_valid & ATTR_MTIME)
 			inode->i_mtime = attr->ia_mtime;
 		if (attr->ia_valid & ATTR_CTIME)
-			inode->i_ctime = attr->ia_ctime;
+			inode_set_ctime_to_ts(inode, attr->ia_ctime);
 		/* FIXME: clear I_DIRTY_SYNC? */
 	}
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d66070af145d..0b435b23a97f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -194,8 +194,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 		inode->i_mtime.tv_nsec  = attr->mtimensec;
 	}
 	if (!(cache_mask & STATX_CTIME)) {
-		inode->i_ctime.tv_sec   = attr->ctime;
-		inode->i_ctime.tv_nsec  = attr->ctimensec;
+		inode_set_ctime(inode, attr->ctime, attr->ctimensec);
 	}
 
 	if (attr->blksize != 0)
@@ -259,8 +258,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 		attr->mtimensec = inode->i_mtime.tv_nsec;
 	}
 	if (cache_mask & STATX_CTIME) {
-		attr->ctime = inode->i_ctime.tv_sec;
-		attr->ctimensec = inode->i_ctime.tv_nsec;
+		attr->ctime = inode_get_ctime(inode).tv_sec;
+		attr->ctimensec = inode_get_ctime(inode).tv_nsec;
 	}
 
 	if ((attr_version != 0 && fi->attr_version > attr_version) ||
@@ -318,8 +317,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
 	inode->i_size = attr->size;
 	inode->i_mtime.tv_sec  = attr->mtime;
 	inode->i_mtime.tv_nsec = attr->mtimensec;
-	inode->i_ctime.tv_sec  = attr->ctime;
-	inode->i_ctime.tv_nsec = attr->ctimensec;
+	inode_set_ctime(inode, attr->ctime, attr->ctimensec);
 	if (S_ISREG(inode->i_mode)) {
 		fuse_init_common(inode);
 		fuse_init_file_inode(inode, attr->flags);
@@ -1397,16 +1395,18 @@ EXPORT_SYMBOL_GPL(fuse_dev_free);
 static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
 				      const struct fuse_inode *fi)
 {
+	struct timespec64 ctime = inode_get_ctime(&fi->inode);
+
 	*attr = (struct fuse_attr){
 		.ino		= fi->inode.i_ino,
 		.size		= fi->inode.i_size,
 		.blocks		= fi->inode.i_blocks,
 		.atime		= fi->inode.i_atime.tv_sec,
 		.mtime		= fi->inode.i_mtime.tv_sec,
-		.ctime		= fi->inode.i_ctime.tv_sec,
+		.ctime		= ctime.tv_sec,
 		.atimensec	= fi->inode.i_atime.tv_nsec,
 		.mtimensec	= fi->inode.i_mtime.tv_nsec,
-		.ctimensec	= fi->inode.i_ctime.tv_nsec,
+		.ctimensec	= ctime.tv_nsec,
 		.mode		= fi->inode.i_mode,
 		.nlink		= fi->inode.i_nlink,
 		.uid		= fi->inode.i_uid.val,

From 8a8b8d91b1582f03b934dc3b9781f7ced4d27202 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:12 -0400
Subject: [PATCH 0428/2047] gfs2: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-45-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/gfs2/acl.c   |  2 +-
 fs/gfs2/bmap.c  | 11 +++++------
 fs/gfs2/dir.c   | 15 ++++++++-------
 fs/gfs2/file.c  |  2 +-
 fs/gfs2/glops.c |  4 ++--
 fs/gfs2/inode.c |  8 ++++----
 fs/gfs2/super.c |  4 ++--
 fs/gfs2/xattr.c |  8 ++++----
 8 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index a392aa0f041d..443640e6fb9c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -142,7 +142,7 @@ int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
 
 	ret = __gfs2_set_acl(inode, acl, type);
 	if (!ret && mode != inode->i_mode) {
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		inode->i_mode = mode;
 		mark_inode_dirty(inode);
 	}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8d611fbcf0bd..45ea63f7167d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1386,7 +1386,7 @@ static int trunc_start(struct inode *inode, u64 newsize)
 		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
 
 	i_size_write(inode, newsize);
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
 	gfs2_dinode_out(ip, dibh->b_data);
 
 	if (journaled)
@@ -1583,8 +1583,7 @@ out_unlock:
 
 			/* Every transaction boundary, we rewrite the dinode
 			   to keep its di_blocks current in case of failure. */
-			ip->i_inode.i_mtime = ip->i_inode.i_ctime =
-				current_time(&ip->i_inode);
+			ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
 			gfs2_trans_add_meta(ip->i_gl, dibh);
 			gfs2_dinode_out(ip, dibh->b_data);
 			brelse(dibh);
@@ -1950,7 +1949,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
 		gfs2_statfs_change(sdp, 0, +btotal, 0);
 		gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
 				  ip->i_inode.i_gid);
-		ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+		ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
 		gfs2_trans_add_meta(ip->i_gl, dibh);
 		gfs2_dinode_out(ip, dibh->b_data);
 		up_write(&ip->i_rw_mutex);
@@ -1993,7 +1992,7 @@ static int trunc_end(struct gfs2_inode *ip)
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 		gfs2_ordered_del_inode(ip);
 	}
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
 	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
 
 	gfs2_trans_add_meta(ip->i_gl, dibh);
@@ -2094,7 +2093,7 @@ static int do_grow(struct inode *inode, u64 size)
 		goto do_end_trans;
 
 	truncate_setsize(inode, size);
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
 	gfs2_trans_add_meta(ip->i_gl, dibh);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 54a6d17b8c25..1a2afa88f8be 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -130,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
 	memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
 	if (ip->i_inode.i_size < offset + size)
 		i_size_write(&ip->i_inode, offset + size);
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
 	gfs2_dinode_out(ip, dibh->b_data);
 
 	brelse(dibh);
@@ -227,7 +227,7 @@ out:
 
 	if (ip->i_inode.i_size < offset + copied)
 		i_size_write(&ip->i_inode, offset + copied);
-	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
 
 	gfs2_trans_add_meta(ip->i_gl, dibh);
 	gfs2_dinode_out(ip, dibh->b_data);
@@ -1814,7 +1814,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 			gfs2_inum_out(nip, dent);
 			dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
 			dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip));
-			tv = current_time(&ip->i_inode);
+			tv = inode_set_ctime_current(&ip->i_inode);
 			if (ip->i_diskflags & GFS2_DIF_EXHASH) {
 				leaf = (struct gfs2_leaf *)bh->b_data;
 				be16_add_cpu(&leaf->lf_entries, 1);
@@ -1825,7 +1825,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 			da->bh = NULL;
 			brelse(bh);
 			ip->i_entries++;
-			ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv;
+			ip->i_inode.i_mtime = tv;
 			if (S_ISDIR(nip->i_inode.i_mode))
 				inc_nlink(&ip->i_inode);
 			mark_inode_dirty(inode);
@@ -1876,7 +1876,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
 	const struct qstr *name = &dentry->d_name;
 	struct gfs2_dirent *dent, *prev = NULL;
 	struct buffer_head *bh;
-	struct timespec64 tv = current_time(&dip->i_inode);
+	struct timespec64 tv;
 
 	/* Returns _either_ the entry (if its first in block) or the
 	   previous entry otherwise */
@@ -1896,6 +1896,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
 	}
 
 	dirent_del(dip, bh, prev, dent);
+	tv = inode_set_ctime_current(&dip->i_inode);
 	if (dip->i_diskflags & GFS2_DIF_EXHASH) {
 		struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
 		u16 entries = be16_to_cpu(leaf->lf_entries);
@@ -1910,7 +1911,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
 	if (!dip->i_entries)
 		gfs2_consist_inode(dip);
 	dip->i_entries--;
-	dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv;
+	dip->i_inode.i_mtime =  tv;
 	if (d_is_dir(dentry))
 		drop_nlink(&dip->i_inode);
 	mark_inode_dirty(&dip->i_inode);
@@ -1951,7 +1952,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 	dent->de_type = cpu_to_be16(new_type);
 	brelse(bh);
 
-	dip->i_inode.i_mtime = dip->i_inode.i_ctime = current_time(&dip->i_inode);
+	dip->i_inode.i_mtime = inode_set_ctime_current(&dip->i_inode);
 	mark_inode_dirty_sync(&dip->i_inode);
 	return 0;
 }
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 1bf3c4453516..ecbfbc6df621 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -260,7 +260,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask)
 	error = gfs2_meta_inode_buffer(ip, &bh);
 	if (error)
 		goto out_trans_end;
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	gfs2_trans_add_meta(ip->i_gl, bh);
 	ip->i_diskflags = new_flags;
 	gfs2_dinode_out(ip, bh->b_data);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 54319328b16b..aecdac3cfbe1 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -437,8 +437,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 		inode->i_atime = atime;
 	inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
 	inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
-	inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
-	inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
+	inode_set_ctime(inode, be64_to_cpu(str->di_ctime),
+			be32_to_cpu(str->di_ctime_nsec));
 
 	ip->i_goal = be64_to_cpu(str->di_goal_meta);
 	ip->i_generation = be64_to_cpu(str->di_generation);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 17c994a0c0d0..2ded6c813f20 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -690,7 +690,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	set_nlink(inode, S_ISDIR(mode) ? 2 : 1);
 	inode->i_rdev = dev;
 	inode->i_size = size;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	munge_mode_uid_gid(dip, inode);
 	check_and_update_goal(dip);
 	ip->i_goal = dip->i_goal;
@@ -1029,7 +1029,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 
 	gfs2_trans_add_meta(ip->i_gl, dibh);
 	inc_nlink(&ip->i_inode);
-	ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	inode_set_ctime_current(&ip->i_inode);
 	ihold(inode);
 	d_instantiate(dentry, inode);
 	mark_inode_dirty(inode);
@@ -1114,7 +1114,7 @@ static int gfs2_unlink_inode(struct gfs2_inode *dip,
 		return error;
 
 	ip->i_entries = 0;
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	if (S_ISDIR(inode->i_mode))
 		clear_nlink(inode);
 	else
@@ -1371,7 +1371,7 @@ static int update_moved_ino(struct gfs2_inode *ip, struct gfs2_inode *ndip,
 	if (dir_rename)
 		return gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
 
-	ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	inode_set_ctime_current(&ip->i_inode);
 	mark_inode_dirty_sync(&ip->i_inode);
 	return 0;
 }
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 9f4d5d6549ee..ec0296b35dfe 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -412,7 +412,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode));
 	str->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
 	str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
-	str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
+	str->di_ctime = cpu_to_be64(inode_get_ctime(inode).tv_sec);
 
 	str->di_goal_meta = cpu_to_be64(ip->i_goal);
 	str->di_goal_data = cpu_to_be64(ip->i_goal);
@@ -429,7 +429,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_eattr = cpu_to_be64(ip->i_eattr);
 	str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
 	str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
-	str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
+	str->di_ctime_nsec = cpu_to_be32(inode_get_ctime(inode).tv_nsec);
 }
 
 /**
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 93b36d026bb4..4fea70c0fe3d 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -311,7 +311,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 		ea->ea_num_ptrs = 0;
 	}
 
-	ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	inode_set_ctime_current(&ip->i_inode);
 	__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
 
 	gfs2_trans_end(sdp);
@@ -763,7 +763,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	if (error)
 		goto out_end_trans;
 
-	ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	inode_set_ctime_current(&ip->i_inode);
 	__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
 
 out_end_trans:
@@ -888,7 +888,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
 	if (es->es_el)
 		ea_set_remove_stuffed(ip, es->es_el);
 
-	ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	inode_set_ctime_current(&ip->i_inode);
 	__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
 
 	gfs2_trans_end(GFS2_SB(&ip->i_inode));
@@ -1106,7 +1106,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
 		ea->ea_type = GFS2_EATYPE_UNUSED;
 	}
 
-	ip->i_inode.i_ctime = current_time(&ip->i_inode);
+	inode_set_ctime_current(&ip->i_inode);
 	__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
 
 	gfs2_trans_end(GFS2_SB(&ip->i_inode));

From 7305586a79284f86ee003ab0c03c31b59f87ebbb Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:13 -0400
Subject: [PATCH 0429/2047] hfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-46-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/hfs/catalog.c |  8 ++++----
 fs/hfs/dir.c     |  2 +-
 fs/hfs/inode.c   | 13 ++++++-------
 fs/hfs/sysdep.c  |  4 +++-
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index d365bf0b8c77..632c226a3972 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -133,7 +133,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, const struct qstr *str, struct i
 		goto err1;
 
 	dir->i_size++;
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
 	hfs_find_exit(&fd);
 	return 0;
@@ -269,7 +269,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, const struct qstr *str)
 	}
 
 	dir->i_size--;
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
 	res = 0;
 out:
@@ -337,7 +337,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, const struct qstr *src_name,
 	if (err)
 		goto out;
 	dst_dir->i_size++;
-	dst_dir->i_mtime = dst_dir->i_ctime = current_time(dst_dir);
+	dst_dir->i_mtime = inode_set_ctime_current(dst_dir);
 	mark_inode_dirty(dst_dir);
 
 	/* finally remove the old entry */
@@ -349,7 +349,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, const struct qstr *src_name,
 	if (err)
 		goto out;
 	src_dir->i_size--;
-	src_dir->i_mtime = src_dir->i_ctime = current_time(src_dir);
+	src_dir->i_mtime = inode_set_ctime_current(src_dir);
 	mark_inode_dirty(src_dir);
 
 	type = entry.type;
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 3e1e3dcf0b48..b75c26045df4 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -263,7 +263,7 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry)
 	if (res)
 		return res;
 	clear_nlink(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	hfs_delete_inode(inode);
 	mark_inode_dirty(inode);
 	return 0;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 441d7fc952e3..ee349b72cfb3 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -200,7 +200,7 @@ struct inode *hfs_new_inode(struct inode *dir, const struct qstr *name, umode_t
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
 	set_nlink(inode, 1);
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	HFS_I(inode)->flags = 0;
 	HFS_I(inode)->rsrc_inode = NULL;
 	HFS_I(inode)->fs_blocks = 0;
@@ -355,8 +355,8 @@ static int hfs_read_inode(struct inode *inode, void *data)
 			inode->i_mode |= S_IWUGO;
 		inode->i_mode &= ~hsb->s_file_umask;
 		inode->i_mode |= S_IFREG;
-		inode->i_ctime = inode->i_atime = inode->i_mtime =
-				hfs_m_to_utime(rec->file.MdDat);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode,
+									hfs_m_to_utime(rec->file.MdDat));
 		inode->i_op = &hfs_file_inode_operations;
 		inode->i_fop = &hfs_file_operations;
 		inode->i_mapping->a_ops = &hfs_aops;
@@ -366,8 +366,8 @@ static int hfs_read_inode(struct inode *inode, void *data)
 		inode->i_size = be16_to_cpu(rec->dir.Val) + 2;
 		HFS_I(inode)->fs_blocks = 0;
 		inode->i_mode = S_IFDIR | (S_IRWXUGO & ~hsb->s_dir_umask);
-		inode->i_ctime = inode->i_atime = inode->i_mtime =
-				hfs_m_to_utime(rec->dir.MdDat);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode,
+									hfs_m_to_utime(rec->dir.MdDat));
 		inode->i_op = &hfs_dir_inode_operations;
 		inode->i_fop = &hfs_dir_operations;
 		break;
@@ -654,8 +654,7 @@ int hfs_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 
 		truncate_setsize(inode, attr->ia_size);
 		hfs_file_truncate(inode);
-		inode->i_atime = inode->i_mtime = inode->i_ctime =
-						  current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	}
 
 	setattr_copy(&nop_mnt_idmap, inode, attr);
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c
index 2875961fdc10..dc27d418fbcd 100644
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -28,7 +28,9 @@ static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags)
 	/* fix up inode on a timezone change */
 	diff = sys_tz.tz_minuteswest * 60 - HFS_I(inode)->tz_secondswest;
 	if (diff) {
-		inode->i_ctime.tv_sec += diff;
+		struct timespec64 ctime = inode_get_ctime(inode);
+
+		inode_set_ctime(inode, ctime.tv_sec + diff, ctime.tv_nsec);
 		inode->i_atime.tv_sec += diff;
 		inode->i_mtime.tv_sec += diff;
 		HFS_I(inode)->tz_secondswest += diff;

From 101fa821ab7d1a1ae353da41f8a2455409e35e2d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:14 -0400
Subject: [PATCH 0430/2047] hfsplus: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-47-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/hfsplus/catalog.c |  8 ++++----
 fs/hfsplus/dir.c     |  6 +++---
 fs/hfsplus/inode.c   | 16 +++++++++-------
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 35472cba750e..e71ae2537eaa 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -312,7 +312,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
 	dir->i_size++;
 	if (S_ISDIR(inode->i_mode))
 		hfsplus_subfolders_inc(dir);
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
 
 	hfs_find_exit(&fd);
@@ -417,7 +417,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, const struct qstr *str)
 	dir->i_size--;
 	if (type == HFSPLUS_FOLDER)
 		hfsplus_subfolders_dec(dir);
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
 
 	if (type == HFSPLUS_FILE || type == HFSPLUS_FOLDER) {
@@ -494,7 +494,7 @@ int hfsplus_rename_cat(u32 cnid,
 	dst_dir->i_size++;
 	if (type == HFSPLUS_FOLDER)
 		hfsplus_subfolders_inc(dst_dir);
-	dst_dir->i_mtime = dst_dir->i_ctime = current_time(dst_dir);
+	dst_dir->i_mtime = inode_set_ctime_current(dst_dir);
 
 	/* finally remove the old entry */
 	err = hfsplus_cat_build_key(sb, src_fd.search_key,
@@ -511,7 +511,7 @@ int hfsplus_rename_cat(u32 cnid,
 	src_dir->i_size--;
 	if (type == HFSPLUS_FOLDER)
 		hfsplus_subfolders_dec(src_dir);
-	src_dir->i_mtime = src_dir->i_ctime = current_time(src_dir);
+	src_dir->i_mtime = inode_set_ctime_current(src_dir);
 
 	/* remove old thread entry */
 	hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 56fb5f1312e7..f5c4b3e31a1c 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -346,7 +346,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
 	inc_nlink(inode);
 	hfsplus_instantiate(dst_dentry, inode, cnid);
 	ihold(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 	sbi->file_count++;
 	hfsplus_mark_mdb_dirty(dst_dir->i_sb);
@@ -405,7 +405,7 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
 			hfsplus_delete_inode(inode);
 	} else
 		sbi->file_count--;
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 out:
 	mutex_unlock(&sbi->vh_mutex);
@@ -426,7 +426,7 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry)
 	if (res)
 		goto out;
 	clear_nlink(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	hfsplus_delete_inode(inode);
 	mark_inode_dirty(inode);
 out:
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 7d1a675e037d..40c61ab4a918 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -267,7 +267,7 @@ static int hfsplus_setattr(struct mnt_idmap *idmap,
 		}
 		truncate_setsize(inode, attr->ia_size);
 		hfsplus_file_truncate(inode);
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 	}
 
 	setattr_copy(&nop_mnt_idmap, inode, attr);
@@ -392,7 +392,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir,
 	inode->i_ino = sbi->next_cnid++;
 	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
 	set_nlink(inode, 1);
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 
 	hip = HFSPLUS_I(inode);
 	INIT_LIST_HEAD(&hip->open_dir_list);
@@ -523,7 +523,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
 		inode->i_size = 2 + be32_to_cpu(folder->valence);
 		inode->i_atime = hfsp_mt2ut(folder->access_date);
 		inode->i_mtime = hfsp_mt2ut(folder->content_mod_date);
-		inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date);
+		inode_set_ctime_to_ts(inode,
+				      hfsp_mt2ut(folder->attribute_mod_date));
 		HFSPLUS_I(inode)->create_date = folder->create_date;
 		HFSPLUS_I(inode)->fs_blocks = 0;
 		if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) {
@@ -564,7 +565,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
 		}
 		inode->i_atime = hfsp_mt2ut(file->access_date);
 		inode->i_mtime = hfsp_mt2ut(file->content_mod_date);
-		inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date);
+		inode_set_ctime_to_ts(inode,
+				      hfsp_mt2ut(file->attribute_mod_date));
 		HFSPLUS_I(inode)->create_date = file->create_date;
 	} else {
 		pr_err("bad catalog entry used to create inode\n");
@@ -609,7 +611,7 @@ int hfsplus_cat_write_inode(struct inode *inode)
 		hfsplus_cat_set_perms(inode, &folder->permissions);
 		folder->access_date = hfsp_ut2mt(inode->i_atime);
 		folder->content_mod_date = hfsp_ut2mt(inode->i_mtime);
-		folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime);
+		folder->attribute_mod_date = hfsp_ut2mt(inode_get_ctime(inode));
 		folder->valence = cpu_to_be32(inode->i_size - 2);
 		if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) {
 			folder->subfolders =
@@ -644,7 +646,7 @@ int hfsplus_cat_write_inode(struct inode *inode)
 			file->flags &= cpu_to_be16(~HFSPLUS_FILE_LOCKED);
 		file->access_date = hfsp_ut2mt(inode->i_atime);
 		file->content_mod_date = hfsp_ut2mt(inode->i_mtime);
-		file->attribute_mod_date = hfsp_ut2mt(inode->i_ctime);
+		file->attribute_mod_date = hfsp_ut2mt(inode_get_ctime(inode));
 		hfs_bnode_write(fd.bnode, &entry, fd.entryoffset,
 					 sizeof(struct hfsplus_cat_file));
 	}
@@ -700,7 +702,7 @@ int hfsplus_fileattr_set(struct mnt_idmap *idmap,
 	else
 		hip->userflags &= ~HFSPLUS_FLG_NODUMP;
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 
 	return 0;

From 66e79d8934ec77bc490d00533f53e36d029d30e1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:15 -0400
Subject: [PATCH 0431/2047] hostfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-48-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/hostfs/hostfs_kern.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 46387090eb76..dc5a5cea5fae 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -517,8 +517,7 @@ static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st)
 		(struct timespec64){ st->atime.tv_sec, st->atime.tv_nsec };
 	ino->i_mtime =
 		(struct timespec64){ st->mtime.tv_sec, st->mtime.tv_nsec };
-	ino->i_ctime =
-		(struct timespec64){ st->ctime.tv_sec, st->ctime.tv_nsec };
+	inode_set_ctime(ino, st->ctime.tv_sec, st->ctime.tv_nsec);
 	ino->i_size = st->size;
 	ino->i_blocks = st->blocks;
 	return 0;

From 3200d07d3367e7f320ed646f58b7191360050675 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:16 -0400
Subject: [PATCH 0432/2047] hpfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-49-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/hpfs/dir.c   |  8 ++++----
 fs/hpfs/inode.c |  6 +++---
 fs/hpfs/namei.c | 29 ++++++++++-------------------
 fs/hpfs/super.c |  5 +++--
 4 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index f32f15669996..f36566d61215 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -277,10 +277,10 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in
 	 * inode.
 	 */
 
-	if (!result->i_ctime.tv_sec) {
-		if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date))))
-			result->i_ctime.tv_sec = 1;
-		result->i_ctime.tv_nsec = 0;
+	if (!inode_get_ctime(result).tv_sec) {
+		time64_t csec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date));
+
+		inode_set_ctime(result, csec ? csec : 1, 0);
 		result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->write_date));
 		result->i_mtime.tv_nsec = 0;
 		result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->read_date));
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index e50e92a42432..479166378bae 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -36,7 +36,7 @@ void hpfs_init_inode(struct inode *i)
 	hpfs_inode->i_rddir_off = NULL;
 	hpfs_inode->i_dirty = 0;
 
-	i->i_ctime.tv_sec = i->i_ctime.tv_nsec = 0;
+	inode_set_ctime(i, 0, 0);
 	i->i_mtime.tv_sec = i->i_mtime.tv_nsec = 0;
 	i->i_atime.tv_sec = i->i_atime.tv_nsec = 0;
 }
@@ -232,7 +232,7 @@ void hpfs_write_inode_nolock(struct inode *i)
 	if (de) {
 		de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
 		de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
-		de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
+		de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, inode_get_ctime(i).tv_sec));
 		de->read_only = !(i->i_mode & 0222);
 		de->ea_size = cpu_to_le32(hpfs_inode->i_ea_size);
 		hpfs_mark_4buffers_dirty(&qbh);
@@ -242,7 +242,7 @@ void hpfs_write_inode_nolock(struct inode *i)
 		if ((de = map_dirent(i, hpfs_inode->i_dno, "\001\001", 2, NULL, &qbh))) {
 			de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
 			de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
-			de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
+			de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, inode_get_ctime(i).tv_sec));
 			de->read_only = !(i->i_mode & 0222);
 			de->ea_size = cpu_to_le32(/*hpfs_inode->i_ea_size*/0);
 			de->file_size = cpu_to_le32(0);
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 69fb40b2c99a..f4eb8d6f5989 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -13,10 +13,9 @@ static void hpfs_update_directory_times(struct inode *dir)
 {
 	time64_t t = local_to_gmt(dir->i_sb, local_get_seconds(dir->i_sb));
 	if (t == dir->i_mtime.tv_sec &&
-	    t == dir->i_ctime.tv_sec)
+	    t == inode_get_ctime(dir).tv_sec)
 		return;
-	dir->i_mtime.tv_sec = dir->i_ctime.tv_sec = t;
-	dir->i_mtime.tv_nsec = dir->i_ctime.tv_nsec = 0;
+	dir->i_mtime = inode_set_ctime(dir, t, 0);
 	hpfs_write_inode_nolock(dir);
 }
 
@@ -59,10 +58,8 @@ static int hpfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 	result->i_ino = fno;
 	hpfs_i(result)->i_parent_dir = dir->i_ino;
 	hpfs_i(result)->i_dno = dno;
-	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
-	result->i_ctime.tv_nsec = 0; 
-	result->i_mtime.tv_nsec = 0; 
-	result->i_atime.tv_nsec = 0; 
+	result->i_mtime = result->i_atime =
+		inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0);
 	hpfs_i(result)->i_ea_size = 0;
 	result->i_mode |= S_IFDIR;
 	result->i_op = &hpfs_dir_iops;
@@ -167,10 +164,8 @@ static int hpfs_create(struct mnt_idmap *idmap, struct inode *dir,
 	result->i_fop = &hpfs_file_ops;
 	set_nlink(result, 1);
 	hpfs_i(result)->i_parent_dir = dir->i_ino;
-	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
-	result->i_ctime.tv_nsec = 0;
-	result->i_mtime.tv_nsec = 0;
-	result->i_atime.tv_nsec = 0;
+	result->i_mtime = result->i_atime =
+		inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0);
 	hpfs_i(result)->i_ea_size = 0;
 	if (dee.read_only)
 		result->i_mode &= ~0222;
@@ -250,10 +245,8 @@ static int hpfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	hpfs_init_inode(result);
 	result->i_ino = fno;
 	hpfs_i(result)->i_parent_dir = dir->i_ino;
-	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
-	result->i_ctime.tv_nsec = 0;
-	result->i_mtime.tv_nsec = 0;
-	result->i_atime.tv_nsec = 0;
+	result->i_mtime = result->i_atime =
+		inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0);
 	hpfs_i(result)->i_ea_size = 0;
 	result->i_uid = current_fsuid();
 	result->i_gid = current_fsgid();
@@ -326,10 +319,8 @@ static int hpfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	result->i_ino = fno;
 	hpfs_init_inode(result);
 	hpfs_i(result)->i_parent_dir = dir->i_ino;
-	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
-	result->i_ctime.tv_nsec = 0;
-	result->i_mtime.tv_nsec = 0;
-	result->i_atime.tv_nsec = 0;
+	result->i_mtime = result->i_atime =
+		inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0);
 	hpfs_i(result)->i_ea_size = 0;
 	result->i_mode = S_IFLNK | 0777;
 	result->i_uid = current_fsuid();
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 1cb89595b875..758a51564124 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -729,8 +729,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 		root->i_atime.tv_nsec = 0;
 		root->i_mtime.tv_sec = local_to_gmt(s, le32_to_cpu(de->write_date));
 		root->i_mtime.tv_nsec = 0;
-		root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date));
-		root->i_ctime.tv_nsec = 0;
+		inode_set_ctime(root,
+				local_to_gmt(s, le32_to_cpu(de->creation_date)),
+				0);
 		hpfs_i(root)->i_ea_size = le32_to_cpu(de->ea_size);
 		hpfs_i(root)->i_parent_dir = root->i_ino;
 		if (root->i_size == -1)

From a72a7deab690f37b0856617af05024f64fec09dc Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:17 -0400
Subject: [PATCH 0433/2047] hugetlbfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-50-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/hugetlbfs/inode.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7b17ccfa039d..93d3bcfd4fc8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -887,7 +887,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 
 	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
 		i_size_write(inode, offset + len);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 out:
 	inode_unlock(inode);
 	return error;
@@ -935,7 +935,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
 		inode->i_mode = S_IFDIR | ctx->mode;
 		inode->i_uid = ctx->uid;
 		inode->i_gid = ctx->gid;
-		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 		inode->i_op = &hugetlbfs_dir_inode_operations;
 		inode->i_fop = &simple_dir_operations;
 		/* directory inodes start off with i_nlink == 2 (for "." entry) */
@@ -979,7 +979,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
 		lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
 				&hugetlbfs_i_mmap_rwsem_key);
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
-		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 		inode->i_mapping->private_data = resv_map;
 		info->seals = F_SEAL_SEAL;
 		switch (mode & S_IFMT) {
@@ -1022,7 +1022,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
 	if (!inode)
 		return -ENOSPC;
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	d_instantiate(dentry, inode);
 	dget(dentry);/* Extra count - pin the dentry in core */
 	return 0;
@@ -1054,7 +1054,7 @@ static int hugetlbfs_tmpfile(struct mnt_idmap *idmap,
 	inode = hugetlbfs_get_inode(dir->i_sb, dir, mode | S_IFREG, 0);
 	if (!inode)
 		return -ENOSPC;
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	d_tmpfile(file, inode);
 	return finish_open_simple(file, 0);
 }
@@ -1076,7 +1076,7 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap,
 		} else
 			iput(inode);
 	}
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 
 	return error;
 }

From 2f06e4ba90da172cf39f43a101f17e2903eb0f81 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:18 -0400
Subject: [PATCH 0434/2047] isofs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-51-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/isofs/inode.c |  9 ++-------
 fs/isofs/rock.c  | 16 +++++++---------
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index df9d70588b60..2ee21286ac8f 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1422,13 +1422,8 @@ static int isofs_read_inode(struct inode *inode, int relocated)
 			inode->i_ino, de->flags[-high_sierra]);
 	}
 #endif
-
-	inode->i_mtime.tv_sec =
-	inode->i_atime.tv_sec =
-	inode->i_ctime.tv_sec = iso_date(de->date, high_sierra);
-	inode->i_mtime.tv_nsec =
-	inode->i_atime.tv_nsec =
-	inode->i_ctime.tv_nsec = 0;
+	inode->i_mtime = inode->i_atime =
+		inode_set_ctime(inode, iso_date(de->date, high_sierra), 0);
 
 	ei->i_first_extent = (isonum_733(de->extent) +
 			isonum_711(de->ext_attr_length));
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 48f58c6c9e69..348783a70f57 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -421,10 +421,9 @@ repeat:
 			/* Rock ridge never appears on a High Sierra disk */
 			cnt = 0;
 			if (rr->u.TF.flags & TF_CREATE) {
-				inode->i_ctime.tv_sec =
-				    iso_date(rr->u.TF.times[cnt++].time,
-					     0);
-				inode->i_ctime.tv_nsec = 0;
+				inode_set_ctime(inode,
+						iso_date(rr->u.TF.times[cnt++].time, 0),
+						0);
 			}
 			if (rr->u.TF.flags & TF_MODIFY) {
 				inode->i_mtime.tv_sec =
@@ -439,10 +438,9 @@ repeat:
 				inode->i_atime.tv_nsec = 0;
 			}
 			if (rr->u.TF.flags & TF_ATTRIBUTES) {
-				inode->i_ctime.tv_sec =
-				    iso_date(rr->u.TF.times[cnt++].time,
-					     0);
-				inode->i_ctime.tv_nsec = 0;
+				inode_set_ctime(inode,
+						iso_date(rr->u.TF.times[cnt++].time, 0),
+						0);
 			}
 			break;
 		case SIG('S', 'L'):
@@ -534,7 +532,7 @@ repeat:
 			inode->i_size = reloc->i_size;
 			inode->i_blocks = reloc->i_blocks;
 			inode->i_atime = reloc->i_atime;
-			inode->i_ctime = reloc->i_ctime;
+			inode_set_ctime_to_ts(inode, inode_get_ctime(reloc));
 			inode->i_mtime = reloc->i_mtime;
 			iput(reloc);
 			break;

From d8b23c618c75c7f94e66e0c08ee4b902065ae1ab Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:19 -0400
Subject: [PATCH 0435/2047] jffs2: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-52-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/jffs2/dir.c      | 24 +++++++++++++++---------
 fs/jffs2/file.c     |  3 ++-
 fs/jffs2/fs.c       | 10 +++++-----
 fs/jffs2/os-linux.h |  2 +-
 4 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 5075a0a6d594..091ab0eaabbe 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -204,7 +204,8 @@ static int jffs2_create(struct mnt_idmap *idmap, struct inode *dir_i,
 	if (ret)
 		goto fail;
 
-	dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime));
+	dir_i->i_mtime = inode_set_ctime_to_ts(dir_i,
+					       ITIME(je32_to_cpu(ri->ctime)));
 
 	jffs2_free_raw_inode(ri);
 
@@ -237,7 +238,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
 	if (dead_f->inocache)
 		set_nlink(d_inode(dentry), dead_f->inocache->pino_nlink);
 	if (!ret)
-		dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
+		dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, ITIME(now));
 	return ret;
 }
 /***********************************************************************/
@@ -271,7 +272,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
 		set_nlink(d_inode(old_dentry), ++f->inocache->pino_nlink);
 		mutex_unlock(&f->sem);
 		d_instantiate(dentry, d_inode(old_dentry));
-		dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
+		dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, ITIME(now));
 		ihold(d_inode(old_dentry));
 	}
 	return ret;
@@ -422,7 +423,8 @@ static int jffs2_symlink (struct mnt_idmap *idmap, struct inode *dir_i,
 		goto fail;
 	}
 
-	dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
+	dir_i->i_mtime = inode_set_ctime_to_ts(dir_i,
+					       ITIME(je32_to_cpu(rd->mctime)));
 
 	jffs2_free_raw_dirent(rd);
 
@@ -566,7 +568,8 @@ static int jffs2_mkdir (struct mnt_idmap *idmap, struct inode *dir_i,
 		goto fail;
 	}
 
-	dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
+	dir_i->i_mtime = inode_set_ctime_to_ts(dir_i,
+					       ITIME(je32_to_cpu(rd->mctime)));
 	inc_nlink(dir_i);
 
 	jffs2_free_raw_dirent(rd);
@@ -607,7 +610,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
 	ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
 			      dentry->d_name.len, f, now);
 	if (!ret) {
-		dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
+		dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, ITIME(now));
 		clear_nlink(d_inode(dentry));
 		drop_nlink(dir_i);
 	}
@@ -743,7 +746,8 @@ static int jffs2_mknod (struct mnt_idmap *idmap, struct inode *dir_i,
 		goto fail;
 	}
 
-	dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
+	dir_i->i_mtime = inode_set_ctime_to_ts(dir_i,
+					       ITIME(je32_to_cpu(rd->mctime)));
 
 	jffs2_free_raw_dirent(rd);
 
@@ -864,14 +868,16 @@ static int jffs2_rename (struct mnt_idmap *idmap,
 		 * caller won't do it on its own since we are returning an error.
 		 */
 		d_invalidate(new_dentry);
-		new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
+		new_dir_i->i_mtime = inode_set_ctime_to_ts(new_dir_i,
+							   ITIME(now));
 		return ret;
 	}
 
 	if (d_is_dir(old_dentry))
 		drop_nlink(old_dir_i);
 
-	new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = ITIME(now);
+	old_dir_i->i_mtime = inode_set_ctime_to_ts(old_dir_i, ITIME(now));
+	new_dir_i->i_mtime = inode_set_ctime_to_ts(new_dir_i, ITIME(now));
 
 	return 0;
 }
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 2345ca3f09ee..11c66793960e 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -317,7 +317,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
 			inode->i_size = pos + writtenlen;
 			inode->i_blocks = (inode->i_size + 511) >> 9;
 
-			inode->i_ctime = inode->i_mtime = ITIME(je32_to_cpu(ri->ctime));
+			inode->i_mtime = inode_set_ctime_to_ts(inode,
+							       ITIME(je32_to_cpu(ri->ctime)));
 		}
 	}
 
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 038516bee1ab..0403efab4089 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -115,7 +115,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 	ri->isize = cpu_to_je32((ivalid & ATTR_SIZE)?iattr->ia_size:inode->i_size);
 	ri->atime = cpu_to_je32(I_SEC((ivalid & ATTR_ATIME)?iattr->ia_atime:inode->i_atime));
 	ri->mtime = cpu_to_je32(I_SEC((ivalid & ATTR_MTIME)?iattr->ia_mtime:inode->i_mtime));
-	ri->ctime = cpu_to_je32(I_SEC((ivalid & ATTR_CTIME)?iattr->ia_ctime:inode->i_ctime));
+	ri->ctime = cpu_to_je32(I_SEC((ivalid & ATTR_CTIME)?iattr->ia_ctime:inode_get_ctime(inode)));
 
 	ri->offset = cpu_to_je32(0);
 	ri->csize = ri->dsize = cpu_to_je32(mdatalen);
@@ -148,7 +148,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 	}
 	/* It worked. Update the inode */
 	inode->i_atime = ITIME(je32_to_cpu(ri->atime));
-	inode->i_ctime = ITIME(je32_to_cpu(ri->ctime));
+	inode_set_ctime_to_ts(inode, ITIME(je32_to_cpu(ri->ctime)));
 	inode->i_mtime = ITIME(je32_to_cpu(ri->mtime));
 	inode->i_mode = jemode_to_cpu(ri->mode);
 	i_uid_write(inode, je16_to_cpu(ri->uid));
@@ -284,7 +284,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
 	inode->i_size = je32_to_cpu(latest_node.isize);
 	inode->i_atime = ITIME(je32_to_cpu(latest_node.atime));
 	inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
-	inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
+	inode_set_ctime_to_ts(inode, ITIME(je32_to_cpu(latest_node.ctime)));
 
 	set_nlink(inode, f->inocache->pino_nlink);
 
@@ -388,7 +388,7 @@ void jffs2_dirty_inode(struct inode *inode, int flags)
 	iattr.ia_gid = inode->i_gid;
 	iattr.ia_atime = inode->i_atime;
 	iattr.ia_mtime = inode->i_mtime;
-	iattr.ia_ctime = inode->i_ctime;
+	iattr.ia_ctime = inode_get_ctime(inode);
 
 	jffs2_do_setattr(inode, &iattr);
 }
@@ -475,7 +475,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
 	inode->i_mode = jemode_to_cpu(ri->mode);
 	i_gid_write(inode, je16_to_cpu(ri->gid));
 	i_uid_write(inode, je16_to_cpu(ri->uid));
-	inode->i_atime = inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime));
 
 	inode->i_blocks = 0;
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 8da19766c101..50727a1ff931 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -35,7 +35,7 @@ struct kvec;
 #define ITIME(sec) ((struct timespec64){sec, 0})
 #define JFFS2_NOW() JFFS2_CLAMP_TIME(ktime_get_real_seconds())
 #define I_SEC(tv) JFFS2_CLAMP_TIME((tv).tv_sec)
-#define JFFS2_F_I_CTIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_ctime)
+#define JFFS2_F_I_CTIME(f) I_SEC(inode_get_ctime(OFNI_EDONI_2SFFJ(f)))
 #define JFFS2_F_I_MTIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_mtime)
 #define JFFS2_F_I_ATIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_atime)
 #define sleep_on_spinunlock(wq, s)				\

From ad9dc5df36a2253868154a22650eac882c6faec4 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:20 -0400
Subject: [PATCH 0436/2047] jfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-53-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/jfs/acl.c       |  2 +-
 fs/jfs/inode.c     |  2 +-
 fs/jfs/ioctl.c     |  2 +-
 fs/jfs/jfs_imap.c  |  8 ++++----
 fs/jfs/jfs_inode.c |  4 ++--
 fs/jfs/namei.c     | 24 ++++++++++++------------
 fs/jfs/super.c     |  2 +-
 fs/jfs/xattr.c     |  2 +-
 8 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index fb96f872d207..1de3602c98de 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -116,7 +116,7 @@ int jfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
 	if (!rc) {
 		if (update_mode) {
 			inode->i_mode = mode;
-			inode->i_ctime = current_time(inode);
+			inode_set_ctime_current(inode);
 			mark_inode_dirty(inode);
 		}
 		rc = txCommit(tid, 1, &inode, 0);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 8ac10e396050..920d58a1566b 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -393,7 +393,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)
 			break;
 		}
 
-		ip->i_mtime = ip->i_ctime = current_time(ip);
+		ip->i_mtime = inode_set_ctime_current(ip);
 		mark_inode_dirty(ip);
 
 		txCommit(tid, 1, &ip, 0);
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index ed7989bc2db1..f7bd7e8f5be4 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -96,7 +96,7 @@ int jfs_fileattr_set(struct mnt_idmap *idmap,
 	jfs_inode->mode2 = flags;
 
 	jfs_set_inode_flags(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 
 	return 0;
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 390cbfce391f..a40383aa6c84 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -3064,8 +3064,8 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 	ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
 	ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec);
 	ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
-	ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
-	ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
+	inode_set_ctime(ip, le32_to_cpu(dip->di_ctime.tv_sec),
+			le32_to_cpu(dip->di_ctime.tv_nsec));
 	ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
 	ip->i_generation = le32_to_cpu(dip->di_gen);
 
@@ -3139,8 +3139,8 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip)
 
 	dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec);
 	dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec);
-	dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec);
-	dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec);
+	dip->di_ctime.tv_sec = cpu_to_le32(inode_get_ctime(ip).tv_sec);
+	dip->di_ctime.tv_nsec = cpu_to_le32(inode_get_ctime(ip).tv_nsec);
 	dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec);
 	dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec);
 	dip->di_ixpxd = jfs_ip->ixpxd;	/* in-memory pxd's are little-endian */
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 9e1f02767201..87594efa7f7c 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -97,8 +97,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	jfs_inode->mode2 |= inode->i_mode;
 
 	inode->i_blocks = 0;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
-	jfs_inode->otime = inode->i_ctime.tv_sec;
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
+	jfs_inode->otime = inode_get_ctime(inode).tv_sec;
 	inode->i_generation = JFS_SBI(sb)->gengen++;
 
 	jfs_inode->cflag = 0;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 9b030297aa64..541578126b1a 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -149,7 +149,7 @@ static int jfs_create(struct mnt_idmap *idmap, struct inode *dip,
 
 	mark_inode_dirty(ip);
 
-	dip->i_ctime = dip->i_mtime = current_time(dip);
+	dip->i_mtime = inode_set_ctime_current(dip);
 
 	mark_inode_dirty(dip);
 
@@ -284,7 +284,7 @@ static int jfs_mkdir(struct mnt_idmap *idmap, struct inode *dip,
 
 	/* update parent directory inode */
 	inc_nlink(dip);		/* for '..' from child directory */
-	dip->i_ctime = dip->i_mtime = current_time(dip);
+	dip->i_mtime = inode_set_ctime_current(dip);
 	mark_inode_dirty(dip);
 
 	rc = txCommit(tid, 2, &iplist[0], 0);
@@ -390,7 +390,7 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 	/* update parent directory's link count corresponding
 	 * to ".." entry of the target directory deleted
 	 */
-	dip->i_ctime = dip->i_mtime = current_time(dip);
+	dip->i_mtime = inode_set_ctime_current(dip);
 	inode_dec_link_count(dip);
 
 	/*
@@ -512,7 +512,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 
 	ASSERT(ip->i_nlink);
 
-	ip->i_ctime = dip->i_ctime = dip->i_mtime = current_time(ip);
+	dip->i_mtime = inode_set_ctime_to_ts(dip, inode_set_ctime_current(ip));
 	mark_inode_dirty(dip);
 
 	/* update target's inode */
@@ -827,8 +827,8 @@ static int jfs_link(struct dentry *old_dentry,
 
 	/* update object inode */
 	inc_nlink(ip);		/* for new link */
-	ip->i_ctime = current_time(ip);
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	inode_set_ctime_current(ip);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
 	ihold(ip);
 
@@ -1028,7 +1028,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
 
 	mark_inode_dirty(ip);
 
-	dip->i_ctime = dip->i_mtime = current_time(dip);
+	dip->i_mtime = inode_set_ctime_current(dip);
 	mark_inode_dirty(dip);
 	/*
 	 * commit update of parent directory and link object
@@ -1205,7 +1205,7 @@ static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 			tblk->xflag |= COMMIT_DELETE;
 			tblk->u.ip = new_ip;
 		} else {
-			new_ip->i_ctime = current_time(new_ip);
+			inode_set_ctime_current(new_ip);
 			mark_inode_dirty(new_ip);
 		}
 	} else {
@@ -1268,10 +1268,10 @@ static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 	/*
 	 * Update ctime on changed/moved inodes & mark dirty
 	 */
-	old_ip->i_ctime = current_time(old_ip);
+	inode_set_ctime_current(old_ip);
 	mark_inode_dirty(old_ip);
 
-	new_dir->i_ctime = new_dir->i_mtime = current_time(new_dir);
+	new_dir->i_mtime = inode_set_ctime_current(new_dir);
 	mark_inode_dirty(new_dir);
 
 	/* Build list of inodes modified by this transaction */
@@ -1283,7 +1283,7 @@ static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 
 	if (old_dir != new_dir) {
 		iplist[ipcount++] = new_dir;
-		old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
+		old_dir->i_mtime = inode_set_ctime_current(old_dir);
 		mark_inode_dirty(old_dir);
 	}
 
@@ -1416,7 +1416,7 @@ static int jfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 
 	mark_inode_dirty(ip);
 
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 
 	mark_inode_dirty(dir);
 
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index d2f82cb7db1b..2e2f7f6d36a0 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -818,7 +818,7 @@ out:
 	}
 	if (inode->i_size < off+len-towrite)
 		i_size_write(inode, off+len-towrite);
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 	inode_unlock(inode);
 	return len - towrite;
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 931e50018f88..8577ad494e05 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -647,7 +647,7 @@ static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf,
 	if (old_blocks)
 		dquot_free_block(inode, old_blocks);
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 
 	return 0;
 }

From f7f438589f4686758ee1de99c7723919a2b05d70 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:21 -0400
Subject: [PATCH 0437/2047] kernfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-54-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/kernfs/inode.c       |  5 ++---
 fs/libfs.c              | 19 ++++++++++---------
 fs/minix/bitmap.c       |  2 +-
 fs/minix/dir.c          |  6 +++---
 fs/minix/inode.c        | 10 +++-------
 fs/minix/itree_common.c |  4 ++--
 fs/minix/namei.c        |  6 +++---
 7 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index b22b74d1a115..89a9b4dcf109 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -151,8 +151,7 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
 static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
 {
 	inode->i_mode = mode;
-	inode->i_atime = inode->i_mtime =
-		inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 }
 
 static inline void set_inode_attr(struct inode *inode,
@@ -162,7 +161,7 @@ static inline void set_inode_attr(struct inode *inode,
 	inode->i_gid = attrs->ia_gid;
 	inode->i_atime = attrs->ia_atime;
 	inode->i_mtime = attrs->ia_mtime;
-	inode->i_ctime = attrs->ia_ctime;
+	inode_set_ctime_to_ts(inode, attrs->ia_ctime);
 }
 
 static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
diff --git a/fs/libfs.c b/fs/libfs.c
index ea0a84591886..1f5245e8bfdc 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -275,7 +275,7 @@ void simple_recursive_removal(struct dentry *dentry,
 		while ((child = find_next_child(this, victim)) == NULL) {
 			// kill and ascend
 			// update metadata while it's still locked
-			inode->i_ctime = current_time(inode);
+			inode_set_ctime_current(inode);
 			clear_nlink(inode);
 			inode_unlock(inode);
 			victim = this;
@@ -293,8 +293,7 @@ void simple_recursive_removal(struct dentry *dentry,
 				dput(victim);		// unpin it
 			}
 			if (victim == dentry) {
-				inode->i_ctime = inode->i_mtime =
-					current_time(inode);
+				inode->i_mtime = inode_set_ctime_current(inode);
 				if (d_is_dir(dentry))
 					drop_nlink(inode);
 				inode_unlock(inode);
@@ -335,7 +334,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
 	 */
 	root->i_ino = 1;
 	root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
-	root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
+	root->i_atime = root->i_mtime = inode_set_ctime_current(root);
 	s->s_root = d_make_root(root);
 	if (!s->s_root)
 		return -ENOMEM;
@@ -391,7 +390,8 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
 {
 	struct inode *inode = d_inode(old_dentry);
 
-	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+	dir->i_mtime = inode_set_ctime_to_ts(dir,
+					     inode_set_ctime_current(inode));
 	inc_nlink(inode);
 	ihold(inode);
 	dget(dentry);
@@ -425,7 +425,8 @@ int simple_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct inode *inode = d_inode(dentry);
 
-	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+	dir->i_mtime = inode_set_ctime_to_ts(dir,
+					     inode_set_ctime_current(inode));
 	drop_nlink(inode);
 	dput(dentry);
 	return 0;
@@ -677,7 +678,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
 	 */
 	inode->i_ino = 1;
 	inode->i_mode = S_IFDIR | 0755;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
 	set_nlink(inode, 2);
@@ -703,7 +704,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
 			goto out;
 		}
 		inode->i_mode = S_IFREG | files->mode;
-		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 		inode->i_fop = files->ops;
 		inode->i_ino = i;
 		d_add(dentry, inode);
@@ -1271,7 +1272,7 @@ struct inode *alloc_anon_inode(struct super_block *s)
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
 	inode->i_flags |= S_PRIVATE;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	return inode;
 }
 EXPORT_SYMBOL(alloc_anon_inode);
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 870207ba23f1..25c08fbfcb9d 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -251,7 +251,7 @@ struct inode *minix_new_inode(const struct inode *dir, umode_t mode)
 	}
 	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
 	inode->i_ino = j;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_blocks = 0;
 	memset(&minix_i(inode)->u, 0, sizeof(minix_i(inode)->u));
 	insert_inode_hash(inode);
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index bf9858f76b6a..20f23e6e58ad 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -281,7 +281,7 @@ got_it:
 		de->inode = inode->i_ino;
 	}
 	dir_commit_chunk(page, pos, sbi->s_dirsize);
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
 	err = minix_handle_dirsync(dir);
 out_put:
@@ -313,7 +313,7 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
 	else
 		de->inode = 0;
 	dir_commit_chunk(page, pos, len);
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 	return minix_handle_dirsync(inode);
 }
@@ -436,7 +436,7 @@ int minix_set_link(struct minix_dir_entry *de, struct page *page,
 	else
 		de->inode = inode->i_ino;
 	dir_commit_chunk(page, pos, sbi->s_dirsize);
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
 	return minix_handle_dirsync(dir);
 }
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e9fbb5303a22..8a4fc9420b36 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -501,10 +501,7 @@ static struct inode *V1_minix_iget(struct inode *inode)
 	i_gid_write(inode, raw_inode->i_gid);
 	set_nlink(inode, raw_inode->i_nlinks);
 	inode->i_size = raw_inode->i_size;
-	inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time;
-	inode->i_mtime.tv_nsec = 0;
-	inode->i_atime.tv_nsec = 0;
-	inode->i_ctime.tv_nsec = 0;
+	inode->i_mtime = inode->i_atime = inode_set_ctime(inode, raw_inode->i_time, 0);
 	inode->i_blocks = 0;
 	for (i = 0; i < 9; i++)
 		minix_inode->u.i1_data[i] = raw_inode->i_zone[i];
@@ -543,10 +540,9 @@ static struct inode *V2_minix_iget(struct inode *inode)
 	inode->i_size = raw_inode->i_size;
 	inode->i_mtime.tv_sec = raw_inode->i_mtime;
 	inode->i_atime.tv_sec = raw_inode->i_atime;
-	inode->i_ctime.tv_sec = raw_inode->i_ctime;
+	inode_set_ctime(inode, raw_inode->i_ctime, 0);
 	inode->i_mtime.tv_nsec = 0;
 	inode->i_atime.tv_nsec = 0;
-	inode->i_ctime.tv_nsec = 0;
 	inode->i_blocks = 0;
 	for (i = 0; i < 10; i++)
 		minix_inode->u.i2_data[i] = raw_inode->i_zone[i];
@@ -622,7 +618,7 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode)
 	raw_inode->i_size = inode->i_size;
 	raw_inode->i_mtime = inode->i_mtime.tv_sec;
 	raw_inode->i_atime = inode->i_atime.tv_sec;
-	raw_inode->i_ctime = inode->i_ctime.tv_sec;
+	raw_inode->i_ctime = inode_get_ctime(inode).tv_sec;
 	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
 		raw_inode->i_zone[0] = old_encode_dev(inode->i_rdev);
 	else for (i = 0; i < 10; i++)
diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c
index 446148792f41..ce18ae37c29d 100644
--- a/fs/minix/itree_common.c
+++ b/fs/minix/itree_common.c
@@ -131,7 +131,7 @@ static inline int splice_branch(struct inode *inode,
 
 	/* We are done with atomic stuff, now do the rest of housekeeping */
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 
 	/* had we spliced it onto indirect block? */
 	if (where->bh)
@@ -350,7 +350,7 @@ do_indirects:
 		}
 		first_whole++;
 	}
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 }
 
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 956d5183828d..114084d5636a 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -98,7 +98,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
 {
 	struct inode *inode = d_inode(old_dentry);
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	inode_inc_link_count(inode);
 	ihold(inode);
 	return add_nondir(dentry, inode);
@@ -154,7 +154,7 @@ static int minix_unlink(struct inode * dir, struct dentry *dentry)
 
 	if (err)
 		return err;
-	inode->i_ctime = dir->i_ctime;
+	inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
 	inode_dec_link_count(inode);
 	return 0;
 }
@@ -218,7 +218,7 @@ static int minix_rename(struct mnt_idmap *idmap,
 		put_page(new_page);
 		if (err)
 			goto out_dir;
-		new_inode->i_ctime = current_time(new_inode);
+		inode_set_ctime_current(new_inode);
 		if (dir_de)
 			drop_nlink(new_inode);
 		inode_dec_link_count(new_inode);

From 55e04e9c926eb3b7f6fb6d86bdcb6ee3991c4e4a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:22 -0400
Subject: [PATCH 0438/2047] nfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-55-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/nfs/callback_proc.c |  2 +-
 fs/nfs/fscache.h       |  4 ++--
 fs/nfs/inode.c         | 20 ++++++++++----------
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c1eda73254e1..6bed1394d748 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -59,7 +59,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
 	res->change_attr = delegation->change_attr;
 	if (nfs_have_writebacks(inode))
 		res->change_attr++;
-	res->ctime = inode->i_ctime;
+	res->ctime = inode_get_ctime(inode);
 	res->mtime = inode->i_mtime;
 	res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
 		args->bitmap[0];
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index e1706e736c64..2dc64454492b 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -116,8 +116,8 @@ static inline void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata *
 	memset(auxdata, 0, sizeof(*auxdata));
 	auxdata->mtime_sec  = inode->i_mtime.tv_sec;
 	auxdata->mtime_nsec = inode->i_mtime.tv_nsec;
-	auxdata->ctime_sec  = inode->i_ctime.tv_sec;
-	auxdata->ctime_nsec = inode->i_ctime.tv_nsec;
+	auxdata->ctime_sec  = inode_get_ctime(inode).tv_sec;
+	auxdata->ctime_nsec = inode_get_ctime(inode).tv_nsec;
 
 	if (NFS_SERVER(inode)->nfs_client->rpc_ops->version == 4)
 		auxdata->change_attr = inode_peek_iversion_raw(inode);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8172dd4135a1..1283fdfa4b0a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -514,7 +514,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 
 		memset(&inode->i_atime, 0, sizeof(inode->i_atime));
 		memset(&inode->i_mtime, 0, sizeof(inode->i_mtime));
-		memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
+		inode_set_ctime(inode, 0, 0);
 		inode_set_iversion_raw(inode, 0);
 		inode->i_size = 0;
 		clear_nlink(inode);
@@ -535,7 +535,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		else if (fattr_supported & NFS_ATTR_FATTR_MTIME)
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
 		if (fattr->valid & NFS_ATTR_FATTR_CTIME)
-			inode->i_ctime = fattr->ctime;
+			inode_set_ctime_to_ts(inode, fattr->ctime);
 		else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
 		if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -731,7 +731,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
 		if ((attr->ia_valid & ATTR_GID) != 0)
 			inode->i_gid = attr->ia_gid;
 		if (fattr->valid & NFS_ATTR_FATTR_CTIME)
-			inode->i_ctime = fattr->ctime;
+			inode_set_ctime_to_ts(inode, fattr->ctime);
 		else
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
 					| NFS_INO_INVALID_CTIME);
@@ -749,7 +749,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
 
 		if (fattr->valid & NFS_ATTR_FATTR_CTIME)
-			inode->i_ctime = fattr->ctime;
+			inode_set_ctime_to_ts(inode, fattr->ctime);
 		else
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
 					| NFS_INO_INVALID_CTIME);
@@ -765,7 +765,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
 
 		if (fattr->valid & NFS_ATTR_FATTR_CTIME)
-			inode->i_ctime = fattr->ctime;
+			inode_set_ctime_to_ts(inode, fattr->ctime);
 		else
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
 					| NFS_INO_INVALID_CTIME);
@@ -1444,11 +1444,11 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
 	}
 	/* If we have atomic WCC data, we may update some attributes */
-	ts = inode->i_ctime;
+	ts = inode_get_ctime(inode);
 	if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
 			&& (fattr->valid & NFS_ATTR_FATTR_CTIME)
 			&& timespec64_equal(&ts, &fattr->pre_ctime)) {
-		inode->i_ctime = fattr->ctime;
+		inode_set_ctime_to_ts(inode, fattr->ctime);
 	}
 
 	ts = inode->i_mtime;
@@ -1510,7 +1510,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 		if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime))
 			invalid |= NFS_INO_INVALID_MTIME;
 
-		ts = inode->i_ctime;
+		ts = inode_get_ctime(inode);
 		if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime))
 			invalid |= NFS_INO_INVALID_CTIME;
 
@@ -1997,7 +1997,7 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa
 	}
 	if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
 			(fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
-		fattr->pre_ctime = inode->i_ctime;
+		fattr->pre_ctime = inode_get_ctime(inode);
 		fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
 	}
 	if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
@@ -2190,7 +2190,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			save_cache_validity & NFS_INO_INVALID_MTIME;
 
 	if (fattr->valid & NFS_ATTR_FATTR_CTIME)
-		inode->i_ctime = fattr->ctime;
+		inode_set_ctime_to_ts(inode, fattr->ctime);
 	else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
 		nfsi->cache_validity |=
 			save_cache_validity & NFS_INO_INVALID_CTIME;

From 38d721b13f861adc8aad447aa8b0952cd4a3000c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:23 -0400
Subject: [PATCH 0439/2047] nfsd: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-56-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/nfsd/nfsctl.c | 2 +-
 fs/nfsd/vfs.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1b8b1aab9a15..a53c5660a8c4 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1131,7 +1131,7 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
 	/* Following advice from simple_fill_super documentation: */
 	inode->i_ino = iunique(sb, NFSD_MaxReserved);
 	inode->i_mode = mode;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	switch (mode & S_IFMT) {
 	case S_IFDIR:
 		inode->i_fop = &simple_dir_operations;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8a2321d19194..40a68bae88fc 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -520,7 +520,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
 
 	nfsd_sanitize_attrs(inode, iap);
 
-	if (check_guard && guardtime != inode->i_ctime.tv_sec)
+	if (check_guard && guardtime != inode_get_ctime(inode).tv_sec)
 		return nfserr_notsync;
 
 	/*

From e21d4f419402ce71f6ddd10449af85356293e788 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:24 -0400
Subject: [PATCH 0440/2047] nilfs2: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-57-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/nilfs2/dir.c   |  6 +++---
 fs/nilfs2/inode.c | 12 ++++++------
 fs/nilfs2/ioctl.c |  2 +-
 fs/nilfs2/namei.c |  8 ++++----
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index decd6471300b..bce734b68f08 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -429,7 +429,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
 	nilfs_set_de_type(de, inode);
 	nilfs_commit_chunk(page, mapping, from, to);
 	nilfs_put_page(page);
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 }
 
 /*
@@ -519,7 +519,7 @@ got_it:
 	de->inode = cpu_to_le64(inode->i_ino);
 	nilfs_set_de_type(de, inode);
 	nilfs_commit_chunk(page, page->mapping, from, to);
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	nilfs_mark_inode_dirty(dir);
 	/* OFFSET_CACHE */
 out_put:
@@ -567,7 +567,7 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
 		pde->rec_len = nilfs_rec_len_to_disk(to - from);
 	dir->inode = 0;
 	nilfs_commit_chunk(page, mapping, from, to);
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 out:
 	nilfs_put_page(page);
 	return err;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index a8ce522ac747..5259b94ca1dc 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -366,7 +366,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 	atomic64_inc(&root->inodes_count);
 	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
 	inode->i_ino = ino;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
 		err = nilfs_bmap_read(ii->i_bmap, NULL);
@@ -450,10 +450,10 @@ int nilfs_read_inode_common(struct inode *inode,
 	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
 	inode->i_size = le64_to_cpu(raw_inode->i_size);
 	inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
-	inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
+	inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
+			le32_to_cpu(raw_inode->i_ctime_nsec));
 	inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
 	inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
-	inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
 	inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
 	if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
 		return -EIO; /* this inode is for metadata and corrupted */
@@ -768,9 +768,9 @@ void nilfs_write_inode_common(struct inode *inode,
 	raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
 	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
 	raw_inode->i_size = cpu_to_le64(inode->i_size);
-	raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+	raw_inode->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
 	raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
-	raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 	raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
 	raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);
 
@@ -875,7 +875,7 @@ void nilfs_truncate(struct inode *inode)
 
 	nilfs_truncate_bmap(ii, blkoff);
 
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	if (IS_SYNC(inode))
 		nilfs_set_transaction_flag(NILFS_TI_SYNC);
 
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 1dfbc0c34513..40ffade49f38 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -149,7 +149,7 @@ int nilfs_fileattr_set(struct mnt_idmap *idmap,
 	NILFS_I(inode)->i_flags = oldflags | (flags & FS_FL_USER_MODIFIABLE);
 
 	nilfs_set_inode_flags(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	if (IS_SYNC(inode))
 		nilfs_set_transaction_flag(NILFS_TI_SYNC);
 
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index c7024da8f1e2..2a4e7f4a8102 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -185,7 +185,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
 	if (err)
 		return err;
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	inode_inc_link_count(inode);
 	ihold(inode);
 
@@ -283,7 +283,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
 	if (err)
 		goto out;
 
-	inode->i_ctime = dir->i_ctime;
+	inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
 	drop_nlink(inode);
 	err = 0;
 out:
@@ -387,7 +387,7 @@ static int nilfs_rename(struct mnt_idmap *idmap,
 			goto out_dir;
 		nilfs_set_link(new_dir, new_de, new_page, old_inode);
 		nilfs_mark_inode_dirty(new_dir);
-		new_inode->i_ctime = current_time(new_inode);
+		inode_set_ctime_current(new_inode);
 		if (dir_de)
 			drop_nlink(new_inode);
 		drop_nlink(new_inode);
@@ -406,7 +406,7 @@ static int nilfs_rename(struct mnt_idmap *idmap,
 	 * Like most other Unix systems, set the ctime for inodes on a
 	 * rename.
 	 */
-	old_inode->i_ctime = current_time(old_inode);
+	inode_set_ctime_current(old_inode);
 
 	nilfs_delete_entry(old_de, old_page);
 

From 03870d277fea0efb33d5de792194589130c6a3cb Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:25 -0400
Subject: [PATCH 0441/2047] ntfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-58-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ntfs/inode.c | 15 ++++++++-------
 fs/ntfs/mft.c   |  3 +--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 6c3f38d66579..99ac6ea277c4 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -654,7 +654,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
 	 * always changes, when mtime is changed. ctime can be changed on its
 	 * own, mtime is then not changed, e.g. when a file is renamed.
 	 */
-	vi->i_ctime = ntfs2utc(si->last_mft_change_time);
+	inode_set_ctime_to_ts(vi, ntfs2utc(si->last_mft_change_time));
 	/*
 	 * Last access to the data within the file. Not changed during a rename
 	 * for example but changed whenever the file is written to.
@@ -1218,7 +1218,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 	vi->i_gid	= base_vi->i_gid;
 	set_nlink(vi, base_vi->i_nlink);
 	vi->i_mtime	= base_vi->i_mtime;
-	vi->i_ctime	= base_vi->i_ctime;
+	inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
 	vi->i_atime	= base_vi->i_atime;
 	vi->i_generation = ni->seq_no = base_ni->seq_no;
 
@@ -1484,7 +1484,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
 	vi->i_gid	= base_vi->i_gid;
 	set_nlink(vi, base_vi->i_nlink);
 	vi->i_mtime	= base_vi->i_mtime;
-	vi->i_ctime	= base_vi->i_ctime;
+	inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
 	vi->i_atime	= base_vi->i_atime;
 	vi->i_generation = ni->seq_no = base_ni->seq_no;
 	/* Set inode type to zero but preserve permissions. */
@@ -2804,13 +2804,14 @@ done:
 	 */
 	if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) {
 		struct timespec64 now = current_time(VFS_I(base_ni));
+		struct timespec64 ctime = inode_get_ctime(VFS_I(base_ni));
 		int sync_it = 0;
 
 		if (!timespec64_equal(&VFS_I(base_ni)->i_mtime, &now) ||
-		    !timespec64_equal(&VFS_I(base_ni)->i_ctime, &now))
+		    !timespec64_equal(&ctime, &now))
 			sync_it = 1;
+		inode_set_ctime_to_ts(VFS_I(base_ni), now);
 		VFS_I(base_ni)->i_mtime = now;
-		VFS_I(base_ni)->i_ctime = now;
 
 		if (sync_it)
 			mark_inode_dirty_sync(VFS_I(base_ni));
@@ -2928,7 +2929,7 @@ int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 	if (ia_valid & ATTR_MTIME)
 		vi->i_mtime = attr->ia_mtime;
 	if (ia_valid & ATTR_CTIME)
-		vi->i_ctime = attr->ia_ctime;
+		inode_set_ctime_to_ts(vi, attr->ia_ctime);
 	mark_inode_dirty(vi);
 out:
 	return err;
@@ -3004,7 +3005,7 @@ int __ntfs_write_inode(struct inode *vi, int sync)
 		si->last_data_change_time = nt;
 		modified = true;
 	}
-	nt = utc2ntfs(vi->i_ctime);
+	nt = utc2ntfs(inode_get_ctime(vi));
 	if (si->last_mft_change_time != nt) {
 		ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, "
 				"new = 0x%llx", vi->i_ino, (long long)
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 0155f106ec34..ad1a8f72da22 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -2682,8 +2682,7 @@ mft_rec_already_initialized:
 			vi->i_mode &= ~S_IWUGO;
 
 		/* Set the inode times to the current time. */
-		vi->i_atime = vi->i_mtime = vi->i_ctime =
-			current_time(vi);
+		vi->i_atime = vi->i_mtime = inode_set_ctime_current(vi);
 		/*
 		 * Set the file size to 0, the ntfs inode sizes are set to 0 by
 		 * the call to ntfs_init_big_inode() below.

From 3d65c46fbf6ad0e097f8ec0bc6c7674cce119fa2 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:26 -0400
Subject: [PATCH 0442/2047] ntfs3: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-59-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ntfs3/file.c    |  6 +++---
 fs/ntfs3/frecord.c |  3 ++-
 fs/ntfs3/inode.c   | 14 ++++++++------
 fs/ntfs3/namei.c   |  4 ++--
 fs/ntfs3/xattr.c   |  4 ++--
 5 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 1d6c824246c4..12788601dc84 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -342,7 +342,7 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count,
 		err = 0;
 	}
 
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 
 	if (IS_SYNC(inode)) {
@@ -400,7 +400,7 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size)
 	ni_unlock(ni);
 
 	ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE;
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	if (!IS_DIRSYNC(inode)) {
 		dirty = 1;
 	} else {
@@ -642,7 +642,7 @@ out:
 		filemap_invalidate_unlock(mapping);
 
 	if (!err) {
-		inode->i_ctime = inode->i_mtime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 		mark_inode_dirty(inode);
 	}
 
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 16bd9faa2d28..2b85cb10f0be 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -3265,6 +3265,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
 	if (is_rec_inuse(ni->mi.mrec) &&
 	    !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) {
 		bool modified = false;
+		struct timespec64 ctime = inode_get_ctime(inode);
 
 		/* Update times in standard attribute. */
 		std = ni_std(ni);
@@ -3280,7 +3281,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
 			modified = true;
 		}
 
-		dup.c_time = kernel2nt(&inode->i_ctime);
+		dup.c_time = kernel2nt(&ctime);
 		if (std->c_time != dup.c_time) {
 			std->c_time = dup.c_time;
 			modified = true;
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index dc7e7ab701c6..4123e126c4d0 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -44,6 +44,7 @@ static struct inode *ntfs_read_mft(struct inode *inode,
 	u64 t64;
 	struct MFT_REC *rec;
 	struct runs_tree *run;
+	struct timespec64 ctime;
 
 	inode->i_op = NULL;
 	/* Setup 'uid' and 'gid' */
@@ -169,7 +170,8 @@ next_attr:
 		nt2kernel(std5->cr_time, &ni->i_crtime);
 #endif
 		nt2kernel(std5->a_time, &inode->i_atime);
-		nt2kernel(std5->c_time, &inode->i_ctime);
+		ctime = inode_get_ctime(inode);
+		nt2kernel(std5->c_time, &ctime);
 		nt2kernel(std5->m_time, &inode->i_mtime);
 
 		ni->std_fa = std5->fa;
@@ -958,7 +960,7 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
 
 	if (err >= 0) {
 		if (!(ni->std_fa & FILE_ATTRIBUTE_ARCHIVE)) {
-			inode->i_ctime = inode->i_mtime = current_time(inode);
+			inode->i_mtime = inode_set_ctime_current(inode);
 			ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE;
 			dirty = true;
 		}
@@ -1658,8 +1660,8 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
 	d_instantiate(dentry, inode);
 
 	/* Set original time. inode times (i_ctime) may be changed in ntfs_init_acl. */
-	inode->i_atime = inode->i_mtime = inode->i_ctime = dir->i_mtime =
-		dir->i_ctime = ni->i_crtime;
+	inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode, ni->i_crtime);
+	dir->i_mtime = inode_set_ctime_to_ts(dir, ni->i_crtime);
 
 	mark_inode_dirty(dir);
 	mark_inode_dirty(inode);
@@ -1765,9 +1767,9 @@ int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry)
 
 	if (!err) {
 		drop_nlink(inode);
-		dir->i_mtime = dir->i_ctime = current_time(dir);
+		dir->i_mtime = inode_set_ctime_current(dir);
 		mark_inode_dirty(dir);
-		inode->i_ctime = dir->i_ctime;
+		inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
 		if (inode->i_nlink)
 			mark_inode_dirty(inode);
 	} else if (!ni_remove_name_undo(dir_ni, ni, de, de2, undo_remove)) {
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index bfd986699f9e..ad430d50bd79 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -156,8 +156,8 @@ static int ntfs_link(struct dentry *ode, struct inode *dir, struct dentry *de)
 	err = ntfs_link_inode(inode, de);
 
 	if (!err) {
-		dir->i_ctime = dir->i_mtime = inode->i_ctime =
-			current_time(dir);
+		dir->i_mtime = inode_set_ctime_to_ts(inode,
+						     inode_set_ctime_current(dir));
 		mark_inode_dirty(inode);
 		mark_inode_dirty(dir);
 		d_instantiate(de, inode);
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 023f314e8950..29fd391899e5 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -637,7 +637,7 @@ static noinline int ntfs_set_acl_ex(struct mnt_idmap *idmap,
 	if (!err) {
 		set_cached_acl(inode, type, acl);
 		inode->i_mode = mode;
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		mark_inode_dirty(inode);
 	}
 
@@ -924,7 +924,7 @@ set_new_fa:
 			  NULL);
 
 out:
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 
 	return err;

From 6861de979fa0aebe998c1606d70d01180dd29b3e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:27 -0400
Subject: [PATCH 0443/2047] ocfs2: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-60-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ocfs2/acl.c          |  6 +++---
 fs/ocfs2/alloc.c        |  6 +++---
 fs/ocfs2/aops.c         |  2 +-
 fs/ocfs2/dir.c          |  8 ++++----
 fs/ocfs2/dlmfs/dlmfs.c  |  4 ++--
 fs/ocfs2/dlmglue.c      |  7 +++++--
 fs/ocfs2/file.c         | 16 +++++++++-------
 fs/ocfs2/inode.c        | 12 ++++++------
 fs/ocfs2/move_extents.c |  6 +++---
 fs/ocfs2/namei.c        | 21 +++++++++++----------
 fs/ocfs2/refcounttree.c | 14 +++++++-------
 fs/ocfs2/xattr.c        |  6 +++---
 12 files changed, 57 insertions(+), 51 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 9fd03eaf15f8..e75137a8e7cb 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -191,10 +191,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
 	}
 
 	inode->i_mode = new_mode;
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	di->i_mode = cpu_to_le16(inode->i_mode);
-	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+	di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 	ocfs2_update_inode_fsync_trans(handle, inode, 0);
 
 	ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 51c93929a146..aef58f1395c8 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7436,10 +7436,10 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
 	}
 
 	inode->i_blocks = ocfs2_inode_sector_count(inode);
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 
-	di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
-	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 
 	ocfs2_update_inode_fsync_trans(handle, inode, 1);
 	ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8dfc284e85f0..0fdba30740ab 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2048,7 +2048,7 @@ out_write_size:
 		}
 		inode->i_blocks = ocfs2_inode_sector_count(inode);
 		di->i_size = cpu_to_le64((u64)i_size_read(inode));
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 		di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
 		di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
 		if (handle)
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 694471fc46b8..8b123d543e6e 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1658,7 +1658,7 @@ int __ocfs2_add_entry(handle_t *handle,
 				offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
 
 		if (ocfs2_dirent_would_fit(de, rec_len)) {
-			dir->i_mtime = dir->i_ctime = current_time(dir);
+			dir->i_mtime = inode_set_ctime_current(dir);
 			retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
 			if (retval < 0) {
 				mlog_errno(retval);
@@ -2962,11 +2962,11 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 	ocfs2_dinode_new_extent_list(dir, di);
 
 	i_size_write(dir, sb->s_blocksize);
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 
 	di->i_size = cpu_to_le64(sb->s_blocksize);
-	di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
-	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
+	di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(dir).tv_sec);
+	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(dir).tv_nsec);
 	ocfs2_update_inode_fsync_trans(handle, dir, 1);
 
 	/*
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index ba26c5567cff..81265123ce6c 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -337,7 +337,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
 	if (inode) {
 		inode->i_ino = get_next_ino();
 		inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
-		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 		inc_nlink(inode);
 
 		inode->i_fop = &simple_dir_operations;
@@ -360,7 +360,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
 
 	inode->i_ino = get_next_ino();
 	inode_init_owner(&nop_mnt_idmap, inode, parent, mode);
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 
 	ip = DLMFS_I(inode);
 	ip->ip_conn = DLMFS_I(parent)->ip_conn;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index c28bc983a7b1..c3e2961ee5db 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2162,6 +2162,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
 	struct ocfs2_meta_lvb *lvb;
+	struct timespec64 ctime = inode_get_ctime(inode);
 
 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
@@ -2185,7 +2186,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 	lvb->lvb_iatime_packed  =
 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
 	lvb->lvb_ictime_packed =
-		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
+		cpu_to_be64(ocfs2_pack_timespec(&ctime));
 	lvb->lvb_imtime_packed =
 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
 	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
@@ -2208,6 +2209,7 @@ static int ocfs2_refresh_inode_from_lvb(struct inode *inode)
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
 	struct ocfs2_meta_lvb *lvb;
+	struct timespec64 ctime;
 
 	mlog_meta_lvb(0, lockres);
 
@@ -2238,8 +2240,9 @@ static int ocfs2_refresh_inode_from_lvb(struct inode *inode)
 			      be64_to_cpu(lvb->lvb_iatime_packed));
 	ocfs2_unpack_timespec(&inode->i_mtime,
 			      be64_to_cpu(lvb->lvb_imtime_packed));
-	ocfs2_unpack_timespec(&inode->i_ctime,
+	ocfs2_unpack_timespec(&ctime,
 			      be64_to_cpu(lvb->lvb_ictime_packed));
+	inode_set_ctime_to_ts(inode, ctime);
 	spin_unlock(&oi->ip_lock);
 	return 0;
 }
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 91a194596552..1b337ebce4df 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -232,8 +232,10 @@ int ocfs2_should_update_atime(struct inode *inode,
 		return 0;
 
 	if (vfsmnt->mnt_flags & MNT_RELATIME) {
+		struct timespec64 ctime = inode_get_ctime(inode);
+
 		if ((timespec64_compare(&inode->i_atime, &inode->i_mtime) <= 0) ||
-		    (timespec64_compare(&inode->i_atime, &inode->i_ctime) <= 0))
+		    (timespec64_compare(&inode->i_atime, &ctime) <= 0))
 			return 1;
 
 		return 0;
@@ -294,7 +296,7 @@ int ocfs2_set_inode_size(handle_t *handle,
 
 	i_size_write(inode, new_i_size);
 	inode->i_blocks = ocfs2_inode_sector_count(inode);
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 
 	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
 	if (status < 0) {
@@ -415,12 +417,12 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 	}
 
 	i_size_write(inode, new_i_size);
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 
 	di = (struct ocfs2_dinode *) fe_bh->b_data;
 	di->i_size = cpu_to_le64(new_i_size);
-	di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
-	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 	ocfs2_update_inode_fsync_trans(handle, inode, 0);
 
 	ocfs2_journal_dirty(handle, fe_bh);
@@ -824,7 +826,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
 	i_size_write(inode, abs_to);
 	inode->i_blocks = ocfs2_inode_sector_count(inode);
 	di->i_size = cpu_to_le64((u64)i_size_read(inode));
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
 	di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
 	di->i_mtime_nsec = di->i_ctime_nsec;
@@ -2043,7 +2045,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 		goto out_inode_unlock;
 	}
 
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
 	if (ret < 0)
 		mlog_errno(ret);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index bb116c39b581..e8771600b930 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -306,8 +306,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
 	inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
 	inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec);
-	inode->i_ctime.tv_sec = le64_to_cpu(fe->i_ctime);
-	inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec);
+	inode_set_ctime(inode, le64_to_cpu(fe->i_ctime),
+		        le32_to_cpu(fe->i_ctime_nsec));
 
 	if (OCFS2_I(inode)->ip_blkno != le64_to_cpu(fe->i_blkno))
 		mlog(ML_ERROR,
@@ -1314,8 +1314,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
 	fe->i_mode = cpu_to_le16(inode->i_mode);
 	fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
 	fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
-	fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	fe->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+	fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 	fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
 	fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
 
@@ -1352,8 +1352,8 @@ void ocfs2_refresh_inode(struct inode *inode,
 	inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
 	inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
 	inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec);
-	inode->i_ctime.tv_sec = le64_to_cpu(fe->i_ctime);
-	inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec);
+	inode_set_ctime(inode, le64_to_cpu(fe->i_ctime),
+			le32_to_cpu(fe->i_ctime_nsec));
 
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 }
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index b1e32ec4a9d4..05d67968a3a9 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -950,9 +950,9 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
 	}
 
 	di = (struct ocfs2_dinode *)di_bh->b_data;
-	inode->i_ctime = current_time(inode);
-	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	inode_set_ctime_current(inode);
+	di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+	di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 	ocfs2_update_inode_fsync_trans(handle, inode, 0);
 
 	ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 17c52225b87d..e4a684d45308 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -793,10 +793,10 @@ static int ocfs2_link(struct dentry *old_dentry,
 	}
 
 	inc_nlink(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	ocfs2_set_links_count(fe, inode->i_nlink);
-	fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	fe->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+	fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 	ocfs2_journal_dirty(handle, fe_bh);
 
 	err = ocfs2_add_entry(handle, dentry, inode,
@@ -995,7 +995,7 @@ static int ocfs2_unlink(struct inode *dir,
 	ocfs2_set_links_count(fe, inode->i_nlink);
 	ocfs2_journal_dirty(handle, fe_bh);
 
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	if (S_ISDIR(inode->i_mode))
 		drop_nlink(dir);
 
@@ -1537,7 +1537,7 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
 					 new_dir_bh, &target_insert);
 	}
 
-	old_inode->i_ctime = current_time(old_inode);
+	inode_set_ctime_current(old_inode);
 	mark_inode_dirty(old_inode);
 
 	status = ocfs2_journal_access_di(handle, INODE_CACHE(old_inode),
@@ -1546,8 +1546,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
 	if (status >= 0) {
 		old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
 
-		old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
-		old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
+		old_di->i_ctime = cpu_to_le64(inode_get_ctime(old_inode).tv_sec);
+		old_di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(old_inode).tv_nsec);
 		ocfs2_journal_dirty(handle, old_inode_bh);
 	} else
 		mlog_errno(status);
@@ -1586,9 +1586,9 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
 
 	if (new_inode) {
 		drop_nlink(new_inode);
-		new_inode->i_ctime = current_time(new_inode);
+		inode_set_ctime_current(new_inode);
 	}
-	old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
+	old_dir->i_mtime = inode_set_ctime_current(old_dir);
 
 	if (update_dot_dot) {
 		status = ocfs2_update_entry(old_inode, handle,
@@ -1610,7 +1610,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
 
 	if (old_dir != new_dir) {
 		/* Keep the same times on both directories.*/
-		new_dir->i_ctime = new_dir->i_mtime = old_dir->i_ctime;
+		new_dir->i_mtime = inode_set_ctime_to_ts(new_dir,
+							 inode_get_ctime(old_dir));
 
 		/*
 		 * This will also pick up the i_nlink change from the
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 564ab48d03ef..25c8ec3c8c3a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3750,9 +3750,9 @@ static int ocfs2_change_ctime(struct inode *inode,
 		goto out_commit;
 	}
 
-	inode->i_ctime = current_time(inode);
-	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	inode_set_ctime_current(inode);
+	di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+	di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 
 	ocfs2_journal_dirty(handle, di_bh);
 
@@ -4073,10 +4073,10 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
 		 * we want mtime to appear identical to the source and
 		 * update ctime.
 		 */
-		t_inode->i_ctime = current_time(t_inode);
+		inode_set_ctime_current(t_inode);
 
-		di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
-		di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
+		di->i_ctime = cpu_to_le64(inode_get_ctime(t_inode).tv_sec);
+		di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(t_inode).tv_nsec);
 
 		t_inode->i_mtime = s_inode->i_mtime;
 		di->i_mtime = s_di->i_mtime;
@@ -4456,7 +4456,7 @@ int ocfs2_reflink_update_dest(struct inode *dest,
 	if (newlen > i_size_read(dest))
 		i_size_write(dest, newlen);
 	spin_unlock(&OCFS2_I(dest)->ip_lock);
-	dest->i_ctime = dest->i_mtime = current_time(dest);
+	dest->i_mtime = inode_set_ctime_current(dest);
 
 	ret = ocfs2_mark_inode_dirty(handle, dest, d_bh);
 	if (ret) {
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 4ac77ff6e676..6510ad783c91 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3421,9 +3421,9 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 			goto out;
 		}
 
-		inode->i_ctime = current_time(inode);
-		di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-		di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+		inode_set_ctime_current(inode);
+		di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+		di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
 	}
 out:

From 906effbf679fb6eb314a82a52d067815a59cf214 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:28 -0400
Subject: [PATCH 0444/2047] omfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-61-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/omfs/dir.c   | 4 ++--
 fs/omfs/inode.c | 9 ++++-----
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 82cf7e9a665f..6bda275826d6 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -143,7 +143,7 @@ static int omfs_add_link(struct dentry *dentry, struct inode *inode)
 	mark_buffer_dirty(bh);
 	brelse(bh);
 
-	dir->i_ctime = current_time(dir);
+	inode_set_ctime_current(dir);
 
 	/* mark affected inodes dirty to rebuild checksums */
 	mark_inode_dirty(dir);
@@ -399,7 +399,7 @@ static int omfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 	if (err)
 		goto out;
 
-	old_inode->i_ctime = current_time(old_inode);
+	inode_set_ctime_current(old_inode);
 	mark_inode_dirty(old_inode);
 out:
 	return err;
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index c4c79e07efc7..2f8c1882f45c 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -51,7 +51,7 @@ struct inode *omfs_new_inode(struct inode *dir, umode_t mode)
 	inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
 	inode->i_mapping->a_ops = &omfs_aops;
 
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	switch (mode & S_IFMT) {
 	case S_IFDIR:
 		inode->i_op = &omfs_dir_inops;
@@ -134,8 +134,8 @@ static int __omfs_write_inode(struct inode *inode, int wait)
 	oi->i_head.h_magic = OMFS_IMAGIC;
 	oi->i_size = cpu_to_be64(inode->i_size);
 
-	ctime = inode->i_ctime.tv_sec * 1000LL +
-		((inode->i_ctime.tv_nsec + 999)/1000);
+	ctime = inode_get_ctime(inode).tv_sec * 1000LL +
+		((inode_get_ctime(inode).tv_nsec + 999)/1000);
 	oi->i_ctime = cpu_to_be64(ctime);
 
 	omfs_update_checksums(oi);
@@ -232,10 +232,9 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
 
 	inode->i_atime.tv_sec = ctime;
 	inode->i_mtime.tv_sec = ctime;
-	inode->i_ctime.tv_sec = ctime;
+	inode_set_ctime(inode, ctime, nsecs);
 	inode->i_atime.tv_nsec = nsecs;
 	inode->i_mtime.tv_nsec = nsecs;
-	inode->i_ctime.tv_nsec = nsecs;
 
 	inode->i_mapping->a_ops = &omfs_aops;
 

From 1520a1520c61eb41ca27d4a852080c073f71bd49 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:29 -0400
Subject: [PATCH 0445/2047] openpromfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-62-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/openpromfs/inode.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index f0b7f4d51a17..b2457cb97fa0 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -237,7 +237,7 @@ found:
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
 	if (inode->i_state & I_NEW) {
-		inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 		ent_oi = OP_I(inode);
 		ent_oi->type = ent_type;
 		ent_oi->u = ent_data;
@@ -387,8 +387,7 @@ static int openprom_fill_super(struct super_block *s, struct fs_context *fc)
 		goto out_no_root;
 	}
 
-	root_inode->i_mtime = root_inode->i_atime =
-		root_inode->i_ctime = current_time(root_inode);
+	root_inode->i_mtime = root_inode->i_atime = inode_set_ctime_current(root_inode);
 	root_inode->i_op = &openprom_inode_operations;
 	root_inode->i_fop = &openprom_operations;
 	root_inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;

From 0971a799b156b1b4030d6563a9f4b784ae2f5c6d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:30 -0400
Subject: [PATCH 0446/2047] orangefs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-63-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/orangefs/namei.c          | 2 +-
 fs/orangefs/orangefs-utils.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index 77518e248cf7..c9dfd5c6a097 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -421,7 +421,7 @@ static int orangefs_rename(struct mnt_idmap *idmap,
 		     ret);
 
 	if (new_dentry->d_inode)
-		new_dentry->d_inode->i_ctime = current_time(new_dentry->d_inode);
+		inode_set_ctime_current(d_inode(new_dentry));
 
 	op_release(new_op);
 	return ret;
diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c
index 46b7dcff18ac..0a9fcfdf552f 100644
--- a/fs/orangefs/orangefs-utils.c
+++ b/fs/orangefs/orangefs-utils.c
@@ -361,11 +361,11 @@ again2:
 	    downcall.resp.getattr.attributes.atime;
 	inode->i_mtime.tv_sec = (time64_t)new_op->
 	    downcall.resp.getattr.attributes.mtime;
-	inode->i_ctime.tv_sec = (time64_t)new_op->
-	    downcall.resp.getattr.attributes.ctime;
+	inode_set_ctime(inode,
+			(time64_t)new_op->downcall.resp.getattr.attributes.ctime,
+			0);
 	inode->i_atime.tv_nsec = 0;
 	inode->i_mtime.tv_nsec = 0;
-	inode->i_ctime.tv_nsec = 0;
 
 	/* special case: mark the root inode as sticky */
 	inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |

From 9aa7111523577e6cdbf9f8dbe68f51d95e616f48 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:31 -0400
Subject: [PATCH 0447/2047] overlayfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-64-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/overlayfs/file.c | 7 +++++--
 fs/overlayfs/util.c | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 21245b00722a..7e7876aae01c 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -239,6 +239,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
 static void ovl_file_accessed(struct file *file)
 {
 	struct inode *inode, *upperinode;
+	struct timespec64 ctime, uctime;
 
 	if (file->f_flags & O_NOATIME)
 		return;
@@ -249,10 +250,12 @@ static void ovl_file_accessed(struct file *file)
 	if (!upperinode)
 		return;
 
+	ctime = inode_get_ctime(inode);
+	uctime = inode_get_ctime(upperinode);
 	if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
-	     !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
+	     !timespec64_equal(&ctime, &uctime))) {
 		inode->i_mtime = upperinode->i_mtime;
-		inode->i_ctime = upperinode->i_ctime;
+		inode_set_ctime_to_ts(inode, uctime);
 	}
 
 	touch_atime(&file->f_path);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 7ef9e13c404a..c210b5d496a8 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -1202,6 +1202,6 @@ void ovl_copyattr(struct inode *inode)
 	inode->i_mode = realinode->i_mode;
 	inode->i_atime = realinode->i_atime;
 	inode->i_mtime = realinode->i_mtime;
-	inode->i_ctime = realinode->i_ctime;
+	inode_set_ctime_to_ts(inode, inode_get_ctime(realinode));
 	i_size_write(inode, i_size_read(realinode));
 }

From e9d7d3cb9fb3f142df574664507ac0ee4a26365a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:32 -0400
Subject: [PATCH 0448/2047] procfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-65-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/proc/base.c        | 2 +-
 fs/proc/inode.c       | 2 +-
 fs/proc/proc_sysctl.c | 2 +-
 fs/proc/self.c        | 2 +-
 fs/proc/thread_self.c | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 05452c3b9872..3c644a822bca 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1902,7 +1902,7 @@ struct inode *proc_pid_make_inode(struct super_block *sb,
 	ei = PROC_I(inode);
 	inode->i_mode = mode;
 	inode->i_ino = get_next_ino();
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_op = &proc_def_inode_operations;
 
 	/*
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 67b09a1d9433..532dc9d240f7 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -660,7 +660,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
 
 	inode->i_private = de->data;
 	inode->i_ino = de->low_ino;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	PROC_I(inode)->pde = de;
 	if (is_empty_pde(de)) {
 		make_empty_dir_inode(inode);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5ea42653126e..6bc10e7e0ff7 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -463,7 +463,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 	head->count++;
 	spin_unlock(&sysctl_lock);
 
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_mode = table->mode;
 	if (!S_ISDIR(table->mode)) {
 		inode->i_mode |= S_IFREG;
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 72cd69bcaf4a..ecc4da8d265e 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -46,7 +46,7 @@ int proc_setup_self(struct super_block *s)
 		struct inode *inode = new_inode(s);
 		if (inode) {
 			inode->i_ino = self_inum;
-			inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+			inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 			inode->i_mode = S_IFLNK | S_IRWXUGO;
 			inode->i_uid = GLOBAL_ROOT_UID;
 			inode->i_gid = GLOBAL_ROOT_GID;
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index a553273fbd41..63ac1f93289f 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -46,7 +46,7 @@ int proc_setup_thread_self(struct super_block *s)
 		struct inode *inode = new_inode(s);
 		if (inode) {
 			inode->i_ino = thread_self_inum;
-			inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+			inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 			inode->i_mode = S_IFLNK | S_IRWXUGO;
 			inode->i_uid = GLOBAL_ROOT_UID;
 			inode->i_gid = GLOBAL_ROOT_GID;

From a411ea5af7ec9603f47c6810de1ac032e538003e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:33 -0400
Subject: [PATCH 0449/2047] pstore: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-66-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/pstore/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ffbadb8b3032..a756af980d0c 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -223,7 +223,7 @@ static struct inode *pstore_get_inode(struct super_block *sb)
 	struct inode *inode = new_inode(sb);
 	if (inode) {
 		inode->i_ino = get_next_ino();
-		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	}
 	return inode;
 }
@@ -390,7 +390,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
 	inode->i_private = private;
 
 	if (record->time.tv_sec)
-		inode->i_mtime = inode->i_ctime = record->time;
+		inode->i_mtime = inode_set_ctime_to_ts(inode, record->time);
 
 	d_add(dentry, inode);
 

From bbe21298548d718fc76042409117ebb72a5965cc Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:34 -0400
Subject: [PATCH 0450/2047] qnx4: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Anders Larsen <al@alarsen.net>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-67-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/qnx4/inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 391ea402920d..a7171f5532a1 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -305,8 +305,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
 	inode->i_mtime.tv_nsec = 0;
 	inode->i_atime.tv_sec   = le32_to_cpu(raw_inode->di_atime);
 	inode->i_atime.tv_nsec = 0;
-	inode->i_ctime.tv_sec   = le32_to_cpu(raw_inode->di_ctime);
-	inode->i_ctime.tv_nsec = 0;
+	inode_set_ctime(inode, le32_to_cpu(raw_inode->di_ctime), 0);
 	inode->i_blocks  = le32_to_cpu(raw_inode->di_first_xtnt.xtnt_size);
 
 	memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE);

From ade79b7cf677fefd57747dfaa4a55cdd0a31623f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:35 -0400
Subject: [PATCH 0451/2047] qnx6: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-68-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/qnx6/inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 85b2fa3b211c..21f90d519f1a 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -562,8 +562,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
 	inode->i_mtime.tv_nsec = 0;
 	inode->i_atime.tv_sec   = fs32_to_cpu(sbi, raw_inode->di_atime);
 	inode->i_atime.tv_nsec = 0;
-	inode->i_ctime.tv_sec   = fs32_to_cpu(sbi, raw_inode->di_ctime);
-	inode->i_ctime.tv_nsec = 0;
+	inode_set_ctime(inode, fs32_to_cpu(sbi, raw_inode->di_ctime), 0);
 
 	/* calc blocks based on 512 byte blocksize */
 	inode->i_blocks = (inode->i_size + 511) >> 9;

From 9f590685bc5aa7a8f6c1a9ff47bf4755546b1c2b Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:36 -0400
Subject: [PATCH 0452/2047] ramfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-69-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ramfs/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index fef477c78107..18e8387cab41 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -65,7 +65,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
 		inode->i_mapping->a_ops = &ram_aops;
 		mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
 		mapping_set_unevictable(inode->i_mapping);
-		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 		switch (mode & S_IFMT) {
 		default:
 			init_special_inode(inode, mode, dev);
@@ -105,7 +105,7 @@ ramfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 		d_instantiate(dentry, inode);
 		dget(dentry);	/* Extra count - pin the dentry in core */
 		error = 0;
-		dir->i_mtime = dir->i_ctime = current_time(dir);
+		dir->i_mtime = inode_set_ctime_current(dir);
 	}
 	return error;
 }
@@ -138,7 +138,7 @@ static int ramfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 		if (!error) {
 			d_instantiate(dentry, inode);
 			dget(dentry);
-			dir->i_mtime = dir->i_ctime = current_time(dir);
+			dir->i_mtime = inode_set_ctime_current(dir);
 		} else
 			iput(inode);
 	}

From ae834901ecc517cd943e172cb30b65fb79407b98 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:37 -0400
Subject: [PATCH 0453/2047] reiserfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-70-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/reiserfs/inode.c     | 12 +++++-------
 fs/reiserfs/ioctl.c     |  4 ++--
 fs/reiserfs/namei.c     | 11 ++++++-----
 fs/reiserfs/stree.c     |  4 ++--
 fs/reiserfs/super.c     |  2 +-
 fs/reiserfs/xattr.c     |  5 +++--
 fs/reiserfs/xattr_acl.c |  2 +-
 7 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 77bd3b27059f..86e55d4bb10d 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1259,9 +1259,8 @@ static void init_inode(struct inode *inode, struct treepath *path)
 		inode->i_size = sd_v1_size(sd);
 		inode->i_atime.tv_sec = sd_v1_atime(sd);
 		inode->i_mtime.tv_sec = sd_v1_mtime(sd);
-		inode->i_ctime.tv_sec = sd_v1_ctime(sd);
+		inode_set_ctime(inode, sd_v1_ctime(sd), 0);
 		inode->i_atime.tv_nsec = 0;
-		inode->i_ctime.tv_nsec = 0;
 		inode->i_mtime.tv_nsec = 0;
 
 		inode->i_blocks = sd_v1_blocks(sd);
@@ -1314,8 +1313,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
 		i_gid_write(inode, sd_v2_gid(sd));
 		inode->i_mtime.tv_sec = sd_v2_mtime(sd);
 		inode->i_atime.tv_sec = sd_v2_atime(sd);
-		inode->i_ctime.tv_sec = sd_v2_ctime(sd);
-		inode->i_ctime.tv_nsec = 0;
+		inode_set_ctime(inode, sd_v2_ctime(sd), 0);
 		inode->i_mtime.tv_nsec = 0;
 		inode->i_atime.tv_nsec = 0;
 		inode->i_blocks = sd_v2_blocks(sd);
@@ -1374,7 +1372,7 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size)
 	set_sd_v2_gid(sd_v2, i_gid_read(inode));
 	set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
 	set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
-	set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
+	set_sd_v2_ctime(sd_v2, inode_get_ctime(inode).tv_sec);
 	set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
 	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
 		set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
@@ -1394,7 +1392,7 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
 	set_sd_v1_nlink(sd_v1, inode->i_nlink);
 	set_sd_v1_size(sd_v1, size);
 	set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
-	set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
+	set_sd_v1_ctime(sd_v1, inode_get_ctime(inode).tv_sec);
 	set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);
 
 	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
@@ -1986,7 +1984,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 
 	/* uid and gid must already be set by the caller for quota init */
 
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_size = i_size;
 	inode->i_blocks = 0;
 	inode->i_bytes = 0;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 6bf9b54e58ca..dd33f8cc6eda 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -55,7 +55,7 @@ int reiserfs_fileattr_set(struct mnt_idmap *idmap,
 	}
 	sd_attrs_to_i_attrs(flags, inode);
 	REISERFS_I(inode)->i_attrs = flags;
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 	err = 0;
 unlock:
@@ -107,7 +107,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			err = -EFAULT;
 			goto setversion_out;
 		}
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		mark_inode_dirty(inode);
 setversion_out:
 		mnt_drop_write_file(filp);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 405ac59eb2dd..9c5704be2435 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -572,7 +572,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
 	}
 
 	dir->i_size += paste_size;
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	if (!S_ISDIR(inode->i_mode) && visible)
 		/* reiserfs_mkdir or reiserfs_rename will do that by itself */
 		reiserfs_update_sd(th, dir);
@@ -966,7 +966,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
 			       inode->i_nlink);
 
 	clear_nlink(inode);
-	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_to_ts(dir,
+					     inode_set_ctime_current(inode));
 	reiserfs_update_sd(&th, inode);
 
 	DEC_DIR_INODE_NLINK(dir)
@@ -1070,11 +1071,11 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
 		inc_nlink(inode);
 		goto end_unlink;
 	}
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	reiserfs_update_sd(&th, inode);
 
 	dir->i_size -= (de.de_entrylen + DEH_SIZE);
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	reiserfs_update_sd(&th, dir);
 
 	if (!savelink)
@@ -1250,7 +1251,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
 		return err ? err : retval;
 	}
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	reiserfs_update_sd(&th, inode);
 
 	ihold(inode);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index ce5003986789..3676e02a0232 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -2004,7 +2004,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
 
 			if (update_timestamps) {
 				inode->i_mtime = current_time(inode);
-				inode->i_ctime = current_time(inode);
+				inode_set_ctime_current(inode);
 			}
 			reiserfs_update_sd(th, inode);
 
@@ -2029,7 +2029,7 @@ update_and_out:
 	if (update_timestamps) {
 		/* this is truncate, not file closing */
 		inode->i_mtime = current_time(inode);
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 	}
 	reiserfs_update_sd(th, inode);
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 929acce6e731..7eaf36b3de12 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2587,7 +2587,7 @@ out:
 		return err;
 	if (inode->i_size < off + len - towrite)
 		i_size_write(inode, off + len - towrite);
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 	return len - towrite;
 }
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 651027967159..6000964c2b80 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -466,12 +466,13 @@ int reiserfs_commit_write(struct file *f, struct page *page,
 static void update_ctime(struct inode *inode)
 {
 	struct timespec64 now = current_time(inode);
+	struct timespec64 ctime = inode_get_ctime(inode);
 
 	if (inode_unhashed(inode) || !inode->i_nlink ||
-	    timespec64_equal(&inode->i_ctime, &now))
+	    timespec64_equal(&ctime, &now))
 		return;
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_to_ts(inode, now);
 	mark_inode_dirty(inode);
 }
 
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 138060452678..064264992b49 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -285,7 +285,7 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	if (error == -ENODATA) {
 		error = 0;
 		if (type == ACL_TYPE_ACCESS) {
-			inode->i_ctime = current_time(inode);
+			inode_set_ctime_current(inode);
 			mark_inode_dirty(inode);
 		}
 	}

From a58451279c25fc7330156d5c2f01aa950ceef90e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:38 -0400
Subject: [PATCH 0454/2047] romfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230705190309.579783-71-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/romfs/super.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index c59b230d55b4..d89739655f9e 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -322,8 +322,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
 
 	set_nlink(i, 1);		/* Hard to decide.. */
 	i->i_size = be32_to_cpu(ri.size);
-	i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0;
-	i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
+	i->i_mtime = i->i_atime = inode_set_ctime(i, 0, 0);
 
 	/* set up mode and ops */
 	mode = romfs_modemap[nextfh & ROMFH_TYPE];

From 9448765397b6e6522dff485f4b480b7ee020a536 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:39 -0400
Subject: [PATCH 0455/2047] smb: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Tom Talpey <tom@talpey.com>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Steve French <stfrench@microsoft.com>
Message-Id: <20230705190309.579783-72-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/smb/client/file.c    |  4 ++--
 fs/smb/client/fscache.h |  5 +++--
 fs/smb/client/inode.c   | 14 +++++++-------
 fs/smb/client/smb2ops.c |  3 ++-
 fs/smb/server/smb2pdu.c |  8 ++++----
 5 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 0a5fe8d5314b..689058e1b6e6 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -1085,7 +1085,7 @@ int cifs_close(struct inode *inode, struct file *file)
 		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
 		    dclose) {
 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
-				inode->i_ctime = inode->i_mtime = current_time(inode);
+				inode->i_mtime = inode_set_ctime_current(inode);
 			}
 			spin_lock(&cinode->deferred_lock);
 			cifs_add_deferred_close(cfile, dclose);
@@ -2596,7 +2596,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 					   write_data, to - from, &offset);
 		cifsFileInfo_put(open_file);
 		/* Does mm or vfs already set times? */
-		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 		if ((bytes_written > 0) && (offset))
 			rc = 0;
 		else if (bytes_written < 0)
diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h
index 173999610997..84f3b09367d2 100644
--- a/fs/smb/client/fscache.h
+++ b/fs/smb/client/fscache.h
@@ -50,12 +50,13 @@ void cifs_fscache_fill_coherency(struct inode *inode,
 				 struct cifs_fscache_inode_coherency_data *cd)
 {
 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
+	struct timespec64 ctime = inode_get_ctime(inode);
 
 	memset(cd, 0, sizeof(*cd));
 	cd->last_write_time_sec   = cpu_to_le64(cifsi->netfs.inode.i_mtime.tv_sec);
 	cd->last_write_time_nsec  = cpu_to_le32(cifsi->netfs.inode.i_mtime.tv_nsec);
-	cd->last_change_time_sec  = cpu_to_le64(cifsi->netfs.inode.i_ctime.tv_sec);
-	cd->last_change_time_nsec = cpu_to_le32(cifsi->netfs.inode.i_ctime.tv_nsec);
+	cd->last_change_time_sec  = cpu_to_le64(ctime.tv_sec);
+	cd->last_change_time_nsec = cpu_to_le32(ctime.tv_nsec);
 }
 
 
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index c3eeae07e139..218f03dd3f52 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -172,7 +172,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
 	else
 		inode->i_atime = fattr->cf_atime;
 	inode->i_mtime = fattr->cf_mtime;
-	inode->i_ctime = fattr->cf_ctime;
+	inode_set_ctime_to_ts(inode, fattr->cf_ctime);
 	inode->i_rdev = fattr->cf_rdev;
 	cifs_nlink_fattr_to_inode(inode, fattr);
 	inode->i_uid = fattr->cf_uid;
@@ -1744,9 +1744,9 @@ out_reval:
 		cifs_inode = CIFS_I(inode);
 		cifs_inode->time = 0;	/* will force revalidate to get info
 					   when needed */
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 	}
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	cifs_inode = CIFS_I(dir);
 	CIFS_I(dir)->time = 0;	/* force revalidate of dir as well */
 unlink_out:
@@ -2060,8 +2060,8 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
 	 */
 	cifsInode->time = 0;
 
-	d_inode(direntry)->i_ctime = inode->i_ctime = inode->i_mtime =
-		current_time(inode);
+	inode_set_ctime_current(d_inode(direntry));
+	inode->i_mtime = inode_set_ctime_current(inode);
 
 rmdir_exit:
 	free_dentry_path(page);
@@ -2267,8 +2267,8 @@ unlink_target:
 	/* force revalidate to go get info when needed */
 	CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0;
 
-	source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime =
-		target_dir->i_mtime = current_time(source_dir);
+	source_dir->i_mtime = target_dir->i_mtime = inode_set_ctime_to_ts(source_dir,
+									  inode_set_ctime_current(target_dir));
 
 cifs_rename_exit:
 	kfree(info_buf_source);
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 87abce010974..3cc3c4a71e32 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -1396,7 +1396,8 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
 	if (file_inf.LastWriteTime)
 		inode->i_mtime = cifs_NTtimeToUnix(file_inf.LastWriteTime);
 	if (file_inf.ChangeTime)
-		inode->i_ctime = cifs_NTtimeToUnix(file_inf.ChangeTime);
+		inode_set_ctime_to_ts(inode,
+				      cifs_NTtimeToUnix(file_inf.ChangeTime));
 	if (file_inf.LastAccessTime)
 		inode->i_atime = cifs_NTtimeToUnix(file_inf.LastAccessTime);
 
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index cf8822103f50..f9099831c8ff 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -4779,7 +4779,7 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
 	file_info->LastAccessTime = cpu_to_le64(time);
 	time = ksmbd_UnixTimeToNT(inode->i_mtime);
 	file_info->LastWriteTime = cpu_to_le64(time);
-	time = ksmbd_UnixTimeToNT(inode->i_ctime);
+	time = ksmbd_UnixTimeToNT(inode_get_ctime(inode));
 	file_info->ChangeTime = cpu_to_le64(time);
 	file_info->DosAttributes = fp->f_ci->m_fattr;
 	file_info->Inode = cpu_to_le64(inode->i_ino);
@@ -5422,7 +5422,7 @@ int smb2_close(struct ksmbd_work *work)
 		rsp->LastAccessTime = cpu_to_le64(time);
 		time = ksmbd_UnixTimeToNT(inode->i_mtime);
 		rsp->LastWriteTime = cpu_to_le64(time);
-		time = ksmbd_UnixTimeToNT(inode->i_ctime);
+		time = ksmbd_UnixTimeToNT(inode_get_ctime(inode));
 		rsp->ChangeTime = cpu_to_le64(time);
 		ksmbd_fd_put(work, fp);
 	} else {
@@ -5644,7 +5644,7 @@ static int set_file_basic_info(struct ksmbd_file *fp,
 	if (file_info->ChangeTime)
 		attrs.ia_ctime = ksmbd_NTtimeToUnix(file_info->ChangeTime);
 	else
-		attrs.ia_ctime = inode->i_ctime;
+		attrs.ia_ctime = inode_get_ctime(inode);
 
 	if (file_info->LastWriteTime) {
 		attrs.ia_mtime = ksmbd_NTtimeToUnix(file_info->LastWriteTime);
@@ -5689,7 +5689,7 @@ static int set_file_basic_info(struct ksmbd_file *fp,
 			return -EACCES;
 
 		inode_lock(inode);
-		inode->i_ctime = attrs.ia_ctime;
+		inode_set_ctime_to_ts(inode, attrs.ia_ctime);
 		attrs.ia_valid &= ~ATTR_CTIME;
 		rc = notify_change(idmap, dentry, &attrs, NULL);
 		inode_unlock(inode);

From 280345d0d03bcdb9d30cb0df0c45409f813646d3 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:40 -0400
Subject: [PATCH 0456/2047] squashfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-73-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/squashfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 24463145b351..c6e626b00546 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -61,7 +61,7 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
 	inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
 	inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime);
 	inode->i_atime.tv_sec = inode->i_mtime.tv_sec;
-	inode->i_ctime.tv_sec = inode->i_mtime.tv_sec;
+	inode_set_ctime(inode, inode->i_mtime.tv_sec, 0);
 	inode->i_mode = le16_to_cpu(sqsh_ino->mode);
 	inode->i_size = 0;
 

From c801b09511976dcace6cd33360b5163fe6a23c2a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:41 -0400
Subject: [PATCH 0457/2047] sysv: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-74-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/sysv/dir.c    | 6 +++---
 fs/sysv/ialloc.c | 2 +-
 fs/sysv/inode.c  | 5 ++---
 fs/sysv/itree.c  | 4 ++--
 fs/sysv/namei.c  | 6 +++---
 5 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 0140010aa0c3..2f5ead88d00b 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -224,7 +224,7 @@ got_it:
 	memset (de->name + namelen, 0, SYSV_DIRSIZE - namelen - 2);
 	de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
 	dir_commit_chunk(page, pos, SYSV_DIRSIZE);
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
 	err = sysv_handle_dirsync(dir);
 out_page:
@@ -249,7 +249,7 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
 	}
 	de->inode = 0;
 	dir_commit_chunk(page, pos, SYSV_DIRSIZE);
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 	return sysv_handle_dirsync(inode);
 }
@@ -346,7 +346,7 @@ int sysv_set_link(struct sysv_dir_entry *de, struct page *page,
 	}
 	de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
 	dir_commit_chunk(page, pos, SYSV_DIRSIZE);
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
 	return sysv_handle_dirsync(inode);
 }
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index e732879036ab..6719da5889d9 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -165,7 +165,7 @@ struct inode * sysv_new_inode(const struct inode * dir, umode_t mode)
 	dirty_sb(sb);
 	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
 	inode->i_ino = fs16_to_cpu(sbi, ino);
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_blocks = 0;
 	memset(SYSV_I(inode)->i_data, 0, sizeof(SYSV_I(inode)->i_data));
 	SYSV_I(inode)->i_dir_start_lookup = 0;
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 9e8d4a6fb2f3..0aa3827d8178 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -202,8 +202,7 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino)
 	inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size);
 	inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime);
 	inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime);
-	inode->i_ctime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_ctime);
-	inode->i_ctime.tv_nsec = 0;
+	inode_set_ctime(inode, fs32_to_cpu(sbi, raw_inode->i_ctime), 0);
 	inode->i_atime.tv_nsec = 0;
 	inode->i_mtime.tv_nsec = 0;
 	inode->i_blocks = 0;
@@ -256,7 +255,7 @@ static int __sysv_write_inode(struct inode *inode, int wait)
 	raw_inode->i_size = cpu_to_fs32(sbi, inode->i_size);
 	raw_inode->i_atime = cpu_to_fs32(sbi, inode->i_atime.tv_sec);
 	raw_inode->i_mtime = cpu_to_fs32(sbi, inode->i_mtime.tv_sec);
-	raw_inode->i_ctime = cpu_to_fs32(sbi, inode->i_ctime.tv_sec);
+	raw_inode->i_ctime = cpu_to_fs32(sbi, inode_get_ctime(inode).tv_sec);
 
 	si = SYSV_I(inode);
 	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 58d7f43a1371..dba6a2ef26f1 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -183,7 +183,7 @@ static inline int splice_branch(struct inode *inode,
 	*where->p = where->key;
 	write_unlock(&pointers_lock);
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 
 	/* had we spliced it onto indirect block? */
 	if (where->bh)
@@ -423,7 +423,7 @@ do_indirects:
 		}
 		n++;
 	}
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	if (IS_SYNC(inode))
 		sysv_sync_inode (inode);
 	else
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index fcf163fea3ad..d6b73798071b 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -103,7 +103,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
 {
 	struct inode *inode = d_inode(old_dentry);
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	inode_inc_link_count(inode);
 	ihold(inode);
 
@@ -161,7 +161,7 @@ static int sysv_unlink(struct inode * dir, struct dentry * dentry)
 
 	err = sysv_delete_entry(de, page);
 	if (!err) {
-		inode->i_ctime = dir->i_ctime;
+		inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
 		inode_dec_link_count(inode);
 	}
 	unmap_and_put_page(page, de);
@@ -230,7 +230,7 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 		unmap_and_put_page(new_page, new_de);
 		if (err)
 			goto out_dir;
-		new_inode->i_ctime = current_time(new_inode);
+		inode_set_ctime_current(new_inode);
 		if (dir_de)
 			drop_nlink(new_inode);
 		inode_dec_link_count(new_inode);

From bb9c40e652624291c0239c71d7c4663d80523686 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:42 -0400
Subject: [PATCH 0458/2047] tracefs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Acked-by: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-75-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/tracefs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 57ac8aa4a724..2feb6c58648c 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -132,7 +132,7 @@ static struct inode *tracefs_get_inode(struct super_block *sb)
 	struct inode *inode = new_inode(sb);
 	if (inode) {
 		inode->i_ino = get_next_ino();
-		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	}
 	return inode;
 }

From d07d3a7e9376133e1065cb29c764556a158f5bff Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:43 -0400
Subject: [PATCH 0459/2047] ubifs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
Message-Id: <20230705190309.579783-76-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ubifs/debug.c   |  4 ++--
 fs/ubifs/dir.c     | 24 +++++++++++-------------
 fs/ubifs/file.c    | 16 +++++++++-------
 fs/ubifs/ioctl.c   |  2 +-
 fs/ubifs/journal.c |  4 ++--
 fs/ubifs/super.c   |  4 ++--
 fs/ubifs/xattr.c   |  6 +++---
 7 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 9c9d3f0e36a4..eef9e527d9ff 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -243,8 +243,8 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
 	       (unsigned int)inode->i_mtime.tv_sec,
 	       (unsigned int)inode->i_mtime.tv_nsec);
 	pr_err("\tctime          %u.%u\n",
-	       (unsigned int)inode->i_ctime.tv_sec,
-	       (unsigned int)inode->i_ctime.tv_nsec);
+	       (unsigned int) inode_get_ctime(inode).tv_sec,
+	       (unsigned int) inode_get_ctime(inode).tv_nsec);
 	pr_err("\tcreat_sqnum    %llu\n", ui->creat_sqnum);
 	pr_err("\txattr_size     %u\n", ui->xattr_size);
 	pr_err("\txattr_cnt      %u\n", ui->xattr_cnt);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 7ec25310bd8a..3a1ba8ba308a 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -96,8 +96,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
 	inode->i_flags |= S_NOCMTIME;
 
 	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
-	inode->i_mtime = inode->i_atime = inode->i_ctime =
-			 current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_mapping->nrpages = 0;
 
 	if (!is_xattr) {
@@ -325,7 +324,7 @@ static int ubifs_create(struct mnt_idmap *idmap, struct inode *dir,
 	mutex_lock(&dir_ui->ui_mutex);
 	dir->i_size += sz_change;
 	dir_ui->ui_size = dir->i_size;
-	dir->i_mtime = dir->i_ctime = inode->i_ctime;
+	dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
 	err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
 	if (err)
 		goto out_cancel;
@@ -765,10 +764,10 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
 
 	inc_nlink(inode);
 	ihold(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	dir->i_size += sz_change;
 	dir_ui->ui_size = dir->i_size;
-	dir->i_mtime = dir->i_ctime = inode->i_ctime;
+	dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
 	err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
 	if (err)
 		goto out_cancel;
@@ -838,11 +837,11 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
 	}
 
 	lock_2_inodes(dir, inode);
-	inode->i_ctime = current_time(dir);
+	inode_set_ctime_current(inode);
 	drop_nlink(inode);
 	dir->i_size -= sz_change;
 	dir_ui->ui_size = dir->i_size;
-	dir->i_mtime = dir->i_ctime = inode->i_ctime;
+	dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
 	err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0);
 	if (err)
 		goto out_cancel;
@@ -940,12 +939,12 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
 	}
 
 	lock_2_inodes(dir, inode);
-	inode->i_ctime = current_time(dir);
+	inode_set_ctime_current(inode);
 	clear_nlink(inode);
 	drop_nlink(dir);
 	dir->i_size -= sz_change;
 	dir_ui->ui_size = dir->i_size;
-	dir->i_mtime = dir->i_ctime = inode->i_ctime;
+	dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
 	err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0);
 	if (err)
 		goto out_cancel;
@@ -1019,7 +1018,7 @@ static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 	inc_nlink(dir);
 	dir->i_size += sz_change;
 	dir_ui->ui_size = dir->i_size;
-	dir->i_mtime = dir->i_ctime = inode->i_ctime;
+	dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
 	err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
 	if (err) {
 		ubifs_err(c, "cannot create directory, error %d", err);
@@ -1110,7 +1109,7 @@ static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	mutex_lock(&dir_ui->ui_mutex);
 	dir->i_size += sz_change;
 	dir_ui->ui_size = dir->i_size;
-	dir->i_mtime = dir->i_ctime = inode->i_ctime;
+	dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
 	err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
 	if (err)
 		goto out_cancel;
@@ -1210,7 +1209,7 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	mutex_lock(&dir_ui->ui_mutex);
 	dir->i_size += sz_change;
 	dir_ui->ui_size = dir->i_size;
-	dir->i_mtime = dir->i_ctime = inode->i_ctime;
+	dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
 	err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
 	if (err)
 		goto out_cancel;
@@ -1298,7 +1297,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
 			.dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
 	struct ubifs_budget_req wht_req;
-	struct timespec64 time;
 	unsigned int saved_nlink;
 	struct fscrypt_name old_nm, new_nm;
 
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 6738fe43040b..436b27d7c58f 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1092,7 +1092,7 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr)
 	if (attr->ia_valid & ATTR_MTIME)
 		inode->i_mtime = attr->ia_mtime;
 	if (attr->ia_valid & ATTR_CTIME)
-		inode->i_ctime = attr->ia_ctime;
+		inode_set_ctime_to_ts(inode, attr->ia_ctime);
 	if (attr->ia_valid & ATTR_MODE) {
 		umode_t mode = attr->ia_mode;
 
@@ -1192,7 +1192,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
 	mutex_lock(&ui->ui_mutex);
 	ui->ui_size = inode->i_size;
 	/* Truncation changes inode [mc]time */
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	/* Other attributes may be changed at the same time as well */
 	do_attr_changes(inode, attr);
 	err = ubifs_jnl_truncate(c, inode, old_size, new_size);
@@ -1239,7 +1239,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
 	mutex_lock(&ui->ui_mutex);
 	if (attr->ia_valid & ATTR_SIZE) {
 		/* Truncation changes inode [mc]time */
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 		/* 'truncate_setsize()' changed @i_size, update @ui_size */
 		ui->ui_size = inode->i_size;
 	}
@@ -1364,8 +1364,10 @@ out:
 static inline int mctime_update_needed(const struct inode *inode,
 				       const struct timespec64 *now)
 {
+	struct timespec64 ctime = inode_get_ctime(inode);
+
 	if (!timespec64_equal(&inode->i_mtime, now) ||
-	    !timespec64_equal(&inode->i_ctime, now))
+	    !timespec64_equal(&ctime, now))
 		return 1;
 	return 0;
 }
@@ -1396,7 +1398,7 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time,
 	if (flags & S_ATIME)
 		inode->i_atime = *time;
 	if (flags & S_CTIME)
-		inode->i_ctime = *time;
+		inode_set_ctime_to_ts(inode, *time);
 	if (flags & S_MTIME)
 		inode->i_mtime = *time;
 
@@ -1432,7 +1434,7 @@ static int update_mctime(struct inode *inode)
 			return err;
 
 		mutex_lock(&ui->ui_mutex);
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 		release = ui->dirty;
 		mark_inode_dirty_sync(inode);
 		mutex_unlock(&ui->ui_mutex);
@@ -1570,7 +1572,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
 		struct ubifs_inode *ui = ubifs_inode(inode);
 
 		mutex_lock(&ui->ui_mutex);
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 		release = ui->dirty;
 		mark_inode_dirty_sync(inode);
 		mutex_unlock(&ui->ui_mutex);
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 67c5108abd89..d79cabe193c3 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -118,7 +118,7 @@ static int setflags(struct inode *inode, int flags)
 	ui->flags &= ~ioctl2ubifs(UBIFS_SETTABLE_IOCTL_FLAGS);
 	ui->flags |= ioctl2ubifs(flags);
 	ubifs_set_inode_flags(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	release = ui->dirty;
 	mark_inode_dirty_sync(inode);
 	mutex_unlock(&ui->ui_mutex);
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index dc52ac0f4a34..ffc9beee7be6 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -454,8 +454,8 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
 	ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum);
 	ino->atime_sec  = cpu_to_le64(inode->i_atime.tv_sec);
 	ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
-	ino->ctime_sec  = cpu_to_le64(inode->i_ctime.tv_sec);
-	ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	ino->ctime_sec  = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+	ino->ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 	ino->mtime_sec  = cpu_to_le64(inode->i_mtime.tv_sec);
 	ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
 	ino->uid   = cpu_to_le32(i_uid_read(inode));
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 32cb14759796..b08fb28d16b5 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -146,8 +146,8 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
 	inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec);
 	inode->i_mtime.tv_sec  = (int64_t)le64_to_cpu(ino->mtime_sec);
 	inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec);
-	inode->i_ctime.tv_sec  = (int64_t)le64_to_cpu(ino->ctime_sec);
-	inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec);
+	inode_set_ctime(inode, (int64_t)le64_to_cpu(ino->ctime_sec),
+			le32_to_cpu(ino->ctime_nsec));
 	inode->i_mode = le32_to_cpu(ino->mode);
 	inode->i_size = le64_to_cpu(ino->size);
 
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 349228dd1191..406c82eab513 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -134,7 +134,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
 	ui->data_len = size;
 
 	mutex_lock(&host_ui->ui_mutex);
-	host->i_ctime = current_time(host);
+	inode_set_ctime_current(host);
 	host_ui->xattr_cnt += 1;
 	host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm));
 	host_ui->xattr_size += CALC_XATTR_BYTES(size);
@@ -215,7 +215,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
 	ui->data_len = size;
 
 	mutex_lock(&host_ui->ui_mutex);
-	host->i_ctime = current_time(host);
+	inode_set_ctime_current(host);
 	host_ui->xattr_size -= CALC_XATTR_BYTES(old_size);
 	host_ui->xattr_size += CALC_XATTR_BYTES(size);
 
@@ -474,7 +474,7 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host,
 		return err;
 
 	mutex_lock(&host_ui->ui_mutex);
-	host->i_ctime = current_time(host);
+	inode_set_ctime_current(host);
 	host_ui->xattr_cnt -= 1;
 	host_ui->xattr_size -= CALC_DENT_SIZE(fname_len(nm));
 	host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);

From 59ad88f26e97331a8ab955349d0b1d89040b12fc Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:44 -0400
Subject: [PATCH 0460/2047] udf: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-77-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/udf/ialloc.c |  2 +-
 fs/udf/inode.c  | 17 ++++++++++-------
 fs/udf/namei.c  | 24 ++++++++++++------------
 3 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 5f7ac8c84798..6b558cbbeb6b 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -100,7 +100,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode)
 		iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
 	else
 		iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	iinfo->i_crtime = inode->i_mtime;
 	if (unlikely(insert_inode_locked(inode) < 0)) {
 		make_bad_inode(inode);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 28cdfc57d946..d089795074e8 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -910,7 +910,7 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map)
 	map->oflags = UDF_BLK_NEW | UDF_BLK_MAPPED;
 	iinfo->i_next_alloc_block = map->lblk + 1;
 	iinfo->i_next_alloc_goal = newblocknum + 1;
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 
 	if (IS_SYNC(inode))
 		udf_sync_inode(inode);
@@ -1298,7 +1298,7 @@ set_size:
 			goto out_unlock;
 	}
 update_time:
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	if (IS_SYNC(inode))
 		udf_sync_inode(inode);
 	else
@@ -1329,6 +1329,7 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode)
 	int bs = inode->i_sb->s_blocksize;
 	int ret = -EIO;
 	uint32_t uid, gid;
+	struct timespec64 ctime;
 
 reread:
 	if (iloc->partitionReferenceNum >= sbi->s_partitions) {
@@ -1507,7 +1508,8 @@ reread:
 
 		udf_disk_stamp_to_time(&inode->i_atime, fe->accessTime);
 		udf_disk_stamp_to_time(&inode->i_mtime, fe->modificationTime);
-		udf_disk_stamp_to_time(&inode->i_ctime, fe->attrTime);
+		udf_disk_stamp_to_time(&ctime, fe->attrTime);
+		inode_set_ctime_to_ts(inode, ctime);
 
 		iinfo->i_unique = le64_to_cpu(fe->uniqueID);
 		iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
@@ -1522,7 +1524,8 @@ reread:
 		udf_disk_stamp_to_time(&inode->i_atime, efe->accessTime);
 		udf_disk_stamp_to_time(&inode->i_mtime, efe->modificationTime);
 		udf_disk_stamp_to_time(&iinfo->i_crtime, efe->createTime);
-		udf_disk_stamp_to_time(&inode->i_ctime, efe->attrTime);
+		udf_disk_stamp_to_time(&ctime, efe->attrTime);
+		inode_set_ctime_to_ts(inode, ctime);
 
 		iinfo->i_unique = le64_to_cpu(efe->uniqueID);
 		iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
@@ -1799,7 +1802,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 
 		udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime);
 		udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime);
-		udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime);
+		udf_time_to_disk_stamp(&fe->attrTime, inode_get_ctime(inode));
 		memset(&(fe->impIdent), 0, sizeof(struct regid));
 		strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER);
 		fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
@@ -1830,12 +1833,12 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 
 		udf_adjust_time(iinfo, inode->i_atime);
 		udf_adjust_time(iinfo, inode->i_mtime);
-		udf_adjust_time(iinfo, inode->i_ctime);
+		udf_adjust_time(iinfo, inode_get_ctime(inode));
 
 		udf_time_to_disk_stamp(&efe->accessTime, inode->i_atime);
 		udf_time_to_disk_stamp(&efe->modificationTime, inode->i_mtime);
 		udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime);
-		udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime);
+		udf_time_to_disk_stamp(&efe->attrTime, inode_get_ctime(inode));
 
 		memset(&(efe->impIdent), 0, sizeof(efe->impIdent));
 		strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index a95579b043ab..ae55ab8859b6 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -365,7 +365,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
 	*(__le32 *)((struct allocDescImpUse *)iter.fi.icb.impUse)->impUse =
 		cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL);
 	udf_fiiter_write_fi(&iter, NULL);
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
 	udf_fiiter_release(&iter);
 	udf_add_fid_counter(dir->i_sb, false, 1);
@@ -471,7 +471,7 @@ static int udf_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 	udf_fiiter_release(&iter);
 	udf_add_fid_counter(dir->i_sb, true, 1);
 	inc_nlink(dir);
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
 	d_instantiate_new(dentry, inode);
 
@@ -523,8 +523,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
 	inode->i_size = 0;
 	inode_dec_link_count(dir);
 	udf_add_fid_counter(dir->i_sb, true, -1);
-	inode->i_ctime = dir->i_ctime = dir->i_mtime =
-						current_time(inode);
+	dir->i_mtime = inode_set_ctime_to_ts(dir,
+					     inode_set_ctime_current(inode));
 	mark_inode_dirty(dir);
 	ret = 0;
 end_rmdir:
@@ -555,11 +555,11 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
 		set_nlink(inode, 1);
 	}
 	udf_fiiter_delete_entry(&iter);
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
 	inode_dec_link_count(inode);
 	udf_add_fid_counter(dir->i_sb, false, -1);
-	inode->i_ctime = dir->i_ctime;
+	inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
 	ret = 0;
 end_unlink:
 	udf_fiiter_release(&iter);
@@ -746,9 +746,9 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
 
 	inc_nlink(inode);
 	udf_add_fid_counter(dir->i_sb, false, 1);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
 	ihold(inode);
 	d_instantiate(dentry, inode);
@@ -833,7 +833,7 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 	 * Like most other Unix systems, set the ctime for inodes on a
 	 * rename.
 	 */
-	old_inode->i_ctime = current_time(old_inode);
+	inode_set_ctime_current(old_inode);
 	mark_inode_dirty(old_inode);
 
 	/*
@@ -861,13 +861,13 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 	}
 
 	if (new_inode) {
-		new_inode->i_ctime = current_time(new_inode);
+		inode_set_ctime_current(new_inode);
 		inode_dec_link_count(new_inode);
 		udf_add_fid_counter(old_dir->i_sb, S_ISDIR(new_inode->i_mode),
 				    -1);
 	}
-	old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
-	new_dir->i_ctime = new_dir->i_mtime = current_time(new_dir);
+	old_dir->i_mtime = inode_set_ctime_current(old_dir);
+	new_dir->i_mtime = inode_set_ctime_current(new_dir);
 	mark_inode_dirty(old_dir);
 	mark_inode_dirty(new_dir);
 

From 6eeb017e4f7f3669c1846d76dd247726f85354cc Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:45 -0400
Subject: [PATCH 0461/2047] ufs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-78-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ufs/dir.c    |  6 +++---
 fs/ufs/ialloc.c |  2 +-
 fs/ufs/inode.c  | 23 +++++++++++++----------
 fs/ufs/namei.c  |  8 ++++----
 4 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 379d75796a5c..fd57f03b6c93 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -107,7 +107,7 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
 	ufs_commit_chunk(page, pos, len);
 	ufs_put_page(page);
 	if (update_times)
-		dir->i_mtime = dir->i_ctime = current_time(dir);
+		dir->i_mtime = inode_set_ctime_current(dir);
 	mark_inode_dirty(dir);
 	ufs_handle_dirsync(dir);
 }
@@ -397,7 +397,7 @@ got_it:
 	ufs_set_de_type(sb, de, inode->i_mode);
 
 	ufs_commit_chunk(page, pos, rec_len);
-	dir->i_mtime = dir->i_ctime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 
 	mark_inode_dirty(dir);
 	err = ufs_handle_dirsync(dir);
@@ -539,7 +539,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
 		pde->d_reclen = cpu_to_fs16(sb, to - from);
 	dir->d_ino = 0;
 	ufs_commit_chunk(page, pos, to - from);
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 	err = ufs_handle_dirsync(inode);
 out:
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 06bd84d555bd..a1e7bd9d1f98 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -292,7 +292,7 @@ cg_found:
 	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
 	inode->i_blocks = 0;
 	inode->i_generation = 0;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	ufsi->i_flags = UFS_I(dir)->i_flags;
 	ufsi->i_lastfrag = 0;
 	ufsi->i_shadow = 0;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index a4246c83a8cd..21a4779a2de5 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -296,7 +296,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index,
 
 	if (new)
 		*new = 1;
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	if (IS_SYNC(inode))
 		ufs_sync_inode (inode);
 	mark_inode_dirty(inode);
@@ -378,7 +378,7 @@ ufs_inode_getblock(struct inode *inode, u64 ind_block,
 	mark_buffer_dirty(bh);
 	if (IS_SYNC(inode))
 		sync_dirty_buffer(bh);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 out:
 	brelse (bh);
@@ -580,11 +580,12 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 
 	inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size);
 	inode->i_atime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
-	inode->i_ctime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
+	inode_set_ctime(inode,
+			(signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec),
+			0);
 	inode->i_mtime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
 	inode->i_mtime.tv_nsec = 0;
 	inode->i_atime.tv_nsec = 0;
-	inode->i_ctime.tv_nsec = 0;
 	inode->i_blocks = fs32_to_cpu(sb, ufs_inode->ui_blocks);
 	inode->i_generation = fs32_to_cpu(sb, ufs_inode->ui_gen);
 	ufsi->i_flags = fs32_to_cpu(sb, ufs_inode->ui_flags);
@@ -626,10 +627,10 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
 
 	inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size);
 	inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime);
-	inode->i_ctime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_ctime);
+	inode_set_ctime(inode, fs64_to_cpu(sb, ufs2_inode->ui_ctime),
+			fs32_to_cpu(sb, ufs2_inode->ui_ctimensec));
 	inode->i_mtime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_mtime);
 	inode->i_atime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_atimensec);
-	inode->i_ctime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_ctimensec);
 	inode->i_mtime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_mtimensec);
 	inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks);
 	inode->i_generation = fs32_to_cpu(sb, ufs2_inode->ui_gen);
@@ -726,7 +727,8 @@ static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 	ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
 	ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec);
 	ufs_inode->ui_atime.tv_usec = 0;
-	ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb, inode->i_ctime.tv_sec);
+	ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb,
+						 inode_get_ctime(inode).tv_sec);
 	ufs_inode->ui_ctime.tv_usec = 0;
 	ufs_inode->ui_mtime.tv_sec = cpu_to_fs32(sb, inode->i_mtime.tv_sec);
 	ufs_inode->ui_mtime.tv_usec = 0;
@@ -770,8 +772,9 @@ static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode)
 	ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
 	ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec);
 	ufs_inode->ui_atimensec = cpu_to_fs32(sb, inode->i_atime.tv_nsec);
-	ufs_inode->ui_ctime = cpu_to_fs64(sb, inode->i_ctime.tv_sec);
-	ufs_inode->ui_ctimensec = cpu_to_fs32(sb, inode->i_ctime.tv_nsec);
+	ufs_inode->ui_ctime = cpu_to_fs64(sb, inode_get_ctime(inode).tv_sec);
+	ufs_inode->ui_ctimensec = cpu_to_fs32(sb,
+					      inode_get_ctime(inode).tv_nsec);
 	ufs_inode->ui_mtime = cpu_to_fs64(sb, inode->i_mtime.tv_sec);
 	ufs_inode->ui_mtimensec = cpu_to_fs32(sb, inode->i_mtime.tv_nsec);
 
@@ -1205,7 +1208,7 @@ static int ufs_truncate(struct inode *inode, loff_t size)
 	truncate_setsize(inode, size);
 
 	ufs_truncate_blocks(inode);
-	inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	mark_inode_dirty(inode);
 out:
 	UFSD("EXIT: err %d\n", err);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 36154b5aca6d..9cad29463791 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -153,7 +153,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
 	struct inode *inode = d_inode(old_dentry);
 	int error;
 
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	inode_inc_link_count(inode);
 	ihold(inode);
 
@@ -220,7 +220,7 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
 	if (err)
 		goto out;
 
-	inode->i_ctime = dir->i_ctime;
+	inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
 	inode_dec_link_count(inode);
 	err = 0;
 out:
@@ -282,7 +282,7 @@ static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 		if (!new_de)
 			goto out_dir;
 		ufs_set_link(new_dir, new_de, new_page, old_inode, 1);
-		new_inode->i_ctime = current_time(new_inode);
+		inode_set_ctime_current(new_inode);
 		if (dir_de)
 			drop_nlink(new_inode);
 		inode_dec_link_count(new_inode);
@@ -298,7 +298,7 @@ static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 	 * Like most other Unix systems, set the ctime for inodes on a
  	 * rename.
 	 */
-	old_inode->i_ctime = current_time(old_inode);
+	inode_set_ctime_current(old_inode);
 
 	ufs_delete_entry(old_dir, old_de, old_page);
 	mark_inode_dirty(old_inode);

From 0593be0c8e60c05cc130db2f98281b0fe3379083 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:46 -0400
Subject: [PATCH 0462/2047] vboxsf: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-79-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/vboxsf/utils.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
index dd0ae1188e87..576b91d571c5 100644
--- a/fs/vboxsf/utils.c
+++ b/fs/vboxsf/utils.c
@@ -128,8 +128,8 @@ int vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode,
 
 	inode->i_atime = ns_to_timespec64(
 				 info->access_time.ns_relative_to_unix_epoch);
-	inode->i_ctime = ns_to_timespec64(
-				 info->change_time.ns_relative_to_unix_epoch);
+	inode_set_ctime_to_ts(inode,
+			      ns_to_timespec64(info->change_time.ns_relative_to_unix_epoch));
 	inode->i_mtime = ns_to_timespec64(
 			   info->modification_time.ns_relative_to_unix_epoch);
 	return 0;

From a0a415e34b57368acd262e1172720252c028b936 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:47 -0400
Subject: [PATCH 0463/2047] xfs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-80-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/libxfs/xfs_inode_buf.c   | 5 +++--
 fs/xfs/libxfs/xfs_trans_inode.c | 2 +-
 fs/xfs/xfs_acl.c                | 2 +-
 fs/xfs/xfs_bmap_util.c          | 6 ++++--
 fs/xfs/xfs_inode.c              | 3 +--
 fs/xfs/xfs_inode_item.c         | 2 +-
 fs/xfs/xfs_iops.c               | 4 ++--
 fs/xfs/xfs_itable.c             | 4 ++--
 8 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 758aacd8166b..a35781577cad 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -222,7 +222,8 @@ xfs_inode_from_disk(
 	 */
 	inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime);
 	inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime);
-	inode->i_ctime = xfs_inode_from_disk_ts(from, from->di_ctime);
+	inode_set_ctime_to_ts(inode,
+			      xfs_inode_from_disk_ts(from, from->di_ctime));
 
 	ip->i_disk_size = be64_to_cpu(from->di_size);
 	ip->i_nblocks = be64_to_cpu(from->di_nblocks);
@@ -316,7 +317,7 @@ xfs_inode_to_disk(
 
 	to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
 	to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
-	to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
+	to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode));
 	to->di_nlink = cpu_to_be32(inode->i_nlink);
 	to->di_gen = cpu_to_be32(inode->i_generation);
 	to->di_mode = cpu_to_be16(inode->i_mode);
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index cb4796b6e693..6b2296ff248a 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -67,7 +67,7 @@ xfs_trans_ichgtime(
 	if (flags & XFS_ICHGTIME_MOD)
 		inode->i_mtime = tv;
 	if (flags & XFS_ICHGTIME_CHG)
-		inode->i_ctime = tv;
+		inode_set_ctime_to_ts(inode, tv);
 	if (flags & XFS_ICHGTIME_CREATE)
 		ip->i_crtime = tv;
 }
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 791db7d9c849..6b840301817a 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -233,7 +233,7 @@ xfs_acl_set_mode(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 	inode->i_mode = mode;
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
 	if (xfs_has_wsync(mp))
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index fbb675563208..fcefab687285 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1644,6 +1644,7 @@ xfs_swap_extents(
 	uint64_t		f;
 	int			resblks = 0;
 	unsigned int		flags = 0;
+	struct timespec64	ctime;
 
 	/*
 	 * Lock the inodes against other IO, page faults and truncate to
@@ -1756,8 +1757,9 @@ xfs_swap_extents(
 	 * process that the file was not changed out from
 	 * under it.
 	 */
-	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
-	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
+	ctime = inode_get_ctime(VFS_I(ip));
+	if ((sbp->bs_ctime.tv_sec != ctime.tv_sec) ||
+	    (sbp->bs_ctime.tv_nsec != ctime.tv_nsec) ||
 	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
 	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
 		error = -EBUSY;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 9e62cc500140..360fe83a334f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -843,10 +843,9 @@ xfs_init_new_inode(
 	ip->i_df.if_nextents = 0;
 	ASSERT(ip->i_nblocks == 0);
 
-	tv = current_time(inode);
+	tv = inode_set_ctime_current(inode);
 	inode->i_mtime = tv;
 	inode->i_atime = tv;
-	inode->i_ctime = tv;
 
 	ip->i_extsize = 0;
 	ip->i_diflags = 0;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 91c847a84e10..127b2410eb20 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -528,7 +528,7 @@ xfs_inode_to_log_dinode(
 	memset(to->di_pad3, 0, sizeof(to->di_pad3));
 	to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime);
 	to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime);
-	to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode->i_ctime);
+	to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode_get_ctime(inode));
 	to->di_nlink = inode->i_nlink;
 	to->di_gen = inode->i_generation;
 	to->di_mode = inode->i_mode;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 24718adb3c16..3a9363953ef2 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -574,7 +574,7 @@ xfs_vn_getattr(
 	stat->ino = ip->i_ino;
 	stat->atime = inode->i_atime;
 	stat->mtime = inode->i_mtime;
-	stat->ctime = inode->i_ctime;
+	stat->ctime = inode_get_ctime(inode);
 	stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
 
 	if (xfs_has_v3inodes(mp)) {
@@ -1055,7 +1055,7 @@ xfs_vn_update_time(
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	if (flags & S_CTIME)
-		inode->i_ctime = *now;
+		inode_set_ctime_to_ts(inode, *now);
 	if (flags & S_MTIME)
 		inode->i_mtime = *now;
 	if (flags & S_ATIME)
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f225413a993c..c2093cb56092 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -100,8 +100,8 @@ xfs_bulkstat_one_int(
 	buf->bs_atime_nsec = inode->i_atime.tv_nsec;
 	buf->bs_mtime = inode->i_mtime.tv_sec;
 	buf->bs_mtime_nsec = inode->i_mtime.tv_nsec;
-	buf->bs_ctime = inode->i_ctime.tv_sec;
-	buf->bs_ctime_nsec = inode->i_ctime.tv_nsec;
+	buf->bs_ctime = inode_get_ctime(inode).tv_sec;
+	buf->bs_ctime_nsec = inode_get_ctime(inode).tv_nsec;
 	buf->bs_gen = inode->i_generation;
 	buf->bs_mode = inode->i_mode;
 

From f74207d84dab31ed7e36f530281c556c9815cbc5 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:48 -0400
Subject: [PATCH 0464/2047] zonefs: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-81-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/zonefs/super.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index bbe44a26a8e5..eaaa23cf6d93 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -658,7 +658,8 @@ static struct inode *zonefs_get_file_inode(struct inode *dir,
 
 	inode->i_ino = ino;
 	inode->i_mode = z->z_mode;
-	inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
+	inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode,
+								inode_get_ctime(dir));
 	inode->i_uid = z->z_uid;
 	inode->i_gid = z->z_gid;
 	inode->i_size = z->z_wpoffset;
@@ -694,7 +695,8 @@ static struct inode *zonefs_get_zgroup_inode(struct super_block *sb,
 	inode->i_ino = ino;
 	inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555);
 	inode->i_size = sbi->s_zgroup[ztype].g_nr_zones;
-	inode->i_ctime = inode->i_mtime = inode->i_atime = root->i_ctime;
+	inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode,
+								inode_get_ctime(root));
 	inode->i_private = &sbi->s_zgroup[ztype];
 	set_nlink(inode, 2);
 
@@ -1317,7 +1319,7 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
 
 	inode->i_ino = bdev_nr_zones(sb->s_bdev);
 	inode->i_mode = S_IFDIR | 0555;
-	inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_op = &zonefs_dir_inode_operations;
 	inode->i_fop = &zonefs_dir_operations;
 	inode->i_size = 2;

From e359147f01606e17d74df96cfeb0027f540f5e97 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:49 -0400
Subject: [PATCH 0465/2047] linux: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-82-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs_stack.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index 54210a42c30d..010d39d0dc1c 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -24,7 +24,7 @@ static inline void fsstack_copy_attr_times(struct inode *dest,
 {
 	dest->i_atime = src->i_atime;
 	dest->i_mtime = src->i_mtime;
-	dest->i_ctime = src->i_ctime;
+	inode_set_ctime_to_ts(dest, inode_get_ctime(src));
 }
 
 #endif /* _LINUX_FS_STACK_H */

From 783904f50a9c8264543c73521a20de7916e40be7 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:50 -0400
Subject: [PATCH 0466/2047] mqueue: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-83-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 ipc/mqueue.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 71881bddad25..ba8215ed663a 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -302,7 +302,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 	inode->i_mode = mode;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
-	inode->i_mtime = inode->i_ctime = inode->i_atime = current_time(inode);
+	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 
 	if (S_ISREG(mode)) {
 		struct mqueue_inode_info *info;
@@ -596,7 +596,7 @@ static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg)
 
 	put_ipc_ns(ipc_ns);
 	dir->i_size += DIRENT_SIZE;
-	dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);
+	dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
 
 	d_instantiate(dentry, inode);
 	dget(dentry);
@@ -618,7 +618,7 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct inode *inode = d_inode(dentry);
 
-	dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);
+	dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
 	dir->i_size -= DIRENT_SIZE;
 	drop_nlink(inode);
 	dput(dentry);
@@ -635,7 +635,8 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
 static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
 				size_t count, loff_t *off)
 {
-	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
+	struct inode *inode = file_inode(filp);
+	struct mqueue_inode_info *info = MQUEUE_I(inode);
 	char buffer[FILENT_SIZE];
 	ssize_t ret;
 
@@ -656,7 +657,7 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
 	if (ret <= 0)
 		return ret;
 
-	file_inode(filp)->i_atime = file_inode(filp)->i_ctime = current_time(file_inode(filp));
+	inode->i_atime = inode_set_ctime_current(inode);
 	return ret;
 }
 
@@ -1162,8 +1163,7 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
 				goto out_unlock;
 			__do_notify(info);
 		}
-		inode->i_atime = inode->i_mtime = inode->i_ctime =
-				current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	}
 out_unlock:
 	spin_unlock(&info->lock);
@@ -1257,8 +1257,7 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
 
 		msg_ptr = msg_get(info);
 
-		inode->i_atime = inode->i_mtime = inode->i_ctime =
-				current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 
 		/* There is now free space in queue. */
 		pipelined_receive(&wake_q, info);
@@ -1396,7 +1395,7 @@ retry:
 	if (notification == NULL) {
 		if (info->notify_owner == task_tgid(current)) {
 			remove_notification(info);
-			inode->i_atime = inode->i_ctime = current_time(inode);
+			inode->i_atime = inode_set_ctime_current(inode);
 		}
 	} else if (info->notify_owner != NULL) {
 		ret = -EBUSY;
@@ -1422,7 +1421,7 @@ retry:
 
 		info->notify_owner = get_pid(task_tgid(current));
 		info->notify_user_ns = get_user_ns(current_user_ns());
-		inode->i_atime = inode->i_ctime = current_time(inode);
+		inode->i_atime = inode_set_ctime_current(inode);
 	}
 	spin_unlock(&info->lock);
 out_fput:
@@ -1485,7 +1484,7 @@ static int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old)
 			f.file->f_flags &= ~O_NONBLOCK;
 		spin_unlock(&f.file->f_lock);
 
-		inode->i_atime = inode->i_ctime = current_time(inode);
+		inode->i_atime = inode_set_ctime_current(inode);
 	}
 
 	spin_unlock(&info->lock);

From 417d2b6b112dc031d7e00ccf42ec677fec1c2c39 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:51 -0400
Subject: [PATCH 0467/2047] bpf: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-84-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 kernel/bpf/inode.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 4174f76133df..99d0625b6c82 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -118,9 +118,8 @@ static struct inode *bpf_get_inode(struct super_block *sb,
 		return ERR_PTR(-ENOSPC);
 
 	inode->i_ino = get_next_ino();
-	inode->i_atime = current_time(inode);
+	inode->i_atime = inode_set_ctime_current(inode);
 	inode->i_mtime = inode->i_atime;
-	inode->i_ctime = inode->i_atime;
 
 	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
 
@@ -148,8 +147,7 @@ static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode,
 	d_instantiate(dentry, inode);
 	dget(dentry);
 
-	dir->i_mtime = current_time(dir);
-	dir->i_ctime = dir->i_mtime;
+	dir->i_mtime = inode_set_ctime_current(dir);
 }
 
 static int bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir,

From 6528733416f13dd67eda1f34e74a2242af36d638 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:52 -0400
Subject: [PATCH 0468/2047] shmem: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-85-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/shmem.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index a7dfbd221a25..72129c101800 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1047,7 +1047,7 @@ whole_folios:
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	shmem_undo_range(inode, lstart, lend, false);
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	inode->i_mtime = inode_set_ctime_current(inode);
 	inode_inc_iversion(inode);
 }
 EXPORT_SYMBOL_GPL(shmem_truncate_range);
@@ -1144,9 +1144,9 @@ static int shmem_setattr(struct mnt_idmap *idmap,
 	if (attr->ia_valid & ATTR_MODE)
 		error = posix_acl_chmod(idmap, dentry, inode->i_mode);
 	if (!error && update_ctime) {
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		if (update_mtime)
-			inode->i_mtime = inode->i_ctime;
+			inode->i_mtime = inode_get_ctime(inode);
 		inode_inc_iversion(inode);
 	}
 	return error;
@@ -2372,7 +2372,7 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
 		inode->i_ino = ino;
 		inode_init_owner(idmap, inode, dir, mode);
 		inode->i_blocks = 0;
-		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 		inode->i_generation = get_random_u32();
 		info = SHMEM_I(inode);
 		memset(info, 0, (char *)inode - (char *)info);
@@ -3084,7 +3084,7 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
 
 		error = 0;
 		dir->i_size += BOGO_DIRENT_SIZE;
-		dir->i_ctime = dir->i_mtime = current_time(dir);
+		dir->i_mtime = inode_set_ctime_current(dir);
 		inode_inc_iversion(dir);
 		d_instantiate(dentry, inode);
 		dget(dentry); /* Extra count - pin the dentry in core */
@@ -3160,7 +3160,8 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
 	}
 
 	dir->i_size += BOGO_DIRENT_SIZE;
-	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+	dir->i_mtime = inode_set_ctime_to_ts(dir,
+					     inode_set_ctime_current(inode));
 	inode_inc_iversion(dir);
 	inc_nlink(inode);
 	ihold(inode);	/* New dentry reference */
@@ -3178,7 +3179,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 		shmem_free_inode(inode->i_sb);
 
 	dir->i_size -= BOGO_DIRENT_SIZE;
-	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+	dir->i_mtime = inode_set_ctime_to_ts(dir,
+					     inode_set_ctime_current(inode));
 	inode_inc_iversion(dir);
 	drop_nlink(inode);
 	dput(dentry);	/* Undo the count from "create" - this does all the work */
@@ -3320,7 +3322,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 		folio_put(folio);
 	}
 	dir->i_size += BOGO_DIRENT_SIZE;
-	dir->i_ctime = dir->i_mtime = current_time(dir);
+	dir->i_mtime = inode_set_ctime_current(dir);
 	inode_inc_iversion(dir);
 	d_instantiate(dentry, inode);
 	dget(dentry);
@@ -3392,7 +3394,7 @@ static int shmem_fileattr_set(struct mnt_idmap *idmap,
 		(fa->flags & SHMEM_FL_USER_MODIFIABLE);
 
 	shmem_set_inode_flags(inode, info->fsflags);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_current(inode);
 	inode_inc_iversion(inode);
 	return 0;
 }
@@ -3462,7 +3464,7 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
 	name = xattr_full_name(handler, name);
 	err = simple_xattr_set(&info->xattrs, name, value, size, flags, NULL);
 	if (!err) {
-		inode->i_ctime = current_time(inode);
+		inode_set_ctime_current(inode);
 		inode_inc_iversion(inode);
 	}
 	return err;

From bc734e8ec11305f164d1d540bc55cfbd41f28c05 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:53 -0400
Subject: [PATCH 0469/2047] sunrpc: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-86-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 net/sunrpc/rpc_pipe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 0b6034fab9ab..f420d8457345 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -472,7 +472,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
 		return NULL;
 	inode->i_ino = get_next_ino();
 	inode->i_mode = mode;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	switch (mode & S_IFMT) {
 	case S_IFDIR:
 		inode->i_fop = &simple_dir_operations;

From 6ac5422617eda03b95c00367d0e59fcb4d7ca3cc Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:54 -0400
Subject: [PATCH 0470/2047] apparmor: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-87-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 security/apparmor/apparmorfs.c    | 6 +++---
 security/apparmor/policy_unpack.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index c06053718836..bd6a910f6528 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -226,7 +226,7 @@ static int __aafs_setup_d_inode(struct inode *dir, struct dentry *dentry,
 
 	inode->i_ino = get_next_ino();
 	inode->i_mode = mode;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	inode->i_private = data;
 	if (S_ISDIR(mode)) {
 		inode->i_op = iops ? iops : &simple_dir_inode_operations;
@@ -1557,7 +1557,7 @@ void __aafs_profile_migrate_dents(struct aa_profile *old,
 		if (new->dents[i]) {
 			struct inode *inode = d_inode(new->dents[i]);
 
-			inode->i_mtime = inode->i_ctime = current_time(inode);
+			inode->i_mtime = inode_set_ctime_current(inode);
 		}
 		old->dents[i] = NULL;
 	}
@@ -2543,7 +2543,7 @@ static int aa_mk_null_file(struct dentry *parent)
 
 	inode->i_ino = get_next_ino();
 	inode->i_mode = S_IFCHR | S_IRUGO | S_IWUGO;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO,
 			   MKDEV(MEM_MAJOR, 3));
 	d_instantiate(dentry, inode);
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index ed180722a833..8b8846073e14 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -89,10 +89,10 @@ void __aa_loaddata_update(struct aa_loaddata *data, long revision)
 		struct inode *inode;
 
 		inode = d_inode(data->dents[AAFS_LOADDATA_DIR]);
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 
 		inode = d_inode(data->dents[AAFS_LOADDATA_REVISION]);
-		inode->i_mtime = inode->i_ctime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 	}
 }
 

From 428c33f2851e226673dc04f970cbcf5829100b3f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:55 -0400
Subject: [PATCH 0471/2047] security: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-88-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 security/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/inode.c b/security/inode.c
index 6c326939750d..3aa75fffa8c9 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -145,7 +145,7 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode,
 
 	inode->i_ino = get_next_ino();
 	inode->i_mode = mode;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 	inode->i_private = data;
 	if (S_ISDIR(mode)) {
 		inode->i_op = &simple_dir_inode_operations;

From 4c1698d3033d3935b2b4170270e1ef641ccda446 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 15:01:56 -0400
Subject: [PATCH 0472/2047] selinux: convert to ctime accessor functions

In later patches, we're going to change how the inode's ctime field is
used. Switch to using accessor functions instead of raw accesses of
inode->i_ctime.

Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705190309.579783-89-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 security/selinux/selinuxfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index bad1f6b685fd..9dafb6ff110d 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1197,7 +1197,7 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode)
 
 	if (ret) {
 		ret->i_mode = mode;
-		ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+		ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
 	}
 	return ret;
 }

From 13bc24457850583a2e7203ded05b7209ab4bc5ef Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 5 Jul 2023 14:58:12 -0400
Subject: [PATCH 0473/2047] fs: rename i_ctime field to __i_ctime

Now that everything in-tree is converted to use the accessor functions,
rename the i_ctime field in the inode to discourage direct access.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230705185812.579118-4-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 42755cb7d55b..61f27011fd04 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -642,7 +642,7 @@ struct inode {
 	loff_t			i_size;
 	struct timespec64	i_atime;
 	struct timespec64	i_mtime;
-	struct timespec64	i_ctime;
+	struct timespec64	__i_ctime; /* use inode_*_ctime accessors! */
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 	unsigned short          i_bytes;
 	u8			i_blkbits;
@@ -1485,7 +1485,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode);
  */
 static inline struct timespec64 inode_get_ctime(const struct inode *inode)
 {
-	return inode->i_ctime;
+	return inode->__i_ctime;
 }
 
 /**
@@ -1498,7 +1498,7 @@ static inline struct timespec64 inode_get_ctime(const struct inode *inode)
 static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
 						      struct timespec64 ts)
 {
-	inode->i_ctime = ts;
+	inode->__i_ctime = ts;
 	return ts;
 }
 

From d85f1b5bad5752831b11bdeb5a1c403075673c0d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Thu, 13 Jul 2023 09:52:48 -0400
Subject: [PATCH 0474/2047] gfs2: fix timestamp handling on quota inodes

While these aren't generally visible from userland, it's best to be
consistent with timestamp handling. When adjusting the quota, update the
mtime and ctime like we would with a write operation on any other inode,
and avoid updating the atime which should only be done for reads.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Andreas Gruenbacher <agruenba@redhat.com>
Message-Id: <20230713135249.153796-1-jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/gfs2/quota.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 704192b73605..aa5fd06d47bc 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -871,7 +871,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 		size = loc + sizeof(struct gfs2_quota);
 		if (size > inode->i_size)
 			i_size_write(inode, size);
-		inode->i_mtime = inode->i_atime = current_time(inode);
+		inode->i_mtime = inode_set_ctime_current(inode);
 		mark_inode_dirty(inode);
 		set_bit(QDF_REFRESH, &qd->qd_flags);
 	}

From 8cf57d7217c32133d25615324c0ab4aaacf4d9c4 Mon Sep 17 00:00:00 2001
From: Anastasia Eskova <anastasia.eskova@ibm.com>
Date: Fri, 28 Apr 2023 06:42:42 -0400
Subject: [PATCH 0475/2047] s390: add support for user-defined certificates

Enable receiving the user-defined certificates from the s390x
hypervisor via new diagnose 0x320 calls, and make them available to the
Linux root user as 'cert_store_key' type keys in a so-called
'cert_store' keyring.

New user-space interfaces:

  /sys/firmware/cert_store/refresh

    Writing to this attribute re-fetches certificates via DIAG 0x320

  /sys/firmware/cert_store/cs_status

    Reading from this attribute returns either of:

	  "uninitialized"
	    If no certificate has been retrieved yet
	  "ok"
	    If certificates have been successfully retrieved
	  "failed (<number>)"
	    If certificate retrieval failed with reason code <number>

New debug trace areas:

  /sys/kernel/debug/s390dbf/cert_store_msg

  /sys/kernel/debug/s390dbf/cert_store_hexdump

Usage example:

To initiate request for certificates available to the system as root:

  $ echo 1 > /sys/firmware/cert_store/refresh

Upon success the '/sys/firmware/cert_store/cs_status' contains
the value 'ok'.

  $ cat /sys/firmware/cert_store/cs_status
  ok

Get the ID of the keyring 'cert_store':

  $ keyctl search @us keyring cert_store
OR
  $ keyctl link @us @s; keyctl request keyring cert_store

Obtain list of IDs of certificates:

  $ keyctl rlist <cert_store keyring ID>

Display certificate content as hex-dump:

  $ keyctl read <certificate ID>

Read certificate contents as binary data:

  $ keyctl pipe <certificate ID> >cert_data

Display certificate description:

  $ keyctl describe <certificate ID>

The certificate description has the following format:

  <64 bytes certificate name in EBCDIC> ':'
  <certificate index as obtained from hypervisor> ':'
  <certificate store token obtained from hypervisor>

The certificate description in /proc/keys has certificate name
represented in ASCII.

Users can read but cannot update the content of the certificate.

Signed-off-by: Anastasia Eskova <anastasia.eskova@ibm.com>
Reviewed-by: Peter Oberparleiter <oberpar@linux.ibm.com>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/Kconfig              |  10 +
 arch/s390/include/asm/diag.h   |   1 +
 arch/s390/include/asm/sclp.h   |   1 +
 arch/s390/kernel/Makefile      |   2 +-
 arch/s390/kernel/cert_store.c  | 810 +++++++++++++++++++++++++++++++++
 arch/s390/kernel/diag.c        |   1 +
 drivers/s390/char/sclp_early.c |   1 +
 7 files changed, 825 insertions(+), 1 deletion(-)
 create mode 100644 arch/s390/kernel/cert_store.c

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 5b39918b7042..c0afca69904e 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -512,6 +512,16 @@ config KEXEC_SIG
 	  verification for the corresponding kernel image type being
 	  loaded in order for this to work.
 
+config CERT_STORE
+	bool "Get user certificates via DIAG320"
+	depends on KEYS
+	help
+	  Enable this option if you want to access user-provided secure boot
+	  certificates via DIAG 0x320.
+
+	  These certificates will be made available via the keyring named
+	  'cert_store'.
+
 config KERNEL_NOBP
 	def_bool n
 	prompt "Enable modified branch prediction for the kernel by default"
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 902e0330dd91..fb5a886ff47f 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -36,6 +36,7 @@ enum diag_stat_enum {
 	DIAG_STAT_X304,
 	DIAG_STAT_X308,
 	DIAG_STAT_X318,
+	DIAG_STAT_X320,
 	DIAG_STAT_X500,
 	NR_DIAG_STAT
 };
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index dac7da88f61f..5742d23bba13 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -86,6 +86,7 @@ struct sclp_info {
 	unsigned char has_kss : 1;
 	unsigned char has_gisaf : 1;
 	unsigned char has_diag318 : 1;
+	unsigned char has_diag320 : 1;
 	unsigned char has_sipl : 1;
 	unsigned char has_sipl_eckd : 1;
 	unsigned char has_dirq : 1;
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 6b2a051e1f8a..8d7514c72bb8 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -68,7 +68,7 @@ obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o
 
 obj-$(CONFIG_KEXEC_FILE)	+= machine_kexec_file.o kexec_image.o
 obj-$(CONFIG_KEXEC_FILE)	+= kexec_elf.o
-
+obj-$(CONFIG_CERT_STORE)	+= cert_store.o
 obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT)	+= ima_arch.o
 
 obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o
diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
new file mode 100644
index 000000000000..1cbeb9ce0eb1
--- /dev/null
+++ b/arch/s390/kernel/cert_store.c
@@ -0,0 +1,810 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DIAG 0x320 support and certificate store handling
+ *
+ * Copyright IBM Corp. 2023
+ * Author(s):	Anastasia Eskova <anastasia.eskova@ibm.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/key-type.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <crypto/sha2.h>
+#include <keys/user-type.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+
+#define DIAG_MAX_RETRIES		10
+
+#define VCE_FLAGS_VALID_MASK		0x80
+
+#define ISM_LEN_DWORDS			4
+#define VCSSB_LEN_BYTES			128
+#define VCSSB_LEN_NO_CERTS		4
+#define VCB_LEN_NO_CERTS		64
+#define VC_NAME_LEN_BYTES		64
+
+#define CERT_STORE_KEY_TYPE_NAME	"cert_store_key"
+#define CERT_STORE_KEYRING_NAME		"cert_store"
+
+static debug_info_t *cert_store_dbf;
+static debug_info_t *cert_store_hexdump;
+
+#define pr_dbf_msg(fmt, ...) \
+	debug_sprintf_event(cert_store_dbf, 3, fmt "\n", ## __VA_ARGS__)
+
+enum diag320_subcode {
+	DIAG320_SUBCODES	= 0,
+	DIAG320_STORAGE		= 1,
+	DIAG320_CERT_BLOCK	= 2,
+};
+
+enum diag320_rc {
+	DIAG320_RC_OK		= 0x0001,
+	DIAG320_RC_CS_NOMATCH	= 0x0306,
+};
+
+/* Verification Certificates Store Support Block (VCSSB). */
+struct vcssb {
+	u32 vcssb_length;
+	u8  pad_0x04[3];
+	u8  version;
+	u8  pad_0x08[8];
+	u32 cs_token;
+	u8  pad_0x14[12];
+	u16 total_vc_index_count;
+	u16 max_vc_index_count;
+	u8  pad_0x24[28];
+	u32 max_vce_length;
+	u32 max_vcxe_length;
+	u8  pad_0x48[8];
+	u32 max_single_vcb_length;
+	u32 total_vcb_length;
+	u32 max_single_vcxb_length;
+	u32 total_vcxb_length;
+	u8  pad_0x60[32];
+} __packed __aligned(8);
+
+/* Verification Certificate Entry (VCE) Header. */
+struct vce_header {
+	u32 vce_length;
+	u8  flags;
+	u8  key_type;
+	u16 vc_index;
+	u8  vc_name[VC_NAME_LEN_BYTES]; /* EBCDIC */
+	u8  vc_format;
+	u8  pad_0x49;
+	u16 key_id_length;
+	u8  pad_0x4c;
+	u8  vc_hash_type;
+	u16 vc_hash_length;
+	u8  pad_0x50[4];
+	u32 vc_length;
+	u8  pad_0x58[8];
+	u16 vc_hash_offset;
+	u16 vc_offset;
+	u8  pad_0x64[28];
+} __packed __aligned(4);
+
+/* Verification Certificate Block (VCB) Header. */
+struct vcb_header {
+	u32 vcb_input_length;
+	u8  pad_0x04[4];
+	u16 first_vc_index;
+	u16 last_vc_index;
+	u32 pad_0x0c;
+	u32 cs_token;
+	u8  pad_0x14[12];
+	u32 vcb_output_length;
+	u8  pad_0x24[3];
+	u8  version;
+	u16 stored_vc_count;
+	u16 remaining_vc_count;
+	u8  pad_0x2c[20];
+} __packed __aligned(4);
+
+/* Verification Certificate Block (VCB). */
+struct vcb {
+	struct vcb_header vcb_hdr;
+	u8 vcb_buf[];
+} __packed __aligned(4);
+
+/* Verification Certificate Entry (VCE). */
+struct vce {
+	struct vce_header vce_hdr;
+	u8 cert_data_buf[];
+} __packed __aligned(4);
+
+static void cert_store_key_describe(const struct key *key, struct seq_file *m)
+{
+	char ascii[VC_NAME_LEN_BYTES + 1];
+
+	/*
+	 * First 64 bytes of the key description is key name in EBCDIC CP 500.
+	 * Convert it to ASCII for displaying in /proc/keys.
+	 */
+	strscpy(ascii, key->description, sizeof(ascii));
+	EBCASC_500(ascii, VC_NAME_LEN_BYTES);
+	seq_puts(m, ascii);
+
+	seq_puts(m, &key->description[VC_NAME_LEN_BYTES]);
+	if (key_is_positive(key))
+		seq_printf(m, ": %u", key->datalen);
+}
+
+/*
+ * Certificate store key type takes over properties of
+ * user key but cannot be updated.
+ */
+static struct key_type key_type_cert_store_key = {
+	.name		= CERT_STORE_KEY_TYPE_NAME,
+	.preparse	= user_preparse,
+	.free_preparse	= user_free_preparse,
+	.instantiate	= generic_key_instantiate,
+	.revoke		= user_revoke,
+	.destroy	= user_destroy,
+	.describe	= cert_store_key_describe,
+	.read		= user_read,
+};
+
+/* Logging functions. */
+static void pr_dbf_vcb(const struct vcb *b)
+{
+	pr_dbf_msg("VCB Header:");
+	pr_dbf_msg("vcb_input_length: %d", b->vcb_hdr.vcb_input_length);
+	pr_dbf_msg("first_vc_index: %d", b->vcb_hdr.first_vc_index);
+	pr_dbf_msg("last_vc_index: %d", b->vcb_hdr.last_vc_index);
+	pr_dbf_msg("cs_token: %d", b->vcb_hdr.cs_token);
+	pr_dbf_msg("vcb_output_length: %d", b->vcb_hdr.vcb_output_length);
+	pr_dbf_msg("version: %d", b->vcb_hdr.version);
+	pr_dbf_msg("stored_vc_count: %d", b->vcb_hdr.stored_vc_count);
+	pr_dbf_msg("remaining_vc_count: %d", b->vcb_hdr.remaining_vc_count);
+}
+
+static void pr_dbf_vce(const struct vce *e)
+{
+	unsigned char vc_name[VC_NAME_LEN_BYTES + 1];
+	char log_string[VC_NAME_LEN_BYTES + 40];
+
+	pr_dbf_msg("VCE Header:");
+	pr_dbf_msg("vce_hdr.vce_length: %d", e->vce_hdr.vce_length);
+	pr_dbf_msg("vce_hdr.flags: %d", e->vce_hdr.flags);
+	pr_dbf_msg("vce_hdr.key_type: %d", e->vce_hdr.key_type);
+	pr_dbf_msg("vce_hdr.vc_index: %d", e->vce_hdr.vc_index);
+	pr_dbf_msg("vce_hdr.vc_format: %d", e->vce_hdr.vc_format);
+	pr_dbf_msg("vce_hdr.key_id_length: %d", e->vce_hdr.key_id_length);
+	pr_dbf_msg("vce_hdr.vc_hash_type: %d", e->vce_hdr.vc_hash_type);
+	pr_dbf_msg("vce_hdr.vc_hash_length: %d", e->vce_hdr.vc_hash_length);
+	pr_dbf_msg("vce_hdr.vc_hash_offset: %d", e->vce_hdr.vc_hash_offset);
+	pr_dbf_msg("vce_hdr.vc_length: %d", e->vce_hdr.vc_length);
+	pr_dbf_msg("vce_hdr.vc_offset: %d", e->vce_hdr.vc_offset);
+
+	/* Certificate name in ASCII. */
+	memcpy(vc_name, e->vce_hdr.vc_name, VC_NAME_LEN_BYTES);
+	EBCASC_500(vc_name, VC_NAME_LEN_BYTES);
+	vc_name[VC_NAME_LEN_BYTES] = '\0';
+
+	snprintf(log_string, sizeof(log_string),
+		 "index: %d vce_hdr.vc_name (ASCII): %s",
+		 e->vce_hdr.vc_index, vc_name);
+	debug_text_event(cert_store_hexdump, 3, log_string);
+
+	/* Certificate data. */
+	debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data start");
+	debug_event(cert_store_hexdump, 3, (u8 *)e->cert_data_buf, 128);
+	debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data end");
+	debug_event(cert_store_hexdump, 3,
+		    (u8 *)e->cert_data_buf + e->vce_hdr.vce_length - 128, 128);
+}
+
+static void pr_dbf_vcssb(const struct vcssb *s)
+{
+	debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode1");
+	debug_event(cert_store_hexdump, 3, (u8 *)s, VCSSB_LEN_BYTES);
+
+	pr_dbf_msg("VCSSB:");
+	pr_dbf_msg("vcssb_length: %u", s->vcssb_length);
+	pr_dbf_msg("version: %u", s->version);
+	pr_dbf_msg("cs_token: %u", s->cs_token);
+	pr_dbf_msg("total_vc_index_count: %u", s->total_vc_index_count);
+	pr_dbf_msg("max_vc_index_count: %u", s->max_vc_index_count);
+	pr_dbf_msg("max_vce_length: %u", s->max_vce_length);
+	pr_dbf_msg("max_vcxe_length: %u", s->max_vce_length);
+	pr_dbf_msg("max_single_vcb_length: %u", s->max_single_vcb_length);
+	pr_dbf_msg("total_vcb_length: %u", s->total_vcb_length);
+	pr_dbf_msg("max_single_vcxb_length: %u", s->max_single_vcxb_length);
+	pr_dbf_msg("total_vcxb_length: %u", s->total_vcxb_length);
+}
+
+static int __diag320(unsigned long subcode, void *addr)
+{
+	union register_pair rp = { .even = (unsigned long)addr, };
+
+	asm volatile(
+		"	diag	%[rp],%[subcode],0x320\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b, 0b)
+		: [rp] "+d" (rp.pair)
+		: [subcode] "d" (subcode)
+		: "cc", "memory");
+
+	return rp.odd;
+}
+
+static int diag320(unsigned long subcode, void *addr)
+{
+	diag_stat_inc(DIAG_STAT_X320);
+
+	return __diag320(subcode, addr);
+}
+
+/*
+ * Calculate SHA256 hash of the VCE certificate and compare it to hash stored in
+ * VCE. Return -EINVAL if hashes don't match.
+ */
+static int check_certificate_hash(const struct vce *vce)
+{
+	u8 hash[SHA256_DIGEST_SIZE];
+	u16 vc_hash_length;
+	u8 *vce_hash;
+
+	vce_hash = (u8 *)vce + vce->vce_hdr.vc_hash_offset;
+	vc_hash_length = vce->vce_hdr.vc_hash_length;
+	sha256((u8 *)vce + vce->vce_hdr.vc_offset, vce->vce_hdr.vc_length, hash);
+	if (memcmp(vce_hash, hash, vc_hash_length) == 0)
+		return 0;
+
+	pr_dbf_msg("SHA256 hash of received certificate does not match");
+	debug_text_event(cert_store_hexdump, 3, "VCE hash:");
+	debug_event(cert_store_hexdump, 3, vce_hash, SHA256_DIGEST_SIZE);
+	debug_text_event(cert_store_hexdump, 3, "Calculated hash:");
+	debug_event(cert_store_hexdump, 3, hash, SHA256_DIGEST_SIZE);
+
+	return -EINVAL;
+}
+
+static int check_certificate_valid(const struct vce *vce)
+{
+	if (!(vce->vce_hdr.flags & VCE_FLAGS_VALID_MASK)) {
+		pr_dbf_msg("Certificate entry is invalid");
+		return -EINVAL;
+	}
+	if (vce->vce_hdr.vc_format != 1) {
+		pr_dbf_msg("Certificate format is not supported");
+		return -EINVAL;
+	}
+	if (vce->vce_hdr.vc_hash_type != 1) {
+		pr_dbf_msg("Hash type is not supported");
+		return -EINVAL;
+	}
+
+	return check_certificate_hash(vce);
+}
+
+static struct key *get_user_session_keyring(void)
+{
+	key_ref_t us_keyring_ref;
+
+	us_keyring_ref = lookup_user_key(KEY_SPEC_USER_SESSION_KEYRING,
+					 KEY_LOOKUP_CREATE, KEY_NEED_LINK);
+	if (IS_ERR(us_keyring_ref)) {
+		pr_dbf_msg("Couldn't get user session keyring: %ld",
+			   PTR_ERR(us_keyring_ref));
+		return ERR_PTR(-ENOKEY);
+	}
+	key_ref_put(us_keyring_ref);
+	return key_ref_to_ptr(us_keyring_ref);
+}
+
+/* Invalidate all keys from cert_store keyring. */
+static int invalidate_keyring_keys(struct key *keyring)
+{
+	unsigned long num_keys, key_index;
+	size_t keyring_payload_len;
+	key_serial_t *key_array;
+	struct key *current_key;
+	int rc;
+
+	keyring_payload_len = key_type_keyring.read(keyring, NULL, 0);
+	num_keys = keyring_payload_len / sizeof(key_serial_t);
+	key_array = kcalloc(num_keys, sizeof(key_serial_t), GFP_KERNEL);
+	if (!key_array)
+		return -ENOMEM;
+
+	rc = key_type_keyring.read(keyring, (char *)key_array, keyring_payload_len);
+	if (rc != keyring_payload_len) {
+		pr_dbf_msg("Couldn't read keyring payload");
+		goto out;
+	}
+
+	for (key_index = 0; key_index < num_keys; key_index++) {
+		current_key = key_lookup(key_array[key_index]);
+		pr_dbf_msg("Invalidating key %08x", current_key->serial);
+
+		key_invalidate(current_key);
+		key_put(current_key);
+		rc = key_unlink(keyring, current_key);
+		if (rc) {
+			pr_dbf_msg("Couldn't unlink key %08x: %d", current_key->serial, rc);
+			break;
+		}
+	}
+out:
+	kfree(key_array);
+	return rc;
+}
+
+static struct key *find_cs_keyring(void)
+{
+	key_ref_t cs_keyring_ref;
+	struct key *cs_keyring;
+
+	cs_keyring_ref = keyring_search(make_key_ref(get_user_session_keyring(), true),
+					&key_type_keyring, CERT_STORE_KEYRING_NAME,
+					false);
+	if (!IS_ERR(cs_keyring_ref)) {
+		cs_keyring = key_ref_to_ptr(cs_keyring_ref);
+		key_ref_put(cs_keyring_ref);
+		goto found;
+	}
+	/* Search default locations: thread, process, session keyrings */
+	cs_keyring = request_key(&key_type_keyring, CERT_STORE_KEYRING_NAME, NULL);
+	if (IS_ERR(cs_keyring))
+		return NULL;
+	key_put(cs_keyring);
+found:
+	return cs_keyring;
+}
+
+static void cleanup_cs_keys(void)
+{
+	struct key *cs_keyring;
+
+	cs_keyring = find_cs_keyring();
+	if (!cs_keyring)
+		return;
+
+	pr_dbf_msg("Found cert_store keyring. Purging...");
+	/*
+	 * Remove cert_store_key_type in case invalidation
+	 * of old cert_store keys failed (= severe error).
+	 */
+	if (invalidate_keyring_keys(cs_keyring))
+		unregister_key_type(&key_type_cert_store_key);
+
+	keyring_clear(cs_keyring);
+	key_invalidate(cs_keyring);
+	key_put(cs_keyring);
+	key_unlink(get_user_session_keyring(), cs_keyring);
+}
+
+static struct key *create_cs_keyring(void)
+{
+	static struct key *cs_keyring;
+
+	/* Cleanup previous cs_keyring and all associated keys if any. */
+	cleanup_cs_keys();
+	cs_keyring = keyring_alloc(CERT_STORE_KEYRING_NAME, GLOBAL_ROOT_UID,
+				   GLOBAL_ROOT_GID, current_cred(),
+				   (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ,
+				   KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_SET_KEEP,
+				   NULL, get_user_session_keyring());
+	if (IS_ERR(cs_keyring)) {
+		pr_dbf_msg("Can't allocate cert_store keyring");
+		return NULL;
+	}
+
+	pr_dbf_msg("Successfully allocated cert_store keyring: %08x", cs_keyring->serial);
+
+	/*
+	 * In case a previous clean-up ran into an
+	 * error and unregistered key type.
+	 */
+	register_key_type(&key_type_cert_store_key);
+
+	return cs_keyring;
+}
+
+/*
+ * Allocate memory and create key description in format
+ * [key name in EBCDIC]:[VCE index]:[CS token].
+ * Return a pointer to key description or NULL if memory
+ * allocation failed. Memory should be freed by caller.
+ */
+static char *get_key_description(struct vcssb *vcssb, const struct vce *vce)
+{
+	size_t len, name_len;
+	u32 cs_token;
+	char *desc;
+
+	cs_token = vcssb->cs_token;
+	/* Description string contains "%64s:%04u:%08u\0". */
+	name_len = sizeof(vce->vce_hdr.vc_name);
+	len = name_len + 1 + 4 + 1 + 8 + 1;
+	desc = kmalloc(len, GFP_KERNEL);
+	if (!desc)
+		return NULL;
+
+	memcpy(desc, vce->vce_hdr.vc_name, name_len);
+	sprintf(desc + name_len, ":%04u:%08u", vce->vce_hdr.vc_index, cs_token);
+
+	return desc;
+}
+
+/*
+ * Create a key of type "cert_store_key" using the data from VCE for key
+ * payload and key description. Link the key to "cert_store" keyring.
+ */
+static int create_key_from_vce(struct vcssb *vcssb, struct vce *vce,
+			       struct key *keyring)
+{
+	key_ref_t newkey;
+	char *desc;
+	int rc;
+
+	desc = get_key_description(vcssb, vce);
+	if (!desc)
+		return -ENOMEM;
+
+	newkey = key_create_or_update(
+		make_key_ref(keyring, true), CERT_STORE_KEY_TYPE_NAME,
+		desc, (u8 *)vce + vce->vce_hdr.vc_offset,
+		vce->vce_hdr.vc_length,
+		(KEY_POS_ALL & ~KEY_POS_SETATTR)  | KEY_USR_VIEW | KEY_USR_READ,
+		KEY_ALLOC_NOT_IN_QUOTA);
+
+	rc = PTR_ERR_OR_ZERO(newkey);
+	if (rc) {
+		pr_dbf_msg("Couldn't create a key from Certificate Entry (%d)", rc);
+		rc = -ENOKEY;
+		goto out;
+	}
+
+	key_ref_put(newkey);
+out:
+	kfree(desc);
+	return rc;
+}
+
+/* Get Verification Certificate Storage Size block with DIAG320 subcode2. */
+static int get_vcssb(struct vcssb *vcssb)
+{
+	int diag320_rc;
+
+	memset(vcssb, 0, sizeof(*vcssb));
+	vcssb->vcssb_length = VCSSB_LEN_BYTES;
+	diag320_rc = diag320(DIAG320_STORAGE, vcssb);
+	pr_dbf_vcssb(vcssb);
+
+	if (diag320_rc != DIAG320_RC_OK) {
+		pr_dbf_msg("Diag 320 Subcode 1 returned bad RC: %04x", diag320_rc);
+		return -EIO;
+	}
+	if (vcssb->vcssb_length == VCSSB_LEN_NO_CERTS) {
+		pr_dbf_msg("No certificates available for current configuration");
+		return -ENOKEY;
+	}
+
+	return 0;
+}
+
+static u32 get_4k_mult_vcb_size(struct vcssb *vcssb)
+{
+	return round_up(vcssb->max_single_vcb_length, PAGE_SIZE);
+}
+
+/* Fill input fields of single-entry VCB that will be read by LPAR. */
+static void fill_vcb_input(struct vcssb *vcssb, struct vcb *vcb, u16 index)
+{
+	memset(vcb, 0, sizeof(*vcb));
+	vcb->vcb_hdr.vcb_input_length = get_4k_mult_vcb_size(vcssb);
+	vcb->vcb_hdr.cs_token = vcssb->cs_token;
+
+	/* Request single entry. */
+	vcb->vcb_hdr.first_vc_index = index;
+	vcb->vcb_hdr.last_vc_index = index;
+}
+
+static void extract_vce_from_sevcb(struct vcb *vcb, struct vce *vce)
+{
+	struct vce *extracted_vce;
+
+	extracted_vce = (struct vce *)vcb->vcb_buf;
+	memcpy(vce, vcb->vcb_buf, extracted_vce->vce_hdr.vce_length);
+	pr_dbf_vce(vce);
+}
+
+static int get_sevcb(struct vcssb *vcssb, u16 index, struct vcb *vcb)
+{
+	int rc, diag320_rc;
+
+	fill_vcb_input(vcssb, vcb, index);
+
+	diag320_rc = diag320(DIAG320_CERT_BLOCK, vcb);
+	pr_dbf_msg("Diag 320 Subcode2 RC %2x", diag320_rc);
+	pr_dbf_vcb(vcb);
+
+	switch (diag320_rc) {
+	case DIAG320_RC_OK:
+		rc = 0;
+		if (vcb->vcb_hdr.vcb_output_length == VCB_LEN_NO_CERTS) {
+			pr_dbf_msg("No certificate entry for index %u", index);
+			rc = -ENOKEY;
+		} else if (vcb->vcb_hdr.remaining_vc_count != 0) {
+			/* Retry on insufficient space. */
+			pr_dbf_msg("Couldn't get all requested certificates");
+			rc = -EAGAIN;
+		}
+		break;
+	case DIAG320_RC_CS_NOMATCH:
+		pr_dbf_msg("Certificate Store token mismatch");
+		rc = -EAGAIN;
+		break;
+	default:
+		pr_dbf_msg("Diag 320 Subcode2 returned bad rc (0x%4x)", diag320_rc);
+		rc = -EINVAL;
+		break;
+	}
+
+	return rc;
+}
+
+/*
+ * Allocate memory for single-entry VCB, get VCB via DIAG320 subcode 2 call,
+ * extract VCE and create a key from its' certificate.
+ */
+static int create_key_from_sevcb(struct vcssb *vcssb, u16 index,
+				 struct key *keyring)
+{
+	struct vcb *vcb;
+	struct vce *vce;
+	int rc;
+
+	rc = -ENOMEM;
+	vcb = vmalloc(get_4k_mult_vcb_size(vcssb));
+	vce = vmalloc(vcssb->max_single_vcb_length - sizeof(vcb->vcb_hdr));
+	if (!vcb || !vce)
+		goto out;
+
+	rc = get_sevcb(vcssb, index, vcb);
+	if (rc)
+		goto out;
+
+	extract_vce_from_sevcb(vcb, vce);
+	rc = check_certificate_valid(vce);
+	if (rc)
+		goto out;
+
+	rc = create_key_from_vce(vcssb, vce, keyring);
+	if (rc)
+		goto out;
+
+	pr_dbf_msg("Successfully created key from Certificate Entry %d", index);
+out:
+	vfree(vce);
+	vfree(vcb);
+	return rc;
+}
+
+/*
+ * Request a single-entry VCB for each VCE available for the partition.
+ * Create a key from it and link it to cert_store keyring. If no keys
+ * could be created (i.e. VCEs were invalid) return -ENOKEY.
+ */
+static int add_certificates_to_keyring(struct vcssb *vcssb, struct key *keyring)
+{
+	int rc, index, count, added;
+
+	count = 0;
+	added = 0;
+	/* Certificate Store entries indices start with 1 and have no gaps. */
+	for (index = 1; index < vcssb->total_vc_index_count + 1; index++) {
+		pr_dbf_msg("Creating key from VCE %u", index);
+		rc = create_key_from_sevcb(vcssb, index, keyring);
+		count++;
+
+		if (rc == -EAGAIN)
+			return rc;
+
+		if (rc)
+			pr_dbf_msg("Creating key from VCE %u failed (%d)", index, rc);
+		else
+			added++;
+	}
+
+	if (added == 0) {
+		pr_dbf_msg("Processed %d entries. No keys created", count);
+		return -ENOKEY;
+	}
+
+	pr_info("Added %d of %d keys to cert_store keyring", added, count);
+
+	/*
+	 * Do not allow to link more keys to certificate store keyring after all
+	 * the VCEs were processed.
+	 */
+	rc = keyring_restrict(make_key_ref(keyring, true), NULL, NULL);
+	if (rc)
+		pr_dbf_msg("Failed to set restriction to cert_store keyring (%d)", rc);
+
+	return 0;
+}
+
+/*
+ * Check which DIAG320 subcodes are installed.
+ * Return -ENOENT if subcodes 1 or 2 are not available.
+ */
+static int query_diag320_subcodes(void)
+{
+	unsigned long ism[ISM_LEN_DWORDS];
+	int rc;
+
+	rc = diag320(0, ism);
+	if (rc != DIAG320_RC_OK) {
+		pr_dbf_msg("DIAG320 subcode query returned %04x", rc);
+		return -ENOENT;
+	}
+
+	debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode 0");
+	debug_event(cert_store_hexdump, 3, ism, sizeof(ism));
+
+	if (!test_bit_inv(1, ism) || !test_bit_inv(2, ism)) {
+		pr_dbf_msg("Not all required DIAG320 subcodes are installed");
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+/*
+ * Check if Certificate Store is supported by the firmware and DIAG320 subcodes
+ * 1 and 2 are installed. Create cert_store keyring and link all certificates
+ * available for the current partition to it as "cert_store_key" type
+ * keys. On refresh or error invalidate cert_store keyring and destroy
+ * all keys of "cert_store_key" type.
+ */
+static int fill_cs_keyring(void)
+{
+	struct key *cs_keyring;
+	struct vcssb *vcssb;
+	int rc;
+
+	rc = -ENOMEM;
+	vcssb = kmalloc(VCSSB_LEN_BYTES, GFP_KERNEL);
+	if (!vcssb)
+		goto cleanup_keys;
+
+	rc = -ENOENT;
+	if (!sclp.has_diag320) {
+		pr_dbf_msg("Certificate Store is not supported");
+		goto cleanup_keys;
+	}
+
+	rc = query_diag320_subcodes();
+	if (rc)
+		goto cleanup_keys;
+
+	rc = get_vcssb(vcssb);
+	if (rc)
+		goto cleanup_keys;
+
+	cs_keyring = create_cs_keyring();
+	if (!cs_keyring)
+		goto cleanup_keys;
+
+	rc = add_certificates_to_keyring(vcssb, cs_keyring);
+	if (rc)
+		goto cleanup_cs_keyring;
+
+	goto out;
+
+cleanup_cs_keyring:
+	key_put(cs_keyring);
+cleanup_keys:
+	cleanup_cs_keys();
+out:
+	kfree(vcssb);
+	return rc;
+}
+
+static DEFINE_MUTEX(cs_refresh_lock);
+static int cs_status_val = -1;
+
+static ssize_t cs_status_show(struct kobject *kobj,
+			      struct kobj_attribute *attr, char *buf)
+{
+	if (cs_status_val == -1)
+		return sysfs_emit(buf, "uninitialized\n");
+	else if (cs_status_val == 0)
+		return sysfs_emit(buf, "ok\n");
+
+	return sysfs_emit(buf, "failed (%d)\n", cs_status_val);
+}
+
+static struct kobj_attribute cs_status_attr = __ATTR_RO(cs_status);
+
+static ssize_t refresh_store(struct kobject *kobj, struct kobj_attribute *attr,
+			     const char *buf, size_t count)
+{
+	int rc, retries;
+
+	pr_dbf_msg("Refresh certificate store information requested");
+	rc = mutex_lock_interruptible(&cs_refresh_lock);
+	if (rc)
+		return rc;
+
+	for (retries = 0; retries < DIAG_MAX_RETRIES; retries++) {
+		/* Request certificates from certificate store. */
+		rc = fill_cs_keyring();
+		if (rc)
+			pr_dbf_msg("Failed to refresh certificate store information (%d)", rc);
+		if (rc != -EAGAIN)
+			break;
+	}
+	cs_status_val = rc;
+	mutex_unlock(&cs_refresh_lock);
+
+	return rc ?: count;
+}
+
+static struct kobj_attribute refresh_attr = __ATTR_WO(refresh);
+
+static const struct attribute *cert_store_attrs[] __initconst = {
+	&cs_status_attr.attr,
+	&refresh_attr.attr,
+	NULL,
+};
+
+static struct kobject *cert_store_kobj;
+
+static int __init cert_store_init(void)
+{
+	int rc = -ENOMEM;
+
+	cert_store_dbf = debug_register("cert_store_msg", 10, 1, 64);
+	if (!cert_store_dbf)
+		goto cleanup_dbf;
+
+	cert_store_hexdump = debug_register("cert_store_hexdump", 3, 1, 128);
+	if (!cert_store_hexdump)
+		goto cleanup_dbf;
+
+	debug_register_view(cert_store_hexdump, &debug_hex_ascii_view);
+	debug_register_view(cert_store_dbf, &debug_sprintf_view);
+
+	/* Create directory /sys/firmware/cert_store. */
+	cert_store_kobj = kobject_create_and_add("cert_store", firmware_kobj);
+	if (!cert_store_kobj)
+		goto cleanup_dbf;
+
+	rc = sysfs_create_files(cert_store_kobj, cert_store_attrs);
+	if (rc)
+		goto cleanup_kobj;
+
+	register_key_type(&key_type_cert_store_key);
+
+	return rc;
+
+cleanup_kobj:
+	kobject_put(cert_store_kobj);
+cleanup_dbf:
+	debug_unregister(cert_store_dbf);
+	debug_unregister(cert_store_hexdump);
+
+	return rc;
+}
+device_initcall(cert_store_init);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index 82079f2d8583..cca56b649ca3 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -50,6 +50,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
 	[DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
 	[DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
 	[DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" },
+	[DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" },
 	[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
 };
 
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index f480d6c7fd39..fdc8668f3fba 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -55,6 +55,7 @@ static void __init sclp_early_facilities_detect(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST;
 	if (sccb->cpuoff > 134) {
 		sclp.has_diag318 = !!(sccb->byte_134 & 0x80);
+		sclp.has_diag320 = !!(sccb->byte_134 & 0x04);
 		sclp.has_iplcc = !!(sccb->byte_134 & 0x02);
 	}
 	if (sccb->cpuoff > 137) {

From c83cd4fe31d52bca0587370d9e98f00072aefa27 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Sun, 2 Jul 2023 21:20:09 +0200
Subject: [PATCH 0476/2047] s390/diag: handle diag 204 subcode 4 address
 correctly

Diagnose 204 subcode 4 requires a real (physical) address, but a
virtual address is passed to the inline assembly.

Convert the address to a physical address for only this specific case.

Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/diag.h | 2 ++
 arch/s390/kernel/diag.c      | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index fb5a886ff47f..bed804137537 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -109,6 +109,8 @@ enum diag204_sc {
 	DIAG204_SUBC_STIB7 = 7
 };
 
+#define DIAG204_SUBCODE_MASK 0xffff
+
 /* The two available diag 204 data formats */
 enum diag204_format {
 	DIAG204_INFO_SIMPLE = 0,
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index cca56b649ca3..f3a0f39cbd6c 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -171,6 +171,8 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad
 int diag204(unsigned long subcode, unsigned long size, void *addr)
 {
 	diag_stat_inc(DIAG_STAT_X204);
+	if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4)
+		addr = (void *)__pa(addr);
 	size = __diag204(&subcode, size, addr);
 	if (subcode)
 		return -1;

From 86e74965bbdf534b9c7f1f678b963492de41276e Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 3 Jul 2023 14:34:25 +0200
Subject: [PATCH 0477/2047] s390/sthyi: enforce 4k alignment of vmalloc'ed area

vmalloc() does not guarantee any alignment, unless it is explicitly
requested with e.g. __vmalloc_node(). Using diag204() with subcode 7
requires a 4k aligned virtual buffer. Therefore switch to __vmalloc_node().

Note: with the current vmalloc() implementation callers would still get a
4k aligned area, even though this is quite non-obvious looking at the
code. So changing this in sthyi doesn't fix a real bug. It is just to make
sure the code will not suffer from some obscure options, like it happened
in the past with kmalloc() where debug options changed the assumed
alignment of allocated memory areas.

Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/sthyi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 4d141e2c132e..98ebedbb5761 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -317,7 +317,9 @@ static void fill_diag(struct sthyi_sctns *sctns)
 	if (pages <= 0)
 		return;
 
-	diag204_buf = vmalloc(array_size(pages, PAGE_SIZE));
+	diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE),
+				     PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+				     __builtin_return_address(0));
 	if (!diag204_buf)
 		return;
 

From 5ac8c72462cdad56e37981eb2172c5baa1ea40d6 Mon Sep 17 00:00:00 2001
From: Harald Freudenberger <freude@linux.ibm.com>
Date: Wed, 28 Jun 2023 12:36:08 +0200
Subject: [PATCH 0478/2047] s390/zcrypt: remove CEX2 and CEX3 device drivers

Remove the legacy device driver code for CEX2 and CEX3 cards.

The last machines which are able to handle CEX2 crypto cards
are z10 EC first available 2008 and z10 BC first available 2009.
The last machines able to handle a CEX3 crypto card are
z196 first available 2010 and z114 first available 2011.

Please note that this does not imply to drop CEX2 and CEX3
support in general. With older kernels on hardware up to the
aforementioned machine models these crypto cards will get
support by IBM.

The removal of the CEX2 and CEX3 device drivers code opens up
some simplifications, for example support for crypto cards
without rng support can be removed also.

Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/crypto/Kconfig                 |   7 +-
 drivers/s390/crypto/Makefile           |   2 +-
 drivers/s390/crypto/ap_bus.c           |  25 +-
 drivers/s390/crypto/ap_bus.h           |  19 +-
 drivers/s390/crypto/ap_queue.c         |  47 +--
 drivers/s390/crypto/zcrypt_cex2a.c     | 227 -------------
 drivers/s390/crypto/zcrypt_cex2a.h     | 134 --------
 drivers/s390/crypto/zcrypt_cex2c.c     | 421 -------------------------
 drivers/s390/crypto/zcrypt_cex2c.h     |  18 --
 drivers/s390/crypto/zcrypt_msgtype50.c |  64 ++--
 drivers/s390/crypto/zcrypt_msgtype50.h |   3 +-
 drivers/s390/crypto/zcrypt_msgtype6.c  |  14 +-
 12 files changed, 42 insertions(+), 939 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 44e44b8d9ce6..c761952f0dc6 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -70,10 +70,9 @@ config ZCRYPT
 	select HW_RANDOM
 	help
 	  Select this option if you want to enable support for
-	  s390 cryptographic adapters like:
-	  + Crypto Express 2 up to 7 Coprocessor (CEXxC)
-	  + Crypto Express 2 up to 7 Accelerator (CEXxA)
-	  + Crypto Express 4 up to 7 EP11 Coprocessor (CEXxP)
+	  s390 cryptographic adapters like Crypto Express 4 up
+	  to 8 in Coprocessor (CEXxC), EP11 Coprocessor (CEXxP)
+	  or Accelerator (CEXxA) mode.
 
 config ZCRYPT_DEBUG
 	bool "Enable debug features for s390 cryptographic adapters"
diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile
index 22d2db690cd3..0edacd101c12 100644
--- a/drivers/s390/crypto/Makefile
+++ b/drivers/s390/crypto/Makefile
@@ -11,7 +11,7 @@ zcrypt-objs += zcrypt_msgtype6.o zcrypt_msgtype50.o
 zcrypt-objs += zcrypt_ccamisc.o zcrypt_ep11misc.o
 obj-$(CONFIG_ZCRYPT) += zcrypt.o
 # adapter drivers depend on ap.o and zcrypt.o
-obj-$(CONFIG_ZCRYPT) += zcrypt_cex2c.o zcrypt_cex2a.o zcrypt_cex4.o
+obj-$(CONFIG_ZCRYPT) += zcrypt_cex4.o
 
 # pkey kernel module
 pkey-objs := pkey_api.o
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 420120be300f..b1d2fedea086 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- * Copyright IBM Corp. 2006, 2021
+ * Copyright IBM Corp. 2006, 2023
  * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
  *	      Martin Schwidefsky <schwidefsky@de.ibm.com>
  *	      Ralph Wuerthner <rwuerthn@de.ibm.com>
@@ -387,23 +387,6 @@ static int ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac,
 		*q_ml = tapq_info.ml;
 		*q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED;
 		*q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED;
-		switch (*q_type) {
-			/* For CEX2 and CEX3 the available functions
-			 * are not reflected by the facilities bits.
-			 * Instead it is coded into the type. So here
-			 * modify the function bits based on the type.
-			 */
-		case AP_DEVICE_TYPE_CEX2A:
-		case AP_DEVICE_TYPE_CEX3A:
-			*q_fac |= 0x08000000;
-			break;
-		case AP_DEVICE_TYPE_CEX2C:
-		case AP_DEVICE_TYPE_CEX3C:
-			*q_fac |= 0x10000000;
-			break;
-		default:
-			break;
-		}
 		return 1;
 	default:
 		/*
@@ -1678,8 +1661,8 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func)
 {
 	int comp_type = 0;
 
-	/* < CEX2A is not supported */
-	if (rawtype < AP_DEVICE_TYPE_CEX2A) {
+	/* < CEX4 is not supported */
+	if (rawtype < AP_DEVICE_TYPE_CEX4) {
 		AP_DBF_WARN("%s queue=%02x.%04x unsupported type %d\n",
 			    __func__, AP_QID_CARD(qid),
 			    AP_QID_QUEUE(qid), rawtype);
@@ -1701,7 +1684,7 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func)
 		apinfo.cat = AP_DEVICE_TYPE_CEX8;
 		status = ap_qact(qid, 0, &apinfo);
 		if (status.response_code == AP_RESPONSE_NORMAL &&
-		    apinfo.cat >= AP_DEVICE_TYPE_CEX2A &&
+		    apinfo.cat >= AP_DEVICE_TYPE_CEX4 &&
 		    apinfo.cat <= AP_DEVICE_TYPE_CEX8)
 			comp_type = apinfo.cat;
 	}
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 0d7b7eb374ad..47bbe9babc59 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ */
 /*
- * Copyright IBM Corp. 2006, 2019
+ * Copyright IBM Corp. 2006, 2023
  * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
  *	      Martin Schwidefsky <schwidefsky@de.ibm.com>
  *	      Ralph Wuerthner <rwuerthn@de.ibm.com>
@@ -67,15 +67,8 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
 #define AP_RESPONSE_INVALID_DOMAIN	     0x42
 
 /*
- * Known device types
+ * Supported AP device types
  */
-#define AP_DEVICE_TYPE_PCICC	3
-#define AP_DEVICE_TYPE_PCICA	4
-#define AP_DEVICE_TYPE_PCIXCC	5
-#define AP_DEVICE_TYPE_CEX2A	6
-#define AP_DEVICE_TYPE_CEX2C	7
-#define AP_DEVICE_TYPE_CEX3A	8
-#define AP_DEVICE_TYPE_CEX3C	9
 #define AP_DEVICE_TYPE_CEX4	10
 #define AP_DEVICE_TYPE_CEX5	11
 #define AP_DEVICE_TYPE_CEX6	12
@@ -272,14 +265,6 @@ static inline void ap_release_message(struct ap_message *ap_msg)
 	kfree_sensitive(ap_msg->private);
 }
 
-/*
- * Note: don't use ap_send/ap_recv after using ap_queue_message
- * for the first time. Otherwise the ap message queue will get
- * confused.
- */
-int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t msglen);
-int ap_recv(ap_qid_t qid, unsigned long *psmid, void *msg, size_t msglen);
-
 enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event);
 enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event);
 
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index 30df83735adf..1336e632adc4 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2023
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  *
  * Adjunct processor bus, queue related code.
@@ -93,51 +93,6 @@ __ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t msglen,
 	return ap_nqap(qid, psmid, msg, msglen);
 }
 
-int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t msglen)
-{
-	struct ap_queue_status status;
-
-	status = __ap_send(qid, psmid, msg, msglen, 0);
-	if (status.async)
-		return -EPERM;
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		return 0;
-	case AP_RESPONSE_Q_FULL:
-	case AP_RESPONSE_RESET_IN_PROGRESS:
-		return -EBUSY;
-	case AP_RESPONSE_REQ_FAC_NOT_INST:
-		return -EINVAL;
-	default:	/* Device is gone. */
-		return -ENODEV;
-	}
-}
-EXPORT_SYMBOL(ap_send);
-
-int ap_recv(ap_qid_t qid, unsigned long *psmid, void *msg, size_t msglen)
-{
-	struct ap_queue_status status;
-
-	if (!msg)
-		return -EINVAL;
-	status = ap_dqap(qid, psmid, msg, msglen, NULL, NULL, NULL);
-	if (status.async)
-		return -EPERM;
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		return 0;
-	case AP_RESPONSE_NO_PENDING_REPLY:
-		if (status.queue_empty)
-			return -ENOENT;
-		return -EBUSY;
-	case AP_RESPONSE_RESET_IN_PROGRESS:
-		return -EBUSY;
-	default:
-		return -ENODEV;
-	}
-}
-EXPORT_SYMBOL(ap_recv);
-
 /* State machine definitions and helpers */
 
 static enum ap_sm_wait ap_sm_nop(struct ap_queue *aq)
diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c
index 83f692c9c197..e69de29bb2d1 100644
--- a/drivers/s390/crypto/zcrypt_cex2a.c
+++ b/drivers/s390/crypto/zcrypt_cex2a.c
@@ -1,227 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- *  Copyright IBM Corp. 2001, 2012
- *  Author(s): Robert Burroughs
- *	       Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
- *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
- *  MSGTYPE restruct:		  Holger Dengler <hd@linux.vnet.ibm.com>
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/err.h>
-#include <linux/atomic.h>
-#include <linux/uaccess.h>
-#include <linux/mod_devicetable.h>
-
-#include "ap_bus.h"
-#include "zcrypt_api.h"
-#include "zcrypt_error.h"
-#include "zcrypt_cex2a.h"
-#include "zcrypt_msgtype50.h"
-
-#define CEX2A_MIN_MOD_SIZE	  1	/*    8 bits	*/
-#define CEX2A_MAX_MOD_SIZE	256	/* 2048 bits	*/
-#define CEX3A_MIN_MOD_SIZE	CEX2A_MIN_MOD_SIZE
-#define CEX3A_MAX_MOD_SIZE	512	/* 4096 bits	*/
-
-#define CEX2A_MAX_MESSAGE_SIZE	0x390	/* sizeof(struct type50_crb2_msg)    */
-#define CEX2A_MAX_RESPONSE_SIZE 0x110	/* max outputdatalength + type80_hdr */
-
-#define CEX3A_MAX_RESPONSE_SIZE	0x210	/* 512 bit modulus
-					 * (max outputdatalength) +
-					 * type80_hdr
-					 */
-#define CEX3A_MAX_MESSAGE_SIZE	sizeof(struct type50_crb3_msg)
-
-#define CEX2A_CLEANUP_TIME	(15 * HZ)
-#define CEX3A_CLEANUP_TIME	CEX2A_CLEANUP_TIME
-
-MODULE_AUTHOR("IBM Corporation");
-MODULE_DESCRIPTION("CEX2A/CEX3A Cryptographic Coprocessor device driver, " \
-		   "Copyright IBM Corp. 2001, 2018");
-MODULE_LICENSE("GPL");
-
-static struct ap_device_id zcrypt_cex2a_card_ids[] = {
-	{ .dev_type = AP_DEVICE_TYPE_CEX2A,
-	  .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
-	{ .dev_type = AP_DEVICE_TYPE_CEX3A,
-	  .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
-	{ /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_card_ids);
-
-static struct ap_device_id zcrypt_cex2a_queue_ids[] = {
-	{ .dev_type = AP_DEVICE_TYPE_CEX2A,
-	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
-	{ .dev_type = AP_DEVICE_TYPE_CEX3A,
-	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
-	{ /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_queue_ids);
-
-/*
- * Probe function for CEX2A card devices. It always accepts the AP device
- * since the bus_match already checked the card type.
- * @ap_dev: pointer to the AP device.
- */
-static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev)
-{
-	/*
-	 * Normalized speed ratings per crypto adapter
-	 * MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY
-	 */
-	static const int CEX2A_SPEED_IDX[] = {
-		800, 1000, 2000,  900, 1200, 2400, 0, 0};
-	static const int CEX3A_SPEED_IDX[] = {
-		400,  500, 1000,  450,	550, 1200, 0, 0};
-
-	struct ap_card *ac = to_ap_card(&ap_dev->device);
-	struct zcrypt_card *zc;
-	int rc = 0;
-
-	zc = zcrypt_card_alloc();
-	if (!zc)
-		return -ENOMEM;
-	zc->card = ac;
-	dev_set_drvdata(&ap_dev->device, zc);
-
-	if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX2A) {
-		zc->min_mod_size = CEX2A_MIN_MOD_SIZE;
-		zc->max_mod_size = CEX2A_MAX_MOD_SIZE;
-		zc->speed_rating = CEX2A_SPEED_IDX;
-		zc->max_exp_bit_length = CEX2A_MAX_MOD_SIZE;
-		zc->type_string = "CEX2A";
-		zc->user_space_type = ZCRYPT_CEX2A;
-	} else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX3A) {
-		zc->min_mod_size = CEX2A_MIN_MOD_SIZE;
-		zc->max_mod_size = CEX2A_MAX_MOD_SIZE;
-		zc->max_exp_bit_length = CEX2A_MAX_MOD_SIZE;
-		if (ap_test_bit(&ac->functions, AP_FUNC_MEX4K) &&
-		    ap_test_bit(&ac->functions, AP_FUNC_CRT4K)) {
-			zc->max_mod_size = CEX3A_MAX_MOD_SIZE;
-			zc->max_exp_bit_length = CEX3A_MAX_MOD_SIZE;
-		}
-		zc->speed_rating = CEX3A_SPEED_IDX;
-		zc->type_string = "CEX3A";
-		zc->user_space_type = ZCRYPT_CEX3A;
-	} else {
-		zcrypt_card_free(zc);
-		return -ENODEV;
-	}
-	zc->online = 1;
-
-	rc = zcrypt_card_register(zc);
-	if (rc)
-		zcrypt_card_free(zc);
-
-	return rc;
-}
-
-/*
- * This is called to remove the CEX2A card driver information
- * if an AP card device is removed.
- */
-static void zcrypt_cex2a_card_remove(struct ap_device *ap_dev)
-{
-	struct zcrypt_card *zc = dev_get_drvdata(&ap_dev->device);
-
-	zcrypt_card_unregister(zc);
-}
-
-static struct ap_driver zcrypt_cex2a_card_driver = {
-	.probe = zcrypt_cex2a_card_probe,
-	.remove = zcrypt_cex2a_card_remove,
-	.ids = zcrypt_cex2a_card_ids,
-	.flags = AP_DRIVER_FLAG_DEFAULT,
-};
-
-/*
- * Probe function for CEX2A queue devices. It always accepts the AP device
- * since the bus_match already checked the queue type.
- * @ap_dev: pointer to the AP device.
- */
-static int zcrypt_cex2a_queue_probe(struct ap_device *ap_dev)
-{
-	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
-	struct zcrypt_queue *zq = NULL;
-	int rc;
-
-	switch (ap_dev->device_type) {
-	case AP_DEVICE_TYPE_CEX2A:
-		zq = zcrypt_queue_alloc(CEX2A_MAX_RESPONSE_SIZE);
-		if (!zq)
-			return -ENOMEM;
-		break;
-	case AP_DEVICE_TYPE_CEX3A:
-		zq = zcrypt_queue_alloc(CEX3A_MAX_RESPONSE_SIZE);
-		if (!zq)
-			return -ENOMEM;
-		break;
-	}
-	if (!zq)
-		return -ENODEV;
-	zq->ops = zcrypt_msgtype(MSGTYPE50_NAME, MSGTYPE50_VARIANT_DEFAULT);
-	zq->queue = aq;
-	zq->online = 1;
-	atomic_set(&zq->load, 0);
-	ap_queue_init_state(aq);
-	ap_queue_init_reply(aq, &zq->reply);
-	aq->request_timeout = CEX2A_CLEANUP_TIME;
-	dev_set_drvdata(&ap_dev->device, zq);
-	rc = zcrypt_queue_register(zq);
-	if (rc)
-		zcrypt_queue_free(zq);
-
-	return rc;
-}
-
-/*
- * This is called to remove the CEX2A queue driver information
- * if an AP queue device is removed.
- */
-static void zcrypt_cex2a_queue_remove(struct ap_device *ap_dev)
-{
-	struct zcrypt_queue *zq = dev_get_drvdata(&ap_dev->device);
-
-	zcrypt_queue_unregister(zq);
-}
-
-static struct ap_driver zcrypt_cex2a_queue_driver = {
-	.probe = zcrypt_cex2a_queue_probe,
-	.remove = zcrypt_cex2a_queue_remove,
-	.ids = zcrypt_cex2a_queue_ids,
-	.flags = AP_DRIVER_FLAG_DEFAULT,
-};
-
-int __init zcrypt_cex2a_init(void)
-{
-	int rc;
-
-	rc = ap_driver_register(&zcrypt_cex2a_card_driver,
-				THIS_MODULE, "cex2acard");
-	if (rc)
-		return rc;
-
-	rc = ap_driver_register(&zcrypt_cex2a_queue_driver,
-				THIS_MODULE, "cex2aqueue");
-	if (rc)
-		ap_driver_unregister(&zcrypt_cex2a_card_driver);
-
-	return rc;
-}
-
-void __exit zcrypt_cex2a_exit(void)
-{
-	ap_driver_unregister(&zcrypt_cex2a_queue_driver);
-	ap_driver_unregister(&zcrypt_cex2a_card_driver);
-}
-
-module_init(zcrypt_cex2a_init);
-module_exit(zcrypt_cex2a_exit);
diff --git a/drivers/s390/crypto/zcrypt_cex2a.h b/drivers/s390/crypto/zcrypt_cex2a.h
index 7842214d9d09..e69de29bb2d1 100644
--- a/drivers/s390/crypto/zcrypt_cex2a.h
+++ b/drivers/s390/crypto/zcrypt_cex2a.h
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- *  Copyright IBM Corp. 2001, 2006
- *  Author(s): Robert Burroughs
- *	       Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-
-#ifndef _ZCRYPT_CEX2A_H_
-#define _ZCRYPT_CEX2A_H_
-
-/**
- * The type 50 message family is associated with CEXxA cards.
- *
- * The four members of the family are described below.
- *
- * Note that all unsigned char arrays are right-justified and left-padded
- * with zeroes.
- *
- * Note that all reserved fields must be zeroes.
- */
-struct type50_hdr {
-	unsigned char	reserved1;
-	unsigned char	msg_type_code;	/* 0x50 */
-	unsigned short	msg_len;
-	unsigned char	reserved2;
-	unsigned char	ignored;
-	unsigned short	reserved3;
-} __packed;
-
-#define TYPE50_TYPE_CODE	0x50
-
-#define TYPE50_MEB1_FMT		0x0001
-#define TYPE50_MEB2_FMT		0x0002
-#define TYPE50_MEB3_FMT		0x0003
-#define TYPE50_CRB1_FMT		0x0011
-#define TYPE50_CRB2_FMT		0x0012
-#define TYPE50_CRB3_FMT		0x0013
-
-/* Mod-Exp, with a small modulus */
-struct type50_meb1_msg {
-	struct type50_hdr header;
-	unsigned short	keyblock_type;	/* 0x0001 */
-	unsigned char	reserved[6];
-	unsigned char	exponent[128];
-	unsigned char	modulus[128];
-	unsigned char	message[128];
-} __packed;
-
-/* Mod-Exp, with a large modulus */
-struct type50_meb2_msg {
-	struct type50_hdr header;
-	unsigned short	keyblock_type;	/* 0x0002 */
-	unsigned char	reserved[6];
-	unsigned char	exponent[256];
-	unsigned char	modulus[256];
-	unsigned char	message[256];
-} __packed;
-
-/* Mod-Exp, with a larger modulus */
-struct type50_meb3_msg {
-	struct type50_hdr header;
-	unsigned short	keyblock_type;	/* 0x0003 */
-	unsigned char	reserved[6];
-	unsigned char	exponent[512];
-	unsigned char	modulus[512];
-	unsigned char	message[512];
-} __packed;
-
-/* CRT, with a small modulus */
-struct type50_crb1_msg {
-	struct type50_hdr header;
-	unsigned short	keyblock_type;	/* 0x0011 */
-	unsigned char	reserved[6];
-	unsigned char	p[64];
-	unsigned char	q[64];
-	unsigned char	dp[64];
-	unsigned char	dq[64];
-	unsigned char	u[64];
-	unsigned char	message[128];
-} __packed;
-
-/* CRT, with a large modulus */
-struct type50_crb2_msg {
-	struct type50_hdr header;
-	unsigned short	keyblock_type;	/* 0x0012 */
-	unsigned char	reserved[6];
-	unsigned char	p[128];
-	unsigned char	q[128];
-	unsigned char	dp[128];
-	unsigned char	dq[128];
-	unsigned char	u[128];
-	unsigned char	message[256];
-} __packed;
-
-/* CRT, with a larger modulus */
-struct type50_crb3_msg {
-	struct type50_hdr header;
-	unsigned short	keyblock_type;	/* 0x0013 */
-	unsigned char	reserved[6];
-	unsigned char	p[256];
-	unsigned char	q[256];
-	unsigned char	dp[256];
-	unsigned char	dq[256];
-	unsigned char	u[256];
-	unsigned char	message[512];
-} __packed;
-
-/**
- * The type 80 response family is associated with a CEXxA cards.
- *
- * Note that all unsigned char arrays are right-justified and left-padded
- * with zeroes.
- *
- * Note that all reserved fields must be zeroes.
- */
-
-#define TYPE80_RSP_CODE 0x80
-
-struct type80_hdr {
-	unsigned char	reserved1;
-	unsigned char	type;		/* 0x80 */
-	unsigned short	len;
-	unsigned char	code;		/* 0x00 */
-	unsigned char	reserved2[3];
-	unsigned char	reserved3[8];
-} __packed;
-
-int zcrypt_cex2a_init(void);
-void zcrypt_cex2a_exit(void);
-
-#endif /* _ZCRYPT_CEX2A_H_ */
diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c
index 251b5bd3d19c..e69de29bb2d1 100644
--- a/drivers/s390/crypto/zcrypt_cex2c.c
+++ b/drivers/s390/crypto/zcrypt_cex2c.c
@@ -1,421 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- *  Copyright IBM Corp. 2001, 2018
- *  Author(s): Robert Burroughs
- *	       Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
- *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
- *  MSGTYPE restruct:		  Holger Dengler <hd@linux.vnet.ibm.com>
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/err.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/atomic.h>
-#include <linux/uaccess.h>
-#include <linux/mod_devicetable.h>
-
-#include "ap_bus.h"
-#include "zcrypt_api.h"
-#include "zcrypt_error.h"
-#include "zcrypt_msgtype6.h"
-#include "zcrypt_cex2c.h"
-#include "zcrypt_cca_key.h"
-#include "zcrypt_ccamisc.h"
-
-#define CEX2C_MIN_MOD_SIZE	 16	/*  128 bits	*/
-#define CEX2C_MAX_MOD_SIZE	256	/* 2048 bits	*/
-#define CEX3C_MIN_MOD_SIZE	 16	/*  128 bits	*/
-#define CEX3C_MAX_MOD_SIZE	512	/* 4096 bits	*/
-#define CEX2C_MAX_XCRB_MESSAGE_SIZE (12 * 1024)
-#define CEX2C_CLEANUP_TIME	(15 * HZ)
-
-MODULE_AUTHOR("IBM Corporation");
-MODULE_DESCRIPTION("CEX2C/CEX3C Cryptographic Coprocessor device driver, " \
-		   "Copyright IBM Corp. 2001, 2018");
-MODULE_LICENSE("GPL");
-
-static struct ap_device_id zcrypt_cex2c_card_ids[] = {
-	{ .dev_type = AP_DEVICE_TYPE_CEX2C,
-	  .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
-	{ .dev_type = AP_DEVICE_TYPE_CEX3C,
-	  .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
-	{ /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_cex2c_card_ids);
-
-static struct ap_device_id zcrypt_cex2c_queue_ids[] = {
-	{ .dev_type = AP_DEVICE_TYPE_CEX2C,
-	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
-	{ .dev_type = AP_DEVICE_TYPE_CEX3C,
-	  .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
-	{ /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_cex2c_queue_ids);
-
-/*
- * CCA card additional device attributes
- */
-static ssize_t cca_serialnr_show(struct device *dev,
-				 struct device_attribute *attr,
-				 char *buf)
-{
-	struct zcrypt_card *zc = dev_get_drvdata(dev);
-	struct cca_info ci;
-	struct ap_card *ac = to_ap_card(dev);
-
-	memset(&ci, 0, sizeof(ci));
-
-	if (ap_domain_index >= 0)
-		cca_get_info(ac->id, ap_domain_index, &ci, zc->online);
-
-	return sysfs_emit(buf, "%s\n", ci.serial);
-}
-
-static struct device_attribute dev_attr_cca_serialnr =
-	__ATTR(serialnr, 0444, cca_serialnr_show, NULL);
-
-static struct attribute *cca_card_attrs[] = {
-	&dev_attr_cca_serialnr.attr,
-	NULL,
-};
-
-static const struct attribute_group cca_card_attr_grp = {
-	.attrs = cca_card_attrs,
-};
-
- /*
-  * CCA queue additional device attributes
-  */
-static ssize_t cca_mkvps_show(struct device *dev,
-			      struct device_attribute *attr,
-			      char *buf)
-{
-	struct zcrypt_queue *zq = dev_get_drvdata(dev);
-	int n = 0;
-	struct cca_info ci;
-	static const char * const cao_state[] = { "invalid", "valid" };
-	static const char * const new_state[] = { "empty", "partial", "full" };
-
-	memset(&ci, 0, sizeof(ci));
-
-	cca_get_info(AP_QID_CARD(zq->queue->qid),
-		     AP_QID_QUEUE(zq->queue->qid),
-		     &ci, zq->online);
-
-	if (ci.new_aes_mk_state >= '1' && ci.new_aes_mk_state <= '3')
-		n = sysfs_emit(buf, "AES NEW: %s 0x%016llx\n",
-			       new_state[ci.new_aes_mk_state - '1'],
-			       ci.new_aes_mkvp);
-	else
-		n = sysfs_emit(buf, "AES NEW: - -\n");
-
-	if (ci.cur_aes_mk_state >= '1' && ci.cur_aes_mk_state <= '2')
-		n += sysfs_emit_at(buf, n, "AES CUR: %s 0x%016llx\n",
-				   cao_state[ci.cur_aes_mk_state - '1'],
-				   ci.cur_aes_mkvp);
-	else
-		n += sysfs_emit_at(buf, n, "AES CUR: - -\n");
-
-	if (ci.old_aes_mk_state >= '1' && ci.old_aes_mk_state <= '2')
-		n += sysfs_emit_at(buf, n, "AES OLD: %s 0x%016llx\n",
-				   cao_state[ci.old_aes_mk_state - '1'],
-				   ci.old_aes_mkvp);
-	else
-		n += sysfs_emit_at(buf, n, "AES OLD: - -\n");
-
-	if (ci.new_apka_mk_state >= '1' && ci.new_apka_mk_state <= '3')
-		n += sysfs_emit_at(buf, n, "APKA NEW: %s 0x%016llx\n",
-				   new_state[ci.new_apka_mk_state - '1'],
-				   ci.new_apka_mkvp);
-	else
-		n += sysfs_emit_at(buf, n, "APKA NEW: - -\n");
-
-	if (ci.cur_apka_mk_state >= '1' && ci.cur_apka_mk_state <= '2')
-		n += sysfs_emit_at(buf, n, "APKA CUR: %s 0x%016llx\n",
-				   cao_state[ci.cur_apka_mk_state - '1'],
-				   ci.cur_apka_mkvp);
-	else
-		n += sysfs_emit_at(buf, n, "APKA CUR: - -\n");
-
-	if (ci.old_apka_mk_state >= '1' && ci.old_apka_mk_state <= '2')
-		n += sysfs_emit_at(buf, n, "APKA OLD: %s 0x%016llx\n",
-				   cao_state[ci.old_apka_mk_state - '1'],
-				   ci.old_apka_mkvp);
-	else
-		n += sysfs_emit_at(buf, n, "APKA OLD: - -\n");
-
-	return n;
-}
-
-static struct device_attribute dev_attr_cca_mkvps =
-	__ATTR(mkvps, 0444, cca_mkvps_show, NULL);
-
-static struct attribute *cca_queue_attrs[] = {
-	&dev_attr_cca_mkvps.attr,
-	NULL,
-};
-
-static const struct attribute_group cca_queue_attr_grp = {
-	.attrs = cca_queue_attrs,
-};
-
-/*
- * Large random number detection function. Its sends a message to a CEX2C/CEX3C
- * card to find out if large random numbers are supported.
- * @ap_dev: pointer to the AP device.
- *
- * Returns 1 if large random numbers are supported, 0 if not and < 0 on error.
- */
-static int zcrypt_cex2c_rng_supported(struct ap_queue *aq)
-{
-	struct ap_message ap_msg;
-	unsigned long psmid;
-	unsigned int domain;
-	struct {
-		struct type86_hdr hdr;
-		struct type86_fmt2_ext fmt2;
-		struct CPRBX cprbx;
-	} __packed *reply;
-	struct {
-		struct type6_hdr hdr;
-		struct CPRBX cprbx;
-		char function_code[2];
-		short int rule_length;
-		char rule[8];
-		short int verb_length;
-		short int key_length;
-	} __packed *msg;
-	int rc, i;
-
-	ap_init_message(&ap_msg);
-	ap_msg.msg = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!ap_msg.msg)
-		return -ENOMEM;
-	ap_msg.bufsize = PAGE_SIZE;
-
-	rng_type6cprb_msgx(&ap_msg, 4, &domain);
-
-	msg = ap_msg.msg;
-	msg->cprbx.domain = AP_QID_QUEUE(aq->qid);
-
-	rc = ap_send(aq->qid, 0x0102030405060708UL, ap_msg.msg, ap_msg.len);
-	if (rc)
-		goto out_free;
-
-	/* Wait for the test message to complete. */
-	for (i = 0; i < 2 * HZ; i++) {
-		msleep(1000 / HZ);
-		rc = ap_recv(aq->qid, &psmid, ap_msg.msg, ap_msg.bufsize);
-		if (rc == 0 && psmid == 0x0102030405060708UL)
-			break;
-	}
-
-	if (i >= 2 * HZ) {
-		/* Got no answer. */
-		rc = -ENODEV;
-		goto out_free;
-	}
-
-	reply = ap_msg.msg;
-	if (reply->cprbx.ccp_rtcode == 0 && reply->cprbx.ccp_rscode == 0)
-		rc = 1;
-	else
-		rc = 0;
-out_free:
-	free_page((unsigned long)ap_msg.msg);
-	return rc;
-}
-
-/*
- * Probe function for CEX2C/CEX3C card devices. It always accepts the
- * AP device since the bus_match already checked the hardware type.
- * @ap_dev: pointer to the AP card device.
- */
-static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev)
-{
-	/*
-	 * Normalized speed ratings per crypto adapter
-	 * MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY
-	 */
-	static const int CEX2C_SPEED_IDX[] = {
-		1000, 1400, 2400, 1100, 1500, 2600, 100, 12};
-	static const int CEX3C_SPEED_IDX[] = {
-		500,  700, 1400,  550,	800, 1500,  80, 10};
-
-	struct ap_card *ac = to_ap_card(&ap_dev->device);
-	struct zcrypt_card *zc;
-	int rc = 0;
-
-	zc = zcrypt_card_alloc();
-	if (!zc)
-		return -ENOMEM;
-	zc->card = ac;
-	dev_set_drvdata(&ap_dev->device, zc);
-	switch (ac->ap_dev.device_type) {
-	case AP_DEVICE_TYPE_CEX2C:
-		zc->user_space_type = ZCRYPT_CEX2C;
-		zc->type_string = "CEX2C";
-		zc->speed_rating = CEX2C_SPEED_IDX;
-		zc->min_mod_size = CEX2C_MIN_MOD_SIZE;
-		zc->max_mod_size = CEX2C_MAX_MOD_SIZE;
-		zc->max_exp_bit_length = CEX2C_MAX_MOD_SIZE;
-		break;
-	case AP_DEVICE_TYPE_CEX3C:
-		zc->user_space_type = ZCRYPT_CEX3C;
-		zc->type_string = "CEX3C";
-		zc->speed_rating = CEX3C_SPEED_IDX;
-		zc->min_mod_size = CEX3C_MIN_MOD_SIZE;
-		zc->max_mod_size = CEX3C_MAX_MOD_SIZE;
-		zc->max_exp_bit_length = CEX3C_MAX_MOD_SIZE;
-		break;
-	default:
-		zcrypt_card_free(zc);
-		return -ENODEV;
-	}
-	zc->online = 1;
-
-	rc = zcrypt_card_register(zc);
-	if (rc) {
-		zcrypt_card_free(zc);
-		return rc;
-	}
-
-	if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) {
-		rc = sysfs_create_group(&ap_dev->device.kobj,
-					&cca_card_attr_grp);
-		if (rc) {
-			zcrypt_card_unregister(zc);
-			zcrypt_card_free(zc);
-		}
-	}
-
-	return rc;
-}
-
-/*
- * This is called to remove the CEX2C/CEX3C card driver information
- * if an AP card device is removed.
- */
-static void zcrypt_cex2c_card_remove(struct ap_device *ap_dev)
-{
-	struct zcrypt_card *zc = dev_get_drvdata(&ap_dev->device);
-	struct ap_card *ac = to_ap_card(&ap_dev->device);
-
-	if (ap_test_bit(&ac->functions, AP_FUNC_COPRO))
-		sysfs_remove_group(&ap_dev->device.kobj, &cca_card_attr_grp);
-
-	zcrypt_card_unregister(zc);
-}
-
-static struct ap_driver zcrypt_cex2c_card_driver = {
-	.probe = zcrypt_cex2c_card_probe,
-	.remove = zcrypt_cex2c_card_remove,
-	.ids = zcrypt_cex2c_card_ids,
-	.flags = AP_DRIVER_FLAG_DEFAULT,
-};
-
-/*
- * Probe function for CEX2C/CEX3C queue devices. It always accepts the
- * AP device since the bus_match already checked the hardware type.
- * @ap_dev: pointer to the AP card device.
- */
-static int zcrypt_cex2c_queue_probe(struct ap_device *ap_dev)
-{
-	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
-	struct zcrypt_queue *zq;
-	int rc;
-
-	zq = zcrypt_queue_alloc(CEX2C_MAX_XCRB_MESSAGE_SIZE);
-	if (!zq)
-		return -ENOMEM;
-	zq->queue = aq;
-	zq->online = 1;
-	atomic_set(&zq->load, 0);
-	ap_rapq(aq->qid, 0);
-	rc = zcrypt_cex2c_rng_supported(aq);
-	if (rc < 0) {
-		zcrypt_queue_free(zq);
-		return rc;
-	}
-	if (rc)
-		zq->ops = zcrypt_msgtype(MSGTYPE06_NAME,
-					 MSGTYPE06_VARIANT_DEFAULT);
-	else
-		zq->ops = zcrypt_msgtype(MSGTYPE06_NAME,
-					 MSGTYPE06_VARIANT_NORNG);
-	ap_queue_init_state(aq);
-	ap_queue_init_reply(aq, &zq->reply);
-	aq->request_timeout = CEX2C_CLEANUP_TIME;
-	dev_set_drvdata(&ap_dev->device, zq);
-	rc = zcrypt_queue_register(zq);
-	if (rc) {
-		zcrypt_queue_free(zq);
-		return rc;
-	}
-
-	if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO)) {
-		rc = sysfs_create_group(&ap_dev->device.kobj,
-					&cca_queue_attr_grp);
-		if (rc) {
-			zcrypt_queue_unregister(zq);
-			zcrypt_queue_free(zq);
-		}
-	}
-
-	return rc;
-}
-
-/*
- * This is called to remove the CEX2C/CEX3C queue driver information
- * if an AP queue device is removed.
- */
-static void zcrypt_cex2c_queue_remove(struct ap_device *ap_dev)
-{
-	struct zcrypt_queue *zq = dev_get_drvdata(&ap_dev->device);
-	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
-
-	if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO))
-		sysfs_remove_group(&ap_dev->device.kobj, &cca_queue_attr_grp);
-
-	zcrypt_queue_unregister(zq);
-}
-
-static struct ap_driver zcrypt_cex2c_queue_driver = {
-	.probe = zcrypt_cex2c_queue_probe,
-	.remove = zcrypt_cex2c_queue_remove,
-	.ids = zcrypt_cex2c_queue_ids,
-	.flags = AP_DRIVER_FLAG_DEFAULT,
-};
-
-int __init zcrypt_cex2c_init(void)
-{
-	int rc;
-
-	rc = ap_driver_register(&zcrypt_cex2c_card_driver,
-				THIS_MODULE, "cex2card");
-	if (rc)
-		return rc;
-
-	rc = ap_driver_register(&zcrypt_cex2c_queue_driver,
-				THIS_MODULE, "cex2cqueue");
-	if (rc)
-		ap_driver_unregister(&zcrypt_cex2c_card_driver);
-
-	return rc;
-}
-
-void zcrypt_cex2c_exit(void)
-{
-	ap_driver_unregister(&zcrypt_cex2c_queue_driver);
-	ap_driver_unregister(&zcrypt_cex2c_card_driver);
-}
-
-module_init(zcrypt_cex2c_init);
-module_exit(zcrypt_cex2c_exit);
diff --git a/drivers/s390/crypto/zcrypt_cex2c.h b/drivers/s390/crypto/zcrypt_cex2c.h
index 6ec405c2bec2..e69de29bb2d1 100644
--- a/drivers/s390/crypto/zcrypt_cex2c.h
+++ b/drivers/s390/crypto/zcrypt_cex2c.h
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- *  Copyright IBM Corp. 2001, 2018
- *  Author(s): Robert Burroughs
- *	       Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
- *  MSGTYPE restruct:		  Holger Dengler <hd@linux.vnet.ibm.com>
- */
-
-#ifndef _ZCRYPT_CEX2C_H_
-#define _ZCRYPT_CEX2C_H_
-
-int zcrypt_cex2c_init(void);
-void zcrypt_cex2c_exit(void);
-
-#endif /* _ZCRYPT_CEX2C_H_ */
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index 51f8f7a463f7..2e155de8abe5 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- *  Copyright IBM Corp. 2001, 2012
+ *  Copyright IBM Corp. 2001, 2023
  *  Author(s): Robert Burroughs
  *	       Eric Rossman (edrossma@us.ibm.com)
  *
@@ -28,15 +28,12 @@
 /* >= CEX3A: 4096 bits */
 #define CEX3A_MAX_MOD_SIZE 512
 
-/* CEX2A: max outputdatalength + type80_hdr */
-#define CEX2A_MAX_RESPONSE_SIZE 0x110
-
 /* >= CEX3A: 512 bit modulus, (max outputdatalength) + type80_hdr */
 #define CEX3A_MAX_RESPONSE_SIZE 0x210
 
 MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("Cryptographic Accelerator (message type 50), " \
-		   "Copyright IBM Corp. 2001, 2012");
+		   "Copyright IBM Corp. 2001, 2023");
 MODULE_LICENSE("GPL");
 
 /*
@@ -366,20 +363,17 @@ static int convert_type80(struct zcrypt_queue *zq,
 		ap_send_online_uevent(&zq->queue->ap_dev, zq->online);
 		return -EAGAIN;
 	}
-	if (zq->zcard->user_space_type == ZCRYPT_CEX2A)
-		BUG_ON(t80h->len > CEX2A_MAX_RESPONSE_SIZE);
-	else
-		BUG_ON(t80h->len > CEX3A_MAX_RESPONSE_SIZE);
+	BUG_ON(t80h->len > CEX3A_MAX_RESPONSE_SIZE);
 	data = reply->msg + t80h->len - outputdatalength;
 	if (copy_to_user(outputdata, data, outputdatalength))
 		return -EFAULT;
 	return 0;
 }
 
-static int convert_response_cex2a(struct zcrypt_queue *zq,
-				  struct ap_message *reply,
-				  char __user *outputdata,
-				  unsigned int outputdatalength)
+static int convert_response(struct zcrypt_queue *zq,
+			    struct ap_message *reply,
+			    char __user *outputdata,
+			    unsigned int outputdatalength)
 {
 	/* Response type byte is the second byte in the response. */
 	unsigned char rtype = ((unsigned char *)reply->msg)[1];
@@ -414,9 +408,9 @@ static int convert_response_cex2a(struct zcrypt_queue *zq,
  * @msg: pointer to the AP message
  * @reply: pointer to the AP reply message
  */
-static void zcrypt_cex2a_receive(struct ap_queue *aq,
-				 struct ap_message *msg,
-				 struct ap_message *reply)
+static void zcrypt_msgtype50_receive(struct ap_queue *aq,
+				     struct ap_message *msg,
+				     struct ap_message *reply)
 {
 	static struct error_hdr error_reply = {
 		.type = TYPE82_RSP_CODE,
@@ -456,19 +450,18 @@ static atomic_t zcrypt_step = ATOMIC_INIT(0);
  *	CEXxA device to the request distributor
  * @mex: pointer to the modexpo request buffer
  */
-static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq,
-				 struct ica_rsa_modexpo *mex,
-				 struct ap_message *ap_msg)
+static long zcrypt_msgtype50_modexpo(struct zcrypt_queue *zq,
+				     struct ica_rsa_modexpo *mex,
+				     struct ap_message *ap_msg)
 {
 	struct completion work;
 	int rc;
 
-	ap_msg->bufsize = (zq->zcard->user_space_type == ZCRYPT_CEX2A) ?
-		MSGTYPE50_CRB2_MAX_MSG_SIZE : MSGTYPE50_CRB3_MAX_MSG_SIZE;
+	ap_msg->bufsize = MSGTYPE50_CRB3_MAX_MSG_SIZE;
 	ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL);
 	if (!ap_msg->msg)
 		return -ENOMEM;
-	ap_msg->receive = zcrypt_cex2a_receive;
+	ap_msg->receive = zcrypt_msgtype50_receive;
 	ap_msg->psmid = (((unsigned long)current->pid) << 32) +
 		atomic_inc_return(&zcrypt_step);
 	ap_msg->private = &work;
@@ -483,9 +476,9 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq,
 	if (rc == 0) {
 		rc = ap_msg->rc;
 		if (rc == 0)
-			rc = convert_response_cex2a(zq, ap_msg,
-						    mex->outputdata,
-						    mex->outputdatalength);
+			rc = convert_response(zq, ap_msg,
+					      mex->outputdata,
+					      mex->outputdatalength);
 	} else {
 		/* Signal pending. */
 		ap_cancel_message(zq->queue, ap_msg);
@@ -507,19 +500,18 @@ out:
  *	CEXxA device to the request distributor
  * @crt: pointer to the modexpoc_crt request buffer
  */
-static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq,
-				     struct ica_rsa_modexpo_crt *crt,
-				     struct ap_message *ap_msg)
+static long zcrypt_msgtype50_modexpo_crt(struct zcrypt_queue *zq,
+					 struct ica_rsa_modexpo_crt *crt,
+					 struct ap_message *ap_msg)
 {
 	struct completion work;
 	int rc;
 
-	ap_msg->bufsize = (zq->zcard->user_space_type == ZCRYPT_CEX2A) ?
-		MSGTYPE50_CRB2_MAX_MSG_SIZE : MSGTYPE50_CRB3_MAX_MSG_SIZE;
+	ap_msg->bufsize = MSGTYPE50_CRB3_MAX_MSG_SIZE;
 	ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL);
 	if (!ap_msg->msg)
 		return -ENOMEM;
-	ap_msg->receive = zcrypt_cex2a_receive;
+	ap_msg->receive = zcrypt_msgtype50_receive;
 	ap_msg->psmid = (((unsigned long)current->pid) << 32) +
 		atomic_inc_return(&zcrypt_step);
 	ap_msg->private = &work;
@@ -534,9 +526,9 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq,
 	if (rc == 0) {
 		rc = ap_msg->rc;
 		if (rc == 0)
-			rc = convert_response_cex2a(zq, ap_msg,
-						    crt->outputdata,
-						    crt->outputdatalength);
+			rc = convert_response(zq, ap_msg,
+					      crt->outputdata,
+					      crt->outputdatalength);
 	} else {
 		/* Signal pending. */
 		ap_cancel_message(zq->queue, ap_msg);
@@ -555,8 +547,8 @@ out:
  * The crypto operations for message type 50.
  */
 static struct zcrypt_ops zcrypt_msgtype50_ops = {
-	.rsa_modexpo = zcrypt_cex2a_modexpo,
-	.rsa_modexpo_crt = zcrypt_cex2a_modexpo_crt,
+	.rsa_modexpo = zcrypt_msgtype50_modexpo,
+	.rsa_modexpo_crt = zcrypt_msgtype50_modexpo_crt,
 	.owner = THIS_MODULE,
 	.name = MSGTYPE50_NAME,
 	.variant = MSGTYPE50_VARIANT_DEFAULT,
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.h b/drivers/s390/crypto/zcrypt_msgtype50.h
index eb49f06bed29..323e93b90b12 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.h
+++ b/drivers/s390/crypto/zcrypt_msgtype50.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ */
 /*
- *  Copyright IBM Corp. 2001, 2012
+ *  Copyright IBM Corp. 2001, 2023
  *  Author(s): Robert Burroughs
  *	       Eric Rossman (edrossma@us.ibm.com)
  *
@@ -15,7 +15,6 @@
 #define MSGTYPE50_NAME			"zcrypt_msgtype50"
 #define MSGTYPE50_VARIANT_DEFAULT	0
 
-#define MSGTYPE50_CRB2_MAX_MSG_SIZE 0x390 /* sizeof(struct type50_crb2_msg) */
 #define MSGTYPE50_CRB3_MAX_MSG_SIZE 0x710 /* sizeof(struct type50_crb3_msg) */
 
 #define MSGTYPE_ADJUSTMENT 0x08  /* type04 extension (not needed in type50) */
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index e668ff5eb384..3c53abbdc342 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- *  Copyright IBM Corp. 2001, 2022
+ *  Copyright IBM Corp. 2001, 2023
  *  Author(s): Robert Burroughs
  *	       Eric Rossman (edrossma@us.ibm.com)
  *
@@ -42,7 +42,7 @@ struct response_type {
 
 MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("Cryptographic Coprocessor (message type 6), " \
-		   "Copyright IBM Corp. 2001, 2012");
+		   "Copyright IBM Corp. 2001, 2023");
 MODULE_LICENSE("GPL");
 
 struct function_and_rules_block {
@@ -1348,14 +1348,6 @@ out:
 /*
  * The crypto operations for a CEXxC card.
  */
-static struct zcrypt_ops zcrypt_msgtype6_norng_ops = {
-	.owner = THIS_MODULE,
-	.name = MSGTYPE06_NAME,
-	.variant = MSGTYPE06_VARIANT_NORNG,
-	.rsa_modexpo = zcrypt_msgtype6_modexpo,
-	.rsa_modexpo_crt = zcrypt_msgtype6_modexpo_crt,
-	.send_cprb = zcrypt_msgtype6_send_cprb,
-};
 
 static struct zcrypt_ops zcrypt_msgtype6_ops = {
 	.owner = THIS_MODULE,
@@ -1378,14 +1370,12 @@ static struct zcrypt_ops zcrypt_msgtype6_ep11_ops = {
 
 void __init zcrypt_msgtype6_init(void)
 {
-	zcrypt_msgtype_register(&zcrypt_msgtype6_norng_ops);
 	zcrypt_msgtype_register(&zcrypt_msgtype6_ops);
 	zcrypt_msgtype_register(&zcrypt_msgtype6_ep11_ops);
 }
 
 void __exit zcrypt_msgtype6_exit(void)
 {
-	zcrypt_msgtype_unregister(&zcrypt_msgtype6_norng_ops);
 	zcrypt_msgtype_unregister(&zcrypt_msgtype6_ops);
 	zcrypt_msgtype_unregister(&zcrypt_msgtype6_ep11_ops);
 }

From 83f95671943e6394eda4d20fa9458d4a2ae13c5c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Tue, 4 Jul 2023 15:47:11 +0200
Subject: [PATCH 0479/2047] s390/hypfs: simplify memory allocation

Simplify memory allocation for diagnose 204 memory buffer:

- allocate with __vmalloc_node() to enure page alignment
- allocate real / physical memory area also within vmalloc area and handle
  vmalloc to real / physical address translation within diag204().

Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Reviewed-by: Mete Durlu <meted@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/hypfs/hypfs_diag.c | 40 ++++++++----------------------------
 arch/s390/kernel/diag.c      | 24 ++++++++++++++++++++--
 2 files changed, 30 insertions(+), 34 deletions(-)

diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index c3be533c4cd3..c8083dc08db3 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -29,7 +29,6 @@ static enum diag204_sc diag204_store_sc;	/* used subcode for store */
 static enum diag204_format diag204_info_type;	/* used diag 204 data format */
 
 static void *diag204_buf;		/* 4K aligned buffer for diag204 data */
-static void *diag204_buf_vmalloc;	/* vmalloc pointer for diag204 data */
 static int diag204_buf_pages;		/* number of pages for diag204 data */
 
 static struct dentry *dbfs_d204_file;
@@ -212,14 +211,7 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
 
 static void diag204_free_buffer(void)
 {
-	if (!diag204_buf)
-		return;
-	if (diag204_buf_vmalloc) {
-		vfree(diag204_buf_vmalloc);
-		diag204_buf_vmalloc = NULL;
-	} else {
-		free_pages((unsigned long) diag204_buf, 0);
-	}
+	vfree(diag204_buf);
 	diag204_buf = NULL;
 }
 
@@ -228,26 +220,6 @@ static void *page_align_ptr(void *ptr)
 	return (void *) PAGE_ALIGN((unsigned long) ptr);
 }
 
-static void *diag204_alloc_vbuf(int pages)
-{
-	/* The buffer has to be page aligned! */
-	diag204_buf_vmalloc = vmalloc(array_size(PAGE_SIZE, (pages + 1)));
-	if (!diag204_buf_vmalloc)
-		return ERR_PTR(-ENOMEM);
-	diag204_buf = page_align_ptr(diag204_buf_vmalloc);
-	diag204_buf_pages = pages;
-	return diag204_buf;
-}
-
-static void *diag204_alloc_rbuf(void)
-{
-	diag204_buf = (void*)__get_free_pages(GFP_KERNEL,0);
-	if (!diag204_buf)
-		return ERR_PTR(-ENOMEM);
-	diag204_buf_pages = 1;
-	return diag204_buf;
-}
-
 static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
 {
 	if (diag204_buf) {
@@ -256,15 +228,19 @@ static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
 	}
 	if (fmt == DIAG204_INFO_SIMPLE) {
 		*pages = 1;
-		return diag204_alloc_rbuf();
 	} else {/* DIAG204_INFO_EXT */
 		*pages = diag204((unsigned long)DIAG204_SUBC_RSI |
 				 (unsigned long)DIAG204_INFO_EXT, 0, NULL);
 		if (*pages <= 0)
 			return ERR_PTR(-ENOSYS);
-		else
-			return diag204_alloc_vbuf(*pages);
 	}
+	diag204_buf = __vmalloc_node(array_size(*pages, PAGE_SIZE),
+				     PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+				     __builtin_return_address(0));
+	if (!diag204_buf)
+		return ERR_PTR(-ENOMEM);
+	diag204_buf_pages = *pages;
+	return diag204_buf;
 }
 
 /*
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index f3a0f39cbd6c..f287713baf6d 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -11,6 +11,7 @@
 #include <linux/cpu.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-extable.h>
 #include <asm/diag.h>
 #include <asm/trace/diag.h>
@@ -168,11 +169,30 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad
 	return rp.odd;
 }
 
+/**
+ * diag204() - Issue diagnose 204 call.
+ * @subcode: Subcode of diagnose 204 to be executed.
+ * @size: Size of area in pages which @area points to, if given.
+ * @addr: Vmalloc'ed memory area where the result is written to.
+ *
+ * Execute diagnose 204 with the given subcode and write the result to the
+ * memory area specified with @addr. For subcodes which do not write a
+ * result to memory both @size and @addr must be zero. If @addr is
+ * specified it must be page aligned and must have been allocated with
+ * vmalloc(). Conversion to real / physical addresses will be handled by
+ * this function if required.
+ */
 int diag204(unsigned long subcode, unsigned long size, void *addr)
 {
-	diag_stat_inc(DIAG_STAT_X204);
+	if (addr) {
+		if (WARN_ON_ONCE(!is_vmalloc_addr(addr)))
+			return -1;
+		if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE)))
+			return -1;
+	}
 	if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4)
-		addr = (void *)__pa(addr);
+		addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr));
+	diag_stat_inc(DIAG_STAT_X204);
 	size = __diag204(&subcode, size, addr);
 	if (subcode)
 		return -1;

From b7857acc1b1105da5f088fe2593f1a6e3a3d47ce Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Tue, 4 Jul 2023 15:47:12 +0200
Subject: [PATCH 0480/2047] s390/hypfs: remove open-coded PTR_ALIGN()

Get rid of page_align_ptr() and use PTR_ALIGN() instead.

Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/hypfs/hypfs_diag.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index c8083dc08db3..889d83c77826 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -215,11 +215,6 @@ static void diag204_free_buffer(void)
 	diag204_buf = NULL;
 }
 
-static void *page_align_ptr(void *ptr)
-{
-	return (void *) PAGE_ALIGN((unsigned long) ptr);
-}
-
 static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
 {
 	if (diag204_buf) {
@@ -379,7 +374,7 @@ static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size)
 	base = vzalloc(buf_size);
 	if (!base)
 		return -ENOMEM;
-	d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr);
+	d204 = PTR_ALIGN(base + sizeof(d204->hdr), PAGE_SIZE) - sizeof(d204->hdr);
 	rc = diag204_do_store(d204->buf, diag204_buf_pages);
 	if (rc) {
 		vfree(base);

From 3325b4d85799957aa53514e69bed5c9df7771caf Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Tue, 4 Jul 2023 15:47:13 +0200
Subject: [PATCH 0481/2047] s390/hypfs: factor out filesystem code

The s390_hypfs filesystem is deprecated and shouldn't be used due to its
rather odd semantics. It creates a whole directory structure with static
file contents so a user can read a consistent state while within that
directory.
Writing to its update attribute will remove and rebuild nearly the whole
filesystem, so that again a user can read a consistent state, even if
multiple files need to be read.

Given that this wastes a lot of CPU cycles, and involves a lot of code,
binary interfaces have been added quite a couple of years ago, which simply
pass the binary data to user space, and let user space decode the data.
This is the preferred and only way how the data should be retrieved.

The assumption is that there are no users of the s390_hypfs filesystem.
However instead of just removing the code, and having to revert in case
there are actually users, factor the filesystem code out and make it only
available via a new config option.

This config option is supposed to be disabled. If it turns out there are no
complaints the filesystem code can be removed probably in a couple of
years.

Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/Kbuild                |   2 +-
 arch/s390/Kconfig               |  15 +-
 arch/s390/hypfs/Makefile        |  11 +-
 arch/s390/hypfs/hypfs.h         |  10 +-
 arch/s390/hypfs/hypfs_dbfs.c    |  31 ++-
 arch/s390/hypfs/hypfs_diag.c    | 400 ++------------------------------
 arch/s390/hypfs/hypfs_diag.h    |  35 +++
 arch/s390/hypfs/hypfs_diag_fs.c | 393 +++++++++++++++++++++++++++++++
 arch/s390/hypfs/hypfs_vm.c      | 175 +-------------
 arch/s390/hypfs/hypfs_vm.h      |  50 ++++
 arch/s390/hypfs/hypfs_vm_fs.c   | 139 +++++++++++
 arch/s390/hypfs/inode.c         |  35 +--
 12 files changed, 709 insertions(+), 587 deletions(-)
 create mode 100644 arch/s390/hypfs/hypfs_diag.h
 create mode 100644 arch/s390/hypfs/hypfs_diag_fs.c
 create mode 100644 arch/s390/hypfs/hypfs_vm.h
 create mode 100644 arch/s390/hypfs/hypfs_vm_fs.c

diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index 76e362277179..8e4d74f51115 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -3,7 +3,7 @@ obj-y				+= kernel/
 obj-y				+= mm/
 obj-$(CONFIG_KVM)		+= kvm/
 obj-y				+= crypto/
-obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs/
+obj-$(CONFIG_S390_HYPFS)	+= hypfs/
 obj-$(CONFIG_APPLDATA_BASE)	+= appldata/
 obj-y				+= net/
 obj-$(CONFIG_PCI)		+= pci/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c0afca69904e..ea0e8f34eb0d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -877,13 +877,24 @@ config APPLDATA_NET_SUM
 	  This can also be compiled as a module, which will be called
 	  appldata_net_sum.o.
 
-config S390_HYPFS_FS
+config S390_HYPFS
 	def_bool y
+	prompt "s390 hypervisor information"
+	help
+	  This provides several binary files at (debugfs)/s390_hypfs/ to
+	  provide accounting information in an s390 hypervisor environment.
+
+config S390_HYPFS_FS
+	def_bool n
 	prompt "s390 hypervisor file system support"
 	select SYS_HYPERVISOR
+	depends on S390_HYPFS
 	help
 	  This is a virtual file system intended to provide accounting
-	  information in an s390 hypervisor environment.
+	  information in an s390 hypervisor environment. This file system
+	  is deprecated and should not be used.
+
+	  Say N if you are unsure.
 
 source "arch/s390/kvm/Kconfig"
 
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
index 06f601509ce9..c34854d298f8 100644
--- a/arch/s390/hypfs/Makefile
+++ b/arch/s390/hypfs/Makefile
@@ -3,7 +3,12 @@
 # Makefile for the linux hypfs filesystem routines.
 #
 
-obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_dbfs.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_diag.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_diag0c.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_sprp.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_vm.o
 
-s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o
-s390_hypfs-objs += hypfs_diag0c.o
+obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs_diag_fs.o
+obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs_vm_fs.o
+obj-$(CONFIG_S390_HYPFS_FS)	+= inode.o
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index 05f3f9aee5fc..65f4036fd541 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -46,6 +46,15 @@ void hypfs_diag0c_exit(void);
 void hypfs_sprp_init(void);
 void hypfs_sprp_exit(void);
 
+int __hypfs_fs_init(void);
+
+static inline int hypfs_fs_init(void)
+{
+	if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
+		return __hypfs_fs_init();
+	return 0;
+}
+
 /* debugfs interface */
 struct hypfs_dbfs_file;
 
@@ -69,7 +78,6 @@ struct hypfs_dbfs_file {
 	struct dentry		*dentry;
 };
 
-extern void hypfs_dbfs_init(void);
 extern void hypfs_dbfs_exit(void);
 extern void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df);
 extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df);
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index f4c7dbfaf8ee..4024599eb448 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -90,12 +90,33 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df)
 	debugfs_remove(df->dentry);
 }
 
-void hypfs_dbfs_init(void)
+static int __init hypfs_dbfs_init(void)
 {
-	dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
-}
+	int rc = -ENODATA;
 
-void hypfs_dbfs_exit(void)
-{
+	dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
+	if (hypfs_diag_init())
+		goto fail_dbfs_exit;
+	if (hypfs_vm_init())
+		goto fail_hypfs_diag_exit;
+	hypfs_sprp_init();
+	if (hypfs_diag0c_init())
+		goto fail_hypfs_sprp_exit;
+	rc = hypfs_fs_init();
+	if (rc)
+		goto fail_hypfs_diag0c_exit;
+	return 0;
+
+fail_hypfs_diag0c_exit:
+	hypfs_diag0c_exit();
+fail_hypfs_sprp_exit:
+	hypfs_sprp_exit();
+	hypfs_vm_exit();
+fail_hypfs_diag_exit:
+	hypfs_diag_exit();
+	pr_err("Initialization of hypfs failed with rc=%i\n", rc);
+fail_dbfs_exit:
 	debugfs_remove(dbfs_dir);
+	return rc;
 }
+device_initcall(hypfs_dbfs_init)
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 889d83c77826..ea4c436f86a0 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -18,13 +18,11 @@
 #include <linux/mm.h>
 #include <asm/diag.h>
 #include <asm/ebcdic.h>
+#include "hypfs_diag.h"
 #include "hypfs.h"
 
-#define TMP_SIZE 64		/* size of temporary buffers */
-
 #define DBFS_D204_HDR_VERSION	0
 
-static char *diag224_cpu_names;			/* diag 224 name table */
 static enum diag204_sc diag204_store_sc;	/* used subcode for store */
 static enum diag204_format diag204_info_type;	/* used diag 204 data format */
 
@@ -33,172 +31,14 @@ static int diag204_buf_pages;		/* number of pages for diag204 data */
 
 static struct dentry *dbfs_d204_file;
 
-/*
- * DIAG 204 member access functions.
- *
- * Since we have two different diag 204 data formats for old and new s390
- * machines, we do not access the structs directly, but use getter functions for
- * each struct member instead. This should make the code more readable.
- */
-
-/* Time information block */
-
-static inline int info_blk_hdr__size(enum diag204_format type)
+enum diag204_format diag204_get_info_type(void)
 {
-	if (type == DIAG204_INFO_SIMPLE)
-		return sizeof(struct diag204_info_blk_hdr);
-	else /* DIAG204_INFO_EXT */
-		return sizeof(struct diag204_x_info_blk_hdr);
+	return diag204_info_type;
 }
 
-static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
+static void diag204_set_info_type(enum diag204_format type)
 {
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_info_blk_hdr *)hdr)->npar;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_info_blk_hdr *)hdr)->npar;
-}
-
-static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_info_blk_hdr *)hdr)->flags;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_info_blk_hdr *)hdr)->flags;
-}
-
-/* Partition header */
-
-static inline int part_hdr__size(enum diag204_format type)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return sizeof(struct diag204_part_hdr);
-	else /* DIAG204_INFO_EXT */
-		return sizeof(struct diag204_x_part_hdr);
-}
-
-static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_part_hdr *)hdr)->cpus;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_part_hdr *)hdr)->rcpus;
-}
-
-static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
-				       char *name)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name,
-		       DIAG204_LPAR_NAME_LEN);
-	else /* DIAG204_INFO_EXT */
-		memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name,
-		       DIAG204_LPAR_NAME_LEN);
-	EBCASC(name, DIAG204_LPAR_NAME_LEN);
-	name[DIAG204_LPAR_NAME_LEN] = 0;
-	strim(name);
-}
-
-/* CPU info block */
-
-static inline int cpu_info__size(enum diag204_format type)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return sizeof(struct diag204_cpu_info);
-	else /* DIAG204_INFO_EXT */
-		return sizeof(struct diag204_x_cpu_info);
-}
-
-static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_cpu_info *)hdr)->ctidx;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_cpu_info *)hdr)->ctidx;
-}
-
-static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_cpu_info *)hdr)->cpu_addr;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_cpu_info *)hdr)->cpu_addr;
-}
-
-static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_cpu_info *)hdr)->acc_time;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_cpu_info *)hdr)->acc_time;
-}
-
-static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_cpu_info *)hdr)->lp_time;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_cpu_info *)hdr)->lp_time;
-}
-
-static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return 0;	/* online_time not available in simple info */
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_cpu_info *)hdr)->online_time;
-}
-
-/* Physical header */
-
-static inline int phys_hdr__size(enum diag204_format type)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return sizeof(struct diag204_phys_hdr);
-	else /* DIAG204_INFO_EXT */
-		return sizeof(struct diag204_x_phys_hdr);
-}
-
-static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_phys_hdr *)hdr)->cpus;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_phys_hdr *)hdr)->cpus;
-}
-
-/* Physical CPU info block */
-
-static inline int phys_cpu__size(enum diag204_format type)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return sizeof(struct diag204_phys_cpu);
-	else /* DIAG204_INFO_EXT */
-		return sizeof(struct diag204_x_phys_cpu);
-}
-
-static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_phys_cpu *)hdr)->cpu_addr;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr;
-}
-
-static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_phys_cpu *)hdr)->mgm_time;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_phys_cpu *)hdr)->mgm_time;
-}
-
-static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_phys_cpu *)hdr)->ctidx;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_phys_cpu *)hdr)->ctidx;
+	diag204_info_type = type;
 }
 
 /* Diagnose 204 functions */
@@ -215,7 +55,7 @@ static void diag204_free_buffer(void)
 	diag204_buf = NULL;
 }
 
-static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
+void *diag204_get_buffer(enum diag204_format fmt, int *pages)
 {
 	if (diag204_buf) {
 		*pages = diag204_buf_pages;
@@ -262,13 +102,13 @@ static int diag204_probe(void)
 		if (diag204((unsigned long)DIAG204_SUBC_STIB7 |
 			    (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
 			diag204_store_sc = DIAG204_SUBC_STIB7;
-			diag204_info_type = DIAG204_INFO_EXT;
+			diag204_set_info_type(DIAG204_INFO_EXT);
 			goto out;
 		}
 		if (diag204((unsigned long)DIAG204_SUBC_STIB6 |
 			    (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
 			diag204_store_sc = DIAG204_SUBC_STIB6;
-			diag204_info_type = DIAG204_INFO_EXT;
+			diag204_set_info_type(DIAG204_INFO_EXT);
 			goto out;
 		}
 		diag204_free_buffer();
@@ -284,7 +124,7 @@ static int diag204_probe(void)
 	if (diag204((unsigned long)DIAG204_SUBC_STIB4 |
 		    (unsigned long)DIAG204_INFO_SIMPLE, pages, buf) >= 0) {
 		diag204_store_sc = DIAG204_SUBC_STIB4;
-		diag204_info_type = DIAG204_INFO_SIMPLE;
+		diag204_set_info_type(DIAG204_INFO_SIMPLE);
 		goto out;
 	} else {
 		rc = -ENOSYS;
@@ -298,60 +138,15 @@ fail_alloc:
 	return rc;
 }
 
-static int diag204_do_store(void *buf, int pages)
+int diag204_store(void *buf, int pages)
 {
 	int rc;
 
-	rc = diag204((unsigned long) diag204_store_sc |
-		     (unsigned long) diag204_info_type, pages, buf);
+	rc = diag204((unsigned long)diag204_store_sc |
+		     (unsigned long)diag204_get_info_type(), pages, buf);
 	return rc < 0 ? -ENOSYS : 0;
 }
 
-static void *diag204_store(void)
-{
-	void *buf;
-	int pages, rc;
-
-	buf = diag204_get_buffer(diag204_info_type, &pages);
-	if (IS_ERR(buf))
-		goto out;
-	rc = diag204_do_store(buf, pages);
-	if (rc)
-		return ERR_PTR(rc);
-out:
-	return buf;
-}
-
-/* Diagnose 224 functions */
-
-static int diag224_get_name_table(void)
-{
-	/* memory must be below 2GB */
-	diag224_cpu_names = (char *) __get_free_page(GFP_KERNEL | GFP_DMA);
-	if (!diag224_cpu_names)
-		return -ENOMEM;
-	if (diag224(diag224_cpu_names)) {
-		free_page((unsigned long) diag224_cpu_names);
-		return -EOPNOTSUPP;
-	}
-	EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
-	return 0;
-}
-
-static void diag224_delete_name_table(void)
-{
-	free_page((unsigned long) diag224_cpu_names);
-}
-
-static int diag224_idx2name(int index, char *name)
-{
-	memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN),
-	       DIAG204_CPU_NAME_LEN);
-	name[DIAG204_CPU_NAME_LEN] = 0;
-	strim(name);
-	return 0;
-}
-
 struct dbfs_d204_hdr {
 	u64	len;		/* Length of d204 buffer without header */
 	u16	version;	/* Version of header */
@@ -375,7 +170,7 @@ static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size)
 	if (!base)
 		return -ENOMEM;
 	d204 = PTR_ALIGN(base + sizeof(d204->hdr), PAGE_SIZE) - sizeof(d204->hdr);
-	rc = diag204_do_store(d204->buf, diag204_buf_pages);
+	rc = diag204_store(d204->buf, diag204_buf_pages);
 	if (rc) {
 		vfree(base);
 		return rc;
@@ -404,176 +199,21 @@ __init int hypfs_diag_init(void)
 		return -ENODATA;
 	}
 
-	if (diag204_info_type == DIAG204_INFO_EXT)
+	if (diag204_get_info_type() == DIAG204_INFO_EXT)
 		hypfs_dbfs_create_file(&dbfs_file_d204);
 
-	if (MACHINE_IS_LPAR) {
-		rc = diag224_get_name_table();
-		if (rc) {
-			pr_err("The hardware system does not provide all "
-			       "functions required by hypfs\n");
-			debugfs_remove(dbfs_d204_file);
-			return rc;
-		}
+	rc = hypfs_diag_fs_init();
+	if (rc) {
+		pr_err("The hardware system does not provide all functions required by hypfs\n");
+		debugfs_remove(dbfs_d204_file);
 	}
-	return 0;
+	return rc;
 }
 
 void hypfs_diag_exit(void)
 {
 	debugfs_remove(dbfs_d204_file);
-	diag224_delete_name_table();
+	hypfs_diag_fs_exit();
 	diag204_free_buffer();
 	hypfs_dbfs_remove_file(&dbfs_file_d204);
 }
-
-/*
- * Functions to create the directory structure
- * *******************************************
- */
-
-static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
-{
-	struct dentry *cpu_dir;
-	char buffer[TMP_SIZE];
-	void *rc;
-
-	snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type,
-							    cpu_info));
-	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
-	rc = hypfs_create_u64(cpu_dir, "mgmtime",
-			      cpu_info__acc_time(diag204_info_type, cpu_info) -
-			      cpu_info__lp_time(diag204_info_type, cpu_info));
-	if (IS_ERR(rc))
-		return PTR_ERR(rc);
-	rc = hypfs_create_u64(cpu_dir, "cputime",
-			      cpu_info__lp_time(diag204_info_type, cpu_info));
-	if (IS_ERR(rc))
-		return PTR_ERR(rc);
-	if (diag204_info_type == DIAG204_INFO_EXT) {
-		rc = hypfs_create_u64(cpu_dir, "onlinetime",
-				      cpu_info__online_time(diag204_info_type,
-							    cpu_info));
-		if (IS_ERR(rc))
-			return PTR_ERR(rc);
-	}
-	diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
-	rc = hypfs_create_str(cpu_dir, "type", buffer);
-	return PTR_ERR_OR_ZERO(rc);
-}
-
-static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
-{
-	struct dentry *cpus_dir;
-	struct dentry *lpar_dir;
-	char lpar_name[DIAG204_LPAR_NAME_LEN + 1];
-	void *cpu_info;
-	int i;
-
-	part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
-	lpar_name[DIAG204_LPAR_NAME_LEN] = 0;
-	lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
-	if (IS_ERR(lpar_dir))
-		return lpar_dir;
-	cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
-	if (IS_ERR(cpus_dir))
-		return cpus_dir;
-	cpu_info = part_hdr + part_hdr__size(diag204_info_type);
-	for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) {
-		int rc;
-		rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
-		if (rc)
-			return ERR_PTR(rc);
-		cpu_info += cpu_info__size(diag204_info_type);
-	}
-	return cpu_info;
-}
-
-static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
-{
-	struct dentry *cpu_dir;
-	char buffer[TMP_SIZE];
-	void *rc;
-
-	snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type,
-							    cpu_info));
-	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
-	if (IS_ERR(cpu_dir))
-		return PTR_ERR(cpu_dir);
-	rc = hypfs_create_u64(cpu_dir, "mgmtime",
-			      phys_cpu__mgm_time(diag204_info_type, cpu_info));
-	if (IS_ERR(rc))
-		return PTR_ERR(rc);
-	diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
-	rc = hypfs_create_str(cpu_dir, "type", buffer);
-	return PTR_ERR_OR_ZERO(rc);
-}
-
-static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
-{
-	int i;
-	void *cpu_info;
-	struct dentry *cpus_dir;
-
-	cpus_dir = hypfs_mkdir(parent_dir, "cpus");
-	if (IS_ERR(cpus_dir))
-		return cpus_dir;
-	cpu_info = phys_hdr + phys_hdr__size(diag204_info_type);
-	for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) {
-		int rc;
-		rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
-		if (rc)
-			return ERR_PTR(rc);
-		cpu_info += phys_cpu__size(diag204_info_type);
-	}
-	return cpu_info;
-}
-
-int hypfs_diag_create_files(struct dentry *root)
-{
-	struct dentry *systems_dir, *hyp_dir;
-	void *time_hdr, *part_hdr;
-	int i, rc;
-	void *buffer, *ptr;
-
-	buffer = diag204_store();
-	if (IS_ERR(buffer))
-		return PTR_ERR(buffer);
-
-	systems_dir = hypfs_mkdir(root, "systems");
-	if (IS_ERR(systems_dir)) {
-		rc = PTR_ERR(systems_dir);
-		goto err_out;
-	}
-	time_hdr = (struct x_info_blk_hdr *)buffer;
-	part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type);
-	for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) {
-		part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
-		if (IS_ERR(part_hdr)) {
-			rc = PTR_ERR(part_hdr);
-			goto err_out;
-		}
-	}
-	if (info_blk_hdr__flags(diag204_info_type, time_hdr) &
-	    DIAG204_LPAR_PHYS_FLG) {
-		ptr = hypfs_create_phys_files(root, part_hdr);
-		if (IS_ERR(ptr)) {
-			rc = PTR_ERR(ptr);
-			goto err_out;
-		}
-	}
-	hyp_dir = hypfs_mkdir(root, "hyp");
-	if (IS_ERR(hyp_dir)) {
-		rc = PTR_ERR(hyp_dir);
-		goto err_out;
-	}
-	ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
-	if (IS_ERR(ptr)) {
-		rc = PTR_ERR(ptr);
-		goto err_out;
-	}
-	rc = 0;
-
-err_out:
-	return rc;
-}
diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h
new file mode 100644
index 000000000000..7090eff27fef
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ *    implementation.
+ *
+ *    Copyright IBM Corp. 2006, 2008
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _S390_HYPFS_DIAG_H_
+#define _S390_HYPFS_DIAG_H_
+
+#include <asm/diag.h>
+
+enum diag204_format diag204_get_info_type(void);
+void *diag204_get_buffer(enum diag204_format fmt, int *pages);
+int diag204_store(void *buf, int pages);
+
+int __hypfs_diag_fs_init(void);
+void __hypfs_diag_fs_exit(void);
+
+static inline int hypfs_diag_fs_init(void)
+{
+	if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
+		return __hypfs_diag_fs_init();
+	return 0;
+}
+
+static inline void hypfs_diag_fs_exit(void)
+{
+	if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
+		__hypfs_diag_fs_exit();
+}
+
+#endif /* _S390_HYPFS_DIAG_H_ */
diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c
new file mode 100644
index 000000000000..00a6d370a280
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag_fs.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ *    implementation.
+ *
+ *    Copyright IBM Corp. 2006, 2008
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include "hypfs_diag.h"
+#include "hypfs.h"
+
+#define TMP_SIZE 64		/* size of temporary buffers */
+
+static char *diag224_cpu_names;			/* diag 224 name table */
+static int diag224_idx2name(int index, char *name);
+
+/*
+ * DIAG 204 member access functions.
+ *
+ * Since we have two different diag 204 data formats for old and new s390
+ * machines, we do not access the structs directly, but use getter functions for
+ * each struct member instead. This should make the code more readable.
+ */
+
+/* Time information block */
+
+static inline int info_blk_hdr__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_info_blk_hdr);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_info_blk_hdr);
+}
+
+static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_info_blk_hdr *)hdr)->npar;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_info_blk_hdr *)hdr)->npar;
+}
+
+static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_info_blk_hdr *)hdr)->flags;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_info_blk_hdr *)hdr)->flags;
+}
+
+/* Partition header */
+
+static inline int part_hdr__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_part_hdr);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_part_hdr);
+}
+
+static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_part_hdr *)hdr)->cpus;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_part_hdr *)hdr)->rcpus;
+}
+
+static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
+				       char *name)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name,
+		       DIAG204_LPAR_NAME_LEN);
+	else /* DIAG204_INFO_EXT */
+		memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name,
+		       DIAG204_LPAR_NAME_LEN);
+	EBCASC(name, DIAG204_LPAR_NAME_LEN);
+	name[DIAG204_LPAR_NAME_LEN] = 0;
+	strim(name);
+}
+
+/* CPU info block */
+
+static inline int cpu_info__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_cpu_info);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_cpu_info);
+}
+
+static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->ctidx;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->ctidx;
+}
+
+static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->cpu_addr;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->cpu_addr;
+}
+
+static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->acc_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->acc_time;
+}
+
+static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->lp_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->lp_time;
+}
+
+static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return 0;	/* online_time not available in simple info */
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->online_time;
+}
+
+/* Physical header */
+
+static inline int phys_hdr__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_phys_hdr);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_phys_hdr);
+}
+
+static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_hdr *)hdr)->cpus;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_hdr *)hdr)->cpus;
+}
+
+/* Physical CPU info block */
+
+static inline int phys_cpu__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_phys_cpu);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_phys_cpu);
+}
+
+static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_cpu *)hdr)->cpu_addr;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr;
+}
+
+static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_cpu *)hdr)->mgm_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_cpu *)hdr)->mgm_time;
+}
+
+static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_cpu *)hdr)->ctidx;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_cpu *)hdr)->ctidx;
+}
+
+/*
+ * Functions to create the directory structure
+ * *******************************************
+ */
+
+static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+	struct dentry *cpu_dir;
+	char buffer[TMP_SIZE];
+	void *rc;
+
+	snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(),
+							    cpu_info));
+	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+	rc = hypfs_create_u64(cpu_dir, "mgmtime",
+			      cpu_info__acc_time(diag204_get_info_type(), cpu_info) -
+			      cpu_info__lp_time(diag204_get_info_type(), cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	rc = hypfs_create_u64(cpu_dir, "cputime",
+			      cpu_info__lp_time(diag204_get_info_type(), cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	if (diag204_get_info_type() == DIAG204_INFO_EXT) {
+		rc = hypfs_create_u64(cpu_dir, "onlinetime",
+				      cpu_info__online_time(diag204_get_info_type(),
+							    cpu_info));
+		if (IS_ERR(rc))
+			return PTR_ERR(rc);
+	}
+	diag224_idx2name(cpu_info__ctidx(diag204_get_info_type(), cpu_info), buffer);
+	rc = hypfs_create_str(cpu_dir, "type", buffer);
+	return PTR_ERR_OR_ZERO(rc);
+}
+
+static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
+{
+	struct dentry *cpus_dir;
+	struct dentry *lpar_dir;
+	char lpar_name[DIAG204_LPAR_NAME_LEN + 1];
+	void *cpu_info;
+	int i;
+
+	part_hdr__part_name(diag204_get_info_type(), part_hdr, lpar_name);
+	lpar_name[DIAG204_LPAR_NAME_LEN] = 0;
+	lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
+	if (IS_ERR(lpar_dir))
+		return lpar_dir;
+	cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return cpus_dir;
+	cpu_info = part_hdr + part_hdr__size(diag204_get_info_type());
+	for (i = 0; i < part_hdr__rcpus(diag204_get_info_type(), part_hdr); i++) {
+		int rc;
+
+		rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
+		if (rc)
+			return ERR_PTR(rc);
+		cpu_info += cpu_info__size(diag204_get_info_type());
+	}
+	return cpu_info;
+}
+
+static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+	struct dentry *cpu_dir;
+	char buffer[TMP_SIZE];
+	void *rc;
+
+	snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_get_info_type(),
+							    cpu_info));
+	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+	if (IS_ERR(cpu_dir))
+		return PTR_ERR(cpu_dir);
+	rc = hypfs_create_u64(cpu_dir, "mgmtime",
+			      phys_cpu__mgm_time(diag204_get_info_type(), cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	diag224_idx2name(phys_cpu__ctidx(diag204_get_info_type(), cpu_info), buffer);
+	rc = hypfs_create_str(cpu_dir, "type", buffer);
+	return PTR_ERR_OR_ZERO(rc);
+}
+
+static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
+{
+	int i;
+	void *cpu_info;
+	struct dentry *cpus_dir;
+
+	cpus_dir = hypfs_mkdir(parent_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return cpus_dir;
+	cpu_info = phys_hdr + phys_hdr__size(diag204_get_info_type());
+	for (i = 0; i < phys_hdr__cpus(diag204_get_info_type(), phys_hdr); i++) {
+		int rc;
+
+		rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
+		if (rc)
+			return ERR_PTR(rc);
+		cpu_info += phys_cpu__size(diag204_get_info_type());
+	}
+	return cpu_info;
+}
+
+int hypfs_diag_create_files(struct dentry *root)
+{
+	struct dentry *systems_dir, *hyp_dir;
+	void *time_hdr, *part_hdr;
+	void *buffer, *ptr;
+	int i, rc, pages;
+
+	buffer = diag204_get_buffer(diag204_get_info_type(), &pages);
+	if (IS_ERR(buffer))
+		return PTR_ERR(buffer);
+	rc = diag204_store(buffer, pages);
+	if (rc)
+		return rc;
+
+	systems_dir = hypfs_mkdir(root, "systems");
+	if (IS_ERR(systems_dir)) {
+		rc = PTR_ERR(systems_dir);
+		goto err_out;
+	}
+	time_hdr = (struct x_info_blk_hdr *)buffer;
+	part_hdr = time_hdr + info_blk_hdr__size(diag204_get_info_type());
+	for (i = 0; i < info_blk_hdr__npar(diag204_get_info_type(), time_hdr); i++) {
+		part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
+		if (IS_ERR(part_hdr)) {
+			rc = PTR_ERR(part_hdr);
+			goto err_out;
+		}
+	}
+	if (info_blk_hdr__flags(diag204_get_info_type(), time_hdr) &
+	    DIAG204_LPAR_PHYS_FLG) {
+		ptr = hypfs_create_phys_files(root, part_hdr);
+		if (IS_ERR(ptr)) {
+			rc = PTR_ERR(ptr);
+			goto err_out;
+		}
+	}
+	hyp_dir = hypfs_mkdir(root, "hyp");
+	if (IS_ERR(hyp_dir)) {
+		rc = PTR_ERR(hyp_dir);
+		goto err_out;
+	}
+	ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
+	if (IS_ERR(ptr)) {
+		rc = PTR_ERR(ptr);
+		goto err_out;
+	}
+	rc = 0;
+
+err_out:
+	return rc;
+}
+
+/* Diagnose 224 functions */
+
+static int diag224_idx2name(int index, char *name)
+{
+	memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN),
+	       DIAG204_CPU_NAME_LEN);
+	name[DIAG204_CPU_NAME_LEN] = 0;
+	strim(name);
+	return 0;
+}
+
+static int diag224_get_name_table(void)
+{
+	/* memory must be below 2GB */
+	diag224_cpu_names = (char *)__get_free_page(GFP_KERNEL | GFP_DMA);
+	if (!diag224_cpu_names)
+		return -ENOMEM;
+	if (diag224(diag224_cpu_names)) {
+		free_page((unsigned long)diag224_cpu_names);
+		return -EOPNOTSUPP;
+	}
+	EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
+	return 0;
+}
+
+static void diag224_delete_name_table(void)
+{
+	free_page((unsigned long)diag224_cpu_names);
+}
+
+int __init __hypfs_diag_fs_init(void)
+{
+	if (MACHINE_IS_LPAR)
+		return diag224_get_name_table();
+	return 0;
+}
+
+void __hypfs_diag_fs_exit(void)
+{
+	diag224_delete_name_table();
+}
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index a3d881ca0a98..3db40ad853e0 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -14,47 +14,15 @@
 #include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include <asm/timex.h>
+#include "hypfs_vm.h"
 #include "hypfs.h"
 
-#define NAME_LEN 8
 #define DBFS_D2FC_HDR_VERSION 0
 
 static char local_guest[] = "        ";
 static char all_guests[] = "*       ";
 static char *all_groups = all_guests;
-static char *guest_query;
-
-struct diag2fc_data {
-	__u32 version;
-	__u32 flags;
-	__u64 used_cpu;
-	__u64 el_time;
-	__u64 mem_min_kb;
-	__u64 mem_max_kb;
-	__u64 mem_share_kb;
-	__u64 mem_used_kb;
-	__u32 pcpus;
-	__u32 lcpus;
-	__u32 vcpus;
-	__u32 ocpus;
-	__u32 cpu_max;
-	__u32 cpu_shares;
-	__u32 cpu_use_samp;
-	__u32 cpu_delay_samp;
-	__u32 page_wait_samp;
-	__u32 idle_samp;
-	__u32 other_samp;
-	__u32 total_samp;
-	char  guest_name[NAME_LEN];
-};
-
-struct diag2fc_parm_list {
-	char userid[NAME_LEN];
-	char aci_grp[NAME_LEN];
-	__u64 addr;
-	__u32 size;
-	__u32 fmt;
-};
+char *diag2fc_guest_query;
 
 static int diag2fc(int size, char* query, void *addr)
 {
@@ -62,10 +30,10 @@ static int diag2fc(int size, char* query, void *addr)
 	unsigned long rc;
 	struct diag2fc_parm_list parm_list;
 
-	memcpy(parm_list.userid, query, NAME_LEN);
-	ASCEBC(parm_list.userid, NAME_LEN);
-	memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
-	ASCEBC(parm_list.aci_grp, NAME_LEN);
+	memcpy(parm_list.userid, query, DIAG2FC_NAME_LEN);
+	ASCEBC(parm_list.userid, DIAG2FC_NAME_LEN);
+	memcpy(parm_list.aci_grp, all_groups, DIAG2FC_NAME_LEN);
+	ASCEBC(parm_list.aci_grp, DIAG2FC_NAME_LEN);
 	parm_list.addr = (unsigned long)addr;
 	parm_list.size = size;
 	parm_list.fmt = 0x02;
@@ -87,7 +55,7 @@ static int diag2fc(int size, char* query, void *addr)
 /*
  * Allocate buffer for "query" and store diag 2fc at "offset"
  */
-static void *diag2fc_store(char *query, unsigned int *count, int offset)
+void *diag2fc_store(char *query, unsigned int *count, int offset)
 {
 	void *data;
 	int size;
@@ -108,132 +76,11 @@ static void *diag2fc_store(char *query, unsigned int *count, int offset)
 	return data;
 }
 
-static void diag2fc_free(const void *data)
+void diag2fc_free(const void *data)
 {
 	vfree(data);
 }
 
-#define ATTRIBUTE(dir, name, member) \
-do { \
-	void *rc; \
-	rc = hypfs_create_u64(dir, name, member); \
-	if (IS_ERR(rc)) \
-		return PTR_ERR(rc); \
-} while(0)
-
-static int hypfs_vm_create_guest(struct dentry *systems_dir,
-				 struct diag2fc_data *data)
-{
-	char guest_name[NAME_LEN + 1] = {};
-	struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir;
-	int dedicated_flag, capped_value;
-
-	capped_value = (data->flags & 0x00000006) >> 1;
-	dedicated_flag = (data->flags & 0x00000008) >> 3;
-
-	/* guest dir */
-	memcpy(guest_name, data->guest_name, NAME_LEN);
-	EBCASC(guest_name, NAME_LEN);
-	strim(guest_name);
-	guest_dir = hypfs_mkdir(systems_dir, guest_name);
-	if (IS_ERR(guest_dir))
-		return PTR_ERR(guest_dir);
-	ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
-
-	/* logical cpu information */
-	cpus_dir = hypfs_mkdir(guest_dir, "cpus");
-	if (IS_ERR(cpus_dir))
-		return PTR_ERR(cpus_dir);
-	ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
-	ATTRIBUTE(cpus_dir, "capped", capped_value);
-	ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
-	ATTRIBUTE(cpus_dir, "count", data->vcpus);
-	/*
-	 * Note: The "weight_min" attribute got the wrong name.
-	 * The value represents the number of non-stopped (operating)
-	 * CPUS.
-	 */
-	ATTRIBUTE(cpus_dir, "weight_min", data->ocpus);
-	ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
-	ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
-
-	/* memory information */
-	mem_dir = hypfs_mkdir(guest_dir, "mem");
-	if (IS_ERR(mem_dir))
-		return PTR_ERR(mem_dir);
-	ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
-	ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
-	ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
-	ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
-
-	/* samples */
-	samples_dir = hypfs_mkdir(guest_dir, "samples");
-	if (IS_ERR(samples_dir))
-		return PTR_ERR(samples_dir);
-	ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
-	ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
-	ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
-	ATTRIBUTE(samples_dir, "idle", data->idle_samp);
-	ATTRIBUTE(samples_dir, "other", data->other_samp);
-	ATTRIBUTE(samples_dir, "total", data->total_samp);
-	return 0;
-}
-
-int hypfs_vm_create_files(struct dentry *root)
-{
-	struct dentry *dir, *file;
-	struct diag2fc_data *data;
-	unsigned int count = 0;
-	int rc, i;
-
-	data = diag2fc_store(guest_query, &count, 0);
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	/* Hypervisor Info */
-	dir = hypfs_mkdir(root, "hyp");
-	if (IS_ERR(dir)) {
-		rc = PTR_ERR(dir);
-		goto failed;
-	}
-	file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
-	if (IS_ERR(file)) {
-		rc = PTR_ERR(file);
-		goto failed;
-	}
-
-	/* physical cpus */
-	dir = hypfs_mkdir(root, "cpus");
-	if (IS_ERR(dir)) {
-		rc = PTR_ERR(dir);
-		goto failed;
-	}
-	file = hypfs_create_u64(dir, "count", data->lcpus);
-	if (IS_ERR(file)) {
-		rc = PTR_ERR(file);
-		goto failed;
-	}
-
-	/* guests */
-	dir = hypfs_mkdir(root, "systems");
-	if (IS_ERR(dir)) {
-		rc = PTR_ERR(dir);
-		goto failed;
-	}
-
-	for (i = 0; i < count; i++) {
-		rc = hypfs_vm_create_guest(dir, &(data[i]));
-		if (rc)
-			goto failed;
-	}
-	diag2fc_free(data);
-	return 0;
-
-failed:
-	diag2fc_free(data);
-	return rc;
-}
-
 struct dbfs_d2fc_hdr {
 	u64	len;		/* Length of d2fc buffer without header */
 	u16	version;	/* Version of header */
@@ -252,7 +99,7 @@ static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size)
 	struct dbfs_d2fc *d2fc;
 	unsigned int count;
 
-	d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr));
+	d2fc = diag2fc_store(diag2fc_guest_query, &count, sizeof(d2fc->hdr));
 	if (IS_ERR(d2fc))
 		return PTR_ERR(d2fc);
 	store_tod_clock_ext(&d2fc->hdr.tod_ext);
@@ -277,9 +124,9 @@ int hypfs_vm_init(void)
 	if (!MACHINE_IS_VM)
 		return 0;
 	if (diag2fc(0, all_guests, NULL) > 0)
-		guest_query = all_guests;
+		diag2fc_guest_query = all_guests;
 	else if (diag2fc(0, local_guest, NULL) > 0)
-		guest_query = local_guest;
+		diag2fc_guest_query = local_guest;
 	else
 		return -EACCES;
 	hypfs_dbfs_create_file(&dbfs_file_2fc);
diff --git a/arch/s390/hypfs/hypfs_vm.h b/arch/s390/hypfs/hypfs_vm.h
new file mode 100644
index 000000000000..fe2e5851addd
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_vm.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Hypervisor filesystem for Linux on s390. z/VM implementation.
+ *
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _S390_HYPFS_VM_H_
+#define _S390_HYPFS_VM_H_
+
+#define DIAG2FC_NAME_LEN 8
+
+struct diag2fc_data {
+	__u32 version;
+	__u32 flags;
+	__u64 used_cpu;
+	__u64 el_time;
+	__u64 mem_min_kb;
+	__u64 mem_max_kb;
+	__u64 mem_share_kb;
+	__u64 mem_used_kb;
+	__u32 pcpus;
+	__u32 lcpus;
+	__u32 vcpus;
+	__u32 ocpus;
+	__u32 cpu_max;
+	__u32 cpu_shares;
+	__u32 cpu_use_samp;
+	__u32 cpu_delay_samp;
+	__u32 page_wait_samp;
+	__u32 idle_samp;
+	__u32 other_samp;
+	__u32 total_samp;
+	char  guest_name[DIAG2FC_NAME_LEN];
+};
+
+struct diag2fc_parm_list {
+	char userid[DIAG2FC_NAME_LEN];
+	char aci_grp[DIAG2FC_NAME_LEN];
+	__u64 addr;
+	__u32 size;
+	__u32 fmt;
+};
+
+void *diag2fc_store(char *query, unsigned int *count, int offset);
+void diag2fc_free(const void *data);
+extern char *diag2fc_guest_query;
+
+#endif /* _S390_HYPFS_VM_H_ */
diff --git a/arch/s390/hypfs/hypfs_vm_fs.c b/arch/s390/hypfs/hypfs_vm_fs.c
new file mode 100644
index 000000000000..6011289afa8c
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_vm_fs.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Hypervisor filesystem for Linux on s390. z/VM implementation.
+ *
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <asm/extable.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/timex.h>
+#include "hypfs_vm.h"
+#include "hypfs.h"
+
+#define ATTRIBUTE(dir, name, member) \
+do { \
+	void *rc; \
+	rc = hypfs_create_u64(dir, name, member); \
+	if (IS_ERR(rc)) \
+		return PTR_ERR(rc); \
+} while (0)
+
+static int hypfs_vm_create_guest(struct dentry *systems_dir,
+				 struct diag2fc_data *data)
+{
+	char guest_name[DIAG2FC_NAME_LEN + 1] = {};
+	struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir;
+	int dedicated_flag, capped_value;
+
+	capped_value = (data->flags & 0x00000006) >> 1;
+	dedicated_flag = (data->flags & 0x00000008) >> 3;
+
+	/* guest dir */
+	memcpy(guest_name, data->guest_name, DIAG2FC_NAME_LEN);
+	EBCASC(guest_name, DIAG2FC_NAME_LEN);
+	strim(guest_name);
+	guest_dir = hypfs_mkdir(systems_dir, guest_name);
+	if (IS_ERR(guest_dir))
+		return PTR_ERR(guest_dir);
+	ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
+
+	/* logical cpu information */
+	cpus_dir = hypfs_mkdir(guest_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return PTR_ERR(cpus_dir);
+	ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
+	ATTRIBUTE(cpus_dir, "capped", capped_value);
+	ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
+	ATTRIBUTE(cpus_dir, "count", data->vcpus);
+	/*
+	 * Note: The "weight_min" attribute got the wrong name.
+	 * The value represents the number of non-stopped (operating)
+	 * CPUS.
+	 */
+	ATTRIBUTE(cpus_dir, "weight_min", data->ocpus);
+	ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
+	ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
+
+	/* memory information */
+	mem_dir = hypfs_mkdir(guest_dir, "mem");
+	if (IS_ERR(mem_dir))
+		return PTR_ERR(mem_dir);
+	ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
+	ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
+	ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
+	ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
+
+	/* samples */
+	samples_dir = hypfs_mkdir(guest_dir, "samples");
+	if (IS_ERR(samples_dir))
+		return PTR_ERR(samples_dir);
+	ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
+	ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
+	ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
+	ATTRIBUTE(samples_dir, "idle", data->idle_samp);
+	ATTRIBUTE(samples_dir, "other", data->other_samp);
+	ATTRIBUTE(samples_dir, "total", data->total_samp);
+	return 0;
+}
+
+int hypfs_vm_create_files(struct dentry *root)
+{
+	struct dentry *dir, *file;
+	struct diag2fc_data *data;
+	unsigned int count = 0;
+	int rc, i;
+
+	data = diag2fc_store(diag2fc_guest_query, &count, 0);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	/* Hypervisor Info */
+	dir = hypfs_mkdir(root, "hyp");
+	if (IS_ERR(dir)) {
+		rc = PTR_ERR(dir);
+		goto failed;
+	}
+	file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
+	if (IS_ERR(file)) {
+		rc = PTR_ERR(file);
+		goto failed;
+	}
+
+	/* physical cpus */
+	dir = hypfs_mkdir(root, "cpus");
+	if (IS_ERR(dir)) {
+		rc = PTR_ERR(dir);
+		goto failed;
+	}
+	file = hypfs_create_u64(dir, "count", data->lcpus);
+	if (IS_ERR(file)) {
+		rc = PTR_ERR(file);
+		goto failed;
+	}
+
+	/* guests */
+	dir = hypfs_mkdir(root, "systems");
+	if (IS_ERR(dir)) {
+		rc = PTR_ERR(dir);
+		goto failed;
+	}
+
+	for (i = 0; i < count; i++) {
+		rc = hypfs_vm_create_guest(dir, &data[i]);
+		if (rc)
+			goto failed;
+	}
+	diag2fc_free(data);
+	return 0;
+
+failed:
+	diag2fc_free(data);
+	return rc;
+}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index ee919bfc8186..0d53483fec34 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -460,45 +460,18 @@ static const struct super_operations hypfs_s_ops = {
 	.show_options	= hypfs_show_options,
 };
 
-static int __init hypfs_init(void)
+int __init __hypfs_fs_init(void)
 {
 	int rc;
 
-	hypfs_dbfs_init();
-
-	if (hypfs_diag_init()) {
-		rc = -ENODATA;
-		goto fail_dbfs_exit;
-	}
-	if (hypfs_vm_init()) {
-		rc = -ENODATA;
-		goto fail_hypfs_diag_exit;
-	}
-	hypfs_sprp_init();
-	if (hypfs_diag0c_init()) {
-		rc = -ENODATA;
-		goto fail_hypfs_sprp_exit;
-	}
 	rc = sysfs_create_mount_point(hypervisor_kobj, "s390");
 	if (rc)
-		goto fail_hypfs_diag0c_exit;
+		return rc;
 	rc = register_filesystem(&hypfs_type);
 	if (rc)
-		goto fail_filesystem;
+		goto fail;
 	return 0;
-
-fail_filesystem:
+fail:
 	sysfs_remove_mount_point(hypervisor_kobj, "s390");
-fail_hypfs_diag0c_exit:
-	hypfs_diag0c_exit();
-fail_hypfs_sprp_exit:
-	hypfs_sprp_exit();
-	hypfs_vm_exit();
-fail_hypfs_diag_exit:
-	hypfs_diag_exit();
-	pr_err("Initialization of hypfs failed with rc=%i\n", rc);
-fail_dbfs_exit:
-	hypfs_dbfs_exit();
 	return rc;
 }
-device_initcall(hypfs_init)

From 1256e70a082ad855efe351146dfa39207d5e3e70 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Thu, 29 Jun 2023 12:02:19 +0200
Subject: [PATCH 0482/2047] s390/ftrace: enable HAVE_FUNCTION_GRAPH_RETVAL

Add support for tracing return values in the function graph tracer.
This requires return_to_handler() to record gpr2 and the frame pointer

Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/Kconfig              |  1 +
 arch/s390/include/asm/ftrace.h | 17 +++++++++++++++++
 arch/s390/kernel/asm-offsets.c |  7 +++++++
 arch/s390/kernel/mcount.S      |  8 ++++++--
 4 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index ea0e8f34eb0d..c5be7199067b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -174,6 +174,7 @@ config S390
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_ARG_ACCESS_API
 	select HAVE_FUNCTION_ERROR_INJECTION
+	select HAVE_FUNCTION_GRAPH_RETVAL
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_GCC_PLUGINS
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index e5c5cb1207e2..5a82b08f03cd 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -54,6 +54,23 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *
 	return NULL;
 }
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+struct fgraph_ret_regs {
+	unsigned long gpr2;
+	unsigned long fp;
+};
+
+static __always_inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
+{
+	return ret_regs->gpr2;
+}
+
+static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
+{
+	return ret_regs->fp;
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 static __always_inline unsigned long
 ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs)
 {
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 81cf72088041..150809c9ffa5 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/purgatory.h>
 #include <linux/pgtable.h>
+#include <linux/ftrace.h>
 #include <asm/idle.h>
 #include <asm/gmap.h>
 #include <asm/stacktrace.h>
@@ -177,5 +178,11 @@ int main(void)
 	DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size));
 	DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line));
 	DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size));
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	/* function graph return value tracing */
+	OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2);
+	OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp);
+	DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs));
+#endif
 	return 0;
 }
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index dbece2803c50..fd27ff9f2cf3 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -128,10 +128,14 @@ SYM_CODE_END(ftrace_common)
 SYM_FUNC_START(return_to_handler)
 	stmg	%r2,%r5,32(%r15)
 	lgr	%r1,%r15
-	aghi	%r15,-STACK_FRAME_OVERHEAD
+	aghi	%r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE)
 	stg	%r1,__SF_BACKCHAIN(%r15)
+	aghik	%r3,%r15,STACK_FRAME_OVERHEAD
+	stg	%r1,__FGRAPH_RET_FP(%r3)
+	stg	%r2,__FGRAPH_RET_GPR2(%r3)
+	lgr	%r2,%r3
 	brasl	%r14,ftrace_return_to_handler
-	aghi	%r15,STACK_FRAME_OVERHEAD
+	aghi	%r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE
 	lgr	%r14,%r2
 	lmg	%r2,%r5,32(%r15)
 	BR_EX	%r14

From b9b4568843bb6ceee85bf1280473f510701c82e2 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Thu, 6 Jul 2023 16:24:11 +0200
Subject: [PATCH 0483/2047] s390/kexec: make machine_kexec() depend on
 CONFIG_KEXEC_CORE

Make machine_kexec.o and relocate_kernel.o depend on
CONFIG_KEXEC_CORE option as other architectures do.

Still generate machine_kexec_reloc.o unconditionally,
since arch_kexec_do_relocs() function is neded by the
decompressor.

Suggested-by: Nathan Chancellor <nathan@kernel.org>
Reported-by: Nathan Chancellor <nathan@kernel.org>
Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/Makefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 8d7514c72bb8..0df2b88cc0da 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -37,9 +37,9 @@ CFLAGS_unwind_bc.o	+= -fno-optimize-sibling-calls
 obj-y	:= head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y	+= debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
-obj-y	+= sysinfo.o lgr.o os_info.o machine_kexec.o
+obj-y	+= sysinfo.o lgr.o os_info.o
 obj-y	+= runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
-obj-y	+= entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
+obj-y	+= entry.o reipl.o kdebugfs.o alternative.o
 obj-y	+= nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
 obj-y	+= smp.o text_amode31.o stacktrace.o abs_lowcore.o
 
@@ -63,6 +63,7 @@ obj-$(CONFIG_RETHOOK)		+= rethook.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_KEXEC_CORE)	+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_UPROBES)		+= uprobes.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o
 

From 04b8698ae879b88f96c083292f328bd31b555422 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Tue, 11 Jul 2023 17:59:42 +0200
Subject: [PATCH 0484/2047] s390/dcssblk: use IS_ALIGNED() for alignment checks

Use IS_ALIGNED() instead of cumbersome bit manipulations.

Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/block/dcssblk.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 09acf3853a77..4f157dc1608e 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -868,8 +868,8 @@ dcssblk_submit_bio(struct bio *bio)
 	dev_info = bio->bi_bdev->bd_disk->private_data;
 	if (dev_info == NULL)
 		goto fail;
-	if ((bio->bi_iter.bi_sector & 7) != 0 ||
-	    (bio->bi_iter.bi_size & 4095) != 0)
+	if (!IS_ALIGNED(bio->bi_iter.bi_sector, 8) ||
+	    !IS_ALIGNED(bio->bi_iter.bi_size, PAGE_SIZE))
 		/* Request is not page-aligned. */
 		goto fail;
 	/* verify data transfer direction */
@@ -891,7 +891,8 @@ dcssblk_submit_bio(struct bio *bio)
 	bio_for_each_segment(bvec, bio, iter) {
 		page_addr = (unsigned long)bvec_virt(&bvec);
 		source_addr = dev_info->start + (index<<12) + bytes_done;
-		if (unlikely((page_addr & 4095) != 0) || (bvec.bv_len & 4095) != 0)
+		if (unlikely(!IS_ALIGNED(page_addr, PAGE_SIZE) ||
+			     !IS_ALIGNED(bvec.bv_len, PAGE_SIZE)))
 			// More paranoia.
 			goto fail;
 		if (bio_data_dir(bio) == READ) {

From 3b53d7b131bd79d97dd553af84846fde456e029f Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Tue, 11 Jul 2023 17:41:37 +0200
Subject: [PATCH 0485/2047] s390/dcssblk: fix virtual vs physical address
 confusion

Fix virtual vs physical address confusion (which currently are the same).

Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/block/dcssblk.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 4f157dc1608e..6eafd0a34483 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -860,7 +860,7 @@ dcssblk_submit_bio(struct bio *bio)
 	struct bio_vec bvec;
 	struct bvec_iter iter;
 	unsigned long index;
-	unsigned long page_addr;
+	void *page_addr;
 	unsigned long source_addr;
 	unsigned long bytes_done;
 
@@ -889,19 +889,16 @@ dcssblk_submit_bio(struct bio *bio)
 
 	index = (bio->bi_iter.bi_sector >> 3);
 	bio_for_each_segment(bvec, bio, iter) {
-		page_addr = (unsigned long)bvec_virt(&bvec);
+		page_addr = bvec_virt(&bvec);
 		source_addr = dev_info->start + (index<<12) + bytes_done;
-		if (unlikely(!IS_ALIGNED(page_addr, PAGE_SIZE) ||
+		if (unlikely(!IS_ALIGNED((unsigned long)page_addr, PAGE_SIZE) ||
 			     !IS_ALIGNED(bvec.bv_len, PAGE_SIZE)))
 			// More paranoia.
 			goto fail;
-		if (bio_data_dir(bio) == READ) {
-			memcpy((void*)page_addr, (void*)source_addr,
-				bvec.bv_len);
-		} else {
-			memcpy((void*)source_addr, (void*)page_addr,
-				bvec.bv_len);
-		}
+		if (bio_data_dir(bio) == READ)
+			memcpy(page_addr, __va(source_addr), bvec.bv_len);
+		else
+			memcpy(__va(source_addr), page_addr, bvec.bv_len);
 		bytes_done += bvec.bv_len;
 	}
 	bio_endio(bio);

From 355e30ca1a707526b23a9b016fa4f740e9379c8d Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Sun, 2 Jul 2023 22:29:10 +0200
Subject: [PATCH 0486/2047] s390/mm: remove redundant check against
 VMEM_MAX_PHYS

The value of ident_map_size could never exceed the value of
vmemmap as secured by setup_kernel_memory_layout() function:

	/* make sure identity map doesn't overlay with vmemmap */
	ident_map_size = min(ident_map_size, vmemmap_start);

Since VMEM_MAX_PHYS macro is set to vmemmap and a newly added
range is checked against ident_map_size in add_memory_merged()
function anyway, the check against VMEM_MAX_PHYS is redundant.

Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/char/sclp_cmd.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 3c87057436d5..8b4575a0db9f 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -392,10 +392,6 @@ static void __init add_memory_merged(u16 rn)
 		goto skip_add;
 	start = rn2addr(first_rn);
 	size = (unsigned long long) num * sclp.rzm;
-	if (start >= VMEM_MAX_PHYS)
-		goto skip_add;
-	if (start + size > VMEM_MAX_PHYS)
-		size = VMEM_MAX_PHYS - start;
 	if (start >= ident_map_size)
 		goto skip_add;
 	if (start + size > ident_map_size)

From 94fd522069e124297c094840473f0d9637c3d991 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Sun, 2 Jul 2023 23:11:18 +0200
Subject: [PATCH 0487/2047] s390/mm: rework arch_get_mappable_range() callback

As per description in mm/memory_hotplug.c platforms should define
arch_get_mappable_range() that provides maximum possible addressable
physical memory range for which the linear mapping could be created.

The current implementation uses VMEM_MAX_PHYS macro as the maximum
mappable physical address and it is simply a cast to vmemmap. Since
the address is in physical address space the natural upper limit of
MAX_PHYSMEM_BITS is honoured:

	vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);

Further, to make sure the identity mapping would not overlay with
vmemmap, the size of identity mapping could be stripped like this:

	ident_map_size = min(ident_map_size, vmemmap_start);

Similarily, any other memory that could be added (e.g DCSS segment)
should not overlay with vmemmap as well and that is prevented by
using vmemmap (VMEM_MAX_PHYS macro) as the upper limit.

However, while the use of VMEM_MAX_PHYS brings the desired result
it actually poses two issues:

1. As described, vmemmap is handled as a physical address, although
   it is actually a pointer to struct page in virtual address space.

2. As vmemmap is a virtual address it could have been located
   anywhere in the virtual address space. However, the desired
   necessity to honour MAX_PHYSMEM_BITS limit prevents that.

Rework arch_get_mappable_range() callback in a way it does not
use VMEM_MAX_PHYS macro and does not confuse the notion of virtual
vs physical address spacees as result. That paves the way for moving
vmemmap elsewhere and optimizing the virtual address space layout.

Introduce max_mappable preserved boot variable and let function
setup_kernel_memory_layout() set it up. As result, the rest of the
code is does not need to know the virtual memory layout specifics.

Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/boot/startup.c      | 3 +++
 arch/s390/include/asm/setup.h | 1 +
 arch/s390/kernel/setup.c      | 1 +
 arch/s390/mm/vmem.c           | 2 +-
 4 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 64bd7ac3e35d..59abdd0ab56f 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -27,6 +27,7 @@ struct page *__bootdata_preserved(vmemmap);
 unsigned long __bootdata_preserved(vmemmap_size);
 unsigned long __bootdata_preserved(MODULES_VADDR);
 unsigned long __bootdata_preserved(MODULES_END);
+unsigned long __bootdata_preserved(max_mappable);
 unsigned long __bootdata(ident_map_size);
 
 u64 __bootdata_preserved(stfle_fac_list[16]);
@@ -222,6 +223,8 @@ static unsigned long setup_kernel_memory_layout(void)
 	vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
 	/* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */
 	vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
+	/* maximum mappable address as seen by arch_get_mappable_range() */
+	max_mappable = vmemmap_start;
 	/* make sure identity map doesn't overlay with vmemmap */
 	ident_map_size = min(ident_map_size, vmemmap_start);
 	vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index f191255c60db..e795f425627a 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -74,6 +74,7 @@ extern unsigned int zlib_dfltcc_support;
 
 extern int noexec_disabled;
 extern unsigned long ident_map_size;
+extern unsigned long max_mappable;
 
 /* The Write Back bit position in the physaddr is given by the SLPC PCI */
 extern unsigned long mio_wb_bit_mask;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 00d76448319d..393dd8385506 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -146,6 +146,7 @@ static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
 static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
 
 int __bootdata(noexec_disabled);
+unsigned long __bootdata_preserved(max_mappable);
 unsigned long __bootdata(ident_map_size);
 struct physmem_info __bootdata(physmem_info);
 
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index b26649233d12..be69cb2d47eb 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -531,7 +531,7 @@ struct range arch_get_mappable_range(void)
 	struct range mhp_range;
 
 	mhp_range.start = 0;
-	mhp_range.end =  VMEM_MAX_PHYS - 1;
+	mhp_range.end = max_mappable - 1;
 	return mhp_range;
 }
 

From 9916bf4edac6f8f499ce7c42dafb57e242865790 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Wed, 5 Jul 2023 14:17:11 +0200
Subject: [PATCH 0488/2047] s390/extmem: improve reporting of -ERANGE error

Interface segment_warning() reports maximum mappable physical
address for -ERANGE error. Currently that address is the value
of VMEM_MAX_PHYS macro, but that well might change. A better
way to obtain that address is calling arch_get_mappable_range()
callback - one that is used by vmem_add_mapping() and generates
-ERANGE error in the first place.

Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/mm/extmem.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 1bc42ce26599..e41869f5cc95 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -640,10 +640,13 @@ void segment_warning(int rc, char *seg_name)
 		pr_err("There is not enough memory to load or query "
 		       "DCSS %s\n", seg_name);
 		break;
-	case -ERANGE:
-		pr_err("DCSS %s exceeds the kernel mapping range (%lu) "
-		       "and cannot be loaded\n", seg_name, VMEM_MAX_PHYS);
+	case -ERANGE: {
+		struct range mhp_range = arch_get_mappable_range();
+
+		pr_err("DCSS %s exceeds the kernel mapping range (%llu) "
+		       "and cannot be loaded\n", seg_name, mhp_range.end + 1);
 		break;
+	}
 	default:
 		break;
 	}

From e7e828ebeb5d80d42c9ac514db5fb3d33367cf10 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Wed, 5 Jul 2023 15:50:19 +0200
Subject: [PATCH 0489/2047] s390/mm: get rid of VMEM_MAX_PHYS macro

There are no users of VMEM_MAX_PHYS macro left, remove it.

Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/boot/startup.c        | 1 -
 arch/s390/include/asm/pgtable.h | 2 --
 2 files changed, 3 deletions(-)

diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 59abdd0ab56f..a1f792fcc710 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -221,7 +221,6 @@ static unsigned long setup_kernel_memory_layout(void)
 	pages = SECTION_ALIGN_UP(pages);
 	/* keep vmemmap_start aligned to a top level region table entry */
 	vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
-	/* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */
 	vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
 	/* maximum mappable address as seen by arch_get_mappable_range() */
 	max_mappable = vmemmap_start;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index c55f3c3365af..30909fe27c24 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -89,8 +89,6 @@ extern unsigned long __bootdata_preserved(VMALLOC_END);
 extern struct page *__bootdata_preserved(vmemmap);
 extern unsigned long __bootdata_preserved(vmemmap_size);
 
-#define VMEM_MAX_PHYS ((unsigned long) vmemmap)
-
 extern unsigned long __bootdata_preserved(MODULES_VADDR);
 extern unsigned long __bootdata_preserved(MODULES_END);
 #define MODULES_VADDR	MODULES_VADDR

From 5216d853cb154a4866c9984fd41f71583e8bdd39 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Fri, 14 Jul 2023 10:42:56 +0200
Subject: [PATCH 0490/2047] s390/hypfs: stop using ENOSYS error code

ENOSYS should only be returned to userspace when a syscall
is not implemented. The only known user, 'hyptop' is not explicitely
checking for -ENOSYS, therefore use EOPNOTSUPP instead. It is very
unlikely that there are other users, so this change should have no
impact on userspace.

Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/hypfs/hypfs_diag.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index ea4c436f86a0..279b7bba4d43 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -67,7 +67,7 @@ void *diag204_get_buffer(enum diag204_format fmt, int *pages)
 		*pages = diag204((unsigned long)DIAG204_SUBC_RSI |
 				 (unsigned long)DIAG204_INFO_EXT, 0, NULL);
 		if (*pages <= 0)
-			return ERR_PTR(-ENOSYS);
+			return ERR_PTR(-EOPNOTSUPP);
 	}
 	diag204_buf = __vmalloc_node(array_size(*pages, PAGE_SIZE),
 				     PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
@@ -127,7 +127,7 @@ static int diag204_probe(void)
 		diag204_set_info_type(DIAG204_INFO_SIMPLE);
 		goto out;
 	} else {
-		rc = -ENOSYS;
+		rc = -EOPNOTSUPP;
 		goto fail_store;
 	}
 out:
@@ -144,7 +144,7 @@ int diag204_store(void *buf, int pages)
 
 	rc = diag204((unsigned long)diag204_store_sc |
 		     (unsigned long)diag204_get_info_type(), pages, buf);
-	return rc < 0 ? -ENOSYS : 0;
+	return rc < 0 ? -EOPNOTSUPP : 0;
 }
 
 struct dbfs_d204_hdr {

From e3123dfb5373939d65ac2b874189a773d37ac7f5 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Mon, 17 Jul 2023 10:14:32 +0200
Subject: [PATCH 0491/2047] s390/tracing: pass struct ftrace_regs to
 ftrace_trace_function

ftrace_trace_function expects a struct ftrace_regs, but the s390
architecure code passes struct pt_regs. This isn't a problem with the
current code because struct ftrace_regs contains only one member:
struct pt_regs. To avoid issues in the future this should be fixed.

Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/asm-offsets.c |  2 ++
 arch/s390/kernel/mcount.S      | 57 +++++++++++++++++++---------------
 2 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 150809c9ffa5..fa5f6885c74a 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -184,5 +184,7 @@ int main(void)
 	OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp);
 	DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs));
 #endif
+	OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs);
+	DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs));
 	return 0;
 }
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index fd27ff9f2cf3..d2596e0df6fa 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -12,12 +12,19 @@
 #include <asm/export.h>
 
 
-#define STACK_FRAME_SIZE	(STACK_FRAME_OVERHEAD + __PT_SIZE)
-#define STACK_PTREGS		(STACK_FRAME_OVERHEAD)
-#define STACK_PTREGS_GPRS	(STACK_PTREGS + __PT_GPRS)
-#define STACK_PTREGS_PSW	(STACK_PTREGS + __PT_PSW)
-#define STACK_PTREGS_ORIG_GPR2	(STACK_PTREGS + __PT_ORIG_GPR2)
-#define STACK_PTREGS_FLAGS	(STACK_PTREGS + __PT_FLAGS)
+#define STACK_FRAME_SIZE_PTREGS		(STACK_FRAME_OVERHEAD + __PT_SIZE)
+#define STACK_PTREGS			(STACK_FRAME_OVERHEAD)
+#define STACK_PTREGS_GPRS		(STACK_PTREGS + __PT_GPRS)
+#define STACK_PTREGS_PSW		(STACK_PTREGS + __PT_PSW)
+
+#define STACK_FRAME_SIZE_FREGS		(STACK_FRAME_OVERHEAD + __FTRACE_REGS_SIZE)
+#define STACK_FREGS			(STACK_FRAME_OVERHEAD)
+#define STACK_FREGS_PTREGS		(STACK_FRAME_OVERHEAD + __FTRACE_REGS_PT_REGS)
+#define STACK_FREGS_PTREGS_GPRS		(STACK_FREGS_PTREGS + __PT_GPRS)
+#define STACK_FREGS_PTREGS_PSW		(STACK_FREGS_PTREGS + __PT_PSW)
+#define STACK_FREGS_PTREGS_ORIG_GPR2	(STACK_FREGS_PTREGS + __PT_ORIG_GPR2)
+#define STACK_FREGS_PTREGS_FLAGS	(STACK_FREGS_PTREGS + __PT_FLAGS)
+
 /* packed stack: allocate just enough for r14, r15 and backchain */
 #define TRACED_FUNC_FRAME_SIZE	24
 
@@ -53,23 +60,23 @@ SYM_CODE_END(ftrace_stub_direct_tramp)
 	stg	%r1,__SF_BACKCHAIN(%r15)
 	stg	%r0,(__SF_GPRS+8*8)(%r15)
 	stg	%r15,(__SF_GPRS+9*8)(%r15)
-	# allocate pt_regs and stack frame for ftrace_trace_function
-	aghi	%r15,-STACK_FRAME_SIZE
-	stg	%r1,(STACK_PTREGS_GPRS+15*8)(%r15)
-	xc	STACK_PTREGS_ORIG_GPR2(8,%r15),STACK_PTREGS_ORIG_GPR2(%r15)
+	# allocate ftrace_regs and stack frame for ftrace_trace_function
+	aghi	%r15,-STACK_FRAME_SIZE_FREGS
+	stg	%r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
+	xc	STACK_FREGS_PTREGS_ORIG_GPR2(8,%r15),STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
 
 	.if \allregs == 1
-	stg	%r14,(STACK_PTREGS_PSW)(%r15)
-	mvghi	STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS
+	stg	%r14,(STACK_FREGS_PTREGS_PSW)(%r15)
+	mvghi	STACK_FREGS_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS
 	.else
-	xc	STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15)
+	xc	STACK_FREGS_PTREGS_FLAGS(8,%r15),STACK_FREGS_PTREGS_FLAGS(%r15)
 	.endif
 
 	lg	%r14,(__SF_GPRS+8*8)(%r1)	# restore original return address
 	aghi	%r1,-TRACED_FUNC_FRAME_SIZE
 	stg	%r1,__SF_BACKCHAIN(%r15)
-	stg	%r0,(STACK_PTREGS_PSW+8)(%r15)
-	stmg	%r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
+	stg	%r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
+	stmg	%r2,%r14,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
 	.endm
 
 SYM_CODE_START(ftrace_regs_caller)
@@ -96,30 +103,30 @@ SYM_CODE_START(ftrace_common)
 	lg	%r1,0(%r1)
 #endif
 	lgr	%r3,%r14
-	la	%r5,STACK_PTREGS(%r15)
+	la	%r5,STACK_FREGS(%r15)
 	BASR_EX	%r14,%r1
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 # The j instruction gets runtime patched to a nop instruction.
 # See ftrace_enable_ftrace_graph_caller.
 SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL)
 	j	.Lftrace_graph_caller_end
-	lmg	%r2,%r3,(STACK_PTREGS_GPRS+14*8)(%r15)
-	lg	%r4,(STACK_PTREGS_PSW+8)(%r15)
+	lmg	%r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
+	lg	%r4,(STACK_FREGS_PTREGS_PSW+8)(%r15)
 	brasl	%r14,prepare_ftrace_return
-	stg	%r2,(STACK_PTREGS_GPRS+14*8)(%r15)
+	stg	%r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
 .Lftrace_graph_caller_end:
 #endif
-	lg	%r0,(STACK_PTREGS_PSW+8)(%r15)
+	lg	%r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
-	ltg	%r1,STACK_PTREGS_ORIG_GPR2(%r15)
+	ltg	%r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
 	locgrz	%r1,%r0
 #else
-	lg	%r1,STACK_PTREGS_ORIG_GPR2(%r15)
+	lg	%r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
 	ltgr	%r1,%r1
 	jnz	0f
 	lgr	%r1,%r0
 #endif
-0:	lmg	%r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
+0:	lmg	%r2,%r15,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
 	BR_EX	%r1
 SYM_CODE_END(ftrace_common)
 
@@ -164,11 +171,11 @@ SYM_CODE_END(ftrace_shared_hotpatch_trampoline_exrl)
 
 SYM_CODE_START(arch_rethook_trampoline)
 	stg	%r14,(__SF_GPRS+8*8)(%r15)
-	lay	%r15,-STACK_FRAME_SIZE(%r15)
+	lay	%r15,-STACK_FRAME_SIZE_PTREGS(%r15)
 	stmg	%r0,%r14,STACK_PTREGS_GPRS(%r15)
 
 	# store original stack pointer in backchain and pt_regs
-	lay	%r7,STACK_FRAME_SIZE(%r15)
+	lay	%r7,STACK_FRAME_SIZE_PTREGS(%r15)
 	stg	%r7,__SF_BACKCHAIN(%r15)
 	stg	%r7,STACK_PTREGS_GPRS+(15*8)(%r15)
 

From 37002bc6b6039e1491140869c6801e0a2deee43e Mon Sep 17 00:00:00 2001
From: Costa Shulyupin <costa.shul@redhat.com>
Date: Tue, 18 Jul 2023 07:55:02 +0300
Subject: [PATCH 0492/2047] docs: move s390 under arch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

and fix all in-tree references.

Architecture-specific documentation is being moved into Documentation/arch/
as a way of cleaning up the top-level documentation directory and making
the docs hierarchy more closely match the source hierarchy.

Signed-off-by: Costa Shulyupin <costa.shul@redhat.com>
Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
Acked-by: Jonathan Corbet <corbet@lwn.net>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20230718045550.495428-1-costa.shul@redhat.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 Documentation/admin-guide/kernel-parameters.txt   | 4 ++--
 Documentation/arch/index.rst                      | 2 +-
 Documentation/{ => arch}/s390/3270.ChangeLog      | 0
 Documentation/{ => arch}/s390/3270.rst            | 4 ++--
 Documentation/{ => arch}/s390/cds.rst             | 2 +-
 Documentation/{ => arch}/s390/common_io.rst       | 2 +-
 Documentation/{ => arch}/s390/config3270.sh       | 0
 Documentation/{ => arch}/s390/driver-model.rst    | 0
 Documentation/{ => arch}/s390/features.rst        | 0
 Documentation/{ => arch}/s390/index.rst           | 0
 Documentation/{ => arch}/s390/monreader.rst       | 0
 Documentation/{ => arch}/s390/pci.rst             | 2 +-
 Documentation/{ => arch}/s390/qeth.rst            | 0
 Documentation/{ => arch}/s390/s390dbf.rst         | 0
 Documentation/{ => arch}/s390/text_files.rst      | 0
 Documentation/{ => arch}/s390/vfio-ap-locking.rst | 0
 Documentation/{ => arch}/s390/vfio-ap.rst         | 0
 Documentation/{ => arch}/s390/vfio-ccw.rst        | 2 +-
 Documentation/{ => arch}/s390/zfcpdump.rst        | 0
 Documentation/driver-api/s390-drivers.rst         | 4 ++--
 MAINTAINERS                                       | 8 ++++----
 arch/s390/Kconfig                                 | 4 ++--
 arch/s390/include/asm/debug.h                     | 4 ++--
 drivers/s390/char/zcore.c                         | 2 +-
 24 files changed, 20 insertions(+), 20 deletions(-)
 rename Documentation/{ => arch}/s390/3270.ChangeLog (100%)
 rename Documentation/{ => arch}/s390/3270.rst (99%)
 rename Documentation/{ => arch}/s390/cds.rst (99%)
 rename Documentation/{ => arch}/s390/common_io.rst (98%)
 rename Documentation/{ => arch}/s390/config3270.sh (100%)
 rename Documentation/{ => arch}/s390/driver-model.rst (100%)
 rename Documentation/{ => arch}/s390/features.rst (100%)
 rename Documentation/{ => arch}/s390/index.rst (100%)
 rename Documentation/{ => arch}/s390/monreader.rst (100%)
 rename Documentation/{ => arch}/s390/pci.rst (99%)
 rename Documentation/{ => arch}/s390/qeth.rst (100%)
 rename Documentation/{ => arch}/s390/s390dbf.rst (100%)
 rename Documentation/{ => arch}/s390/text_files.rst (100%)
 rename Documentation/{ => arch}/s390/vfio-ap-locking.rst (100%)
 rename Documentation/{ => arch}/s390/vfio-ap.rst (100%)
 rename Documentation/{ => arch}/s390/vfio-ccw.rst (99%)
 rename Documentation/{ => arch}/s390/zfcpdump.rst (100%)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1457995fd41..94b3dc21eec8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -553,7 +553,7 @@
 			others).
 
 	ccw_timeout_log	[S390]
-			See Documentation/s390/common_io.rst for details.
+			See Documentation/arch/s390/common_io.rst for details.
 
 	cgroup_disable=	[KNL] Disable a particular controller or optional feature
 			Format: {name of the controller(s) or feature(s) to disable}
@@ -598,7 +598,7 @@
 			Setting checkreqprot to 1 is deprecated.
 
 	cio_ignore=	[S390]
-			See Documentation/s390/common_io.rst for details.
+			See Documentation/arch/s390/common_io.rst for details.
 
 	clearcpuid=X[,X...] [X86]
 			Disable CPUID feature X for the kernel. See
diff --git a/Documentation/arch/index.rst b/Documentation/arch/index.rst
index 8458b88e9b79..c9a209878cf3 100644
--- a/Documentation/arch/index.rst
+++ b/Documentation/arch/index.rst
@@ -21,7 +21,7 @@ implementation.
    parisc/index
    ../powerpc/index
    ../riscv/index
-   ../s390/index
+   s390/index
    sh/index
    sparc/index
    x86/index
diff --git a/Documentation/s390/3270.ChangeLog b/Documentation/arch/s390/3270.ChangeLog
similarity index 100%
rename from Documentation/s390/3270.ChangeLog
rename to Documentation/arch/s390/3270.ChangeLog
diff --git a/Documentation/s390/3270.rst b/Documentation/arch/s390/3270.rst
similarity index 99%
rename from Documentation/s390/3270.rst
rename to Documentation/arch/s390/3270.rst
index e09e77954238..467eace91473 100644
--- a/Documentation/s390/3270.rst
+++ b/Documentation/arch/s390/3270.rst
@@ -116,7 +116,7 @@ Here are the installation steps in detail:
 	as a 3270, not a 3215.
 
 	5. Run the 3270 configuration script config3270.  It is
-	distributed in this same directory, Documentation/s390, as
+	distributed in this same directory, Documentation/arch/s390, as
 	config3270.sh.  Inspect the output script it produces,
 	/tmp/mkdev3270, and then run that script.  This will create the
 	necessary character special device files and make the necessary
@@ -125,7 +125,7 @@ Here are the installation steps in detail:
 	Then notify /sbin/init that /etc/inittab has changed, by issuing
 	the telinit command with the q operand::
 
-		cd Documentation/s390
+		cd Documentation/arch/s390
 		sh config3270.sh
 		sh /tmp/mkdev3270
 		telinit q
diff --git a/Documentation/s390/cds.rst b/Documentation/arch/s390/cds.rst
similarity index 99%
rename from Documentation/s390/cds.rst
rename to Documentation/arch/s390/cds.rst
index 7006d8209d2e..bcad2a14244a 100644
--- a/Documentation/s390/cds.rst
+++ b/Documentation/arch/s390/cds.rst
@@ -39,7 +39,7 @@ some of them are ESA/390 platform specific.
 
 Note:
   In order to write a driver for S/390, you also need to look into the interface
-  described in Documentation/s390/driver-model.rst.
+  described in Documentation/arch/s390/driver-model.rst.
 
 Note for porting drivers from 2.4:
 
diff --git a/Documentation/s390/common_io.rst b/Documentation/arch/s390/common_io.rst
similarity index 98%
rename from Documentation/s390/common_io.rst
rename to Documentation/arch/s390/common_io.rst
index 846485681ce7..6dcb40cb7145 100644
--- a/Documentation/s390/common_io.rst
+++ b/Documentation/arch/s390/common_io.rst
@@ -136,5 +136,5 @@ debugfs entries
 
   The level of logging can be changed to be more or less verbose by piping to
   /sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
-  documentation on the S/390 debug feature (Documentation/s390/s390dbf.rst)
+  documentation on the S/390 debug feature (Documentation/arch/s390/s390dbf.rst)
   for details.
diff --git a/Documentation/s390/config3270.sh b/Documentation/arch/s390/config3270.sh
similarity index 100%
rename from Documentation/s390/config3270.sh
rename to Documentation/arch/s390/config3270.sh
diff --git a/Documentation/s390/driver-model.rst b/Documentation/arch/s390/driver-model.rst
similarity index 100%
rename from Documentation/s390/driver-model.rst
rename to Documentation/arch/s390/driver-model.rst
diff --git a/Documentation/s390/features.rst b/Documentation/arch/s390/features.rst
similarity index 100%
rename from Documentation/s390/features.rst
rename to Documentation/arch/s390/features.rst
diff --git a/Documentation/s390/index.rst b/Documentation/arch/s390/index.rst
similarity index 100%
rename from Documentation/s390/index.rst
rename to Documentation/arch/s390/index.rst
diff --git a/Documentation/s390/monreader.rst b/Documentation/arch/s390/monreader.rst
similarity index 100%
rename from Documentation/s390/monreader.rst
rename to Documentation/arch/s390/monreader.rst
diff --git a/Documentation/s390/pci.rst b/Documentation/arch/s390/pci.rst
similarity index 99%
rename from Documentation/s390/pci.rst
rename to Documentation/arch/s390/pci.rst
index a1a72a47dc96..d5755484d8e7 100644
--- a/Documentation/s390/pci.rst
+++ b/Documentation/arch/s390/pci.rst
@@ -40,7 +40,7 @@ For example:
   Change the level of logging to be more or less verbose by piping
   a number between 0 and 6 to  /sys/kernel/debug/s390dbf/pci_*/level. For
   details, see the documentation on the S/390 debug feature at
-  Documentation/s390/s390dbf.rst.
+  Documentation/arch/s390/s390dbf.rst.
 
 Sysfs entries
 =============
diff --git a/Documentation/s390/qeth.rst b/Documentation/arch/s390/qeth.rst
similarity index 100%
rename from Documentation/s390/qeth.rst
rename to Documentation/arch/s390/qeth.rst
diff --git a/Documentation/s390/s390dbf.rst b/Documentation/arch/s390/s390dbf.rst
similarity index 100%
rename from Documentation/s390/s390dbf.rst
rename to Documentation/arch/s390/s390dbf.rst
diff --git a/Documentation/s390/text_files.rst b/Documentation/arch/s390/text_files.rst
similarity index 100%
rename from Documentation/s390/text_files.rst
rename to Documentation/arch/s390/text_files.rst
diff --git a/Documentation/s390/vfio-ap-locking.rst b/Documentation/arch/s390/vfio-ap-locking.rst
similarity index 100%
rename from Documentation/s390/vfio-ap-locking.rst
rename to Documentation/arch/s390/vfio-ap-locking.rst
diff --git a/Documentation/s390/vfio-ap.rst b/Documentation/arch/s390/vfio-ap.rst
similarity index 100%
rename from Documentation/s390/vfio-ap.rst
rename to Documentation/arch/s390/vfio-ap.rst
diff --git a/Documentation/s390/vfio-ccw.rst b/Documentation/arch/s390/vfio-ccw.rst
similarity index 99%
rename from Documentation/s390/vfio-ccw.rst
rename to Documentation/arch/s390/vfio-ccw.rst
index 37026fa18179..42960b7b0d70 100644
--- a/Documentation/s390/vfio-ccw.rst
+++ b/Documentation/arch/s390/vfio-ccw.rst
@@ -440,6 +440,6 @@ Reference
 1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
 2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
 3. https://en.wikipedia.org/wiki/Channel_I/O
-4. Documentation/s390/cds.rst
+4. Documentation/arch/s390/cds.rst
 5. Documentation/driver-api/vfio.rst
 6. Documentation/driver-api/vfio-mediated-device.rst
diff --git a/Documentation/s390/zfcpdump.rst b/Documentation/arch/s390/zfcpdump.rst
similarity index 100%
rename from Documentation/s390/zfcpdump.rst
rename to Documentation/arch/s390/zfcpdump.rst
diff --git a/Documentation/driver-api/s390-drivers.rst b/Documentation/driver-api/s390-drivers.rst
index 5158577bc29b..8c0845c4eee7 100644
--- a/Documentation/driver-api/s390-drivers.rst
+++ b/Documentation/driver-api/s390-drivers.rst
@@ -27,7 +27,7 @@ not strictly considered I/O devices. They are considered here as well,
 although they are not the focus of this document.
 
 Some additional information can also be found in the kernel source under
-Documentation/s390/driver-model.rst.
+Documentation/arch/s390/driver-model.rst.
 
 The css bus
 ===========
@@ -38,7 +38,7 @@ into several categories:
 * Standard I/O subchannels, for use by the system. They have a child
   device on the ccw bus and are described below.
 * I/O subchannels bound to the vfio-ccw driver. See
-  Documentation/s390/vfio-ccw.rst.
+  Documentation/arch/s390/vfio-ccw.rst.
 * Message subchannels. No Linux driver currently exists.
 * CHSC subchannels (at most one). The chsc subchannel driver can be used
   to send asynchronous chsc commands.
diff --git a/MAINTAINERS b/MAINTAINERS
index d516295978a4..262736152862 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18596,7 +18596,7 @@ L:	linux-s390@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git
 F:	Documentation/driver-api/s390-drivers.rst
-F:	Documentation/s390/
+F:	Documentation/arch/s390/
 F:	arch/s390/
 F:	drivers/s390/
 F:	drivers/watchdog/diag288_wdt.c
@@ -18657,7 +18657,7 @@ M:	Niklas Schnelle <schnelle@linux.ibm.com>
 M:	Gerald Schaefer <gerald.schaefer@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 S:	Supported
-F:	Documentation/s390/pci.rst
+F:	Documentation/arch/s390/pci.rst
 F:	arch/s390/pci/
 F:	drivers/pci/hotplug/s390_pci_hpc.c
 
@@ -18674,7 +18674,7 @@ M:	Halil Pasic <pasic@linux.ibm.com>
 M:	Jason Herne <jjherne@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 S:	Supported
-F:	Documentation/s390/vfio-ap*
+F:	Documentation/arch/s390/vfio-ap*
 F:	drivers/s390/crypto/vfio_ap*
 
 S390 VFIO-CCW DRIVER
@@ -18684,7 +18684,7 @@ R:	Halil Pasic <pasic@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 L:	kvm@vger.kernel.org
 S:	Supported
-F:	Documentation/s390/vfio-ccw.rst
+F:	Documentation/arch/s390/vfio-ccw.rst
 F:	drivers/s390/cio/vfio_ccw*
 F:	include/uapi/linux/vfio_ccw.h
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c5be7199067b..d9d50a7a2016 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -754,9 +754,9 @@ config CRASH_DUMP
 	  Crash dump kernels are loaded in the main kernel with kexec-tools
 	  into a specially reserved region and then later executed after
 	  a crash by kdump/kexec.
-	  Refer to <file:Documentation/s390/zfcpdump.rst> for more details on this.
+	  Refer to <file:Documentation/arch/s390/zfcpdump.rst> for more details on this.
 	  This option also enables s390 zfcpdump.
-	  See also <file:Documentation/s390/zfcpdump.rst>
+	  See also <file:Documentation/arch/s390/zfcpdump.rst>
 
 endmenu
 
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index ac665b9670c5..ccd4e148b5ed 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -222,7 +222,7 @@ static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
 
 /*
  * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
- * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
+ * stored in the s390dbf. See Documentation/arch/s390/s390dbf.rst for more details!
  */
 extern debug_entry_t *
 __debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
@@ -350,7 +350,7 @@ static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
 
 /*
  * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
- * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
+ * stored in the s390dbf. See Documentation/arch/s390/s390dbf.rst for more details!
  */
 extern debug_entry_t *
 __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 942c73a11ca3..bc3be0330f1d 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -3,7 +3,7 @@
  * zcore module to export memory content and register sets for creating system
  * dumps on SCSI/NVMe disks (zfcp/nvme dump).
  *
- * For more information please refer to Documentation/s390/zfcpdump.rst
+ * For more information please refer to Documentation/arch/s390/zfcpdump.rst
  *
  * Copyright IBM Corp. 2003, 2008
  * Author(s): Michael Holzheu

From 305b9f4f7bebc12610035f8cd865a0db87df81b6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sat, 22 Jul 2023 02:13:58 +0900
Subject: [PATCH 0493/2047] s390: use obj-y to descend into drivers/s390/

The single build rule does not work with the drivers-y syntax. [1]

Use the standard obj-y syntax. It moves the objects from drivers/s390/
to slightly lower address, but fixes the reported issue.

[1]: https://lore.kernel.org/linux-kbuild/d57ba55f-20a3-b836-783d-b49c8a161b6e@kernel.org/T/#m27f781ab60acadfed8a9e9642f30d5414a5e2df3

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Tested-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20230721171358.3612099-1-masahiroy@kernel.org
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/Makefile | 1 -
 drivers/Makefile   | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 5ed242897b0d..a53a36ee0731 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -119,7 +119,6 @@ export KBUILD_CFLAGS_DECOMPRESSOR
 OBJCOPYFLAGS	:= -O binary
 
 libs-y		+= arch/s390/lib/
-drivers-y	+= drivers/s390/
 
 boot		:= arch/s390/boot
 syscalls	:= arch/s390/kernel/syscalls
diff --git a/drivers/Makefile b/drivers/Makefile
index 7241d80a7b29..a7459e77df37 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -195,3 +195,5 @@ obj-$(CONFIG_PECI)		+= peci/
 obj-$(CONFIG_HTE)		+= hte/
 obj-$(CONFIG_DRM_ACCEL)		+= accel/
 obj-$(CONFIG_CDX_BUS)		+= cdx/
+
+obj-$(CONFIG_S390)		+= s390/

From 8858babff615ee366bf570c1facfbd0845dd924e Mon Sep 17 00:00:00 2001
From: William Qiu <william.qiu@starfivetech.com>
Date: Mon, 24 Jul 2023 18:10:53 +0800
Subject: [PATCH 0494/2047] dt-bindings: spi: add reference file to YAML

In JH7110 SoC, the spi module needs to use "arm,primecell-periphid" to
override the h/w ID value to  correctly init "spi_dev", so add
"primecell.yaml" as the reference file for YAML.

Signed-off-by: William Qiu <william.qiu@starfivetech.com>
Link: https://lore.kernel.org/r/20230724101054.25268-2-william.qiu@starfivetech.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-pl022.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/spi/spi-pl022.yaml b/Documentation/devicetree/bindings/spi/spi-pl022.yaml
index 91e540a92faf..5e5a704a766e 100644
--- a/Documentation/devicetree/bindings/spi/spi-pl022.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-pl022.yaml
@@ -11,6 +11,7 @@ maintainers:
 
 allOf:
   - $ref: spi-controller.yaml#
+  - $ref: /schemas/arm/primecell.yaml#
 
 # We need a select here so we don't match all nodes with 'arm,primecell'
 select:

From e0606daeaab4fba3cc418a202da81aa7a57c0342 Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Wed, 7 Jun 2023 20:36:00 +0500
Subject: [PATCH 0495/2047] selftests: prctl: Add new prctl test for
 PR_SET_NAME

This patch will add the new test, which covers the prctl call
PR_SET_NAME command. The test tries to give a name using the PR_SET_NAME
call and then confirm it that it changed correctly by using  PR_GET_NAME.
It also tries to rename it with empty name.In the test PR_GET_NAME is
tested by passing null pointer to it and check its behaviour.

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Tested-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/prctl/Makefile        |  2 +-
 .../selftests/prctl/set-process-name.c        | 62 +++++++++++++++++++
 2 files changed, 63 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/prctl/set-process-name.c

diff --git a/tools/testing/selftests/prctl/Makefile b/tools/testing/selftests/prctl/Makefile
index c058b81eeb41..cfc35d29fc2e 100644
--- a/tools/testing/selftests/prctl/Makefile
+++ b/tools/testing/selftests/prctl/Makefile
@@ -5,7 +5,7 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
 ifeq ($(ARCH),x86)
 TEST_PROGS := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test \
-		disable-tsc-test set-anon-vma-name-test
+		disable-tsc-test set-anon-vma-name-test set-process-name
 all: $(TEST_PROGS)
 
 include ../lib.mk
diff --git a/tools/testing/selftests/prctl/set-process-name.c b/tools/testing/selftests/prctl/set-process-name.c
new file mode 100644
index 000000000000..3bc5e0e09eb9
--- /dev/null
+++ b/tools/testing/selftests/prctl/set-process-name.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This test covers the PR_SET_NAME functionality of prctl calls
+ */
+
+#include <errno.h>
+#include <sys/prctl.h>
+#include <string.h>
+
+#include "../kselftest_harness.h"
+
+#define CHANGE_NAME "changename"
+#define EMPTY_NAME ""
+#define TASK_COMM_LEN 16
+
+int set_name(char *name)
+{
+	int res;
+
+	res = prctl(PR_SET_NAME, name, NULL, NULL, NULL);
+
+	if (res < 0)
+		return -errno;
+	return res;
+}
+
+int check_is_name_correct(char *check_name)
+{
+	char name[TASK_COMM_LEN];
+	int res;
+
+	res = prctl(PR_GET_NAME, name, NULL, NULL, NULL);
+
+	if (res < 0)
+		return -errno;
+
+	return !strcmp(name, check_name);
+}
+
+int check_null_pointer(char *check_name)
+{
+	char *name = NULL;
+	int res;
+
+	res = prctl(PR_GET_NAME, name, NULL, NULL, NULL);
+
+	return res;
+}
+
+TEST(rename_process) {
+
+	EXPECT_GE(set_name(CHANGE_NAME), 0);
+	EXPECT_TRUE(check_is_name_correct(CHANGE_NAME));
+
+	EXPECT_GE(set_name(EMPTY_NAME), 0);
+	EXPECT_TRUE(check_is_name_correct(EMPTY_NAME));
+
+	EXPECT_GE(set_name(CHANGE_NAME), 0);
+	EXPECT_LT(check_null_pointer(CHANGE_NAME), 0);
+}
+
+TEST_HARNESS_MAIN

From 49aa77165c00e5047b17b0072bbb41cc83a9f197 Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@ew.tq-group.com>
Date: Fri, 21 Jul 2023 13:10:19 +0200
Subject: [PATCH 0496/2047] spi: lpspi: Add power-domains to the DT bindings

i.MX8(X) based SoC use a power domain. Allow supplying this domain in
bindings.

Signed-off-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230721111020.1234278-2-alexander.stein@ew.tq-group.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml b/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml
index e91425012319..727c5346b8ce 100644
--- a/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml
@@ -63,6 +63,9 @@ properties:
     maximum: 2
     default: 1
 
+  power-domains:
+    maxItems: 1
+
 required:
   - compatible
   - reg

From 84dd7f19e72656fac5faf67f1b1809e57e9788cc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 6 Jul 2023 11:48:07 -0700
Subject: [PATCH 0497/2047] checkpatch: Complain about unexpected uses of RCU
 Tasks Trace

RCU Tasks Trace is quite specialized, having been created specifically
for sleepable BPF programs.  Because it allows general blocking within
readers, any new use of RCU Tasks Trace must take current use cases into
account.  Therefore, update checkpatch.pl to complain about use of any of
the RCU Tasks Trace API members outside of BPF and outside of RCU itself.

[ paulmck: Apply Joe Perches feedback. ]

Cc: Andy Whitcroft <apw@canonical.com> (maintainer:CHECKPATCH)
Cc: Joe Perches <joe@perches.com> (maintainer:CHECKPATCH)
Cc: Dwaipayan Ray <dwaipayanray1@gmail.com> (reviewer:CHECKPATCH)
Cc: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: <bpf@vger.kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 scripts/checkpatch.pl | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 880fde13d9b8..a9841148cde2 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -7457,6 +7457,30 @@ sub process {
 			}
 		}
 
+# Complain about RCU Tasks Trace used outside of BPF (and of course, RCU).
+		our $rcu_trace_funcs = qr{(?x:
+			rcu_read_lock_trace |
+			rcu_read_lock_trace_held |
+			rcu_read_unlock_trace |
+			call_rcu_tasks_trace |
+			synchronize_rcu_tasks_trace |
+			rcu_barrier_tasks_trace |
+			rcu_request_urgent_qs_task
+		)};
+		our $rcu_trace_paths = qr{(?x:
+			kernel/bpf/ |
+			include/linux/bpf |
+			net/bpf/ |
+			kernel/rcu/ |
+			include/linux/rcu
+		)};
+		if ($line =~ /\b($rcu_trace_funcs)\s*\(/) {
+			if ($realfile !~ m{^$rcu_trace_paths}) {
+				WARN("RCU_TASKS_TRACE",
+				     "use of RCU tasks trace is incorrect outside BPF or core RCU code\n" . $herecurr);
+			}
+		}
+
 # check for lockdep_set_novalidate_class
 		if ($line =~ /^.\s*lockdep_set_novalidate_class\s*\(/ ||
 		    $line =~ /__lockdep_no_validate__\s*\)/ ) {

From f7f9a0c8736d418af8d981000ea68ea43b395da4 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 15 Jul 2023 23:03:52 -0400
Subject: [PATCH 0498/2047] iov_iter: Map the page later in
 copy_page_from_iter_atomic()

Remove a couple of calls to kunmap_atomic() in the rare error cases.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kent Overstreet <kent.overstreet@linux.dev>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 lib/iov_iter.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index e4dc809d1075..db58af778291 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -569,20 +569,20 @@ EXPORT_SYMBOL(iov_iter_zero);
 size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
 				  struct iov_iter *i)
 {
-	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
-	if (!page_copy_sane(page, offset, bytes)) {
-		kunmap_atomic(kaddr);
+	char *p;
+
+	if (!page_copy_sane(page, offset, bytes))
 		return 0;
-	}
-	if (WARN_ON_ONCE(!i->data_source)) {
-		kunmap_atomic(kaddr);
+	if (WARN_ON_ONCE(!i->data_source))
 		return 0;
-	}
+
+	p = kmap_atomic(page) + offset;
 	iterate_and_advance(i, bytes, base, len, off,
 		copyin(p + off, base, len),
 		memcpy_from_iter(i, p + off, base, len)
 	)
-	kunmap_atomic(kaddr);
+	kunmap_atomic(p);
+
 	return bytes;
 }
 EXPORT_SYMBOL(copy_page_from_iter_atomic);

From 908a1ad89466c1febf20bfe0037b84fc66f8a3f8 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 6 Jun 2023 16:43:32 -0400
Subject: [PATCH 0499/2047] iov_iter: Handle compound highmem pages in
 copy_page_from_iter_atomic()

copy_page_from_iter_atomic() already handles !highmem compound
pages correctly, but if we are passed a highmem compound page,
each base page needs to be mapped & unmapped individually.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kent Overstreet <kent.overstreet@linux.dev>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 lib/iov_iter.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index db58af778291..eb3dffb24aae 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -566,24 +566,37 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_zero);
 
-size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
-				  struct iov_iter *i)
+size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
+		size_t bytes, struct iov_iter *i)
 {
-	char *p;
+	size_t n, copied = 0;
 
 	if (!page_copy_sane(page, offset, bytes))
 		return 0;
 	if (WARN_ON_ONCE(!i->data_source))
 		return 0;
 
-	p = kmap_atomic(page) + offset;
-	iterate_and_advance(i, bytes, base, len, off,
-		copyin(p + off, base, len),
-		memcpy_from_iter(i, p + off, base, len)
-	)
-	kunmap_atomic(p);
+	do {
+		char *p;
 
-	return bytes;
+		n = bytes - copied;
+		if (PageHighMem(page)) {
+			page += offset / PAGE_SIZE;
+			offset %= PAGE_SIZE;
+			n = min_t(size_t, n, PAGE_SIZE - offset);
+		}
+
+		p = kmap_atomic(page) + offset;
+		iterate_and_advance(i, n, base, len, off,
+			copyin(p + off, base, len),
+			memcpy_from_iter(i, p + off, base, len)
+		)
+		kunmap_atomic(p);
+		copied += n;
+		offset += n;
+	} while (PageHighMem(page) && copied != bytes && n > 0);
+
+	return copied;
 }
 EXPORT_SYMBOL(copy_page_from_iter_atomic);
 

From 1b0306981e0f1e5a9ad3d08b9c88bfe21b68e3b9 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 9 Jul 2023 18:17:33 -0400
Subject: [PATCH 0500/2047] iov_iter: Add copy_folio_from_iter_atomic()

Add a folio wrapper around copy_page_from_iter_atomic().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 include/linux/uio.h | 9 ++++++++-
 lib/iov_iter.c      | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index ff81e5ccaef2..42bce38a8e87 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -163,7 +163,7 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
 	return ret;
 }
 
-size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
+size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
 				  size_t bytes, struct iov_iter *i);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 void iov_iter_revert(struct iov_iter *i, size_t bytes);
@@ -184,6 +184,13 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset,
 {
 	return copy_page_to_iter(&folio->page, offset, bytes, i);
 }
+
+static inline size_t copy_folio_from_iter_atomic(struct folio *folio,
+		size_t offset, size_t bytes, struct iov_iter *i)
+{
+	return copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
+}
+
 size_t copy_page_to_iter_nofault(struct page *page, unsigned offset,
 				 size_t bytes, struct iov_iter *i);
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index eb3dffb24aae..424737045b97 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -566,7 +566,7 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_zero);
 
-size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
+size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
 		size_t bytes, struct iov_iter *i)
 {
 	size_t n, copied = 0;

From a221ab717c43147f728d93513923ba3528f861bf Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 2 Jun 2023 18:09:11 -0400
Subject: [PATCH 0501/2047] iomap: Remove large folio handling in
 iomap_invalidate_folio()

We do not need to release the iomap_page in iomap_invalidate_folio()
to allow the folio to be split.  The splitting code will call
->release_folio() if there is still per-fs private data attached to
the folio.  At that point, we will check if the folio is still dirty
and decline to release the iomap_page.  It is possible to trigger the
warning in perfectly legitimate circumstances (eg if a disk read fails,
we do a partial write to the folio, then we truncate the folio), which
will cause those writes to be lost.

Fixes: 60d8231089f0 ("iomap: Support large folios in invalidatepage")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/buffered-io.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index aa8967cca1a3..7d2f70708f37 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -508,11 +508,6 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
 		WARN_ON_ONCE(folio_test_writeback(folio));
 		folio_cancel_dirty(folio);
 		iomap_page_release(folio);
-	} else if (folio_test_large(folio)) {
-		/* Must release the iop so the page can be split */
-		WARN_ON_ONCE(!folio_test_uptodate(folio) &&
-			     folio_test_dirty(folio));
-		iomap_page_release(folio);
 	}
 }
 EXPORT_SYMBOL_GPL(iomap_invalidate_folio);

From 32b29cc9db455f69ac792c637aea11493bfaf1bc Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 2 Jun 2023 16:33:20 -0400
Subject: [PATCH 0502/2047] doc: Correct the description of ->release_folio

The filesystem ->release_folio method is called under more circumstances
now than when the documentation was written.  The second sentence
describing the interpretation of the return value is the wrong polarity
(false indicates failure, not success).  And the third sentence is also
wrong (the kernel calls try_to_free_buffers() instead).

So replace the entire paragraph with a detailed description of what the
state of the folio may be, the meaning of the gfp parameter, why the
method is being called and what the filesystem is expected to do.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 Documentation/filesystems/locking.rst | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index ed148919e11a..ca3d24becbf5 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -374,10 +374,17 @@ invalidate_lock before invalidating page cache in truncate / hole punch
 path (and thus calling into ->invalidate_folio) to block races between page
 cache invalidation and page cache filling functions (fault, read, ...).
 
-->release_folio() is called when the kernel is about to try to drop the
-buffers from the folio in preparation for freeing it.  It returns false to
-indicate that the buffers are (or may be) freeable.  If ->release_folio is
-NULL, the kernel assumes that the fs has no private interest in the buffers.
+->release_folio() is called when the MM wants to make a change to the
+folio that would invalidate the filesystem's private data.  For example,
+it may be about to be removed from the address_space or split.  The folio
+is locked and not under writeback.  It may be dirty.  The gfp parameter
+is not usually used for allocation, but rather to indicate what the
+filesystem may do to attempt to free the private data.  The filesystem may
+return false to indicate that the folio's private data cannot be freed.
+If it returns true, it should have already removed the private data from
+the folio.  If a filesystem does not provide a ->release_folio method,
+the pagecache will assume that private data is buffer_heads and call
+try_to_free_buffers().
 
 ->free_folio() is called when the kernel has dropped the folio
 from the page cache.

From 7a8eb01b078f6280089347feee02aebda0024880 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 2 Jun 2023 18:07:48 -0400
Subject: [PATCH 0503/2047] iomap: Remove unnecessary test from
 iomap_release_folio()

The check for the folio being under writeback is unnecessary; the caller
has checked this and the folio is locked, so the folio cannot be under
writeback at this point.

The comment is somewhat misleading in that it talks about one specific
situation in which we can see a dirty folio.  There are others, so change
the comment to explain why we can't release the iomap_page.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 7d2f70708f37..cc640576f9f7 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -483,12 +483,11 @@ bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags)
 			folio_size(folio));
 
 	/*
-	 * mm accommodates an old ext3 case where clean folios might
-	 * not have had the dirty bit cleared.  Thus, it can send actual
-	 * dirty folios to ->release_folio() via shrink_active_list();
-	 * skip those here.
+	 * If the folio is dirty, we refuse to release our metadata because
+	 * it may be partially dirty.  Once we track per-block dirty state,
+	 * we can release the metadata if every block is dirty.
 	 */
-	if (folio_test_dirty(folio) || folio_test_writeback(folio))
+	if (folio_test_dirty(folio))
 		return false;
 	iomap_page_release(folio);
 	return true;

From ffc143db63eeea7c8a27deb3c56d090a220a1ace Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 26 May 2023 16:43:23 -0400
Subject: [PATCH 0504/2047] filemap: Add fgf_t typedef

Similarly to gfp_t, define fgf_t as its own type to prevent various
misuses and confusion.  Leave the flags as FGP_* for now to reduce the
size of this patch; they will be converted to FGF_* later.  Move the
documentation to the definition of the type insted of burying it in the
__filemap_get_folio() documentation.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/btrfs/file.c         |  6 +++---
 fs/f2fs/compress.c      |  2 +-
 fs/f2fs/f2fs.h          |  2 +-
 fs/iomap/buffered-io.c  |  2 +-
 include/linux/pagemap.h | 48 +++++++++++++++++++++++++++++++----------
 mm/filemap.c            | 19 ++--------------
 mm/folio-compat.c       |  2 +-
 7 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fd03e689a6be..3876fae90fc3 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -876,9 +876,9 @@ static int prepare_uptodate_page(struct inode *inode,
 	return 0;
 }
 
-static unsigned int get_prepare_fgp_flags(bool nowait)
+static fgf_t get_prepare_fgp_flags(bool nowait)
 {
-	unsigned int fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
+	fgf_t fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
 
 	if (nowait)
 		fgp_flags |= FGP_NOWAIT;
@@ -910,7 +910,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,
 	int i;
 	unsigned long index = pos >> PAGE_SHIFT;
 	gfp_t mask = get_prepare_gfp_flags(inode, nowait);
-	unsigned int fgp_flags = get_prepare_fgp_flags(nowait);
+	fgf_t fgp_flags = get_prepare_fgp_flags(nowait);
 	int err = 0;
 	int faili;
 
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 236d890f560b..0f7df9c11af3 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1045,7 +1045,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc,
 	struct address_space *mapping = cc->inode->i_mapping;
 	struct page *page;
 	sector_t last_block_in_bio;
-	unsigned fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT;
+	fgf_t fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT;
 	pgoff_t start_idx = start_idx_of_cluster(cc);
 	int i, ret;
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c7cb2177b252..c275ff2753c2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2736,7 +2736,7 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
 
 static inline struct page *f2fs_pagecache_get_page(
 				struct address_space *mapping, pgoff_t index,
-				int fgp_flags, gfp_t gfp_mask)
+				fgf_t fgp_flags, gfp_t gfp_mask)
 {
 	if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET))
 		return NULL;
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index cc640576f9f7..94a6ae05e03b 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -467,7 +467,7 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
  */
 struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos)
 {
-	unsigned fgp = FGP_WRITEBEGIN | FGP_NOFS;
+	fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS;
 
 	if (iter->flags & IOMAP_NOWAIT)
 		fgp |= FGP_NOWAIT;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 716953ee1ebd..911201fc41fc 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -501,22 +501,48 @@ pgoff_t page_cache_next_miss(struct address_space *mapping,
 pgoff_t page_cache_prev_miss(struct address_space *mapping,
 			     pgoff_t index, unsigned long max_scan);
 
-#define FGP_ACCESSED		0x00000001
-#define FGP_LOCK		0x00000002
-#define FGP_CREAT		0x00000004
-#define FGP_WRITE		0x00000008
-#define FGP_NOFS		0x00000010
-#define FGP_NOWAIT		0x00000020
-#define FGP_FOR_MMAP		0x00000040
-#define FGP_STABLE		0x00000080
+/**
+ * typedef fgf_t - Flags for getting folios from the page cache.
+ *
+ * Most users of the page cache will not need to use these flags;
+ * there are convenience functions such as filemap_get_folio() and
+ * filemap_lock_folio().  For users which need more control over exactly
+ * what is done with the folios, these flags to __filemap_get_folio()
+ * are available.
+ *
+ * * %FGP_ACCESSED - The folio will be marked accessed.
+ * * %FGP_LOCK - The folio is returned locked.
+ * * %FGP_CREAT - If no folio is present then a new folio is allocated,
+ *   added to the page cache and the VM's LRU list.  The folio is
+ *   returned locked.
+ * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
+ *   folio is already in cache.  If the folio was allocated, unlock it
+ *   before returning so the caller can do the same dance.
+ * * %FGP_WRITE - The folio will be written to by the caller.
+ * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
+ * * %FGP_NOWAIT - Don't block on the folio lock.
+ * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
+ * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin()
+ *   implementation.
+ */
+typedef unsigned int __bitwise fgf_t;
+
+#define FGP_ACCESSED		((__force fgf_t)0x00000001)
+#define FGP_LOCK		((__force fgf_t)0x00000002)
+#define FGP_CREAT		((__force fgf_t)0x00000004)
+#define FGP_WRITE		((__force fgf_t)0x00000008)
+#define FGP_NOFS		((__force fgf_t)0x00000010)
+#define FGP_NOWAIT		((__force fgf_t)0x00000020)
+#define FGP_FOR_MMAP		((__force fgf_t)0x00000040)
+#define FGP_STABLE		((__force fgf_t)0x00000080)
 
 #define FGP_WRITEBEGIN		(FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
 
 void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
-		int fgp_flags, gfp_t gfp);
+		fgf_t fgp_flags, gfp_t gfp);
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
-		int fgp_flags, gfp_t gfp);
+		fgf_t fgp_flags, gfp_t gfp);
 
 /**
  * filemap_get_folio - Find and get a folio.
@@ -590,7 +616,7 @@ static inline struct page *find_get_page(struct address_space *mapping,
 }
 
 static inline struct page *find_get_page_flags(struct address_space *mapping,
-					pgoff_t offset, int fgp_flags)
+					pgoff_t offset, fgf_t fgp_flags)
 {
 	return pagecache_get_page(mapping, offset, fgp_flags, 0);
 }
diff --git a/mm/filemap.c b/mm/filemap.c
index 9e44a49bbd74..8a669fecfd1c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1855,30 +1855,15 @@ out:
  *
  * Looks up the page cache entry at @mapping & @index.
  *
- * @fgp_flags can be zero or more of these flags:
- *
- * * %FGP_ACCESSED - The folio will be marked accessed.
- * * %FGP_LOCK - The folio is returned locked.
- * * %FGP_CREAT - If no page is present then a new page is allocated using
- *   @gfp and added to the page cache and the VM's LRU list.
- *   The page is returned locked and with an increased refcount.
- * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
- *   page is already in cache.  If the page was allocated, unlock it before
- *   returning so the caller can do the same dance.
- * * %FGP_WRITE - The page will be written to by the caller.
- * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
- * * %FGP_NOWAIT - Don't get blocked by page lock.
- * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
- *
  * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even
  * if the %GFP flags specified for %FGP_CREAT are atomic.
  *
- * If there is a page cache page, it is returned with an increased refcount.
+ * If this function returns a folio, it is returned with an increased refcount.
  *
  * Return: The found folio or an ERR_PTR() otherwise.
  */
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
-		int fgp_flags, gfp_t gfp)
+		fgf_t fgp_flags, gfp_t gfp)
 {
 	struct folio *folio;
 
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index c6f056c20503..10c3247542cb 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -92,7 +92,7 @@ EXPORT_SYMBOL(add_to_page_cache_lru);
 
 noinline
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
-		int fgp_flags, gfp_t gfp)
+		fgf_t fgp_flags, gfp_t gfp)
 {
 	struct folio *folio;
 

From 4f66170119107f1452d2438ba4606e105e9e3afe Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 19 May 2023 16:10:37 -0400
Subject: [PATCH 0505/2047] filemap: Allow __filemap_get_folio to allocate
 large folios

Allow callers of __filemap_get_folio() to specify a preferred folio
order in the FGP flags.  This is only honoured in the FGP_CREATE path;
if there is already a folio in the page cache that covers the index,
we will return it, no matter what its order is.  No create-around is
attempted; we will only create folios which start at the specified index.
Unmodified callers will continue to allocate order 0 folios.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 include/linux/pagemap.h | 34 ++++++++++++++++++++++++++++++
 mm/filemap.c            | 46 +++++++++++++++++++++++++++++------------
 mm/readahead.c          | 13 ------------
 3 files changed, 67 insertions(+), 26 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 911201fc41fc..d87840acbfb2 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -470,6 +470,19 @@ static inline void *detach_page_private(struct page *page)
 	return folio_detach_private(page_folio(page));
 }
 
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size.  I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER	8
+#endif
+
 #ifdef CONFIG_NUMA
 struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
 #else
@@ -535,9 +548,30 @@ typedef unsigned int __bitwise fgf_t;
 #define FGP_NOWAIT		((__force fgf_t)0x00000020)
 #define FGP_FOR_MMAP		((__force fgf_t)0x00000040)
 #define FGP_STABLE		((__force fgf_t)0x00000080)
+#define FGF_GET_ORDER(fgf)	(((__force unsigned)fgf) >> 26)	/* top 6 bits */
 
 #define FGP_WRITEBEGIN		(FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
 
+/**
+ * fgf_set_order - Encode a length in the fgf_t flags.
+ * @size: The suggested size of the folio to create.
+ *
+ * The caller of __filemap_get_folio() can use this to suggest a preferred
+ * size for the folio that is created.  If there is already a folio at
+ * the index, it will be returned, no matter what its size.  If a folio
+ * is freshly created, it may be of a different size than requested
+ * due to alignment constraints, memory pressure, or the presence of
+ * other folios at nearby indices.
+ */
+static inline fgf_t fgf_set_order(size_t size)
+{
+	unsigned int shift = ilog2(size);
+
+	if (shift <= PAGE_SHIFT)
+		return 0;
+	return (__force fgf_t)((shift - PAGE_SHIFT) << 26);
+}
+
 void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		fgf_t fgp_flags, gfp_t gfp);
diff --git a/mm/filemap.c b/mm/filemap.c
index 8a669fecfd1c..baafbf324c9f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1905,7 +1905,9 @@ repeat:
 		folio_wait_stable(folio);
 no_page:
 	if (!folio && (fgp_flags & FGP_CREAT)) {
+		unsigned order = FGF_GET_ORDER(fgp_flags);
 		int err;
+
 		if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
 			gfp |= __GFP_WRITE;
 		if (fgp_flags & FGP_NOFS)
@@ -1914,26 +1916,44 @@ no_page:
 			gfp &= ~GFP_KERNEL;
 			gfp |= GFP_NOWAIT | __GFP_NOWARN;
 		}
-
-		folio = filemap_alloc_folio(gfp, 0);
-		if (!folio)
-			return ERR_PTR(-ENOMEM);
-
 		if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
 			fgp_flags |= FGP_LOCK;
 
-		/* Init accessed so avoid atomic mark_page_accessed later */
-		if (fgp_flags & FGP_ACCESSED)
-			__folio_set_referenced(folio);
+		if (!mapping_large_folio_support(mapping))
+			order = 0;
+		if (order > MAX_PAGECACHE_ORDER)
+			order = MAX_PAGECACHE_ORDER;
+		/* If we're not aligned, allocate a smaller folio */
+		if (index & ((1UL << order) - 1))
+			order = __ffs(index);
 
-		err = filemap_add_folio(mapping, folio, index, gfp);
-		if (unlikely(err)) {
+		do {
+			gfp_t alloc_gfp = gfp;
+
+			err = -ENOMEM;
+			if (order == 1)
+				order = 0;
+			if (order > 0)
+				alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
+			folio = filemap_alloc_folio(alloc_gfp, order);
+			if (!folio)
+				continue;
+
+			/* Init accessed so avoid atomic mark_page_accessed later */
+			if (fgp_flags & FGP_ACCESSED)
+				__folio_set_referenced(folio);
+
+			err = filemap_add_folio(mapping, folio, index, gfp);
+			if (!err)
+				break;
 			folio_put(folio);
 			folio = NULL;
-			if (err == -EEXIST)
-				goto repeat;
-		}
+		} while (order-- > 0);
 
+		if (err == -EEXIST)
+			goto repeat;
+		if (err)
+			return ERR_PTR(err);
 		/*
 		 * filemap_add_folio locks the page, and for mmap
 		 * we expect an unlocked page.
diff --git a/mm/readahead.c b/mm/readahead.c
index a9c999aa19af..e815c114de21 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -461,19 +461,6 @@ static int try_context_readahead(struct address_space *mapping,
 	return 1;
 }
 
-/*
- * There are some parts of the kernel which assume that PMD entries
- * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
- * limit the maximum allocation order to PMD size.  I'm not aware of any
- * assumptions about maximum order if THP are disabled, but 8 seems like
- * a good order (that's 1MB if you're using 4kB pages)
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
-#else
-#define MAX_PAGECACHE_ORDER	8
-#endif
-
 static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
 		pgoff_t mark, unsigned int order, gfp_t gfp)
 {

From d6bb59a9444d218dc60dee3b45fd93c0d4f5e123 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 19 May 2023 16:18:05 -0400
Subject: [PATCH 0506/2047] iomap: Create large folios in the buffered write
 path

Use the size of the write as a hint for the size of the folio to create.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/gfs2/bmap.c         | 2 +-
 fs/iomap/buffered-io.c | 6 ++++--
 include/linux/iomap.h  | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8d611fbcf0bd..521d1e00a0ff 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -971,7 +971,7 @@ gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
 	if (status)
 		return ERR_PTR(status);
 
-	folio = iomap_get_folio(iter, pos);
+	folio = iomap_get_folio(iter, pos, len);
 	if (IS_ERR(folio))
 		gfs2_trans_end(sdp);
 	return folio;
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 94a6ae05e03b..00e0e94c953e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -461,16 +461,18 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
  * iomap_get_folio - get a folio reference for writing
  * @iter: iteration structure
  * @pos: start offset of write
+ * @len: Suggested size of folio to create.
  *
  * Returns a locked reference to the folio at @pos, or an error pointer if the
  * folio could not be obtained.
  */
-struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos)
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len)
 {
 	fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS;
 
 	if (iter->flags & IOMAP_NOWAIT)
 		fgp |= FGP_NOWAIT;
+	fgp |= fgf_set_order(len);
 
 	return __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
 			fgp, mapping_gfp_mask(iter->inode->i_mapping));
@@ -597,7 +599,7 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
 	if (folio_ops && folio_ops->get_folio)
 		return folio_ops->get_folio(iter, pos, len);
 	else
-		return iomap_get_folio(iter, pos);
+		return iomap_get_folio(iter, pos, len);
 }
 
 static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index e2b836c2e119..80facb9c9e5b 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -261,7 +261,7 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
 int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
 void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
 bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
-struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos);
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
 bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
 void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len);
 int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,

From 5d8edfb900d55db7dbf2dbf325bfb9fdb863ec72 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 20 May 2023 12:13:20 -0400
Subject: [PATCH 0507/2047] iomap: Copy larger chunks from userspace

If we have a large folio, we can copy in larger chunks than PAGE_SIZE.
Start at the maximum page cache size and shrink by half every time we
hit the "we are short on memory" problem.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 00e0e94c953e..608cd2e5a7c9 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -769,6 +769,7 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
 static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 {
 	loff_t length = iomap_length(iter);
+	size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
 	loff_t pos = iter->pos;
 	ssize_t written = 0;
 	long status = 0;
@@ -777,15 +778,12 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 
 	do {
 		struct folio *folio;
-		struct page *page;
-		unsigned long offset;	/* Offset into pagecache page */
-		unsigned long bytes;	/* Bytes to write to page */
+		size_t offset;		/* Offset into folio */
+		size_t bytes;		/* Bytes to write to folio */
 		size_t copied;		/* Bytes copied from user */
 
-		offset = offset_in_page(pos);
-		bytes = min_t(unsigned long, PAGE_SIZE - offset,
-						iov_iter_count(i));
-again:
+		offset = pos & (chunk - 1);
+		bytes = min(chunk - offset, iov_iter_count(i));
 		status = balance_dirty_pages_ratelimited_flags(mapping,
 							       bdp_flags);
 		if (unlikely(status))
@@ -815,12 +813,14 @@ again:
 		if (iter->iomap.flags & IOMAP_F_STALE)
 			break;
 
-		page = folio_file_page(folio, pos >> PAGE_SHIFT);
+		offset = offset_in_folio(folio, pos);
+		if (bytes > folio_size(folio) - offset)
+			bytes = folio_size(folio) - offset;
+
 		if (mapping_writably_mapped(mapping))
-			flush_dcache_page(page);
-
-		copied = copy_page_from_iter_atomic(page, offset, bytes, i);
+			flush_dcache_folio(folio);
 
+		copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
 		status = iomap_write_end(iter, pos, bytes, copied, folio);
 
 		if (unlikely(copied != status))
@@ -836,11 +836,13 @@ again:
 			 */
 			if (copied)
 				bytes = copied;
-			goto again;
+			if (chunk > PAGE_SIZE)
+				chunk /= 2;
+		} else {
+			pos += status;
+			written += status;
+			length -= status;
 		}
-		pos += status;
-		written += status;
-		length -= status;
 	} while (iov_iter_count(i) && length);
 
 	if (status == -EAGAIN) {

From 0979e7e3cfd7b479c34dc0f7217beb0c30f8028a Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Mon, 24 Jul 2023 16:12:47 +0800
Subject: [PATCH 0508/2047] MAINTAINERS: Add drivers/firmware/google/ entry

These are mostly used for Chrome platforms, so group it in with the same
mailing list, repo, and (one) committer.

Signed-off-by: Brian Norris <briannorris@chromium.org>
Acked-by: Julius Werner <jwerner@chromium.org>
Acked-By: Benson Leung <bleung@chromium.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Acked-by: Guenter Roeck <groeck@chromium.org>
Link: https://lore.kernel.org/r/20230724081247.678784-1-tzungbi@kernel.org
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 MAINTAINERS | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3be1bdfe8ecc..8293a4ab82d8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8758,6 +8758,15 @@ S:	Supported
 F:	Documentation/networking/device_drivers/ethernet/google/gve.rst
 F:	drivers/net/ethernet/google
 
+GOOGLE FIRMWARE DRIVERS
+M:	Tzung-Bi Shih <tzungbi@kernel.org>
+R:	Brian Norris <briannorris@chromium.org>
+R:	Julius Werner <jwerner@chromium.org>
+L:	chrome-platform@lists.linux.dev
+S:	Maintained
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/chrome-platform/linux.git
+F:	drivers/firmware/google/
+
 GPD POCKET FAN DRIVER
 M:	Hans de Goede <hdegoede@redhat.com>
 L:	platform-driver-x86@vger.kernel.org

From c2add32ce452e216f5b80269fe73d11be1e2c9a5 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Thu, 20 Jul 2023 11:10:47 +0530
Subject: [PATCH 0509/2047] dt-bindings: opp: Increase maxItems for opp-hz
 property

Current limit of 16 will be exhausted by platforms specifying the frequency
for 9 clocks using opp-hz, like Qcom SDM845 SoC. For instance, specifying
the frequency for 9 clocks with 64bit specifier as below would consume
(9 * 2 = 18) items.

	opp-50000000 {
		opp-hz = /bits/ 64 <50000000>,
			 /bits/ 64 <0>,
			 /bits/ 64 <0>,
			 /bits/ 64 <37500000>,
			 /bits/ 64 <0>,
			 /bits/ 64 <0>,
			 /bits/ 64 <0>,
			 /bits/ 64 <0>,
			 /bits/ 64 <75000000>;
	};

So let's increase the limit to 32 which should be enough for most platforms
(hopefully).

Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 Documentation/devicetree/bindings/opp/opp-v2-base.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml
index 47e6f36b7637..e2f8f7af3cf4 100644
--- a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml
+++ b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml
@@ -56,7 +56,7 @@ patternProperties:
           need to be configured and that is left for the implementation
           specific binding.
         minItems: 1
-        maxItems: 16
+        maxItems: 32
         items:
           maxItems: 1
 

From 04f52c4e6f8001e2d81006027dac08badddafde8 Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Date: Mon, 10 Jul 2023 14:11:19 -0700
Subject: [PATCH 0510/2047] iomap: Rename iomap_page to iomap_folio_state and
 others

struct iomap_page actually tracks per-block state of a folio.
Hence it make sense to rename some of these function names and data
structures for e.g.
1. struct iomap_page (iop) -> struct iomap_folio_state (ifs)
2. iomap_page_create() -> ifs_alloc()
3. iomap_page_release() -> ifs_free()
4. iomap_iop_set_range_uptodate() -> ifs_set_range_uptodate()
5. to_iomap_page() -> folio->private

Since in later patches we are also going to add per-block dirty state
tracking to iomap_folio_state. Hence this patch also renames "uptodate"
& "uptodate_lock" members of iomap_folio_state to "state" and"state_lock".

We don't really need to_iomap_page() function, instead directly open code
it as folio->private;

Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 147 ++++++++++++++++++++---------------------
 1 file changed, 70 insertions(+), 77 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 608cd2e5a7c9..0ae34b5ba441 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -24,64 +24,57 @@
 #define IOEND_BATCH_SIZE	4096
 
 /*
- * Structure allocated for each folio when block size < folio size
- * to track sub-folio uptodate status and I/O completions.
+ * Structure allocated for each folio to track per-block uptodate state
+ * and I/O completions.
  */
-struct iomap_page {
+struct iomap_folio_state {
 	atomic_t		read_bytes_pending;
 	atomic_t		write_bytes_pending;
-	spinlock_t		uptodate_lock;
-	unsigned long		uptodate[];
+	spinlock_t		state_lock;
+	unsigned long		state[];
 };
 
-static inline struct iomap_page *to_iomap_page(struct folio *folio)
-{
-	if (folio_test_private(folio))
-		return folio_get_private(folio);
-	return NULL;
-}
-
 static struct bio_set iomap_ioend_bioset;
 
-static struct iomap_page *
-iomap_page_create(struct inode *inode, struct folio *folio, unsigned int flags)
+static struct iomap_folio_state *ifs_alloc(struct inode *inode,
+		struct folio *folio, unsigned int flags)
 {
-	struct iomap_page *iop = to_iomap_page(folio);
+	struct iomap_folio_state *ifs = folio->private;
 	unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
 	gfp_t gfp;
 
-	if (iop || nr_blocks <= 1)
-		return iop;
+	if (ifs || nr_blocks <= 1)
+		return ifs;
 
 	if (flags & IOMAP_NOWAIT)
 		gfp = GFP_NOWAIT;
 	else
 		gfp = GFP_NOFS | __GFP_NOFAIL;
 
-	iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
+	ifs = kzalloc(struct_size(ifs, state, BITS_TO_LONGS(nr_blocks)),
 		      gfp);
-	if (iop) {
-		spin_lock_init(&iop->uptodate_lock);
+	if (ifs) {
+		spin_lock_init(&ifs->state_lock);
 		if (folio_test_uptodate(folio))
-			bitmap_fill(iop->uptodate, nr_blocks);
-		folio_attach_private(folio, iop);
+			bitmap_fill(ifs->state, nr_blocks);
+		folio_attach_private(folio, ifs);
 	}
-	return iop;
+	return ifs;
 }
 
-static void iomap_page_release(struct folio *folio)
+static void ifs_free(struct folio *folio)
 {
-	struct iomap_page *iop = folio_detach_private(folio);
+	struct iomap_folio_state *ifs = folio_detach_private(folio);
 	struct inode *inode = folio->mapping->host;
 	unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
 
-	if (!iop)
+	if (!ifs)
 		return;
-	WARN_ON_ONCE(atomic_read(&iop->read_bytes_pending));
-	WARN_ON_ONCE(atomic_read(&iop->write_bytes_pending));
-	WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) !=
+	WARN_ON_ONCE(atomic_read(&ifs->read_bytes_pending));
+	WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending));
+	WARN_ON_ONCE(bitmap_full(ifs->state, nr_blocks) !=
 			folio_test_uptodate(folio));
-	kfree(iop);
+	kfree(ifs);
 }
 
 /*
@@ -90,7 +83,7 @@ static void iomap_page_release(struct folio *folio)
 static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
 		loff_t *pos, loff_t length, size_t *offp, size_t *lenp)
 {
-	struct iomap_page *iop = to_iomap_page(folio);
+	struct iomap_folio_state *ifs = folio->private;
 	loff_t orig_pos = *pos;
 	loff_t isize = i_size_read(inode);
 	unsigned block_bits = inode->i_blkbits;
@@ -105,12 +98,12 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
 	 * per-block uptodate status and adjust the offset and length if needed
 	 * to avoid reading in already uptodate ranges.
 	 */
-	if (iop) {
+	if (ifs) {
 		unsigned int i;
 
 		/* move forward for each leading block marked uptodate */
 		for (i = first; i <= last; i++) {
-			if (!test_bit(i, iop->uptodate))
+			if (!test_bit(i, ifs->state))
 				break;
 			*pos += block_size;
 			poff += block_size;
@@ -120,7 +113,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
 
 		/* truncate len if we find any trailing uptodate block(s) */
 		for ( ; i <= last; i++) {
-			if (test_bit(i, iop->uptodate)) {
+			if (test_bit(i, ifs->state)) {
 				plen -= (last - i + 1) * block_size;
 				last = i - 1;
 				break;
@@ -144,26 +137,26 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
 	*lenp = plen;
 }
 
-static void iomap_iop_set_range_uptodate(struct folio *folio,
-		struct iomap_page *iop, size_t off, size_t len)
+static void ifs_set_range_uptodate(struct folio *folio,
+		struct iomap_folio_state *ifs, size_t off, size_t len)
 {
 	struct inode *inode = folio->mapping->host;
 	unsigned first = off >> inode->i_blkbits;
 	unsigned last = (off + len - 1) >> inode->i_blkbits;
 	unsigned long flags;
 
-	spin_lock_irqsave(&iop->uptodate_lock, flags);
-	bitmap_set(iop->uptodate, first, last - first + 1);
-	if (bitmap_full(iop->uptodate, i_blocks_per_folio(inode, folio)))
+	spin_lock_irqsave(&ifs->state_lock, flags);
+	bitmap_set(ifs->state, first, last - first + 1);
+	if (bitmap_full(ifs->state, i_blocks_per_folio(inode, folio)))
 		folio_mark_uptodate(folio);
-	spin_unlock_irqrestore(&iop->uptodate_lock, flags);
+	spin_unlock_irqrestore(&ifs->state_lock, flags);
 }
 
 static void iomap_set_range_uptodate(struct folio *folio,
-		struct iomap_page *iop, size_t off, size_t len)
+		struct iomap_folio_state *ifs, size_t off, size_t len)
 {
-	if (iop)
-		iomap_iop_set_range_uptodate(folio, iop, off, len);
+	if (ifs)
+		ifs_set_range_uptodate(folio, ifs, off, len);
 	else
 		folio_mark_uptodate(folio);
 }
@@ -171,16 +164,16 @@ static void iomap_set_range_uptodate(struct folio *folio,
 static void iomap_finish_folio_read(struct folio *folio, size_t offset,
 		size_t len, int error)
 {
-	struct iomap_page *iop = to_iomap_page(folio);
+	struct iomap_folio_state *ifs = folio->private;
 
 	if (unlikely(error)) {
 		folio_clear_uptodate(folio);
 		folio_set_error(folio);
 	} else {
-		iomap_set_range_uptodate(folio, iop, offset, len);
+		iomap_set_range_uptodate(folio, ifs, offset, len);
 	}
 
-	if (!iop || atomic_sub_and_test(len, &iop->read_bytes_pending))
+	if (!ifs || atomic_sub_and_test(len, &ifs->read_bytes_pending))
 		folio_unlock(folio);
 }
 
@@ -213,7 +206,7 @@ struct iomap_readpage_ctx {
 static int iomap_read_inline_data(const struct iomap_iter *iter,
 		struct folio *folio)
 {
-	struct iomap_page *iop;
+	struct iomap_folio_state *ifs;
 	const struct iomap *iomap = iomap_iter_srcmap(iter);
 	size_t size = i_size_read(iter->inode) - iomap->offset;
 	size_t poff = offset_in_page(iomap->offset);
@@ -231,15 +224,15 @@ static int iomap_read_inline_data(const struct iomap_iter *iter,
 	if (WARN_ON_ONCE(size > iomap->length))
 		return -EIO;
 	if (offset > 0)
-		iop = iomap_page_create(iter->inode, folio, iter->flags);
+		ifs = ifs_alloc(iter->inode, folio, iter->flags);
 	else
-		iop = to_iomap_page(folio);
+		ifs = folio->private;
 
 	addr = kmap_local_folio(folio, offset);
 	memcpy(addr, iomap->inline_data, size);
 	memset(addr + size, 0, PAGE_SIZE - poff - size);
 	kunmap_local(addr);
-	iomap_set_range_uptodate(folio, iop, offset, PAGE_SIZE - poff);
+	iomap_set_range_uptodate(folio, ifs, offset, PAGE_SIZE - poff);
 	return 0;
 }
 
@@ -260,7 +253,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
 	loff_t pos = iter->pos + offset;
 	loff_t length = iomap_length(iter) - offset;
 	struct folio *folio = ctx->cur_folio;
-	struct iomap_page *iop;
+	struct iomap_folio_state *ifs;
 	loff_t orig_pos = pos;
 	size_t poff, plen;
 	sector_t sector;
@@ -269,20 +262,20 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
 		return iomap_read_inline_data(iter, folio);
 
 	/* zero post-eof blocks as the page may be mapped */
-	iop = iomap_page_create(iter->inode, folio, iter->flags);
+	ifs = ifs_alloc(iter->inode, folio, iter->flags);
 	iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen);
 	if (plen == 0)
 		goto done;
 
 	if (iomap_block_needs_zeroing(iter, pos)) {
 		folio_zero_range(folio, poff, plen);
-		iomap_set_range_uptodate(folio, iop, poff, plen);
+		iomap_set_range_uptodate(folio, ifs, poff, plen);
 		goto done;
 	}
 
 	ctx->cur_folio_in_bio = true;
-	if (iop)
-		atomic_add(plen, &iop->read_bytes_pending);
+	if (ifs)
+		atomic_add(plen, &ifs->read_bytes_pending);
 
 	sector = iomap_sector(iomap, pos);
 	if (!ctx->bio ||
@@ -436,11 +429,11 @@ EXPORT_SYMBOL_GPL(iomap_readahead);
  */
 bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
 {
-	struct iomap_page *iop = to_iomap_page(folio);
+	struct iomap_folio_state *ifs = folio->private;
 	struct inode *inode = folio->mapping->host;
 	unsigned first, last, i;
 
-	if (!iop)
+	if (!ifs)
 		return false;
 
 	/* Caller's range may extend past the end of this folio */
@@ -451,7 +444,7 @@ bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
 	last = (from + count - 1) >> inode->i_blkbits;
 
 	for (i = first; i <= last; i++)
-		if (!test_bit(i, iop->uptodate))
+		if (!test_bit(i, ifs->state))
 			return false;
 	return true;
 }
@@ -491,7 +484,7 @@ bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags)
 	 */
 	if (folio_test_dirty(folio))
 		return false;
-	iomap_page_release(folio);
+	ifs_free(folio);
 	return true;
 }
 EXPORT_SYMBOL_GPL(iomap_release_folio);
@@ -508,7 +501,7 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
 	if (offset == 0 && len == folio_size(folio)) {
 		WARN_ON_ONCE(folio_test_writeback(folio));
 		folio_cancel_dirty(folio);
-		iomap_page_release(folio);
+		ifs_free(folio);
 	}
 }
 EXPORT_SYMBOL_GPL(iomap_invalidate_folio);
@@ -543,7 +536,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 		size_t len, struct folio *folio)
 {
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
-	struct iomap_page *iop;
+	struct iomap_folio_state *ifs;
 	loff_t block_size = i_blocksize(iter->inode);
 	loff_t block_start = round_down(pos, block_size);
 	loff_t block_end = round_up(pos + len, block_size);
@@ -555,8 +548,8 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 		return 0;
 	folio_clear_error(folio);
 
-	iop = iomap_page_create(iter->inode, folio, iter->flags);
-	if ((iter->flags & IOMAP_NOWAIT) && !iop && nr_blocks > 1)
+	ifs = ifs_alloc(iter->inode, folio, iter->flags);
+	if ((iter->flags & IOMAP_NOWAIT) && !ifs && nr_blocks > 1)
 		return -EAGAIN;
 
 	do {
@@ -585,7 +578,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 			if (status)
 				return status;
 		}
-		iomap_set_range_uptodate(folio, iop, poff, plen);
+		iomap_set_range_uptodate(folio, ifs, poff, plen);
 	} while ((block_start += plen) < block_end);
 
 	return 0;
@@ -692,7 +685,7 @@ out_unlock:
 static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 		size_t copied, struct folio *folio)
 {
-	struct iomap_page *iop = to_iomap_page(folio);
+	struct iomap_folio_state *ifs = folio->private;
 	flush_dcache_folio(folio);
 
 	/*
@@ -708,7 +701,7 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 	 */
 	if (unlikely(copied < len && !folio_test_uptodate(folio)))
 		return 0;
-	iomap_set_range_uptodate(folio, iop, offset_in_folio(folio, pos), len);
+	iomap_set_range_uptodate(folio, ifs, offset_in_folio(folio, pos), len);
 	filemap_dirty_folio(inode->i_mapping, folio);
 	return copied;
 }
@@ -1291,17 +1284,17 @@ EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
 static void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
 		size_t len, int error)
 {
-	struct iomap_page *iop = to_iomap_page(folio);
+	struct iomap_folio_state *ifs = folio->private;
 
 	if (error) {
 		folio_set_error(folio);
 		mapping_set_error(inode->i_mapping, error);
 	}
 
-	WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !iop);
-	WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) <= 0);
+	WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs);
+	WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) <= 0);
 
-	if (!iop || atomic_sub_and_test(len, &iop->write_bytes_pending))
+	if (!ifs || atomic_sub_and_test(len, &ifs->write_bytes_pending))
 		folio_end_writeback(folio);
 }
 
@@ -1568,7 +1561,7 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
  */
 static void
 iomap_add_to_ioend(struct inode *inode, loff_t pos, struct folio *folio,
-		struct iomap_page *iop, struct iomap_writepage_ctx *wpc,
+		struct iomap_folio_state *ifs, struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct list_head *iolist)
 {
 	sector_t sector = iomap_sector(&wpc->iomap, pos);
@@ -1586,8 +1579,8 @@ iomap_add_to_ioend(struct inode *inode, loff_t pos, struct folio *folio,
 		bio_add_folio_nofail(wpc->ioend->io_bio, folio, len, poff);
 	}
 
-	if (iop)
-		atomic_add(len, &iop->write_bytes_pending);
+	if (ifs)
+		atomic_add(len, &ifs->write_bytes_pending);
 	wpc->ioend->io_size += len;
 	wbc_account_cgroup_owner(wbc, &folio->page, len);
 }
@@ -1613,7 +1606,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct inode *inode,
 		struct folio *folio, u64 end_pos)
 {
-	struct iomap_page *iop = iomap_page_create(inode, folio, 0);
+	struct iomap_folio_state *ifs = ifs_alloc(inode, folio, 0);
 	struct iomap_ioend *ioend, *next;
 	unsigned len = i_blocksize(inode);
 	unsigned nblocks = i_blocks_per_folio(inode, folio);
@@ -1621,7 +1614,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	int error = 0, count = 0, i;
 	LIST_HEAD(submit_list);
 
-	WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0);
+	WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) != 0);
 
 	/*
 	 * Walk through the folio to find areas to write back. If we
@@ -1629,7 +1622,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	 * invalid, grab a new one.
 	 */
 	for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) {
-		if (iop && !test_bit(i, iop->uptodate))
+		if (ifs && !test_bit(i, ifs->state))
 			continue;
 
 		error = wpc->ops->map_blocks(wpc, inode, pos);
@@ -1640,7 +1633,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 			continue;
 		if (wpc->iomap.type == IOMAP_HOLE)
 			continue;
-		iomap_add_to_ioend(inode, pos, folio, iop, wpc, wbc,
+		iomap_add_to_ioend(inode, pos, folio, ifs, wpc, wbc,
 				 &submit_list);
 		count++;
 	}

From 3ea5c76cadeedcc0e40e180d2c4e37dc289fee22 Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Date: Mon, 10 Jul 2023 14:12:21 -0700
Subject: [PATCH 0511/2047] iomap: Drop ifs argument from
 iomap_set_range_uptodate()

iomap_folio_state (ifs) can be derived directly from the folio, making it
unnecessary to pass "ifs" as an argument to iomap_set_range_uptodate().
This patch eliminates "ifs" argument from iomap_set_range_uptodate()
function.

Also, the definition of iomap_set_range_uptodate() and
ifs_set_range_uptodate() functions are moved above ifs_alloc().
In upcoming patches, we plan to introduce additional helper routines for
handling dirty state, with the intention of consolidating all of "ifs"
state handling routines at one place.

Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 67 +++++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 0ae34b5ba441..6fff25f955e8 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -36,6 +36,33 @@ struct iomap_folio_state {
 
 static struct bio_set iomap_ioend_bioset;
 
+static void ifs_set_range_uptodate(struct folio *folio,
+		struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+	struct inode *inode = folio->mapping->host;
+	unsigned int first_blk = off >> inode->i_blkbits;
+	unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+	unsigned int nr_blks = last_blk - first_blk + 1;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ifs->state_lock, flags);
+	bitmap_set(ifs->state, first_blk, nr_blks);
+	if (bitmap_full(ifs->state, i_blocks_per_folio(inode, folio)))
+		folio_mark_uptodate(folio);
+	spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_set_range_uptodate(struct folio *folio, size_t off,
+		size_t len)
+{
+	struct iomap_folio_state *ifs = folio->private;
+
+	if (ifs)
+		ifs_set_range_uptodate(folio, ifs, off, len);
+	else
+		folio_mark_uptodate(folio);
+}
+
 static struct iomap_folio_state *ifs_alloc(struct inode *inode,
 		struct folio *folio, unsigned int flags)
 {
@@ -137,30 +164,6 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
 	*lenp = plen;
 }
 
-static void ifs_set_range_uptodate(struct folio *folio,
-		struct iomap_folio_state *ifs, size_t off, size_t len)
-{
-	struct inode *inode = folio->mapping->host;
-	unsigned first = off >> inode->i_blkbits;
-	unsigned last = (off + len - 1) >> inode->i_blkbits;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ifs->state_lock, flags);
-	bitmap_set(ifs->state, first, last - first + 1);
-	if (bitmap_full(ifs->state, i_blocks_per_folio(inode, folio)))
-		folio_mark_uptodate(folio);
-	spin_unlock_irqrestore(&ifs->state_lock, flags);
-}
-
-static void iomap_set_range_uptodate(struct folio *folio,
-		struct iomap_folio_state *ifs, size_t off, size_t len)
-{
-	if (ifs)
-		ifs_set_range_uptodate(folio, ifs, off, len);
-	else
-		folio_mark_uptodate(folio);
-}
-
 static void iomap_finish_folio_read(struct folio *folio, size_t offset,
 		size_t len, int error)
 {
@@ -170,7 +173,7 @@ static void iomap_finish_folio_read(struct folio *folio, size_t offset,
 		folio_clear_uptodate(folio);
 		folio_set_error(folio);
 	} else {
-		iomap_set_range_uptodate(folio, ifs, offset, len);
+		iomap_set_range_uptodate(folio, offset, len);
 	}
 
 	if (!ifs || atomic_sub_and_test(len, &ifs->read_bytes_pending))
@@ -206,7 +209,6 @@ struct iomap_readpage_ctx {
 static int iomap_read_inline_data(const struct iomap_iter *iter,
 		struct folio *folio)
 {
-	struct iomap_folio_state *ifs;
 	const struct iomap *iomap = iomap_iter_srcmap(iter);
 	size_t size = i_size_read(iter->inode) - iomap->offset;
 	size_t poff = offset_in_page(iomap->offset);
@@ -224,15 +226,13 @@ static int iomap_read_inline_data(const struct iomap_iter *iter,
 	if (WARN_ON_ONCE(size > iomap->length))
 		return -EIO;
 	if (offset > 0)
-		ifs = ifs_alloc(iter->inode, folio, iter->flags);
-	else
-		ifs = folio->private;
+		ifs_alloc(iter->inode, folio, iter->flags);
 
 	addr = kmap_local_folio(folio, offset);
 	memcpy(addr, iomap->inline_data, size);
 	memset(addr + size, 0, PAGE_SIZE - poff - size);
 	kunmap_local(addr);
-	iomap_set_range_uptodate(folio, ifs, offset, PAGE_SIZE - poff);
+	iomap_set_range_uptodate(folio, offset, PAGE_SIZE - poff);
 	return 0;
 }
 
@@ -269,7 +269,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
 
 	if (iomap_block_needs_zeroing(iter, pos)) {
 		folio_zero_range(folio, poff, plen);
-		iomap_set_range_uptodate(folio, ifs, poff, plen);
+		iomap_set_range_uptodate(folio, poff, plen);
 		goto done;
 	}
 
@@ -578,7 +578,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 			if (status)
 				return status;
 		}
-		iomap_set_range_uptodate(folio, ifs, poff, plen);
+		iomap_set_range_uptodate(folio, poff, plen);
 	} while ((block_start += plen) < block_end);
 
 	return 0;
@@ -685,7 +685,6 @@ out_unlock:
 static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 		size_t copied, struct folio *folio)
 {
-	struct iomap_folio_state *ifs = folio->private;
 	flush_dcache_folio(folio);
 
 	/*
@@ -701,7 +700,7 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 	 */
 	if (unlikely(copied < len && !folio_test_uptodate(folio)))
 		return 0;
-	iomap_set_range_uptodate(folio, ifs, offset_in_folio(folio, pos), len);
+	iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len);
 	filemap_dirty_folio(inode->i_mapping, folio);
 	return copied;
 }

From cc86181a3b7605f394182cdc38b6a87632511a88 Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Date: Mon, 10 Jul 2023 14:12:22 -0700
Subject: [PATCH 0512/2047] iomap: Add some uptodate state handling helpers for
 ifs state bitmap

This patch adds two of the helper routines ifs_is_fully_uptodate()
and ifs_block_is_uptodate() for managing uptodate state of "ifs" state
bitmap.

In later patches ifs state bitmap array will also handle dirty state of all
blocks of a folio. Hence this patch adds some helper routines for handling
uptodate state of the ifs state bitmap.

Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 6fff25f955e8..85159e5b3d1d 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -36,6 +36,20 @@ struct iomap_folio_state {
 
 static struct bio_set iomap_ioend_bioset;
 
+static inline bool ifs_is_fully_uptodate(struct folio *folio,
+		struct iomap_folio_state *ifs)
+{
+	struct inode *inode = folio->mapping->host;
+
+	return bitmap_full(ifs->state, i_blocks_per_folio(inode, folio));
+}
+
+static inline bool ifs_block_is_uptodate(struct iomap_folio_state *ifs,
+		unsigned int block)
+{
+	return test_bit(block, ifs->state);
+}
+
 static void ifs_set_range_uptodate(struct folio *folio,
 		struct iomap_folio_state *ifs, size_t off, size_t len)
 {
@@ -47,7 +61,7 @@ static void ifs_set_range_uptodate(struct folio *folio,
 
 	spin_lock_irqsave(&ifs->state_lock, flags);
 	bitmap_set(ifs->state, first_blk, nr_blks);
-	if (bitmap_full(ifs->state, i_blocks_per_folio(inode, folio)))
+	if (ifs_is_fully_uptodate(folio, ifs))
 		folio_mark_uptodate(folio);
 	spin_unlock_irqrestore(&ifs->state_lock, flags);
 }
@@ -92,14 +106,12 @@ static struct iomap_folio_state *ifs_alloc(struct inode *inode,
 static void ifs_free(struct folio *folio)
 {
 	struct iomap_folio_state *ifs = folio_detach_private(folio);
-	struct inode *inode = folio->mapping->host;
-	unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
 
 	if (!ifs)
 		return;
 	WARN_ON_ONCE(atomic_read(&ifs->read_bytes_pending));
 	WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending));
-	WARN_ON_ONCE(bitmap_full(ifs->state, nr_blocks) !=
+	WARN_ON_ONCE(ifs_is_fully_uptodate(folio, ifs) !=
 			folio_test_uptodate(folio));
 	kfree(ifs);
 }
@@ -130,7 +142,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
 
 		/* move forward for each leading block marked uptodate */
 		for (i = first; i <= last; i++) {
-			if (!test_bit(i, ifs->state))
+			if (!ifs_block_is_uptodate(ifs, i))
 				break;
 			*pos += block_size;
 			poff += block_size;
@@ -140,7 +152,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
 
 		/* truncate len if we find any trailing uptodate block(s) */
 		for ( ; i <= last; i++) {
-			if (test_bit(i, ifs->state)) {
+			if (ifs_block_is_uptodate(ifs, i)) {
 				plen -= (last - i + 1) * block_size;
 				last = i - 1;
 				break;
@@ -444,7 +456,7 @@ bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
 	last = (from + count - 1) >> inode->i_blkbits;
 
 	for (i = first; i <= last; i++)
-		if (!test_bit(i, ifs->state))
+		if (!ifs_block_is_uptodate(ifs, i))
 			return false;
 	return true;
 }
@@ -1621,7 +1633,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	 * invalid, grab a new one.
 	 */
 	for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) {
-		if (ifs && !test_bit(i, ifs->state))
+		if (ifs && !ifs_block_is_uptodate(ifs, i))
 			continue;
 
 		error = wpc->ops->map_blocks(wpc, inode, pos);

From eee2d2e6ea5550118170dbd5bb1316ceb38455fb Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Date: Mon, 10 Jul 2023 14:12:22 -0700
Subject: [PATCH 0513/2047] iomap: Fix possible overflow condition in
 iomap_write_delalloc_scan

folio_next_index() returns an unsigned long value which left shifted
by PAGE_SHIFT could possibly cause an overflow on 32-bit system. Instead
use folio_pos(folio) + folio_size(folio), which does this correctly.

Suggested-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 85159e5b3d1d..38e95302bd6a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -934,7 +934,7 @@ static int iomap_write_delalloc_scan(struct inode *inode,
 			 * the end of this data range, not the end of the folio.
 			 */
 			*punch_start_byte = min_t(loff_t, end_byte,
-					folio_next_index(folio) << PAGE_SHIFT);
+					folio_pos(folio) + folio_size(folio));
 		}
 
 		/* move offset to start of next folio in range */

From 0af2b37d8e7a3da90c94862abb18c8b2853f81ea Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Date: Mon, 10 Jul 2023 14:12:23 -0700
Subject: [PATCH 0514/2047] iomap: Use iomap_punch_t typedef

It makes it much easier if we have iomap_punch_t typedef for "punch"
function pointer in all delalloc related punch, scan and release
functions. It will be useful in later patches when we will factor out
iomap_write_delalloc_punch() function.

Suggested-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 38e95302bd6a..864a2358e5d2 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -23,6 +23,7 @@
 
 #define IOEND_BATCH_SIZE	4096
 
+typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length);
 /*
  * Structure allocated for each folio to track per-block uptodate state
  * and I/O completions.
@@ -901,7 +902,7 @@ EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
  */
 static int iomap_write_delalloc_scan(struct inode *inode,
 		loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
-		int (*punch)(struct inode *inode, loff_t offset, loff_t length))
+		iomap_punch_t punch)
 {
 	while (start_byte < end_byte) {
 		struct folio	*folio;
@@ -979,8 +980,7 @@ static int iomap_write_delalloc_scan(struct inode *inode,
  * the code to subtle off-by-one bugs....
  */
 static int iomap_write_delalloc_release(struct inode *inode,
-		loff_t start_byte, loff_t end_byte,
-		int (*punch)(struct inode *inode, loff_t pos, loff_t length))
+		loff_t start_byte, loff_t end_byte, iomap_punch_t punch)
 {
 	loff_t punch_start_byte = start_byte;
 	loff_t scan_end_byte = min(i_size_read(inode), end_byte);
@@ -1073,8 +1073,7 @@ out_unlock:
  */
 int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
 		struct iomap *iomap, loff_t pos, loff_t length,
-		ssize_t written,
-		int (*punch)(struct inode *inode, loff_t pos, loff_t length))
+		ssize_t written, iomap_punch_t punch)
 {
 	loff_t			start_byte;
 	loff_t			end_byte;

From 7f79d85b525b3494ab0d28ba1fd2a01874d0fc8a Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Date: Mon, 10 Jul 2023 14:12:24 -0700
Subject: [PATCH 0515/2047] iomap: Refactor iomap_write_delalloc_punch()
 function out

This patch factors iomap_write_delalloc_punch() function out. This function
is resposible for actual punch out operation.
The reason for doing this is, to avoid deep indentation when we bring
punch-out of individual non-dirty blocks within a dirty folio in a later
patch (which adds per-block dirty status handling to iomap) to avoid
delalloc block leak.

Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/buffered-io.c | 54 ++++++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 864a2358e5d2..baed68e2b03a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -883,6 +883,33 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
 }
 EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
 
+static int iomap_write_delalloc_punch(struct inode *inode, struct folio *folio,
+		loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
+		iomap_punch_t punch)
+{
+	int ret = 0;
+
+	if (!folio_test_dirty(folio))
+		return ret;
+
+	/* if dirty, punch up to offset */
+	if (start_byte > *punch_start_byte) {
+		ret = punch(inode, *punch_start_byte,
+				start_byte - *punch_start_byte);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * Make sure the next punch start is correctly bound to
+	 * the end of this data range, not the end of the folio.
+	 */
+	*punch_start_byte = min_t(loff_t, end_byte,
+				folio_pos(folio) + folio_size(folio));
+
+	return ret;
+}
+
 /*
  * Scan the data range passed to us for dirty page cache folios. If we find a
  * dirty folio, punch out the preceeding range and update the offset from which
@@ -906,6 +933,7 @@ static int iomap_write_delalloc_scan(struct inode *inode,
 {
 	while (start_byte < end_byte) {
 		struct folio	*folio;
+		int ret;
 
 		/* grab locked page */
 		folio = filemap_lock_folio(inode->i_mapping,
@@ -916,26 +944,12 @@ static int iomap_write_delalloc_scan(struct inode *inode,
 			continue;
 		}
 
-		/* if dirty, punch up to offset */
-		if (folio_test_dirty(folio)) {
-			if (start_byte > *punch_start_byte) {
-				int	error;
-
-				error = punch(inode, *punch_start_byte,
-						start_byte - *punch_start_byte);
-				if (error) {
-					folio_unlock(folio);
-					folio_put(folio);
-					return error;
-				}
-			}
-
-			/*
-			 * Make sure the next punch start is correctly bound to
-			 * the end of this data range, not the end of the folio.
-			 */
-			*punch_start_byte = min_t(loff_t, end_byte,
-					folio_pos(folio) + folio_size(folio));
+		ret = iomap_write_delalloc_punch(inode, folio, punch_start_byte,
+						 start_byte, end_byte, punch);
+		if (ret) {
+			folio_unlock(folio);
+			folio_put(folio);
+			return ret;
 		}
 
 		/* move offset to start of next folio in range */

From a01b8f225248e86f3328a48c3311882148a8c5d3 Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Date: Mon, 10 Jul 2023 14:12:24 -0700
Subject: [PATCH 0516/2047] iomap: Allocate ifs in ->write_begin() early

We dont need to allocate an ifs in ->write_begin() for writes where the
position and length completely overlap with the given folio.
Therefore, such cases are skipped.

Currently when the folio is uptodate, we only allocate ifs at writeback
time (in iomap_writepage_map()). This is ok until now, but when we are
going to add support for per-block dirty state bitmap in ifs, this
could cause some performance degradation. The reason is that if we don't
allocate ifs during ->write_begin(), then we will never mark the
necessary dirty bits in ->write_end() call. And we will have to mark all
the bits as dirty at the writeback time, that could cause the same write
amplification and performance problems as it is now.

Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/buffered-io.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index baed68e2b03a..1ec4ef44851f 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -557,14 +557,23 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 	size_t from = offset_in_folio(folio, pos), to = from + len;
 	size_t poff, plen;
 
-	if (folio_test_uptodate(folio))
+	/*
+	 * If the write completely overlaps the current folio, then
+	 * entire folio will be dirtied so there is no need for
+	 * per-block state tracking structures to be attached to this folio.
+	 */
+	if (pos <= folio_pos(folio) &&
+	    pos + len >= folio_pos(folio) + folio_size(folio))
 		return 0;
-	folio_clear_error(folio);
 
 	ifs = ifs_alloc(iter->inode, folio, iter->flags);
 	if ((iter->flags & IOMAP_NOWAIT) && !ifs && nr_blocks > 1)
 		return -EAGAIN;
 
+	if (folio_test_uptodate(folio))
+		return 0;
+	folio_clear_error(folio);
+
 	do {
 		iomap_adjust_read_range(iter->inode, folio, &block_start,
 				block_end - block_start, &poff, &plen);

From 4ce02c67972211be488408c275c8fbf19faf29b3 Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Date: Mon, 10 Jul 2023 14:12:43 -0700
Subject: [PATCH 0517/2047] iomap: Add per-block dirty state tracking to
 improve performance

When filesystem blocksize is less than folio size (either with
mapping_large_folio_support() or with blocksize < pagesize) and when the
folio is uptodate in pagecache, then even a byte write can cause
an entire folio to be written to disk during writeback. This happens
because we currently don't have a mechanism to track per-block dirty
state within struct iomap_folio_state. We currently only track uptodate
state.

This patch implements support for tracking per-block dirty state in
iomap_folio_state->state bitmap. This should help improve the filesystem
write performance and help reduce write amplification.

Performance testing of below fio workload reveals ~16x performance
improvement using nvme with XFS (4k blocksize) on Power (64K pagesize)
FIO reported write bw scores improved from around ~28 MBps to ~452 MBps.

1. <test_randwrite.fio>
[global]
	ioengine=psync
	rw=randwrite
	overwrite=1
	pre_read=1
	direct=0
	bs=4k
	size=1G
	dir=./
	numjobs=8
	fdatasync=1
	runtime=60
	iodepth=64
	group_reporting=1

[fio-run]

2. Also our internal performance team reported that this patch improves
   their database workload performance by around ~83% (with XFS on Power)

Reported-by: Aravinda Herle <araherle@in.ibm.com>
Reported-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/gfs2/aops.c         |   2 +-
 fs/iomap/buffered-io.c | 161 ++++++++++++++++++++++++++++++++++++++---
 fs/xfs/xfs_aops.c      |   2 +-
 fs/zonefs/file.c       |   2 +-
 include/linux/iomap.h  |   1 +
 5 files changed, 154 insertions(+), 14 deletions(-)

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ae49256b7c8c..9c4b26aec580 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -747,7 +747,7 @@ static const struct address_space_operations gfs2_aops = {
 	.writepages = gfs2_writepages,
 	.read_folio = gfs2_read_folio,
 	.readahead = gfs2_readahead,
-	.dirty_folio = filemap_dirty_folio,
+	.dirty_folio = iomap_dirty_folio,
 	.release_folio = iomap_release_folio,
 	.invalidate_folio = iomap_invalidate_folio,
 	.bmap = gfs2_bmap,
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 1ec4ef44851f..283fb96f6609 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -25,13 +25,19 @@
 
 typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length);
 /*
- * Structure allocated for each folio to track per-block uptodate state
+ * Structure allocated for each folio to track per-block uptodate, dirty state
  * and I/O completions.
  */
 struct iomap_folio_state {
 	atomic_t		read_bytes_pending;
 	atomic_t		write_bytes_pending;
 	spinlock_t		state_lock;
+
+	/*
+	 * Each block has two bits in this bitmap:
+	 * Bits [0..blocks_per_folio) has the uptodate status.
+	 * Bits [b_p_f...(2*b_p_f))   has the dirty status.
+	 */
 	unsigned long		state[];
 };
 
@@ -78,6 +84,61 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off,
 		folio_mark_uptodate(folio);
 }
 
+static inline bool ifs_block_is_dirty(struct folio *folio,
+		struct iomap_folio_state *ifs, int block)
+{
+	struct inode *inode = folio->mapping->host;
+	unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+
+	return test_bit(block + blks_per_folio, ifs->state);
+}
+
+static void ifs_clear_range_dirty(struct folio *folio,
+		struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+	struct inode *inode = folio->mapping->host;
+	unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+	unsigned int first_blk = (off >> inode->i_blkbits);
+	unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+	unsigned int nr_blks = last_blk - first_blk + 1;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ifs->state_lock, flags);
+	bitmap_clear(ifs->state, first_blk + blks_per_folio, nr_blks);
+	spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_clear_range_dirty(struct folio *folio, size_t off, size_t len)
+{
+	struct iomap_folio_state *ifs = folio->private;
+
+	if (ifs)
+		ifs_clear_range_dirty(folio, ifs, off, len);
+}
+
+static void ifs_set_range_dirty(struct folio *folio,
+		struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+	struct inode *inode = folio->mapping->host;
+	unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+	unsigned int first_blk = (off >> inode->i_blkbits);
+	unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+	unsigned int nr_blks = last_blk - first_blk + 1;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ifs->state_lock, flags);
+	bitmap_set(ifs->state, first_blk + blks_per_folio, nr_blks);
+	spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_set_range_dirty(struct folio *folio, size_t off, size_t len)
+{
+	struct iomap_folio_state *ifs = folio->private;
+
+	if (ifs)
+		ifs_set_range_dirty(folio, ifs, off, len);
+}
+
 static struct iomap_folio_state *ifs_alloc(struct inode *inode,
 		struct folio *folio, unsigned int flags)
 {
@@ -93,14 +154,24 @@ static struct iomap_folio_state *ifs_alloc(struct inode *inode,
 	else
 		gfp = GFP_NOFS | __GFP_NOFAIL;
 
-	ifs = kzalloc(struct_size(ifs, state, BITS_TO_LONGS(nr_blocks)),
-		      gfp);
-	if (ifs) {
-		spin_lock_init(&ifs->state_lock);
-		if (folio_test_uptodate(folio))
-			bitmap_fill(ifs->state, nr_blocks);
-		folio_attach_private(folio, ifs);
-	}
+	/*
+	 * ifs->state tracks two sets of state flags when the
+	 * filesystem block size is smaller than the folio size.
+	 * The first state tracks per-block uptodate and the
+	 * second tracks per-block dirty state.
+	 */
+	ifs = kzalloc(struct_size(ifs, state,
+		      BITS_TO_LONGS(2 * nr_blocks)), gfp);
+	if (!ifs)
+		return ifs;
+
+	spin_lock_init(&ifs->state_lock);
+	if (folio_test_uptodate(folio))
+		bitmap_set(ifs->state, 0, nr_blocks);
+	if (folio_test_dirty(folio))
+		bitmap_set(ifs->state, nr_blocks, nr_blocks);
+	folio_attach_private(folio, ifs);
+
 	return ifs;
 }
 
@@ -519,6 +590,17 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
 }
 EXPORT_SYMBOL_GPL(iomap_invalidate_folio);
 
+bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio)
+{
+	struct inode *inode = mapping->host;
+	size_t len = folio_size(folio);
+
+	ifs_alloc(inode, folio, 0);
+	iomap_set_range_dirty(folio, 0, len);
+	return filemap_dirty_folio(mapping, folio);
+}
+EXPORT_SYMBOL_GPL(iomap_dirty_folio);
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
@@ -723,6 +805,7 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 	if (unlikely(copied < len && !folio_test_uptodate(folio)))
 		return 0;
 	iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len);
+	iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied);
 	filemap_dirty_folio(inode->i_mapping, folio);
 	return copied;
 }
@@ -892,6 +975,43 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
 }
 EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
 
+static int iomap_write_delalloc_ifs_punch(struct inode *inode,
+		struct folio *folio, loff_t start_byte, loff_t end_byte,
+		iomap_punch_t punch)
+{
+	unsigned int first_blk, last_blk, i;
+	loff_t last_byte;
+	u8 blkbits = inode->i_blkbits;
+	struct iomap_folio_state *ifs;
+	int ret = 0;
+
+	/*
+	 * When we have per-block dirty tracking, there can be
+	 * blocks within a folio which are marked uptodate
+	 * but not dirty. In that case it is necessary to punch
+	 * out such blocks to avoid leaking any delalloc blocks.
+	 */
+	ifs = folio->private;
+	if (!ifs)
+		return ret;
+
+	last_byte = min_t(loff_t, end_byte - 1,
+			folio_pos(folio) + folio_size(folio) - 1);
+	first_blk = offset_in_folio(folio, start_byte) >> blkbits;
+	last_blk = offset_in_folio(folio, last_byte) >> blkbits;
+	for (i = first_blk; i <= last_blk; i++) {
+		if (!ifs_block_is_dirty(folio, ifs, i)) {
+			ret = punch(inode, folio_pos(folio) + (i << blkbits),
+				    1 << blkbits);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
+
 static int iomap_write_delalloc_punch(struct inode *inode, struct folio *folio,
 		loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
 		iomap_punch_t punch)
@@ -909,6 +1029,12 @@ static int iomap_write_delalloc_punch(struct inode *inode, struct folio *folio,
 			return ret;
 	}
 
+	/* Punch non-dirty blocks within folio */
+	ret = iomap_write_delalloc_ifs_punch(inode, folio, start_byte,
+			end_byte, punch);
+	if (ret)
+		return ret;
+
 	/*
 	 * Make sure the next punch start is correctly bound to
 	 * the end of this data range, not the end of the folio.
@@ -1639,7 +1765,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct inode *inode,
 		struct folio *folio, u64 end_pos)
 {
-	struct iomap_folio_state *ifs = ifs_alloc(inode, folio, 0);
+	struct iomap_folio_state *ifs = folio->private;
 	struct iomap_ioend *ioend, *next;
 	unsigned len = i_blocksize(inode);
 	unsigned nblocks = i_blocks_per_folio(inode, folio);
@@ -1647,6 +1773,13 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	int error = 0, count = 0, i;
 	LIST_HEAD(submit_list);
 
+	WARN_ON_ONCE(end_pos <= pos);
+
+	if (!ifs && nblocks > 1) {
+		ifs = ifs_alloc(inode, folio, 0);
+		iomap_set_range_dirty(folio, 0, end_pos - pos);
+	}
+
 	WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) != 0);
 
 	/*
@@ -1655,7 +1788,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	 * invalid, grab a new one.
 	 */
 	for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) {
-		if (ifs && !ifs_block_is_uptodate(ifs, i))
+		if (ifs && !ifs_block_is_dirty(folio, ifs, i))
 			continue;
 
 		error = wpc->ops->map_blocks(wpc, inode, pos);
@@ -1699,6 +1832,12 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		}
 	}
 
+	/*
+	 * We can have dirty bits set past end of file in page_mkwrite path
+	 * while mapping the last partial folio. Hence it's better to clear
+	 * all the dirty bits in the folio here.
+	 */
+	iomap_clear_range_dirty(folio, 0, folio_size(folio));
 	folio_start_writeback(folio);
 	folio_unlock(folio);
 
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 451942fb38ec..2fca4b4e7fd8 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -578,7 +578,7 @@ const struct address_space_operations xfs_address_space_operations = {
 	.read_folio		= xfs_vm_read_folio,
 	.readahead		= xfs_vm_readahead,
 	.writepages		= xfs_vm_writepages,
-	.dirty_folio		= filemap_dirty_folio,
+	.dirty_folio		= iomap_dirty_folio,
 	.release_folio		= iomap_release_folio,
 	.invalidate_folio	= iomap_invalidate_folio,
 	.bmap			= xfs_vm_bmap,
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 92c9aaae3663..bf9bf245530c 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -175,7 +175,7 @@ const struct address_space_operations zonefs_file_aops = {
 	.read_folio		= zonefs_read_folio,
 	.readahead		= zonefs_readahead,
 	.writepages		= zonefs_writepages,
-	.dirty_folio		= filemap_dirty_folio,
+	.dirty_folio		= iomap_dirty_folio,
 	.release_folio		= iomap_release_folio,
 	.invalidate_folio	= iomap_invalidate_folio,
 	.migrate_folio		= filemap_migrate_folio,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 80facb9c9e5b..fdc6e64f49d6 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -264,6 +264,7 @@ bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
 struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
 bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
 void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len);
+bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
 int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,

From 6a270bbd3a74c432b867214044377ccfcdb5ddc4 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 11 Jul 2023 09:20:53 +0200
Subject: [PATCH 0518/2047] gpio: mxc: Improve PM configuration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_PM=n (e.g. m68k/allmodconfig):

    drivers/gpio/gpio-mxc.c:612:12: error: ‘mxc_gpio_runtime_resume’ defined but not used [-Werror=unused-function]
      612 | static int mxc_gpio_runtime_resume(struct device *dev)
          |            ^~~~~~~~~~~~~~~~~~~~~~~
    drivers/gpio/gpio-mxc.c:602:12: error: ‘mxc_gpio_runtime_suspend’ defined but not used [-Werror=unused-function]
      602 | static int mxc_gpio_runtime_suspend(struct device *dev)
          |            ^~~~~~~~~~~~~~~~~~~~~~~~

Fix this by using the non-SET *_PM_OPS to configure the dev_pm_ops
callbacks, and by wrapping the driver.pm initializer insider pm_ptr().

As NOIRQ_SYSTEM_SLEEP_PM_OPS() uses pm_sleep_ptr() internally, the
__maybe_unused annotations for the noirq callbacks are no longer needed,
and can be removed.

Fixes: 3283d820dce649ad ("gpio: mxc: add runtime pm support")
Reported-by: noreply@ellerman.id.au
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-mxc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index 443f3e4d9aff..377d3ab8d626 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -622,7 +622,7 @@ static int mxc_gpio_runtime_resume(struct device *dev)
 	return 0;
 }
 
-static int __maybe_unused mxc_gpio_noirq_suspend(struct device *dev)
+static int mxc_gpio_noirq_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct mxc_gpio_port *port = platform_get_drvdata(pdev);
@@ -633,7 +633,7 @@ static int __maybe_unused mxc_gpio_noirq_suspend(struct device *dev)
 	return 0;
 }
 
-static int __maybe_unused mxc_gpio_noirq_resume(struct device *dev)
+static int mxc_gpio_noirq_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct mxc_gpio_port *port = platform_get_drvdata(pdev);
@@ -646,8 +646,8 @@ static int __maybe_unused mxc_gpio_noirq_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops mxc_gpio_dev_pm_ops = {
-	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mxc_gpio_noirq_suspend, mxc_gpio_noirq_resume)
-	SET_RUNTIME_PM_OPS(mxc_gpio_runtime_suspend, mxc_gpio_runtime_resume, NULL)
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(mxc_gpio_noirq_suspend, mxc_gpio_noirq_resume)
+	RUNTIME_PM_OPS(mxc_gpio_runtime_suspend, mxc_gpio_runtime_resume, NULL)
 };
 
 static int mxc_gpio_syscore_suspend(void)
@@ -694,7 +694,7 @@ static struct platform_driver mxc_gpio_driver = {
 		.name	= "gpio-mxc",
 		.of_match_table = mxc_gpio_dt_ids,
 		.suppress_bind_attrs = true,
-		.pm = &mxc_gpio_dev_pm_ops,
+		.pm = pm_ptr(&mxc_gpio_dev_pm_ops),
 	},
 	.probe		= mxc_gpio_probe,
 };

From 5d472a7ef0f35e39906ef0eafa0c76727949f56b Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland@sifive.com>
Date: Mon, 24 Jul 2023 17:40:39 -0700
Subject: [PATCH 0519/2047] gpio: sifive: Directly use the device's fwnode

There is no need to convert dev->of_node back to a fwnode_handle.

Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-sifive.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
index efab7b58f739..a1b5bae05a35 100644
--- a/drivers/gpio/gpio-sifive.c
+++ b/drivers/gpio/gpio-sifive.c
@@ -10,6 +10,7 @@
 #include <linux/gpio/driver.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/regmap.h>
@@ -255,7 +256,7 @@ static int sifive_gpio_probe(struct platform_device *pdev)
 	chip->gc.owner = THIS_MODULE;
 	girq = &chip->gc.irq;
 	gpio_irq_chip_set_chip(girq, &sifive_gpio_irqchip);
-	girq->fwnode = of_node_to_fwnode(node);
+	girq->fwnode = dev_fwnode(dev);
 	girq->parent_domain = parent;
 	girq->child_to_parent_hwirq = sifive_gpio_child_to_parent_hwirq;
 	girq->handler = handle_bad_irq;

From 1cd9cee75f99cfe31a6e339af518679649812838 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland@sifive.com>
Date: Mon, 24 Jul 2023 17:40:40 -0700
Subject: [PATCH 0520/2047] gpio: sifive: Look up IRQs only once during probe

of_irq_count(), or eqivalently platform_irq_count(), simply looks up
successively-numbered IRQs until that fails. Since this driver needs to
look up each IRQ anyway to get its virq number, use that existing loop
to count the IRQs at the same time.

The check against SIFIVE_GPIO_MAX functioned as a bounds check for
chip->irq_number. That is now handled by the loop condition.

Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-sifive.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
index a1b5bae05a35..e292c333f8dd 100644
--- a/drivers/gpio/gpio-sifive.c
+++ b/drivers/gpio/gpio-sifive.c
@@ -187,7 +187,7 @@ static int sifive_gpio_probe(struct platform_device *pdev)
 	struct irq_domain *parent;
 	struct gpio_irq_chip *girq;
 	struct sifive_gpio *chip;
-	int ret, ngpio, i;
+	int ret, ngpio;
 
 	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
 	if (!chip)
@@ -204,13 +204,6 @@ static int sifive_gpio_probe(struct platform_device *pdev)
 	if (IS_ERR(chip->regs))
 		return PTR_ERR(chip->regs);
 
-	ngpio = of_irq_count(node);
-	if (ngpio > SIFIVE_GPIO_MAX) {
-		dev_err(dev, "Too many GPIO interrupts (max=%d)\n",
-			SIFIVE_GPIO_MAX);
-		return -ENXIO;
-	}
-
 	irq_parent = of_irq_find_parent(node);
 	if (!irq_parent) {
 		dev_err(dev, "no IRQ parent node\n");
@@ -223,11 +216,11 @@ static int sifive_gpio_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	for (i = 0; i < ngpio; i++) {
-		ret = platform_get_irq(pdev, i);
+	for (ngpio = 0; ngpio < SIFIVE_GPIO_MAX; ngpio++) {
+		ret = platform_get_irq_optional(pdev, ngpio);
 		if (ret < 0)
-			return ret;
-		chip->irq_number[i] = ret;
+			break;
+		chip->irq_number[ngpio] = ret;
 	}
 
 	ret = bgpio_init(&chip->gc, dev, 4,

From 3b5560c8f074aee8839b66093b4d565702a6921d Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland@sifive.com>
Date: Mon, 24 Jul 2023 17:40:41 -0700
Subject: [PATCH 0521/2047] gpio: sifive: Get the parent IRQ's domain from its
 irq_data

Do not parse the devicetree again when the data is already available
from the IRQ subsystem. This follows the example of the ThunderX and
X-Gene GPIO drivers. The ngpio check is needed to avoid a possible
out-of-bounds read.

Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-sifive.c | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
index e292c333f8dd..8033bb8246c6 100644
--- a/drivers/gpio/gpio-sifive.c
+++ b/drivers/gpio/gpio-sifive.c
@@ -6,7 +6,6 @@
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/errno.h>
-#include <linux/of_irq.h>
 #include <linux/gpio/driver.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
@@ -182,8 +181,6 @@ static const struct regmap_config sifive_gpio_regmap_config = {
 static int sifive_gpio_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *node = pdev->dev.of_node;
-	struct device_node *irq_parent;
 	struct irq_domain *parent;
 	struct gpio_irq_chip *girq;
 	struct sifive_gpio *chip;
@@ -204,24 +201,22 @@ static int sifive_gpio_probe(struct platform_device *pdev)
 	if (IS_ERR(chip->regs))
 		return PTR_ERR(chip->regs);
 
-	irq_parent = of_irq_find_parent(node);
-	if (!irq_parent) {
-		dev_err(dev, "no IRQ parent node\n");
-		return -ENODEV;
-	}
-	parent = irq_find_host(irq_parent);
-	of_node_put(irq_parent);
-	if (!parent) {
-		dev_err(dev, "no IRQ parent domain\n");
-		return -ENODEV;
-	}
-
 	for (ngpio = 0; ngpio < SIFIVE_GPIO_MAX; ngpio++) {
 		ret = platform_get_irq_optional(pdev, ngpio);
 		if (ret < 0)
 			break;
 		chip->irq_number[ngpio] = ret;
 	}
+	if (!ngpio) {
+		dev_err(dev, "no IRQ found\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * The check above ensures at least one parent IRQ is valid.
+	 * Assume all parent IRQs belong to the same domain.
+	 */
+	parent = irq_get_irq_data(chip->irq_number[0])->domain;
 
 	ret = bgpio_init(&chip->gc, dev, 4,
 			 chip->base + SIFIVE_GPIO_INPUT_VAL,

From 6b4c76ded3582651afca319f2ca58c22ec908529 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland@sifive.com>
Date: Mon, 24 Jul 2023 17:40:42 -0700
Subject: [PATCH 0522/2047] gpio: sifive: Allow building the driver as a module

This can reduce the kernel image size in multiplatform configurations.

Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig       | 2 +-
 drivers/gpio/gpio-sifive.c | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 1cf267a3ed8b..398d9ef7e680 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -564,7 +564,7 @@ config GPIO_SAMA5D2_PIOBU
 	  maintain their value during backup/self-refresh.
 
 config GPIO_SIFIVE
-	bool "SiFive GPIO support"
+	tristate "SiFive GPIO support"
 	depends on OF_GPIO
 	select IRQ_DOMAIN_HIERARCHY
 	select GPIO_GENERIC
diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
index 8033bb8246c6..8decd9b5d229 100644
--- a/drivers/gpio/gpio-sifive.c
+++ b/drivers/gpio/gpio-sifive.c
@@ -267,4 +267,8 @@ static struct platform_driver sifive_gpio_driver = {
 		.of_match_table = sifive_gpio_match,
 	},
 };
-builtin_platform_driver(sifive_gpio_driver)
+module_platform_driver(sifive_gpio_driver)
+
+MODULE_AUTHOR("Yash Shah <yash.shah@sifive.com>");
+MODULE_DESCRIPTION("SiFive GPIO driver");
+MODULE_LICENSE("GPL");

From 55c8b8ddc0d95912c7b0d066aaa4bbac146e3c42 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 25 Jul 2023 12:54:21 +0200
Subject: [PATCH 0523/2047] regulator: dt-bindings: mps,mpq7920: drop incorrect
 ref to regulator.yaml

"regulators" node is just grouping regulators, but itself is not
describing one regulator, thus reference to regulator.yaml schema is
incorrect.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230725105421.99160-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml b/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml
index f3fcfc8be72f..019c60942efc 100644
--- a/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml
+++ b/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml
@@ -21,7 +21,6 @@ properties:
 
   regulators:
     type: object
-    $ref: regulator.yaml#
 
     description: |
       list of regulators provided by this controller, must be named

From 269cb04b601dd8c35bbee180a9800335b93111fb Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst@chromium.org>
Date: Fri, 14 Jul 2023 16:14:07 +0800
Subject: [PATCH 0524/2047] regulator: Use bitfield values for range selectors

Right now the regulator helpers expect raw register values for the range
selectors. This is different from the voltage selectors, which are
normalized as bitfield values. This leads to a bit of confusion. Also,
raw values are harder to copy from datasheets or match up with them,
as datasheets will typically have bitfield values.

Make the helpers expect bitfield values, and convert existing users. The
field in regulator_desc is renamed to |linear_range_selectors_bitfield|.
This is intended to cause drivers added in the same merge window and
out-of-tree drivers using the incorrect variable and values to break,
preventing incorrect values being used on actual hardware and potentially
producing magic smoke.

Also include bitops.h explicitly for ffs(), and reorder the header include
statements. While at it, also replace module.h with export.h, since the
only use is EXPORT_SYMBOL_GPL.

Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Link: https://lore.kernel.org/r/20230714081408.274567-1-wenst@chromium.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/atc260x-regulator.c  |  4 ++--
 drivers/regulator/bd718x7-regulator.c  | 22 +++++++++++-----------
 drivers/regulator/helpers.c            | 15 +++++++++------
 drivers/regulator/max77541-regulator.c |  6 +++---
 drivers/regulator/max77650-regulator.c |  2 +-
 drivers/regulator/rohm-regulator.c     |  2 +-
 drivers/regulator/tps6287x-regulator.c |  4 ++--
 include/linux/regulator/driver.h       | 11 ++++++-----
 8 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/drivers/regulator/atc260x-regulator.c b/drivers/regulator/atc260x-regulator.c
index 09fe51464090..3e9f8fd54fca 100644
--- a/drivers/regulator/atc260x-regulator.c
+++ b/drivers/regulator/atc260x-regulator.c
@@ -38,7 +38,7 @@ static const struct linear_range atc2609a_ldo_voltage_ranges1[] = {
 };
 
 static const unsigned int atc260x_ldo_voltage_range_sel[] = {
-	0x0, 0x20,
+	0x0, 0x1,
 };
 
 static int atc260x_dcdc_set_voltage_time_sel(struct regulator_dev *rdev,
@@ -428,7 +428,7 @@ enum atc2609a_reg_ids {
 	.vsel_mask = GENMASK(4, 1), \
 	.vsel_range_reg = ATC2609A_PMU_LDO##num##_CTL0, \
 	.vsel_range_mask = BIT(5), \
-	.linear_range_selectors = atc260x_ldo_voltage_range_sel, \
+	.linear_range_selectors_bitfield = atc260x_ldo_voltage_range_sel, \
 	.enable_reg = ATC2609A_PMU_LDO##num##_CTL0, \
 	.enable_mask = BIT(0), \
 	.enable_time = 2000, \
diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c
index b0b9938c20a1..c3fb05dce40c 100644
--- a/drivers/regulator/bd718x7-regulator.c
+++ b/drivers/regulator/bd718x7-regulator.c
@@ -289,7 +289,7 @@ static const struct linear_range bd71837_buck5_volts[] = {
  * and 0x1 for last 3 ranges.
  */
 static const unsigned int bd71837_buck5_volt_range_sel[] = {
-	0x0, 0x0, 0x0, 0x80, 0x80, 0x80
+	0x0, 0x0, 0x0, 0x1, 0x1, 0x1
 };
 
 /*
@@ -309,7 +309,7 @@ static const struct linear_range bd71847_buck3_volts[] = {
 };
 
 static const unsigned int bd71847_buck3_volt_range_sel[] = {
-	0x0, 0x0, 0x0, 0x40, 0x80, 0x80, 0x80
+	0x0, 0x0, 0x0, 0x1, 0x2, 0x2, 0x2
 };
 
 static const struct linear_range bd71847_buck4_volts[] = {
@@ -317,7 +317,7 @@ static const struct linear_range bd71847_buck4_volts[] = {
 	REGULATOR_LINEAR_RANGE(2600000, 0x00, 0x03, 100000),
 };
 
-static const unsigned int bd71847_buck4_volt_range_sel[] = { 0x0, 0x40 };
+static const unsigned int bd71847_buck4_volt_range_sel[] = { 0x0, 0x1 };
 
 /*
  * BUCK6
@@ -360,7 +360,7 @@ static const struct linear_range bd718xx_ldo1_volts[] = {
 	REGULATOR_LINEAR_RANGE(1600000, 0x00, 0x03, 100000),
 };
 
-static const unsigned int bd718xx_ldo1_volt_range_sel[] = { 0x0, 0x20 };
+static const unsigned int bd718xx_ldo1_volt_range_sel[] = { 0x0, 0x1 };
 
 /*
  * LDO2
@@ -403,7 +403,7 @@ static const struct linear_range bd71847_ldo5_volts[] = {
 	REGULATOR_LINEAR_RANGE(800000, 0x00, 0x0F, 100000),
 };
 
-static const unsigned int bd71847_ldo5_volt_range_sel[] = { 0x0, 0x20 };
+static const unsigned int bd71847_ldo5_volt_range_sel[] = { 0x0, 0x1 };
 
 /*
  * LDO6
@@ -817,7 +817,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.vsel_mask = BD718XX_1ST_NODVS_BUCK_MASK,
 			.vsel_range_reg = BD718XX_REG_1ST_NODVS_BUCK_VOLT,
 			.vsel_range_mask = BD71847_BUCK3_RANGE_MASK,
-			.linear_range_selectors = bd71847_buck3_volt_range_sel,
+			.linear_range_selectors_bitfield = bd71847_buck3_volt_range_sel,
 			.enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
 			.enable_time = BD71847_BUCK3_STARTUP_TIME,
@@ -845,7 +845,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.vsel_mask = BD71847_BUCK4_MASK,
 			.vsel_range_reg = BD718XX_REG_2ND_NODVS_BUCK_VOLT,
 			.vsel_range_mask = BD71847_BUCK4_RANGE_MASK,
-			.linear_range_selectors = bd71847_buck4_volt_range_sel,
+			.linear_range_selectors_bitfield = bd71847_buck4_volt_range_sel,
 			.enable_mask = BD718XX_BUCK_EN,
 			.enable_time = BD71847_BUCK4_STARTUP_TIME,
 			.owner = THIS_MODULE,
@@ -916,7 +916,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.vsel_mask = BD718XX_LDO1_MASK,
 			.vsel_range_reg = BD718XX_REG_LDO1_VOLT,
 			.vsel_range_mask = BD718XX_LDO1_RANGE_MASK,
-			.linear_range_selectors = bd718xx_ldo1_volt_range_sel,
+			.linear_range_selectors_bitfield = bd718xx_ldo1_volt_range_sel,
 			.enable_reg = BD718XX_REG_LDO1_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
 			.enable_time = BD71847_LDO1_STARTUP_TIME,
@@ -1010,7 +1010,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = {
 			.vsel_mask = BD71847_LDO5_MASK,
 			.vsel_range_reg = BD718XX_REG_LDO5_VOLT,
 			.vsel_range_mask = BD71847_LDO5_RANGE_MASK,
-			.linear_range_selectors = bd71847_ldo5_volt_range_sel,
+			.linear_range_selectors_bitfield = bd71847_ldo5_volt_range_sel,
 			.enable_reg = BD718XX_REG_LDO5_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
 			.enable_time = BD71847_LDO5_STARTUP_TIME,
@@ -1232,7 +1232,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = BD71837_BUCK5_MASK,
 			.vsel_range_reg = BD718XX_REG_1ST_NODVS_BUCK_VOLT,
 			.vsel_range_mask = BD71837_BUCK5_RANGE_MASK,
-			.linear_range_selectors = bd71837_buck5_volt_range_sel,
+			.linear_range_selectors_bitfield = bd71837_buck5_volt_range_sel,
 			.enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
 			.enable_time = BD71837_BUCK5_STARTUP_TIME,
@@ -1328,7 +1328,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = {
 			.vsel_mask = BD718XX_LDO1_MASK,
 			.vsel_range_reg = BD718XX_REG_LDO1_VOLT,
 			.vsel_range_mask = BD718XX_LDO1_RANGE_MASK,
-			.linear_range_selectors = bd718xx_ldo1_volt_range_sel,
+			.linear_range_selectors_bitfield = bd718xx_ldo1_volt_range_sel,
 			.enable_reg = BD718XX_REG_LDO1_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
 			.enable_time = BD71837_LDO1_STARTUP_TIME,
diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c
index e6c999ba3fa2..5ad5f3b3a6b5 100644
--- a/drivers/regulator/helpers.c
+++ b/drivers/regulator/helpers.c
@@ -5,13 +5,14 @@
 // Copyright 2007, 2008 Wolfson Microelectronics PLC.
 // Copyright 2008 SlimLogic Ltd.
 
-#include <linux/kernel.h>
-#include <linux/err.h>
+#include <linux/bitops.h>
 #include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/regulator/driver.h>
-#include <linux/module.h>
 
 #include "internal.h"
 
@@ -104,13 +105,14 @@ static int regulator_range_selector_to_index(struct regulator_dev *rdev,
 {
 	int i;
 
-	if (!rdev->desc->linear_range_selectors)
+	if (!rdev->desc->linear_range_selectors_bitfield)
 		return -EINVAL;
 
 	rval &= rdev->desc->vsel_range_mask;
+	rval >>= ffs(rdev->desc->vsel_range_mask) - 1;
 
 	for (i = 0; i < rdev->desc->n_linear_ranges; i++) {
-		if (rdev->desc->linear_range_selectors[i] == rval)
+		if (rdev->desc->linear_range_selectors_bitfield[i] == rval)
 			return i;
 	}
 	return -EINVAL;
@@ -194,7 +196,8 @@ int regulator_set_voltage_sel_pickable_regmap(struct regulator_dev *rdev,
 	sel <<= ffs(rdev->desc->vsel_mask) - 1;
 	sel += rdev->desc->linear_ranges[i].min_sel;
 
-	range = rdev->desc->linear_range_selectors[i];
+	range = rdev->desc->linear_range_selectors_bitfield[i];
+	range <<= ffs(rdev->desc->vsel_mask) - 1;
 
 	if (rdev->desc->vsel_reg == rdev->desc->vsel_range_reg) {
 		ret = regmap_update_bits(rdev->regmap,
diff --git a/drivers/regulator/max77541-regulator.c b/drivers/regulator/max77541-regulator.c
index 2976f9cb3e26..e6b3d9147c37 100644
--- a/drivers/regulator/max77541-regulator.c
+++ b/drivers/regulator/max77541-regulator.c
@@ -44,7 +44,7 @@ static const struct linear_range max77541_buck_ranges[] = {
 };
 
 static const unsigned int max77541_buck_volt_range_sel[] = {
-	0x00, 0x00, 0x40, 0x40, 0x80, 0x80,
+	0x0, 0x0, 0x1, 0x1, 0x2, 0x2,
 };
 
 enum max77541_regulators {
@@ -67,7 +67,7 @@ enum max77541_regulators {
 		.vsel_mask = MAX77541_BITS_MX_VOUT,			\
 		.vsel_range_reg = MAX77541_REG_M ## _id ## _CFG1,	\
 		.vsel_range_mask = MAX77541_BITS_MX_CFG1_RNG,		\
-		.linear_range_selectors = max77541_buck_volt_range_sel, \
+		.linear_range_selectors_bitfield = max77541_buck_volt_range_sel, \
 		.owner = THIS_MODULE,					\
 	}
 
@@ -86,7 +86,7 @@ enum max77541_regulators {
 		.vsel_mask = MAX77541_BITS_MX_VOUT,			\
 		.vsel_range_reg = MAX77541_REG_M ## _id ## _CFG1,	\
 		.vsel_range_mask = MAX77541_BITS_MX_CFG1_RNG,		\
-		.linear_range_selectors = max77541_buck_volt_range_sel, \
+		.linear_range_selectors_bitfield = max77541_buck_volt_range_sel, \
 		.owner = THIS_MODULE,					\
 	}
 
diff --git a/drivers/regulator/max77650-regulator.c b/drivers/regulator/max77650-regulator.c
index f6539b945037..94abfbb2bc1e 100644
--- a/drivers/regulator/max77650-regulator.c
+++ b/drivers/regulator/max77650-regulator.c
@@ -239,7 +239,7 @@ static struct max77650_regulator_desc max77651_SBB1_desc = {
 		.supply_name		= "in-sbb1",
 		.id			= MAX77650_REGULATOR_ID_SBB1,
 		.ops			= &max77651_SBB1_regulator_ops,
-		.linear_range_selectors	= max77651_sbb1_volt_range_sel,
+		.linear_range_selectors_bitfield	= max77651_sbb1_volt_range_sel,
 		.linear_ranges		= max77651_sbb1_volt_ranges,
 		.n_linear_ranges	= ARRAY_SIZE(max77651_sbb1_volt_ranges),
 		.n_voltages		= 58,
diff --git a/drivers/regulator/rohm-regulator.c b/drivers/regulator/rohm-regulator.c
index f97a9a51ee76..0e2418ed957c 100644
--- a/drivers/regulator/rohm-regulator.c
+++ b/drivers/regulator/rohm-regulator.c
@@ -36,7 +36,7 @@ static int set_dvs_level(const struct regulator_desc *desc,
 	}
 	for (i = 0; i < desc->n_voltages; i++) {
 		/* NOTE to next hacker - Does not support pickable ranges */
-		if (desc->linear_range_selectors)
+		if (desc->linear_range_selectors_bitfield)
 			return -EINVAL;
 		if (desc->n_linear_ranges)
 			ret = regulator_desc_list_voltage_linear_range(desc, i);
diff --git a/drivers/regulator/tps6287x-regulator.c b/drivers/regulator/tps6287x-regulator.c
index 19a4a300a963..d022184a8e7d 100644
--- a/drivers/regulator/tps6287x-regulator.c
+++ b/drivers/regulator/tps6287x-regulator.c
@@ -41,7 +41,7 @@ static const struct linear_range tps6287x_voltage_ranges[] = {
 };
 
 static const unsigned int tps6287x_voltage_range_sel[] = {
-	0x0, 0x4, 0x8, 0xC
+	0x0, 0x1, 0x2, 0x3
 };
 
 static const unsigned int tps6287x_ramp_table[] = {
@@ -122,7 +122,7 @@ static struct regulator_desc tps6287x_reg = {
 	.n_voltages = 256,
 	.linear_ranges = tps6287x_voltage_ranges,
 	.n_linear_ranges = ARRAY_SIZE(tps6287x_voltage_ranges),
-	.linear_range_selectors = tps6287x_voltage_range_sel,
+	.linear_range_selectors_bitfield = tps6287x_voltage_range_sel,
 };
 
 static int tps6287x_i2c_probe(struct i2c_client *i2c)
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index c6ef7d68eb9a..4b7eceb3828b 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -292,11 +292,12 @@ enum regulator_type {
  * @ramp_delay: Time to settle down after voltage change (unit: uV/us)
  * @min_dropout_uV: The minimum dropout voltage this regulator can handle
  * @linear_ranges: A constant table of possible voltage ranges.
- * @linear_range_selectors: A constant table of voltage range selectors.
- *			    If pickable ranges are used each range must
- *			    have corresponding selector here.
+ * @linear_range_selectors_bitfield: A constant table of voltage range
+ *                                   selectors as bitfield values. If
+ *                                   pickable ranges are used each range
+ *                                   must have corresponding selector here.
  * @n_linear_ranges: Number of entries in the @linear_ranges (and in
- *		     linear_range_selectors if used) table(s).
+ *		     linear_range_selectors_bitfield if used) table(s).
  * @volt_table: Voltage mapping table (if table based mapping)
  * @curr_table: Current limit mapping table (if table based mapping)
  *
@@ -384,7 +385,7 @@ struct regulator_desc {
 	int min_dropout_uV;
 
 	const struct linear_range *linear_ranges;
-	const unsigned int *linear_range_selectors;
+	const unsigned int *linear_range_selectors_bitfield;
 
 	int n_linear_ranges;
 

From 908e5a3d4e6f60fa2d3912be7087e745639c4404 Mon Sep 17 00:00:00 2001
From: Minjie Du <duminjie@vivo.com>
Date: Tue, 25 Jul 2023 11:50:37 +0800
Subject: [PATCH 0525/2047] spi: fsl-dspi: Use dev_err_probe() in
 dspi_request_dma()

It is possible for dma_request_chan() to return EPROBE_DEFER, which means
dev is not ready yet.
At this point dev_err() will have no output.

Signed-off-by: Minjie Du <duminjie@vivo.com>
Link: https://lore.kernel.org/r/20230725035038.1702-1-duminjie@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-dspi.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index ca41c8a8ba42..150d2ebf234b 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -503,15 +503,14 @@ static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
 
 	dma->chan_rx = dma_request_chan(dev, "rx");
 	if (IS_ERR(dma->chan_rx)) {
-		dev_err(dev, "rx dma channel not available\n");
-		ret = PTR_ERR(dma->chan_rx);
-		return ret;
+		return dev_err_probe(dev, PTR_ERR(dma->chan_rx),
+			"rx dma channel not available\n");
 	}
 
 	dma->chan_tx = dma_request_chan(dev, "tx");
 	if (IS_ERR(dma->chan_tx)) {
-		dev_err(dev, "tx dma channel not available\n");
 		ret = PTR_ERR(dma->chan_tx);
+		dev_err_probe(dev, ret, "tx dma channel not available\n");
 		goto err_tx_channel;
 	}
 

From 8e289f4542890168705219e54f0231dccfabddbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:14:49 +0300
Subject: [PATCH 0526/2047] selftests/resctrl: Add resctrl.h into build deps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Makefile only lists *.c as build dependencies for the resctrl_tests
executable which excludes resctrl.h.

Add *.h to wildcard() to include resctrl.h.

Fixes: 591a6e8588fc ("selftests/resctrl: Add basic resctrl file system operations and data")
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index 73d53257df42..5073dbc96125 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -7,4 +7,4 @@ TEST_GEN_PROGS := resctrl_tests
 
 include ../lib.mk
 
-$(OUTPUT)/resctrl_tests: $(wildcard *.c)
+$(OUTPUT)/resctrl_tests: $(wildcard *.[ch])

From 2d320b1029ee7329ee0638181be967789775b962 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:14:50 +0300
Subject: [PATCH 0527/2047] selftests/resctrl: Don't leak buffer in
 fill_cache()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The error path in fill_cache() does return before the allocated buffer
is freed leaking the buffer.

The leak was introduced when fill_cache_read() started to return errors
in commit c7b607fa9325 ("selftests/resctrl: Fix null pointer
dereference on open failed"), before that both fill functions always
returned 0.

Move free() earlier to prevent the mem leak.

Fixes: c7b607fa9325 ("selftests/resctrl: Fix null pointer dereference on open failed")
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/fill_buf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index 341cc93ca84c..3b328c844896 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -177,12 +177,13 @@ fill_cache(unsigned long long buf_size, int malloc_and_init, int memflush,
 	else
 		ret = fill_cache_write(start_ptr, end_ptr, resctrl_val);
 
+	free(startptr);
+
 	if (ret) {
 		printf("\n Error in fill cache read/write...\n");
 		return -1;
 	}
 
-	free(startptr);
 
 	return 0;
 }

From f99e413eb54652e2436cc56d081176bc9a34cd8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:14:51 +0300
Subject: [PATCH 0528/2047] selftests/resctrl: Unmount resctrl FS if child
 fails to run benchmark
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A child calls PARENT_EXIT() when it fails to run a benchmark to kill
the parent process. PARENT_EXIT() lacks unmount for the resctrl FS and
the parent won't be there to unmount it either after it gets killed.

Add the resctrl FS unmount also to PARENT_EXIT().

Fixes: 591a6e8588fc ("selftests/resctrl: Add basic resctrl file system operations and data")
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/resctrl.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 87e39456dee0..f455f0b7e314 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -43,6 +43,7 @@
 	do {					\
 		perror(err_msg);		\
 		kill(ppid, SIGKILL);		\
+		umount_resctrlfs();		\
 		exit(EXIT_FAILURE);		\
 	} while (0)
 

From 51a0c3b7f028169e40db930575dd01fe81c3e765 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:14:52 +0300
Subject: [PATCH 0529/2047] selftests/resctrl: Close perf value read fd on
 errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Perf event fd (fd_lm) is not closed when run_fill_buf() returns error.

Close fd_lm only in cat_val() to make it easier to track it is always
closed.

Fixes: 790bf585b0ee ("selftests/resctrl: Add Cache Allocation Technology (CAT) selftest")
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/cache.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 8a4fe8693be6..289b619116fe 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -87,21 +87,19 @@ static int reset_enable_llc_perf(pid_t pid, int cpu_no)
 static int get_llc_perf(unsigned long *llc_perf_miss)
 {
 	__u64 total_misses;
+	int ret;
 
 	/* Stop counters after one span to get miss rate */
 
 	ioctl(fd_lm, PERF_EVENT_IOC_DISABLE, 0);
 
-	if (read(fd_lm, &rf_cqm, sizeof(struct read_format)) == -1) {
+	ret = read(fd_lm, &rf_cqm, sizeof(struct read_format));
+	if (ret == -1) {
 		perror("Could not get llc misses through perf");
-
 		return -1;
 	}
 
 	total_misses = rf_cqm.values[0].value;
-
-	close(fd_lm);
-
 	*llc_perf_miss = total_misses;
 
 	return 0;
@@ -253,19 +251,25 @@ int cat_val(struct resctrl_val_param *param)
 					 memflush, operation, resctrl_val)) {
 				fprintf(stderr, "Error-running fill buffer\n");
 				ret = -1;
-				break;
+				goto pe_close;
 			}
 
 			sleep(1);
 			ret = measure_cache_vals(param, bm_pid);
 			if (ret)
-				break;
+				goto pe_close;
+
+			close(fd_lm);
 		} else {
 			break;
 		}
 	}
 
 	return ret;
+
+pe_close:
+	close(fd_lm);
+	return ret;
 }
 
 /*

From ede13008489b36f2adcdf01e3f6435c7b24610c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:14:53 +0300
Subject: [PATCH 0530/2047] selftests/resctrl: Unmount resctrl FS before
 starting the first test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resctrl FS mount/remount/umount code is hard to track. Better approach
is to use mount/umount pair for each test but that assumes resctrl FS
is not mounted beforehand.

Change umount_resctrlfs() so that it can unmount resctrl FS from any
path, and enable further simplifications into mount/remount/umount
logic by unmounting resctrl FS at the start if a pre-existing
mountpoint is found.

Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/resctrl_tests.c |  3 +++
 tools/testing/selftests/resctrl/resctrlfs.c     | 12 +++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 9b9751206e1c..61c3882a26c8 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -250,6 +250,9 @@ int main(int argc, char **argv)
 	if (!check_resctrlfs_support())
 		return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n");
 
+	if (umount_resctrlfs())
+		return ksft_exit_skip("resctrl FS unmount failed.\n");
+
 	filter_dmesg();
 
 	ksft_set_plan(tests ? : 4);
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index fb00245dee92..cf0c8dd34b8b 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -82,10 +82,16 @@ int remount_resctrlfs(bool mum_resctrlfs)
 
 int umount_resctrlfs(void)
 {
-	if (find_resctrl_mount(NULL))
-		return 0;
+	char mountpoint[256];
+	int ret;
 
-	if (umount(RESCTRL_PATH)) {
+	ret = find_resctrl_mount(mountpoint);
+	if (ret == -ENOENT)
+		return 0;
+	if (ret)
+		return ret;
+
+	if (umount(mountpoint)) {
 		perror("# Unable to umount resctrl");
 
 		return errno;

From caddc0fbe4958a84b0dcd257439e9090c8fbfb8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:14:54 +0300
Subject: [PATCH 0531/2047] selftests/resctrl: Move resctrl FS mount/umount to
 higher level
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A few places currently lack umounting resctrl FS on error paths:
  - cmt_resctrl_val() has multiple error paths with direct return.
  - cat_perf_miss_val() has multiple error paths with direct return.
In addition, validate_resctrl_feature_request() is called by
run_mbm_test() and run_mba_test(). Neither MBA nor MBM test tries to
umount resctrl FS.

Each and every test does require resctrl FS to be present already for
feature check. Thus, it makes sense to just mount it on higher level in
resctrl_tests.c and properly pair it with umount.

Move resctrl FS (re)mount/unmount into each test function in
resctrl_tests.c. Make feature validation to simply check that resctrl
FS is mounted.

As there's the final umount in main() before this change, the selftest
should not leave resctrl FS behind after the tests even if one of the
forementioned paths is taken (thus, no fixes tag).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/cat_test.c    |  6 ---
 tools/testing/selftests/resctrl/cmt_test.c    |  4 --
 .../testing/selftests/resctrl/resctrl_tests.c | 47 ++++++++++++++++---
 tools/testing/selftests/resctrl/resctrl_val.c |  5 --
 tools/testing/selftests/resctrl/resctrlfs.c   |  7 ++-
 5 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index fb1443f888c4..e1c071dec1b0 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -106,10 +106,6 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
 
 	cache_size = 0;
 
-	ret = remount_resctrlfs(true);
-	if (ret)
-		return ret;
-
 	/* Get default cbm mask for L3/L2 cache */
 	ret = get_cbm_mask(cache_type, cbm_mask);
 	if (ret)
@@ -227,8 +223,6 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
 
 out:
 	cat_test_cleanup();
-	if (bm_pid)
-		umount_resctrlfs();
 
 	return ret;
 }
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index af71b2141271..426d11189a99 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -86,10 +86,6 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
 
 	cache_size = 0;
 
-	ret = remount_resctrlfs(true);
-	if (ret)
-		return ret;
-
 	if (!validate_resctrl_feature_request(CMT_STR))
 		return -1;
 
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 61c3882a26c8..6bf4ead74496 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -77,9 +77,15 @@ static void run_mbm_test(bool has_ben, char **benchmark_cmd, int span,
 
 	ksft_print_msg("Starting MBM BW change ...\n");
 
+	res = remount_resctrlfs(true);
+	if (res) {
+		ksft_exit_fail_msg("Failed to mount resctrl FS\n");
+		return;
+	}
+
 	if (!validate_resctrl_feature_request(MBM_STR) || (get_vendor() != ARCH_INTEL)) {
 		ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n");
-		return;
+		goto umount;
 	}
 
 	if (!has_ben)
@@ -88,6 +94,9 @@ static void run_mbm_test(bool has_ben, char **benchmark_cmd, int span,
 	ksft_test_result(!res, "MBM: bw change\n");
 	if ((get_vendor() == ARCH_INTEL) && res)
 		ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
+
+umount:
+	umount_resctrlfs();
 }
 
 static void run_mba_test(bool has_ben, char **benchmark_cmd, int span,
@@ -97,15 +106,24 @@ static void run_mba_test(bool has_ben, char **benchmark_cmd, int span,
 
 	ksft_print_msg("Starting MBA Schemata change ...\n");
 
+	res = remount_resctrlfs(true);
+	if (res) {
+		ksft_exit_fail_msg("Failed to mount resctrl FS\n");
+		return;
+	}
+
 	if (!validate_resctrl_feature_request(MBA_STR) || (get_vendor() != ARCH_INTEL)) {
 		ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n");
-		return;
+		goto umount;
 	}
 
 	if (!has_ben)
 		sprintf(benchmark_cmd[1], "%d", span);
 	res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd);
 	ksft_test_result(!res, "MBA: schemata change\n");
+
+umount:
+	umount_resctrlfs();
 }
 
 static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no)
@@ -113,9 +131,16 @@ static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no)
 	int res;
 
 	ksft_print_msg("Starting CMT test ...\n");
+
+	res = remount_resctrlfs(true);
+	if (res) {
+		ksft_exit_fail_msg("Failed to mount resctrl FS\n");
+		return;
+	}
+
 	if (!validate_resctrl_feature_request(CMT_STR)) {
 		ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n");
-		return;
+		goto umount;
 	}
 
 	if (!has_ben)
@@ -124,6 +149,9 @@ static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no)
 	ksft_test_result(!res, "CMT: test\n");
 	if ((get_vendor() == ARCH_INTEL) && res)
 		ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
+
+umount:
+	umount_resctrlfs();
 }
 
 static void run_cat_test(int cpu_no, int no_of_bits)
@@ -132,13 +160,22 @@ static void run_cat_test(int cpu_no, int no_of_bits)
 
 	ksft_print_msg("Starting CAT test ...\n");
 
+	res = remount_resctrlfs(true);
+	if (res) {
+		ksft_exit_fail_msg("Failed to mount resctrl FS\n");
+		return;
+	}
+
 	if (!validate_resctrl_feature_request(CAT_STR)) {
 		ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n");
-		return;
+		goto umount;
 	}
 
 	res = cat_perf_miss_val(cpu_no, no_of_bits, "L3");
 	ksft_test_result(!res, "CAT: test\n");
+
+umount:
+	umount_resctrlfs();
 }
 
 int main(int argc, char **argv)
@@ -269,7 +306,5 @@ int main(int argc, char **argv)
 	if (cat_test)
 		run_cat_test(cpu_no, no_of_bits);
 
-	umount_resctrlfs();
-
 	ksft_finished();
 }
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index ab1eab1e7ff6..e8f1e6a59f4a 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -648,10 +648,6 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
 			return ret;
 	}
 
-	ret = remount_resctrlfs(param->mum_resctrlfs);
-	if (ret)
-		return ret;
-
 	/*
 	 * If benchmark wasn't successfully started by child, then child should
 	 * kill parent, so save parent's pid
@@ -788,7 +784,6 @@ unregister:
 	signal_handler_unregister();
 out:
 	kill(bm_pid, SIGKILL);
-	umount_resctrlfs();
 
 	return ret;
 }
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index cf0c8dd34b8b..edc3f92b11b1 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -617,7 +617,8 @@ char *fgrep(FILE *inf, const char *str)
  * validate_resctrl_feature_request - Check if requested feature is valid.
  * @resctrl_val:	Requested feature
  *
- * Return: True if the feature is supported, else false
+ * Return: True if the feature is supported, else false. False is also
+ *         returned if resctrl FS is not mounted.
  */
 bool validate_resctrl_feature_request(const char *resctrl_val)
 {
@@ -625,11 +626,13 @@ bool validate_resctrl_feature_request(const char *resctrl_val)
 	bool found = false;
 	char *res;
 	FILE *inf;
+	int ret;
 
 	if (!resctrl_val)
 		return false;
 
-	if (remount_resctrlfs(false))
+	ret = find_resctrl_mount(NULL);
+	if (ret)
 		return false;
 
 	if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {

From 6383851a193b76dbd3ec09b91154e2dda6c96308 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:14:55 +0300
Subject: [PATCH 0532/2047] selftests/resctrl: Refactor remount_resctrl(bool
 mum_resctrlfs) to mount_resctrl()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mount/umount of the resctrl FS is now paired nicely per test.

Rename remount_resctrl(bool mum_resctrlfs) to mount_resctrl(). Make
it unconditionally try to mount the resctrl FS and return error if
resctrl FS was mounted already.

While at it, group the mount/umount prototypes in the header.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/resctrl.h     |  2 +-
 .../testing/selftests/resctrl/resctrl_tests.c |  8 +++----
 tools/testing/selftests/resctrl/resctrlfs.c   | 23 ++++++-------------
 3 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index f455f0b7e314..23af3fb73cb4 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -85,8 +85,8 @@ extern char llc_occup_path[1024];
 int get_vendor(void);
 bool check_resctrlfs_support(void);
 int filter_dmesg(void);
-int remount_resctrlfs(bool mum_resctrlfs);
 int get_resource_id(int cpu_no, int *resource_id);
+int mount_resctrlfs(void);
 int umount_resctrlfs(void);
 int validate_bw_report_request(char *bw_report);
 bool validate_resctrl_feature_request(const char *resctrl_val);
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 6bf4ead74496..0363c4b607d2 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -77,7 +77,7 @@ static void run_mbm_test(bool has_ben, char **benchmark_cmd, int span,
 
 	ksft_print_msg("Starting MBM BW change ...\n");
 
-	res = remount_resctrlfs(true);
+	res = mount_resctrlfs();
 	if (res) {
 		ksft_exit_fail_msg("Failed to mount resctrl FS\n");
 		return;
@@ -106,7 +106,7 @@ static void run_mba_test(bool has_ben, char **benchmark_cmd, int span,
 
 	ksft_print_msg("Starting MBA Schemata change ...\n");
 
-	res = remount_resctrlfs(true);
+	res = mount_resctrlfs();
 	if (res) {
 		ksft_exit_fail_msg("Failed to mount resctrl FS\n");
 		return;
@@ -132,7 +132,7 @@ static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no)
 
 	ksft_print_msg("Starting CMT test ...\n");
 
-	res = remount_resctrlfs(true);
+	res = mount_resctrlfs();
 	if (res) {
 		ksft_exit_fail_msg("Failed to mount resctrl FS\n");
 		return;
@@ -160,7 +160,7 @@ static void run_cat_test(int cpu_no, int no_of_bits)
 
 	ksft_print_msg("Starting CAT test ...\n");
 
-	res = remount_resctrlfs(true);
+	res = mount_resctrlfs();
 	if (res) {
 		ksft_exit_fail_msg("Failed to mount resctrl FS\n");
 		return;
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index edc3f92b11b1..c45666942439 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -48,29 +48,20 @@ static int find_resctrl_mount(char *buffer)
 }
 
 /*
- * remount_resctrlfs - Remount resctrl FS at /sys/fs/resctrl
- * @mum_resctrlfs:	Should the resctrl FS be remounted?
+ * mount_resctrlfs - Mount resctrl FS at /sys/fs/resctrl
  *
- * If not mounted, mount it.
- * If mounted and mum_resctrlfs then remount resctrl FS.
- * If mounted and !mum_resctrlfs then noop
+ * Mounts resctrl FS. Fails if resctrl FS is already mounted to avoid
+ * pre-existing settings interfering with the test results.
  *
  * Return: 0 on success, non-zero on failure
  */
-int remount_resctrlfs(bool mum_resctrlfs)
+int mount_resctrlfs(void)
 {
-	char mountpoint[256];
 	int ret;
 
-	ret = find_resctrl_mount(mountpoint);
-	if (ret)
-		strcpy(mountpoint, RESCTRL_PATH);
-
-	if (!ret && mum_resctrlfs && umount(mountpoint))
-		ksft_print_msg("Fail: unmounting \"%s\"\n", mountpoint);
-
-	if (!ret && !mum_resctrlfs)
-		return 0;
+	ret = find_resctrl_mount(NULL);
+	if (ret != -ENOENT)
+		return -1;
 
 	ksft_print_msg("Mounting resctrl to \"%s\"\n", RESCTRL_PATH);
 	ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL);

From 48f3b68738c74941b93ef499f69da313c07f24c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:14:56 +0300
Subject: [PATCH 0533/2047] selftests/resctrl: Remove mum_resctrlfs from struct
 resctrl_val_param
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resctrl FS mount/umount are now cleanly paired leaving .mum_resctrlfs
in the struct resctrl_val_param unused.

Remove .mum_resctrlfs from struct resctrl_val_param.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/cat_test.c | 1 -
 tools/testing/selftests/resctrl/cmt_test.c | 1 -
 tools/testing/selftests/resctrl/mba_test.c | 1 -
 tools/testing/selftests/resctrl/mbm_test.c | 1 -
 tools/testing/selftests/resctrl/resctrl.h  | 2 --
 5 files changed, 6 deletions(-)

diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index e1c071dec1b0..480db0dc4e0e 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -140,7 +140,6 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
 	struct resctrl_val_param param = {
 		.resctrl_val	= CAT_STR,
 		.cpu_no		= cpu_no,
-		.mum_resctrlfs	= false,
 		.setup		= cat_setup,
 	};
 
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index 426d11189a99..d31e28416bb7 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -113,7 +113,6 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
 		.ctrlgrp	= "c1",
 		.mongrp		= "m1",
 		.cpu_no		= cpu_no,
-		.mum_resctrlfs	= false,
 		.filename	= RESULT_FILE_NAME,
 		.mask		= ~(long_mask << n) & long_mask,
 		.span		= cache_size * n / count_of_bits,
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
index cde3781a9ab0..3d53c6c9b9ce 100644
--- a/tools/testing/selftests/resctrl/mba_test.c
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -154,7 +154,6 @@ int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd)
 		.ctrlgrp	= "c1",
 		.mongrp		= "m1",
 		.cpu_no		= cpu_no,
-		.mum_resctrlfs	= true,
 		.filename	= RESULT_FILE_NAME,
 		.bw_report	= bw_report,
 		.setup		= mba_setup
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index 538d35a6485a..24326cb7bc21 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -123,7 +123,6 @@ int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd)
 		.mongrp		= "m1",
 		.span		= span,
 		.cpu_no		= cpu_no,
-		.mum_resctrlfs	= true,
 		.filename	= RESULT_FILE_NAME,
 		.bw_report	=  bw_report,
 		.setup		= mbm_setup
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 23af3fb73cb4..99678d688a80 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -54,7 +54,6 @@
  * @mongrp:		Name of the monitor group (mon grp)
  * @cpu_no:		CPU number to which the benchmark would be binded
  * @span:		Memory bytes accessed in each benchmark iteration
- * @mum_resctrlfs:	Should the resctrl FS be remounted?
  * @filename:		Name of file to which the o/p should be written
  * @bw_report:		Bandwidth report type (reads vs writes)
  * @setup:		Call back function to setup test environment
@@ -65,7 +64,6 @@ struct resctrl_val_param {
 	char		mongrp[64];
 	int		cpu_no;
 	unsigned long	span;
-	bool		mum_resctrlfs;
 	char		filename[64];
 	char		*bw_report;
 	unsigned long	mask;

From 67a86643db2b12408430d1d627b3dc6208f06765 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:14:57 +0300
Subject: [PATCH 0534/2047] selftests/resctrl: Convert span to size_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Span is defined either as unsigned long or int.

Consistently use size_t everywhere for span as it refers to size of the
memory block.

Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/cache.c         |  4 ++--
 tools/testing/selftests/resctrl/cmt_test.c      |  2 +-
 tools/testing/selftests/resctrl/fill_buf.c      |  8 ++++----
 tools/testing/selftests/resctrl/mbm_test.c      |  8 ++++----
 tools/testing/selftests/resctrl/resctrl.h       | 10 +++++-----
 tools/testing/selftests/resctrl/resctrl_tests.c | 11 ++++++-----
 tools/testing/selftests/resctrl/resctrlfs.c     |  2 +-
 7 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 289b619116fe..53df66814cd6 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -286,7 +286,7 @@ pe_close:
  * Return:		0 on success. non-zero on failure.
  */
 int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
-		    unsigned long cache_span, unsigned long max_diff,
+		    size_t cache_span, unsigned long max_diff,
 		    unsigned long max_diff_percent, unsigned long num_of_runs,
 		    bool platform, bool cmt)
 {
@@ -308,7 +308,7 @@ int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
 	ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent));
 	ksft_print_msg("Number of bits: %d\n", no_of_bits);
 	ksft_print_msg("Average LLC val: %lu\n", avg_llc_val);
-	ksft_print_msg("Cache span (%s): %lu\n", cmt ? "bytes" : "lines",
+	ksft_print_msg("Cache span (%s): %zu\n", cmt ? "bytes" : "lines",
 		       cache_span);
 
 	return ret;
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index d31e28416bb7..beb0f0687c6d 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -121,7 +121,7 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
 	};
 
 	if (strcmp(benchmark_cmd[0], "fill_buf") == 0)
-		sprintf(benchmark_cmd[1], "%lu", param.span);
+		sprintf(benchmark_cmd[1], "%zu", param.span);
 
 	remove(RESULT_FILE_NAME);
 
diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index 3b328c844896..785cbd8d0148 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -139,7 +139,7 @@ static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr,
 }
 
 static int
-fill_cache(unsigned long long buf_size, int malloc_and_init, int memflush,
+fill_cache(size_t buf_size, int malloc_and_init, int memflush,
 	   int op, char *resctrl_val)
 {
 	unsigned char *start_ptr, *end_ptr;
@@ -188,10 +188,10 @@ fill_cache(unsigned long long buf_size, int malloc_and_init, int memflush,
 	return 0;
 }
 
-int run_fill_buf(unsigned long span, int malloc_and_init_memory,
-		 int memflush, int op, char *resctrl_val)
+int run_fill_buf(size_t span, int malloc_and_init_memory, int memflush, int op,
+		 char *resctrl_val)
 {
-	unsigned long long cache_size = span;
+	size_t cache_size = span;
 	int ret;
 
 	ret = fill_cache(cache_size, malloc_and_init_memory, memflush, op,
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index 24326cb7bc21..fd116158d008 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -15,7 +15,7 @@
 #define NUM_OF_RUNS		5
 
 static int
-show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span)
+show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span)
 {
 	unsigned long avg_bw_imc = 0, avg_bw_resc = 0;
 	unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
@@ -40,14 +40,14 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span)
 	ksft_print_msg("%s Check MBM diff within %d%%\n",
 		       ret ? "Fail:" : "Pass:", MAX_DIFF_PERCENT);
 	ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per);
-	ksft_print_msg("Span (MB): %d\n", span);
+	ksft_print_msg("Span (MB): %zu\n", span);
 	ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc);
 	ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc);
 
 	return ret;
 }
 
-static int check_results(int span)
+static int check_results(size_t span)
 {
 	unsigned long bw_imc[NUM_OF_RUNS], bw_resc[NUM_OF_RUNS];
 	char temp[1024], *token_array[8];
@@ -115,7 +115,7 @@ void mbm_test_cleanup(void)
 	remove(RESULT_FILE_NAME);
 }
 
-int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd)
+int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd)
 {
 	struct resctrl_val_param param = {
 		.resctrl_val	= MBM_STR,
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 99678d688a80..52068ceea956 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -63,7 +63,7 @@ struct resctrl_val_param {
 	char		ctrlgrp[64];
 	char		mongrp[64];
 	int		cpu_no;
-	unsigned long	span;
+	size_t		span;
 	char		filename[64];
 	char		*bw_report;
 	unsigned long	mask;
@@ -97,10 +97,10 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
 			    char *resctrl_val);
 int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
 		    int group_fd, unsigned long flags);
-int run_fill_buf(unsigned long span, int malloc_and_init_memory, int memflush,
-		 int op, char *resctrl_va);
+int run_fill_buf(size_t span, int malloc_and_init_memory, int memflush, int op,
+		 char *resctrl_va);
 int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param);
-int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd);
+int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd);
 void tests_cleanup(void);
 void mbm_test_cleanup(void);
 int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd);
@@ -119,7 +119,7 @@ void cmt_test_cleanup(void);
 int get_core_sibling(int cpu_no);
 int measure_cache_vals(struct resctrl_val_param *param, int bm_pid);
 int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
-		    unsigned long cache_span, unsigned long max_diff,
+		    size_t cache_span, unsigned long max_diff,
 		    unsigned long max_diff_percent, unsigned long num_of_runs,
 		    bool platform, bool cmt);
 
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 0363c4b607d2..bae8f205fc3f 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -70,7 +70,7 @@ void tests_cleanup(void)
 	cat_test_cleanup();
 }
 
-static void run_mbm_test(bool has_ben, char **benchmark_cmd, int span,
+static void run_mbm_test(bool has_ben, char **benchmark_cmd, size_t span,
 			 int cpu_no, char *bw_report)
 {
 	int res;
@@ -99,7 +99,7 @@ umount:
 	umount_resctrlfs();
 }
 
-static void run_mba_test(bool has_ben, char **benchmark_cmd, int span,
+static void run_mba_test(bool has_ben, char **benchmark_cmd, size_t span,
 			 int cpu_no, char *bw_report)
 {
 	int res;
@@ -118,7 +118,7 @@ static void run_mba_test(bool has_ben, char **benchmark_cmd, int span,
 	}
 
 	if (!has_ben)
-		sprintf(benchmark_cmd[1], "%d", span);
+		sprintf(benchmark_cmd[1], "%zu", span);
 	res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd);
 	ksft_test_result(!res, "MBA: schemata change\n");
 
@@ -181,11 +181,12 @@ umount:
 int main(int argc, char **argv)
 {
 	bool has_ben = false, mbm_test = true, mba_test = true, cmt_test = true;
-	int c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 0;
 	char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64];
 	char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE];
+	int c, cpu_no = 1, argc_new = argc, i, no_of_bits = 0;
 	int ben_ind, ben_count, tests = 0;
 	bool cat_test = true;
+	size_t span = 250;
 
 	for (i = 0; i < argc; i++) {
 		if (strcmp(argv[i], "-b") == 0) {
@@ -273,7 +274,7 @@ int main(int argc, char **argv)
 			benchmark_cmd[i] = benchmark_cmd_area[i];
 
 		strcpy(benchmark_cmd[0], "fill_buf");
-		sprintf(benchmark_cmd[1], "%d", span);
+		sprintf(benchmark_cmd[1], "%zu", span);
 		strcpy(benchmark_cmd[2], "1");
 		strcpy(benchmark_cmd[3], "1");
 		strcpy(benchmark_cmd[4], "0");
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index c45666942439..14fb85a7e526 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -303,7 +303,7 @@ int taskset_benchmark(pid_t bm_pid, int cpu_no)
 void run_benchmark(int signum, siginfo_t *info, void *ucontext)
 {
 	int operation, ret, malloc_and_init_memory, memflush;
-	unsigned long span, buffer_span;
+	size_t span, buffer_span;
 	char **benchmark_cmd;
 	char resctrl_val[64];
 	FILE *fp;

From e24ac42889810168f7a2022b868bb4b9a9481760 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:14:58 +0300
Subject: [PATCH 0535/2047] selftests/resctrl: Express span internally in bytes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MBA and MBM tests to use megabytes to represent span. CMT test uses
bytes. The difference requires run_benchmark() to size the buffer
differently based on the test name, which in turn requires passing the
test name into run_benchmark().

Convert MBA and MBM tests to use span internally in bytes like CMT test
to remove the internal inconsistency between the tests. Remove the test
dependent buffer sizing from run_benchmark().

This change eliminates one of the reasons why the test name has to be
passed around but there are still other users too so the test name
passing cannot yet be removed.

Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/mbm_test.c      | 2 +-
 tools/testing/selftests/resctrl/resctrl_tests.c | 2 +-
 tools/testing/selftests/resctrl/resctrlfs.c     | 9 ++-------
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index fd116158d008..2d58d4b8a918 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -40,7 +40,7 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span)
 	ksft_print_msg("%s Check MBM diff within %d%%\n",
 		       ret ? "Fail:" : "Pass:", MAX_DIFF_PERCENT);
 	ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per);
-	ksft_print_msg("Span (MB): %zu\n", span);
+	ksft_print_msg("Span (MB): %zu\n", span / MB);
 	ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc);
 	ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc);
 
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index bae8f205fc3f..3a65dacb441e 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -185,8 +185,8 @@ int main(int argc, char **argv)
 	char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE];
 	int c, cpu_no = 1, argc_new = argc, i, no_of_bits = 0;
 	int ben_ind, ben_count, tests = 0;
+	size_t span = 250 * MB;
 	bool cat_test = true;
-	size_t span = 250;
 
 	for (i = 0; i < argc; i++) {
 		if (strcmp(argv[i], "-b") == 0) {
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 14fb85a7e526..6c00e79df033 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -303,9 +303,9 @@ int taskset_benchmark(pid_t bm_pid, int cpu_no)
 void run_benchmark(int signum, siginfo_t *info, void *ucontext)
 {
 	int operation, ret, malloc_and_init_memory, memflush;
-	size_t span, buffer_span;
 	char **benchmark_cmd;
 	char resctrl_val[64];
+	size_t span;
 	FILE *fp;
 
 	benchmark_cmd = info->si_ptr;
@@ -326,12 +326,7 @@ void run_benchmark(int signum, siginfo_t *info, void *ucontext)
 		operation = atoi(benchmark_cmd[4]);
 		sprintf(resctrl_val, "%s", benchmark_cmd[5]);
 
-		if (strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
-			buffer_span = span * MB;
-		else
-			buffer_span = span;
-
-		if (run_fill_buf(buffer_span, malloc_and_init_memory, memflush,
+		if (run_fill_buf(span, malloc_and_init_memory, memflush,
 				 operation, resctrl_val))
 			fprintf(stderr, "Error in running fill buffer\n");
 	} else {

From 1b9537baf8513d605a2de6c5ea218ca936418892 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:14:59 +0300
Subject: [PATCH 0536/2047] selftests/resctrl: Remove duplicated preparation
 for span arg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When no benchmark_cmd is given, benchmark_cmd[1] is set to span in
main(). There's no need to do it again in run_mba_test().

Remove the duplicated preparation for span argument into
benchmark_cmd[1] from run_mba_test(). After this, the has_ben and span
arguments to run_mba_test() can be removed.

Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/resctrl_tests.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 3a65dacb441e..bf0cadab36b0 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -99,8 +99,7 @@ umount:
 	umount_resctrlfs();
 }
 
-static void run_mba_test(bool has_ben, char **benchmark_cmd, size_t span,
-			 int cpu_no, char *bw_report)
+static void run_mba_test(char **benchmark_cmd, int cpu_no, char *bw_report)
 {
 	int res;
 
@@ -117,8 +116,6 @@ static void run_mba_test(bool has_ben, char **benchmark_cmd, size_t span,
 		goto umount;
 	}
 
-	if (!has_ben)
-		sprintf(benchmark_cmd[1], "%zu", span);
 	res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd);
 	ksft_test_result(!res, "MBA: schemata change\n");
 
@@ -299,7 +296,7 @@ int main(int argc, char **argv)
 		run_mbm_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
 
 	if (mba_test)
-		run_mba_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
+		run_mba_test(benchmark_cmd, cpu_no, bw_report);
 
 	if (cmt_test)
 		run_cmt_test(has_ben, benchmark_cmd, cpu_no);

From 326baed260e2e4f2df18d42d41b261740c6b8c04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:15:00 +0300
Subject: [PATCH 0537/2047] selftests/resctrl: Remove "malloc_and_init_memory"
 param from run_fill_buf()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

run_fill_buf()'s malloc_and_init_memory parameter is always 1. There's
also duplicated memory init code for malloc_and_init_memory == 0 case
in fill_buf() which is unused.

Remove the malloc_and_init_memory parameter and the duplicated mem init
code.

While at it, fix also a typo in run_fill_buf() prototype's argument.

Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/cache.c       |  6 ++--
 tools/testing/selftests/resctrl/fill_buf.c    | 28 +++----------------
 tools/testing/selftests/resctrl/resctrl.h     |  3 +-
 .../testing/selftests/resctrl/resctrl_tests.c | 13 ++++-----
 tools/testing/selftests/resctrl/resctrlfs.c   | 12 ++++----
 5 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 53df66814cd6..63b551c44f1d 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -210,7 +210,7 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
  */
 int cat_val(struct resctrl_val_param *param)
 {
-	int malloc_and_init_memory = 1, memflush = 1, operation = 0, ret = 0;
+	int memflush = 1, operation = 0, ret = 0;
 	char *resctrl_val = param->resctrl_val;
 	pid_t bm_pid;
 
@@ -247,8 +247,8 @@ int cat_val(struct resctrl_val_param *param)
 			if (ret)
 				break;
 
-			if (run_fill_buf(param->span, malloc_and_init_memory,
-					 memflush, operation, resctrl_val)) {
+			if (run_fill_buf(param->span, memflush, operation,
+					 resctrl_val)) {
 				fprintf(stderr, "Error-running fill buffer\n");
 				ret = -1;
 				goto pe_close;
diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index 785cbd8d0148..d8f5505eb9e6 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -138,36 +138,18 @@ static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr,
 	return 0;
 }
 
-static int
-fill_cache(size_t buf_size, int malloc_and_init, int memflush,
-	   int op, char *resctrl_val)
+static int fill_cache(size_t buf_size, int memflush, int op, char *resctrl_val)
 {
 	unsigned char *start_ptr, *end_ptr;
-	unsigned long long i;
 	int ret;
 
-	if (malloc_and_init)
-		start_ptr = malloc_and_init_memory(buf_size);
-	else
-		start_ptr = malloc(buf_size);
-
+	start_ptr = malloc_and_init_memory(buf_size);
 	if (!start_ptr)
 		return -1;
 
 	startptr = start_ptr;
 	end_ptr = start_ptr + buf_size;
 
-	/*
-	 * It's better to touch the memory once to avoid any compiler
-	 * optimizations
-	 */
-	if (!malloc_and_init) {
-		for (i = 0; i < buf_size; i++)
-			*start_ptr++ = (unsigned char)rand();
-	}
-
-	start_ptr = startptr;
-
 	/* Flush the memory before using to avoid "cache hot pages" effect */
 	if (memflush)
 		mem_flush(start_ptr, buf_size);
@@ -188,14 +170,12 @@ fill_cache(size_t buf_size, int malloc_and_init, int memflush,
 	return 0;
 }
 
-int run_fill_buf(size_t span, int malloc_and_init_memory, int memflush, int op,
-		 char *resctrl_val)
+int run_fill_buf(size_t span, int memflush, int op, char *resctrl_val)
 {
 	size_t cache_size = span;
 	int ret;
 
-	ret = fill_cache(cache_size, malloc_and_init_memory, memflush, op,
-			 resctrl_val);
+	ret = fill_cache(cache_size, memflush, op, resctrl_val);
 	if (ret) {
 		printf("\n Error in fill cache\n");
 		return -1;
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 52068ceea956..3054cc4ef0e3 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -97,8 +97,7 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
 			    char *resctrl_val);
 int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
 		    int group_fd, unsigned long flags);
-int run_fill_buf(size_t span, int malloc_and_init_memory, int memflush, int op,
-		 char *resctrl_va);
+int run_fill_buf(size_t span, int memflush, int op, char *resctrl_val);
 int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param);
 int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd);
 void tests_cleanup(void);
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index bf0cadab36b0..125ed0ca11e3 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -89,7 +89,7 @@ static void run_mbm_test(bool has_ben, char **benchmark_cmd, size_t span,
 	}
 
 	if (!has_ben)
-		sprintf(benchmark_cmd[5], "%s", MBA_STR);
+		sprintf(benchmark_cmd[4], "%s", MBA_STR);
 	res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
 	ksft_test_result(!res, "MBM: bw change\n");
 	if ((get_vendor() == ARCH_INTEL) && res)
@@ -141,7 +141,7 @@ static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no)
 	}
 
 	if (!has_ben)
-		sprintf(benchmark_cmd[5], "%s", CMT_STR);
+		sprintf(benchmark_cmd[4], "%s", CMT_STR);
 	res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd);
 	ksft_test_result(!res, "CMT: test\n");
 	if ((get_vendor() == ARCH_INTEL) && res)
@@ -267,16 +267,15 @@ int main(int argc, char **argv)
 		benchmark_cmd[ben_count] = NULL;
 	} else {
 		/* If no benchmark is given by "-b" argument, use fill_buf. */
-		for (i = 0; i < 6; i++)
+		for (i = 0; i < 5; i++)
 			benchmark_cmd[i] = benchmark_cmd_area[i];
 
 		strcpy(benchmark_cmd[0], "fill_buf");
 		sprintf(benchmark_cmd[1], "%zu", span);
 		strcpy(benchmark_cmd[2], "1");
-		strcpy(benchmark_cmd[3], "1");
-		strcpy(benchmark_cmd[4], "0");
-		strcpy(benchmark_cmd[5], "");
-		benchmark_cmd[6] = NULL;
+		strcpy(benchmark_cmd[3], "0");
+		strcpy(benchmark_cmd[4], "");
+		benchmark_cmd[5] = NULL;
 	}
 
 	sprintf(bw_report, "reads");
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 6c00e79df033..8a66445079f8 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -302,7 +302,7 @@ int taskset_benchmark(pid_t bm_pid, int cpu_no)
  */
 void run_benchmark(int signum, siginfo_t *info, void *ucontext)
 {
-	int operation, ret, malloc_and_init_memory, memflush;
+	int operation, ret, memflush;
 	char **benchmark_cmd;
 	char resctrl_val[64];
 	size_t span;
@@ -321,13 +321,11 @@ void run_benchmark(int signum, siginfo_t *info, void *ucontext)
 	if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
 		/* Execute default fill_buf benchmark */
 		span = strtoul(benchmark_cmd[1], NULL, 10);
-		malloc_and_init_memory = atoi(benchmark_cmd[2]);
-		memflush =  atoi(benchmark_cmd[3]);
-		operation = atoi(benchmark_cmd[4]);
-		sprintf(resctrl_val, "%s", benchmark_cmd[5]);
+		memflush =  atoi(benchmark_cmd[2]);
+		operation = atoi(benchmark_cmd[3]);
+		sprintf(resctrl_val, "%s", benchmark_cmd[4]);
 
-		if (run_fill_buf(span, malloc_and_init_memory, memflush,
-				 operation, resctrl_val))
+		if (run_fill_buf(span, memflush, operation, resctrl_val))
 			fprintf(stderr, "Error in running fill buffer\n");
 	} else {
 		/* Execute specified benchmark */

From 5e3e4f1a03f075bd49104ea4c998c20649a49491 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:15:01 +0300
Subject: [PATCH 0538/2047] selftests/resctrl: Remove unnecessary startptr
 global from fill_buf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fill_buf stores buffer pointer into global variable startptr that is
only used in fill_cache().

Remove startptr as global variable, the local variable in fill_cache()
is enough to keep the pointer.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/fill_buf.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index d8f5505eb9e6..a5ec9c82a960 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -22,8 +22,6 @@
 #define PAGE_SIZE		(4 * 1024)
 #define MB			(1024 * 1024)
 
-static unsigned char *startptr;
-
 static void sb(void)
 {
 #if defined(__i386) || defined(__x86_64)
@@ -147,7 +145,6 @@ static int fill_cache(size_t buf_size, int memflush, int op, char *resctrl_val)
 	if (!start_ptr)
 		return -1;
 
-	startptr = start_ptr;
 	end_ptr = start_ptr + buf_size;
 
 	/* Flush the memory before using to avoid "cache hot pages" effect */
@@ -159,7 +156,7 @@ static int fill_cache(size_t buf_size, int memflush, int op, char *resctrl_val)
 	else
 		ret = fill_cache_write(start_ptr, end_ptr, resctrl_val);
 
-	free(startptr);
+	free(start_ptr);
 
 	if (ret) {
 		printf("\n Error in fill cache read/write...\n");

From f4123975cf341fd12121e1631c4b172a190f48d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:15:02 +0300
Subject: [PATCH 0539/2047] selftests/resctrl: Improve parameter consistency in
 fill_buf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fill_buf's arguments can be improved in multiple ways:

  - Multiple functions in fill_buf have start_ptr as one of their
    argument which is a bit long and the extra "start" is pretty
    obvious when it comes to pointers.

  - Some of the functions take end_ptr and others size_t to indicate
    the end of the buffer.

  - Some arguments meaning buffer size are called just 's'

  - mem_flush() takes void * but immediately converts it to char *

Cleanup the parameters to make things simpler and more consistent:

  - Rename start_ptr to simply buf as it's shorter.

  - Replace end_ptr and s parameters with buf_size and only calculate
    end_ptr in the functions that truly use it.

  - Make mem_flush() parameters to follow the same convention as the
    other functions in fill_buf.

  - convert mem_flush() char * to unsigned char *.

While at it, fix also a typo in a comment.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/fill_buf.c | 49 +++++++++++-----------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index a5ec9c82a960..5f16c4f5dfbf 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -38,32 +38,32 @@ static void cl_flush(void *p)
 #endif
 }
 
-static void mem_flush(void *p, size_t s)
+static void mem_flush(unsigned char *buf, size_t buf_size)
 {
-	char *cp = (char *)p;
+	unsigned char *cp = buf;
 	size_t i = 0;
 
-	s = s / CL_SIZE; /* mem size in cache llines */
+	buf_size = buf_size / CL_SIZE; /* mem size in cache lines */
 
-	for (i = 0; i < s; i++)
+	for (i = 0; i < buf_size; i++)
 		cl_flush(&cp[i * CL_SIZE]);
 
 	sb();
 }
 
-static void *malloc_and_init_memory(size_t s)
+static void *malloc_and_init_memory(size_t buf_size)
 {
 	void *p = NULL;
 	uint64_t *p64;
 	size_t s64;
 	int ret;
 
-	ret = posix_memalign(&p, PAGE_SIZE, s);
+	ret = posix_memalign(&p, PAGE_SIZE, buf_size);
 	if (ret < 0)
 		return NULL;
 
 	p64 = (uint64_t *)p;
-	s64 = s / sizeof(uint64_t);
+	s64 = buf_size / sizeof(uint64_t);
 
 	while (s64 > 0) {
 		*p64 = (uint64_t)rand();
@@ -74,12 +74,13 @@ static void *malloc_and_init_memory(size_t s)
 	return p;
 }
 
-static int fill_one_span_read(unsigned char *start_ptr, unsigned char *end_ptr)
+static int fill_one_span_read(unsigned char *buf, size_t buf_size)
 {
+	unsigned char *end_ptr = buf + buf_size;
 	unsigned char sum, *p;
 
 	sum = 0;
-	p = start_ptr;
+	p = buf;
 	while (p < end_ptr) {
 		sum += *p;
 		p += (CL_SIZE / 2);
@@ -88,26 +89,26 @@ static int fill_one_span_read(unsigned char *start_ptr, unsigned char *end_ptr)
 	return sum;
 }
 
-static
-void fill_one_span_write(unsigned char *start_ptr, unsigned char *end_ptr)
+static void fill_one_span_write(unsigned char *buf, size_t buf_size)
 {
+	unsigned char *end_ptr = buf + buf_size;
 	unsigned char *p;
 
-	p = start_ptr;
+	p = buf;
 	while (p < end_ptr) {
 		*p = '1';
 		p += (CL_SIZE / 2);
 	}
 }
 
-static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr,
+static int fill_cache_read(unsigned char *buf, size_t buf_size,
 			   char *resctrl_val)
 {
 	int ret = 0;
 	FILE *fp;
 
 	while (1) {
-		ret = fill_one_span_read(start_ptr, end_ptr);
+		ret = fill_one_span_read(buf, buf_size);
 		if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)))
 			break;
 	}
@@ -124,11 +125,11 @@ static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr,
 	return 0;
 }
 
-static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr,
+static int fill_cache_write(unsigned char *buf, size_t buf_size,
 			    char *resctrl_val)
 {
 	while (1) {
-		fill_one_span_write(start_ptr, end_ptr);
+		fill_one_span_write(buf, buf_size);
 		if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)))
 			break;
 	}
@@ -138,25 +139,23 @@ static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr,
 
 static int fill_cache(size_t buf_size, int memflush, int op, char *resctrl_val)
 {
-	unsigned char *start_ptr, *end_ptr;
+	unsigned char *buf;
 	int ret;
 
-	start_ptr = malloc_and_init_memory(buf_size);
-	if (!start_ptr)
+	buf = malloc_and_init_memory(buf_size);
+	if (!buf)
 		return -1;
 
-	end_ptr = start_ptr + buf_size;
-
 	/* Flush the memory before using to avoid "cache hot pages" effect */
 	if (memflush)
-		mem_flush(start_ptr, buf_size);
+		mem_flush(buf, buf_size);
 
 	if (op == 0)
-		ret = fill_cache_read(start_ptr, end_ptr, resctrl_val);
+		ret = fill_cache_read(buf, buf_size, resctrl_val);
 	else
-		ret = fill_cache_write(start_ptr, end_ptr, resctrl_val);
+		ret = fill_cache_write(buf, buf_size, resctrl_val);
 
-	free(start_ptr);
+	free(buf);
 
 	if (ret) {
 		printf("\n Error in fill cache read/write...\n");

From 7f3c980c7738d0624077f926216bc7ac4cefcf60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:15:03 +0300
Subject: [PATCH 0540/2047] selftests/resctrl: Don't pass test name to fill_buf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Test name is passed to fill_buf functions so that they can loop around
buffer only once. This is required for CAT test case.

To loop around buffer only once, caller doesn't need to let fill_buf
know which test case it is. Instead, pass a boolean argument 'once'
which makes fill_buf more generic.

As run_benchmark() no longer needs to pass the test name to
run_fill_buf(), a few test running functions can be simplified to not
write the test name into the default benchmark_cmd. The has_ben
argument can also be removed now from those test running functions.

Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/cache.c       |  3 +--
 tools/testing/selftests/resctrl/fill_buf.c    | 20 +++++++++----------
 tools/testing/selftests/resctrl/resctrl.h     |  2 +-
 .../testing/selftests/resctrl/resctrl_tests.c | 14 +++++--------
 tools/testing/selftests/resctrl/resctrlfs.c   | 11 +++++++---
 5 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 63b551c44f1d..8ae9ac150fd3 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -247,8 +247,7 @@ int cat_val(struct resctrl_val_param *param)
 			if (ret)
 				break;
 
-			if (run_fill_buf(param->span, memflush, operation,
-					 resctrl_val)) {
+			if (run_fill_buf(param->span, memflush, operation, true)) {
 				fprintf(stderr, "Error-running fill buffer\n");
 				ret = -1;
 				goto pe_close;
diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index 5f16c4f5dfbf..0d425f26583a 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -101,15 +101,14 @@ static void fill_one_span_write(unsigned char *buf, size_t buf_size)
 	}
 }
 
-static int fill_cache_read(unsigned char *buf, size_t buf_size,
-			   char *resctrl_val)
+static int fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
 {
 	int ret = 0;
 	FILE *fp;
 
 	while (1) {
 		ret = fill_one_span_read(buf, buf_size);
-		if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)))
+		if (once)
 			break;
 	}
 
@@ -125,19 +124,18 @@ static int fill_cache_read(unsigned char *buf, size_t buf_size,
 	return 0;
 }
 
-static int fill_cache_write(unsigned char *buf, size_t buf_size,
-			    char *resctrl_val)
+static int fill_cache_write(unsigned char *buf, size_t buf_size, bool once)
 {
 	while (1) {
 		fill_one_span_write(buf, buf_size);
-		if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)))
+		if (once)
 			break;
 	}
 
 	return 0;
 }
 
-static int fill_cache(size_t buf_size, int memflush, int op, char *resctrl_val)
+static int fill_cache(size_t buf_size, int memflush, int op, bool once)
 {
 	unsigned char *buf;
 	int ret;
@@ -151,9 +149,9 @@ static int fill_cache(size_t buf_size, int memflush, int op, char *resctrl_val)
 		mem_flush(buf, buf_size);
 
 	if (op == 0)
-		ret = fill_cache_read(buf, buf_size, resctrl_val);
+		ret = fill_cache_read(buf, buf_size, once);
 	else
-		ret = fill_cache_write(buf, buf_size, resctrl_val);
+		ret = fill_cache_write(buf, buf_size, once);
 
 	free(buf);
 
@@ -166,12 +164,12 @@ static int fill_cache(size_t buf_size, int memflush, int op, char *resctrl_val)
 	return 0;
 }
 
-int run_fill_buf(size_t span, int memflush, int op, char *resctrl_val)
+int run_fill_buf(size_t span, int memflush, int op, bool once)
 {
 	size_t cache_size = span;
 	int ret;
 
-	ret = fill_cache(cache_size, memflush, op, resctrl_val);
+	ret = fill_cache(cache_size, memflush, op, once);
 	if (ret) {
 		printf("\n Error in fill cache\n");
 		return -1;
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 3054cc4ef0e3..645ad407bd8d 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -97,7 +97,7 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
 			    char *resctrl_val);
 int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
 		    int group_fd, unsigned long flags);
-int run_fill_buf(size_t span, int memflush, int op, char *resctrl_val);
+int run_fill_buf(size_t span, int memflush, int op, bool once);
 int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param);
 int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd);
 void tests_cleanup(void);
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 125ed0ca11e3..d511daeb6851 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -70,7 +70,7 @@ void tests_cleanup(void)
 	cat_test_cleanup();
 }
 
-static void run_mbm_test(bool has_ben, char **benchmark_cmd, size_t span,
+static void run_mbm_test(char **benchmark_cmd, size_t span,
 			 int cpu_no, char *bw_report)
 {
 	int res;
@@ -88,8 +88,6 @@ static void run_mbm_test(bool has_ben, char **benchmark_cmd, size_t span,
 		goto umount;
 	}
 
-	if (!has_ben)
-		sprintf(benchmark_cmd[4], "%s", MBA_STR);
 	res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
 	ksft_test_result(!res, "MBM: bw change\n");
 	if ((get_vendor() == ARCH_INTEL) && res)
@@ -123,7 +121,7 @@ umount:
 	umount_resctrlfs();
 }
 
-static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no)
+static void run_cmt_test(char **benchmark_cmd, int cpu_no)
 {
 	int res;
 
@@ -140,8 +138,6 @@ static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no)
 		goto umount;
 	}
 
-	if (!has_ben)
-		sprintf(benchmark_cmd[4], "%s", CMT_STR);
 	res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd);
 	ksft_test_result(!res, "CMT: test\n");
 	if ((get_vendor() == ARCH_INTEL) && res)
@@ -274,7 +270,7 @@ int main(int argc, char **argv)
 		sprintf(benchmark_cmd[1], "%zu", span);
 		strcpy(benchmark_cmd[2], "1");
 		strcpy(benchmark_cmd[3], "0");
-		strcpy(benchmark_cmd[4], "");
+		strcpy(benchmark_cmd[4], "false");
 		benchmark_cmd[5] = NULL;
 	}
 
@@ -292,13 +288,13 @@ int main(int argc, char **argv)
 	ksft_set_plan(tests ? : 4);
 
 	if (mbm_test)
-		run_mbm_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
+		run_mbm_test(benchmark_cmd, span, cpu_no, bw_report);
 
 	if (mba_test)
 		run_mba_test(benchmark_cmd, cpu_no, bw_report);
 
 	if (cmt_test)
-		run_cmt_test(has_ben, benchmark_cmd, cpu_no);
+		run_cmt_test(benchmark_cmd, cpu_no);
 
 	if (cat_test)
 		run_cat_test(cpu_no, no_of_bits);
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 8a66445079f8..bd36ee206602 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -304,8 +304,8 @@ void run_benchmark(int signum, siginfo_t *info, void *ucontext)
 {
 	int operation, ret, memflush;
 	char **benchmark_cmd;
-	char resctrl_val[64];
 	size_t span;
+	bool once;
 	FILE *fp;
 
 	benchmark_cmd = info->si_ptr;
@@ -323,9 +323,14 @@ void run_benchmark(int signum, siginfo_t *info, void *ucontext)
 		span = strtoul(benchmark_cmd[1], NULL, 10);
 		memflush =  atoi(benchmark_cmd[2]);
 		operation = atoi(benchmark_cmd[3]);
-		sprintf(resctrl_val, "%s", benchmark_cmd[4]);
+		if (!strcmp(benchmark_cmd[4], "true"))
+			once = true;
+		else if (!strcmp(benchmark_cmd[4], "false"))
+			once = false;
+		else
+			PARENT_EXIT("Invalid once parameter");
 
-		if (run_fill_buf(span, memflush, operation, resctrl_val))
+		if (run_fill_buf(span, memflush, operation, once))
 			fprintf(stderr, "Error in running fill buffer\n");
 	} else {
 		/* Execute specified benchmark */

From 8ee592a638fc246958acdb543acb42e76a72c5aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:15:04 +0300
Subject: [PATCH 0541/2047] selftests/resctrl: Don't use variable argument list
 for ->setup()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

struct resctrl_val_param has ->setup() function that accepts variable
argument list. All test cases use only 1 argument as input and it's
the struct resctrl_val_param pointer.

Instead of variable argument list, directly pass struct
resctrl_val_param pointer as the only parameter to ->setup().

Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/cache.c       | 2 +-
 tools/testing/selftests/resctrl/cat_test.c    | 8 +-------
 tools/testing/selftests/resctrl/cmt_test.c    | 9 +--------
 tools/testing/selftests/resctrl/mba_test.c    | 8 +-------
 tools/testing/selftests/resctrl/mbm_test.c    | 8 +-------
 tools/testing/selftests/resctrl/resctrl.h     | 3 +--
 tools/testing/selftests/resctrl/resctrl_val.c | 2 +-
 7 files changed, 7 insertions(+), 33 deletions(-)

diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 8ae9ac150fd3..9c0e4ce4f28e 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -236,7 +236,7 @@ int cat_val(struct resctrl_val_param *param)
 	/* Test runs until the callback setup() tells the test to stop. */
 	while (1) {
 		if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {
-			ret = param->setup(1, param);
+			ret = param->setup(param);
 			if (ret == END_OF_TESTS) {
 				ret = 0;
 				break;
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 480db0dc4e0e..42e35269d8b6 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -27,17 +27,11 @@ static unsigned long cache_size;
  * con_mon grp, mon_grp in resctrl FS.
  * Run 5 times in order to get average values.
  */
-static int cat_setup(int num, ...)
+static int cat_setup(struct resctrl_val_param *p)
 {
-	struct resctrl_val_param *p;
 	char schemata[64];
-	va_list param;
 	int ret = 0;
 
-	va_start(param, num);
-	p = va_arg(param, struct resctrl_val_param *);
-	va_end(param);
-
 	/* Run NUM_OF_RUNS times */
 	if (p->num_of_runs >= NUM_OF_RUNS)
 		return END_OF_TESTS;
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index beb0f0687c6d..7214aefb55ed 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -21,15 +21,8 @@ static char cbm_mask[256];
 static unsigned long long_mask;
 static unsigned long cache_size;
 
-static int cmt_setup(int num, ...)
+static int cmt_setup(struct resctrl_val_param *p)
 {
-	struct resctrl_val_param *p;
-	va_list param;
-
-	va_start(param, num);
-	p = va_arg(param, struct resctrl_val_param *);
-	va_end(param);
-
 	/* Run NUM_OF_RUNS times */
 	if (p->num_of_runs >= NUM_OF_RUNS)
 		return END_OF_TESTS;
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
index 3d53c6c9b9ce..4d2f145804b8 100644
--- a/tools/testing/selftests/resctrl/mba_test.c
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -22,18 +22,12 @@
  * con_mon grp, mon_grp in resctrl FS.
  * For each allocation, run 5 times in order to get average values.
  */
-static int mba_setup(int num, ...)
+static int mba_setup(struct resctrl_val_param *p)
 {
 	static int runs_per_allocation, allocation = 100;
-	struct resctrl_val_param *p;
 	char allocation_str[64];
-	va_list param;
 	int ret;
 
-	va_start(param, num);
-	p = va_arg(param, struct resctrl_val_param *);
-	va_end(param);
-
 	if (runs_per_allocation >= NUM_OF_RUNS)
 		runs_per_allocation = 0;
 
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index 2d58d4b8a918..c7de6f5977f6 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -86,16 +86,10 @@ static int check_results(size_t span)
 	return ret;
 }
 
-static int mbm_setup(int num, ...)
+static int mbm_setup(struct resctrl_val_param *p)
 {
-	struct resctrl_val_param *p;
-	va_list param;
 	int ret = 0;
 
-	va_start(param, num);
-	p = va_arg(param, struct resctrl_val_param *);
-	va_end(param);
-
 	/* Run NUM_OF_RUNS times */
 	if (p->num_of_runs >= NUM_OF_RUNS)
 		return END_OF_TESTS;
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 645ad407bd8d..838d1a438f33 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -3,7 +3,6 @@
 #ifndef RESCTRL_H
 #define RESCTRL_H
 #include <stdio.h>
-#include <stdarg.h>
 #include <math.h>
 #include <errno.h>
 #include <sched.h>
@@ -68,7 +67,7 @@ struct resctrl_val_param {
 	char		*bw_report;
 	unsigned long	mask;
 	int		num_of_runs;
-	int		(*setup)(int num, ...);
+	int		(*setup)(struct resctrl_val_param *param);
 };
 
 #define MBM_STR			"mbm"
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index e8f1e6a59f4a..f0f6c5f6e98b 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -759,7 +759,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
 
 	/* Test runs until the callback setup() tells the test to stop. */
 	while (1) {
-		ret = param->setup(1, param);
+		ret = param->setup(param);
 		if (ret == END_OF_TESTS) {
 			ret = 0;
 			break;

From 85b73447a44cd215815464a0981604e177384192 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:15:05 +0300
Subject: [PATCH 0542/2047] selftests/resctrl: Move CAT/CMT test global vars to
 function they are used in
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CAT and CMT tests have count_of_bits, long_mask, cbm_mask, and
cache_size global variables that can be moved into the sole using
function.

Make the global variables local variables of the relevant function to
scope them better.

While at it, move cache_size initialization into the declaration line.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/cat_test.c | 11 ++++-------
 tools/testing/selftests/resctrl/cmt_test.c | 11 ++++-------
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 42e35269d8b6..ed6c8e64ad11 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -17,11 +17,6 @@
 #define MAX_DIFF_PERCENT	4
 #define MAX_DIFF		1000000
 
-static int count_of_bits;
-static char cbm_mask[256];
-static unsigned long long_mask;
-static unsigned long cache_size;
-
 /*
  * Change schemata. Write schemata to specified
  * con_mon grp, mon_grp in resctrl FS.
@@ -96,10 +91,12 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
 {
 	unsigned long l_mask, l_mask_1;
 	int ret, pipefd[2], sibling_cpu_no;
+	unsigned long cache_size = 0;
+	unsigned long long_mask;
+	char cbm_mask[256];
+	int count_of_bits;
 	char pipe_message;
 
-	cache_size = 0;
-
 	/* Get default cbm mask for L3/L2 cache */
 	ret = get_cbm_mask(cache_type, cbm_mask);
 	if (ret)
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index 7214aefb55ed..0ac9d6bbd13d 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -16,11 +16,6 @@
 #define MAX_DIFF		2000000
 #define MAX_DIFF_PERCENT	15
 
-static int count_of_bits;
-static char cbm_mask[256];
-static unsigned long long_mask;
-static unsigned long cache_size;
-
 static int cmt_setup(struct resctrl_val_param *p)
 {
 	/* Run NUM_OF_RUNS times */
@@ -75,10 +70,12 @@ void cmt_test_cleanup(void)
 
 int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
 {
+	unsigned long cache_size = 0;
+	unsigned long long_mask;
+	char cbm_mask[256];
+	int count_of_bits;
 	int ret;
 
-	cache_size = 0;
-
 	if (!validate_resctrl_feature_request(CMT_STR))
 		return -1;
 

From 3dad011b9ab1a3904b449c9351a6d9be4b0afbc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:15:06 +0300
Subject: [PATCH 0543/2047] selftests/resctrl: Pass the real number of tests to
 show_cache_info()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Results include warm-up test which is discarded before passing the sum
to show_cache_info(). show_cache_info() handles this by subtracting one
from the number of tests in divisor. It is a trappy construct to have
sum and number of tests parameters to disagree like this.

A more logical place for subtracting the skipped tests is where the sum
is calculated so move it there. Pass the correct number of tests to
show_cache_info() so it can be used directly as the divisor for
calculating the average.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/cache.c    | 2 +-
 tools/testing/selftests/resctrl/cat_test.c | 2 +-
 tools/testing/selftests/resctrl/cmt_test.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 9c0e4ce4f28e..18ebd1324cb3 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -294,7 +294,7 @@ int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
 	long avg_diff = 0;
 	int ret;
 
-	avg_llc_val = sum_llc_val / (num_of_runs - 1);
+	avg_llc_val = sum_llc_val / num_of_runs;
 	avg_diff = (long)abs(cache_span - avg_llc_val);
 	diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100;
 
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index ed6c8e64ad11..3848dfb46aba 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -77,7 +77,7 @@ static int check_results(struct resctrl_val_param *param)
 	no_of_bits = count_bits(param->mask);
 
 	return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64,
-			       MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS,
+			       MAX_DIFF, MAX_DIFF_PERCENT, runs - 1,
 			       get_vendor() == ARCH_INTEL, false);
 }
 
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index 0ac9d6bbd13d..cb2197647c6c 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -59,7 +59,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits)
 	fclose(fp);
 
 	return show_cache_info(sum_llc_occu_resc, no_of_bits, param->span,
-			       MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS,
+			       MAX_DIFF, MAX_DIFF_PERCENT, runs - 1,
 			       true, true);
 }
 

From bf68c717e57133f1f0c2a6d5715cc985c711cdc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 17 Jul 2023 16:15:07 +0300
Subject: [PATCH 0544/2047] selftests/resctrl: Remove test type checks from
 cat_val()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

cat_val() is only used during CAT test but it checks for test type.

Remove test type checks and the unused else branch from cat_val().

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Shaopeng Tan (Fujitsu) <tan.shaopeng@fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/cache.c | 47 +++++++++++--------------
 1 file changed, 20 insertions(+), 27 deletions(-)

diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 18ebd1324cb3..d3cbb829ff6a 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -230,38 +230,31 @@ int cat_val(struct resctrl_val_param *param)
 	if (ret)
 		return ret;
 
-	if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)))
-		initialize_llc_perf();
+	initialize_llc_perf();
 
 	/* Test runs until the callback setup() tells the test to stop. */
 	while (1) {
-		if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {
-			ret = param->setup(param);
-			if (ret == END_OF_TESTS) {
-				ret = 0;
-				break;
-			}
-			if (ret < 0)
-				break;
-			ret = reset_enable_llc_perf(bm_pid, param->cpu_no);
-			if (ret)
-				break;
-
-			if (run_fill_buf(param->span, memflush, operation, true)) {
-				fprintf(stderr, "Error-running fill buffer\n");
-				ret = -1;
-				goto pe_close;
-			}
-
-			sleep(1);
-			ret = measure_cache_vals(param, bm_pid);
-			if (ret)
-				goto pe_close;
-
-			close(fd_lm);
-		} else {
+		ret = param->setup(param);
+		if (ret == END_OF_TESTS) {
+			ret = 0;
 			break;
 		}
+		if (ret < 0)
+			break;
+		ret = reset_enable_llc_perf(bm_pid, param->cpu_no);
+		if (ret)
+			break;
+
+		if (run_fill_buf(param->span, memflush, operation, true)) {
+			fprintf(stderr, "Error-running fill buffer\n");
+			ret = -1;
+			goto pe_close;
+		}
+
+		sleep(1);
+		ret = measure_cache_vals(param, bm_pid);
+		if (ret)
+			goto pe_close;
 	}
 
 	return ret;

From 7611c8ff152fe58585fc51bcf452ad4c089dc751 Mon Sep 17 00:00:00 2001
From: Shuah Khan <skhan@linuxfoundation.org>
Date: Tue, 25 Jul 2023 09:17:37 -0600
Subject: [PATCH 0545/2047] selftests:prctl: Fix make clean override warning

Remove clean target in Makefile to fix the following warning
and use the one in common lib.mk

Makefile:14: warning: overriding recipe for target 'clean'
../lib.mk:160: warning: ignoring old recipe for target 'clean'

Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/prctl/Makefile | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/testing/selftests/prctl/Makefile b/tools/testing/selftests/prctl/Makefile
index cfc35d29fc2e..01dc90fbb509 100644
--- a/tools/testing/selftests/prctl/Makefile
+++ b/tools/testing/selftests/prctl/Makefile
@@ -10,7 +10,5 @@ all: $(TEST_PROGS)
 
 include ../lib.mk
 
-clean:
-	rm -fr $(TEST_PROGS)
 endif
 endif

From 7e4bf0c1d0f707d446ec1bb673d946ee55c9a4f4 Mon Sep 17 00:00:00 2001
From: Shuah Khan <skhan@linuxfoundation.org>
Date: Tue, 25 Jul 2023 09:22:05 -0600
Subject: [PATCH 0546/2047] selftests:prctl: add set-process-name to .gitignore

Add newly addded set-process-name test to .gitignore

Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/prctl/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/prctl/.gitignore b/tools/testing/selftests/prctl/.gitignore
index 7a657b25f686..05d5e31661df 100644
--- a/tools/testing/selftests/prctl/.gitignore
+++ b/tools/testing/selftests/prctl/.gitignore
@@ -3,3 +3,4 @@ disable-tsc-ctxt-sw-stress-test
 disable-tsc-on-off-stress-test
 disable-tsc-test
 set-anon-vma-name-test
+set-process-name

From 7ad1c439fb25907dba31af5d66b44c6a3b999c89 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 25 Jul 2023 21:54:11 +0800
Subject: [PATCH 0547/2047] spi: fsl: Remove unused extern declarations

commit b36ece832512 ("spi/mpc8xxx: refactor the common code for SPI/eSPI controller")
left mpc8xxx_spi_bufs() behind.
And since commit 3c5395b66ff6 ("spi: fsl-(e)spi: simplify cleanup code")
mpc8xxx_spi_remove() is not used anymore.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Link: https://lore.kernel.org/r/20230725135411.21152-1-yuehaibing@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-lib.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/spi/spi-fsl-lib.h b/drivers/spi/spi-fsl-lib.h
index 015a1abb6a84..50a07f984b23 100644
--- a/drivers/spi/spi-fsl-lib.h
+++ b/drivers/spi/spi-fsl-lib.h
@@ -103,12 +103,9 @@ extern void mpc8xxx_spi_rx_buf_u32(u32 data, struct mpc8xxx_spi *mpc8xxx_spi);
 
 extern struct mpc8xxx_spi_probe_info *to_of_pinfo(
 		struct fsl_spi_platform_data *pdata);
-extern int mpc8xxx_spi_bufs(struct mpc8xxx_spi *mspi,
-		struct spi_transfer *t, unsigned int len);
 extern const char *mpc8xxx_spi_strmode(unsigned int flags);
 extern void mpc8xxx_spi_probe(struct device *dev, struct resource *mem,
 		unsigned int irq);
-extern int mpc8xxx_spi_remove(struct device *dev);
 extern int of_mpc8xxx_spi_probe(struct platform_device *ofdev);
 
 #endif /* __SPI_FSL_LIB_H__ */

From 6b677c1ae8e4096c84fbcbe1b7fd300dab0b381b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 25 Jul 2023 17:47:43 +0200
Subject: [PATCH 0548/2047] regulator: REGULATOR_QCOM_REFGEN should depend on
 ARCH_QCOM

The Qualcomm MMIO-mapped reference voltage regulator is only present on
Qualcomm SoCs.  Hence add a dependency on ARCH_QCOM, to prevent asking
the user about this driver when configuring a kernel without Qualcomm
SoC support.

Fixes: 7cbfbe23796086fd ("regulator: Introduce Qualcomm REFGEN regulator driver")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/60938ed138c9331ba3d2891fbd3b3d6644d3fbdc.1690300012.git.geert+renesas@glider.be
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 542a7b427994..94f44736473e 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1001,6 +1001,7 @@ config REGULATOR_PWM
 
 config REGULATOR_QCOM_REFGEN
 	tristate "Qualcomm REFGEN regulator driver"
+	depends on ARCH_QCOM || COMPILE_TEST
 	depends on HAS_IOMEM
 	depends on REGMAP
 	help

From 9b7e0645ba65e4824436a2f1817843291e744443 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 25 Jul 2023 14:46:27 +0200
Subject: [PATCH 0549/2047] regulator: dt-bindings: dlg,da9121: add buck2
 constraints

Instead of describing the constraints for some devices (buck2 not
present), code it in allOf:if:then section to actually allow validation
of this requirement.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230725124629.150113-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/dlg,da9121.yaml        | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/regulator/dlg,da9121.yaml b/Documentation/devicetree/bindings/regulator/dlg,da9121.yaml
index dc626517c2ad..43f99bc8fbd4 100644
--- a/Documentation/devicetree/bindings/regulator/dlg,da9121.yaml
+++ b/Documentation/devicetree/bindings/regulator/dlg,da9121.yaml
@@ -97,8 +97,6 @@ properties:
         properties:
           regulator-name:
             pattern: "^BUCK([1-2])$"
-            description: |
-              BUCK2 present in DA9122, DA9220, DA9131, DA9132 only
 
           regulator-initial-mode:
             enum: [ 0, 1, 2, 3 ]
@@ -122,6 +120,23 @@ required:
   - reg
   - regulators
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          not:
+            contains:
+              enum:
+                - dlg,da9122
+                - dlg,da9131
+                - dlg,da9132
+                - dlg,da9220
+    then:
+      properties:
+        regulators:
+          properties:
+            buck2: false
+
 additionalProperties: false
 
 examples:

From e5481cc40d00b9f4a3250b2fd4a805e3d000b229 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 25 Jul 2023 14:46:28 +0200
Subject: [PATCH 0550/2047] regulator: dt-bindings: dlg,da9121: allow different
 names

Regulator names, as specified by regulator-name, are board-dependent and
should describe the name used on the actual board.  Do not enforce
generic buck1/2 names.

This also fixes dtbs_check warnings like:

  zynqmp-sm-k26-revA.dtb: pmic@33: regulators:buck1:regulator-name:0: 'da9131_buck1' does not match '^BUCK([1-2])$'

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230725124629.150113-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/regulator/dlg,da9121.yaml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/regulator/dlg,da9121.yaml b/Documentation/devicetree/bindings/regulator/dlg,da9121.yaml
index 43f99bc8fbd4..13b3f75f8e5e 100644
--- a/Documentation/devicetree/bindings/regulator/dlg,da9121.yaml
+++ b/Documentation/devicetree/bindings/regulator/dlg,da9121.yaml
@@ -95,9 +95,6 @@ properties:
           Properties for a single BUCK regulator
 
         properties:
-          regulator-name:
-            pattern: "^BUCK([1-2])$"
-
           regulator-initial-mode:
             enum: [ 0, 1, 2, 3 ]
             description: Defined in include/dt-bindings/regulator/dlg,da9121-regulator.h

From 7631a0c5b093fe1bc27b4770021c4aa0d06fb3c5 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 25 Jul 2023 14:46:29 +0200
Subject: [PATCH 0551/2047] regulator: dt-bindings: active-semi,act8846:
 correct supplies

Regulator supplies are per-device, not per regulator, so they are
expected to be present in device node.  Moving them to proper place
allows to simplify a lot, because now none of the regulators differ.

This also fixes dtbs_check warnings like:

  rk3368-evb-act8846.dtb: act8846@5a: 'inl1-supply', 'inl2-supply', 'inl3-supply', 'vp1-supply',
    'vp2-supply', 'vp3-supply', 'vp4-supply' do not match any of the regexes: 'pinctrl-[0-9]+'

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230725124629.150113-3-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../regulator/active-semi,act8846.yaml        | 82 +++++--------------
 1 file changed, 22 insertions(+), 60 deletions(-)

diff --git a/Documentation/devicetree/bindings/regulator/active-semi,act8846.yaml b/Documentation/devicetree/bindings/regulator/active-semi,act8846.yaml
index 3725348bb235..02f45b5834d0 100644
--- a/Documentation/devicetree/bindings/regulator/active-semi,act8846.yaml
+++ b/Documentation/devicetree/bindings/regulator/active-semi,act8846.yaml
@@ -28,75 +28,37 @@ properties:
       the VSEL pin is assumed to be low.
     type: boolean
 
+  inl1-supply:
+    description: Handle to the INL1 input supply (REG5-7)
+
+  inl2-supply:
+    description: Handle to the INL2 input supply (REG8-9)
+
+  inl3-supply:
+    description: Handle to the INL3 input supply (REG10-12)
+
+  vp1-supply:
+    description: Handle to the VP1 input supply (REG1)
+
+  vp2-supply:
+    description: Handle to the VP2 input supply (REG2)
+
+  vp3-supply:
+    description: Handle to the VP3 input supply (REG3)
+
+  vp4-supply:
+    description: Handle to the VP4 input supply (REG4)
+
   regulators:
     type: object
     additionalProperties: false
 
-    properties:
-      REG1:
-        type: object
-        $ref: /schemas/regulator/regulator.yaml#
-        unevaluatedProperties: false
-
-        properties:
-          vp1-supply:
-            description: Handle to the VP1 input supply
-
-      REG2:
-        type: object
-        $ref: /schemas/regulator/regulator.yaml#
-        unevaluatedProperties: false
-
-        properties:
-          vp2-supply:
-            description: Handle to the VP2 input supply
-
-      REG3:
-        type: object
-        $ref: /schemas/regulator/regulator.yaml#
-        unevaluatedProperties: false
-
-        properties:
-          vp3-supply:
-            description: Handle to the VP3 input supply
-
-      REG4:
-        type: object
-        $ref: /schemas/regulator/regulator.yaml#
-        unevaluatedProperties: false
-
-        properties:
-          vp4-supply:
-            description: Handle to the VP4 input supply
-
     patternProperties:
-      "^REG[5-7]$":
+      "^REG([1-9]|1[0-2])$":
         type: object
         $ref: /schemas/regulator/regulator.yaml#
         unevaluatedProperties: false
 
-        properties:
-          inl1-supply:
-            description: Handle to the INL1 input supply
-
-      "^REG[8-9]$":
-        type: object
-        $ref: /schemas/regulator/regulator.yaml#
-        unevaluatedProperties: false
-
-        properties:
-          inl2-supply:
-            description: Handle to the INL2 input supply
-
-      "^REG1[0-2]$":
-        type: object
-        $ref: /schemas/regulator/regulator.yaml#
-        unevaluatedProperties: false
-
-        properties:
-          inl3-supply:
-            description: Handle to the INL3 input supply
-
 additionalProperties: false
 
 required:

From 75d9bf03e2fa38242b35e941ce7c7cdabe479961 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 25 Jul 2023 18:40:47 +0200
Subject: [PATCH 0552/2047] regulator: dt-bindings: qcom,rpm: fix pattern for
 children

The "or" (|) in regular expression must be within parentheses,
otherwise it is not really an "or" and it matches supplies:

  qcom-apq8060-dragonboard.dtb: regulators-1: vdd_ncp-supply: [[34]] is not of type 'object'

Fixes: fde0e25b71a9 ("dt-bindings: regulators: convert non-smd RPM Regulators bindings to dt-schema")
Cc: stable@vger.kernel.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230725164047.368892-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/regulator/qcom,rpm-regulator.yaml       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpm-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,rpm-regulator.yaml
index 8a08698e3484..b4eb4001eb3d 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpm-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpm-regulator.yaml
@@ -49,7 +49,7 @@ patternProperties:
   ".*-supply$":
     description: Input supply phandle(s) for this node
 
-  "^((s|l|lvs)[0-9]*)|(s[1-2][a-b])|(ncp)|(mvs)|(usb-switch)|(hdmi-switch)$":
+  "^((s|l|lvs)[0-9]*|s[1-2][a-b]|ncp|mvs|usb-switch|hdmi-switch)$":
     description: List of regulators and its properties
     $ref: regulator.yaml#
     unevaluatedProperties: false

From 526fffabc5fb63e80eb890c74b6570df2570c87f Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@amd.com>
Date: Tue, 25 Jul 2023 20:32:04 +0530
Subject: [PATCH 0553/2047] perf/mem: Introduce PERF_MEM_LVLNUM_UNC

Older API PERF_MEM_LVL_UNC can be replaced by PERF_MEM_LVLNUM_UNC.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230725150206.184-2-ravi.bangoria@amd.com
---
 include/uapi/linux/perf_event.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 37675437b768..39c6a250dd1b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1339,7 +1339,8 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
-/* 5-0x8 available */
+/* 5-0x7 available */
+#define PERF_MEM_LVLNUM_UNC	0x08 /* Uncached */
 #define PERF_MEM_LVLNUM_CXL	0x09 /* CXL */
 #define PERF_MEM_LVLNUM_IO	0x0a /* I/O */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */

From 5c6e623f1b8ebca39eeefba4b18d574eb5acf0bd Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@amd.com>
Date: Tue, 25 Jul 2023 20:32:05 +0530
Subject: [PATCH 0554/2047] perf/mem: Add PERF_MEM_LVLNUM_NA to PERF_MEM_NA

Add PERF_MEM_LVLNUM_NA wherever PERF_MEM_NA is used to set default values.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230725150206.184-3-ravi.bangoria@amd.com
---
 include/linux/perf_event.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2166a69e3bf2..dd92b4f5d370 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1194,7 +1194,8 @@ struct perf_sample_data {
 		    PERF_MEM_S(LVL, NA)   |\
 		    PERF_MEM_S(SNOOP, NA) |\
 		    PERF_MEM_S(LOCK, NA)  |\
-		    PERF_MEM_S(TLB, NA))
+		    PERF_MEM_S(TLB, NA)   |\
+		    PERF_MEM_S(LVLNUM, NA))
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
 					 u64 addr, u64 period)

From 8bfc20baa9355d2ce6b031ff8bdb62c9456923f7 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 25 Jul 2023 20:32:06 +0530
Subject: [PATCH 0555/2047] perf/x86/ibs: Set mem_lvl_num, mem_remote and
 mem_hops for data_src

Kernel IBS driver wasn't using new PERF_MEM_* APIs due to some of its
limitations. Mainly:

1. mem_lvl_num doesn't allow setting multiple sources whereas old API
   allows it. Setting multiple data sources is useful because IBS on
   pre-zen4 uarch doesn't provide fine granular DataSrc details (there
   is only one such DataSrc(2h) though).
2. perf mem sorting logic (sort__lvl_cmp()) ignores mem_lvl_num. perf
   c2c (c2c_decode_stats()) does not use mem_lvl_num at all.

1st one can be handled using ANY_CACHE with HOPS_0. 2nd is purely perf
tool specific issue and should be fixed separately.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230725150206.184-4-ravi.bangoria@amd.com
---
 arch/x86/events/amd/ibs.c | 160 +++++++++++++++++---------------------
 1 file changed, 70 insertions(+), 90 deletions(-)

diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 7d29be0a279b..6911c5399d02 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -728,38 +728,63 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
 	return op_data2->data_src_lo;
 }
 
-static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
-				 union ibs_op_data3 *op_data3,
-				 struct perf_sample_data *data)
+#define	L(x)		(PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
+#define	LN(x)		PERF_MEM_S(LVLNUM, x)
+#define	REM		PERF_MEM_S(REMOTE, REMOTE)
+#define	HOPS(x)		PERF_MEM_S(HOPS, x)
+
+static u64 g_data_src[8] = {
+	[IBS_DATA_SRC_LOC_CACHE]	  = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0),
+	[IBS_DATA_SRC_DRAM]		  = L(LOC_RAM) | LN(RAM),
+	[IBS_DATA_SRC_REM_CACHE]	  = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
+	[IBS_DATA_SRC_IO]		  = L(IO) | LN(IO),
+};
+
+#define RMT_NODE_BITS			(1 << IBS_DATA_SRC_DRAM)
+#define RMT_NODE_APPLICABLE(x)		(RMT_NODE_BITS & (1 << x))
+
+static u64 g_zen4_data_src[32] = {
+	[IBS_DATA_SRC_EXT_LOC_CACHE]	  = L(L3) | LN(L3),
+	[IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0),
+	[IBS_DATA_SRC_EXT_DRAM]		  = L(LOC_RAM) | LN(RAM),
+	[IBS_DATA_SRC_EXT_FAR_CCX_CACHE]  = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
+	[IBS_DATA_SRC_EXT_PMEM]		  = LN(PMEM),
+	[IBS_DATA_SRC_EXT_IO]		  = L(IO) | LN(IO),
+	[IBS_DATA_SRC_EXT_EXT_MEM]	  = LN(CXL),
+};
+
+#define ZEN4_RMT_NODE_BITS		((1 << IBS_DATA_SRC_EXT_DRAM) | \
+					 (1 << IBS_DATA_SRC_EXT_PMEM) | \
+					 (1 << IBS_DATA_SRC_EXT_EXT_MEM))
+#define ZEN4_RMT_NODE_APPLICABLE(x)	(ZEN4_RMT_NODE_BITS & (1 << x))
+
+static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
+				  union ibs_op_data3 *op_data3,
+				  struct perf_sample_data *data)
 {
 	union perf_mem_data_src *data_src = &data->data_src;
 	u8 ibs_data_src = perf_ibs_data_src(op_data2);
 
 	data_src->mem_lvl = 0;
+	data_src->mem_lvl_num = 0;
 
 	/*
 	 * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
 	 * memory accesses. So, check DcUcMemAcc bit early.
 	 */
-	if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
-		data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
-		return;
-	}
+	if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
+		return L(UNC) | LN(UNC);
 
 	/* L1 Hit */
-	if (op_data3->dc_miss == 0) {
-		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
-		return;
-	}
+	if (op_data3->dc_miss == 0)
+		return L(L1) | LN(L1);
 
 	/* L2 Hit */
 	if (op_data3->l2_miss == 0) {
 		/* Erratum #1293 */
 		if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
-		    !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
-			data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
-			return;
-		}
+		    !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
+			return L(L2) | LN(L2);
 	}
 
 	/*
@@ -769,82 +794,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
 	if (data_src->mem_op != PERF_MEM_OP_LOAD)
 		goto check_mab;
 
-	/* L3 Hit */
 	if (ibs_caps & IBS_CAPS_ZEN4) {
-		if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
-			data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
-			return;
+		u64 val = g_zen4_data_src[ibs_data_src];
+
+		if (!val)
+			goto check_mab;
+
+		/* HOPS_1 because IBS doesn't provide remote socket detail */
+		if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
+			if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
+				val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
+			else
+				val |= REM | HOPS(1);
 		}
+
+		return val;
 	} else {
-		if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
-			data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
-					    PERF_MEM_LVL_HIT;
-			return;
-		}
-	}
+		u64 val = g_data_src[ibs_data_src];
 
-	/* A peer cache in a near CCX */
-	if (ibs_caps & IBS_CAPS_ZEN4 &&
-	    ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
-		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
-		return;
-	}
+		if (!val)
+			goto check_mab;
 
-	/* A peer cache in a far CCX */
-	if (ibs_caps & IBS_CAPS_ZEN4) {
-		if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
-			data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
-			return;
+		/* HOPS_1 because IBS doesn't provide remote socket detail */
+		if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
+			if (ibs_data_src == IBS_DATA_SRC_DRAM)
+				val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
+			else
+				val |= REM | HOPS(1);
 		}
-	} else {
-		if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
-			data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
-			return;
-		}
-	}
 
-	/* DRAM */
-	if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
-		if (op_data2->rmt_node == 0)
-			data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
-		else
-			data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
-		return;
-	}
-
-	/* PMEM */
-	if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
-		data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
-		if (op_data2->rmt_node) {
-			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
-			/* IBS doesn't provide Remote socket detail */
-			data_src->mem_hops = PERF_MEM_HOPS_1;
-		}
-		return;
-	}
-
-	/* Extension Memory */
-	if (ibs_caps & IBS_CAPS_ZEN4 &&
-	    ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
-		data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL;
-		if (op_data2->rmt_node) {
-			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
-			/* IBS doesn't provide Remote socket detail */
-			data_src->mem_hops = PERF_MEM_HOPS_1;
-		}
-		return;
-	}
-
-	/* IO */
-	if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
-		data_src->mem_lvl = PERF_MEM_LVL_IO;
-		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
-		if (op_data2->rmt_node) {
-			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
-			/* IBS doesn't provide Remote socket detail */
-			data_src->mem_hops = PERF_MEM_HOPS_1;
-		}
-		return;
+		return val;
 	}
 
 check_mab:
@@ -855,12 +834,11 @@ check_mab:
 	 * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
 	 * MAB only when IBS fails to provide DataSrc.
 	 */
-	if (op_data3->dc_miss_no_mab_alloc) {
-		data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
-		return;
-	}
+	if (op_data3->dc_miss_no_mab_alloc)
+		return L(LFB) | LN(LFB);
 
-	data_src->mem_lvl = PERF_MEM_LVL_NA;
+	/* Don't set HIT with NA */
+	return PERF_MEM_S(LVL, NA) | LN(NA);
 }
 
 static bool perf_ibs_cache_hit_st_valid(void)
@@ -950,7 +928,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
 				  union ibs_op_data2 *op_data2,
 				  union ibs_op_data3 *op_data3)
 {
-	perf_ibs_get_mem_lvl(op_data2, op_data3, data);
+	union perf_mem_data_src *data_src = &data->data_src;
+
+	data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
 	perf_ibs_get_mem_snoop(op_data2, data);
 	perf_ibs_get_tlb_lvl(op_data3, data);
 	perf_ibs_get_mem_lock(op_data3, data);

From 5c816728651ae425954542fed64d21d40cb75a9f Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Mon, 24 Jul 2023 14:44:56 +0100
Subject: [PATCH 0556/2047] arm_pmu: Add PERF_PMU_CAP_EXTENDED_HW_TYPE
 capability

This capability gives us the ability to open PERF_TYPE_HARDWARE and
PERF_TYPE_HW_CACHE events on a specific PMU for free. All the
implementation is contained in the Perf core and tool code so no change
to the Arm PMU driver is needed.

The following basic use case now results in Perf opening the event on
all PMUs rather than picking only one in an unpredictable way:

  $ perf stat -e cycles -- taskset --cpu-list 0,1 stress -c 2

   Performance counter stats for 'taskset --cpu-list 0,1 stress -c 2':

         963279620      armv8_cortex_a57/cycles/                (99.19%)
         752745657      armv8_cortex_a53/cycles/                (94.80%)

Fixes: 55bcf6ef314a ("perf: Extend PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE")
Suggested-by: Ian Rogers <irogers@google.com>
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20230724134500.970496-2-james.clark@arm.com
---
 drivers/perf/arm_pmu.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index f6ccb2cd4dfc..2e79201daa4a 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -880,8 +880,13 @@ struct arm_pmu *armpmu_alloc(void)
 		 * configuration (e.g. big.LITTLE). This is not an uncore PMU,
 		 * and we have taken ctx sharing into account (e.g. with our
 		 * pmu::filter callback and pmu::event_init group validation).
+		 *
+		 * PERF_PMU_CAP_EXTENDED_HW_TYPE is required to open
+		 * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE events on a
+		 * specific PMU.
 		 */
-		.capabilities	= PERF_PMU_CAP_HETEROGENEOUS_CPUS | PERF_PMU_CAP_EXTENDED_REGS,
+		.capabilities	= PERF_PMU_CAP_HETEROGENEOUS_CPUS | PERF_PMU_CAP_EXTENDED_REGS |
+				  PERF_PMU_CAP_EXTENDED_HW_TYPE,
 	};
 
 	pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =

From 4b36873b4a3455590f686903c354c4716e149c74 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Mon, 24 Jul 2023 14:44:57 +0100
Subject: [PATCH 0557/2047] perf/x86: Remove unused
 PERF_PMU_CAP_HETEROGENEOUS_CPUS capability

Since commit bd2756811766 ("perf: Rewrite core context handling") the
relationship between perf_event_context and PMUs has changed so that
the error scenario that PERF_PMU_CAP_HETEROGENEOUS_CPUS originally
silenced no longer exists.

Remove the capability to avoid confusion that it actually influences
any perf core behavior. This change should be a no-op.

Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20230724134500.970496-3-james.clark@arm.com
---
 arch/x86/events/core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 23c96420dea1..185f902e5f28 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2166,7 +2166,6 @@ static int __init init_hw_perf_events(void)
 			hybrid_pmu->pmu = pmu;
 			hybrid_pmu->pmu.type = -1;
 			hybrid_pmu->pmu.attr_update = x86_pmu.attr_update;
-			hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS;
 			hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE;
 
 			err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name,

From 80391d8c387d406e2ec79776ec834666ab9611b0 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Mon, 24 Jul 2023 14:44:58 +0100
Subject: [PATCH 0558/2047] arm_pmu: Remove unused
 PERF_PMU_CAP_HETEROGENEOUS_CPUS capability

Since commit bd2756811766 ("perf: Rewrite core context handling") the
relationship between perf_event_context and PMUs has changed so that
the error scenario that PERF_PMU_CAP_HETEROGENEOUS_CPUS originally
silenced no longer exists.

Remove the capability and associated comment to avoid confusion that it
actually influences any perf core behavior. This change should be a
no-op.

Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20230724134500.970496-4-james.clark@arm.com
---
 drivers/perf/arm_pmu.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 2e79201daa4a..d712a19e47ac 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -877,15 +877,12 @@ struct arm_pmu *armpmu_alloc(void)
 		.attr_groups	= pmu->attr_groups,
 		/*
 		 * This is a CPU PMU potentially in a heterogeneous
-		 * configuration (e.g. big.LITTLE). This is not an uncore PMU,
-		 * and we have taken ctx sharing into account (e.g. with our
-		 * pmu::filter callback and pmu::event_init group validation).
-		 *
+		 * configuration (e.g. big.LITTLE) so
 		 * PERF_PMU_CAP_EXTENDED_HW_TYPE is required to open
 		 * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE events on a
 		 * specific PMU.
 		 */
-		.capabilities	= PERF_PMU_CAP_HETEROGENEOUS_CPUS | PERF_PMU_CAP_EXTENDED_REGS |
+		.capabilities	= PERF_PMU_CAP_EXTENDED_REGS |
 				  PERF_PMU_CAP_EXTENDED_HW_TYPE,
 	};
 

From 0cb52ad7bbb27bc6700412b055c743d5ae501b29 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Mon, 24 Jul 2023 14:44:59 +0100
Subject: [PATCH 0559/2047] perf: Remove unused PERF_PMU_CAP_HETEROGENEOUS_CPUS
 capability

Since commit bd2756811766 ("perf: Rewrite core context handling") the
relationship between perf_event_context and PMUs has changed so that
the error scenario that PERF_PMU_CAP_HETEROGENEOUS_CPUS originally
silenced no longer exists.

Remove the capability to avoid confusion that it actually influences
any perf core behavior and shift down the following capability bits to
fill in the unused space. This change should be a no-op.

Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20230724134500.970496-5-james.clark@arm.com
---
 include/linux/perf_event.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index dd92b4f5d370..9b1cf3c19c5f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -288,10 +288,9 @@ struct perf_event_pmu_context;
 #define PERF_PMU_CAP_EXTENDED_REGS		0x0008
 #define PERF_PMU_CAP_EXCLUSIVE			0x0010
 #define PERF_PMU_CAP_ITRACE			0x0020
-#define PERF_PMU_CAP_HETEROGENEOUS_CPUS		0x0040
-#define PERF_PMU_CAP_NO_EXCLUDE			0x0080
-#define PERF_PMU_CAP_AUX_OUTPUT			0x0100
-#define PERF_PMU_CAP_EXTENDED_HW_TYPE		0x0200
+#define PERF_PMU_CAP_NO_EXCLUDE			0x0040
+#define PERF_PMU_CAP_AUX_OUTPUT			0x0080
+#define PERF_PMU_CAP_EXTENDED_HW_TYPE		0x0100
 
 struct perf_output_handle;
 

From 62af03223785c11a0916df6a854ef4785d2350a5 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 25 Jul 2023 21:50:38 +0800
Subject: [PATCH 0560/2047] perf: Remove unused extern declaration
 arch_perf_get_page_size()

commit 8af26be06272 ("perf/core: Fix arch_perf_get_page_size()")
left behind this.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230725135038.25060-1-yuehaibing@huawei.com
---
 include/linux/perf_event.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9b1cf3c19c5f..e83f13ce4a9f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1860,10 +1860,6 @@ extern void arch_perf_update_userpage(struct perf_event *event,
 				      struct perf_event_mmap_page *userpg,
 				      u64 now);
 
-#ifdef CONFIG_MMU
-extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
-#endif
-
 /*
  * Snapshot branch stack on software events.
  *

From c2e164ac33f75e0acb93004960c73bd9166d3d35 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Thu, 6 Jul 2023 15:51:44 +0200
Subject: [PATCH 0561/2047] sched/fair: remove util_est boosting

There is no need to use runnable_avg when estimating util_est and that
even generates wrong behavior because one includes blocked tasks whereas
the other one doesn't. This can lead to accounting twice the waking task p,
once with the blocked runnable_avg and another one when adding its
util_est.

cpu's runnable_avg is already used when computing util_avg which is then
compared with util_est.

In some situation, feec will not select prev_cpu but another one on the
same performance domain because of higher max_util

Fixes: 7d0583cf9ec7 ("sched/fair, cpufreq: Introduce 'runnable boosting'")
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lore.kernel.org/r/20230706135144.324311-1-vincent.guittot@linaro.org
---
 kernel/sched/fair.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d3df5b1642a6..f55b0a72772e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7320,9 +7320,6 @@ cpu_util(int cpu, struct task_struct *p, int dst_cpu, int boost)
 
 		util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
 
-		if (boost)
-			util_est = max(util_est, runnable);
-
 		/*
 		 * During wake-up @p isn't enqueued yet and doesn't contribute
 		 * to any cpu_rq(cpu)->cfs.avg.util_est.enqueued.

From 4efcc8bc7e08c09c58a2f5cbc2096fbda5b7cf5e Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Thu, 13 Jul 2023 09:31:33 +0800
Subject: [PATCH 0562/2047] sched/topology: Align group flags when removing
 degenerate domain

The flags of the child of a given scheduling domain are used to initialize
the flags of its scheduling groups. When the child of a scheduling domain
is degenerated, the flags of its local scheduling group need to be updated
to align with the flags of its new child domain.

The flag SD_SHARE_CPUCAPACITY was aligned in
Commit bf2dc42d6beb ("sched/topology: Propagate SMT flags when removing degenerate domain").
Further generalize this alignment so other flags can be used later, such as
in cluster-based task wakeup. [1]

Reported-by: Yicong Yang <yangyicong@huawei.com>
Suggested-by: Ricardo Neri <ricardo.neri@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Reviewed-by: Yicong Yang <yangyicong@hisilicon.com>
Link: https://lore.kernel.org/r/20230713013133.2314153-1-yu.c.chen@intel.com
---
 kernel/sched/topology.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 7cfcfe5d27b9..05a5bc678c08 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -722,8 +722,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 
 			if (parent->parent) {
 				parent->parent->child = tmp;
-				if (tmp->flags & SD_SHARE_CPUCAPACITY)
-					parent->parent->groups->flags |= SD_SHARE_CPUCAPACITY;
+				parent->parent->groups->flags = tmp->flags;
 			}
 
 			/*

From f66c538098b61e2eb596bb88fae90dbd2cebb378 Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Sat, 15 Jul 2023 16:52:57 -0700
Subject: [PATCH 0563/2047] lockdep/selftests: Use SBRM APIs for wait context
 tests

The "__cleanup__" attribute is already used for wait context tests, so
using it for locking tests has already been proven working. Now since
SBRM APIs are merged, let's use these APIs instead of a local guard
framework. This also helps testing SBRM APIs.

Note that originally the tests don't rely on the cleanup ordering of
two variables in the same scope, but since now it's something we'd like
to assume and rely on[1], drop the extra scope in inner_in_outer()
function. Again this gives us another opportunity to test the compiler
behavior.

[1]: https://lore.kernel.org/lkml/CAHk-=whEsr6fuVSdsoNPokLR2fZiGuo_hCLyrS-LCw7hT_N7cQ@mail.gmail.com/
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230715235257.110325-1-boqun.feng@gmail.com
---
 lib/locking-selftest.c | 133 +++++++++++------------------------------
 1 file changed, 34 insertions(+), 99 deletions(-)

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 8d24279fad05..6f6a5fc85b42 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -2506,94 +2506,29 @@ static void fs_reclaim_tests(void)
 	pr_cont("\n");
 }
 
-#define __guard(cleanup) __maybe_unused __attribute__((__cleanup__(cleanup)))
+/* Defines guard classes to create contexts */
+DEFINE_LOCK_GUARD_0(HARDIRQ, HARDIRQ_ENTER(), HARDIRQ_EXIT())
+DEFINE_LOCK_GUARD_0(NOTTHREADED_HARDIRQ,
+	do {
+		local_irq_disable();
+		__irq_enter();
+		WARN_ON(!in_irq());
+	} while(0), HARDIRQ_EXIT())
+DEFINE_LOCK_GUARD_0(SOFTIRQ, SOFTIRQ_ENTER(), SOFTIRQ_EXIT())
 
-static void hardirq_exit(int *_)
-{
-	HARDIRQ_EXIT();
-}
+/* Define RCU guards, should go away when RCU has its own guard definitions */
+DEFINE_LOCK_GUARD_0(RCU, rcu_read_lock(), rcu_read_unlock())
+DEFINE_LOCK_GUARD_0(RCU_BH, rcu_read_lock_bh(), rcu_read_unlock_bh())
+DEFINE_LOCK_GUARD_0(RCU_SCHED, rcu_read_lock_sched(), rcu_read_unlock_sched())
 
-#define HARDIRQ_CONTEXT(name, ...)					\
-	int hardirq_guard_##name __guard(hardirq_exit);			\
-	HARDIRQ_ENTER();
-
-#define NOTTHREADED_HARDIRQ_CONTEXT(name, ...)				\
-	int notthreaded_hardirq_guard_##name __guard(hardirq_exit);	\
-	local_irq_disable();						\
-	__irq_enter();							\
-	WARN_ON(!in_irq());
-
-static void softirq_exit(int *_)
-{
-	SOFTIRQ_EXIT();
-}
-
-#define SOFTIRQ_CONTEXT(name, ...)				\
-	int softirq_guard_##name __guard(softirq_exit);		\
-	SOFTIRQ_ENTER();
-
-static void rcu_exit(int *_)
-{
-	rcu_read_unlock();
-}
-
-#define RCU_CONTEXT(name, ...)					\
-	int rcu_guard_##name __guard(rcu_exit);			\
-	rcu_read_lock();
-
-static void rcu_bh_exit(int *_)
-{
-	rcu_read_unlock_bh();
-}
-
-#define RCU_BH_CONTEXT(name, ...)				\
-	int rcu_bh_guard_##name __guard(rcu_bh_exit);		\
-	rcu_read_lock_bh();
-
-static void rcu_sched_exit(int *_)
-{
-	rcu_read_unlock_sched();
-}
-
-#define RCU_SCHED_CONTEXT(name, ...)				\
-	int rcu_sched_guard_##name __guard(rcu_sched_exit);	\
-	rcu_read_lock_sched();
-
-static void raw_spinlock_exit(raw_spinlock_t **lock)
-{
-	raw_spin_unlock(*lock);
-}
-
-#define RAW_SPINLOCK_CONTEXT(name, lock)						\
-	raw_spinlock_t *raw_spinlock_guard_##name __guard(raw_spinlock_exit) = &(lock);	\
-	raw_spin_lock(&(lock));
-
-static void spinlock_exit(spinlock_t **lock)
-{
-	spin_unlock(*lock);
-}
-
-#define SPINLOCK_CONTEXT(name, lock)						\
-	spinlock_t *spinlock_guard_##name __guard(spinlock_exit) = &(lock);	\
-	spin_lock(&(lock));
-
-static void mutex_exit(struct mutex **lock)
-{
-	mutex_unlock(*lock);
-}
-
-#define MUTEX_CONTEXT(name, lock)					\
-	struct mutex *mutex_guard_##name __guard(mutex_exit) = &(lock);	\
-	mutex_lock(&(lock));
 
 #define GENERATE_2_CONTEXT_TESTCASE(outer, outer_lock, inner, inner_lock)	\
 										\
 static void __maybe_unused inner##_in_##outer(void)				\
 {										\
-	outer##_CONTEXT(_, outer_lock);						\
-	{									\
-		inner##_CONTEXT(_, inner_lock);					\
-	}									\
+	/* Relies the reversed clean-up ordering: inner first */		\
+	guard(outer)(outer_lock);						\
+	guard(inner)(inner_lock);						\
 }
 
 /*
@@ -2632,21 +2567,21 @@ GENERATE_2_CONTEXT_TESTCASE(SOFTIRQ, , inner, inner_lock)			\
 GENERATE_2_CONTEXT_TESTCASE(RCU, , inner, inner_lock)				\
 GENERATE_2_CONTEXT_TESTCASE(RCU_BH, , inner, inner_lock)			\
 GENERATE_2_CONTEXT_TESTCASE(RCU_SCHED, , inner, inner_lock)			\
-GENERATE_2_CONTEXT_TESTCASE(RAW_SPINLOCK, raw_lock_A, inner, inner_lock)	\
-GENERATE_2_CONTEXT_TESTCASE(SPINLOCK, lock_A, inner, inner_lock)		\
-GENERATE_2_CONTEXT_TESTCASE(MUTEX, mutex_A, inner, inner_lock)
+GENERATE_2_CONTEXT_TESTCASE(raw_spinlock, &raw_lock_A, inner, inner_lock)	\
+GENERATE_2_CONTEXT_TESTCASE(spinlock, &lock_A, inner, inner_lock)		\
+GENERATE_2_CONTEXT_TESTCASE(mutex, &mutex_A, inner, inner_lock)
 
 GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(RCU, )
-GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(RAW_SPINLOCK, raw_lock_B)
-GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(SPINLOCK, lock_B)
-GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(MUTEX, mutex_B)
+GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(raw_spinlock, &raw_lock_B)
+GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(spinlock, &lock_B)
+GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(mutex, &mutex_B)
 
 /* the outer context allows all kinds of preemption */
 #define DO_CONTEXT_TESTCASE_OUTER_PREEMPTIBLE(outer)			\
 	dotest(RCU_in_##outer, SUCCESS, LOCKTYPE_RWLOCK);		\
-	dotest(RAW_SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);	\
-	dotest(SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);		\
-	dotest(MUTEX_in_##outer, SUCCESS, LOCKTYPE_MUTEX);		\
+	dotest(raw_spinlock_in_##outer, SUCCESS, LOCKTYPE_SPIN);	\
+	dotest(spinlock_in_##outer, SUCCESS, LOCKTYPE_SPIN);		\
+	dotest(mutex_in_##outer, SUCCESS, LOCKTYPE_MUTEX);		\
 
 /*
  * the outer context only allows the preemption introduced by spinlock_t (which
@@ -2654,16 +2589,16 @@ GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(MUTEX, mutex_B)
  */
 #define DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(outer)		\
 	dotest(RCU_in_##outer, SUCCESS, LOCKTYPE_RWLOCK);		\
-	dotest(RAW_SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);	\
-	dotest(SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);		\
-	dotest(MUTEX_in_##outer, FAILURE, LOCKTYPE_MUTEX);		\
+	dotest(raw_spinlock_in_##outer, SUCCESS, LOCKTYPE_SPIN);	\
+	dotest(spinlock_in_##outer, SUCCESS, LOCKTYPE_SPIN);		\
+	dotest(mutex_in_##outer, FAILURE, LOCKTYPE_MUTEX);		\
 
 /* the outer doesn't allows any kind of preemption */
 #define DO_CONTEXT_TESTCASE_OUTER_NOT_PREEMPTIBLE(outer)			\
 	dotest(RCU_in_##outer, SUCCESS, LOCKTYPE_RWLOCK);		\
-	dotest(RAW_SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);	\
-	dotest(SPINLOCK_in_##outer, FAILURE, LOCKTYPE_SPIN);		\
-	dotest(MUTEX_in_##outer, FAILURE, LOCKTYPE_MUTEX);		\
+	dotest(raw_spinlock_in_##outer, SUCCESS, LOCKTYPE_SPIN);	\
+	dotest(spinlock_in_##outer, FAILURE, LOCKTYPE_SPIN);		\
+	dotest(mutex_in_##outer, FAILURE, LOCKTYPE_MUTEX);		\
 
 static void wait_context_tests(void)
 {
@@ -2697,15 +2632,15 @@ static void wait_context_tests(void)
 	pr_cont("\n");
 
 	print_testname("in RAW_SPINLOCK context");
-	DO_CONTEXT_TESTCASE_OUTER_NOT_PREEMPTIBLE(RAW_SPINLOCK);
+	DO_CONTEXT_TESTCASE_OUTER_NOT_PREEMPTIBLE(raw_spinlock);
 	pr_cont("\n");
 
 	print_testname("in SPINLOCK context");
-	DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(SPINLOCK);
+	DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(spinlock);
 	pr_cont("\n");
 
 	print_testname("in MUTEX context");
-	DO_CONTEXT_TESTCASE_OUTER_PREEMPTIBLE(MUTEX);
+	DO_CONTEXT_TESTCASE_OUTER_PREEMPTIBLE(mutex);
 	pr_cont("\n");
 }
 

From d2d54819779e1ec0d7908ec98220fa54e72adc48 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Wed, 26 Jul 2023 09:49:40 +0200
Subject: [PATCH 0564/2047] regulator: dt-bindings: add missing
 unevaluatedProperties for each regulator

Each regulator node, which references common regulator.yaml schema,
should disallow additional or unevaluated properties.  Otherwise
mistakes in properties will go unnoticed.

Reported-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Closes: https://git.codelinaro.org/linaro/qcomlt/kernel/-/commit/1c8aeef8a6e84520b77f0c270d99c8bf692c5933
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230726074940.121040-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/regulator/mps,mp5416.yaml   | 2 ++
 Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml  | 2 ++
 Documentation/devicetree/bindings/regulator/pfuze100.yaml     | 4 ++++
 .../devicetree/bindings/regulator/qcom,rpmh-regulator.yaml    | 2 ++
 .../devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml | 1 +
 .../bindings/regulator/richtek,rt4831-regulator.yaml          | 1 +
 .../bindings/regulator/richtek,rtmv20-regulator.yaml          | 1 +
 .../bindings/regulator/richtek,rtq6752-regulator.yaml         | 1 +
 .../devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml    | 2 +-
 Documentation/devicetree/bindings/regulator/wlf,arizona.yaml  | 2 ++
 10 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/regulator/mps,mp5416.yaml b/Documentation/devicetree/bindings/regulator/mps,mp5416.yaml
index 2e720d152890..0221397eb51e 100644
--- a/Documentation/devicetree/bindings/regulator/mps,mp5416.yaml
+++ b/Documentation/devicetree/bindings/regulator/mps,mp5416.yaml
@@ -29,10 +29,12 @@ properties:
     patternProperties:
       "^buck[1-4]$":
         $ref: regulator.yaml#
+        unevaluatedProperties: false
         type: object
 
       "^ldo[1-4]$":
         $ref: regulator.yaml#
+        unevaluatedProperties: false
         type: object
 
     additionalProperties: false
diff --git a/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml b/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml
index 019c60942efc..6de5b027f990 100644
--- a/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml
+++ b/Documentation/devicetree/bindings/regulator/mps,mpq7920.yaml
@@ -38,11 +38,13 @@ properties:
       ldortc:
         type: object
         $ref: regulator.yaml#
+        unevaluatedProperties: false
 
     patternProperties:
       "^ldo[1-4]$":
         type: object
         $ref: regulator.yaml#
+        unevaluatedProperties: false
 
       "^buck[1-4]$":
         type: object
diff --git a/Documentation/devicetree/bindings/regulator/pfuze100.yaml b/Documentation/devicetree/bindings/regulator/pfuze100.yaml
index e384e4953f0a..0eda44752cdd 100644
--- a/Documentation/devicetree/bindings/regulator/pfuze100.yaml
+++ b/Documentation/devicetree/bindings/regulator/pfuze100.yaml
@@ -68,18 +68,22 @@ properties:
       "^sw([1-4]|[1-4][a-c]|[1-4][a-c][a-c])$":
         $ref: regulator.yaml#
         type: object
+        unevaluatedProperties: false
 
       "^vgen[1-6]$":
         $ref: regulator.yaml#
         type: object
+        unevaluatedProperties: false
 
       "^vldo[1-4]$":
         $ref: regulator.yaml#
         type: object
+        unevaluatedProperties: false
 
       "^(vsnvs|vref|vrefddr|swbst|coin|v33|vccsd)$":
         $ref: regulator.yaml#
         type: object
+        unevaluatedProperties: false
 
     additionalProperties: false
 
diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
index 72b533c3761a..e758093365bc 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
@@ -111,6 +111,7 @@ properties:
   bob:
     type: object
     $ref: regulator.yaml#
+    unevaluatedProperties: false
     description: BOB regulator node.
     dependencies:
       regulator-allow-set-load: [ regulator-allowed-modes ]
@@ -119,6 +120,7 @@ patternProperties:
   "^(smps|ldo|lvs|bob)[0-9]+$":
     type: object
     $ref: regulator.yaml#
+    unevaluatedProperties: false
     description: smps/ldo regulator nodes(s).
     dependencies:
       regulator-allow-set-load: [ regulator-allowed-modes ]
diff --git a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml
index a8ca8e0b27f8..9ea8ac0786ac 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml
@@ -110,6 +110,7 @@ patternProperties:
   "^((s|l|lvs|5vs)[0-9]*)|(boost-bypass)|(bob)$":
     description: List of regulators and its properties
     $ref: regulator.yaml#
+    unevaluatedProperties: false
 
 additionalProperties: false
 
diff --git a/Documentation/devicetree/bindings/regulator/richtek,rt4831-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rt4831-regulator.yaml
index d9c23333e157..cd06e957b9db 100644
--- a/Documentation/devicetree/bindings/regulator/richtek,rt4831-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/richtek,rt4831-regulator.yaml
@@ -29,6 +29,7 @@ patternProperties:
   "^DSV(LCM|P|N)$":
     type: object
     $ref: regulator.yaml#
+    unevaluatedProperties: false
     description:
       Properties for single Display Bias Voltage regulator.
 
diff --git a/Documentation/devicetree/bindings/regulator/richtek,rtmv20-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rtmv20-regulator.yaml
index 446ec5127d1f..fec3d396ca50 100644
--- a/Documentation/devicetree/bindings/regulator/richtek,rtmv20-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/richtek,rtmv20-regulator.yaml
@@ -121,6 +121,7 @@ properties:
     description: load switch current regulator description.
     type: object
     $ref: regulator.yaml#
+    unevaluatedProperties: false
 
 required:
   - compatible
diff --git a/Documentation/devicetree/bindings/regulator/richtek,rtq6752-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rtq6752-regulator.yaml
index e6e5a9a7d940..ef62c618de67 100644
--- a/Documentation/devicetree/bindings/regulator/richtek,rtq6752-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/richtek,rtq6752-regulator.yaml
@@ -35,6 +35,7 @@ properties:
       "^(p|n)avdd$":
         type: object
         $ref: regulator.yaml#
+        unevaluatedProperties: false
         description: |
           regulator description for pavdd and navdd.
 
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml b/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml
index 7d53cfa2c288..c9586d277f41 100644
--- a/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml
+++ b/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml
@@ -25,8 +25,8 @@ properties:
 patternProperties:
   "^(reg11|reg18|usb33)$":
     type: object
-
     $ref: regulator.yaml#
+    unevaluatedProperties: false
 
 required:
   - compatible
diff --git a/Documentation/devicetree/bindings/regulator/wlf,arizona.yaml b/Documentation/devicetree/bindings/regulator/wlf,arizona.yaml
index 011819c10988..11e378648b3f 100644
--- a/Documentation/devicetree/bindings/regulator/wlf,arizona.yaml
+++ b/Documentation/devicetree/bindings/regulator/wlf,arizona.yaml
@@ -29,11 +29,13 @@ properties:
       Initial data for the LDO1 regulator.
     $ref: regulator.yaml#
     type: object
+    unevaluatedProperties: false
 
   micvdd:
     description:
       Initial data for the MICVDD regulator.
     $ref: regulator.yaml#
     type: object
+    unevaluatedProperties: false
 
 additionalProperties: true

From 05f26f86f4a1f31d5ed2015ac1c89ea9da1d2bb7 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 26 Jul 2023 11:21:35 +0800
Subject: [PATCH 0565/2047] epoll: simplify ep_alloc()

The get_current_user() does not fail, and moving it after kzalloc() can
simplify the code a bit.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Message-Id: <20230726032135.933-1-thunder.leizhen@huaweicloud.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/eventpoll.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4b1b3362f697..1d9a71a0c4c1 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -975,15 +975,11 @@ again:
 
 static int ep_alloc(struct eventpoll **pep)
 {
-	int error;
-	struct user_struct *user;
 	struct eventpoll *ep;
 
-	user = get_current_user();
-	error = -ENOMEM;
 	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
 	if (unlikely(!ep))
-		goto free_uid;
+		return -ENOMEM;
 
 	mutex_init(&ep->mtx);
 	rwlock_init(&ep->lock);
@@ -992,16 +988,12 @@ static int ep_alloc(struct eventpoll **pep)
 	INIT_LIST_HEAD(&ep->rdllist);
 	ep->rbr = RB_ROOT_CACHED;
 	ep->ovflist = EP_UNACTIVE_PTR;
-	ep->user = user;
+	ep->user = get_current_user();
 	refcount_set(&ep->refcount, 1);
 
 	*pep = ep;
 
 	return 0;
-
-free_uid:
-	free_uid(user);
-	return error;
 }
 
 /*

From 29a449e765ff70a5bd533be94babb6d36985d096 Mon Sep 17 00:00:00 2001
From: Zhang Shurong <zhang_shurong@foxmail.com>
Date: Sat, 22 Jul 2023 23:49:09 +0800
Subject: [PATCH 0566/2047] spi: tegra20-sflash: fix to check return value of
 platform_get_irq() in tegra_sflash_probe()

The platform_get_irq might be failed and return a negative result. So
there should have an error handling code.

Fixed this by adding an error handling code.

Fixes: 8528547bcc33 ("spi: tegra: add spi driver for sflash controller")
Signed-off-by: Zhang Shurong <zhang_shurong@foxmail.com>
Link: https://lore.kernel.org/r/tencent_71FC162D589E4788C2152AAC84CD8D5C6D06@qq.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra20-sflash.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-tegra20-sflash.c b/drivers/spi/spi-tegra20-sflash.c
index 4286310628a2..0c5507473f97 100644
--- a/drivers/spi/spi-tegra20-sflash.c
+++ b/drivers/spi/spi-tegra20-sflash.c
@@ -455,7 +455,11 @@ static int tegra_sflash_probe(struct platform_device *pdev)
 		goto exit_free_master;
 	}
 
-	tsd->irq = platform_get_irq(pdev, 0);
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		goto exit_free_master;
+	tsd->irq = ret;
+
 	ret = request_irq(tsd->irq, tegra_sflash_isr, 0,
 			dev_name(&pdev->dev), tsd);
 	if (ret < 0) {

From 39e92cb1e4a1f6a12097ea2aa9e9ca6f2d2f8a83 Mon Sep 17 00:00:00 2001
From: Rae Moar <rmoar@google.com>
Date: Tue, 25 Jul 2023 21:25:12 +0000
Subject: [PATCH 0567/2047] kunit: Add test attributes API structure

Add the basic structure of the test attribute API to KUnit, which can be
used to save and access test associated data.

Add attributes.c and attributes.h to hold associated structs and functions
for the API.

Create a struct that holds a variety of associated helper functions for
each test attribute. These helper functions will be used to get the
attribute value, convert the value to a string, and filter based on the
value. This struct is flexible by design to allow for attributes of
numerous types and contexts.

Add a method to print test attributes in the format of "# [<test_name if
not suite>.]<attribute_name>: <attribute_value>".

Example for a suite: "# speed: slow"

Example for a test case: "# test_case.speed: very_slow"

Use this method to report attributes in the KTAP output (KTAP spec:
https://docs.kernel.org/dev-tools/ktap.html) and _list_tests output when
kernel's new kunit.action=list_attr option is used. Note this is derivative
of the kunit.action=list option.

In test.h, add fields and associated helper functions to test cases and
suites to hold user-inputted test attributes.

Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Rae Moar <rmoar@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/attributes.h | 19 +++++++++
 include/kunit/test.h       | 33 ++++++++++++++++
 lib/kunit/Makefile         |  3 +-
 lib/kunit/attributes.c     | 80 ++++++++++++++++++++++++++++++++++++++
 lib/kunit/executor.c       | 21 ++++++++--
 lib/kunit/test.c           | 17 ++++----
 6 files changed, 161 insertions(+), 12 deletions(-)
 create mode 100644 include/kunit/attributes.h
 create mode 100644 lib/kunit/attributes.c

diff --git a/include/kunit/attributes.h b/include/kunit/attributes.h
new file mode 100644
index 000000000000..9fcd184cce36
--- /dev/null
+++ b/include/kunit/attributes.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KUnit API to save and access test attributes
+ *
+ * Copyright (C) 2023, Google LLC.
+ * Author: Rae Moar <rmoar@google.com>
+ */
+
+#ifndef _KUNIT_ATTRIBUTES_H
+#define _KUNIT_ATTRIBUTES_H
+
+/*
+ * Print all test attributes for a test case or suite.
+ * Output format for test cases: "# <test_name>.<attribute>: <value>"
+ * Output format for test suites: "# <attribute>: <value>"
+ */
+void kunit_print_attr(void *test_or_suite, bool is_test, unsigned int test_level);
+
+#endif /* _KUNIT_ATTRIBUTES_H */
diff --git a/include/kunit/test.h b/include/kunit/test.h
index 23120d50499e..1fc9155988e9 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -63,12 +63,16 @@ enum kunit_status {
 	KUNIT_SKIPPED,
 };
 
+/* Holds attributes for each test case and suite */
+struct kunit_attributes {};
+
 /**
  * struct kunit_case - represents an individual test case.
  *
  * @run_case: the function representing the actual test case.
  * @name:     the name of the test case.
  * @generate_params: the generator function for parameterized tests.
+ * @attr:     the attributes associated with the test
  *
  * A test case is a function with the signature,
  * ``void (*)(struct kunit *)``
@@ -104,6 +108,7 @@ struct kunit_case {
 	void (*run_case)(struct kunit *test);
 	const char *name;
 	const void* (*generate_params)(const void *prev, char *desc);
+	struct kunit_attributes attr;
 
 	/* private: internal use only. */
 	enum kunit_status status;
@@ -133,6 +138,18 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
  */
 #define KUNIT_CASE(test_name) { .run_case = test_name, .name = #test_name }
 
+/**
+ * KUNIT_CASE_ATTR - A helper for creating a &struct kunit_case
+ * with attributes
+ *
+ * @test_name: a reference to a test case function.
+ * @attributes: a reference to a struct kunit_attributes object containing
+ * test attributes
+ */
+#define KUNIT_CASE_ATTR(test_name, attributes)			\
+		{ .run_case = test_name, .name = #test_name,	\
+		  .attr = attributes }
+
 /**
  * KUNIT_CASE_PARAM - A helper for creation a parameterized &struct kunit_case
  *
@@ -154,6 +171,20 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
 		{ .run_case = test_name, .name = #test_name,	\
 		  .generate_params = gen_params }
 
+/**
+ * KUNIT_CASE_PARAM_ATTR - A helper for creating a parameterized &struct
+ * kunit_case with attributes
+ *
+ * @test_name: a reference to a test case function.
+ * @gen_params: a reference to a parameter generator function.
+ * @attributes: a reference to a struct kunit_attributes object containing
+ * test attributes
+ */
+#define KUNIT_CASE_PARAM_ATTR(test_name, gen_params, attributes)	\
+		{ .run_case = test_name, .name = #test_name,	\
+		  .generate_params = gen_params,				\
+		  .attr = attributes }
+
 /**
  * struct kunit_suite - describes a related collection of &struct kunit_case
  *
@@ -163,6 +194,7 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
  * @init:	called before every test case.
  * @exit:	called after every test case.
  * @test_cases:	a null terminated array of test cases.
+ * @attr:	the attributes associated with the test suite
  *
  * A kunit_suite is a collection of related &struct kunit_case s, such that
  * @init is called before every test case and @exit is called after every
@@ -182,6 +214,7 @@ struct kunit_suite {
 	int (*init)(struct kunit *test);
 	void (*exit)(struct kunit *test);
 	struct kunit_case *test_cases;
+	struct kunit_attributes attr;
 
 	/* private: internal use only */
 	char status_comment[KUNIT_STATUS_COMMENT_SIZE];
diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile
index cb417f504996..46f75f23dfe4 100644
--- a/lib/kunit/Makefile
+++ b/lib/kunit/Makefile
@@ -6,7 +6,8 @@ kunit-objs +=				test.o \
 					string-stream.o \
 					assert.o \
 					try-catch.o \
-					executor.o
+					executor.o \
+					attributes.o
 
 ifeq ($(CONFIG_KUNIT_DEBUGFS),y)
 kunit-objs +=				debugfs.o
diff --git a/lib/kunit/attributes.c b/lib/kunit/attributes.c
new file mode 100644
index 000000000000..9bda5a5f4030
--- /dev/null
+++ b/lib/kunit/attributes.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit API to save and access test attributes
+ *
+ * Copyright (C) 2023, Google LLC.
+ * Author: Rae Moar <rmoar@google.com>
+ */
+
+#include <kunit/test.h>
+#include <kunit/attributes.h>
+
+/* Options for printing attributes:
+ * PRINT_ALWAYS - attribute is printed for every test case and suite if set
+ * PRINT_SUITE - attribute is printed for every suite if set but not for test cases
+ * PRINT_NEVER - attribute is never printed
+ */
+enum print_ops {
+	PRINT_ALWAYS,
+	PRINT_SUITE,
+	PRINT_NEVER,
+};
+
+/**
+ * struct kunit_attr - represents a test attribute and holds flexible
+ * helper functions to interact with attribute.
+ *
+ * @name: name of test attribute, eg. speed
+ * @get_attr: function to return attribute value given a test
+ * @to_string: function to return string representation of given
+ * attribute value
+ * @filter: function to indicate whether a given attribute value passes a
+ * filter
+ */
+struct kunit_attr {
+	const char *name;
+	void *(*get_attr)(void *test_or_suite, bool is_test);
+	const char *(*to_string)(void *attr, bool *to_free);
+	int (*filter)(void *attr, const char *input, int *err);
+	void *attr_default;
+	enum print_ops print;
+};
+
+/* List of all Test Attributes */
+
+static struct kunit_attr kunit_attr_list[] = {};
+
+/* Helper Functions to Access Attributes */
+
+void kunit_print_attr(void *test_or_suite, bool is_test, unsigned int test_level)
+{
+	int i;
+	bool to_free;
+	void *attr;
+	const char *attr_name, *attr_str;
+	struct kunit_suite *suite = is_test ? NULL : test_or_suite;
+	struct kunit_case *test = is_test ? test_or_suite : NULL;
+
+	for (i = 0; i < ARRAY_SIZE(kunit_attr_list); i++) {
+		if (kunit_attr_list[i].print == PRINT_NEVER ||
+				(test && kunit_attr_list[i].print == PRINT_SUITE))
+			continue;
+		attr = kunit_attr_list[i].get_attr(test_or_suite, is_test);
+		if (attr) {
+			attr_name = kunit_attr_list[i].name;
+			attr_str = kunit_attr_list[i].to_string(attr, &to_free);
+			if (test) {
+				kunit_log(KERN_INFO, test, "%*s# %s.%s: %s",
+					KUNIT_INDENT_LEN * test_level, "", test->name,
+					attr_name, attr_str);
+			} else {
+				kunit_log(KERN_INFO, suite, "%*s# %s: %s",
+					KUNIT_INDENT_LEN * test_level, "", attr_name, attr_str);
+			}
+
+			/* Free to_string of attribute if needed */
+			if (to_free)
+				kfree(attr_str);
+		}
+	}
+}
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index 74982b83707c..12e38a48a5cc 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -2,6 +2,7 @@
 
 #include <linux/reboot.h>
 #include <kunit/test.h>
+#include <kunit/attributes.h>
 #include <linux/glob.h>
 #include <linux/moduleparam.h>
 
@@ -24,7 +25,8 @@ module_param_named(action, action_param, charp, 0);
 MODULE_PARM_DESC(action,
 		 "Changes KUnit executor behavior, valid values are:\n"
 		 "<none>: run the tests like normal\n"
-		 "'list' to list test names instead of running them.\n");
+		 "'list' to list test names instead of running them.\n"
+		 "'list_attr' to list test names and attributes instead of running them.\n");
 
 /* glob_match() needs NULL terminated strings, so we need a copy of filter_glob_param. */
 struct kunit_test_filter {
@@ -172,7 +174,7 @@ static void kunit_exec_run_tests(struct suite_set *suite_set)
 	__kunit_test_suites_init(suite_set->start, num_suites);
 }
 
-static void kunit_exec_list_tests(struct suite_set *suite_set)
+static void kunit_exec_list_tests(struct suite_set *suite_set, bool include_attr)
 {
 	struct kunit_suite * const *suites;
 	struct kunit_case *test_case;
@@ -180,10 +182,19 @@ static void kunit_exec_list_tests(struct suite_set *suite_set)
 	/* Hack: print a ktap header so kunit.py can find the start of KUnit output. */
 	pr_info("KTAP version 1\n");
 
-	for (suites = suite_set->start; suites < suite_set->end; suites++)
+	for (suites = suite_set->start; suites < suite_set->end; suites++) {
+		/* Print suite name and suite attributes */
+		pr_info("%s\n", (*suites)->name);
+		if (include_attr)
+			kunit_print_attr((void *)(*suites), false, 0);
+
+		/* Print test case name and attributes in suite */
 		kunit_suite_for_each_test_case((*suites), test_case) {
 			pr_info("%s.%s\n", (*suites)->name, test_case->name);
+			if (include_attr)
+				kunit_print_attr((void *)test_case, true, 0);
 		}
+	}
 }
 
 int kunit_run_all_tests(void)
@@ -206,7 +217,9 @@ int kunit_run_all_tests(void)
 	if (!action_param)
 		kunit_exec_run_tests(&suite_set);
 	else if (strcmp(action_param, "list") == 0)
-		kunit_exec_list_tests(&suite_set);
+		kunit_exec_list_tests(&suite_set, false);
+	else if (strcmp(action_param, "list_attr") == 0)
+		kunit_exec_list_tests(&suite_set, true);
 	else
 		pr_err("kunit executor: unknown action '%s'\n", action_param);
 
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index 84e4666555c9..9ee55139ecd1 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -9,6 +9,7 @@
 #include <kunit/resource.h>
 #include <kunit/test.h>
 #include <kunit/test-bug.h>
+#include <kunit/attributes.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -168,6 +169,13 @@ size_t kunit_suite_num_test_cases(struct kunit_suite *suite)
 }
 EXPORT_SYMBOL_GPL(kunit_suite_num_test_cases);
 
+/* Currently supported test levels */
+enum {
+	KUNIT_LEVEL_SUITE = 0,
+	KUNIT_LEVEL_CASE,
+	KUNIT_LEVEL_CASE_PARAM,
+};
+
 static void kunit_print_suite_start(struct kunit_suite *suite)
 {
 	/*
@@ -181,17 +189,11 @@ static void kunit_print_suite_start(struct kunit_suite *suite)
 	pr_info(KUNIT_SUBTEST_INDENT "KTAP version 1\n");
 	pr_info(KUNIT_SUBTEST_INDENT "# Subtest: %s\n",
 		  suite->name);
+	kunit_print_attr((void *)suite, false, KUNIT_LEVEL_CASE);
 	pr_info(KUNIT_SUBTEST_INDENT "1..%zd\n",
 		  kunit_suite_num_test_cases(suite));
 }
 
-/* Currently supported test levels */
-enum {
-	KUNIT_LEVEL_SUITE = 0,
-	KUNIT_LEVEL_CASE,
-	KUNIT_LEVEL_CASE_PARAM,
-};
-
 static void kunit_print_ok_not_ok(struct kunit *test,
 				  unsigned int test_level,
 				  enum kunit_status status,
@@ -651,6 +653,7 @@ int kunit_run_tests(struct kunit_suite *suite)
 			}
 		}
 
+		kunit_print_attr((void *)test_case, true, KUNIT_LEVEL_CASE);
 
 		kunit_print_test_stats(&test, param_stats);
 

From 02c2d0c2a84172c3c7ec0229c61db55d23dd4730 Mon Sep 17 00:00:00 2001
From: Rae Moar <rmoar@google.com>
Date: Tue, 25 Jul 2023 21:25:13 +0000
Subject: [PATCH 0568/2047] kunit: Add speed attribute

Add speed attribute to the test attribute API. This attribute will allow
users to mark tests with a category of speed.

Currently the categories of speed proposed are: normal, slow, and very_slow
(outlined in enum kunit_speed). These are outlined in the enum kunit_speed.

The assumed default speed for tests is "normal". This indicates that the
test takes a relatively trivial amount of time (less than 1 second),
regardless of the machine it is running on. Any test slower than this could
be marked as "slow" or "very_slow".

Add the macro KUNIT_CASE_SLOW to set a test as slow, as this is likely a
common use of the attributes API.

Add an example of marking a slow test to kunit-example-test.c.

Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Rae Moar <rmoar@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/test.h           | 32 ++++++++++++++++++++++++-
 lib/kunit/attributes.c         | 44 +++++++++++++++++++++++++++++++++-
 lib/kunit/kunit-example-test.c |  9 +++++++
 3 files changed, 83 insertions(+), 2 deletions(-)

diff --git a/include/kunit/test.h b/include/kunit/test.h
index 1fc9155988e9..ed5f5000a095 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -63,8 +63,27 @@ enum kunit_status {
 	KUNIT_SKIPPED,
 };
 
+/* Attribute struct/enum definitions */
+
+/*
+ * Speed Attribute is stored as an enum and separated into categories of
+ * speed: very_slowm, slow, and normal. These speeds are relative to
+ * other KUnit tests.
+ *
+ * Note: unset speed attribute acts as default of KUNIT_SPEED_NORMAL.
+ */
+enum kunit_speed {
+	KUNIT_SPEED_UNSET,
+	KUNIT_SPEED_VERY_SLOW,
+	KUNIT_SPEED_SLOW,
+	KUNIT_SPEED_NORMAL,
+	KUNIT_SPEED_MAX = KUNIT_SPEED_NORMAL,
+};
+
 /* Holds attributes for each test case and suite */
-struct kunit_attributes {};
+struct kunit_attributes {
+	enum kunit_speed speed;
+};
 
 /**
  * struct kunit_case - represents an individual test case.
@@ -150,6 +169,17 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
 		{ .run_case = test_name, .name = #test_name,	\
 		  .attr = attributes }
 
+/**
+ * KUNIT_CASE_SLOW - A helper for creating a &struct kunit_case
+ * with the slow attribute
+ *
+ * @test_name: a reference to a test case function.
+ */
+
+#define KUNIT_CASE_SLOW(test_name)			\
+		{ .run_case = test_name, .name = #test_name,	\
+		  .attr.speed = KUNIT_SPEED_SLOW }
+
 /**
  * KUNIT_CASE_PARAM - A helper for creation a parameterized &struct kunit_case
  *
diff --git a/lib/kunit/attributes.c b/lib/kunit/attributes.c
index 9bda5a5f4030..ffd0d692b334 100644
--- a/lib/kunit/attributes.c
+++ b/lib/kunit/attributes.c
@@ -40,9 +40,51 @@ struct kunit_attr {
 	enum print_ops print;
 };
 
+/* String Lists for enum Attributes */
+
+static const char * const speed_str_list[] = {"unset", "very_slow", "slow", "normal"};
+
+/* To String Methods */
+
+static const char *attr_enum_to_string(void *attr, const char * const str_list[], bool *to_free)
+{
+	long val = (long)attr;
+
+	*to_free = false;
+	if (!val)
+		return NULL;
+	return str_list[val];
+}
+
+static const char *attr_speed_to_string(void *attr, bool *to_free)
+{
+	return attr_enum_to_string(attr, speed_str_list, to_free);
+}
+
+/* Get Attribute Methods */
+
+static void *attr_speed_get(void *test_or_suite, bool is_test)
+{
+	struct kunit_suite *suite = is_test ? NULL : test_or_suite;
+	struct kunit_case *test = is_test ? test_or_suite : NULL;
+
+	if (test)
+		return ((void *) test->attr.speed);
+	else
+		return ((void *) suite->attr.speed);
+}
+
 /* List of all Test Attributes */
 
-static struct kunit_attr kunit_attr_list[] = {};
+static struct kunit_attr kunit_attr_list[] = {
+	{
+		.name = "speed",
+		.get_attr = attr_speed_get,
+		.to_string = attr_speed_to_string,
+		.attr_default = (void *)KUNIT_SPEED_NORMAL,
+		.print = PRINT_ALWAYS,
+	},
+};
 
 /* Helper Functions to Access Attributes */
 
diff --git a/lib/kunit/kunit-example-test.c b/lib/kunit/kunit-example-test.c
index b69b689ea850..01a769f35e1d 100644
--- a/lib/kunit/kunit-example-test.c
+++ b/lib/kunit/kunit-example-test.c
@@ -220,6 +220,14 @@ static void example_params_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, param->value % param->value, 0);
 }
 
+/*
+ * This test should always pass. Can be used to practice filtering attributes.
+ */
+static void example_slow_test(struct kunit *test)
+{
+	KUNIT_EXPECT_EQ(test, 1 + 1, 2);
+}
+
 /*
  * Here we make a list of all the test cases we want to add to the test suite
  * below.
@@ -237,6 +245,7 @@ static struct kunit_case example_test_cases[] = {
 	KUNIT_CASE(example_all_expect_macros_test),
 	KUNIT_CASE(example_static_stub_test),
 	KUNIT_CASE_PARAM(example_params_test, example_gen_params),
+	KUNIT_CASE_SLOW(example_slow_test),
 	{}
 };
 

From a00a72709175a4d53096301a8792b8171d1223e5 Mon Sep 17 00:00:00 2001
From: Rae Moar <rmoar@google.com>
Date: Tue, 25 Jul 2023 21:25:14 +0000
Subject: [PATCH 0569/2047] kunit: Add module attribute

Add module attribute to the test attribute API. This attribute stores the
module name associated with the test using KBUILD_MODNAME.

The name of a test suite and the module name often do not match. A
reference to the module name associated with the suite could be extremely
helpful in running tests as modules without needing to check the codebase.

This attribute will be printed for each suite.

Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Rae Moar <rmoar@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/test.h   | 13 ++++++++-----
 lib/kunit/attributes.c | 25 +++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/include/kunit/test.h b/include/kunit/test.h
index ed5f5000a095..011e0d6bb506 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -131,6 +131,7 @@ struct kunit_case {
 
 	/* private: internal use only. */
 	enum kunit_status status;
+	char *module_name;
 	char *log;
 };
 
@@ -155,7 +156,9 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
  * &struct kunit_case object from it. See the documentation for
  * &struct kunit_case for an example on how to use it.
  */
-#define KUNIT_CASE(test_name) { .run_case = test_name, .name = #test_name }
+#define KUNIT_CASE(test_name)			\
+		{ .run_case = test_name, .name = #test_name,	\
+		  .module_name = KBUILD_MODNAME}
 
 /**
  * KUNIT_CASE_ATTR - A helper for creating a &struct kunit_case
@@ -167,7 +170,7 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
  */
 #define KUNIT_CASE_ATTR(test_name, attributes)			\
 		{ .run_case = test_name, .name = #test_name,	\
-		  .attr = attributes }
+		  .attr = attributes, .module_name = KBUILD_MODNAME}
 
 /**
  * KUNIT_CASE_SLOW - A helper for creating a &struct kunit_case
@@ -178,7 +181,7 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
 
 #define KUNIT_CASE_SLOW(test_name)			\
 		{ .run_case = test_name, .name = #test_name,	\
-		  .attr.speed = KUNIT_SPEED_SLOW }
+		  .attr.speed = KUNIT_SPEED_SLOW, .module_name = KBUILD_MODNAME}
 
 /**
  * KUNIT_CASE_PARAM - A helper for creation a parameterized &struct kunit_case
@@ -199,7 +202,7 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
  */
 #define KUNIT_CASE_PARAM(test_name, gen_params)			\
 		{ .run_case = test_name, .name = #test_name,	\
-		  .generate_params = gen_params }
+		  .generate_params = gen_params, .module_name = KBUILD_MODNAME}
 
 /**
  * KUNIT_CASE_PARAM_ATTR - A helper for creating a parameterized &struct
@@ -213,7 +216,7 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
 #define KUNIT_CASE_PARAM_ATTR(test_name, gen_params, attributes)	\
 		{ .run_case = test_name, .name = #test_name,	\
 		  .generate_params = gen_params,				\
-		  .attr = attributes }
+		  .attr = attributes, .module_name = KBUILD_MODNAME}
 
 /**
  * struct kunit_suite - describes a related collection of &struct kunit_case
diff --git a/lib/kunit/attributes.c b/lib/kunit/attributes.c
index ffd0d692b334..9dce4f4d726c 100644
--- a/lib/kunit/attributes.c
+++ b/lib/kunit/attributes.c
@@ -61,6 +61,12 @@ static const char *attr_speed_to_string(void *attr, bool *to_free)
 	return attr_enum_to_string(attr, speed_str_list, to_free);
 }
 
+static const char *attr_string_to_string(void *attr, bool *to_free)
+{
+	*to_free = false;
+	return (char *) attr;
+}
+
 /* Get Attribute Methods */
 
 static void *attr_speed_get(void *test_or_suite, bool is_test)
@@ -74,6 +80,18 @@ static void *attr_speed_get(void *test_or_suite, bool is_test)
 		return ((void *) suite->attr.speed);
 }
 
+static void *attr_module_get(void *test_or_suite, bool is_test)
+{
+	struct kunit_suite *suite = is_test ? NULL : test_or_suite;
+	struct kunit_case *test = is_test ? test_or_suite : NULL;
+
+	// Suites get their module attribute from their first test_case
+	if (test)
+		return ((void *) test->module_name);
+	else
+		return ((void *) suite->test_cases[0].module_name);
+}
+
 /* List of all Test Attributes */
 
 static struct kunit_attr kunit_attr_list[] = {
@@ -84,6 +102,13 @@ static struct kunit_attr kunit_attr_list[] = {
 		.attr_default = (void *)KUNIT_SPEED_NORMAL,
 		.print = PRINT_ALWAYS,
 	},
+	{
+		.name = "module",
+		.get_attr = attr_module_get,
+		.to_string = attr_string_to_string,
+		.attr_default = (void *)"",
+		.print = PRINT_SUITE,
+	}
 };
 
 /* Helper Functions to Access Attributes */

From 529534e8cba3e60f843a682e2a3149612b30d608 Mon Sep 17 00:00:00 2001
From: Rae Moar <rmoar@google.com>
Date: Tue, 25 Jul 2023 21:25:15 +0000
Subject: [PATCH 0570/2047] kunit: Add ability to filter attributes

Add filtering of test attributes. Users can filter tests using the
module_param called "filter".

Filters are imputed in the format: <attribute_name><operation><value>

Example: kunit.filter="speed>slow"

Operations include: >, <, >=, <=, !=, and =. These operations will act the
same for attributes of the same type but may not between types.

Note multiple filters can be inputted by separating them with a comma.
Example: kunit.filter="speed=slow, module!=example"

Since both suites and test cases can have attributes, there may be
conflicts. The process of filtering follows these rules:
- Filtering always operates at a per-test level.
- If a test has an attribute set, then the test's value is filtered on.
- Otherwise, the value falls back to the suite's value.
- If neither are set, the attribute has a global "default" value, which
  is used.

Filtered tests will not be run or show in output. The tests can instead be
skipped using the configurable option "kunit.filter_action=skip".

Note the default settings for running tests remains unfiltered.

Finally, add "filter" methods for the speed and module attributes to parse
and compare attribute values.

Note this filtering functionality will be added to kunit.py in the next
patch.

Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Rae Moar <rmoar@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/attributes.h |  31 +++++
 lib/kunit/attributes.c     | 271 +++++++++++++++++++++++++++++++++++++
 lib/kunit/executor.c       |  93 ++++++++++---
 lib/kunit/executor_test.c  |  12 +-
 lib/kunit/test.c           |  10 +-
 5 files changed, 389 insertions(+), 28 deletions(-)

diff --git a/include/kunit/attributes.h b/include/kunit/attributes.h
index 9fcd184cce36..bc76a0b786d2 100644
--- a/include/kunit/attributes.h
+++ b/include/kunit/attributes.h
@@ -9,6 +9,20 @@
 #ifndef _KUNIT_ATTRIBUTES_H
 #define _KUNIT_ATTRIBUTES_H
 
+/*
+ * struct kunit_attr_filter - representation of attributes filter with the
+ * attribute object and string input
+ */
+struct kunit_attr_filter {
+	struct kunit_attr *attr;
+	char *input;
+};
+
+/*
+ * Returns the name of the filter's attribute.
+ */
+const char *kunit_attr_filter_name(struct kunit_attr_filter filter);
+
 /*
  * Print all test attributes for a test case or suite.
  * Output format for test cases: "# <test_name>.<attribute>: <value>"
@@ -16,4 +30,21 @@
  */
 void kunit_print_attr(void *test_or_suite, bool is_test, unsigned int test_level);
 
+/*
+ * Returns the number of fitlers in input.
+ */
+int kunit_get_filter_count(char *input);
+
+/*
+ * Parse attributes filter input and return an objects containing the
+ * attribute object and the string input of the next filter.
+ */
+struct kunit_attr_filter kunit_next_attr_filter(char **filters, int *err);
+
+/*
+ * Returns a copy of the suite containing only tests that pass the filter.
+ */
+struct kunit_suite *kunit_filter_attr_tests(const struct kunit_suite *const suite,
+		struct kunit_attr_filter filter, char *action, int *err);
+
 #endif /* _KUNIT_ATTRIBUTES_H */
diff --git a/lib/kunit/attributes.c b/lib/kunit/attributes.c
index 9dce4f4d726c..d37c40c0ce4f 100644
--- a/lib/kunit/attributes.c
+++ b/lib/kunit/attributes.c
@@ -67,6 +67,104 @@ static const char *attr_string_to_string(void *attr, bool *to_free)
 	return (char *) attr;
 }
 
+/* Filter Methods */
+
+static const char op_list[] = "<>!=";
+
+/*
+ * Returns whether the inputted integer value matches the filter given
+ * by the operation string and inputted integer.
+ */
+static int int_filter(long val, const char *op, int input, int *err)
+{
+	if (!strncmp(op, "<=", 2))
+		return (val <= input);
+	else if (!strncmp(op, ">=", 2))
+		return (val >= input);
+	else if (!strncmp(op, "!=", 2))
+		return (val != input);
+	else if (!strncmp(op, ">", 1))
+		return (val > input);
+	else if (!strncmp(op, "<", 1))
+		return (val < input);
+	else if (!strncmp(op, "=", 1))
+		return (val == input);
+	*err = -EINVAL;
+	pr_err("kunit executor: invalid filter operation: %s\n", op);
+	return false;
+}
+
+/*
+ * Returns whether the inputted enum value "attr" matches the filter given
+ * by the input string. Note: the str_list includes the corresponding string
+ * list to the enum values.
+ */
+static int attr_enum_filter(void *attr, const char *input, int *err,
+		const char * const str_list[], int max)
+{
+	int i, j, input_int;
+	long test_val = (long)attr;
+	const char *input_val = NULL;
+
+	for (i = 0; input[i]; i++) {
+		if (!strchr(op_list, input[i])) {
+			input_val = input + i;
+			break;
+		}
+	}
+
+	if (!input_val) {
+		*err = -EINVAL;
+		pr_err("kunit executor: filter value not found: %s\n", input);
+		return false;
+	}
+
+	for (j = 0; j <= max; j++) {
+		if (!strcmp(input_val, str_list[j]))
+			input_int = j;
+	}
+
+	if (!input_int) {
+		*err = -EINVAL;
+		pr_err("kunit executor: invalid filter input: %s\n", input);
+		return false;
+	}
+
+	return int_filter(test_val, input, input_int, err);
+}
+
+static int attr_speed_filter(void *attr, const char *input, int *err)
+{
+	return attr_enum_filter(attr, input, err, speed_str_list, KUNIT_SPEED_MAX);
+}
+
+/*
+ * Returns whether the inputted string value (attr) matches the filter given
+ * by the input string.
+ */
+static int attr_string_filter(void *attr, const char *input, int *err)
+{
+	char *str = attr;
+
+	if (!strncmp(input, "<", 1)) {
+		*err = -EINVAL;
+		pr_err("kunit executor: invalid filter input: %s\n", input);
+		return false;
+	} else if (!strncmp(input, ">", 1)) {
+		*err = -EINVAL;
+		pr_err("kunit executor: invalid filter input: %s\n", input);
+		return false;
+	} else if (!strncmp(input, "!=", 2)) {
+		return (strcmp(input + 2, str) != 0);
+	} else if (!strncmp(input, "=", 1)) {
+		return (strcmp(input + 1, str) == 0);
+	}
+	*err = -EINVAL;
+	pr_err("kunit executor: invalid filter operation: %s\n", input);
+	return false;
+}
+
+
 /* Get Attribute Methods */
 
 static void *attr_speed_get(void *test_or_suite, bool is_test)
@@ -99,6 +197,7 @@ static struct kunit_attr kunit_attr_list[] = {
 		.name = "speed",
 		.get_attr = attr_speed_get,
 		.to_string = attr_speed_to_string,
+		.filter = attr_speed_filter,
 		.attr_default = (void *)KUNIT_SPEED_NORMAL,
 		.print = PRINT_ALWAYS,
 	},
@@ -106,6 +205,7 @@ static struct kunit_attr kunit_attr_list[] = {
 		.name = "module",
 		.get_attr = attr_module_get,
 		.to_string = attr_string_to_string,
+		.filter = attr_string_filter,
 		.attr_default = (void *)"",
 		.print = PRINT_SUITE,
 	}
@@ -113,6 +213,11 @@ static struct kunit_attr kunit_attr_list[] = {
 
 /* Helper Functions to Access Attributes */
 
+const char *kunit_attr_filter_name(struct kunit_attr_filter filter)
+{
+	return filter.attr->name;
+}
+
 void kunit_print_attr(void *test_or_suite, bool is_test, unsigned int test_level)
 {
 	int i;
@@ -145,3 +250,169 @@ void kunit_print_attr(void *test_or_suite, bool is_test, unsigned int test_level
 		}
 	}
 }
+
+/* Helper Functions to Filter Attributes */
+
+int kunit_get_filter_count(char *input)
+{
+	int i, comma_index, count = 0;
+
+	for (i = 0; input[i]; i++) {
+		if (input[i] == ',') {
+			if ((i - comma_index) > 1)
+				count++;
+			comma_index = i;
+		}
+	}
+	if ((i - comma_index) > 0)
+		count++;
+	return count;
+}
+
+struct kunit_attr_filter kunit_next_attr_filter(char **filters, int *err)
+{
+	struct kunit_attr_filter filter = {};
+	int i, j, comma_index, new_start_index;
+	int op_index = -1, attr_index = -1;
+	char op;
+	char *input = *filters;
+
+	/* Parse input until operation */
+	for (i = 0; input[i]; i++) {
+		if (op_index < 0 && strchr(op_list, input[i])) {
+			op_index = i;
+		} else if (!comma_index && input[i] == ',') {
+			comma_index = i;
+		} else if (comma_index && input[i] != ' ') {
+			new_start_index = i;
+			break;
+		}
+	}
+
+	if (op_index <= 0) {
+		*err = -EINVAL;
+		pr_err("kunit executor: filter operation not found: %s\n", input);
+		return filter;
+	}
+
+	/* Temporarily set operator to \0 character. */
+	op = input[op_index];
+	input[op_index] = '\0';
+
+	/* Find associated kunit_attr object */
+	for (j = 0; j < ARRAY_SIZE(kunit_attr_list); j++) {
+		if (!strcmp(input, kunit_attr_list[j].name)) {
+			attr_index = j;
+			break;
+		}
+	}
+
+	input[op_index] = op;
+
+	if (attr_index < 0) {
+		*err = -EINVAL;
+		pr_err("kunit executor: attribute not found: %s\n", input);
+	} else {
+		filter.attr = &kunit_attr_list[attr_index];
+	}
+
+	if (comma_index) {
+		input[comma_index] = '\0';
+		filter.input = input + op_index;
+		input = input + new_start_index;
+	} else {
+		filter.input = input + op_index;
+		input = NULL;
+	}
+
+	*filters = input;
+
+	return filter;
+}
+
+struct kunit_suite *kunit_filter_attr_tests(const struct kunit_suite *const suite,
+		struct kunit_attr_filter filter, char *action, int *err)
+{
+	int n = 0;
+	struct kunit_case *filtered, *test_case;
+	struct kunit_suite *copy;
+	void *suite_val, *test_val;
+	bool suite_result, test_result, default_result, result;
+
+	/* Allocate memory for new copy of suite and list of test cases */
+	copy = kmemdup(suite, sizeof(*copy), GFP_KERNEL);
+	if (!copy)
+		return ERR_PTR(-ENOMEM);
+
+	kunit_suite_for_each_test_case(suite, test_case) { n++; }
+
+	filtered = kcalloc(n + 1, sizeof(*filtered), GFP_KERNEL);
+	if (!filtered) {
+		kfree(copy);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	n = 0;
+
+	/* Save filtering result on default value */
+	default_result = filter.attr->filter(filter.attr->attr_default, filter.input, err);
+	if (*err) {
+		kfree(copy);
+		kfree(filtered);
+		return NULL;
+	}
+
+	/* Save suite attribute value and filtering result on that value */
+	suite_val = filter.attr->get_attr((void *)suite, false);
+	suite_result = filter.attr->filter(suite_val, filter.input, err);
+	if (*err) {
+		kfree(copy);
+		kfree(filtered);
+		return NULL;
+	}
+
+	/* For each test case, save test case if passes filtering. */
+	kunit_suite_for_each_test_case(suite, test_case) {
+		test_val = filter.attr->get_attr((void *) test_case, true);
+		test_result = filter.attr->filter(filter.attr->get_attr(test_case, true),
+				filter.input, err);
+		if (*err) {
+			kfree(copy);
+			kfree(filtered);
+			return NULL;
+		}
+
+		/*
+		 * If attribute value of test case is set, filter on that value.
+		 * If not, filter on suite value if set. If not, filter on
+		 * default value.
+		 */
+		result = false;
+		if (test_val) {
+			if (test_result)
+				result = true;
+		} else if (suite_val) {
+			if (suite_result)
+				result = true;
+		} else if (default_result) {
+			result = true;
+		}
+
+		if (result) {
+			filtered[n++] = *test_case;
+		} else if (action && strcmp(action, "skip") == 0) {
+			test_case->status = KUNIT_SKIPPED;
+			filtered[n++] = *test_case;
+		}
+	}
+
+	if (n == 0) {
+		kfree(copy);
+		kfree(filtered);
+		return NULL;
+	}
+
+	copy->test_cases = filtered;
+
+	return copy;
+}
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index 12e38a48a5cc..483f7b7873a7 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -17,6 +17,8 @@ extern struct kunit_suite * const __kunit_suites_end[];
 
 static char *filter_glob_param;
 static char *action_param;
+static char *filter_param;
+static char *filter_action_param;
 
 module_param_named(filter_glob, filter_glob_param, charp, 0);
 MODULE_PARM_DESC(filter_glob,
@@ -27,15 +29,23 @@ MODULE_PARM_DESC(action,
 		 "<none>: run the tests like normal\n"
 		 "'list' to list test names instead of running them.\n"
 		 "'list_attr' to list test names and attributes instead of running them.\n");
+module_param_named(filter, filter_param, charp, 0);
+MODULE_PARM_DESC(filter,
+		"Filter which KUnit test suites/tests run at boot-time using attributes, e.g. speed>slow");
+module_param_named(filter_action, filter_action_param, charp, 0);
+MODULE_PARM_DESC(filter_action,
+		"Changes behavior of filtered tests using attributes, valid values are:\n"
+		"<none>: do not run filtered tests as normal\n"
+		"'skip': skip all filtered tests instead so tests will appear in output\n");
 
 /* glob_match() needs NULL terminated strings, so we need a copy of filter_glob_param. */
-struct kunit_test_filter {
+struct kunit_glob_filter {
 	char *suite_glob;
 	char *test_glob;
 };
 
 /* Split "suite_glob.test_glob" into two. Assumes filter_glob is not empty. */
-static void kunit_parse_filter_glob(struct kunit_test_filter *parsed,
+static void kunit_parse_glob_filter(struct kunit_glob_filter *parsed,
 				    const char *filter_glob)
 {
 	const int len = strlen(filter_glob);
@@ -57,7 +67,7 @@ static void kunit_parse_filter_glob(struct kunit_test_filter *parsed,
 
 /* Create a copy of suite with only tests that match test_glob. */
 static struct kunit_suite *
-kunit_filter_tests(const struct kunit_suite *const suite, const char *test_glob)
+kunit_filter_glob_tests(const struct kunit_suite *const suite, const char *test_glob)
 {
 	int n = 0;
 	struct kunit_case *filtered, *test_case;
@@ -111,12 +121,15 @@ static void kunit_free_suite_set(struct suite_set suite_set)
 
 static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
 					    const char *filter_glob,
+						char *filters,
+						char *filter_action,
 					    int *err)
 {
-	int i;
-	struct kunit_suite **copy, *filtered_suite;
+	int i, j, k, filter_count;
+	struct kunit_suite **copy, *filtered_suite, *new_filtered_suite;
 	struct suite_set filtered;
-	struct kunit_test_filter filter;
+	struct kunit_glob_filter parsed_glob;
+	struct kunit_attr_filter *parsed_filters;
 
 	const size_t max = suite_set->end - suite_set->start;
 
@@ -127,17 +140,52 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
 		return filtered;
 	}
 
-	kunit_parse_filter_glob(&filter, filter_glob);
+	if (filter_glob)
+		kunit_parse_glob_filter(&parsed_glob, filter_glob);
+
+	/* Parse attribute filters */
+	if (filters) {
+		filter_count = kunit_get_filter_count(filters);
+		parsed_filters = kcalloc(filter_count + 1, sizeof(*parsed_filters), GFP_KERNEL);
+		for (j = 0; j < filter_count; j++)
+			parsed_filters[j] = kunit_next_attr_filter(&filters, err);
+		if (*err)
+			return filtered;
+	}
 
 	for (i = 0; &suite_set->start[i] != suite_set->end; i++) {
-		if (!glob_match(filter.suite_glob, suite_set->start[i]->name))
-			continue;
-
-		filtered_suite = kunit_filter_tests(suite_set->start[i], filter.test_glob);
-		if (IS_ERR(filtered_suite)) {
-			*err = PTR_ERR(filtered_suite);
-			return filtered;
+		filtered_suite = suite_set->start[i];
+		if (filter_glob) {
+			if (!glob_match(parsed_glob.suite_glob, filtered_suite->name))
+				continue;
+			filtered_suite = kunit_filter_glob_tests(filtered_suite,
+					parsed_glob.test_glob);
+			if (IS_ERR(filtered_suite)) {
+				*err = PTR_ERR(filtered_suite);
+				return filtered;
+			}
 		}
+		if (filter_count) {
+			for (k = 0; k < filter_count; k++) {
+				new_filtered_suite = kunit_filter_attr_tests(filtered_suite,
+						parsed_filters[k], filter_action, err);
+
+				/* Free previous copy of suite */
+				if (k > 0 || filter_glob)
+					kfree(filtered_suite);
+				filtered_suite = new_filtered_suite;
+
+				if (*err)
+					return filtered;
+				if (IS_ERR(filtered_suite)) {
+					*err = PTR_ERR(filtered_suite);
+					return filtered;
+				}
+				if (!filtered_suite)
+					break;
+			}
+		}
+
 		if (!filtered_suite)
 			continue;
 
@@ -145,8 +193,14 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
 	}
 	filtered.end = copy;
 
-	kfree(filter.suite_glob);
-	kfree(filter.test_glob);
+	if (filter_glob) {
+		kfree(parsed_glob.suite_glob);
+		kfree(parsed_glob.test_glob);
+	}
+
+	if (filter_count)
+		kfree(parsed_filters);
+
 	return filtered;
 }
 
@@ -206,8 +260,9 @@ int kunit_run_all_tests(void)
 		goto out;
 	}
 
-	if (filter_glob_param) {
-		suite_set = kunit_filter_suites(&suite_set, filter_glob_param, &err);
+	if (filter_glob_param || filter_param) {
+		suite_set = kunit_filter_suites(&suite_set, filter_glob_param,
+				filter_param, filter_action_param, &err);
 		if (err) {
 			pr_err("kunit executor: error filtering suites: %d\n", err);
 			goto out;
@@ -223,7 +278,7 @@ int kunit_run_all_tests(void)
 	else
 		pr_err("kunit executor: unknown action '%s'\n", action_param);
 
-	if (filter_glob_param) { /* a copy was made of each suite */
+	if (filter_glob_param || filter_param) { /* a copy was made of each suite */
 		kunit_free_suite_set(suite_set);
 	}
 
diff --git a/lib/kunit/executor_test.c b/lib/kunit/executor_test.c
index ce6749af374d..d7ab069324b5 100644
--- a/lib/kunit/executor_test.c
+++ b/lib/kunit/executor_test.c
@@ -24,15 +24,15 @@ static struct kunit_case dummy_test_cases[] = {
 
 static void parse_filter_test(struct kunit *test)
 {
-	struct kunit_test_filter filter = {NULL, NULL};
+	struct kunit_glob_filter filter = {NULL, NULL};
 
-	kunit_parse_filter_glob(&filter, "suite");
+	kunit_parse_glob_filter(&filter, "suite");
 	KUNIT_EXPECT_STREQ(test, filter.suite_glob, "suite");
 	KUNIT_EXPECT_FALSE(test, filter.test_glob);
 	kfree(filter.suite_glob);
 	kfree(filter.test_glob);
 
-	kunit_parse_filter_glob(&filter, "suite.test");
+	kunit_parse_glob_filter(&filter, "suite.test");
 	KUNIT_EXPECT_STREQ(test, filter.suite_glob, "suite");
 	KUNIT_EXPECT_STREQ(test, filter.test_glob, "test");
 	kfree(filter.suite_glob);
@@ -50,7 +50,7 @@ static void filter_suites_test(struct kunit *test)
 	subsuite[1] = alloc_fake_suite(test, "suite2", dummy_test_cases);
 
 	/* Want: suite1, suite2, NULL -> suite2, NULL */
-	got = kunit_filter_suites(&suite_set, "suite2", &err);
+	got = kunit_filter_suites(&suite_set, "suite2", NULL, NULL, &err);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	kfree_at_end(test, got.start);
@@ -74,7 +74,7 @@ static void filter_suites_test_glob_test(struct kunit *test)
 	subsuite[1] = alloc_fake_suite(test, "suite2", dummy_test_cases);
 
 	/* Want: suite1, suite2, NULL -> suite2 (just test1), NULL */
-	got = kunit_filter_suites(&suite_set, "suite2.test2", &err);
+	got = kunit_filter_suites(&suite_set, "suite2.test2", NULL, NULL, &err);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	kfree_at_end(test, got.start);
@@ -100,7 +100,7 @@ static void filter_suites_to_empty_test(struct kunit *test)
 	subsuite[0] = alloc_fake_suite(test, "suite1", dummy_test_cases);
 	subsuite[1] = alloc_fake_suite(test, "suite2", dummy_test_cases);
 
-	got = kunit_filter_suites(&suite_set, "not_found", &err);
+	got = kunit_filter_suites(&suite_set, "not_found", NULL, NULL, &err);
 	KUNIT_ASSERT_EQ(test, err, 0);
 	kfree_at_end(test, got.start); /* just in case */
 
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index 9ee55139ecd1..cb9797fa6303 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -613,18 +613,22 @@ int kunit_run_tests(struct kunit_suite *suite)
 	kunit_suite_for_each_test_case(suite, test_case) {
 		struct kunit test = { .param_value = NULL, .param_index = 0 };
 		struct kunit_result_stats param_stats = { 0 };
-		test_case->status = KUNIT_SKIPPED;
 
 		kunit_init_test(&test, test_case->name, test_case->log);
-
-		if (!test_case->generate_params) {
+		if (test_case->status == KUNIT_SKIPPED) {
+			/* Test marked as skip */
+			test.status = KUNIT_SKIPPED;
+			kunit_update_stats(&param_stats, test.status);
+		} else if (!test_case->generate_params) {
 			/* Non-parameterised test. */
+			test_case->status = KUNIT_SKIPPED;
 			kunit_run_case_catch_errors(suite, test_case, &test);
 			kunit_update_stats(&param_stats, test.status);
 		} else {
 			/* Get initial param. */
 			param_desc[0] = '\0';
 			test.param_value = test_case->generate_params(NULL, param_desc);
+			test_case->status = KUNIT_SKIPPED;
 			kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT
 				  "KTAP version 1\n");
 			kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT

From 723c8258c8fe167191b53e274dea435c4522e4d7 Mon Sep 17 00:00:00 2001
From: Rae Moar <rmoar@google.com>
Date: Tue, 25 Jul 2023 21:25:16 +0000
Subject: [PATCH 0571/2047] kunit: tool: Add command line interface to filter
 and report attributes

Add ability to kunit.py to filter attributes and report a list of tests
including attributes without running tests.

Add flag "--filter" to input filters on test attributes. Tests will be
filtered out if they do not match all inputted filters.

Example: --filter speed=slow (This filter would run only the tests that are
marked as slow)

Filters have operations: <, >, <=, >=, !=, and =. But note that the
characters < and > are often interpreted by the shell, so they may need to
be quoted or escaped.

Example: --filter "speed>slow" or --filter speed\>slow (This filter would
run only the tests that have the speed faster than slow.

Additionally, multiple filters can be used.

Example: --filter "speed=slow, module!=example" (This filter would run
only the tests that have the speed slow and are not in the "example"
module)

Note if the user wants to skip filtered tests instead of not
running/showing them use the "--filter_action=skip" flag instead.

Expose the output of kunit.action=list option with flag "--list_tests" to
output a list of tests. Additionally, add flag "--list_tests_attr" to
output a list of tests and their attributes. These flags are useful to see
tests and test attributes without needing to run tests.

Example of the output of "--list_tests_attr":
  example
  example.test_1
  example.test_2
  # example.test_2.speed: slow

This output includes a suite, example, with two test cases, test_1 and
test_2. And in this instance test_2 has been marked as slow.

Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Rae Moar <rmoar@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit.py           | 70 ++++++++++++++++++++++++--
 tools/testing/kunit/kunit_kernel.py    |  8 ++-
 tools/testing/kunit/kunit_parser.py    | 11 +++-
 tools/testing/kunit/kunit_tool_test.py | 39 +++++++-------
 4 files changed, 99 insertions(+), 29 deletions(-)

diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 3905c43369c3..bc74088c458a 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -55,8 +55,12 @@ class KunitExecRequest(KunitParseRequest):
 	build_dir: str
 	timeout: int
 	filter_glob: str
+	filter: str
+	filter_action: Optional[str]
 	kernel_args: Optional[List[str]]
 	run_isolated: Optional[str]
+	list_tests: bool
+	list_tests_attr: bool
 
 @dataclass
 class KunitRequest(KunitExecRequest, KunitBuildRequest):
@@ -102,19 +106,41 @@ def config_and_build_tests(linux: kunit_kernel.LinuxSourceTree,
 
 def _list_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> List[str]:
 	args = ['kunit.action=list']
+
 	if request.kernel_args:
 		args.extend(request.kernel_args)
 
 	output = linux.run_kernel(args=args,
 			   timeout=request.timeout,
 			   filter_glob=request.filter_glob,
+			   filter=request.filter,
+			   filter_action=request.filter_action,
 			   build_dir=request.build_dir)
 	lines = kunit_parser.extract_tap_lines(output)
 	# Hack! Drop the dummy TAP version header that the executor prints out.
 	lines.pop()
 
 	# Filter out any extraneous non-test output that might have gotten mixed in.
-	return [l for l in lines if re.match(r'^[^\s.]+\.[^\s.]+$', l)]
+	return [l for l in output if re.match(r'^[^\s.]+\.[^\s.]+$', l)]
+
+def _list_tests_attr(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> Iterable[str]:
+	args = ['kunit.action=list_attr']
+
+	if request.kernel_args:
+		args.extend(request.kernel_args)
+
+	output = linux.run_kernel(args=args,
+			   timeout=request.timeout,
+			   filter_glob=request.filter_glob,
+			   filter=request.filter,
+			   filter_action=request.filter_action,
+			   build_dir=request.build_dir)
+	lines = kunit_parser.extract_tap_lines(output)
+	# Hack! Drop the dummy TAP version header that the executor prints out.
+	lines.pop()
+
+	# Filter out any extraneous non-test output that might have gotten mixed in.
+	return lines
 
 def _suites_from_test_list(tests: List[str]) -> List[str]:
 	"""Extracts all the suites from an ordered list of tests."""
@@ -128,10 +154,18 @@ def _suites_from_test_list(tests: List[str]) -> List[str]:
 			suites.append(suite)
 	return suites
 
-
-
 def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> KunitResult:
 	filter_globs = [request.filter_glob]
+	if request.list_tests:
+		output = _list_tests(linux, request)
+		for line in output:
+			print(line.rstrip())
+		return KunitResult(status=KunitStatus.SUCCESS, elapsed_time=0.0)
+	if request.list_tests_attr:
+		attr_output = _list_tests_attr(linux, request)
+		for line in attr_output:
+			print(line.rstrip())
+		return KunitResult(status=KunitStatus.SUCCESS, elapsed_time=0.0)
 	if request.run_isolated:
 		tests = _list_tests(linux, request)
 		if request.run_isolated == 'test':
@@ -155,6 +189,8 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -
 			args=request.kernel_args,
 			timeout=request.timeout,
 			filter_glob=filter_glob,
+			filter=request.filter,
+			filter_action=request.filter_action,
 			build_dir=request.build_dir)
 
 		_, test_result = parse_tests(request, metadata, run_result)
@@ -341,6 +377,16 @@ def add_exec_opts(parser: argparse.ArgumentParser) -> None:
 			    nargs='?',
 			    default='',
 			    metavar='filter_glob')
+	parser.add_argument('--filter',
+			    help='Filter KUnit tests with attributes, '
+			    'e.g. module=example or speed>slow',
+			    type=str,
+				default='')
+	parser.add_argument('--filter_action',
+			    help='If set to skip, filtered tests will be skipped, '
+				'e.g. --filter_action=skip. Otherwise they will not run.',
+			    type=str,
+				choices=['skip'])
 	parser.add_argument('--kernel_args',
 			    help='Kernel command-line parameters. Maybe be repeated',
 			     action='append', metavar='')
@@ -350,6 +396,12 @@ def add_exec_opts(parser: argparse.ArgumentParser) -> None:
 			    'what ran before it.',
 			    type=str,
 			    choices=['suite', 'test'])
+	parser.add_argument('--list_tests', help='If set, list all tests that will be '
+			    'run.',
+			    action='store_true')
+	parser.add_argument('--list_tests_attr', help='If set, list all tests and test '
+			    'attributes.',
+			    action='store_true')
 
 def add_parse_opts(parser: argparse.ArgumentParser) -> None:
 	parser.add_argument('--raw_output', help='If set don\'t parse output from kernel. '
@@ -398,8 +450,12 @@ def run_handler(cli_args: argparse.Namespace) -> None:
 					json=cli_args.json,
 					timeout=cli_args.timeout,
 					filter_glob=cli_args.filter_glob,
+					filter=cli_args.filter,
+					filter_action=cli_args.filter_action,
 					kernel_args=cli_args.kernel_args,
-					run_isolated=cli_args.run_isolated)
+					run_isolated=cli_args.run_isolated,
+					list_tests=cli_args.list_tests,
+					list_tests_attr=cli_args.list_tests_attr)
 	result = run_tests(linux, request)
 	if result.status != KunitStatus.SUCCESS:
 		sys.exit(1)
@@ -441,8 +497,12 @@ def exec_handler(cli_args: argparse.Namespace) -> None:
 					json=cli_args.json,
 					timeout=cli_args.timeout,
 					filter_glob=cli_args.filter_glob,
+					filter=cli_args.filter,
+					filter_action=cli_args.filter_action,
 					kernel_args=cli_args.kernel_args,
-					run_isolated=cli_args.run_isolated)
+					run_isolated=cli_args.run_isolated,
+					list_tests=cli_args.list_tests,
+					list_tests_attr=cli_args.list_tests_attr)
 	result = exec_tests(linux, exec_request)
 	stdout.print_with_timestamp((
 		'Elapsed time: %.3fs\n') % (result.elapsed_time))
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 7f648802caf6..0b6488efed47 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -330,11 +330,15 @@ class LinuxSourceTree:
 			return False
 		return self.validate_config(build_dir)
 
-	def run_kernel(self, args: Optional[List[str]]=None, build_dir: str='', filter_glob: str='', timeout: Optional[int]=None) -> Iterator[str]:
+	def run_kernel(self, args: Optional[List[str]]=None, build_dir: str='', filter_glob: str='', filter: str='', filter_action: Optional[str]=None, timeout: Optional[int]=None) -> Iterator[str]:
 		if not args:
 			args = []
 		if filter_glob:
-			args.append('kunit.filter_glob='+filter_glob)
+			args.append('kunit.filter_glob=' + filter_glob)
+		if filter:
+			args.append('kunit.filter="' + filter + '"')
+		if filter_action:
+			args.append('kunit.filter_action=' + filter_action)
 		args.append('kunit.enable=1')
 
 		process = self._ops.start(args, build_dir)
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index fbc094f0567e..79d8832c862a 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -212,6 +212,7 @@ KTAP_START = re.compile(r'\s*KTAP version ([0-9]+)$')
 TAP_START = re.compile(r'\s*TAP version ([0-9]+)$')
 KTAP_END = re.compile(r'\s*(List of all partitions:|'
 	'Kernel panic - not syncing: VFS:|reboot: System halted)')
+EXECUTOR_ERROR = re.compile(r'\s*kunit executor: (.*)$')
 
 def extract_tap_lines(kernel_output: Iterable[str]) -> LineStream:
 	"""Extracts KTAP lines from the kernel output."""
@@ -242,6 +243,8 @@ def extract_tap_lines(kernel_output: Iterable[str]) -> LineStream:
 				# remove the prefix, if any.
 				line = line[prefix_len:]
 				yield line_num, line
+			elif EXECUTOR_ERROR.search(line):
+				yield line_num, line
 	return LineStream(lines=isolate_ktap_output(kernel_output))
 
 KTAP_VERSIONS = [1]
@@ -447,7 +450,7 @@ def parse_diagnostic(lines: LineStream) -> List[str]:
 	Log of diagnostic lines
 	"""
 	log = []  # type: List[str]
-	non_diagnostic_lines = [TEST_RESULT, TEST_HEADER, KTAP_START]
+	non_diagnostic_lines = [TEST_RESULT, TEST_HEADER, KTAP_START, TAP_START]
 	while lines and not any(re.match(lines.peek())
 			for re in non_diagnostic_lines):
 		log.append(lines.pop())
@@ -713,6 +716,11 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest:
 	"""
 	test = Test()
 	test.log.extend(log)
+
+	# Parse any errors prior to parsing tests
+	err_log = parse_diagnostic(lines)
+	test.log.extend(err_log)
+
 	if not is_subtest:
 		# If parsing the main/top-level test, parse KTAP version line and
 		# test plan
@@ -774,6 +782,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest:
 		# Don't override a bad status if this test had one reported.
 		# Assumption: no subtests means CRASHED is from Test.__init__()
 		if test.status in (TestStatus.TEST_CRASHED, TestStatus.SUCCESS):
+			print_log(test.log)
 			test.status = TestStatus.NO_TESTS
 			test.add_error('0 tests run!')
 
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index be35999bb84f..b28c1510be2e 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -597,7 +597,7 @@ class KUnitMainTest(unittest.TestCase):
 		self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0)
 		self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			args=None, build_dir='.kunit', filter_glob='', timeout=300)
+			args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=300)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_run_passes_args_pass(self):
@@ -605,7 +605,7 @@ class KUnitMainTest(unittest.TestCase):
 		self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
 		self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			args=None, build_dir='.kunit', filter_glob='', timeout=300)
+			args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=300)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_exec_passes_args_fail(self):
@@ -629,7 +629,7 @@ class KUnitMainTest(unittest.TestCase):
 			kunit.main(['run'])
 		self.assertEqual(e.exception.code, 1)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			args=None, build_dir='.kunit', filter_glob='', timeout=300)
+			args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=300)
 		self.print_mock.assert_any_call(StrContains(' 0 tests run!'))
 
 	def test_exec_raw_output(self):
@@ -670,13 +670,13 @@ class KUnitMainTest(unittest.TestCase):
 		self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
 		kunit.main(['run', '--raw_output', 'filter_glob'])
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			args=None, build_dir='.kunit', filter_glob='filter_glob', timeout=300)
+			args=None, build_dir='.kunit', filter_glob='filter_glob', filter='', filter_action=None, timeout=300)
 
 	def test_exec_timeout(self):
 		timeout = 3453
 		kunit.main(['exec', '--timeout', str(timeout)])
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			args=None, build_dir='.kunit', filter_glob='', timeout=timeout)
+			args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=timeout)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_run_timeout(self):
@@ -684,7 +684,7 @@ class KUnitMainTest(unittest.TestCase):
 		kunit.main(['run', '--timeout', str(timeout)])
 		self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			args=None, build_dir='.kunit', filter_glob='', timeout=timeout)
+			args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=timeout)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_run_builddir(self):
@@ -692,7 +692,7 @@ class KUnitMainTest(unittest.TestCase):
 		kunit.main(['run', '--build_dir=.kunit'])
 		self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			args=None, build_dir=build_dir, filter_glob='', timeout=300)
+			args=None, build_dir=build_dir, filter_glob='', filter='', filter_action=None, timeout=300)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_config_builddir(self):
@@ -710,7 +710,7 @@ class KUnitMainTest(unittest.TestCase):
 		build_dir = '.kunit'
 		kunit.main(['exec', '--build_dir', build_dir])
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			args=None, build_dir=build_dir, filter_glob='', timeout=300)
+			args=None, build_dir=build_dir, filter_glob='', filter='', filter_action=None, timeout=300)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_run_kunitconfig(self):
@@ -786,7 +786,7 @@ class KUnitMainTest(unittest.TestCase):
 		kunit.main(['run', '--kernel_args=a=1', '--kernel_args=b=2'])
 		self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-		      args=['a=1','b=2'], build_dir='.kunit', filter_glob='', timeout=300)
+		      args=['a=1','b=2'], build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=300)
 		self.print_mock.assert_any_call(StrContains('Testing complete.'))
 
 	def test_list_tests(self):
@@ -794,13 +794,11 @@ class KUnitMainTest(unittest.TestCase):
 		self.linux_source_mock.run_kernel.return_value = ['TAP version 14', 'init: random output'] + want
 
 		got = kunit._list_tests(self.linux_source_mock,
-				     kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', None, 'suite'))
-
+				     kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', '', None, None, 'suite', False, False))
 		self.assertEqual(got, want)
 		# Should respect the user's filter glob when listing tests.
 		self.linux_source_mock.run_kernel.assert_called_once_with(
-			args=['kunit.action=list'], build_dir='.kunit', filter_glob='suite*', timeout=300)
-
+			args=['kunit.action=list'], build_dir='.kunit', filter_glob='suite*', filter='', filter_action=None, timeout=300)
 
 	@mock.patch.object(kunit, '_list_tests')
 	def test_run_isolated_by_suite(self, mock_tests):
@@ -809,10 +807,10 @@ class KUnitMainTest(unittest.TestCase):
 
 		# Should respect the user's filter glob when listing tests.
 		mock_tests.assert_called_once_with(mock.ANY,
-				     kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*.test*', None, 'suite'))
+				     kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*.test*', '', None, None, 'suite', False, False))
 		self.linux_source_mock.run_kernel.assert_has_calls([
-			mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', timeout=300),
-			mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', timeout=300),
+			mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', filter='', filter_action=None, timeout=300),
+			mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', filter='', filter_action=None, timeout=300),
 		])
 
 	@mock.patch.object(kunit, '_list_tests')
@@ -822,13 +820,12 @@ class KUnitMainTest(unittest.TestCase):
 
 		# Should respect the user's filter glob when listing tests.
 		mock_tests.assert_called_once_with(mock.ANY,
-				     kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', None, 'test'))
+				     kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', '', None, None, 'test', False, False))
 		self.linux_source_mock.run_kernel.assert_has_calls([
-			mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', timeout=300),
-			mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', timeout=300),
-			mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test1', timeout=300),
+			mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', filter='', filter_action=None, timeout=300),
+			mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', filter='', filter_action=None, timeout=300),
+			mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test1', filter='', filter_action=None, timeout=300),
 		])
 
-
 if __name__ == '__main__':
 	unittest.main()

From d055c6a2cc162fa3fe23d0a88a279baf46abe85d Mon Sep 17 00:00:00 2001
From: Rae Moar <rmoar@google.com>
Date: Tue, 25 Jul 2023 21:25:17 +0000
Subject: [PATCH 0572/2047] kunit: memcpy: Mark tests as slow using test
 attributes

Mark slow memcpy KUnit tests using test attributes.

Tests marked as slow are as follows: memcpy_large_test, memmove_test,
memmove_large_test, and memmove_overlap_test. These tests were the slowest
of the memcpy tests and relatively slower to most other KUnit tests. Most
of these tests are already skipped when CONFIG_MEMCPY_SLOW_KUNIT_TEST is
not enabled.

These tests can now be filtered using the KUnit test attribute filtering
feature. Example: --filter "speed>slow". This will run only the tests that
have speeds faster than slow. The slow attribute will also be outputted in
KTAP.

Note: This patch is intended to replace the use of
CONFIG_MEMCPY_SLOW_KUNIT_TEST and to potentially deprecate this feature.
This patch does not remove the config option but does add a note to the
config definition commenting on this future shift.

Reviewed-by: David Gow <davidgow@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Rae Moar <rmoar@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 lib/Kconfig.debug  | 3 +++
 lib/memcpy_kunit.c | 8 ++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 550cb967b668..1b3894e861f2 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2701,6 +2701,9 @@ config MEMCPY_SLOW_KUNIT_TEST
 	  and bit ranges. These can be very slow, so they are split out
 	  as a separate config, in case they need to be disabled.
 
+	  Note this config option will be replaced by the use of KUnit test
+	  attributes.
+
 config IS_SIGNED_TYPE_KUNIT_TEST
 	tristate "Test is_signed_type() macro" if !KUNIT_ALL_TESTS
 	depends on KUNIT
diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c
index 887926f04731..440aee705ccc 100644
--- a/lib/memcpy_kunit.c
+++ b/lib/memcpy_kunit.c
@@ -551,10 +551,10 @@ static void strtomem_test(struct kunit *test)
 static struct kunit_case memcpy_test_cases[] = {
 	KUNIT_CASE(memset_test),
 	KUNIT_CASE(memcpy_test),
-	KUNIT_CASE(memcpy_large_test),
-	KUNIT_CASE(memmove_test),
-	KUNIT_CASE(memmove_large_test),
-	KUNIT_CASE(memmove_overlap_test),
+	KUNIT_CASE_SLOW(memcpy_large_test),
+	KUNIT_CASE_SLOW(memmove_test),
+	KUNIT_CASE_SLOW(memmove_large_test),
+	KUNIT_CASE_SLOW(memmove_overlap_test),
 	KUNIT_CASE(strtomem_test),
 	{}
 };

From a547c4ce10bde42d4e9ea63d1155970fbfd77db2 Mon Sep 17 00:00:00 2001
From: Rae Moar <rmoar@google.com>
Date: Tue, 25 Jul 2023 21:25:18 +0000
Subject: [PATCH 0573/2047] kunit: time: Mark test as slow using test
 attributes

Mark the time KUnit test, time64_to_tm_test_date_range, as slow using test
attributes.

This test ran relatively much slower than most other KUnit tests.

By marking this test as slow, the test can now be filtered using the KUnit
test attribute filtering feature. Example: --filter "speed>slow". This will
run only the tests that have speeds faster than slow. The slow attribute
will also be outputted in KTAP.

Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Rae Moar <rmoar@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 kernel/time/time_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/time_test.c b/kernel/time/time_test.c
index 831e8e779ace..ca058c8af6ba 100644
--- a/kernel/time/time_test.c
+++ b/kernel/time/time_test.c
@@ -86,7 +86,7 @@ static void time64_to_tm_test_date_range(struct kunit *test)
 }
 
 static struct kunit_case time_test_cases[] = {
-	KUNIT_CASE(time64_to_tm_test_date_range),
+	KUNIT_CASE_SLOW(time64_to_tm_test_date_range),
 	{}
 };
 

From 76066f93f1df27657eb937b7c9c091e3a6abf4db Mon Sep 17 00:00:00 2001
From: Rae Moar <rmoar@google.com>
Date: Tue, 25 Jul 2023 21:25:19 +0000
Subject: [PATCH 0574/2047] kunit: add tests for filtering attributes

Add four tests to executor_test.c to test behavior of filtering attributes.

- parse_filter_attr_test - to test the parsing of inputted filters

- filter_attr_test - to test the filtering procedure on attributes

- filter_attr_empty_test - to test the behavior when all tests are filtered
  out

- filter_attr_skip_test - to test the configurable filter_action=skip
  option

Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Rae Moar <rmoar@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 lib/kunit/executor_test.c | 116 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)

diff --git a/lib/kunit/executor_test.c b/lib/kunit/executor_test.c
index d7ab069324b5..01280cb8d451 100644
--- a/lib/kunit/executor_test.c
+++ b/lib/kunit/executor_test.c
@@ -7,6 +7,7 @@
  */
 
 #include <kunit/test.h>
+#include <kunit/attributes.h>
 
 static void kfree_at_end(struct kunit *test, const void *to_free);
 static struct kunit_suite *alloc_fake_suite(struct kunit *test,
@@ -108,11 +109,126 @@ static void filter_suites_to_empty_test(struct kunit *test)
 				"should be empty to indicate no match");
 }
 
+static void parse_filter_attr_test(struct kunit *test)
+{
+	int j, filter_count;
+	struct kunit_attr_filter *parsed_filters;
+	char *filters = "speed>slow, module!=example";
+	int err = 0;
+
+	filter_count = kunit_get_filter_count(filters);
+	KUNIT_EXPECT_EQ(test, filter_count, 2);
+
+	parsed_filters = kunit_kcalloc(test, filter_count + 1, sizeof(*parsed_filters),
+			GFP_KERNEL);
+	for (j = 0; j < filter_count; j++) {
+		parsed_filters[j] = kunit_next_attr_filter(&filters, &err);
+		KUNIT_ASSERT_EQ_MSG(test, err, 0, "failed to parse filter '%s'", filters[j]);
+	}
+
+	KUNIT_EXPECT_STREQ(test, kunit_attr_filter_name(parsed_filters[0]), "speed");
+	KUNIT_EXPECT_STREQ(test, parsed_filters[0].input, ">slow");
+
+	KUNIT_EXPECT_STREQ(test, kunit_attr_filter_name(parsed_filters[1]), "module");
+	KUNIT_EXPECT_STREQ(test, parsed_filters[1].input, "!=example");
+}
+
+static struct kunit_case dummy_attr_test_cases[] = {
+	/* .run_case is not important, just needs to be non-NULL */
+	{ .name = "slow", .run_case = dummy_test, .module_name = "dummy",
+	  .attr.speed = KUNIT_SPEED_SLOW },
+	{ .name = "normal", .run_case = dummy_test, .module_name = "dummy" },
+	{},
+};
+
+static void filter_attr_test(struct kunit *test)
+{
+	struct kunit_suite *subsuite[3] = {NULL, NULL};
+	struct suite_set suite_set = {.start = subsuite, .end = &subsuite[2]};
+	struct suite_set got;
+	int err = 0;
+
+	subsuite[0] = alloc_fake_suite(test, "normal_suite", dummy_attr_test_cases);
+	subsuite[1] = alloc_fake_suite(test, "slow_suite", dummy_attr_test_cases);
+	subsuite[1]->attr.speed = KUNIT_SPEED_SLOW; // Set suite attribute
+
+	/*
+	 * Want: normal_suite(slow, normal), slow_suite(slow, normal),
+	 *		NULL -> normal_suite(normal), NULL
+	 *
+	 * The normal test in slow_suite is filtered out because the speed
+	 * attribute is unset and thus, the filtering is based on the parent attribute
+	 * of slow.
+	 */
+	got = kunit_filter_suites(&suite_set, NULL, "speed>slow", NULL, &err);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start);
+	KUNIT_ASSERT_EQ(test, err, 0);
+	kfree_at_end(test, got.start);
+
+	/* Validate we just have normal_suite */
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start[0]);
+	KUNIT_EXPECT_STREQ(test, got.start[0]->name, "normal_suite");
+	KUNIT_ASSERT_EQ(test, got.end - got.start, 1);
+
+	/* Now validate we just have normal test case */
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start[0]->test_cases);
+	KUNIT_EXPECT_STREQ(test, got.start[0]->test_cases[0].name, "normal");
+	KUNIT_EXPECT_FALSE(test, got.start[0]->test_cases[1].name);
+}
+
+static void filter_attr_empty_test(struct kunit *test)
+{
+	struct kunit_suite *subsuite[3] = {NULL, NULL};
+	struct suite_set suite_set = {.start = subsuite, .end = &subsuite[2]};
+	struct suite_set got;
+	int err = 0;
+
+	subsuite[0] = alloc_fake_suite(test, "suite1", dummy_attr_test_cases);
+	subsuite[1] = alloc_fake_suite(test, "suite2", dummy_attr_test_cases);
+
+	got = kunit_filter_suites(&suite_set, NULL, "module!=dummy", NULL, &err);
+	KUNIT_ASSERT_EQ(test, err, 0);
+	kfree_at_end(test, got.start); /* just in case */
+
+	KUNIT_EXPECT_PTR_EQ_MSG(test, got.start, got.end,
+				"should be empty to indicate no match");
+}
+
+static void filter_attr_skip_test(struct kunit *test)
+{
+	struct kunit_suite *subsuite[2] = {NULL};
+	struct suite_set suite_set = {.start = subsuite, .end = &subsuite[1]};
+	struct suite_set got;
+	int err = 0;
+
+	subsuite[0] = alloc_fake_suite(test, "suite", dummy_attr_test_cases);
+
+	/* Want: suite(slow, normal), NULL -> suite(slow with SKIP, normal), NULL */
+	got = kunit_filter_suites(&suite_set, NULL, "speed>slow", "skip", &err);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start);
+	KUNIT_ASSERT_EQ(test, err, 0);
+	kfree_at_end(test, got.start);
+
+	/* Validate we have both the slow and normal test */
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start[0]->test_cases);
+	KUNIT_ASSERT_EQ(test, kunit_suite_num_test_cases(got.start[0]), 2);
+	KUNIT_EXPECT_STREQ(test, got.start[0]->test_cases[0].name, "slow");
+	KUNIT_EXPECT_STREQ(test, got.start[0]->test_cases[1].name, "normal");
+
+	/* Now ensure slow is skipped and normal is not */
+	KUNIT_EXPECT_EQ(test, got.start[0]->test_cases[0].status, KUNIT_SKIPPED);
+	KUNIT_EXPECT_FALSE(test, got.start[0]->test_cases[1].status);
+}
+
 static struct kunit_case executor_test_cases[] = {
 	KUNIT_CASE(parse_filter_test),
 	KUNIT_CASE(filter_suites_test),
 	KUNIT_CASE(filter_suites_test_glob_test),
 	KUNIT_CASE(filter_suites_to_empty_test),
+	KUNIT_CASE(parse_filter_attr_test),
+	KUNIT_CASE(filter_attr_test),
+	KUNIT_CASE(filter_attr_empty_test),
+	KUNIT_CASE(filter_attr_skip_test),
 	{}
 };
 

From e588586675f62b710fcc2d7bc6ea3ea8353e055d Mon Sep 17 00:00:00 2001
From: Rae Moar <rmoar@google.com>
Date: Tue, 25 Jul 2023 21:25:20 +0000
Subject: [PATCH 0575/2047] kunit: Add documentation of KUnit test attributes

Add documentation on the use of test attributes under the section "Tips for
Running KUnit Tests" in the KUnit docs.

Documentation includes three sections on how to mark tests with attributes,
how attributes are reported, and how the user can filter tests using test
attributes.

Add descriptions of new flags to list of command-line arguments.

Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Rae Moar <rmoar@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 Documentation/dev-tools/kunit/run_wrapper.rst |  12 ++
 .../dev-tools/kunit/running_tips.rst          | 166 ++++++++++++++++++
 2 files changed, 178 insertions(+)

diff --git a/Documentation/dev-tools/kunit/run_wrapper.rst b/Documentation/dev-tools/kunit/run_wrapper.rst
index dafe8eb28d30..19ddf5e07013 100644
--- a/Documentation/dev-tools/kunit/run_wrapper.rst
+++ b/Documentation/dev-tools/kunit/run_wrapper.rst
@@ -321,3 +321,15 @@ command line arguments:
 
 - ``--json``: If set, stores the test results in a JSON format and prints to `stdout` or
   saves to a file if a filename is specified.
+
+- ``--filter``: Specifies filters on test attributes, for example, ``speed!=slow``.
+  Multiple filters can be used by wrapping input in quotes and separating filters
+  by commas. Example: ``--filter "speed>slow, module=example"``.
+
+- ``--filter_action``: If set to ``skip``, filtered tests will be shown as skipped
+  in the output rather than showing no output.
+
+- ``--list_tests``: If set, lists all tests that will be run.
+
+- ``--list_tests_attr``: If set, lists all tests that will be run and all of their
+  attributes.
diff --git a/Documentation/dev-tools/kunit/running_tips.rst b/Documentation/dev-tools/kunit/running_tips.rst
index 8e8c493f17d1..766f9cdea0fa 100644
--- a/Documentation/dev-tools/kunit/running_tips.rst
+++ b/Documentation/dev-tools/kunit/running_tips.rst
@@ -262,3 +262,169 @@ other code executed during boot, e.g.
 	# Reset coverage counters before running the test.
 	$ echo 0 > /sys/kernel/debug/gcov/reset
 	$ modprobe kunit-example-test
+
+
+Test Attributes and Filtering
+=============================
+
+Test suites and cases can be marked with test attributes, such as speed of
+test. These attributes will later be printed in test output and can be used to
+filter test execution.
+
+Marking Test Attributes
+-----------------------
+
+Tests are marked with an attribute by including a ``kunit_attributes`` object
+in the test definition.
+
+Test cases can be marked using the ``KUNIT_CASE_ATTR(test_name, attributes)``
+macro to define the test case instead of ``KUNIT_CASE(test_name)``.
+
+.. code-block:: c
+
+	static const struct kunit_attributes example_attr = {
+		.speed = KUNIT_VERY_SLOW,
+	};
+
+	static struct kunit_case example_test_cases[] = {
+		KUNIT_CASE_ATTR(example_test, example_attr),
+	};
+
+.. note::
+	To mark a test case as slow, you can also use ``KUNIT_CASE_SLOW(test_name)``.
+	This is a helpful macro as the slow attribute is the most commonly used.
+
+Test suites can be marked with an attribute by setting the "attr" field in the
+suite definition.
+
+.. code-block:: c
+
+	static const struct kunit_attributes example_attr = {
+		.speed = KUNIT_VERY_SLOW,
+	};
+
+	static struct kunit_suite example_test_suite = {
+		...,
+		.attr = example_attr,
+	};
+
+.. note::
+	Not all attributes need to be set in a ``kunit_attributes`` object. Unset
+	attributes will remain uninitialized and act as though the attribute is set
+	to 0 or NULL. Thus, if an attribute is set to 0, it is treated as unset.
+	These unset attributes will not be reported and may act as a default value
+	for filtering purposes.
+
+Reporting Attributes
+--------------------
+
+When a user runs tests, attributes will be present in the raw kernel output (in
+KTAP format). Note that attributes will be hidden by default in kunit.py output
+for all passing tests but the raw kernel output can be accessed using the
+``--raw_output`` flag. This is an example of how test attributes for test cases
+will be formatted in kernel output:
+
+.. code-block:: none
+
+	# example_test.speed: slow
+	ok 1 example_test
+
+This is an example of how test attributes for test suites will be formatted in
+kernel output:
+
+.. code-block:: none
+
+	  KTAP version 2
+	  # Subtest: example_suite
+	  # module: kunit_example_test
+	  1..3
+	  ...
+	ok 1 example_suite
+
+Additionally, users can output a full attribute report of tests with their
+attributes, using the command line flag ``--list_tests_attr``:
+
+.. code-block:: bash
+
+	kunit.py run "example" --list_tests_attr
+
+.. note::
+	This report can be accessed when running KUnit manually by passing in the
+	module_param ``kunit.action=list_attr``.
+
+Filtering
+---------
+
+Users can filter tests using the ``--filter`` command line flag when running
+tests. As an example:
+
+.. code-block:: bash
+
+	kunit.py run --filter speed=slow
+
+
+You can also use the following operations on filters: "<", ">", "<=", ">=",
+"!=", and "=". Example:
+
+.. code-block:: bash
+
+	kunit.py run --filter "speed>slow"
+
+This example will run all tests with speeds faster than slow. Note that the
+characters < and > are often interpreted by the shell, so they may need to be
+quoted or escaped, as above.
+
+Additionally, you can use multiple filters at once. Simply separate filters
+using commas. Example:
+
+.. code-block:: bash
+
+	kunit.py run --filter "speed>slow, module=kunit_example_test"
+
+.. note::
+	You can use this filtering feature when running KUnit manually by passing
+	the filter as a module param: ``kunit.filter="speed>slow, speed<=normal"``.
+
+Filtered tests will not run or show up in the test output. You can use the
+``--filter_action=skip`` flag to skip filtered tests instead. These tests will be
+shown in the test output in the test but will not run. To use this feature when
+running KUnit manually, use the module param ``kunit.filter_action=skip``.
+
+Rules of Filtering Procedure
+----------------------------
+
+Since both suites and test cases can have attributes, there may be conflicts
+between attributes during filtering. The process of filtering follows these
+rules:
+
+- Filtering always operates at a per-test level.
+
+- If a test has an attribute set, then the test's value is filtered on.
+
+- Otherwise, the value falls back to the suite's value.
+
+- If neither are set, the attribute has a global "default" value, which is used.
+
+List of Current Attributes
+--------------------------
+
+``speed``
+
+This attribute indicates the speed of a test's execution (how slow or fast the
+test is).
+
+This attribute is saved as an enum with the following categories: "normal",
+"slow", or "very_slow". The assumed default speed for tests is "normal". This
+indicates that the test takes a relatively trivial amount of time (less than
+1 second), regardless of the machine it is running on. Any test slower than
+this could be marked as "slow" or "very_slow".
+
+The macro ``KUNIT_CASE_SLOW(test_name)`` can be easily used to set the speed
+of a test case to "slow".
+
+``module``
+
+This attribute indicates the name of the module associated with the test.
+
+This attribute is automatically saved as a string and is printed for each suite.
+Tests can also be filtered using this attribute.

From 905c50cd15c1256a869b0a98da5025db0be89b9e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 24 Jul 2023 19:13:15 +0300
Subject: [PATCH 0576/2047] gpio: ge: Add missing header

Add missing platform_device.h that used to be implied by of_device.h.

While at it, sort headers alphabetically for better maintenance.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Fixes: e91d0f05e66a ("gpio: Explicitly include correct DT includes")
Closes: https://lore.kernel.org/r/65b4ac1a-1128-6e2a-92c0-9bbcca4b760a@infradead.org
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-ge.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpio/gpio-ge.c b/drivers/gpio/gpio-ge.c
index 4eecbc862abc..ecadb81ce7f8 100644
--- a/drivers/gpio/gpio-ge.c
+++ b/drivers/gpio/gpio-ge.c
@@ -17,13 +17,14 @@
  * the I/O interrupt controllers mask to stop them propergating
  */
 
-#include <linux/kernel.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/module.h>
 #include <linux/gpio/driver.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
 
 #define GEF_GPIO_DIRECT		0x00
 #define GEF_GPIO_IN		0x04

From 94484a7935161795b3d8ff0168684821083e8ddf Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 24 Jul 2023 19:13:16 +0300
Subject: [PATCH 0577/2047] gpio: ge: Fix English spelling and grammar

Fix English spelling and grammar in the comments.
While at it, fix the MODULE_AUTHOR() email address format.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Closes: https://lore.kernel.org/r/65b4ac1a-1128-6e2a-92c0-9bbcca4b760a@infradead.org
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-ge.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpio/gpio-ge.c b/drivers/gpio/gpio-ge.c
index ecadb81ce7f8..f92b3c8a3a8a 100644
--- a/drivers/gpio/gpio-ge.c
+++ b/drivers/gpio/gpio-ge.c
@@ -10,11 +10,12 @@
  * kind, whether express or implied.
  */
 
-/* TODO
+/*
+ * TODO:
  *
- * Configuration of output modes (totem-pole/open-drain)
- * Interrupt configuration - interrupts are always generated the FPGA relies on
- * the I/O interrupt controllers mask to stop them propergating
+ * Configuration of output modes (totem-pole/open-drain).
+ * Interrupt configuration - interrupts are always generated, the FPGA relies
+ * on the I/O interrupt controllers mask to stop them from being propagated.
  */
 
 #include <linux/gpio/driver.h>
@@ -104,5 +105,5 @@ static struct platform_driver gef_gpio_driver = {
 module_platform_driver_probe(gef_gpio_driver, gef_gpio_probe);
 
 MODULE_DESCRIPTION("GE I/O FPGA GPIO driver");
-MODULE_AUTHOR("Martyn Welch <martyn.welch@ge.com");
+MODULE_AUTHOR("Martyn Welch <martyn.welch@ge.com>");
 MODULE_LICENSE("GPL");

From 0cf2b4f550fd6e2e7378a9f13a216a784ebcd7c0 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 24 Jul 2023 19:13:17 +0300
Subject: [PATCH 0578/2047] gpio: ge: Make driver OF-independent

There is nothing in the driver that requires OF APIs,
make the driver OF independent.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-ge.c | 33 +++++++++++++--------------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/gpio/gpio-ge.c b/drivers/gpio/gpio-ge.c
index f92b3c8a3a8a..d019669048e6 100644
--- a/drivers/gpio/gpio-ge.c
+++ b/drivers/gpio/gpio-ge.c
@@ -21,10 +21,10 @@
 #include <linux/gpio/driver.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/slab.h>
 
 #define GEF_GPIO_DIRECT		0x00
@@ -54,6 +54,7 @@ MODULE_DEVICE_TABLE(of, gef_gpio_ids);
 
 static int __init gef_gpio_probe(struct platform_device *pdev)
 {
+	struct device *dev = &pdev->dev;
 	struct gpio_chip *gc;
 	void __iomem *regs;
 	int ret;
@@ -62,38 +63,30 @@ static int __init gef_gpio_probe(struct platform_device *pdev)
 	if (!gc)
 		return -ENOMEM;
 
-	regs = of_iomap(pdev->dev.of_node, 0);
-	if (!regs)
-		return -ENOMEM;
+	regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
 
 	ret = bgpio_init(gc, &pdev->dev, 4, regs + GEF_GPIO_IN,
 			 regs + GEF_GPIO_OUT, NULL, NULL,
 			 regs + GEF_GPIO_DIRECT, BGPIOF_BIG_ENDIAN_BYTE_ORDER);
-	if (ret) {
-		dev_err(&pdev->dev, "bgpio_init failed\n");
-		goto err0;
-	}
+	if (ret)
+		return dev_err_probe(dev, ret, "bgpio_init failed\n");
 
 	/* Setup pointers to chip functions */
-	gc->label = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%pOF", pdev->dev.of_node);
-	if (!gc->label) {
-		ret = -ENOMEM;
-		goto err0;
-	}
+	gc->label = devm_kasprintf(dev, GFP_KERNEL, "%pfw", dev_fwnode(dev));
+	if (!gc->label)
+		return -ENOMEM;
 
 	gc->base = -1;
-	gc->ngpio = (u16)(uintptr_t)of_device_get_match_data(&pdev->dev);
+	gc->ngpio = (uintptr_t)device_get_match_data(dev);
 
 	/* This function adds a memory mapped GPIO chip */
 	ret = devm_gpiochip_add_data(&pdev->dev, gc, NULL);
 	if (ret)
-		goto err0;
+		return dev_err_probe(dev, ret, "GPIO chip registration failed\n");
 
 	return 0;
-err0:
-	iounmap(regs);
-	pr_err("%pOF: GPIO chip registration failed\n", pdev->dev.of_node);
-	return ret;
 };
 
 static struct platform_driver gef_gpio_driver = {

From 806693e6cb8cb96349e482670cf0c82fe6359c4a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 24 Jul 2023 19:13:18 +0300
Subject: [PATCH 0579/2047] gpio: ge: Utilise temporary variable for struct
 device

We have a temporary variable to keep pointer to struct device.
Utilise it inside the ->probe() implementation.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-ge.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpio/gpio-ge.c b/drivers/gpio/gpio-ge.c
index d019669048e6..268de5496fcb 100644
--- a/drivers/gpio/gpio-ge.c
+++ b/drivers/gpio/gpio-ge.c
@@ -59,7 +59,7 @@ static int __init gef_gpio_probe(struct platform_device *pdev)
 	void __iomem *regs;
 	int ret;
 
-	gc = devm_kzalloc(&pdev->dev, sizeof(*gc), GFP_KERNEL);
+	gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL);
 	if (!gc)
 		return -ENOMEM;
 
@@ -67,9 +67,9 @@ static int __init gef_gpio_probe(struct platform_device *pdev)
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
 
-	ret = bgpio_init(gc, &pdev->dev, 4, regs + GEF_GPIO_IN,
-			 regs + GEF_GPIO_OUT, NULL, NULL,
-			 regs + GEF_GPIO_DIRECT, BGPIOF_BIG_ENDIAN_BYTE_ORDER);
+	ret = bgpio_init(gc, dev, 4, regs + GEF_GPIO_IN, regs + GEF_GPIO_OUT,
+			 NULL, NULL, regs + GEF_GPIO_DIRECT,
+			 BGPIOF_BIG_ENDIAN_BYTE_ORDER);
 	if (ret)
 		return dev_err_probe(dev, ret, "bgpio_init failed\n");
 
@@ -82,7 +82,7 @@ static int __init gef_gpio_probe(struct platform_device *pdev)
 	gc->ngpio = (uintptr_t)device_get_match_data(dev);
 
 	/* This function adds a memory mapped GPIO chip */
-	ret = devm_gpiochip_add_data(&pdev->dev, gc, NULL);
+	ret = devm_gpiochip_add_data(dev, gc, NULL);
 	if (ret)
 		return dev_err_probe(dev, ret, "GPIO chip registration failed\n");
 

From a13f5e77a510af20800829190a282dfe78c83787 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 24 Jul 2023 19:13:19 +0300
Subject: [PATCH 0580/2047] gpio: ge: Replace GPLv2 boilerplate with SPDX

Replace the GPLv2 boilerplate text with a nice and short
SPDX header.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-ge.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpio/gpio-ge.c b/drivers/gpio/gpio-ge.c
index 268de5496fcb..5dc49648d8e3 100644
--- a/drivers/gpio/gpio-ge.c
+++ b/drivers/gpio/gpio-ge.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Driver for GE FPGA based GPIO
  *
  * Author: Martyn Welch <martyn.welch@ge.com>
  *
  * 2008 (c) GE Intelligent Platforms Embedded Systems, Inc.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2.  This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
  */
 
 /*

From 55b473538247550305abe0ae931d31249c6b1a47 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 24 Jul 2023 19:13:20 +0300
Subject: [PATCH 0581/2047] gpio: ge: Enable COMPILE_TEST for the driver

Driver is so simple, yet there was a room for mistakes.
Reduce their appearance in the future by enabling COMPILE_TEST
option.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 398d9ef7e680..8b41893bc5cd 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -283,7 +283,7 @@ config GPIO_EXAR
 
 config GPIO_GE_FPGA
 	bool "GE FPGA based GPIO"
-	depends on GE_FPGA
+	depends on GE_FPGA || COMPILE_TEST
 	select GPIO_GENERIC
 	help
 	  Support for common GPIO functionality provided on some GE Single Board

From 27d5a3cc2137f9e7bf03e4420ef01653d913d3b1 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Tue, 25 Jul 2023 18:23:30 +0800
Subject: [PATCH 0582/2047] dt-bindings: gpio: fsl-imx-gpio: support
 i.MX8QM/DXL

Add i.MX8QM/DXL gpio compatible which is compatible with i.MX35.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 Documentation/devicetree/bindings/gpio/fsl-imx-gpio.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.yaml b/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.yaml
index ae18603697d7..d0ca2af89f1e 100644
--- a/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.yaml
@@ -32,10 +32,12 @@ properties:
               - fsl,imx6sx-gpio
               - fsl,imx6ul-gpio
               - fsl,imx7d-gpio
+              - fsl,imx8dxl-gpio
               - fsl,imx8mm-gpio
               - fsl,imx8mn-gpio
               - fsl,imx8mp-gpio
               - fsl,imx8mq-gpio
+              - fsl,imx8qm-gpio
               - fsl,imx8qxp-gpio
               - fsl,imxrt1050-gpio
               - fsl,imxrt1170-gpio

From 92f7a35836c2e13ae5f0dc8c7f889e92f66a9d19 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <william.gray@linaro.org>
Date: Thu, 20 Jul 2023 14:49:43 -0400
Subject: [PATCH 0583/2047] gpio: 104-dio-48e: Add Counter/Timer support

The 104-DIO-48E features an 8254 Counter/Timer chip providing three
counter/timers which can be used for frequency measurement, frequency
output, pulse width modulation, pulse width measurement, event count,
etc. The counter/timers use the same addresses as PPI 0 (addresses 0x0
to 0x3), so a raw_spinlock_t is used to synchronize operations between
the two regmap mappings to prevent clobbering.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig            |   1 +
 drivers/gpio/gpio-104-dio-48e.c | 127 ++++++++++++++++++++++++++++----
 2 files changed, 112 insertions(+), 16 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 8b41893bc5cd..61a8d07e5a1c 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -858,6 +858,7 @@ config GPIO_104_DIO_48E
 	select REGMAP_IRQ
 	select GPIOLIB_IRQCHIP
 	select GPIO_I8255
+	select I8254
 	help
 	  Enables GPIO support for the ACCES 104-DIO-48E series (104-DIO-48E,
 	  104-DIO-24E). The base port addresses for the devices may be
diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index 8ff5f4ff5958..4df9becaf349 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -9,6 +9,7 @@
 #include <linux/bits.h>
 #include <linux/device.h>
 #include <linux/err.h>
+#include <linux/i8254.h>
 #include <linux/ioport.h>
 #include <linux/irq.h>
 #include <linux/isa.h>
@@ -16,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/regmap.h>
+#include <linux/spinlock.h>
 #include <linux/types.h>
 
 #include "gpio-i8255.h"
@@ -37,6 +39,8 @@ MODULE_PARM_DESC(irq, "ACCES 104-DIO-48E interrupt line numbers");
 
 #define DIO48E_ENABLE_INTERRUPT 0xB
 #define DIO48E_DISABLE_INTERRUPT DIO48E_ENABLE_INTERRUPT
+#define DIO48E_ENABLE_COUNTER_TIMER_ADDRESSING 0xD
+#define DIO48E_DISABLE_COUNTER_TIMER_ADDRESSING DIO48E_ENABLE_COUNTER_TIMER_ADDRESSING
 #define DIO48E_CLEAR_INTERRUPT 0xF
 
 #define DIO48E_NUM_PPI 2
@@ -75,18 +79,20 @@ static const struct regmap_access_table dio48e_precious_table = {
 	.yes_ranges = dio48e_precious_ranges,
 	.n_yes_ranges = ARRAY_SIZE(dio48e_precious_ranges),
 };
-static const struct regmap_config dio48e_regmap_config = {
-	.reg_bits = 8,
-	.reg_stride = 1,
-	.val_bits = 8,
-	.io_port = true,
-	.max_register = 0xF,
-	.wr_table = &dio48e_wr_table,
-	.rd_table = &dio48e_rd_table,
-	.volatile_table = &dio48e_volatile_table,
-	.precious_table = &dio48e_precious_table,
-	.cache_type = REGCACHE_FLAT,
-	.use_raw_spinlock = true,
+
+static const struct regmap_range pit_wr_ranges[] = {
+	regmap_reg_range(0x0, 0x3),
+};
+static const struct regmap_range pit_rd_ranges[] = {
+	regmap_reg_range(0x0, 0x2),
+};
+static const struct regmap_access_table pit_wr_table = {
+	.yes_ranges = pit_wr_ranges,
+	.n_yes_ranges = ARRAY_SIZE(pit_wr_ranges),
+};
+static const struct regmap_access_table pit_rd_table = {
+	.yes_ranges = pit_rd_ranges,
+	.n_yes_ranges = ARRAY_SIZE(pit_rd_ranges),
 };
 
 /* only bit 3 on each respective Port C supports interrupts */
@@ -102,14 +108,56 @@ static const struct regmap_irq dio48e_regmap_irqs[] = {
 
 /**
  * struct dio48e_gpio - GPIO device private data structure
+ * @lock:	synchronization lock to prevent I/O race conditions
  * @map:	Regmap for the device
+ * @regs:	virtual mapping for device registers
+ * @flags:	IRQ flags saved during locking
  * @irq_mask:	Current IRQ mask state on the device
  */
 struct dio48e_gpio {
+	raw_spinlock_t lock;
 	struct regmap *map;
+	void __iomem *regs;
+	unsigned long flags;
 	unsigned int irq_mask;
 };
 
+static void dio48e_regmap_lock(void *lock_arg) __acquires(&dio48egpio->lock)
+{
+	struct dio48e_gpio *const dio48egpio = lock_arg;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&dio48egpio->lock, flags);
+	dio48egpio->flags = flags;
+}
+
+static void dio48e_regmap_unlock(void *lock_arg) __releases(&dio48egpio->lock)
+{
+	struct dio48e_gpio *const dio48egpio = lock_arg;
+
+	raw_spin_unlock_irqrestore(&dio48egpio->lock, dio48egpio->flags);
+}
+
+static void pit_regmap_lock(void *lock_arg) __acquires(&dio48egpio->lock)
+{
+	struct dio48e_gpio *const dio48egpio = lock_arg;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&dio48egpio->lock, flags);
+	dio48egpio->flags = flags;
+
+	iowrite8(0x00, dio48egpio->regs + DIO48E_ENABLE_COUNTER_TIMER_ADDRESSING);
+}
+
+static void pit_regmap_unlock(void *lock_arg) __releases(&dio48egpio->lock)
+{
+	struct dio48e_gpio *const dio48egpio = lock_arg;
+
+	ioread8(dio48egpio->regs + DIO48E_DISABLE_COUNTER_TIMER_ADDRESSING);
+
+	raw_spin_unlock_irqrestore(&dio48egpio->lock, dio48egpio->flags);
+}
+
 static int dio48e_handle_mask_sync(const int index,
 				   const unsigned int mask_buf_def,
 				   const unsigned int mask_buf,
@@ -176,6 +224,9 @@ static int dio48e_probe(struct device *dev, unsigned int id)
 	struct i8255_regmap_config config = {};
 	void __iomem *regs;
 	struct regmap *map;
+	struct regmap_config dio48e_regmap_config;
+	struct regmap_config pit_regmap_config;
+	struct i8254_regmap_config pit_config;
 	int err;
 	struct regmap_irq_chip *chip;
 	struct dio48e_gpio *dio48egpio;
@@ -187,21 +238,58 @@ static int dio48e_probe(struct device *dev, unsigned int id)
 		return -EBUSY;
 	}
 
+	dio48egpio = devm_kzalloc(dev, sizeof(*dio48egpio), GFP_KERNEL);
+	if (!dio48egpio)
+		return -ENOMEM;
+
 	regs = devm_ioport_map(dev, base[id], DIO48E_EXTENT);
 	if (!regs)
 		return -ENOMEM;
 
+	dio48egpio->regs = regs;
+
+	raw_spin_lock_init(&dio48egpio->lock);
+
+	dio48e_regmap_config = (struct regmap_config) {
+		.reg_bits = 8,
+		.reg_stride = 1,
+		.val_bits = 8,
+		.lock = dio48e_regmap_lock,
+		.unlock = dio48e_regmap_unlock,
+		.lock_arg = dio48egpio,
+		.io_port = true,
+		.wr_table = &dio48e_wr_table,
+		.rd_table = &dio48e_rd_table,
+		.volatile_table = &dio48e_volatile_table,
+		.precious_table = &dio48e_precious_table,
+		.cache_type = REGCACHE_FLAT,
+	};
+
 	map = devm_regmap_init_mmio(dev, regs, &dio48e_regmap_config);
 	if (IS_ERR(map))
 		return dev_err_probe(dev, PTR_ERR(map),
 				     "Unable to initialize register map\n");
 
-	dio48egpio = devm_kzalloc(dev, sizeof(*dio48egpio), GFP_KERNEL);
-	if (!dio48egpio)
-		return -ENOMEM;
-
 	dio48egpio->map = map;
 
+	pit_regmap_config = (struct regmap_config) {
+		.name = "i8254",
+		.reg_bits = 8,
+		.reg_stride = 1,
+		.val_bits = 8,
+		.lock = pit_regmap_lock,
+		.unlock = pit_regmap_unlock,
+		.lock_arg = dio48egpio,
+		.io_port = true,
+		.wr_table = &pit_wr_table,
+		.rd_table = &pit_rd_table,
+	};
+
+	pit_config.map = devm_regmap_init_mmio(dev, regs, &pit_regmap_config);
+	if (IS_ERR(pit_config.map))
+		return dev_err_probe(dev, PTR_ERR(pit_config.map),
+				     "Unable to initialize i8254 register map\n");
+
 	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
 	if (!chip)
 		return -ENOMEM;
@@ -225,6 +313,12 @@ static int dio48e_probe(struct device *dev, unsigned int id)
 	if (err)
 		return dev_err_probe(dev, err, "IRQ registration failed\n");
 
+	pit_config.parent = dev;
+
+	err = devm_i8254_regmap_register(dev, &pit_config);
+	if (err)
+		return err;
+
 	config.parent = dev;
 	config.map = map;
 	config.num_ppi = DIO48E_NUM_PPI;
@@ -245,3 +339,4 @@ module_isa_driver_with_irq(dio48e_driver, num_dio48e, num_irq);
 MODULE_AUTHOR("William Breathitt Gray <vilhelm.gray@gmail.com>");
 MODULE_DESCRIPTION("ACCES 104-DIO-48E GPIO driver");
 MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(I8254);

From 05e91e72113833385fb8c9a33bda9dbd93e27609 Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Date: Wed, 7 Jun 2023 21:01:06 +0200
Subject: [PATCH 0584/2047] x86/microcode/AMD: Rip out static buffers

Load straight from the containers (initrd or builtin, for example).
There's no need to cache the patch per node.

This even simplifies the code a bit with the opportunity for more
cleanups later.

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Tested-by: John Allen <john.allen@amd.com>
Link: https://lore.kernel.org/r/20230720202813.3269888-1-john.allen@amd.com
---
 arch/x86/include/asm/microcode_amd.h |  6 +-
 arch/x86/kernel/cpu/microcode/amd.c  | 91 +++++++++-------------------
 arch/x86/kernel/cpu/microcode/core.c |  4 +-
 3 files changed, 31 insertions(+), 70 deletions(-)

diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index e6662adf3af4..a995b7685223 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -44,13 +44,11 @@ struct microcode_amd {
 #define PATCH_MAX_SIZE (3 * PAGE_SIZE)
 
 #ifdef CONFIG_MICROCODE_AMD
-extern void __init load_ucode_amd_bsp(unsigned int family);
-extern void load_ucode_amd_ap(unsigned int family);
+extern void load_ucode_amd_early(unsigned int cpuid_1_eax);
 extern int __init save_microcode_in_initrd_amd(unsigned int family);
 void reload_ucode_amd(unsigned int cpu);
 #else
-static inline void __init load_ucode_amd_bsp(unsigned int family) {}
-static inline void load_ucode_amd_ap(unsigned int family) {}
+static inline void load_ucode_amd_early(unsigned int cpuid_1_eax) {}
 static inline int __init
 save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
 static inline void reload_ucode_amd(unsigned int cpu) {}
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 87208e46f7ed..a28b103256ff 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -56,9 +56,6 @@ struct cont_desc {
 
 static u32 ucode_new_rev;
 
-/* One blob per node. */
-static u8 amd_ucode_patch[MAX_NUMNODES][PATCH_MAX_SIZE];
-
 /*
  * Microcode patch container file is prepended to the initrd in cpio
  * format. See Documentation/arch/x86/microcode.rst
@@ -415,20 +412,17 @@ static int __apply_microcode_amd(struct microcode_amd *mc)
  *
  * Returns true if container found (sets @desc), false otherwise.
  */
-static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size, bool save_patch)
+static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size)
 {
 	struct cont_desc desc = { 0 };
-	u8 (*patch)[PATCH_MAX_SIZE];
 	struct microcode_amd *mc;
 	u32 rev, dummy, *new_rev;
 	bool ret = false;
 
 #ifdef CONFIG_X86_32
 	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
-	patch	= (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
 #else
 	new_rev = &ucode_new_rev;
-	patch	= &amd_ucode_patch[0];
 #endif
 
 	desc.cpuid_1_eax = cpuid_1_eax;
@@ -452,9 +446,6 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size, boo
 	if (!__apply_microcode_amd(mc)) {
 		*new_rev = mc->hdr.patch_id;
 		ret      = true;
-
-		if (save_patch)
-			memcpy(patch, mc, min_t(u32, desc.psize, PATCH_MAX_SIZE));
 	}
 
 	return ret;
@@ -507,7 +498,7 @@ static void find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpio_data
 	*ret = cp;
 }
 
-void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
+static void apply_ucode_from_containers(unsigned int cpuid_1_eax)
 {
 	struct cpio_data cp = { };
 
@@ -515,42 +506,12 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
 	if (!(cp.data && cp.size))
 		return;
 
-	early_apply_microcode(cpuid_1_eax, cp.data, cp.size, true);
+	early_apply_microcode(cpuid_1_eax, cp.data, cp.size);
 }
 
-void load_ucode_amd_ap(unsigned int cpuid_1_eax)
+void load_ucode_amd_early(unsigned int cpuid_1_eax)
 {
-	struct microcode_amd *mc;
-	struct cpio_data cp;
-	u32 *new_rev, rev, dummy;
-
-	if (IS_ENABLED(CONFIG_X86_32)) {
-		mc	= (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
-		new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
-	} else {
-		mc	= (struct microcode_amd *)amd_ucode_patch;
-		new_rev = &ucode_new_rev;
-	}
-
-	native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-
-	/*
-	 * Check whether a new patch has been saved already. Also, allow application of
-	 * the same revision in order to pick up SMT-thread-specific configuration even
-	 * if the sibling SMT thread already has an up-to-date revision.
-	 */
-	if (*new_rev && rev <= mc->hdr.patch_id) {
-		if (!__apply_microcode_amd(mc)) {
-			*new_rev = mc->hdr.patch_id;
-			return;
-		}
-	}
-
-	find_blobs_in_containers(cpuid_1_eax, &cp);
-	if (!(cp.data && cp.size))
-		return;
-
-	early_apply_microcode(cpuid_1_eax, cp.data, cp.size, false);
+	return apply_ucode_from_containers(cpuid_1_eax);
 }
 
 static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
@@ -578,23 +539,6 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
 	return 0;
 }
 
-void reload_ucode_amd(unsigned int cpu)
-{
-	u32 rev, dummy __always_unused;
-	struct microcode_amd *mc;
-
-	mc = (struct microcode_amd *)amd_ucode_patch[cpu_to_node(cpu)];
-
-	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-
-	if (rev < mc->hdr.patch_id) {
-		if (!__apply_microcode_amd(mc)) {
-			ucode_new_rev = mc->hdr.patch_id;
-			pr_info("reload patch_level=0x%08x\n", ucode_new_rev);
-		}
-	}
-}
-
 /*
  * a small, trivial cache of per-family ucode patches
  */
@@ -655,6 +599,28 @@ static struct ucode_patch *find_patch(unsigned int cpu)
 	return cache_find_patch(equiv_id);
 }
 
+void reload_ucode_amd(unsigned int cpu)
+{
+	u32 rev, dummy __always_unused;
+	struct microcode_amd *mc;
+	struct ucode_patch *p;
+
+	p = find_patch(cpu);
+	if (!p)
+		return;
+
+	mc = p->data;
+
+	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+
+	if (rev < mc->hdr.patch_id) {
+		if (!__apply_microcode_amd(mc)) {
+			ucode_new_rev = mc->hdr.patch_id;
+			pr_info("reload patch_level=0x%08x\n", ucode_new_rev);
+		}
+	}
+}
+
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -875,9 +841,6 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz
 			continue;
 
 		ret = UCODE_NEW;
-
-		memset(&amd_ucode_patch[nid], 0, PATCH_MAX_SIZE);
-		memcpy(&amd_ucode_patch[nid], p->data, min_t(u32, p->size, PATCH_MAX_SIZE));
 	}
 
 	return ret;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 3afcf3de0dd4..192adf5f948e 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -172,7 +172,7 @@ void __init load_ucode_bsp(void)
 	if (intel)
 		load_ucode_intel_bsp();
 	else
-		load_ucode_amd_bsp(cpuid_1_eax);
+		load_ucode_amd_early(cpuid_1_eax);
 }
 
 static bool check_loader_disabled_ap(void)
@@ -200,7 +200,7 @@ void load_ucode_ap(void)
 		break;
 	case X86_VENDOR_AMD:
 		if (x86_family(cpuid_1_eax) >= 0x10)
-			load_ucode_amd_ap(cpuid_1_eax);
+			load_ucode_amd_early(cpuid_1_eax);
 		break;
 	default:
 		break;

From cce8365fc47b053ab1ea9dcc8cd8c8466e53fcde Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 6 Jul 2023 14:30:31 -0600
Subject: [PATCH 0585/2047] arm64: errata: Group all Cortex-A510 errata
 together

There are 2 sections of Cortex-A510 errata. As the ordering within
vendors is in order by CPU/IP name, move the 2nd section up to the 1st
section of A510 errata.

Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230706203030.276437-1-robh@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/arch/arm64/silicon-errata.rst | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index 496cdca5cb99..e3e4450c1c98 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -63,6 +63,14 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #1902691        | ARM64_ERRATUM_1902691       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2051678        | ARM64_ERRATUM_2051678       |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2077057        | ARM64_ERRATUM_2077057       |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2441009        | ARM64_ERRATUM_2441009       |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2658417        | ARM64_ERRATUM_2658417       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A53      | #826319         | ARM64_ERRATUM_826319        |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A53      | #827319         | ARM64_ERRATUM_827319        |
@@ -109,14 +117,6 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A77      | #1508412        | ARM64_ERRATUM_1508412       |
 +----------------+-----------------+-----------------+-----------------------------+
-| ARM            | Cortex-A510     | #2051678        | ARM64_ERRATUM_2051678       |
-+----------------+-----------------+-----------------+-----------------------------+
-| ARM            | Cortex-A510     | #2077057        | ARM64_ERRATUM_2077057       |
-+----------------+-----------------+-----------------+-----------------------------+
-| ARM            | Cortex-A510     | #2441009        | ARM64_ERRATUM_2441009       |
-+----------------+-----------------+-----------------+-----------------------------+
-| ARM            | Cortex-A510     | #2658417        | ARM64_ERRATUM_2658417       |
-+----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2119858        | ARM64_ERRATUM_2119858       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2054223        | ARM64_ERRATUM_2054223       |

From d0999555e306db8d4f973f7316baa5650495a01f Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Tue, 11 Jul 2023 14:34:58 +0530
Subject: [PATCH 0586/2047] arm64/mm: Replace an open coding with
 ID_AA64MMFR1_EL1_HAFDBS_MASK

Replace '0xf' with ID_AA64MMFR1_EL1_HAFDBS_MASK while evaluating if the cpu
supports implicit page table entry access flag update in HW.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/20230711090458.238346-1-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/proc.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 2baeec419f62..14fdf645edc8 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -447,7 +447,7 @@ SYM_FUNC_START(__cpu_setup)
 	 * via capabilities.
 	 */
 	mrs	x9, ID_AA64MMFR1_EL1
-	and	x9, x9, #0xf
+	and	x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK
 	cbz	x9, 1f
 	orr	tcr, tcr, #TCR_HA		// hardware Access flag update
 1:

From 62ce7af97ba5096ecb60bc28d6a619e6eb77ebda Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Tue, 11 Jul 2023 14:50:55 +0530
Subject: [PATCH 0587/2047] arm64/mm: Directly use
 ID_AA64MMFR2_EL1_VARange_MASK

Tools generated register fields have in place mask macros which can be used
directly instead of shifting the older right end sided masks.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/20230711092055.245756-1-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/head.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 757a0de07f91..7b236994f0e1 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -113,7 +113,7 @@ SYM_CODE_START(primary_entry)
 	 */
 #if VA_BITS > 48
 	mrs_s	x0, SYS_ID_AA64MMFR2_EL1
-	tst	x0, #0xf << ID_AA64MMFR2_EL1_VARange_SHIFT
+	tst	x0, ID_AA64MMFR2_EL1_VARange_MASK
 	mov	x0, #VA_BITS
 	mov	x25, #VA_BITS_MIN
 	csel	x25, x25, x0, eq
@@ -756,7 +756,7 @@ SYM_FUNC_START(__cpu_secondary_check52bitva)
 	b.ne	2f
 
 	mrs_s	x0, SYS_ID_AA64MMFR2_EL1
-	and	x0, x0, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
+	and	x0, x0, ID_AA64MMFR2_EL1_VARange_MASK
 	cbnz	x0, 2f
 
 	update_early_cpu_boot_status \

From 5f69ca4229c7d8e23f238174827ee7aa49b0bcb2 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 17 Jul 2023 19:55:05 +0200
Subject: [PATCH 0588/2047] arm64/ptrace: Clean up error handling path in
 sve_set_common()

All error handling paths go to 'out', except this one. Be consistent and
also branch to 'out' here.

Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/aa61301ed2dfd079b74b37f7fede5f179ac3087a.1689616473.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/ptrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index d7f4f0d1ae12..9bc23f1b499e 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -884,7 +884,8 @@ static int sve_set_common(struct task_struct *target,
 			break;
 		default:
 			WARN_ON_ONCE(1);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto out;
 		}
 
 		/*

From ce33cea5d833c24fe2564e3f0416c7f2720a9c9c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 18 Jul 2023 20:45:44 +0100
Subject: [PATCH 0589/2047] arm64/cpufeature: Use ARM64_CPUID_FIELD() to match
 EVT

The recently added Enhanced Virtualization Traps cpufeature does not use
the ARM64_CPUID_FIELDS() helper, convert it to do so. No functional
change.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Zenghui Yu <zenghui.yu@linux.dev>
Link: https://lore.kernel.org/r/20230718-arm64-evt-cpuid-helper-v1-1-68375d1e6b92@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpufeature.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index f9d456fe132d..bcb006390e55 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2708,12 +2708,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.desc = "Enhanced Virtualization Traps",
 		.capability = ARM64_HAS_EVT,
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
-		.sys_reg = SYS_ID_AA64MMFR2_EL1,
-		.sign = FTR_UNSIGNED,
-		.field_pos = ID_AA64MMFR2_EL1_EVT_SHIFT,
-		.field_width = 4,
-		.min_field_value = ID_AA64MMFR2_EL1_EVT_IMP,
 		.matches = has_cpuid_feature,
+		ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
 	},
 	{},
 };

From d0ba961217e0ef8608e751a46902259ce4ff52b7 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Thu, 13 Jul 2023 14:50:04 +0530
Subject: [PATCH 0590/2047] arm64/mm: Add pte_rdonly() helper

This replaces open coding PTE_RDONLY check with a new helper pte_rdonly().
No functional change is intended here.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20230713092004.693749-1-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/pgtable.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0bd18de9fd97..02b7abcf1f9f 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -103,6 +103,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
 #define pte_young(pte)		(!!(pte_val(pte) & PTE_AF))
 #define pte_special(pte)	(!!(pte_val(pte) & PTE_SPECIAL))
 #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
+#define pte_rdonly(pte)		(!!(pte_val(pte) & PTE_RDONLY))
 #define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
 #define pte_user_exec(pte)	(!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
@@ -120,7 +121,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
 	(__boundary - 1 < (end) - 1) ? __boundary : (end);			\
 })
 
-#define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
+#define pte_hw_dirty(pte)	(pte_write(pte) && !pte_rdonly(pte))
 #define pte_sw_dirty(pte)	(!!(pte_val(pte) & PTE_DIRTY))
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 

From 6477c3886ae101367aae2e808caf71f7b0fcc18e Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Thu, 13 Jul 2023 12:45:18 +0530
Subject: [PATCH 0591/2047] arm64/mm: Set only the PTE_DIRTY bit while
 preserving the HW dirty state

pte_mkdirty() creates dirty states both in SW and HW bits, which is really
not required, either in pte_wrprotect() or pte_modify() for preserving the
HW dirty state. Because pte_mkdirty() sets PTE_DIRTY and clears PTE_RDONLY
as pte_write() always evaluates to be true - otherwise pte_hw_dirty() will
not test out in the first place. Clearing PTE_RDONLY again is not required
here because the pte is already in pte_hw_dirty() but might soon loose its
dirty state thus requiring preservation in SW dirty bit i.e PTE_DIRTY.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20230713071518.628440-1-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/pgtable.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 02b7abcf1f9f..72c2e8431360 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -213,7 +213,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
 	 * clear), set the PTE_DIRTY bit.
 	 */
 	if (pte_hw_dirty(pte))
-		pte = pte_mkdirty(pte);
+		pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
 
 	pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
 	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
@@ -824,7 +824,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 			      PTE_ATTRINDX_MASK;
 	/* preserve the hardware dirty information */
 	if (pte_hw_dirty(pte))
-		pte = pte_mkdirty(pte);
+		pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
 	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
 	return pte;
 }

From 42501f6d4d5d4e5b69fa083193c8247918e8c393 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 20 Jul 2023 22:35:55 +0800
Subject: [PATCH 0592/2047] arm64: Remove unsued extern declaration
 init_mem_pgprot()

commit a501e32430d4 ("arm64: Clean up the default pgprot setting")
left behind this.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/20230720143555.26044-1-yuehaibing@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mmu.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 4384eaa0aeb7..94b68850cb9f 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -64,7 +64,6 @@ extern void arm64_memblock_init(void);
 extern void paging_init(void);
 extern void bootmem_init(void);
 extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
-extern void init_mem_pgprot(void);
 extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
 				   phys_addr_t size, pgprot_t prot);
 extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,

From 09da082b07bbae1c11d9560c8502800039aebcea Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Tue, 11 Jul 2023 18:16:04 +0200
Subject: [PATCH 0593/2047] fs: Add fchmodat2()

On the userspace side fchmodat(3) is implemented as a wrapper
function which implements the POSIX-specified interface. This
interface differs from the underlying kernel system call, which does not
have a flags argument. Most implementations require procfs [1][2].

There doesn't appear to be a good userspace workaround for this issue
but the implementation in the kernel is pretty straight-forward.

The new fchmodat2() syscall allows to pass the AT_SYMLINK_NOFOLLOW flag,
unlike existing fchmodat.

[1] https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/fchmodat.c;h=17eca54051ee28ba1ec3f9aed170a62630959143;hb=a492b1e5ef7ab50c6fdd4e4e9879ea5569ab0a6c#l35
[2] https://git.musl-libc.org/cgit/musl/tree/src/stat/fchmodat.c?id=718f363bc2067b6487900eddc9180c84e7739f80#n28

Co-developed-by: Palmer Dabbelt <palmer@sifive.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
Signed-off-by: Alexey Gladkov <legion@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Message-Id: <f2a846ef495943c5d101011eebcf01179d0c7b61.1689092120.git.legion@kernel.org>
[brauner: pre reviews, do flag conversion in do_fchmodat() directly]
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/open.c                | 21 +++++++++++++++++----
 include/linux/syscalls.h |  2 ++
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index 0c55c8e7f837..e52d78e5a333 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -671,11 +671,18 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
 	return err;
 }
 
-static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
+static int do_fchmodat(int dfd, const char __user *filename, umode_t mode,
+		       unsigned int flags)
 {
 	struct path path;
 	int error;
-	unsigned int lookup_flags = LOOKUP_FOLLOW;
+	unsigned int lookup_flags;
+
+	if (unlikely(flags & ~AT_SYMLINK_NOFOLLOW))
+		return -EINVAL;
+
+	lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+
 retry:
 	error = user_path_at(dfd, filename, lookup_flags, &path);
 	if (!error) {
@@ -689,15 +696,21 @@ retry:
 	return error;
 }
 
+SYSCALL_DEFINE4(fchmodat2, int, dfd, const char __user *, filename,
+		umode_t, mode, unsigned int, flags)
+{
+	return do_fchmodat(dfd, filename, mode, flags);
+}
+
 SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
 		umode_t, mode)
 {
-	return do_fchmodat(dfd, filename, mode);
+	return do_fchmodat(dfd, filename, mode, 0);
 }
 
 SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
 {
-	return do_fchmodat(AT_FDCWD, filename, mode);
+	return do_fchmodat(AT_FDCWD, filename, mode, 0);
 }
 
 /*
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 584f404bf868..5690aef7b3a8 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -440,6 +440,8 @@ asmlinkage long sys_chroot(const char __user *filename);
 asmlinkage long sys_fchmod(unsigned int fd, umode_t mode);
 asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
 			     umode_t mode);
+asmlinkage long sys_fchmodat2(int dfd, const char __user *filename,
+			     umode_t mode, unsigned int flags);
 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
 			     gid_t group, int flag);
 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);

From 78252deb023cf0879256fcfbafe37022c390762b Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@sifive.com>
Date: Tue, 11 Jul 2023 18:16:05 +0200
Subject: [PATCH 0594/2047] arch: Register fchmodat2, usually as syscall 452

This registers the new fchmodat2 syscall in most places as nuber 452,
with alpha being the exception where it's 562.  I found all these sites
by grepping for fspick, which I assume has found me everything.

Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
Signed-off-by: Alexey Gladkov <legion@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Message-Id: <a677d521f048e4ca439e7080a5328f21eb8e960e.1689092120.git.legion@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 arch/alpha/kernel/syscalls/syscall.tbl      | 1 +
 arch/arm/tools/syscall.tbl                  | 1 +
 arch/arm64/include/asm/unistd.h             | 2 +-
 arch/arm64/include/asm/unistd32.h           | 2 ++
 arch/ia64/kernel/syscalls/syscall.tbl       | 1 +
 arch/m68k/kernel/syscalls/syscall.tbl       | 1 +
 arch/microblaze/kernel/syscalls/syscall.tbl | 1 +
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 1 +
 arch/mips/kernel/syscalls/syscall_n64.tbl   | 1 +
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 1 +
 arch/parisc/kernel/syscalls/syscall.tbl     | 1 +
 arch/powerpc/kernel/syscalls/syscall.tbl    | 1 +
 arch/s390/kernel/syscalls/syscall.tbl       | 1 +
 arch/sh/kernel/syscalls/syscall.tbl         | 1 +
 arch/sparc/kernel/syscalls/syscall.tbl      | 1 +
 arch/x86/entry/syscalls/syscall_32.tbl      | 1 +
 arch/x86/entry/syscalls/syscall_64.tbl      | 1 +
 arch/xtensa/kernel/syscalls/syscall.tbl     | 1 +
 include/uapi/asm-generic/unistd.h           | 5 ++++-
 19 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 1f13995d00d7..ad37569d0507 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -491,3 +491,4 @@
 559	common  futex_waitv                     sys_futex_waitv
 560	common	set_mempolicy_home_node		sys_ni_syscall
 561	common	cachestat			sys_cachestat
+562	common	fchmodat2			sys_fchmodat2
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 8ebed8a13874..c572d6c3dee0 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -465,3 +465,4 @@
 449	common	futex_waitv			sys_futex_waitv
 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 64a514f90131..bd77253b62e0 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -39,7 +39,7 @@
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls		452
+#define __NR_compat_syscalls		453
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index d952a28463e0..78b68311ec81 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -909,6 +909,8 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv)
 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
 #define __NR_cachestat 451
 __SYSCALL(__NR_cachestat, sys_cachestat)
+#define __NR_fchmodat2 452
+__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index f8c74ffeeefb..83d8609aec03 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -372,3 +372,4 @@
 449	common  futex_waitv                     sys_futex_waitv
 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 4f504783371f..259ceb125367 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -451,3 +451,4 @@
 449	common  futex_waitv                     sys_futex_waitv
 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 858d22bf275c..a3798c2637fd 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -457,3 +457,4 @@
 449	common  futex_waitv                     sys_futex_waitv
 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 1976317d4e8b..152034b8e0a0 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -390,3 +390,4 @@
 449	n32	futex_waitv			sys_futex_waitv
 450	n32	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	n32	cachestat			sys_cachestat
+452	n32	fchmodat2			sys_fchmodat2
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index cfda2511badf..cb5e757f6621 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -366,3 +366,4 @@
 449	n64	futex_waitv			sys_futex_waitv
 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	n64	cachestat			sys_cachestat
+452	n64	fchmodat2			sys_fchmodat2
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 7692234c3768..1a646813afdc 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -439,3 +439,4 @@
 449	o32	futex_waitv			sys_futex_waitv
 450	o32	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	o32	cachestat			sys_cachestat
+452	o32	fchmodat2			sys_fchmodat2
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index a0a9145b6dd4..e97c175b56f9 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -450,3 +450,4 @@
 449	common	futex_waitv			sys_futex_waitv
 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 8c0b08b7a80e..20e50586e8a2 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -538,3 +538,4 @@
 449	common  futex_waitv                     sys_futex_waitv
 450 	nospu	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index a6935af2235c..0122cc156952 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -454,3 +454,4 @@
 449  common	futex_waitv		sys_futex_waitv			sys_futex_waitv
 450  common	set_mempolicy_home_node	sys_set_mempolicy_home_node	sys_set_mempolicy_home_node
 451  common	cachestat		sys_cachestat			sys_cachestat
+452  common	fchmodat2		sys_fchmodat2			sys_fchmodat2
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 97377e8c5025..e90d585c4d3e 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -454,3 +454,4 @@
 449	common  futex_waitv                     sys_futex_waitv
 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index faa835f3c54a..4ed06c71c43f 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -497,3 +497,4 @@
 449	common  futex_waitv                     sys_futex_waitv
 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index bc0a3c941b35..2d0b1bd866ea 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -456,3 +456,4 @@
 449	i386	futex_waitv		sys_futex_waitv
 450	i386	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	i386	cachestat		sys_cachestat
+452	i386	fchmodat2		sys_fchmodat2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 227538b0ce80..814768249eae 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -373,6 +373,7 @@
 449	common	futex_waitv		sys_futex_waitv
 450	common	set_mempolicy_home_node	sys_set_mempolicy_home_node
 451	common	cachestat		sys_cachestat
+452	common	fchmodat2		sys_fchmodat2
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 2b69c3c035b6..fc1a4f3c81d9 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -422,3 +422,4 @@
 449	common  futex_waitv                     sys_futex_waitv
 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
 451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index fd6c1cb585db..abe087c53b4b 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -820,8 +820,11 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
 #define __NR_cachestat 451
 __SYSCALL(__NR_cachestat, sys_cachestat)
 
+#define __NR_fchmodat2 452
+__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
+
 #undef __NR_syscalls
-#define __NR_syscalls 452
+#define __NR_syscalls 453
 
 /*
  * 32 bit systems traditionally used different

From 4859c257d295949c23f4074850a8c2ec31357abb Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Tue, 11 Jul 2023 18:16:07 +0200
Subject: [PATCH 0595/2047] selftests: Add fchmodat2 selftest

The test marks as skipped if a syscall with the AT_SYMLINK_NOFOLLOW flag
fails. This is because not all filesystems support changing the mode
bits of symlinks properly. These filesystems return an error but change
the mode bits:

newfstatat(4, "regfile", {st_mode=S_IFREG|0640, st_size=0, ...}, AT_SYMLINK_NOFOLLOW) = 0
newfstatat(4, "symlink", {st_mode=S_IFLNK|0777, st_size=7, ...}, AT_SYMLINK_NOFOLLOW) = 0
syscall_0x1c3(0x4, 0x55fa1f244396, 0x180, 0x100, 0x55fa1f24438e, 0x34) = -1 EOPNOTSUPP (Operation not supported)
newfstatat(4, "regfile", {st_mode=S_IFREG|0640, st_size=0, ...}, AT_SYMLINK_NOFOLLOW) = 0

This happens with btrfs and xfs:

 $ tools/testing/selftests/fchmodat2/fchmodat2_test
 TAP version 13
 1..1
 ok 1 # SKIP fchmodat2(symlink)
 # Totals: pass:0 fail:0 xfail:0 xpass:0 skip:1 error:0

 $ stat /tmp/ksft-fchmodat2.*/symlink
   File: /tmp/ksft-fchmodat2.3NCqlE/symlink -> regfile
   Size: 7               Blocks: 0          IO Block: 4096   symbolic link
 Device: 7,0     Inode: 133         Links: 1
 Access: (0600/lrw-------)  Uid: (    0/    root)   Gid: (    0/    root)

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Message-Id: <4532a04a870ff589ba62ceeacf76f0bd81b9ba01.1689092120.git.legion@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 tools/testing/selftests/Makefile              |   1 +
 tools/testing/selftests/fchmodat2/.gitignore  |   2 +
 tools/testing/selftests/fchmodat2/Makefile    |   6 +
 .../selftests/fchmodat2/fchmodat2_test.c      | 162 ++++++++++++++++++
 4 files changed, 171 insertions(+)
 create mode 100644 tools/testing/selftests/fchmodat2/.gitignore
 create mode 100644 tools/testing/selftests/fchmodat2/Makefile
 create mode 100644 tools/testing/selftests/fchmodat2/fchmodat2_test.c

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 666b56f22a41..8dca8acdb671 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -18,6 +18,7 @@ TARGETS += drivers/net/bonding
 TARGETS += drivers/net/team
 TARGETS += efivarfs
 TARGETS += exec
+TARGETS += fchmodat2
 TARGETS += filesystems
 TARGETS += filesystems/binderfs
 TARGETS += filesystems/epoll
diff --git a/tools/testing/selftests/fchmodat2/.gitignore b/tools/testing/selftests/fchmodat2/.gitignore
new file mode 100644
index 000000000000..82a4846cbc4b
--- /dev/null
+++ b/tools/testing/selftests/fchmodat2/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/*_test
diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile
new file mode 100644
index 000000000000..45b519eab851
--- /dev/null
+++ b/tools/testing/selftests/fchmodat2/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined
+TEST_GEN_PROGS := fchmodat2_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/fchmodat2/fchmodat2_test.c b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
new file mode 100644
index 000000000000..2d98eb215bc6
--- /dev/null
+++ b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#ifndef __NR_fchmodat2
+	#if defined __alpha__
+		#define __NR_fchmodat2 562
+	#elif defined _MIPS_SIM
+		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
+			#define __NR_fchmodat2 (452 + 4000)
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
+			#define __NR_fchmodat2 (452 + 6000)
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
+			#define __NR_fchmodat2 (452 + 5000)
+		#endif
+	#elif defined __ia64__
+		#define __NR_fchmodat2 (452 + 1024)
+	#else
+		#define __NR_fchmodat2 452
+	#endif
+#endif
+
+int sys_fchmodat2(int dfd, const char *filename, mode_t mode, int flags)
+{
+	int ret = syscall(__NR_fchmodat2, dfd, filename, mode, flags);
+
+	return ret >= 0 ? ret : -errno;
+}
+
+int setup_testdir(void)
+{
+	int dfd, ret;
+	char dirname[] = "/tmp/ksft-fchmodat2.XXXXXX";
+
+	/* Make the top-level directory. */
+	if (!mkdtemp(dirname))
+		ksft_exit_fail_msg("%s: failed to create tmpdir\n", __func__);
+
+	dfd = open(dirname, O_PATH | O_DIRECTORY);
+	if (dfd < 0)
+		ksft_exit_fail_msg("%s: failed to open tmpdir\n", __func__);
+
+	ret = openat(dfd, "regfile", O_CREAT | O_WRONLY | O_TRUNC, 0644);
+	if (ret < 0)
+		ksft_exit_fail_msg("%s: failed to create file in tmpdir\n",
+				__func__);
+	close(ret);
+
+	ret = symlinkat("regfile", dfd, "symlink");
+	if (ret < 0)
+		ksft_exit_fail_msg("%s: failed to create symlink in tmpdir\n",
+				__func__);
+
+	return dfd;
+}
+
+int expect_mode(int dfd, const char *filename, mode_t expect_mode)
+{
+	struct stat st;
+	int ret = fstatat(dfd, filename, &st, AT_SYMLINK_NOFOLLOW);
+
+	if (ret)
+		ksft_exit_fail_msg("%s: %s: fstatat failed\n",
+				__func__, filename);
+
+	return (st.st_mode == expect_mode);
+}
+
+void test_regfile(void)
+{
+	int dfd, ret;
+
+	dfd = setup_testdir();
+
+	ret = sys_fchmodat2(dfd, "regfile", 0640, 0);
+
+	if (ret < 0)
+		ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__);
+
+	if (!expect_mode(dfd, "regfile", 0100640))
+		ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n",
+				__func__);
+
+	ret = sys_fchmodat2(dfd, "regfile", 0600, AT_SYMLINK_NOFOLLOW);
+
+	if (ret < 0)
+		ksft_exit_fail_msg("%s: fchmodat2(AT_SYMLINK_NOFOLLOW) failed\n",
+				__func__);
+
+	if (!expect_mode(dfd, "regfile", 0100600))
+		ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n",
+				__func__);
+
+	ksft_test_result_pass("fchmodat2(regfile)\n");
+}
+
+void test_symlink(void)
+{
+	int dfd, ret;
+
+	dfd = setup_testdir();
+
+	ret = sys_fchmodat2(dfd, "symlink", 0640, 0);
+
+	if (ret < 0)
+		ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__);
+
+	if (!expect_mode(dfd, "regfile", 0100640))
+		ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n",
+				__func__);
+
+	if (!expect_mode(dfd, "symlink", 0120777))
+		ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2\n",
+				__func__);
+
+	ret = sys_fchmodat2(dfd, "symlink", 0600, AT_SYMLINK_NOFOLLOW);
+
+	/*
+	 * On certain filesystems (xfs or btrfs), chmod operation fails. So we
+	 * first check the symlink target but if the operation fails we mark the
+	 * test as skipped.
+	 *
+	 * https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00467.html
+	 */
+	if (ret == 0 && !expect_mode(dfd, "symlink", 0120600))
+		ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2 with nofollow\n",
+				__func__);
+
+	if (!expect_mode(dfd, "regfile", 0100640))
+		ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n",
+				__func__);
+
+	if (ret != 0)
+		ksft_test_result_skip("fchmodat2(symlink)\n");
+	else
+		ksft_test_result_pass("fchmodat2(symlink)\n");
+}
+
+#define NUM_TESTS 2
+
+int main(int argc, char **argv)
+{
+	ksft_print_header();
+	ksft_set_plan(NUM_TESTS);
+
+	test_regfile();
+	test_symlink();
+
+	if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+		ksft_exit_fail();
+	else
+		ksft_exit_pass();
+}

From e810487385de409e826f249fcce13105b5513a65 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 24 Jul 2023 18:24:27 +0200
Subject: [PATCH 0596/2047] s390/diag: fix diagnose 8c description

The comment above diag8c() describes diagnose 210, not diagnose 8c.
Add a proper short description.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/diag.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index f287713baf6d..f9f06cd8fcee 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -223,7 +223,7 @@ int diag210(struct diag210 *addr)
 EXPORT_SYMBOL(diag210);
 
 /*
- * Diagnose 210: Get information about a virtual device
+ * Diagnose 8C: Access 3270 Display Device Information
  */
 int diag8c(struct diag8c *addr, struct ccw_dev_id *devno)
 {

From 7fb0ad1938ef129aa39d565bf2ad3728a39136ef Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 10 Jun 2023 16:25:28 +0200
Subject: [PATCH 0597/2047] s390/ebcdic: fix typo in comment

s/ECBDIC/EBCDIC/ 	(C and B are swapped)

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/08ed63331699177b3354458da66a2f63c0217e49.1686407113.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/ebcdic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c
index 7f8246c9be08..0e51fa537262 100644
--- a/arch/s390/kernel/ebcdic.c
+++ b/arch/s390/kernel/ebcdic.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- *    ECBDIC -> ASCII, ASCII -> ECBDIC,
+ *    EBCDIC -> ASCII, ASCII -> EBCDIC,
  *    upper to lower case (EBCDIC) conversion tables.
  *
  *  S390 version

From 7b27d9ef0f63da736d3a585a5d5098cea62a3ad7 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Wed, 26 Jul 2023 08:18:34 +0200
Subject: [PATCH 0598/2047] s390/ftrace: use la instead of aghik in
 return_to_handler()

Nathan Chancellor reported the following build error when compiling the
kernel with CONFIG_MARCH_Z10=y:

  arch/s390/kernel/mcount.S: Assembler messages:
  arch/s390/kernel/mcount.S:140: Error: Unrecognized opcode: `aghik'

The aghik instruction is only available since z196. Use the la instruction
instead which is available for all machines.

Reported-by: Nathan Chancellor <nathan@kernel.org>
Closes: https://lore.kernel.org/all/20230725211105.GA224840@dev-arch.thelio-3990X
Fixes: 1256e70a082a ("s390/ftrace: enable HAVE_FUNCTION_GRAPH_RETVAL")
Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build
Link: https://lore.kernel.org/r/20230726061834.1300984-1-hca@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/mcount.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index d2596e0df6fa..71c5fa05e7f1 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -137,7 +137,7 @@ SYM_FUNC_START(return_to_handler)
 	lgr	%r1,%r15
 	aghi	%r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE)
 	stg	%r1,__SF_BACKCHAIN(%r15)
-	aghik	%r3,%r15,STACK_FRAME_OVERHEAD
+	la	%r3,STACK_FRAME_OVERHEAD(%r15)
 	stg	%r1,__FGRAPH_RET_FP(%r3)
 	stg	%r2,__FGRAPH_RET_GPR2(%r3)
 	lgr	%r2,%r3

From 8b46451c8bd63da543598ef8e0d67cb52281c6ef Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Wed, 26 Jul 2023 15:39:39 +0200
Subject: [PATCH 0599/2047] s390/defconfigs: set CONFIG_FUNCTION_GRAPH_RETVAL=y

Enable recording and printing function return values
for the function graph tracer.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/configs/debug_defconfig | 1 +
 arch/s390/configs/defconfig       | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index aa95cf6dfabb..b042ccb8a8a6 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -836,6 +836,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300
 # CONFIG_RCU_TRACE is not set
 CONFIG_LATENCYTOP=y
 CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
 CONFIG_FPROBE=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_STACK_TRACER=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index f041945f9148..0fa45c0d6bee 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -787,6 +787,7 @@ CONFIG_RCU_REF_SCALE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
 CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
 CONFIG_FPROBE=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_STACK_TRACER=y

From a96a7a7ddf9559eb20e608c0de30d1867d755a33 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@kernel.org>
Date: Thu, 27 Jul 2023 01:36:19 +0800
Subject: [PATCH 0600/2047] arm64: vdso: remove two .altinstructions related
 symbols

The two symbols __alt_instructions and __alt_instructions_end are not
used, since the vDSO patching code looks for the '.altinstructions' ELF
section directly.

Remove the unused linker symbols.

Fixes: 4e3bca8f7cdd ("arm64: alternative: patch alternatives in the vDSO")
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Link: https://lore.kernel.org/r/20230726173619.3732-1-jszhang@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/vdso/vdso.lds.S | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index 6028f1fe2d1c..45354f2ddf70 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -50,9 +50,7 @@ SECTIONS
 
 	. = ALIGN(4);
 	.altinstructions : {
-		__alt_instructions = .;
 		*(.altinstructions)
-		__alt_instructions_end = .;
 	}
 
 	.dynamic	: { *(.dynamic) }		:text	:dynamic

From cbbc6fdd85be6cd748d6c6db23c1d5be6b04161e Mon Sep 17 00:00:00 2001
From: Jing Zhang <renyu.zj@linux.alibaba.com>
Date: Tue, 20 Jun 2023 15:12:33 +0800
Subject: [PATCH 0601/2047] driver/perf: Add identifier sysfs file for Yitian
 710 DDR

To allow userspace to identify the specific implementation of the device,
add an "identifier" sysfs file.

The perf tool can match the Yitian 710 DDR metric through the identifier.

Signed-off-by: Jing Zhang <renyu.zj@linux.alibaba.com>
Acked-by: Ian Rogers <irogers@google.com>
Reviewed-by: Shuai Xue <xueshuai@linux.alibaba.com>
Link: https://lore.kernel.org/r/1687245156-61215-2-git-send-email-renyu.zj@linux.alibaba.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/alibaba_uncore_drw_pmu.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c
index 5c5be9fc1b15..19d459a36be5 100644
--- a/drivers/perf/alibaba_uncore_drw_pmu.c
+++ b/drivers/perf/alibaba_uncore_drw_pmu.c
@@ -236,10 +236,37 @@ static const struct attribute_group ali_drw_pmu_cpumask_attr_group = {
 	.attrs = ali_drw_pmu_cpumask_attrs,
 };
 
+static ssize_t ali_drw_pmu_identifier_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	return sysfs_emit(page, "%s\n", "ali_drw_pmu");
+}
+
+static umode_t ali_drw_pmu_identifier_attr_visible(struct kobject *kobj,
+						struct attribute *attr, int n)
+{
+	return attr->mode;
+}
+
+static struct device_attribute ali_drw_pmu_identifier_attr =
+	__ATTR(identifier, 0444, ali_drw_pmu_identifier_show, NULL);
+
+static struct attribute *ali_drw_pmu_identifier_attrs[] = {
+	&ali_drw_pmu_identifier_attr.attr,
+	NULL
+};
+
+static const struct attribute_group ali_drw_pmu_identifier_attr_group = {
+	.attrs = ali_drw_pmu_identifier_attrs,
+	.is_visible = ali_drw_pmu_identifier_attr_visible
+};
+
 static const struct attribute_group *ali_drw_pmu_attr_groups[] = {
 	&ali_drw_pmu_events_attr_group,
 	&ali_drw_pmu_cpumask_attr_group,
 	&ali_drw_pmu_format_group,
+	&ali_drw_pmu_identifier_attr_group,
 	NULL,
 };
 

From c47ea342d85db6fde628dc55996d852d5244bda3 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 17:35:55 +0800
Subject: [PATCH 0602/2047] perf: xgene_pmu: Convert to
 devm_platform_ioremap_resource()

Use devm_platform_ioremap_resource() to simplify code.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230704093556.17926-1-frank.li@vivo.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/xgene_pmu.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 0c32dffc7ede..9972bfc11a5c 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -1833,7 +1833,6 @@ static int xgene_pmu_probe(struct platform_device *pdev)
 	const struct xgene_pmu_data *dev_data;
 	const struct of_device_id *of_id;
 	struct xgene_pmu *xgene_pmu;
-	struct resource *res;
 	int irq, rc;
 	int version;
 
@@ -1883,8 +1882,7 @@ static int xgene_pmu_probe(struct platform_device *pdev)
 	xgene_pmu->version = version;
 	dev_info(&pdev->dev, "X-Gene PMU version %d\n", xgene_pmu->version);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	xgene_pmu->pcppmu_csr = devm_ioremap_resource(&pdev->dev, res);
+	xgene_pmu->pcppmu_csr = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(xgene_pmu->pcppmu_csr)) {
 		dev_err(&pdev->dev, "ioremap failed for PCP PMU resource\n");
 		return PTR_ERR(xgene_pmu->pcppmu_csr);

From 7c3f204e544dfa376bf1b34ebaa5552304a2b7d9 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Thu, 6 Jul 2023 11:23:05 +0200
Subject: [PATCH 0603/2047] perf/smmuv3: Remove build dependency on ACPI

This driver supports working without ACPI since commit 3f7be43561766
("perf/smmuv3: Add devicetree support"), so remove the build dependency.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/20230706-smmuv3-pmu-noacpi-v1-1-7083ef189158@axis.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index f4572a5cca72..273d67ecf6d2 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -92,7 +92,7 @@ config ARM_PMU_ACPI
 
 config ARM_SMMU_V3_PMU
 	 tristate "ARM SMMUv3 Performance Monitors Extension"
-	 depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT)
+	 depends on ARM64 || (COMPILE_TEST && 64BIT)
 	 depends on GENERIC_MSI_IRQ
 	   help
 	   Provides support for the ARM SMMUv3 Performance Monitor Counter

From 039768b558537af23734c4cc6fd688ed575158c8 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 6 Jul 2023 14:55:03 -0600
Subject: [PATCH 0604/2047] dt-bindings: arm: pmu: Add Cortex A520, A715, A720,
 X3, and X4

Add compatible strings for the Arm Cortex-A520, Cortex-A715,
Cortex-A720, Cortex-X3, and Cortex-X4 CPU PMUs.

Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230706205505.308523-1-robh@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/devicetree/bindings/arm/pmu.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml
index e14358bf0b9c..99b5e9530707 100644
--- a/Documentation/devicetree/bindings/arm/pmu.yaml
+++ b/Documentation/devicetree/bindings/arm/pmu.yaml
@@ -49,9 +49,14 @@ properties:
           - arm,cortex-a77-pmu
           - arm,cortex-a78-pmu
           - arm,cortex-a510-pmu
+          - arm,cortex-a520-pmu
           - arm,cortex-a710-pmu
+          - arm,cortex-a715-pmu
+          - arm,cortex-a720-pmu
           - arm,cortex-x1-pmu
           - arm,cortex-x2-pmu
+          - arm,cortex-x3-pmu
+          - arm,cortex-x4-pmu
           - arm,neoverse-e1-pmu
           - arm,neoverse-n1-pmu
           - arm,neoverse-n2-pmu

From 989567fc0f3d5ba1c2a48cdb857a3965b7e1a276 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 6 Jul 2023 14:55:04 -0600
Subject: [PATCH 0605/2047] perf: pmuv3: Add Cortex A520, A715, A720, X3 and X4
 PMUs

Add support for the Arm Cortex-A520, Cortex-A715, Cortex-A720,
Cortex-X3, and Cortex-X4 CPU PMUs. They are straight-forward additions
with just new compatible strings.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230706205505.308523-2-robh@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmuv3.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 08b3a1bf0ef6..d2dffb4e9d07 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -1266,9 +1266,14 @@ PMUV3_INIT_SIMPLE(armv8_cortex_a76)
 PMUV3_INIT_SIMPLE(armv8_cortex_a77)
 PMUV3_INIT_SIMPLE(armv8_cortex_a78)
 PMUV3_INIT_SIMPLE(armv9_cortex_a510)
+PMUV3_INIT_SIMPLE(armv9_cortex_a520)
 PMUV3_INIT_SIMPLE(armv9_cortex_a710)
+PMUV3_INIT_SIMPLE(armv9_cortex_a715)
+PMUV3_INIT_SIMPLE(armv9_cortex_a720)
 PMUV3_INIT_SIMPLE(armv8_cortex_x1)
 PMUV3_INIT_SIMPLE(armv9_cortex_x2)
+PMUV3_INIT_SIMPLE(armv9_cortex_x3)
+PMUV3_INIT_SIMPLE(armv9_cortex_x4)
 PMUV3_INIT_SIMPLE(armv8_neoverse_e1)
 PMUV3_INIT_SIMPLE(armv8_neoverse_n1)
 PMUV3_INIT_SIMPLE(armv9_neoverse_n2)
@@ -1334,9 +1339,14 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
 	{.compatible = "arm,cortex-a77-pmu",	.data = armv8_cortex_a77_pmu_init},
 	{.compatible = "arm,cortex-a78-pmu",	.data = armv8_cortex_a78_pmu_init},
 	{.compatible = "arm,cortex-a510-pmu",	.data = armv9_cortex_a510_pmu_init},
+	{.compatible = "arm,cortex-a520-pmu",	.data = armv9_cortex_a520_pmu_init},
 	{.compatible = "arm,cortex-a710-pmu",	.data = armv9_cortex_a710_pmu_init},
+	{.compatible = "arm,cortex-a715-pmu",	.data = armv9_cortex_a715_pmu_init},
+	{.compatible = "arm,cortex-a720-pmu",	.data = armv9_cortex_a720_pmu_init},
 	{.compatible = "arm,cortex-x1-pmu",	.data = armv8_cortex_x1_pmu_init},
 	{.compatible = "arm,cortex-x2-pmu",	.data = armv9_cortex_x2_pmu_init},
+	{.compatible = "arm,cortex-x3-pmu",	.data = armv9_cortex_x3_pmu_init},
+	{.compatible = "arm,cortex-x4-pmu",	.data = armv9_cortex_x4_pmu_init},
 	{.compatible = "arm,neoverse-e1-pmu",	.data = armv8_neoverse_e1_pmu_init},
 	{.compatible = "arm,neoverse-n1-pmu",	.data = armv8_neoverse_n1_pmu_init},
 	{.compatible = "arm,neoverse-n2-pmu",	.data = armv9_neoverse_n2_pmu_init},

From 918dc87b746e0114eb64d6e478da7f370e266d5a Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 14 Jul 2023 11:48:31 -0600
Subject: [PATCH 0606/2047] drivers/perf: Explicitly include correct DT
 includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230714174832.4061752-1-robh@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/amlogic/meson_ddr_pmu_core.c | 2 --
 drivers/perf/arm-cci.c                    | 5 +----
 drivers/perf/arm_dsu_pmu.c                | 2 +-
 drivers/perf/arm_pmu_platform.c           | 1 -
 drivers/perf/arm_spe_pmu.c                | 3 +--
 drivers/perf/fsl_imx8_ddr_perf.c          | 3 +--
 drivers/perf/fsl_imx9_ddr_perf.c          | 4 +---
 drivers/perf/marvell_cn10k_ddr_pmu.c      | 3 +--
 drivers/perf/marvell_cn10k_tad_pmu.c      | 3 +--
 9 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
index 0b24dee1ed3c..bbc7285fd934 100644
--- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
+++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
@@ -9,8 +9,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 998259f1d973..61de861eaf91 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -7,10 +7,7 @@
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index fe2abb412c00..8223c49bd082 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -20,7 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 933b96e243b8..3596db36cbff 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -16,7 +16,6 @@
 #include <linux/irqdesc.h>
 #include <linux/kconfig.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/percpu.h>
 #include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index b9ba4c4fe5a2..d2b0cbf0e0c4 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -25,8 +25,7 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/perf_event.h>
 #include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 5222ba1e79d0..1cb3861ab0e0 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -10,10 +10,9 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/perf_event.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 
 #define COUNTER_CNTL		0x0
diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c
index 71d5b07e3aff..5cf770a1bc31 100644
--- a/drivers/perf/fsl_imx9_ddr_perf.c
+++ b/drivers/perf/fsl_imx9_ddr_perf.c
@@ -7,9 +7,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/platform_device.h>
 #include <linux/perf_event.h>
 
 /* Performance monitor configuration */
diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c
index b94a5f6cc22b..524ba82bfce2 100644
--- a/drivers/perf/marvell_cn10k_ddr_pmu.c
+++ b/drivers/perf/marvell_cn10k_ddr_pmu.c
@@ -8,11 +8,10 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/perf_event.h>
 #include <linux/hrtimer.h>
 #include <linux/acpi.h>
+#include <linux/platform_device.h>
 
 /* Performance Counters Operating Mode Control Registers */
 #define DDRC_PERF_CNT_OP_MODE_CTRL	0x8020
diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index 3972197e2210..fec8e82edb95 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -6,10 +6,9 @@
 
 #define pr_fmt(fmt) "tad_pmu: " fmt
 
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/cpuhotplug.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>

From cfef69cbe3726c095f55769bd0e7c72f32bf5060 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 27 Jul 2023 10:54:34 +0200
Subject: [PATCH 0607/2047] regulator: dt-bindings: dlg,slg51000: Convert to DT
 schema

Convert the bindings for Dialog Semiconductor SLG51000 Voltage Regulator
to DT schema.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230727085434.16596-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/dlg,slg51000.yaml      | 132 ++++++++++++++++++
 .../bindings/regulator/slg51000.txt           |  88 ------------
 MAINTAINERS                                   |   2 +-
 3 files changed, 133 insertions(+), 89 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/regulator/dlg,slg51000.yaml
 delete mode 100644 Documentation/devicetree/bindings/regulator/slg51000.txt

diff --git a/Documentation/devicetree/bindings/regulator/dlg,slg51000.yaml b/Documentation/devicetree/bindings/regulator/dlg,slg51000.yaml
new file mode 100644
index 000000000000..bad140418e49
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/dlg,slg51000.yaml
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/dlg,slg51000.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Dialog Semiconductor SLG51000 Voltage Regulator
+
+maintainers:
+  - Eric Jeong <eric.jeong.opensource@diasemi.com>
+  - Support Opensource <support.opensource@diasemi.com>
+
+properties:
+  compatible:
+    const: dlg,slg51000
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  dlg,cs-gpios:
+    maxItems: 1
+    description:
+      GPIO for chip select
+
+  vin3-supply:
+    description:
+      Input supply for ldo3, required if regulator is enabled
+
+  vin4-supply:
+    description:
+      Input supply for ldo4, required if regulator is enabled
+
+  vin5-supply:
+    description:
+      Input supply for ldo5, required if regulator is enabled
+
+  vin6-supply:
+    description:
+      Input supply for ldo6, required if regulator is enabled
+
+  vin7-supply:
+    description:
+      Input supply for ldo7, required if regulator is enabled
+
+  regulators:
+    type: object
+    additionalProperties: false
+
+    patternProperties:
+      "^ldo[1-7]$":
+        type: object
+        $ref: /schemas/regulator/regulator.yaml#
+        unevaluatedProperties: false
+
+        properties:
+          enable-gpios:
+            maxItems: 1
+
+        required:
+          - regulator-name
+
+required:
+  - compatible
+  - reg
+  - regulators
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/regulator/dlg,da9121-regulator.h>
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pmic@75 {
+            compatible = "dlg,slg51000";
+            reg = <0x75>;
+            dlg,cs-gpios = <&tlmm 69 GPIO_ACTIVE_HIGH>;
+            vin5-supply = <&vreg_s1f_1p2>;
+            vin6-supply = <&vreg_s1f_1p2>;
+
+            regulators {
+                ldo1 {
+                    regulator-name = "slg51000_b_ldo1";
+                    regulator-min-microvolt = <2400000>;
+                    regulator-max-microvolt = <3300000>;
+                };
+
+                ldo2 {
+                    regulator-name = "slg51000_b_ldo2";
+                    regulator-min-microvolt = <2400000>;
+                    regulator-max-microvolt = <3300000>;
+                };
+
+                ldo3 {
+                    regulator-name = "slg51000_b_ldo3";
+                    regulator-min-microvolt = <1200000>;
+                    regulator-max-microvolt = <3750000>;
+                };
+
+                ldo4 {
+                    regulator-name = "slg51000_b_ldo4";
+                    regulator-min-microvolt = <1200000>;
+                    regulator-max-microvolt = <3750000>;
+                };
+
+                ldo5 {
+                    regulator-name = "slg51000_b_ldo5";
+                    regulator-min-microvolt = <500000>;
+                    regulator-max-microvolt = <1200000>;
+                };
+
+                ldo6 {
+                    regulator-name = "slg51000_b_ldo6";
+                    regulator-min-microvolt = <500000>;
+                    regulator-max-microvolt = <1200000>;
+                };
+
+                ldo7 {
+                    regulator-name = "slg51000_b_ldo7";
+                    regulator-min-microvolt = <1200000>;
+                    regulator-max-microvolt = <3750000>;
+                };
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/regulator/slg51000.txt b/Documentation/devicetree/bindings/regulator/slg51000.txt
deleted file mode 100644
index aa0733e49b90..000000000000
--- a/Documentation/devicetree/bindings/regulator/slg51000.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-* Dialog Semiconductor SLG51000 Voltage Regulator
-
-Required properties:
-- compatible : Should be "dlg,slg51000" for SLG51000
-- reg : Specifies the I2C slave address.
-- xxx-supply: Input voltage supply regulator for ldo3 to ldo7.
-  These entries are required if regulators are enabled for a device.
-  An absence of these properties can cause the regulator registration to fail.
-  If some of input supply is powered through battery or always-on supply then
-  also it is required to have these parameters with proper node handle of always
-  on power supply.
-    vin3-supply: Input supply for ldo3
-    vin4-supply: Input supply for ldo4
-    vin5-supply: Input supply for ldo5
-    vin6-supply: Input supply for ldo6
-    vin7-supply: Input supply for ldo7
-
-Optional properties:
-- interrupt-parent : Specifies the reference to the interrupt controller.
-- interrupts : IRQ line information.
-- dlg,cs-gpios : Specify a valid GPIO for chip select
-
-Sub-nodes:
-- regulators : This node defines the settings for the regulators.
-  The content of the sub-node is defined by the standard binding
-  for regulators; see regulator.txt.
-
-  The SLG51000 regulators are bound using their names listed below:
-    ldo1
-    ldo2
-    ldo3
-    ldo4
-    ldo5
-    ldo6
-    ldo7
-
-Optional properties for regulators:
-- enable-gpios : Specify a valid GPIO for platform control of the regulator.
-
-Example:
-	pmic: slg51000@75 {
-		compatible = "dlg,slg51000";
-		reg = <0x75>;
-
-		regulators {
-			ldo1 {
-			        regulator-name = "ldo1";
-			        regulator-min-microvolt = <2400000>;
-			        regulator-max-microvolt = <3300000>;
-			};
-
-			ldo2 {
-			        regulator-name = "ldo2";
-			        regulator-min-microvolt = <2400000>;
-			        regulator-max-microvolt = <3300000>;
-			};
-
-			ldo3 {
-			        regulator-name = "ldo3";
-			        regulator-min-microvolt = <1200000>;
-			        regulator-max-microvolt = <3750000>;
-			};
-
-			ldo4 {
-			        regulator-name = "ldo4";
-			        regulator-min-microvolt = <1200000>;
-			        regulator-max-microvolt = <3750000>;
-			};
-
-			ldo5 {
-			        regulator-name = "ldo5";
-			        regulator-min-microvolt = <500000>;
-			        regulator-max-microvolt = <1200000>;
-			};
-
-			ldo6 {
-			        regulator-name = "ldo6";
-			        regulator-min-microvolt = <500000>;
-			        regulator-max-microvolt = <1200000>;
-			};
-
-			ldo7 {
-			        regulator-name = "ldo7";
-			        regulator-min-microvolt = <1200000>;
-			        regulator-max-microvolt = <3750000>;
-			};
-		};
-	};
diff --git a/MAINTAINERS b/MAINTAINERS
index d516295978a4..7ff9b94ac3ba 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6010,7 +6010,7 @@ F:	Documentation/devicetree/bindings/mfd/da90*.txt
 F:	Documentation/devicetree/bindings/mfd/dlg,da90*.yaml
 F:	Documentation/devicetree/bindings/regulator/da92*.txt
 F:	Documentation/devicetree/bindings/regulator/dlg,da9*.yaml
-F:	Documentation/devicetree/bindings/regulator/slg51000.txt
+F:	Documentation/devicetree/bindings/regulator/dlg,slg51000.yaml
 F:	Documentation/devicetree/bindings/sound/da[79]*.txt
 F:	Documentation/devicetree/bindings/thermal/da90??-thermal.txt
 F:	Documentation/devicetree/bindings/watchdog/da90??-wdt.txt

From b505e2ecf31b6a2a62c76203558384d7646ab02f Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Thu, 27 Jul 2023 11:05:58 +0000
Subject: [PATCH 0608/2047] spi: stm32: Remove redundant dev_err_probe()

There is no need to call the dev_err_probe() function directly to print
a custom message when handling an error from platform_get_irq() function as
it is going to display an appropriate error message in case of a failure.

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Link: https://lore.kernel.org/r/20230727110558.2904084-1-ruanjinjie@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-stm32.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 8cf7b41481b2..d16ee6e54de9 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -1836,8 +1836,7 @@ static int stm32_spi_probe(struct platform_device *pdev)
 
 	spi->irq = platform_get_irq(pdev, 0);
 	if (spi->irq <= 0)
-		return dev_err_probe(&pdev->dev, spi->irq,
-				     "failed to get irq\n");
+		return spi->irq;
 
 	ret = devm_request_threaded_irq(&pdev->dev, spi->irq,
 					spi->cfg->irq_handler_event,

From 630fdd592912614a72d00026fdadad72d9ef62eb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 26 Jul 2023 14:59:57 -0700
Subject: [PATCH 0609/2047] seq_file: seq_show_option_n() is used for precise
 sizes

When seq_show_option_n() is used, it is for non-string memory that
happens to be printable bytes. As such, we must use memcpy() to copy the
bytes and then explicitly NUL-terminate the result.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Muchun Song <muchun.song@linux.dev>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230726215957.never.619-kees@kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/seq_file.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index bd023dd38ae6..386ab580b839 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -249,18 +249,19 @@ static inline void seq_show_option(struct seq_file *m, const char *name,
 
 /**
  * seq_show_option_n - display mount options with appropriate escapes
- *		       where @value must be a specific length.
+ *		       where @value must be a specific length (i.e.
+ *		       not NUL-terminated).
  * @m: the seq_file handle
  * @name: the mount option name
  * @value: the mount option name's value, cannot be NULL
- * @length: the length of @value to display
+ * @length: the exact length of @value to display, must be constant expression
  *
  * This is a macro since this uses "length" to define the size of the
  * stack buffer.
  */
 #define seq_show_option_n(m, name, value, length) {	\
 	char val_buf[length + 1];			\
-	strncpy(val_buf, value, length);		\
+	memcpy(val_buf, value, length);			\
 	val_buf[length] = '\0';				\
 	seq_show_option(m, name, val_buf);		\
 }

From 61ce78f29a694772c3b2c5c749589682dbdfec2d Mon Sep 17 00:00:00 2001
From: Azeem Shaikh <azeemshaikh38@gmail.com>
Date: Mon, 3 Jul 2023 16:06:41 +0000
Subject: [PATCH 0610/2047] um: Remove strlcpy declaration

strlcpy() reads the entire source buffer first.
This read may exceed the destination size limit.
This is both inefficient and can lead to linear read
overflows if a source string is not NUL-terminated [1].
In an effort to remove strlcpy() completely [2], replace
strlcpy() here with strscpy().
No return values were used, so direct replacement is safe.

[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy
[2] https://github.com/KSPP/linux/issues/89

Signed-off-by: Azeem Shaikh <azeemshaikh38@gmail.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20230703160641.1790935-1-azeemshaikh38@gmail.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/um/include/shared/user.h | 1 -
 arch/um/os-Linux/umid.c       | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h
index 0347a190429c..981e11d8e025 100644
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -50,7 +50,6 @@ static inline int printk(const char *fmt, ...)
 #endif
 
 extern int in_aton(char *str);
-extern size_t strlcpy(char *, const char *, size_t);
 extern size_t strlcat(char *, const char *, size_t);
 extern size_t strscpy(char *, const char *, size_t);
 
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index 7a1abb829930..288c422bfa96 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -40,7 +40,7 @@ static int __init make_uml_dir(void)
 				__func__);
 			goto err;
 		}
-		strlcpy(dir, home, sizeof(dir));
+		strscpy(dir, home, sizeof(dir));
 		uml_dir++;
 	}
 	strlcat(dir, uml_dir, sizeof(dir));
@@ -243,7 +243,7 @@ int __init set_umid(char *name)
 	if (strlen(name) > UMID_LEN - 1)
 		return -E2BIG;
 
-	strlcpy(umid, name, sizeof(umid));
+	strscpy(umid, name, sizeof(umid));
 
 	return 0;
 }
@@ -262,7 +262,7 @@ static int __init make_umid(void)
 	make_uml_dir();
 
 	if (*umid == '\0') {
-		strlcpy(tmp, uml_dir, sizeof(tmp));
+		strscpy(tmp, uml_dir, sizeof(tmp));
 		strlcat(tmp, "XXXXXX", sizeof(tmp));
 		fd = mkstemp(tmp);
 		if (fd < 0) {

From c9732f1461f947429f8ee9289792c5ffea793350 Mon Sep 17 00:00:00 2001
From: Azeem Shaikh <azeemshaikh38@gmail.com>
Date: Mon, 3 Jul 2023 16:58:16 +0000
Subject: [PATCH 0611/2047] perf: Replace strlcpy with strscpy

strlcpy() reads the entire source buffer first.
This read may exceed the destination size limit.
This is both inefficient and can lead to linear read
overflows if a source string is not NUL-terminated [1].
In an effort to remove strlcpy() completely [2], replace
strlcpy() here with strscpy().
No return values were used, so direct replacement is safe.

[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy
[2] https://github.com/KSPP/linux/issues/89

Signed-off-by: Azeem Shaikh <azeemshaikh38@gmail.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20230703165817.2840457-1-azeemshaikh38@gmail.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/events/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 78ae7b6f90fd..2554f5fc70dc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8249,7 +8249,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 	unsigned int size;
 
 	memset(comm, 0, sizeof(comm));
-	strlcpy(comm, comm_event->task->comm, sizeof(comm));
+	strscpy(comm, comm_event->task->comm, sizeof(comm));
 	size = ALIGN(strlen(comm)+1, sizeof(u64));
 
 	comm_event->comm = comm;
@@ -8704,7 +8704,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	}
 
 cpy_name:
-	strlcpy(tmp, name, sizeof(tmp));
+	strscpy(tmp, name, sizeof(tmp));
 	name = tmp;
 got_name:
 	/*
@@ -9128,7 +9128,7 @@ void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
 	    ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
 		goto err;
 
-	strlcpy(name, sym, KSYM_NAME_LEN);
+	strscpy(name, sym, KSYM_NAME_LEN);
 	name_len = strlen(name) + 1;
 	while (!IS_ALIGNED(name_len, sizeof(u64)))
 		name[name_len++] = '\0';

From 0c7c237b1c35011ef0b8d30c1d5c20bc6ae7b69b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 20 Jul 2023 19:38:59 +0100
Subject: [PATCH 0612/2047] kselftest/arm64: Add a test case for SVE VL changes
 with SME active

We just fixed an issue where changing the SVE VL while SME was active could
result in us attempting to save the streaming mode SVE vectors without any
backing storage. Add a test case which provokes that issue, ideally we
should also verify that the contents of ZA are unaffected by any of what we
did.

Note that since we need to keep streaming mode enabled we can't use any
syscalls to trigger the issue, we have to sit in a loop in usersapce and
hope to be preempted. The chosen numbers trigger with defconfig on all the
virtual platforms for me, this won't be 100% on all systems but avoid an
overcomplicated test implementation.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230720-arm64-fix-sve-sme-vl-change-v2-2-8eea06b82d57@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/fp/vec-syscfg.c | 105 +++++++++++++++++-
 1 file changed, 102 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
index 9bcfcdc34ee9..58ea4bde5be7 100644
--- a/tools/testing/selftests/arm64/fp/vec-syscfg.c
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -6,6 +6,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -39,9 +40,11 @@ struct vec_data {
 	int max_vl;
 };
 
+#define VEC_SVE 0
+#define VEC_SME 1
 
 static struct vec_data vec_data[] = {
-	{
+	[VEC_SVE] = {
 		.name = "SVE",
 		.hwcap_type = AT_HWCAP,
 		.hwcap = HWCAP_SVE,
@@ -51,7 +54,7 @@ static struct vec_data vec_data[] = {
 		.prctl_set = PR_SVE_SET_VL,
 		.default_vl_file = "/proc/sys/abi/sve_default_vector_length",
 	},
-	{
+	[VEC_SME] = {
 		.name = "SME",
 		.hwcap_type = AT_HWCAP2,
 		.hwcap = HWCAP2_SME,
@@ -644,18 +647,107 @@ static const test_type tests[] = {
 	prctl_set_all_vqs,
 };
 
+static inline void smstart(void)
+{
+	asm volatile("msr S0_3_C4_C7_3, xzr");
+}
+
+static inline void smstart_sm(void)
+{
+	asm volatile("msr S0_3_C4_C3_3, xzr");
+}
+
+static inline void smstop(void)
+{
+	asm volatile("msr S0_3_C4_C6_3, xzr");
+}
+
+
+/*
+ * Verify we can change the SVE vector length while SME is active and
+ * continue to use SME afterwards.
+ */
+static void change_sve_with_za(void)
+{
+	struct vec_data *sve_data = &vec_data[VEC_SVE];
+	bool pass = true;
+	int ret, i;
+
+	if (sve_data->min_vl == sve_data->max_vl) {
+		ksft_print_msg("Only one SVE VL supported, can't change\n");
+		ksft_test_result_skip("change_sve_while_sme\n");
+		return;
+	}
+
+	/* Ensure we will trigger a change when we set the maximum */
+	ret = prctl(sve_data->prctl_set, sve_data->min_vl);
+	if (ret != sve_data->min_vl) {
+		ksft_print_msg("Failed to set SVE VL %d: %d\n",
+			       sve_data->min_vl, ret);
+		pass = false;
+	}
+
+	/* Enable SM and ZA */
+	smstart();
+
+	/* Trigger another VL change */
+	ret = prctl(sve_data->prctl_set, sve_data->max_vl);
+	if (ret != sve_data->max_vl) {
+		ksft_print_msg("Failed to set SVE VL %d: %d\n",
+			       sve_data->max_vl, ret);
+		pass = false;
+	}
+
+	/*
+	 * Spin for a bit with SM enabled to try to trigger another
+	 * save/restore.  We can't use syscalls without exiting
+	 * streaming mode.
+	 */
+	for (i = 0; i < 100000000; i++)
+		smstart_sm();
+
+	/*
+	 * TODO: Verify that ZA was preserved over the VL change and
+	 * spin.
+	 */
+
+	/* Clean up after ourselves */
+	smstop();
+	ret = prctl(sve_data->prctl_set, sve_data->default_vl);
+	if (ret != sve_data->default_vl) {
+	        ksft_print_msg("Failed to restore SVE VL %d: %d\n",
+			       sve_data->default_vl, ret);
+		pass = false;
+	}
+
+	ksft_test_result(pass, "change_sve_with_za\n");
+}
+
+typedef void (*test_all_type)(void);
+
+static const struct {
+	const char *name;
+	test_all_type test;
+}  all_types_tests[] = {
+	{ "change_sve_with_za", change_sve_with_za },
+};
+
 int main(void)
 {
+	bool all_supported = true;
 	int i, j;
 
 	ksft_print_header();
-	ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data));
+	ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data) +
+		      ARRAY_SIZE(all_types_tests));
 
 	for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
 		struct vec_data *data = &vec_data[i];
 		unsigned long supported;
 
 		supported = getauxval(data->hwcap_type) & data->hwcap;
+		if (!supported)
+			all_supported = false;
 
 		for (j = 0; j < ARRAY_SIZE(tests); j++) {
 			if (supported)
@@ -666,5 +758,12 @@ int main(void)
 		}
 	}
 
+	for (i = 0; i < ARRAY_SIZE(all_types_tests); i++) {
+		if (all_supported)
+			all_types_tests[i].test();
+		else
+			ksft_test_result_skip("%s\n", all_types_tests[i].name);
+	}
+
 	ksft_exit_pass();
 }

From 0aeead9bb240a6965ede2bed68096c4381fd46a3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 20 Jul 2023 19:39:00 +0100
Subject: [PATCH 0613/2047] kselftest/arm64: Validate that changing one VL type
 does not affect another

On a system with both SVE and SME when we change one of the VLs this should
not result in a change in the other VL. Add a check that this is in fact
the case to vec-syscfg.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230720-arm64-fix-sve-sme-vl-change-v2-3-8eea06b82d57@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/fp/vec-syscfg.c | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
index 58ea4bde5be7..5f648b97a06f 100644
--- a/tools/testing/selftests/arm64/fp/vec-syscfg.c
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -554,7 +554,8 @@ static void prctl_set_onexec(struct vec_data *data)
 /* For each VQ verify that setting via prctl() does the right thing */
 static void prctl_set_all_vqs(struct vec_data *data)
 {
-	int ret, vq, vl, new_vl;
+	int ret, vq, vl, new_vl, i;
+	int orig_vls[ARRAY_SIZE(vec_data)];
 	int errors = 0;
 
 	if (!data->min_vl || !data->max_vl) {
@@ -563,6 +564,9 @@ static void prctl_set_all_vqs(struct vec_data *data)
 		return;
 	}
 
+	for (i = 0; i < ARRAY_SIZE(vec_data); i++)
+		orig_vls[i] = vec_data[i].rdvl();
+
 	for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
 		vl = sve_vl_from_vq(vq);
 
@@ -585,6 +589,22 @@ static void prctl_set_all_vqs(struct vec_data *data)
 			errors++;
 		}
 
+		/* Did any other VLs change? */
+		for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+			if (&vec_data[i] == data)
+				continue;
+
+			if (!(getauxval(vec_data[i].hwcap_type) & vec_data[i].hwcap))
+				continue;
+
+			if (vec_data[i].rdvl() != orig_vls[i]) {
+				ksft_print_msg("%s VL changed from %d to %d\n",
+					       vec_data[i].name, orig_vls[i],
+					       vec_data[i].rdvl());
+				errors++;
+			}
+		}
+
 		/* Was that the VL we asked for? */
 		if (new_vl == vl)
 			continue;

From c3651feff2969b13c45f87444befcb96a0137b8d Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Mon, 24 Jul 2023 11:45:39 +0100
Subject: [PATCH 0614/2047] Documentation: arm64: Correct SME ZA macros name

It should be ZA_PT_ZA*. ZA_PT_ZA_OFFSET is one example.

It is not ZA_PT_ZA_* because there is one macro ZA_PT_ZAV_OFFSET
that doesn't fit that pattern.

Fixes: 96d32e630935 ("arm64/sme: Provide ABI documentation for SME")
Signed-off-by: David Spickett <david.spickett@linaro.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/arch/arm64/sme.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/arch/arm64/sme.rst b/Documentation/arch/arm64/sme.rst
index ba529a1dc606..3d0e53ecac4f 100644
--- a/Documentation/arch/arm64/sme.rst
+++ b/Documentation/arch/arm64/sme.rst
@@ -322,7 +322,7 @@ The regset data starts with struct user_za_header, containing:
   VL is supported.
 
 * The size and layout of the payload depends on the header fields.  The
-  SME_PT_ZA_*() macros are provided to facilitate access to the data.
+  ZA_PT_ZA*() macros are provided to facilitate access to the data.
 
 * In either case, for SETREGSET it is permissible to omit the payload, in which
   case the vector length and flags are changed and PSTATE.ZA is set to 0

From 64a0b90a3c1ed0aa15e75f8c688f8e55f1f12999 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Thu, 27 Jul 2023 10:03:24 +0800
Subject: [PATCH 0615/2047] arm64/Kconfig: Sort the RCpc feature under the
 ARMv8.3 features menu

Moving LDAPR detective config under the ARMv8.3 menu would be more
reasonable than under ARMv8.1, since this feature was released together
with the ARMv8.3 features list.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Link: https://lore.kernel.org/r/20230727020324.2149960-1-zengheng4@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a2511b30d0f6..29db061db9bb 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1793,9 +1793,6 @@ config ARM64_PAN
 	  The feature is detected at runtime, and will remain as a 'nop'
 	  instruction if the cpu does not implement the feature.
 
-config AS_HAS_LDAPR
-	def_bool $(as-instr,.arch_extension rcpc)
-
 config AS_HAS_LSE_ATOMICS
 	def_bool $(as-instr,.arch_extension lse)
 
@@ -1933,6 +1930,9 @@ config AS_HAS_ARMV8_3
 config AS_HAS_CFI_NEGATE_RA_STATE
 	def_bool $(as-instr,.cfi_startproc\n.cfi_negate_ra_state\n.cfi_endproc\n)
 
+config AS_HAS_LDAPR
+	def_bool $(as-instr,.arch_extension rcpc)
+
 endmenu # "ARMv8.3 architectural features"
 
 menu "ARMv8.4 architectural features"

From b2b561757027ef03b1243c828820a9004458194c Mon Sep 17 00:00:00 2001
From: Chen Jiahao <chenjiahao16@huawei.com>
Date: Thu, 27 Jul 2023 21:16:35 +0800
Subject: [PATCH 0616/2047] spi: s3c64xx: Clean up redundant dev_err_probe()

Referring to platform_get_irq()'s definition, the return value has
already been checked if ret < 0, and printed via dev_err_probe().
Calling dev_err_probe() one more time outside platform_get_irq()
is obviously redundant.

Removing dev_err_probe() outside platform_get_irq() to clean up
above problem.

Signed-off-by: Chen Jiahao <chenjiahao16@huawei.com>
Reviewed-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/20230727131635.2898051-1-chenjiahao16@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-s3c64xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index a1b68fdc205d..bf7cb18be9dc 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -1167,7 +1167,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
-		return dev_err_probe(&pdev->dev, irq, "Failed to get IRQ\n");
+		return irq;
 
 	master = devm_spi_alloc_master(&pdev->dev, sizeof(*sdd));
 	if (!master)

From b6ed2f7758a558485ff889b4a3912fcb6abcf689 Mon Sep 17 00:00:00 2001
From: Azeem Shaikh <azeemshaikh38@gmail.com>
Date: Thu, 6 Jul 2023 17:58:04 +0000
Subject: [PATCH 0617/2047] EISA: Replace all non-returning strlcpy with
 strscpy

strlcpy() reads the entire source buffer first.
This read may exceed the destination size limit.
This is both inefficient and can lead to linear read
overflows if a source string is not NUL-terminated [1].
In an effort to remove strlcpy() completely [2], replace
strlcpy() here with strscpy().
No return values were used, so direct replacement is safe.

[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy
[2] https://github.com/KSPP/linux/issues/89

Signed-off-by: Azeem Shaikh <azeemshaikh38@gmail.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20230706175804.2249018-1-azeemshaikh38@gmail.com
Signed-off-by: Kees Cook <keescook@chromium.org>
# diff --git a/drivers/eisa/eisa-bus.c b/drivers/eisa/eisa-bus.c
# index 713582cc27d1..33f0ba11c6ad 100644
# --- a/drivers/eisa/eisa-bus.c
# +++ b/drivers/eisa/eisa-bus.c
# @@ -60,7 +60,7 @@ static void __init eisa_name_device(struct eisa_device *edev)
#  	int i;
#  	for (i = 0; i < EISA_INFOS; i++) {
#  		if (!strcmp(edev->id.sig, eisa_table[i].id.sig)) {
# -			strlcpy(edev->pretty_name,
# +			strscpy(edev->pretty_name,
#  				eisa_table[i].name,
#  				sizeof(edev->pretty_name));
#  			return;
---
 drivers/eisa/eisa-bus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/eisa/eisa-bus.c b/drivers/eisa/eisa-bus.c
index 713582cc27d1..33f0ba11c6ad 100644
--- a/drivers/eisa/eisa-bus.c
+++ b/drivers/eisa/eisa-bus.c
@@ -60,7 +60,7 @@ static void __init eisa_name_device(struct eisa_device *edev)
 	int i;
 	for (i = 0; i < EISA_INFOS; i++) {
 		if (!strcmp(edev->id.sig, eisa_table[i].id.sig)) {
-			strlcpy(edev->pretty_name,
+			strscpy(edev->pretty_name,
 				eisa_table[i].name,
 				sizeof(edev->pretty_name));
 			return;

From 7a4dcb4a5de1214c4a59448a759e2e264c2c4473 Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.ibm.com>
Date: Wed, 5 Jul 2023 16:51:34 +0200
Subject: [PATCH 0618/2047] cpu/hotplug: Remove dependancy against
 cpu_primary_thread_mask

The commit 18415f33e2ac ("cpu/hotplug: Allow "parallel" bringup up to
CPUHP_BP_KICK_AP_STATE") introduce a dependancy against a global variable
cpu_primary_thread_mask exported by the X86 code. This variable is only
used when CONFIG_HOTPLUG_PARALLEL is set.

Since cpuhp_get_primary_thread_mask() and cpuhp_smt_aware() are only used
when CONFIG_HOTPLUG_PARALLEL is set, don't define them when it is not set.

No functional change.

Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Link: https://lore.kernel.org/r/20230705145143.40545-2-ldufour@linux.ibm.com
---
 kernel/cpu.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 88a7ede322bd..03309f2f35a4 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -650,22 +650,8 @@ bool cpu_smt_possible(void)
 }
 EXPORT_SYMBOL_GPL(cpu_smt_possible);
 
-static inline bool cpuhp_smt_aware(void)
-{
-	return topology_smt_supported();
-}
-
-static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
-{
-	return cpu_primary_thread_mask;
-}
 #else
 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
-static inline bool cpuhp_smt_aware(void) { return false; }
-static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
-{
-	return cpu_present_mask;
-}
 #endif
 
 static inline enum cpuhp_state
@@ -1793,6 +1779,16 @@ static int __init parallel_bringup_parse_param(char *arg)
 }
 early_param("cpuhp.parallel", parallel_bringup_parse_param);
 
+static inline bool cpuhp_smt_aware(void)
+{
+	return topology_smt_supported();
+}
+
+static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
+{
+	return cpu_primary_thread_mask;
+}
+
 /*
  * On architectures which have enabled parallel bringup this invokes all BP
  * prepare states for each of the to be onlined APs first. The last state

From 3f9169196be55590a794b52f49637561ddd1ba4f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 5 Jul 2023 16:51:35 +0200
Subject: [PATCH 0619/2047] cpu/SMT: Move SMT prototypes into cpu_smt.h

In order to export the cpuhp_smt_control enum as part of the interface
between generic and architecture code, the architecture code needs to
include asm/topology.h.

But that leads to circular header dependencies. So split the enum and
related declarations into a separate header.

[ ldufour: Reworded the commit's description ]

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Link: https://lore.kernel.org/r/20230705145143.40545-3-ldufour@linux.ibm.com
---
 arch/x86/include/asm/topology.h |  2 ++
 include/linux/cpu.h             | 25 +------------------------
 include/linux/cpu_smt.h         | 29 +++++++++++++++++++++++++++++
 kernel/cpu.c                    |  1 +
 4 files changed, 33 insertions(+), 24 deletions(-)
 create mode 100644 include/linux/cpu_smt.h

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index caf41c4869a0..ae49ed4417d0 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -136,6 +136,8 @@ static inline int topology_max_smt_threads(void)
 	return __max_smt_threads;
 }
 
+#include <linux/cpu_smt.h>
+
 int topology_update_package_map(unsigned int apicid, unsigned int cpu);
 int topology_update_die_map(unsigned int dieid, unsigned int cpu);
 int topology_phys_to_logical_pkg(unsigned int pkg);
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 6e6e57ec69e8..6b326a9e8191 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -18,6 +18,7 @@
 #include <linux/compiler.h>
 #include <linux/cpumask.h>
 #include <linux/cpuhotplug.h>
+#include <linux/cpu_smt.h>
 
 struct device;
 struct device_node;
@@ -204,30 +205,6 @@ void cpuhp_report_idle_dead(void);
 static inline void cpuhp_report_idle_dead(void) { }
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
-enum cpuhp_smt_control {
-	CPU_SMT_ENABLED,
-	CPU_SMT_DISABLED,
-	CPU_SMT_FORCE_DISABLED,
-	CPU_SMT_NOT_SUPPORTED,
-	CPU_SMT_NOT_IMPLEMENTED,
-};
-
-#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
-extern enum cpuhp_smt_control cpu_smt_control;
-extern void cpu_smt_disable(bool force);
-extern void cpu_smt_check_topology(void);
-extern bool cpu_smt_possible(void);
-extern int cpuhp_smt_enable(void);
-extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval);
-#else
-# define cpu_smt_control		(CPU_SMT_NOT_IMPLEMENTED)
-static inline void cpu_smt_disable(bool force) { }
-static inline void cpu_smt_check_topology(void) { }
-static inline bool cpu_smt_possible(void) { return false; }
-static inline int cpuhp_smt_enable(void) { return 0; }
-static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
-#endif
-
 extern bool cpu_mitigations_off(void);
 extern bool cpu_mitigations_auto_nosmt(void);
 
diff --git a/include/linux/cpu_smt.h b/include/linux/cpu_smt.h
new file mode 100644
index 000000000000..722c2e306fef
--- /dev/null
+++ b/include/linux/cpu_smt.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CPU_SMT_H_
+#define _LINUX_CPU_SMT_H_
+
+enum cpuhp_smt_control {
+	CPU_SMT_ENABLED,
+	CPU_SMT_DISABLED,
+	CPU_SMT_FORCE_DISABLED,
+	CPU_SMT_NOT_SUPPORTED,
+	CPU_SMT_NOT_IMPLEMENTED,
+};
+
+#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
+extern enum cpuhp_smt_control cpu_smt_control;
+extern void cpu_smt_disable(bool force);
+extern void cpu_smt_check_topology(void);
+extern bool cpu_smt_possible(void);
+extern int cpuhp_smt_enable(void);
+extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval);
+#else
+# define cpu_smt_control               (CPU_SMT_NOT_IMPLEMENTED)
+static inline void cpu_smt_disable(bool force) { }
+static inline void cpu_smt_check_topology(void) { }
+static inline bool cpu_smt_possible(void) { return false; }
+static inline int cpuhp_smt_enable(void) { return 0; }
+static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
+#endif
+
+#endif /* _LINUX_CPU_SMT_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 03309f2f35a4..e02204c4675a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -592,6 +592,7 @@ static void lockdep_release_cpus_lock(void)
 void __weak arch_smt_update(void) { }
 
 #ifdef CONFIG_HOTPLUG_SMT
+
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 
 void __init cpu_smt_disable(bool force)

From c53361ce7d8771129c517dca529d2f2dc5bf04d1 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 5 Jul 2023 16:51:36 +0200
Subject: [PATCH 0620/2047] cpu/SMT: Move smt/control simple exit cases earlier

Move the simple exit cases, i.e. those which don't depend on the value
written, earlier in the function. That makes it clearer that regardless of
the input those states cannot be transitioned out of.

That does have a user-visible effect, in that the error returned will
now always be EPERM/ENODEV for those states, regardless of the value
written. Previously writing an invalid value would return EINVAL even
when in those states.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Link: https://lore.kernel.org/r/20230705145143.40545-4-ldufour@linux.ibm.com
---
 kernel/cpu.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index e02204c4675a..b6fe170c93e9 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2841,6 +2841,12 @@ __store_smt_control(struct device *dev, struct device_attribute *attr,
 {
 	int ctrlval, ret;
 
+	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
+		return -EPERM;
+
+	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+		return -ENODEV;
+
 	if (sysfs_streq(buf, "on"))
 		ctrlval = CPU_SMT_ENABLED;
 	else if (sysfs_streq(buf, "off"))
@@ -2850,12 +2856,6 @@ __store_smt_control(struct device *dev, struct device_attribute *attr,
 	else
 		return -EINVAL;
 
-	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
-		return -EPERM;
-
-	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
-		return -ENODEV;
-
 	ret = lock_device_hotplug_sysfs();
 	if (ret)
 		return ret;

From 447ae4ac41130a7f127c2581a5e816bb0800b560 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 5 Jul 2023 16:51:37 +0200
Subject: [PATCH 0621/2047] cpu/SMT: Store the current/max number of threads

Some architectures allow partial SMT states at boot time, ie. when not all
SMT threads are brought online.

To support that the SMT code needs to know the maximum number of SMT
threads, and also the currently configured number.

The architecture code knows the max number of threads, so have the
architecture code pass that value to cpu_smt_set_num_threads(). Note that
although topology_max_smt_threads() exists, it is not configured early
enough to be used here. As architecture, like PowerPC, allows the threads
number to be set through the kernel command line, also pass that value.

[ ldufour: Slightly reword the commit message ]
[ ldufour: Rename cpu_smt_check_topology and add a num_threads argument ]

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Link: https://lore.kernel.org/r/20230705145143.40545-5-ldufour@linux.ibm.com
---
 arch/x86/kernel/cpu/common.c |  2 +-
 include/linux/cpu_smt.h      |  8 ++++++--
 kernel/cpu.c                 | 21 ++++++++++++++++++++-
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 52683fddafaf..12a48a85da3d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2317,7 +2317,7 @@ void __init arch_cpu_finalize_init(void)
 	 * identify_boot_cpu() initialized SMT support information, let the
 	 * core code know.
 	 */
-	cpu_smt_check_topology();
+	cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings);
 
 	if (!IS_ENABLED(CONFIG_SMP)) {
 		pr_info("CPU: ");
diff --git a/include/linux/cpu_smt.h b/include/linux/cpu_smt.h
index 722c2e306fef..0c1664294b57 100644
--- a/include/linux/cpu_smt.h
+++ b/include/linux/cpu_smt.h
@@ -12,15 +12,19 @@ enum cpuhp_smt_control {
 
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
+extern unsigned int cpu_smt_num_threads;
 extern void cpu_smt_disable(bool force);
-extern void cpu_smt_check_topology(void);
+extern void cpu_smt_set_num_threads(unsigned int num_threads,
+				    unsigned int max_threads);
 extern bool cpu_smt_possible(void);
 extern int cpuhp_smt_enable(void);
 extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval);
 #else
 # define cpu_smt_control               (CPU_SMT_NOT_IMPLEMENTED)
+# define cpu_smt_num_threads 1
 static inline void cpu_smt_disable(bool force) { }
-static inline void cpu_smt_check_topology(void) { }
+static inline void cpu_smt_set_num_threads(unsigned int num_threads,
+					   unsigned int max_threads) { }
 static inline bool cpu_smt_possible(void) { return false; }
 static inline int cpuhp_smt_enable(void) { return 0; }
 static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b6fe170c93e9..d7dd535cb5b5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -594,6 +594,8 @@ void __weak arch_smt_update(void) { }
 #ifdef CONFIG_HOTPLUG_SMT
 
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
+static unsigned int cpu_smt_max_threads __ro_after_init;
+unsigned int cpu_smt_num_threads __read_mostly = UINT_MAX;
 
 void __init cpu_smt_disable(bool force)
 {
@@ -607,16 +609,33 @@ void __init cpu_smt_disable(bool force)
 		pr_info("SMT: disabled\n");
 		cpu_smt_control = CPU_SMT_DISABLED;
 	}
+	cpu_smt_num_threads = 1;
 }
 
 /*
  * The decision whether SMT is supported can only be done after the full
  * CPU identification. Called from architecture code.
  */
-void __init cpu_smt_check_topology(void)
+void __init cpu_smt_set_num_threads(unsigned int num_threads,
+				    unsigned int max_threads)
 {
+	WARN_ON(!num_threads || (num_threads > max_threads));
+
 	if (!topology_smt_supported())
 		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+
+	cpu_smt_max_threads = max_threads;
+
+	/*
+	 * If SMT has been disabled via the kernel command line or SMT is
+	 * not supported, set cpu_smt_num_threads to 1 for consistency.
+	 * If enabled, take the architecture requested number of threads
+	 * to bring up into account.
+	 */
+	if (cpu_smt_control != CPU_SMT_ENABLED)
+		cpu_smt_num_threads = 1;
+	else if (num_threads < cpu_smt_num_threads)
+		cpu_smt_num_threads = num_threads;
 }
 
 static int __init smt_cmdline_disable(char *str)

From 91b4a7dbfe05ddb6fd3cf78cc11fb5ed64d3af90 Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.ibm.com>
Date: Wed, 5 Jul 2023 16:51:38 +0200
Subject: [PATCH 0622/2047] cpu/SMT: Remove topology_smt_supported()

Since the maximum number of threads is now passed to cpu_smt_set_num_threads(),
checking that value is enough to know whether SMT is supported.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Link: https://lore.kernel.org/r/20230705145143.40545-6-ldufour@linux.ibm.com
---
 arch/x86/include/asm/topology.h | 2 --
 arch/x86/kernel/smpboot.c       | 8 --------
 kernel/cpu.c                    | 4 ++--
 3 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index ae49ed4417d0..3235ba1e5b06 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -141,7 +141,6 @@ static inline int topology_max_smt_threads(void)
 int topology_update_package_map(unsigned int apicid, unsigned int cpu);
 int topology_update_die_map(unsigned int dieid, unsigned int cpu);
 int topology_phys_to_logical_pkg(unsigned int pkg);
-bool topology_smt_supported(void);
 
 extern struct cpumask __cpu_primary_thread_mask;
 #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
@@ -164,7 +163,6 @@ static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 static inline int topology_max_die_per_package(void) { return 1; }
 static inline int topology_max_smt_threads(void) { return 1; }
 static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
-static inline bool topology_smt_supported(void) { return false; }
 #endif /* !CONFIG_SMP */
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e1aa2cd7734b..d4e897b820c4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -326,14 +326,6 @@ static void notrace start_secondary(void *unused)
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
-/**
- * topology_smt_supported - Check whether SMT is supported by the CPUs
- */
-bool topology_smt_supported(void)
-{
-	return smp_num_siblings > 1;
-}
-
 /**
  * topology_phys_to_logical_pkg - Map a physical package id to a logical
  * @phys_pkg:	The physical package id to map
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d7dd535cb5b5..70add058e77b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -621,7 +621,7 @@ void __init cpu_smt_set_num_threads(unsigned int num_threads,
 {
 	WARN_ON(!num_threads || (num_threads > max_threads));
 
-	if (!topology_smt_supported())
+	if (max_threads == 1)
 		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
 
 	cpu_smt_max_threads = max_threads;
@@ -1801,7 +1801,7 @@ early_param("cpuhp.parallel", parallel_bringup_parse_param);
 
 static inline bool cpuhp_smt_aware(void)
 {
-	return topology_smt_supported();
+	return cpu_smt_max_threads > 1;
 }
 
 static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)

From 38253464bc821d6de6bba81bb1412ebb36f6cbd1 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 5 Jul 2023 16:51:39 +0200
Subject: [PATCH 0623/2047] cpu/SMT: Create topology_smt_thread_allowed()

Some architectures allows partial SMT states, i.e. when not all SMT threads
are brought online.

To support that, add an architecture helper which checks whether a given
CPU is allowed to be brought online depending on how many SMT threads are
currently enabled. Since this is only applicable to architecture supporting
partial SMT, only these architectures should select the new configuration
variable CONFIG_SMT_NUM_THREADS_DYNAMIC. For the other architectures, not
supporting the partial SMT states, there is no need to define
topology_cpu_smt_allowed(), the generic code assumed that all the threads
are allowed or only the primary ones.

Call the helper from cpu_smt_enable(), and cpu_smt_allowed() when SMT is
enabled, to check if the particular thread should be onlined. Notably,
also call it from cpu_smt_disable() if CPU_SMT_ENABLED, to allow
offlining some threads to move from a higher to lower number of threads
online.

[ ldufour: Slightly reword the commit's description ]
[ ldufour: Introduce CONFIG_SMT_NUM_THREADS_DYNAMIC ]

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Link: https://lore.kernel.org/r/20230705145143.40545-7-ldufour@linux.ibm.com
---
 arch/Kconfig |  3 +++
 kernel/cpu.c | 24 +++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index aff2746c8af2..63c5d6a2022b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -34,6 +34,9 @@ config ARCH_HAS_SUBPAGE_FAULTS
 config HOTPLUG_SMT
 	bool
 
+config SMT_NUM_THREADS_DYNAMIC
+	bool
+
 # Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL
 config HOTPLUG_CORE_SYNC
 	bool
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 70add058e77b..9a8d0685e055 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -645,9 +645,23 @@ static int __init smt_cmdline_disable(char *str)
 }
 early_param("nosmt", smt_cmdline_disable);
 
+/*
+ * For Archicture supporting partial SMT states check if the thread is allowed.
+ * Otherwise this has already been checked through cpu_smt_max_threads when
+ * setting the SMT level.
+ */
+static inline bool cpu_smt_thread_allowed(unsigned int cpu)
+{
+#ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC
+	return topology_smt_thread_allowed(cpu);
+#else
+	return true;
+#endif
+}
+
 static inline bool cpu_smt_allowed(unsigned int cpu)
 {
-	if (cpu_smt_control == CPU_SMT_ENABLED)
+	if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
 		return true;
 
 	if (topology_is_primary_thread(cpu))
@@ -2642,6 +2656,12 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 	for_each_online_cpu(cpu) {
 		if (topology_is_primary_thread(cpu))
 			continue;
+		/*
+		 * Disable can be called with CPU_SMT_ENABLED when changing
+		 * from a higher to lower number of SMT threads per core.
+		 */
+		if (ctrlval == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
+			continue;
 		ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
 		if (ret)
 			break;
@@ -2676,6 +2696,8 @@ int cpuhp_smt_enable(void)
 		/* Skip online CPUs and CPUs on offline nodes */
 		if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
 			continue;
+		if (!cpu_smt_thread_allowed(cpu))
+			continue;
 		ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
 		if (ret)
 			break;

From 7f48405c3c3437d578d0f1e4287aa0a3bfa5623f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 5 Jul 2023 16:51:40 +0200
Subject: [PATCH 0624/2047] cpu/SMT: Allow enabling partial SMT states via
 sysfs

Add support to the /sys/devices/system/cpu/smt/control interface for
enabling a specified number of SMT threads per core, including partial
SMT states where not all threads are brought online.

The current interface accepts "on" and "off", to enable either 1 or all
SMT threads per core.

This commit allows writing an integer, between 1 and the number of SMT
threads supported by the machine. Writing 1 is a synonym for "off", 2 or
more enables SMT with the specified number of threads.

When reading the file, if all threads are online "on" is returned, to
avoid changing behaviour for existing users. If some other number of
threads is online then the integer value is returned.

Architectures like x86 only supporting 1 thread or all threads, should not
define CONFIG_SMT_NUM_THREADS_DYNAMIC. Architecture supporting partial SMT
states, like PowerPC, should define it.

[ ldufour: Slightly reword the commit's description ]
[ ldufour: Remove switch() in __store_smt_control() ]
[ ldufour: Rix build issue in control_show() ]

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Link: https://lore.kernel.org/r/20230705145143.40545-8-ldufour@linux.ibm.com
---
 .../ABI/testing/sysfs-devices-system-cpu      |  1 +
 kernel/cpu.c                                  | 60 ++++++++++++++-----
 2 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index ecd585ca2d50..6dba65fb1956 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -555,6 +555,7 @@ Description:	Control Symmetric Multi Threading (SMT)
 			 ================ =========================================
 			 "on"		  SMT is enabled
 			 "off"		  SMT is disabled
+			 "<N>"		  SMT is enabled with N threads per core.
 			 "forceoff"	  SMT is force disabled. Cannot be changed.
 			 "notsupported"   SMT is not supported by the CPU
 			 "notimplemented" SMT runtime toggling is not
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9a8d0685e055..7e8f1b044772 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2876,11 +2876,19 @@ static const struct attribute_group cpuhp_cpu_root_attr_group = {
 
 #ifdef CONFIG_HOTPLUG_SMT
 
+static bool cpu_smt_num_threads_valid(unsigned int threads)
+{
+	if (IS_ENABLED(CONFIG_SMT_NUM_THREADS_DYNAMIC))
+		return threads >= 1 && threads <= cpu_smt_max_threads;
+	return threads == 1 || threads == cpu_smt_max_threads;
+}
+
 static ssize_t
 __store_smt_control(struct device *dev, struct device_attribute *attr,
 		    const char *buf, size_t count)
 {
-	int ctrlval, ret;
+	int ctrlval, ret, num_threads, orig_threads;
+	bool force_off;
 
 	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
 		return -EPERM;
@@ -2888,30 +2896,39 @@ __store_smt_control(struct device *dev, struct device_attribute *attr,
 	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
 		return -ENODEV;
 
-	if (sysfs_streq(buf, "on"))
+	if (sysfs_streq(buf, "on")) {
 		ctrlval = CPU_SMT_ENABLED;
-	else if (sysfs_streq(buf, "off"))
+		num_threads = cpu_smt_max_threads;
+	} else if (sysfs_streq(buf, "off")) {
 		ctrlval = CPU_SMT_DISABLED;
-	else if (sysfs_streq(buf, "forceoff"))
+		num_threads = 1;
+	} else if (sysfs_streq(buf, "forceoff")) {
 		ctrlval = CPU_SMT_FORCE_DISABLED;
-	else
+		num_threads = 1;
+	} else if (kstrtoint(buf, 10, &num_threads) == 0) {
+		if (num_threads == 1)
+			ctrlval = CPU_SMT_DISABLED;
+		else if (cpu_smt_num_threads_valid(num_threads))
+			ctrlval = CPU_SMT_ENABLED;
+		else
+			return -EINVAL;
+	} else {
 		return -EINVAL;
+	}
 
 	ret = lock_device_hotplug_sysfs();
 	if (ret)
 		return ret;
 
-	if (ctrlval != cpu_smt_control) {
-		switch (ctrlval) {
-		case CPU_SMT_ENABLED:
-			ret = cpuhp_smt_enable();
-			break;
-		case CPU_SMT_DISABLED:
-		case CPU_SMT_FORCE_DISABLED:
-			ret = cpuhp_smt_disable(ctrlval);
-			break;
-		}
-	}
+	orig_threads = cpu_smt_num_threads;
+	cpu_smt_num_threads = num_threads;
+
+	force_off = ctrlval != cpu_smt_control && ctrlval == CPU_SMT_FORCE_DISABLED;
+
+	if (num_threads > orig_threads)
+		ret = cpuhp_smt_enable();
+	else if (num_threads < orig_threads || force_off)
+		ret = cpuhp_smt_disable(ctrlval);
 
 	unlock_device_hotplug();
 	return ret ? ret : count;
@@ -2939,6 +2956,17 @@ static ssize_t control_show(struct device *dev,
 {
 	const char *state = smt_states[cpu_smt_control];
 
+#ifdef CONFIG_HOTPLUG_SMT
+	/*
+	 * If SMT is enabled but not all threads are enabled then show the
+	 * number of threads. If all threads are enabled show "on". Otherwise
+	 * show the state name.
+	 */
+	if (cpu_smt_control == CPU_SMT_ENABLED &&
+	    cpu_smt_num_threads != cpu_smt_max_threads)
+		return sysfs_emit(buf, "%d\n", cpu_smt_num_threads);
+#endif
+
 	return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
 }
 

From e22471c2331c984ba48e89acd00d0ba1f60a3ea7 Mon Sep 17 00:00:00 2001
From: Jia Jie Ho <jiajie.ho@starfivetech.com>
Date: Mon, 17 Jul 2023 12:03:02 +0800
Subject: [PATCH 0625/2047] crypto: starfive - Add AES skcipher and aead
 support

Adding AES skcipher and aead support to Starfive crypto module.
Skcipher modes of operation include ecb, cbc, ctr, ofb, cfb. Aead modes
include ccm and gcm.

v1->v2:
- Add include interrupt.h to fix compile error. (Herbert)

Co-developed-by: Huan Feng <huan.feng@starfivetech.com>
Signed-off-by: Huan Feng <huan.feng@starfivetech.com>
Signed-off-by: Jia Jie Ho <jiajie.ho@starfivetech.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/starfive/Kconfig       |    2 +
 drivers/crypto/starfive/Makefile      |    2 +-
 drivers/crypto/starfive/jh7110-aes.c  | 1034 +++++++++++++++++++++++++
 drivers/crypto/starfive/jh7110-cryp.c |   36 +-
 drivers/crypto/starfive/jh7110-cryp.h |   65 ++
 5 files changed, 1132 insertions(+), 7 deletions(-)
 create mode 100644 drivers/crypto/starfive/jh7110-aes.c

diff --git a/drivers/crypto/starfive/Kconfig b/drivers/crypto/starfive/Kconfig
index df745fcb09df..2cb192502c1b 100644
--- a/drivers/crypto/starfive/Kconfig
+++ b/drivers/crypto/starfive/Kconfig
@@ -12,6 +12,8 @@ config CRYPTO_DEV_JH7110
 	select CRYPTO_SHA512
 	select CRYPTO_SM3_GENERIC
 	select CRYPTO_RSA
+	select CRYPTO_AES
+	select CRYPTO_CCM
 	help
 	  Support for StarFive JH7110 crypto hardware acceleration engine.
 	  This module provides acceleration for public key algo,
diff --git a/drivers/crypto/starfive/Makefile b/drivers/crypto/starfive/Makefile
index 98b01d2f1ccf..8c137afe58ad 100644
--- a/drivers/crypto/starfive/Makefile
+++ b/drivers/crypto/starfive/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_CRYPTO_DEV_JH7110) += jh7110-crypto.o
-jh7110-crypto-objs := jh7110-cryp.o jh7110-hash.o jh7110-rsa.o
+jh7110-crypto-objs := jh7110-cryp.o jh7110-hash.o jh7110-rsa.o jh7110-aes.o
diff --git a/drivers/crypto/starfive/jh7110-aes.c b/drivers/crypto/starfive/jh7110-aes.c
new file mode 100644
index 000000000000..04dd7958054f
--- /dev/null
+++ b/drivers/crypto/starfive/jh7110-aes.c
@@ -0,0 +1,1034 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * StarFive AES acceleration driver
+ *
+ * Copyright (c) 2022 StarFive Technology
+ */
+
+#include <linux/iopoll.h>
+#include <crypto/gcm.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include "jh7110-cryp.h"
+
+#define STARFIVE_AES_REGS_OFFSET	0x100
+#define STARFIVE_AES_AESDIO0R		(STARFIVE_AES_REGS_OFFSET + 0x0)
+#define STARFIVE_AES_KEY0		(STARFIVE_AES_REGS_OFFSET + 0x4)
+#define STARFIVE_AES_KEY1		(STARFIVE_AES_REGS_OFFSET + 0x8)
+#define STARFIVE_AES_KEY2		(STARFIVE_AES_REGS_OFFSET + 0xC)
+#define STARFIVE_AES_KEY3		(STARFIVE_AES_REGS_OFFSET + 0x10)
+#define STARFIVE_AES_KEY4		(STARFIVE_AES_REGS_OFFSET + 0x14)
+#define STARFIVE_AES_KEY5		(STARFIVE_AES_REGS_OFFSET + 0x18)
+#define STARFIVE_AES_KEY6		(STARFIVE_AES_REGS_OFFSET + 0x1C)
+#define STARFIVE_AES_KEY7		(STARFIVE_AES_REGS_OFFSET + 0x20)
+#define STARFIVE_AES_CSR		(STARFIVE_AES_REGS_OFFSET + 0x24)
+#define STARFIVE_AES_IV0		(STARFIVE_AES_REGS_OFFSET + 0x28)
+#define STARFIVE_AES_IV1		(STARFIVE_AES_REGS_OFFSET + 0x2C)
+#define STARFIVE_AES_IV2		(STARFIVE_AES_REGS_OFFSET + 0x30)
+#define STARFIVE_AES_IV3		(STARFIVE_AES_REGS_OFFSET + 0x34)
+#define STARFIVE_AES_NONCE0		(STARFIVE_AES_REGS_OFFSET + 0x3C)
+#define STARFIVE_AES_NONCE1		(STARFIVE_AES_REGS_OFFSET + 0x40)
+#define STARFIVE_AES_NONCE2		(STARFIVE_AES_REGS_OFFSET + 0x44)
+#define STARFIVE_AES_NONCE3		(STARFIVE_AES_REGS_OFFSET + 0x48)
+#define STARFIVE_AES_ALEN0		(STARFIVE_AES_REGS_OFFSET + 0x4C)
+#define STARFIVE_AES_ALEN1		(STARFIVE_AES_REGS_OFFSET + 0x50)
+#define STARFIVE_AES_MLEN0		(STARFIVE_AES_REGS_OFFSET + 0x54)
+#define STARFIVE_AES_MLEN1		(STARFIVE_AES_REGS_OFFSET + 0x58)
+#define STARFIVE_AES_IVLEN		(STARFIVE_AES_REGS_OFFSET + 0x5C)
+
+#define FLG_MODE_MASK			GENMASK(2, 0)
+#define FLG_ENCRYPT			BIT(4)
+
+/* Misc */
+#define CCM_B0_ADATA			0x40
+#define AES_BLOCK_32			(AES_BLOCK_SIZE / sizeof(u32))
+
+static inline int starfive_aes_wait_busy(struct starfive_cryp_dev *cryp)
+{
+	u32 status;
+
+	return readl_relaxed_poll_timeout(cryp->base + STARFIVE_AES_CSR, status,
+					  !(status & STARFIVE_AES_BUSY), 10, 100000);
+}
+
+static inline int starfive_aes_wait_keydone(struct starfive_cryp_dev *cryp)
+{
+	u32 status;
+
+	return readl_relaxed_poll_timeout(cryp->base + STARFIVE_AES_CSR, status,
+					  (status & STARFIVE_AES_KEY_DONE), 10, 100000);
+}
+
+static inline int starfive_aes_wait_gcmdone(struct starfive_cryp_dev *cryp)
+{
+	u32 status;
+
+	return readl_relaxed_poll_timeout(cryp->base + STARFIVE_AES_CSR, status,
+					  (status & STARFIVE_AES_GCM_DONE), 10, 100000);
+}
+
+static inline int is_gcm(struct starfive_cryp_dev *cryp)
+{
+	return (cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_GCM;
+}
+
+static inline int is_encrypt(struct starfive_cryp_dev *cryp)
+{
+	return cryp->flags & FLG_ENCRYPT;
+}
+
+static void starfive_aes_aead_hw_start(struct starfive_cryp_ctx *ctx, u32 hw_mode)
+{
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+	unsigned int value;
+
+	switch (hw_mode) {
+	case STARFIVE_AES_MODE_GCM:
+		value = readl(ctx->cryp->base + STARFIVE_AES_CSR);
+		value |= STARFIVE_AES_GCM_START;
+		writel(value, cryp->base + STARFIVE_AES_CSR);
+		starfive_aes_wait_gcmdone(cryp);
+		break;
+	case STARFIVE_AES_MODE_CCM:
+		value = readl(ctx->cryp->base + STARFIVE_AES_CSR);
+		value |= STARFIVE_AES_CCM_START;
+		writel(value, cryp->base + STARFIVE_AES_CSR);
+		break;
+	}
+}
+
+static inline void starfive_aes_set_ivlen(struct starfive_cryp_ctx *ctx)
+{
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+
+	if (is_gcm(cryp))
+		writel(GCM_AES_IV_SIZE, cryp->base + STARFIVE_AES_IVLEN);
+	else
+		writel(AES_BLOCK_SIZE, cryp->base + STARFIVE_AES_IVLEN);
+}
+
+static inline void starfive_aes_set_alen(struct starfive_cryp_ctx *ctx)
+{
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+
+	writel(upper_32_bits(cryp->assoclen), cryp->base + STARFIVE_AES_ALEN0);
+	writel(lower_32_bits(cryp->assoclen), cryp->base + STARFIVE_AES_ALEN1);
+}
+
+static inline void starfive_aes_set_mlen(struct starfive_cryp_ctx *ctx)
+{
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+
+	writel(upper_32_bits(cryp->total_in), cryp->base + STARFIVE_AES_MLEN0);
+	writel(lower_32_bits(cryp->total_in), cryp->base + STARFIVE_AES_MLEN1);
+}
+
+static inline int starfive_aes_ccm_check_iv(const u8 *iv)
+{
+	/* 2 <= L <= 8, so 1 <= L' <= 7. */
+	if (iv[0] < 1 || iv[0] > 7)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int starfive_aes_write_iv(struct starfive_cryp_ctx *ctx, u32 *iv)
+{
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+
+	writel(iv[0], cryp->base + STARFIVE_AES_IV0);
+	writel(iv[1], cryp->base + STARFIVE_AES_IV1);
+	writel(iv[2], cryp->base + STARFIVE_AES_IV2);
+
+	if (is_gcm(cryp)) {
+		if (starfive_aes_wait_gcmdone(cryp))
+			return -ETIMEDOUT;
+
+		return 0;
+	}
+
+	writel(iv[3], cryp->base + STARFIVE_AES_IV3);
+
+	return 0;
+}
+
+static inline void starfive_aes_get_iv(struct starfive_cryp_dev *cryp, u32 *iv)
+{
+	iv[0] = readl(cryp->base + STARFIVE_AES_IV0);
+	iv[1] = readl(cryp->base + STARFIVE_AES_IV1);
+	iv[2] = readl(cryp->base + STARFIVE_AES_IV2);
+	iv[3] = readl(cryp->base + STARFIVE_AES_IV3);
+}
+
+static inline void starfive_aes_write_nonce(struct starfive_cryp_ctx *ctx, u32 *nonce)
+{
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+
+	writel(nonce[0], cryp->base + STARFIVE_AES_NONCE0);
+	writel(nonce[1], cryp->base + STARFIVE_AES_NONCE1);
+	writel(nonce[2], cryp->base + STARFIVE_AES_NONCE2);
+	writel(nonce[3], cryp->base + STARFIVE_AES_NONCE3);
+}
+
+static int starfive_aes_write_key(struct starfive_cryp_ctx *ctx)
+{
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+	u32 *key = (u32 *)ctx->key;
+
+	if (ctx->keylen >= AES_KEYSIZE_128) {
+		writel(key[0], cryp->base + STARFIVE_AES_KEY0);
+		writel(key[1], cryp->base + STARFIVE_AES_KEY1);
+		writel(key[2], cryp->base + STARFIVE_AES_KEY2);
+		writel(key[3], cryp->base + STARFIVE_AES_KEY3);
+	}
+
+	if (ctx->keylen >= AES_KEYSIZE_192) {
+		writel(key[4], cryp->base + STARFIVE_AES_KEY4);
+		writel(key[5], cryp->base + STARFIVE_AES_KEY5);
+	}
+
+	if (ctx->keylen >= AES_KEYSIZE_256) {
+		writel(key[6], cryp->base + STARFIVE_AES_KEY6);
+		writel(key[7], cryp->base + STARFIVE_AES_KEY7);
+	}
+
+	if (starfive_aes_wait_keydone(cryp))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int starfive_aes_ccm_init(struct starfive_cryp_ctx *ctx)
+{
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+	u8 iv[AES_BLOCK_SIZE], b0[AES_BLOCK_SIZE];
+	unsigned int textlen;
+
+	memcpy(iv, cryp->req.areq->iv, AES_BLOCK_SIZE);
+	memset(iv + AES_BLOCK_SIZE - 1 - iv[0], 0, iv[0] + 1);
+
+	/* Build B0 */
+	memcpy(b0, iv, AES_BLOCK_SIZE);
+
+	b0[0] |= (8 * ((cryp->authsize - 2) / 2));
+
+	if (cryp->assoclen)
+		b0[0] |= CCM_B0_ADATA;
+
+	textlen = cryp->total_in;
+
+	b0[AES_BLOCK_SIZE - 2] = textlen >> 8;
+	b0[AES_BLOCK_SIZE - 1] = textlen & 0xFF;
+
+	starfive_aes_write_nonce(ctx, (u32 *)b0);
+
+	return 0;
+}
+
+static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx)
+{
+	struct starfive_cryp_request_ctx *rctx = ctx->rctx;
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+	u32 hw_mode;
+
+	/* reset */
+	rctx->csr.aes.v = 0;
+	rctx->csr.aes.aesrst = 1;
+	writel(rctx->csr.aes.v, cryp->base + STARFIVE_AES_CSR);
+
+	/* csr setup */
+	hw_mode = cryp->flags & FLG_MODE_MASK;
+
+	rctx->csr.aes.v = 0;
+
+	switch (ctx->keylen) {
+	case AES_KEYSIZE_128:
+		rctx->csr.aes.keymode = STARFIVE_AES_KEYMODE_128;
+		break;
+	case AES_KEYSIZE_192:
+		rctx->csr.aes.keymode = STARFIVE_AES_KEYMODE_192;
+		break;
+	case AES_KEYSIZE_256:
+		rctx->csr.aes.keymode = STARFIVE_AES_KEYMODE_256;
+		break;
+	}
+
+	rctx->csr.aes.mode  = hw_mode;
+	rctx->csr.aes.cmode = !is_encrypt(cryp);
+	rctx->csr.aes.ie = 1;
+
+	if (hw_mode == STARFIVE_AES_MODE_CFB ||
+	    hw_mode == STARFIVE_AES_MODE_OFB)
+		rctx->csr.aes.stmode = STARFIVE_AES_MODE_XFB_128;
+	else
+		rctx->csr.aes.stmode = STARFIVE_AES_MODE_XFB_1;
+
+	if (cryp->side_chan) {
+		rctx->csr.aes.delay_aes = 1;
+		rctx->csr.aes.vaes_start = 1;
+	}
+
+	writel(rctx->csr.aes.v, cryp->base + STARFIVE_AES_CSR);
+
+	cryp->err = starfive_aes_write_key(ctx);
+	if (cryp->err)
+		return cryp->err;
+
+	switch (hw_mode) {
+	case STARFIVE_AES_MODE_GCM:
+		starfive_aes_set_alen(ctx);
+		starfive_aes_set_mlen(ctx);
+		starfive_aes_set_ivlen(ctx);
+		starfive_aes_aead_hw_start(ctx, hw_mode);
+		starfive_aes_write_iv(ctx, (void *)cryp->req.areq->iv);
+		break;
+	case STARFIVE_AES_MODE_CCM:
+		starfive_aes_set_alen(ctx);
+		starfive_aes_set_mlen(ctx);
+		starfive_aes_ccm_init(ctx);
+		starfive_aes_aead_hw_start(ctx, hw_mode);
+		break;
+	case STARFIVE_AES_MODE_OFB:
+	case STARFIVE_AES_MODE_CFB:
+	case STARFIVE_AES_MODE_CBC:
+	case STARFIVE_AES_MODE_CTR:
+		starfive_aes_write_iv(ctx, (void *)cryp->req.sreq->iv);
+		break;
+	default:
+		break;
+	}
+
+	return cryp->err;
+}
+
+static int starfive_aes_read_authtag(struct starfive_cryp_dev *cryp)
+{
+	int i, start_addr;
+
+	if (starfive_aes_wait_busy(cryp))
+		return dev_err_probe(cryp->dev, -ETIMEDOUT,
+				     "Timeout waiting for tag generation.");
+
+	start_addr = STARFIVE_AES_NONCE0;
+
+	if (is_gcm(cryp))
+		for (i = 0; i < AES_BLOCK_32; i++, start_addr += 4)
+			cryp->tag_out[i] = readl(cryp->base + start_addr);
+	else
+		for (i = 0; i < AES_BLOCK_32; i++)
+			cryp->tag_out[i] = readl(cryp->base + STARFIVE_AES_AESDIO0R);
+
+	if (is_encrypt(cryp)) {
+		scatterwalk_copychunks(cryp->tag_out, &cryp->out_walk, cryp->authsize, 1);
+	} else {
+		scatterwalk_copychunks(cryp->tag_in, &cryp->in_walk, cryp->authsize, 0);
+
+		if (crypto_memneq(cryp->tag_in, cryp->tag_out, cryp->authsize))
+			return dev_err_probe(cryp->dev, -EBADMSG, "Failed tag verification\n");
+	}
+
+	return 0;
+}
+
+static void starfive_aes_finish_req(struct starfive_cryp_dev *cryp)
+{
+	union starfive_aes_csr csr;
+	int err = cryp->err;
+
+	if (!err && cryp->authsize)
+		err = starfive_aes_read_authtag(cryp);
+
+	if (!err && ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CBC ||
+		     (cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CTR))
+		starfive_aes_get_iv(cryp, (void *)cryp->req.sreq->iv);
+
+	/* reset irq flags*/
+	csr.v = 0;
+	csr.aesrst = 1;
+	writel(csr.v, cryp->base + STARFIVE_AES_CSR);
+
+	if (cryp->authsize)
+		crypto_finalize_aead_request(cryp->engine, cryp->req.areq, err);
+	else
+		crypto_finalize_skcipher_request(cryp->engine, cryp->req.sreq,
+						 err);
+}
+
+void starfive_aes_done_task(unsigned long param)
+{
+	struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)param;
+	u32 block[AES_BLOCK_32];
+	u32 stat;
+	int i;
+
+	for (i = 0; i < AES_BLOCK_32; i++)
+		block[i] = readl(cryp->base + STARFIVE_AES_AESDIO0R);
+
+	scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, AES_BLOCK_SIZE,
+							     cryp->total_out), 1);
+
+	cryp->total_out -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_out);
+
+	if (!cryp->total_out) {
+		starfive_aes_finish_req(cryp);
+		return;
+	}
+
+	memset(block, 0, AES_BLOCK_SIZE);
+	scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE,
+							    cryp->total_in), 0);
+	cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in);
+
+	for (i = 0; i < AES_BLOCK_32; i++)
+		writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R);
+
+	stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET);
+	stat &= ~STARFIVE_IE_MASK_AES_DONE;
+	writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET);
+}
+
+static int starfive_aes_gcm_write_adata(struct starfive_cryp_ctx *ctx)
+{
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+	struct starfive_cryp_request_ctx *rctx = ctx->rctx;
+	u32 *buffer;
+	int total_len, loop;
+
+	total_len = ALIGN(cryp->assoclen, AES_BLOCK_SIZE) / sizeof(unsigned int);
+	buffer = (u32 *)rctx->adata;
+
+	for (loop = 0; loop < total_len; loop += 4) {
+		writel(*buffer, cryp->base + STARFIVE_AES_NONCE0);
+		buffer++;
+		writel(*buffer, cryp->base + STARFIVE_AES_NONCE1);
+		buffer++;
+		writel(*buffer, cryp->base + STARFIVE_AES_NONCE2);
+		buffer++;
+		writel(*buffer, cryp->base + STARFIVE_AES_NONCE3);
+		buffer++;
+	}
+
+	if (starfive_aes_wait_gcmdone(cryp))
+		return dev_err_probe(cryp->dev, -ETIMEDOUT,
+				     "Timeout processing gcm aad block");
+
+	return 0;
+}
+
+static int starfive_aes_ccm_write_adata(struct starfive_cryp_ctx *ctx)
+{
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+	struct starfive_cryp_request_ctx *rctx = ctx->rctx;
+	u32 *buffer;
+	u8 *ci;
+	int total_len, loop;
+
+	total_len = cryp->assoclen;
+
+	ci = rctx->adata;
+	writeb(*ci, cryp->base + STARFIVE_AES_AESDIO0R);
+	ci++;
+	writeb(*ci, cryp->base + STARFIVE_AES_AESDIO0R);
+	ci++;
+	total_len -= 2;
+	buffer = (u32 *)ci;
+
+	for (loop = 0; loop < 3; loop++, buffer++)
+		writel(*buffer, cryp->base + STARFIVE_AES_AESDIO0R);
+
+	total_len -= 12;
+
+	while (total_len > 0) {
+		for (loop = 0; loop < AES_BLOCK_32; loop++, buffer++)
+			writel(*buffer, cryp->base + STARFIVE_AES_AESDIO0R);
+
+		total_len -= AES_BLOCK_SIZE;
+	}
+
+	if (starfive_aes_wait_busy(cryp))
+		return dev_err_probe(cryp->dev, -ETIMEDOUT,
+				     "Timeout processing ccm aad block");
+
+	return 0;
+}
+
+static int starfive_aes_prepare_req(struct skcipher_request *req,
+				    struct aead_request *areq)
+{
+	struct starfive_cryp_ctx *ctx;
+	struct starfive_cryp_request_ctx *rctx;
+	struct starfive_cryp_dev *cryp;
+
+	if (!req && !areq)
+		return -EINVAL;
+
+	ctx = req ? crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)) :
+		    crypto_aead_ctx(crypto_aead_reqtfm(areq));
+
+	cryp = ctx->cryp;
+	rctx = req ? skcipher_request_ctx(req) : aead_request_ctx(areq);
+
+	if (req) {
+		cryp->req.sreq = req;
+		cryp->total_in = req->cryptlen;
+		cryp->total_out = req->cryptlen;
+		cryp->assoclen = 0;
+		cryp->authsize = 0;
+	} else {
+		cryp->req.areq = areq;
+		cryp->assoclen = areq->assoclen;
+		cryp->authsize = crypto_aead_authsize(crypto_aead_reqtfm(areq));
+		if (is_encrypt(cryp)) {
+			cryp->total_in = areq->cryptlen;
+			cryp->total_out = areq->cryptlen;
+		} else {
+			cryp->total_in = areq->cryptlen - cryp->authsize;
+			cryp->total_out = cryp->total_in;
+		}
+	}
+
+	rctx->in_sg = req ? req->src : areq->src;
+	scatterwalk_start(&cryp->in_walk, rctx->in_sg);
+
+	rctx->out_sg = req ? req->dst : areq->dst;
+	scatterwalk_start(&cryp->out_walk, rctx->out_sg);
+
+	if (cryp->assoclen) {
+		rctx->adata = kzalloc(ALIGN(cryp->assoclen, AES_BLOCK_SIZE), GFP_KERNEL);
+		if (IS_ERR(rctx->adata))
+			return dev_err_probe(cryp->dev, PTR_ERR(rctx->adata),
+					     "Failed to alloc memory for adata");
+
+		scatterwalk_copychunks(rctx->adata, &cryp->in_walk, cryp->assoclen, 0);
+		scatterwalk_copychunks(NULL, &cryp->out_walk, cryp->assoclen, 2);
+	}
+
+	ctx->rctx = rctx;
+
+	return starfive_aes_hw_init(ctx);
+}
+
+static int starfive_aes_do_one_req(struct crypto_engine *engine, void *areq)
+{
+	struct skcipher_request *req =
+		container_of(areq, struct skcipher_request, base);
+	struct starfive_cryp_ctx *ctx =
+		crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+	u32 block[AES_BLOCK_32];
+	u32 stat;
+	int i;
+
+	/*
+	 * Write first plain/ciphertext block to start the module
+	 * then let irq tasklet handle the rest of the data blocks.
+	 */
+	scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE,
+							    cryp->total_in), 0);
+	cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in);
+
+	for (i = 0; i < AES_BLOCK_32; i++)
+		writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R);
+
+	stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET);
+	stat &= ~STARFIVE_IE_MASK_AES_DONE;
+	writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET);
+
+	return 0;
+}
+
+static int starfive_aes_skcipher_prepare_req(struct crypto_engine *engine,
+					     void *areq)
+{
+	struct skcipher_request *req =
+		container_of(areq, struct skcipher_request, base);
+
+	return starfive_aes_prepare_req(req, NULL);
+}
+
+static int starfive_aes_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->cryp = starfive_cryp_find_dev(ctx);
+	if (!ctx->cryp)
+		return -ENODEV;
+
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) +
+				    sizeof(struct skcipher_request));
+
+	ctx->enginectx.op.do_one_request = starfive_aes_do_one_req;
+	ctx->enginectx.op.prepare_request = starfive_aes_skcipher_prepare_req;
+	ctx->enginectx.op.unprepare_request = NULL;
+
+	return 0;
+}
+
+static void starfive_aes_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->enginectx.op.do_one_request = NULL;
+	ctx->enginectx.op.prepare_request = NULL;
+	ctx->enginectx.op.unprepare_request = NULL;
+}
+
+static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq)
+{
+	struct aead_request *req =
+		container_of(areq, struct aead_request, base);
+	struct starfive_cryp_ctx *ctx =
+		crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+	struct starfive_cryp_request_ctx *rctx = ctx->rctx;
+	u32 block[AES_BLOCK_32];
+	u32 stat;
+	int i;
+
+	if (!cryp->assoclen)
+		goto write_text;
+
+	if ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CCM)
+		cryp->err = starfive_aes_ccm_write_adata(ctx);
+	else
+		cryp->err = starfive_aes_gcm_write_adata(ctx);
+
+	kfree(rctx->adata);
+
+	if (cryp->err)
+		return cryp->err;
+
+write_text:
+	if (!cryp->total_in)
+		goto finish_req;
+
+	/*
+	 * Write first plain/ciphertext block to start the module
+	 * then let irq tasklet handle the rest of the data blocks.
+	 */
+	scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE,
+							    cryp->total_in), 0);
+	cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in);
+
+	for (i = 0; i < AES_BLOCK_32; i++)
+		writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R);
+
+	stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET);
+	stat &= ~STARFIVE_IE_MASK_AES_DONE;
+	writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET);
+
+	return 0;
+
+finish_req:
+	starfive_aes_finish_req(cryp);
+	return 0;
+}
+
+static int starfive_aes_aead_prepare_req(struct crypto_engine *engine, void *areq)
+{
+	struct aead_request *req =
+		container_of(areq, struct aead_request, base);
+
+	return starfive_aes_prepare_req(NULL, req);
+}
+
+static int starfive_aes_aead_init_tfm(struct crypto_aead *tfm)
+{
+	struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm);
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+	struct crypto_tfm *aead = crypto_aead_tfm(tfm);
+	struct crypto_alg *alg = aead->__crt_alg;
+
+	ctx->cryp = starfive_cryp_find_dev(ctx);
+	if (!ctx->cryp)
+		return -ENODEV;
+
+	if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) {
+		ctx->aead_fbk = crypto_alloc_aead(alg->cra_name, 0,
+						  CRYPTO_ALG_NEED_FALLBACK);
+		if (IS_ERR(ctx->aead_fbk))
+			return dev_err_probe(cryp->dev, PTR_ERR(ctx->aead_fbk),
+					     "%s() failed to allocate fallback for %s\n",
+					     __func__, alg->cra_name);
+	}
+
+	crypto_aead_set_reqsize(tfm, sizeof(struct starfive_cryp_ctx) +
+				sizeof(struct aead_request));
+
+	ctx->enginectx.op.do_one_request = starfive_aes_aead_do_one_req;
+	ctx->enginectx.op.prepare_request = starfive_aes_aead_prepare_req;
+	ctx->enginectx.op.unprepare_request = NULL;
+
+	return 0;
+}
+
+static void starfive_aes_aead_exit_tfm(struct crypto_aead *tfm)
+{
+	struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm);
+
+	if (ctx->aead_fbk) {
+		crypto_free_aead(ctx->aead_fbk);
+		ctx->aead_fbk = NULL;
+	}
+
+	ctx->enginectx.op.do_one_request = NULL;
+	ctx->enginectx.op.prepare_request = NULL;
+	ctx->enginectx.op.unprepare_request = NULL;
+}
+
+static int starfive_aes_crypt(struct skcipher_request *req, unsigned long flags)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+	unsigned int blocksize_align = crypto_skcipher_blocksize(tfm) - 1;
+
+	cryp->flags = flags;
+
+	if ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_ECB ||
+	    (cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CBC)
+		if (req->cryptlen & blocksize_align)
+			return -EINVAL;
+
+	return crypto_transfer_skcipher_request_to_engine(cryp->engine, req);
+}
+
+static int starfive_aes_aead_crypt(struct aead_request *req, unsigned long flags)
+{
+	struct starfive_cryp_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct starfive_cryp_dev *cryp = ctx->cryp;
+
+	cryp->flags = flags;
+
+	/*
+	 * HW engine could not perform CCM tag verification on
+	 * non-blocksize aligned text, use fallback algo instead
+	 */
+	if (ctx->aead_fbk && !is_encrypt(cryp)) {
+		struct aead_request *subreq = aead_request_ctx(req);
+
+		aead_request_set_tfm(subreq, ctx->aead_fbk);
+		aead_request_set_callback(subreq, req->base.flags,
+					  req->base.complete, req->base.data);
+		aead_request_set_crypt(subreq, req->src,
+				       req->dst, req->cryptlen, req->iv);
+		aead_request_set_ad(subreq, req->assoclen);
+
+		return crypto_aead_decrypt(subreq);
+	}
+
+	return crypto_transfer_aead_request_to_engine(cryp->engine, req);
+}
+
+static int starfive_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int keylen)
+{
+	struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	if (!key || !keylen)
+		return -EINVAL;
+
+	if (keylen != AES_KEYSIZE_128 &&
+	    keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int starfive_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key,
+				    unsigned int keylen)
+{
+	struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm);
+
+	if (!key || !keylen)
+		return -EINVAL;
+
+	if (keylen != AES_KEYSIZE_128 &&
+	    keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	if (ctx->aead_fbk)
+		return crypto_aead_setkey(ctx->aead_fbk, key, keylen);
+
+	return 0;
+}
+
+static int starfive_aes_gcm_setauthsize(struct crypto_aead *tfm,
+					unsigned int authsize)
+{
+	return crypto_gcm_check_authsize(authsize);
+}
+
+static int starfive_aes_ccm_setauthsize(struct crypto_aead *tfm,
+					unsigned int authsize)
+{
+	struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm);
+
+	switch (authsize) {
+	case 4:
+	case 6:
+	case 8:
+	case 10:
+	case 12:
+	case 14:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return crypto_aead_setauthsize(ctx->aead_fbk, authsize);
+}
+
+static int starfive_aes_ecb_encrypt(struct skcipher_request *req)
+{
+	return starfive_aes_crypt(req, STARFIVE_AES_MODE_ECB | FLG_ENCRYPT);
+}
+
+static int starfive_aes_ecb_decrypt(struct skcipher_request *req)
+{
+	return starfive_aes_crypt(req, STARFIVE_AES_MODE_ECB);
+}
+
+static int starfive_aes_cbc_encrypt(struct skcipher_request *req)
+{
+	return starfive_aes_crypt(req, STARFIVE_AES_MODE_CBC | FLG_ENCRYPT);
+}
+
+static int starfive_aes_cbc_decrypt(struct skcipher_request *req)
+{
+	return starfive_aes_crypt(req, STARFIVE_AES_MODE_CBC);
+}
+
+static int starfive_aes_cfb_encrypt(struct skcipher_request *req)
+{
+	return starfive_aes_crypt(req, STARFIVE_AES_MODE_CFB | FLG_ENCRYPT);
+}
+
+static int starfive_aes_cfb_decrypt(struct skcipher_request *req)
+{
+	return starfive_aes_crypt(req, STARFIVE_AES_MODE_CFB);
+}
+
+static int starfive_aes_ofb_encrypt(struct skcipher_request *req)
+{
+	return starfive_aes_crypt(req, STARFIVE_AES_MODE_OFB | FLG_ENCRYPT);
+}
+
+static int starfive_aes_ofb_decrypt(struct skcipher_request *req)
+{
+	return starfive_aes_crypt(req, STARFIVE_AES_MODE_OFB);
+}
+
+static int starfive_aes_ctr_encrypt(struct skcipher_request *req)
+{
+	return starfive_aes_crypt(req, STARFIVE_AES_MODE_CTR | FLG_ENCRYPT);
+}
+
+static int starfive_aes_ctr_decrypt(struct skcipher_request *req)
+{
+	return starfive_aes_crypt(req, STARFIVE_AES_MODE_CTR);
+}
+
+static int starfive_aes_gcm_encrypt(struct aead_request *req)
+{
+	return starfive_aes_aead_crypt(req, STARFIVE_AES_MODE_GCM | FLG_ENCRYPT);
+}
+
+static int starfive_aes_gcm_decrypt(struct aead_request *req)
+{
+	return starfive_aes_aead_crypt(req, STARFIVE_AES_MODE_GCM);
+}
+
+static int starfive_aes_ccm_encrypt(struct aead_request *req)
+{
+	int ret;
+
+	ret = starfive_aes_ccm_check_iv(req->iv);
+	if (ret)
+		return ret;
+
+	return starfive_aes_aead_crypt(req, STARFIVE_AES_MODE_CCM | FLG_ENCRYPT);
+}
+
+static int starfive_aes_ccm_decrypt(struct aead_request *req)
+{
+	int ret;
+
+	ret = starfive_aes_ccm_check_iv(req->iv);
+	if (ret)
+		return ret;
+
+	return starfive_aes_aead_crypt(req, STARFIVE_AES_MODE_CCM);
+}
+
+static struct skcipher_alg skcipher_algs[] = {
+{
+	.init				= starfive_aes_init_tfm,
+	.exit				= starfive_aes_exit_tfm,
+	.setkey				= starfive_aes_setkey,
+	.encrypt			= starfive_aes_ecb_encrypt,
+	.decrypt			= starfive_aes_ecb_decrypt,
+	.min_keysize			= AES_MIN_KEY_SIZE,
+	.max_keysize			= AES_MAX_KEY_SIZE,
+	.base = {
+		.cra_name		= "ecb(aes)",
+		.cra_driver_name	= "starfive-ecb-aes",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct starfive_cryp_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+}, {
+	.init				= starfive_aes_init_tfm,
+	.exit				= starfive_aes_exit_tfm,
+	.setkey				= starfive_aes_setkey,
+	.encrypt			= starfive_aes_cbc_encrypt,
+	.decrypt			= starfive_aes_cbc_decrypt,
+	.min_keysize			= AES_MIN_KEY_SIZE,
+	.max_keysize			= AES_MAX_KEY_SIZE,
+	.ivsize				= AES_BLOCK_SIZE,
+	.base = {
+		.cra_name		= "cbc(aes)",
+		.cra_driver_name	= "starfive-cbc-aes",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct starfive_cryp_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+}, {
+	.init				= starfive_aes_init_tfm,
+	.exit				= starfive_aes_exit_tfm,
+	.setkey				= starfive_aes_setkey,
+	.encrypt			= starfive_aes_ctr_encrypt,
+	.decrypt			= starfive_aes_ctr_decrypt,
+	.min_keysize			= AES_MIN_KEY_SIZE,
+	.max_keysize			= AES_MAX_KEY_SIZE,
+	.ivsize				= AES_BLOCK_SIZE,
+	.base = {
+		.cra_name		= "ctr(aes)",
+		.cra_driver_name	= "starfive-ctr-aes",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct starfive_cryp_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+}, {
+	.init				= starfive_aes_init_tfm,
+	.exit				= starfive_aes_exit_tfm,
+	.setkey				= starfive_aes_setkey,
+	.encrypt			= starfive_aes_cfb_encrypt,
+	.decrypt			= starfive_aes_cfb_decrypt,
+	.min_keysize			= AES_MIN_KEY_SIZE,
+	.max_keysize			= AES_MAX_KEY_SIZE,
+	.ivsize				= AES_BLOCK_SIZE,
+	.base = {
+		.cra_name		= "cfb(aes)",
+		.cra_driver_name	= "starfive-cfb-aes",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct starfive_cryp_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+}, {
+	.init				= starfive_aes_init_tfm,
+	.exit				= starfive_aes_exit_tfm,
+	.setkey				= starfive_aes_setkey,
+	.encrypt			= starfive_aes_ofb_encrypt,
+	.decrypt			= starfive_aes_ofb_decrypt,
+	.min_keysize			= AES_MIN_KEY_SIZE,
+	.max_keysize			= AES_MAX_KEY_SIZE,
+	.ivsize				= AES_BLOCK_SIZE,
+	.base = {
+		.cra_name		= "ofb(aes)",
+		.cra_driver_name	= "starfive-ofb-aes",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct starfive_cryp_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+},
+};
+
+static struct aead_alg aead_algs[] = {
+{
+	.setkey                         = starfive_aes_aead_setkey,
+	.setauthsize                    = starfive_aes_gcm_setauthsize,
+	.encrypt                        = starfive_aes_gcm_encrypt,
+	.decrypt                        = starfive_aes_gcm_decrypt,
+	.init                           = starfive_aes_aead_init_tfm,
+	.exit                           = starfive_aes_aead_exit_tfm,
+	.ivsize                         = GCM_AES_IV_SIZE,
+	.maxauthsize                    = AES_BLOCK_SIZE,
+	.base = {
+		.cra_name               = "gcm(aes)",
+		.cra_driver_name        = "starfive-gcm-aes",
+		.cra_priority           = 200,
+		.cra_flags              = CRYPTO_ALG_ASYNC,
+		.cra_blocksize          = 1,
+		.cra_ctxsize            = sizeof(struct starfive_cryp_ctx),
+		.cra_alignmask          = 0xf,
+		.cra_module             = THIS_MODULE,
+	},
+}, {
+	.setkey		                = starfive_aes_aead_setkey,
+	.setauthsize	                = starfive_aes_ccm_setauthsize,
+	.encrypt	                = starfive_aes_ccm_encrypt,
+	.decrypt	                = starfive_aes_ccm_decrypt,
+	.init		                = starfive_aes_aead_init_tfm,
+	.exit		                = starfive_aes_aead_exit_tfm,
+	.ivsize		                = AES_BLOCK_SIZE,
+	.maxauthsize	                = AES_BLOCK_SIZE,
+	.base = {
+		.cra_name		= "ccm(aes)",
+		.cra_driver_name	= "starfive-ccm-aes",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_ASYNC |
+					  CRYPTO_ALG_NEED_FALLBACK,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct starfive_cryp_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+},
+};
+
+int starfive_aes_register_algs(void)
+{
+	int ret;
+
+	ret = crypto_register_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
+	if (ret)
+		return ret;
+
+	ret = crypto_register_aeads(aead_algs, ARRAY_SIZE(aead_algs));
+	if (ret)
+		crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
+
+	return ret;
+}
+
+void starfive_aes_unregister_algs(void)
+{
+	crypto_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs));
+	crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
+}
diff --git a/drivers/crypto/starfive/jh7110-cryp.c b/drivers/crypto/starfive/jh7110-cryp.c
index e573c097a4a0..ab37010ceb88 100644
--- a/drivers/crypto/starfive/jh7110-cryp.c
+++ b/drivers/crypto/starfive/jh7110-cryp.c
@@ -51,6 +51,13 @@ struct starfive_cryp_dev *starfive_cryp_find_dev(struct starfive_cryp_ctx *ctx)
 	return cryp;
 }
 
+static u16 side_chan;
+module_param(side_chan, ushort, 0);
+MODULE_PARM_DESC(side_chan, "Enable side channel mitigation for AES module.\n"
+			    "Enabling this feature will reduce speed performance.\n"
+			    " 0 - Disabled\n"
+			    " other - Enabled");
+
 static int starfive_dma_init(struct starfive_cryp_dev *cryp)
 {
 	dma_cap_mask_t mask;
@@ -82,20 +89,26 @@ static void starfive_dma_cleanup(struct starfive_cryp_dev *cryp)
 static irqreturn_t starfive_cryp_irq(int irq, void *priv)
 {
 	u32 status;
+	u32 mask;
 	struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)priv;
 
+	mask = readl(cryp->base + STARFIVE_IE_MASK_OFFSET);
 	status = readl(cryp->base + STARFIVE_IE_FLAG_OFFSET);
+	if (status & STARFIVE_IE_FLAG_AES_DONE) {
+		mask |= STARFIVE_IE_MASK_AES_DONE;
+		writel(mask, cryp->base + STARFIVE_IE_MASK_OFFSET);
+		tasklet_schedule(&cryp->aes_done);
+	}
+
 	if (status & STARFIVE_IE_FLAG_HASH_DONE) {
-		status = readl(cryp->base + STARFIVE_IE_MASK_OFFSET);
-		status |= STARFIVE_IE_MASK_HASH_DONE;
-		writel(status, cryp->base + STARFIVE_IE_MASK_OFFSET);
+		mask |= STARFIVE_IE_MASK_HASH_DONE;
+		writel(mask, cryp->base + STARFIVE_IE_MASK_OFFSET);
 		tasklet_schedule(&cryp->hash_done);
 	}
 
 	if (status & STARFIVE_IE_FLAG_PKA_DONE) {
-		status = readl(cryp->base + STARFIVE_IE_MASK_OFFSET);
-		status |= STARFIVE_IE_MASK_PKA_DONE;
-		writel(status, cryp->base + STARFIVE_IE_MASK_OFFSET);
+		mask |= STARFIVE_IE_MASK_PKA_DONE;
+		writel(mask, cryp->base + STARFIVE_IE_MASK_OFFSET);
 		complete(&cryp->pka_done);
 	}
 
@@ -121,10 +134,12 @@ static int starfive_cryp_probe(struct platform_device *pdev)
 		return dev_err_probe(&pdev->dev, PTR_ERR(cryp->base),
 				     "Error remapping memory for platform device\n");
 
+	tasklet_init(&cryp->aes_done, starfive_aes_done_task, (unsigned long)cryp);
 	tasklet_init(&cryp->hash_done, starfive_hash_done_task, (unsigned long)cryp);
 
 	cryp->phys_base = res->start;
 	cryp->dma_maxburst = 32;
+	cryp->side_chan = side_chan;
 
 	cryp->hclk = devm_clk_get(&pdev->dev, "hclk");
 	if (IS_ERR(cryp->hclk))
@@ -180,6 +195,10 @@ static int starfive_cryp_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_engine_start;
 
+	ret = starfive_aes_register_algs();
+	if (ret)
+		goto err_algs_aes;
+
 	ret = starfive_hash_register_algs();
 	if (ret)
 		goto err_algs_hash;
@@ -193,6 +212,8 @@ static int starfive_cryp_probe(struct platform_device *pdev)
 err_algs_rsa:
 	starfive_hash_unregister_algs();
 err_algs_hash:
+	starfive_aes_unregister_algs();
+err_algs_aes:
 	crypto_engine_stop(cryp->engine);
 err_engine_start:
 	crypto_engine_exit(cryp->engine);
@@ -207,6 +228,7 @@ err_dma_init:
 	clk_disable_unprepare(cryp->ahb);
 	reset_control_assert(cryp->rst);
 
+	tasklet_kill(&cryp->aes_done);
 	tasklet_kill(&cryp->hash_done);
 err_probe_defer:
 	return ret;
@@ -216,9 +238,11 @@ static void starfive_cryp_remove(struct platform_device *pdev)
 {
 	struct starfive_cryp_dev *cryp = platform_get_drvdata(pdev);
 
+	starfive_aes_unregister_algs();
 	starfive_hash_unregister_algs();
 	starfive_rsa_unregister_algs();
 
+	tasklet_kill(&cryp->aes_done);
 	tasklet_kill(&cryp->hash_done);
 
 	crypto_engine_stop(cryp->engine);
diff --git a/drivers/crypto/starfive/jh7110-cryp.h b/drivers/crypto/starfive/jh7110-cryp.h
index 0cdcffc0d7d4..b6d809e8fe45 100644
--- a/drivers/crypto/starfive/jh7110-cryp.h
+++ b/drivers/crypto/starfive/jh7110-cryp.h
@@ -5,7 +5,9 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
+#include <linux/interrupt.h>
 
+#include <crypto/aes.h>
 #include <crypto/engine.h>
 #include <crypto/sha2.h>
 #include <crypto/sm3.h>
@@ -17,13 +19,56 @@
 #define STARFIVE_DMA_IN_LEN_OFFSET		0x10
 #define STARFIVE_DMA_OUT_LEN_OFFSET		0x14
 
+#define STARFIVE_IE_MASK_AES_DONE		0x1
 #define STARFIVE_IE_MASK_HASH_DONE		0x4
 #define STARFIVE_IE_MASK_PKA_DONE		0x8
+#define STARFIVE_IE_FLAG_AES_DONE		0x1
 #define STARFIVE_IE_FLAG_HASH_DONE		0x4
 #define STARFIVE_IE_FLAG_PKA_DONE		0x8
 
 #define STARFIVE_MSG_BUFFER_SIZE		SZ_16K
 #define MAX_KEY_SIZE				SHA512_BLOCK_SIZE
+#define STARFIVE_AES_IV_LEN			AES_BLOCK_SIZE
+#define STARFIVE_AES_CTR_LEN			AES_BLOCK_SIZE
+
+union starfive_aes_csr {
+	u32 v;
+	struct {
+		u32 cmode			:1;
+#define STARFIVE_AES_KEYMODE_128		0x0
+#define STARFIVE_AES_KEYMODE_192		0x1
+#define STARFIVE_AES_KEYMODE_256		0x2
+		u32 keymode			:2;
+#define STARFIVE_AES_BUSY			BIT(3)
+		u32 busy			:1;
+		u32 done			:1;
+#define STARFIVE_AES_KEY_DONE			BIT(5)
+		u32 krdy			:1;
+		u32 aesrst			:1;
+		u32 ie				:1;
+#define STARFIVE_AES_CCM_START			BIT(8)
+		u32 ccm_start			:1;
+#define STARFIVE_AES_MODE_ECB			0x0
+#define STARFIVE_AES_MODE_CBC			0x1
+#define STARFIVE_AES_MODE_CFB			0x2
+#define STARFIVE_AES_MODE_OFB			0x3
+#define STARFIVE_AES_MODE_CTR			0x4
+#define STARFIVE_AES_MODE_CCM			0x5
+#define STARFIVE_AES_MODE_GCM			0x6
+		u32 mode			:3;
+#define STARFIVE_AES_GCM_START			BIT(12)
+		u32 gcm_start			:1;
+#define STARFIVE_AES_GCM_DONE			BIT(13)
+		u32 gcm_done			:1;
+		u32 delay_aes			:1;
+		u32 vaes_start			:1;
+		u32 rsvd_0			:8;
+#define STARFIVE_AES_MODE_XFB_1			0x0
+#define STARFIVE_AES_MODE_XFB_128		0x5
+		u32 stmode			:3;
+		u32 rsvd_1			:5;
+	};
+};
 
 union starfive_hash_csr {
 	u32 v;
@@ -116,6 +161,7 @@ struct starfive_cryp_ctx {
 	struct starfive_rsa_key			rsa_key;
 	struct crypto_akcipher			*akcipher_fbk;
 	struct crypto_ahash			*ahash_fbk;
+	struct crypto_aead			*aead_fbk;
 };
 
 struct starfive_cryp_dev {
@@ -133,13 +179,26 @@ struct starfive_cryp_dev {
 	struct dma_chan				*rx;
 	struct dma_slave_config			cfg_in;
 	struct dma_slave_config			cfg_out;
+	struct scatter_walk			in_walk;
+	struct scatter_walk			out_walk;
 	struct crypto_engine			*engine;
+	struct tasklet_struct			aes_done;
 	struct tasklet_struct			hash_done;
 	struct completion			pka_done;
+	size_t					assoclen;
+	size_t					total_in;
+	size_t					total_out;
+	u32					tag_in[4];
+	u32					tag_out[4];
+	unsigned int				authsize;
+	unsigned long				flags;
 	int					err;
+	bool					side_chan;
 	union starfive_alg_cr			alg_cr;
 	union {
 		struct ahash_request		*hreq;
+		struct aead_request		*areq;
+		struct skcipher_request		*sreq;
 	} req;
 };
 
@@ -147,6 +206,7 @@ struct starfive_cryp_request_ctx {
 	union {
 		union starfive_hash_csr		hash;
 		union starfive_pka_cacr		pka;
+		union starfive_aes_csr		aes;
 	} csr;
 
 	struct scatterlist			*in_sg;
@@ -157,6 +217,7 @@ struct starfive_cryp_request_ctx {
 	unsigned int				blksize;
 	unsigned int				digsize;
 	unsigned long				in_sg_len;
+	unsigned char				*adata;
 	u8 rsa_data[] __aligned(sizeof(u32));
 };
 
@@ -168,5 +229,9 @@ void starfive_hash_unregister_algs(void);
 int starfive_rsa_register_algs(void);
 void starfive_rsa_unregister_algs(void);
 
+int starfive_aes_register_algs(void);
+void starfive_aes_unregister_algs(void);
+
 void starfive_hash_done_task(unsigned long param);
+void starfive_aes_done_task(unsigned long param);
 #endif

From 9f3fa6bc4ff8515da1349c44a77e7327bd2f4788 Mon Sep 17 00:00:00 2001
From: Mahmoud Adam <mngyadam@amazon.com>
Date: Mon, 17 Jul 2023 12:55:09 +0000
Subject: [PATCH 0626/2047] KEYS: use kfree_sensitive with key

key might contain private part of the key, so better use
kfree_sensitive to free it

Signed-off-by: Mahmoud Adam <mngyadam@amazon.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/asymmetric_keys/public_key.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index 773e159dbbcb..abeecb8329b3 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -42,7 +42,7 @@ static void public_key_describe(const struct key *asymmetric_key,
 void public_key_free(struct public_key *key)
 {
 	if (key) {
-		kfree(key->key);
+		kfree_sensitive(key->key);
 		kfree(key->params);
 		kfree(key);
 	}
@@ -263,7 +263,7 @@ error_free_tfm:
 	else
 		crypto_free_akcipher(tfm);
 error_free_key:
-	kfree(key);
+	kfree_sensitive(key);
 	pr_devel("<==%s() = %d\n", __func__, ret);
 	return ret;
 }
@@ -369,7 +369,7 @@ error_free_tfm:
 	else
 		crypto_free_akcipher(tfm);
 error_free_key:
-	kfree(key);
+	kfree_sensitive(key);
 	pr_devel("<==%s() = %d\n", __func__, ret);
 	return ret;
 }
@@ -441,7 +441,7 @@ int public_key_verify_signature(const struct public_key *pkey,
 				sig->digest, sig->digest_size);
 
 error_free_key:
-	kfree(key);
+	kfree_sensitive(key);
 error_free_tfm:
 	crypto_free_sig(tfm);
 	pr_devel("<==%s() = %d\n", __func__, ret);

From 80e40fea8e2a7528b079ac1853da528c5cabf625 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Wed, 19 Jul 2023 09:18:03 +0200
Subject: [PATCH 0627/2047] hwrng: cctrng - don't open code init and exit
 functions

Do not open code the init and exit functions of the cctrng driver.

If we move the BUILD_BUG_ON checks into the probe function, we can use
module_platform_driver and make the code shorter.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/cctrng.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/char/hw_random/cctrng.c b/drivers/char/hw_random/cctrng.c
index 302ffa354c2f..241da7e44675 100644
--- a/drivers/char/hw_random/cctrng.c
+++ b/drivers/char/hw_random/cctrng.c
@@ -492,6 +492,10 @@ static int cctrng_probe(struct platform_device *pdev)
 	u32 val;
 	int irq;
 
+	/* Compile time assertion checks */
+	BUILD_BUG_ON(CCTRNG_DATA_BUF_WORDS < 6);
+	BUILD_BUG_ON((CCTRNG_DATA_BUF_WORDS & (CCTRNG_DATA_BUF_WORDS-1)) != 0);
+
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		return -ENOMEM;
@@ -698,21 +702,7 @@ static struct platform_driver cctrng_driver = {
 	.remove = cctrng_remove,
 };
 
-static int __init cctrng_mod_init(void)
-{
-	/* Compile time assertion checks */
-	BUILD_BUG_ON(CCTRNG_DATA_BUF_WORDS < 6);
-	BUILD_BUG_ON((CCTRNG_DATA_BUF_WORDS & (CCTRNG_DATA_BUF_WORDS-1)) != 0);
-
-	return platform_driver_register(&cctrng_driver);
-}
-module_init(cctrng_mod_init);
-
-static void __exit cctrng_mod_exit(void)
-{
-	platform_driver_unregister(&cctrng_driver);
-}
-module_exit(cctrng_mod_exit);
+module_platform_driver(cctrng_driver);
 
 /* Module description */
 MODULE_DESCRIPTION("ARM CryptoCell TRNG Driver");

From 8bcd9689384810bd1c610419563c827e23118e34 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Wed, 19 Jul 2023 09:18:04 +0200
Subject: [PATCH 0628/2047] hwrng: cctrng - let devres enable the clock

Call devm_clk_get_optional_enabled in the cctrng driver. We don't have to
disable and unprepare the clock any more in error paths or in the remove
function.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/cctrng.c | 29 +++++------------------------
 1 file changed, 5 insertions(+), 24 deletions(-)

diff --git a/drivers/char/hw_random/cctrng.c b/drivers/char/hw_random/cctrng.c
index 241da7e44675..ac4a2fbc6a0f 100644
--- a/drivers/char/hw_random/cctrng.c
+++ b/drivers/char/hw_random/cctrng.c
@@ -460,30 +460,16 @@ static int cc_trng_clk_init(struct cctrng_drvdata *drvdata)
 {
 	struct clk *clk;
 	struct device *dev = &(drvdata->pdev->dev);
-	int rc = 0;
 
-	clk = devm_clk_get_optional(dev, NULL);
+	clk = devm_clk_get_optional_enabled(dev, NULL);
 	if (IS_ERR(clk))
 		return dev_err_probe(dev, PTR_ERR(clk),
-				     "Error getting clock\n");
+				     "Failed to get or enable the clock\n");
 
 	drvdata->clk = clk;
-
-	rc = clk_prepare_enable(drvdata->clk);
-	if (rc) {
-		dev_err(dev, "Failed to enable clock\n");
-		return rc;
-	}
-
 	return 0;
 }
 
-static void cc_trng_clk_fini(struct cctrng_drvdata *drvdata)
-{
-	clk_disable_unprepare(drvdata->clk);
-}
-
-
 static int cctrng_probe(struct platform_device *pdev)
 {
 	struct cctrng_drvdata *drvdata;
@@ -545,7 +531,7 @@ static int cctrng_probe(struct platform_device *pdev)
 	rc = devm_request_irq(dev, irq, cc_isr, IRQF_SHARED, "cctrng", drvdata);
 	if (rc) {
 		dev_err(dev, "Could not register to interrupt %d\n", irq);
-		goto post_clk_err;
+		return rc;
 	}
 	dev_dbg(dev, "Registered to IRQ: %d\n", irq);
 
@@ -563,14 +549,14 @@ static int cctrng_probe(struct platform_device *pdev)
 	rc = cc_trng_pm_init(drvdata);
 	if (rc) {
 		dev_err(dev, "cc_trng_pm_init failed\n");
-		goto post_clk_err;
+		return rc;
 	}
 
 	/* increment device's usage counter */
 	rc = cc_trng_pm_get(dev);
 	if (rc) {
 		dev_err(dev, "cc_trng_pm_get returned %x\n", rc);
-		goto post_pm_err;
+		return rc;
 	}
 
 	/* set pending_hw to verify that HW won't be triggered from read */
@@ -597,9 +583,6 @@ static int cctrng_probe(struct platform_device *pdev)
 post_pm_err:
 	cc_trng_pm_fini(drvdata);
 
-post_clk_err:
-	cc_trng_clk_fini(drvdata);
-
 	return rc;
 }
 
@@ -612,8 +595,6 @@ static int cctrng_remove(struct platform_device *pdev)
 
 	cc_trng_pm_fini(drvdata);
 
-	cc_trng_clk_fini(drvdata);
-
 	dev_info(dev, "ARM cctrng device terminated\n");
 
 	return 0;

From 80a34c037713b183032fad578cf5cb88b94298a8 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Wed, 19 Jul 2023 09:18:05 +0200
Subject: [PATCH 0629/2047] hwrng: cctrng - merge cc_trng_clk_init into its
 only caller

cc_trng_clk_init is called only from the probe function. Merge the two
functions, this saves some lines of code.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/cctrng.c | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/drivers/char/hw_random/cctrng.c b/drivers/char/hw_random/cctrng.c
index ac4a2fbc6a0f..2a1887f6483f 100644
--- a/drivers/char/hw_random/cctrng.c
+++ b/drivers/char/hw_random/cctrng.c
@@ -455,21 +455,6 @@ static void cc_trng_startwork_handler(struct work_struct *w)
 	cc_trng_hw_trigger(drvdata);
 }
 
-
-static int cc_trng_clk_init(struct cctrng_drvdata *drvdata)
-{
-	struct clk *clk;
-	struct device *dev = &(drvdata->pdev->dev);
-
-	clk = devm_clk_get_optional_enabled(dev, NULL);
-	if (IS_ERR(clk))
-		return dev_err_probe(dev, PTR_ERR(clk),
-				     "Failed to get or enable the clock\n");
-
-	drvdata->clk = clk;
-	return 0;
-}
-
 static int cctrng_probe(struct platform_device *pdev)
 {
 	struct cctrng_drvdata *drvdata;
@@ -517,11 +502,10 @@ static int cctrng_probe(struct platform_device *pdev)
 		return rc;
 	}
 
-	rc = cc_trng_clk_init(drvdata);
-	if (rc) {
-		dev_err(dev, "cc_trng_clk_init failed\n");
-		return rc;
-	}
+	drvdata->clk = devm_clk_get_optional_enabled(dev, NULL);
+	if (IS_ERR(drvdata->clk))
+		return dev_err_probe(dev, PTR_ERR(drvdata->clk),
+				     "Failed to get or enable the clock\n");
 
 	INIT_WORK(&drvdata->compwork, cc_trng_compwork_handler);
 	INIT_WORK(&drvdata->startwork, cc_trng_startwork_handler);

From 9553ae3497f0753b79efa54b1719a7117e10bdf4 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Wed, 19 Jul 2023 09:18:06 +0200
Subject: [PATCH 0630/2047] hwrng: cctrng - use dev_err_probe in error paths

Use dev_err_probe in error paths to make the code a bit shorter.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/cctrng.c | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/drivers/char/hw_random/cctrng.c b/drivers/char/hw_random/cctrng.c
index 2a1887f6483f..1abbff04a015 100644
--- a/drivers/char/hw_random/cctrng.c
+++ b/drivers/char/hw_random/cctrng.c
@@ -485,10 +485,8 @@ static int cctrng_probe(struct platform_device *pdev)
 	drvdata->circ.buf = (char *)drvdata->data_buf;
 
 	drvdata->cc_base = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(drvdata->cc_base)) {
-		dev_err(dev, "Failed to ioremap registers");
-		return PTR_ERR(drvdata->cc_base);
-	}
+	if (IS_ERR(drvdata->cc_base))
+		return dev_err_probe(dev, PTR_ERR(drvdata->cc_base), "Failed to ioremap registers");
 
 	/* Then IRQ */
 	irq = platform_get_irq(pdev, 0);
@@ -497,10 +495,8 @@ static int cctrng_probe(struct platform_device *pdev)
 
 	/* parse sampling rate from device tree */
 	rc = cc_trng_parse_sampling_ratio(drvdata);
-	if (rc) {
-		dev_err(dev, "Failed to get legal sampling ratio for rosc\n");
-		return rc;
-	}
+	if (rc)
+		return dev_err_probe(dev, rc, "Failed to get legal sampling ratio for rosc\n");
 
 	drvdata->clk = devm_clk_get_optional_enabled(dev, NULL);
 	if (IS_ERR(drvdata->clk))
@@ -513,10 +509,8 @@ static int cctrng_probe(struct platform_device *pdev)
 
 	/* register the driver isr function */
 	rc = devm_request_irq(dev, irq, cc_isr, IRQF_SHARED, "cctrng", drvdata);
-	if (rc) {
-		dev_err(dev, "Could not register to interrupt %d\n", irq);
-		return rc;
-	}
+	if (rc)
+		return dev_err_probe(dev, rc, "Could not register to interrupt %d\n", irq);
 	dev_dbg(dev, "Registered to IRQ: %d\n", irq);
 
 	/* Clear all pending interrupts */
@@ -531,17 +525,13 @@ static int cctrng_probe(struct platform_device *pdev)
 
 	/* init PM */
 	rc = cc_trng_pm_init(drvdata);
-	if (rc) {
-		dev_err(dev, "cc_trng_pm_init failed\n");
-		return rc;
-	}
+	if (rc)
+		return dev_err_probe(dev, rc, "cc_trng_pm_init failed\n");
 
 	/* increment device's usage counter */
 	rc = cc_trng_pm_get(dev);
-	if (rc) {
-		dev_err(dev, "cc_trng_pm_get returned %x\n", rc);
-		return rc;
-	}
+	if (rc)
+		return dev_err_probe(dev, rc, "cc_trng_pm_get returned %x\n", rc);
 
 	/* set pending_hw to verify that HW won't be triggered from read */
 	atomic_set(&drvdata->pending_hw, 1);

From 5a3d66acf072ae78dfa06af712a42b24bef32c0f Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Fri, 21 Jul 2023 10:54:43 +0200
Subject: [PATCH 0631/2047] hwrng: cn10k - delete empty remove function

The remove function is empty, we can delete it. It's ok for a PCI driver
to have no remove function.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/cn10k-rng.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/char/hw_random/cn10k-rng.c b/drivers/char/hw_random/cn10k-rng.c
index 0cd7e1a8e499..794ec77feb2c 100644
--- a/drivers/char/hw_random/cn10k-rng.c
+++ b/drivers/char/hw_random/cn10k-rng.c
@@ -213,11 +213,6 @@ static int cn10k_rng_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return 0;
 }
 
-static void cn10k_rng_remove(struct pci_dev *pdev)
-{
-	/* Nothing to do */
-}
-
 static const struct pci_device_id cn10k_rng_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xA098) }, /* RNG PF */
 	{0,},
@@ -229,7 +224,6 @@ static struct pci_driver cn10k_rng_driver = {
 	.name		= "cn10k_rng",
 	.id_table	= cn10k_rng_id_table,
 	.probe		= cn10k_rng_probe,
-	.remove		= cn10k_rng_remove,
 };
 
 module_pci_driver(cn10k_rng_driver);

From 81511798bdfb1a61f97569436789721aace942fb Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Fri, 21 Jul 2023 10:54:44 +0200
Subject: [PATCH 0632/2047] hwrng: cn10k - use dev_err_probe

Use dev_err_probe in error paths of the probe function, making the code a
tiny bit simpler.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/cn10k-rng.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/char/hw_random/cn10k-rng.c b/drivers/char/hw_random/cn10k-rng.c
index 794ec77feb2c..31935316a160 100644
--- a/drivers/char/hw_random/cn10k-rng.c
+++ b/drivers/char/hw_random/cn10k-rng.c
@@ -187,10 +187,8 @@ static int cn10k_rng_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pci_set_drvdata(pdev, rng);
 
 	rng->reg_base = pcim_iomap(pdev, 0, 0);
-	if (!rng->reg_base) {
-		dev_err(&pdev->dev, "Error while mapping CSRs, exiting\n");
-		return -ENOMEM;
-	}
+	if (!rng->reg_base)
+		return dev_err_probe(&pdev->dev, -ENOMEM, "Error while mapping CSRs, exiting\n");
 
 	rng->ops.name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
 				       "cn10k-rng-%s", dev_name(&pdev->dev));
@@ -205,10 +203,8 @@ static int cn10k_rng_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	reset_rng_health_state(rng);
 
 	err = devm_hwrng_register(&pdev->dev, &rng->ops);
-	if (err) {
-		dev_err(&pdev->dev, "Could not register hwrng device.\n");
-		return err;
-	}
+	if (err)
+		return dev_err_probe(&pdev->dev, err, "Could not register hwrng device.\n");
 
 	return 0;
 }

From 6df04505f3b285f3bcfe81cf0461339b0f4ed41d Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 22 Jul 2023 16:53:58 +0200
Subject: [PATCH 0633/2047] crypto: caam - Use struct_size()

Use struct_size() instead of hand-writing it, when allocating a structure
with a flex array.

This is less verbose, more robust and more informative.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Gaurav Jain <gaurav.jain@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamhash.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 80deb003f0a5..9e5924e24f2e 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -708,9 +708,8 @@ static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req,
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	struct ahash_edesc *edesc;
-	unsigned int sg_size = sg_num * sizeof(struct sec4_sg_entry);
 
-	edesc = kzalloc(sizeof(*edesc) + sg_size, flags);
+	edesc = kzalloc(struct_size(edesc, sec4_sg, sg_num), flags);
 	if (!edesc) {
 		dev_err(ctx->jrdev, "could not allocate extended descriptor\n");
 		return NULL;

From 3de0152bf26ff0c5083ef831ba7676fc4c92e63a Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 22 Jul 2023 16:53:59 +0200
Subject: [PATCH 0634/2047] crypto: caam - Remove messages related to memory
 allocation failure

On memory allocation failure, the function calling stack is already logged.
So there is no need to explicitly log an extra message.

Remove them, ans simplify some code accordingly.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Gaurav Jain <gaurav.jain@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamhash.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 9e5924e24f2e..641c3afd59ca 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -368,10 +368,8 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key,
 	int ret;
 
 	desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL);
-	if (!desc) {
-		dev_err(jrdev, "unable to allocate key input memory\n");
+	if (!desc)
 		return -ENOMEM;
-	}
 
 	init_job_desc(desc, 0);
 
@@ -702,18 +700,14 @@ static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req,
 					     int sg_num, u32 *sh_desc,
 					     dma_addr_t sh_desc_dma)
 {
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	struct ahash_edesc *edesc;
 
 	edesc = kzalloc(struct_size(edesc, sec4_sg, sg_num), flags);
-	if (!edesc) {
-		dev_err(ctx->jrdev, "could not allocate extended descriptor\n");
+	if (!edesc)
 		return NULL;
-	}
 
 	state->edesc = edesc;
 
@@ -1908,10 +1902,8 @@ caam_hash_alloc(struct caam_hash_template *template,
 	struct crypto_alg *alg;
 
 	t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL);
-	if (!t_alg) {
-		pr_err("failed to allocate t_alg\n");
+	if (!t_alg)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	t_alg->ahash_alg = template->template_ahash;
 	halg = &t_alg->ahash_alg;

From 00df90934c9effc52b60749788b0eeb131d1bfd7 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 7 Jul 2023 17:38:11 +0100
Subject: [PATCH 0635/2047] perf/arm-cmn: Remove spurious event aliases

As the name suggests, the "partial DAT flit" event is only counted for
the DAT channel, and furthermore is only applicable to device ports, not
mesh links (strictly it's only device ports with CHI-A requesters
connected, but detecting that degree of detail is more bother than it's
worth). Stop generating spurious event aliases for other combinations
which aren't meaningful.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/b01a58e3ff05c322547fbfd015f6dbfedf555ed3.1688746690.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cmn.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index b8c15878bc86..a007648dbf1c 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -791,16 +791,22 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 	_CMN_EVENT_HNF(_model, _name##_read, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \
 	_CMN_EVENT_HNF(_model, _name##_write, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL)
 
-#define _CMN_EVENT_XP(_name, _event)				\
+#define _CMN_EVENT_XP_MESH(_name, _event)			\
 	__CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)),		\
 	__CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)),		\
 	__CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)),		\
-	__CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)),		\
+	__CMN_EVENT_XP(s_##_name, (_event) | (3 << 2))
+
+#define _CMN_EVENT_XP_PORT(_name, _event)			\
 	__CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)),	\
 	__CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)),	\
 	__CMN_EVENT_XP(p2_##_name, (_event) | (6 << 2)),	\
 	__CMN_EVENT_XP(p3_##_name, (_event) | (7 << 2))
 
+#define _CMN_EVENT_XP(_name, _event)				\
+	_CMN_EVENT_XP_MESH(_name, _event),			\
+	_CMN_EVENT_XP_PORT(_name, _event)
+
 /* Good thing there are only 3 fundamental XP events... */
 #define CMN_EVENT_XP(_name, _event)				\
 	_CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)),	\
@@ -813,6 +819,10 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 	_CMN_EVENT_XP(snp2_##_name, (_event) | (7 << 5)),	\
 	_CMN_EVENT_XP(req2_##_name, (_event) | (8 << 5))
 
+#define CMN_EVENT_XP_DAT(_name, _event)				\
+	_CMN_EVENT_XP_PORT(dat_##_name, (_event) | (3 << 5)),	\
+	_CMN_EVENT_XP_PORT(dat2_##_name, (_event) | (6 << 5))
+
 
 static struct attribute *arm_cmn_event_attrs[] = {
 	CMN_EVENT_DTC(cycles),
@@ -943,7 +953,7 @@ static struct attribute *arm_cmn_event_attrs[] = {
 
 	CMN_EVENT_XP(txflit_valid,			0x01),
 	CMN_EVENT_XP(txflit_stall,			0x02),
-	CMN_EVENT_XP(partial_dat_flit,			0x03),
+	CMN_EVENT_XP_DAT(partial_dat_flit,		0x03),
 	/* We treat watchpoints as a special made-up class of XP events */
 	CMN_EVENT_ATTR(CMN_ANY, watchpoint_up, CMN_TYPE_WP, CMN_WP_UP),
 	CMN_EVENT_ATTR(CMN_ANY, watchpoint_down, CMN_TYPE_WP, CMN_WP_DOWN),

From b1b7dc38e4827bf842ed763a4dfb4c0d72259ad5 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 7 Jul 2023 17:38:12 +0100
Subject: [PATCH 0636/2047] perf/arm-cmn: Refactor HN-F event selector macros

Refactor the macros for defining HN-F events with additional selectors,
so they can be shared with another upcoming similar-but-distinct HN
type. No functional change intended.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/0f05327941e06c665dbfd47e03fad29276b9e63c.1688746690.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cmn.c | 49 +++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index a007648dbf1c..1efe9b72c0e6 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -742,8 +742,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 	_CMN_EVENT_ATTR(_model, dn_##_name, CMN_TYPE_DVM, _event, _occup, _fsel)
 #define CMN_EVENT_DTC(_name)					\
 	CMN_EVENT_ATTR(CMN_ANY, dtc_##_name, CMN_TYPE_DTC, 0)
-#define _CMN_EVENT_HNF(_model, _name, _event, _occup, _fsel)		\
-	_CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event, _occup, _fsel)
+#define CMN_EVENT_HNF(_model, _name, _event)			\
+	CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event)
 #define CMN_EVENT_HNI(_name, _event)				\
 	CMN_EVENT_ATTR(CMN_ANY, hni_##_name, CMN_TYPE_HNI, _event)
 #define CMN_EVENT_HNP(_name, _event)				\
@@ -775,21 +775,34 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 	_CMN_EVENT_DVM(_model, _name##_all, _event, 0, SEL_OCCUP1ID),	\
 	_CMN_EVENT_DVM(_model, _name##_dvmop, _event, 1, SEL_OCCUP1ID),	\
 	_CMN_EVENT_DVM(_model, _name##_dvmsync, _event, 2, SEL_OCCUP1ID)
-#define CMN_EVENT_HNF(_model, _name, _event)			\
-	_CMN_EVENT_HNF(_model, _name, _event, 0, SEL_NONE)
+
+#define CMN_EVENT_HN_OCC(_model, _name, _type, _event)		\
+	_CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_OCCUP1ID), \
+	_CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 1, SEL_OCCUP1ID), \
+	_CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 2, SEL_OCCUP1ID), \
+	_CMN_EVENT_ATTR(_model, _name##_atomic, _type, _event, 3, SEL_OCCUP1ID), \
+	_CMN_EVENT_ATTR(_model, _name##_stash, _type, _event, 4, SEL_OCCUP1ID)
+#define CMN_EVENT_HN_CLS(_model, _name, _type, _event)			\
+	_CMN_EVENT_ATTR(_model, _name##_class0, _type, _event, 0, SEL_CLASS_OCCUP_ID), \
+	_CMN_EVENT_ATTR(_model, _name##_class1, _type, _event, 1, SEL_CLASS_OCCUP_ID), \
+	_CMN_EVENT_ATTR(_model, _name##_class2, _type, _event, 2, SEL_CLASS_OCCUP_ID), \
+	_CMN_EVENT_ATTR(_model, _name##_class3, _type, _event, 3, SEL_CLASS_OCCUP_ID)
+#define CMN_EVENT_HN_SNT(_model, _name, _type, _event)			\
+	_CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \
+	_CMN_EVENT_ATTR(_model, _name##_group0_read, _type, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \
+	_CMN_EVENT_ATTR(_model, _name##_group0_write, _type, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \
+	_CMN_EVENT_ATTR(_model, _name##_group1_read, _type, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \
+	_CMN_EVENT_ATTR(_model, _name##_group1_write, _type, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \
+	_CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \
+	_CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL)
+
+#define CMN_EVENT_HNF_OCC(_model, _name, _event)			\
+	CMN_EVENT_HN_OCC(_model, hnf_##_name, CMN_TYPE_HNF, _event)
 #define CMN_EVENT_HNF_CLS(_model, _name, _event)			\
-	_CMN_EVENT_HNF(_model, _name##_class0, _event, 0, SEL_CLASS_OCCUP_ID), \
-	_CMN_EVENT_HNF(_model, _name##_class1, _event, 1, SEL_CLASS_OCCUP_ID), \
-	_CMN_EVENT_HNF(_model, _name##_class2, _event, 2, SEL_CLASS_OCCUP_ID), \
-	_CMN_EVENT_HNF(_model, _name##_class3, _event, 3, SEL_CLASS_OCCUP_ID)
+	CMN_EVENT_HN_CLS(_model, hnf_##_name, CMN_TYPE_HNS, _event)
 #define CMN_EVENT_HNF_SNT(_model, _name, _event)			\
-	_CMN_EVENT_HNF(_model, _name##_all, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \
-	_CMN_EVENT_HNF(_model, _name##_group0_read, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \
-	_CMN_EVENT_HNF(_model, _name##_group0_write, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \
-	_CMN_EVENT_HNF(_model, _name##_group1_read, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \
-	_CMN_EVENT_HNF(_model, _name##_group1_write, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \
-	_CMN_EVENT_HNF(_model, _name##_read, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \
-	_CMN_EVENT_HNF(_model, _name##_write, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL)
+	CMN_EVENT_HN_SNT(_model, hnf_##_name, CMN_TYPE_HNF, _event)
+
 
 #define _CMN_EVENT_XP_MESH(_name, _event)			\
 	__CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)),		\
@@ -872,11 +885,7 @@ static struct attribute *arm_cmn_event_attrs[] = {
 	CMN_EVENT_HNF(CMN_ANY, mc_retries,		0x0c),
 	CMN_EVENT_HNF(CMN_ANY, mc_reqs,			0x0d),
 	CMN_EVENT_HNF(CMN_ANY, qos_hh_retry,		0x0e),
-	_CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_all,	0x0f, 0, SEL_OCCUP1ID),
-	_CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_read, 0x0f, 1, SEL_OCCUP1ID),
-	_CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_write, 0x0f, 2, SEL_OCCUP1ID),
-	_CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_atomic, 0x0f, 3, SEL_OCCUP1ID),
-	_CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_stash, 0x0f, 4, SEL_OCCUP1ID),
+	CMN_EVENT_HNF_OCC(CMN_ANY, qos_pocq_occupancy,	0x0f),
 	CMN_EVENT_HNF(CMN_ANY, pocq_addrhaz,		0x10),
 	CMN_EVENT_HNF(CMN_ANY, pocq_atomic_addrhaz,	0x11),
 	CMN_EVENT_HNF(CMN_ANY, ld_st_swp_adq_full,	0x12),

From ac18ea1a8935920a6b3284f24e58f903b95dac20 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 7 Jul 2023 17:38:13 +0100
Subject: [PATCH 0637/2047] perf/arm-cmn: Add CMN-700 r3 support

CMN-700 r3 has a special configuration option for a so-called "Super
Home Node", which is a superset of the standard HN-F that also manages
remote-chip coherency for multi-chip setups. As such it has a similar
but expanded set of PMU events compared to HN-F, with some additional
filtering options to boot.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/49153b72253f6af0e625cb55b9e1b825b110c49c.1688746690.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cmn.c | 94 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 1efe9b72c0e6..913dc04b3a40 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -72,6 +72,8 @@
 /* For most nodes, this is all there is */
 #define CMN_PMU_EVENT_SEL		0x000
 #define CMN__PMU_CBUSY_SNTHROTTLE_SEL	GENMASK_ULL(44, 42)
+#define CMN__PMU_SN_HOME_SEL		GENMASK_ULL(40, 39)
+#define CMN__PMU_HBT_LBT_SEL		GENMASK_ULL(38, 37)
 #define CMN__PMU_CLASS_OCCUP_ID		GENMASK_ULL(36, 35)
 /* Technically this is 4 bits wide on DNs, but we only use 2 there anyway */
 #define CMN__PMU_OCCUP1_ID		GENMASK_ULL(34, 32)
@@ -226,6 +228,7 @@ enum cmn_revision {
 	REV_CMN700_R0P0 = 0,
 	REV_CMN700_R1P0,
 	REV_CMN700_R2P0,
+	REV_CMN700_R3P0,
 	REV_CI700_R0P0 = 0,
 	REV_CI700_R1P0,
 	REV_CI700_R2P0,
@@ -254,6 +257,9 @@ enum cmn_node_type {
 	CMN_TYPE_CCHA,
 	CMN_TYPE_CCLA,
 	CMN_TYPE_CCLA_RNI,
+	CMN_TYPE_HNS = 0x200,
+	CMN_TYPE_HNS_MPAM_S,
+	CMN_TYPE_HNS_MPAM_NS,
 	/* Not a real node type */
 	CMN_TYPE_WP = 0x7770
 };
@@ -263,6 +269,8 @@ enum cmn_filter_select {
 	SEL_OCCUP1ID,
 	SEL_CLASS_OCCUP_ID,
 	SEL_CBUSY_SNTHROTTLE_SEL,
+	SEL_HBT_LBT_SEL,
+	SEL_SN_HOME_SEL,
 	SEL_MAX
 };
 
@@ -768,6 +776,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 	CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event)
 #define CMN_EVENT_CCLA_RNI(_name, _event)				\
 	CMN_EVENT_ATTR(CMN_ANY, ccla_rni_##_name, CMN_TYPE_CCLA_RNI, _event)
+#define CMN_EVENT_HNS(_name, _event)				\
+	CMN_EVENT_ATTR(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
 
 #define CMN_EVENT_DVM(_model, _name, _event)			\
 	_CMN_EVENT_DVM(_model, _name, _event, 0, SEL_NONE)
@@ -803,6 +813,23 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 #define CMN_EVENT_HNF_SNT(_model, _name, _event)			\
 	CMN_EVENT_HN_SNT(_model, hnf_##_name, CMN_TYPE_HNF, _event)
 
+#define CMN_EVENT_HNS_OCC(_name, _event)				\
+	CMN_EVENT_HN_OCC(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event),	\
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_rxsnp, CMN_TYPE_HNS, _event, 5, SEL_OCCUP1ID), \
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 6, SEL_OCCUP1ID), \
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 7, SEL_OCCUP1ID)
+#define CMN_EVENT_HNS_CLS( _name, _event)				\
+	CMN_EVENT_HN_CLS(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
+#define CMN_EVENT_HNS_SNT(_name, _event)				\
+	CMN_EVENT_HN_SNT(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
+#define CMN_EVENT_HNS_HBT(_name, _event)				\
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_HBT_LBT_SEL), \
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 1, SEL_HBT_LBT_SEL), \
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 2, SEL_HBT_LBT_SEL)
+#define CMN_EVENT_HNS_SNH(_name, _event)				\
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_SN_HOME_SEL), \
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_sn, CMN_TYPE_HNS, _event, 1, SEL_SN_HOME_SEL), \
+	_CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_home, CMN_TYPE_HNS, _event, 2, SEL_SN_HOME_SEL)
 
 #define _CMN_EVENT_XP_MESH(_name, _event)			\
 	__CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)),		\
@@ -1151,6 +1178,66 @@ static struct attribute *arm_cmn_event_attrs[] = {
 	CMN_EVENT_CCLA(pfwd_sndr_stalls_static_crd,	0x2a),
 	CMN_EVENT_CCLA(pfwd_sndr_stalls_dynmaic_crd,	0x2b),
 
+	CMN_EVENT_HNS_HBT(cache_miss,			0x01),
+	CMN_EVENT_HNS_HBT(slc_sf_cache_access,		0x02),
+	CMN_EVENT_HNS_HBT(cache_fill,			0x03),
+	CMN_EVENT_HNS_HBT(pocq_retry,			0x04),
+	CMN_EVENT_HNS_HBT(pocq_reqs_recvd,		0x05),
+	CMN_EVENT_HNS_HBT(sf_hit,			0x06),
+	CMN_EVENT_HNS_HBT(sf_evictions,			0x07),
+	CMN_EVENT_HNS(dir_snoops_sent,			0x08),
+	CMN_EVENT_HNS(brd_snoops_sent,			0x09),
+	CMN_EVENT_HNS_HBT(slc_eviction,			0x0a),
+	CMN_EVENT_HNS_HBT(slc_fill_invalid_way,		0x0b),
+	CMN_EVENT_HNS(mc_retries_local,			0x0c),
+	CMN_EVENT_HNS_SNH(mc_reqs_local,		0x0d),
+	CMN_EVENT_HNS(qos_hh_retry,			0x0e),
+	CMN_EVENT_HNS_OCC(qos_pocq_occupancy,		0x0f),
+	CMN_EVENT_HNS(pocq_addrhaz,			0x10),
+	CMN_EVENT_HNS(pocq_atomic_addrhaz,		0x11),
+	CMN_EVENT_HNS(ld_st_swp_adq_full,		0x12),
+	CMN_EVENT_HNS(cmp_adq_full,			0x13),
+	CMN_EVENT_HNS(txdat_stall,			0x14),
+	CMN_EVENT_HNS(txrsp_stall,			0x15),
+	CMN_EVENT_HNS(seq_full,				0x16),
+	CMN_EVENT_HNS(seq_hit,				0x17),
+	CMN_EVENT_HNS(snp_sent,				0x18),
+	CMN_EVENT_HNS(sfbi_dir_snp_sent,		0x19),
+	CMN_EVENT_HNS(sfbi_brd_snp_sent,		0x1a),
+	CMN_EVENT_HNS(intv_dirty,			0x1c),
+	CMN_EVENT_HNS(stash_snp_sent,			0x1d),
+	CMN_EVENT_HNS(stash_data_pull,			0x1e),
+	CMN_EVENT_HNS(snp_fwded,			0x1f),
+	CMN_EVENT_HNS(atomic_fwd,			0x20),
+	CMN_EVENT_HNS(mpam_hardlim,			0x21),
+	CMN_EVENT_HNS(mpam_softlim,			0x22),
+	CMN_EVENT_HNS(snp_sent_cluster,			0x23),
+	CMN_EVENT_HNS(sf_imprecise_evict,		0x24),
+	CMN_EVENT_HNS(sf_evict_shared_line,		0x25),
+	CMN_EVENT_HNS_CLS(pocq_class_occup,		0x26),
+	CMN_EVENT_HNS_CLS(pocq_class_retry,		0x27),
+	CMN_EVENT_HNS_CLS(class_mc_reqs_local,		0x28),
+	CMN_EVENT_HNS_CLS(class_cgnt_cmin,		0x29),
+	CMN_EVENT_HNS_SNT(sn_throttle,			0x2a),
+	CMN_EVENT_HNS_SNT(sn_throttle_min,		0x2b),
+	CMN_EVENT_HNS(sf_precise_to_imprecise,		0x2c),
+	CMN_EVENT_HNS(snp_intv_cln,			0x2d),
+	CMN_EVENT_HNS(nc_excl,				0x2e),
+	CMN_EVENT_HNS(excl_mon_ovfl,			0x2f),
+	CMN_EVENT_HNS(snp_req_recvd,			0x30),
+	CMN_EVENT_HNS(snp_req_byp_pocq,			0x31),
+	CMN_EVENT_HNS(dir_ccgha_snp_sent,		0x32),
+	CMN_EVENT_HNS(brd_ccgha_snp_sent,		0x33),
+	CMN_EVENT_HNS(ccgha_snp_stall,			0x34),
+	CMN_EVENT_HNS(lbt_req_hardlim,			0x35),
+	CMN_EVENT_HNS(hbt_req_hardlim,			0x36),
+	CMN_EVENT_HNS(sf_reupdate,			0x37),
+	CMN_EVENT_HNS(excl_sf_imprecise,		0x38),
+	CMN_EVENT_HNS(snp_pocq_addrhaz,			0x39),
+	CMN_EVENT_HNS(mc_retries_remote,		0x3a),
+	CMN_EVENT_HNS_SNH(mc_reqs_remote,		0x3b),
+	CMN_EVENT_HNS_CLS(class_mc_reqs_remote,		0x3c),
+
 	NULL
 };
 
@@ -1392,6 +1479,10 @@ static int arm_cmn_set_event_sel_hi(struct arm_cmn_node *dn,
 		dn->occupid[fsel].val = occupid;
 		reg = FIELD_PREP(CMN__PMU_CBUSY_SNTHROTTLE_SEL,
 				 dn->occupid[SEL_CBUSY_SNTHROTTLE_SEL].val) |
+		      FIELD_PREP(CMN__PMU_SN_HOME_SEL,
+				 dn->occupid[SEL_SN_HOME_SEL].val) |
+		      FIELD_PREP(CMN__PMU_HBT_LBT_SEL,
+				 dn->occupid[SEL_HBT_LBT_SEL].val) |
 		      FIELD_PREP(CMN__PMU_CLASS_OCCUP_ID,
 				 dn->occupid[SEL_CLASS_OCCUP_ID].val) |
 		      FIELD_PREP(CMN__PMU_OCCUP1_ID,
@@ -2219,6 +2310,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 			case CMN_TYPE_CCRA:
 			case CMN_TYPE_CCHA:
 			case CMN_TYPE_CCLA:
+			case CMN_TYPE_HNS:
 				dn++;
 				break;
 			/* Nothing to see here */
@@ -2226,6 +2318,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 			case CMN_TYPE_MPAM_NS:
 			case CMN_TYPE_RNSAM:
 			case CMN_TYPE_CXLA:
+			case CMN_TYPE_HNS_MPAM_S:
+			case CMN_TYPE_HNS_MPAM_NS:
 				break;
 			/*
 			 * Split "optimised" combination nodes into separate

From 026badeb7055e41000e4b139833ca5bafe360eb6 Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@ew.tq-group.com>
Date: Fri, 28 Jul 2023 08:08:04 +0200
Subject: [PATCH 0638/2047] spi: fspi: Add power-domains to the DT bindings

i.MX8(X) based SoC use a power domain. Allow supplying this domain in
bindings.

Signed-off-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Acked-by: Han Xu <han.xu@nxp.com>
Link: https://lore.kernel.org/r/20230728060804.22796-1-alexander.stein@ew.tq-group.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml
index a813c971ecf6..7fd591145480 100644
--- a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml
@@ -45,6 +45,9 @@ properties:
       - const: fspi_en
       - const: fspi
 
+  power-domains:
+    maxItems: 1
+
 required:
   - compatible
   - reg

From 71ee2a4f95e205bd33e79c7bd4bece3885e0e366 Mon Sep 17 00:00:00 2001
From: Chen Jiahao <chenjiahao16@huawei.com>
Date: Fri, 28 Jul 2023 15:57:29 +0800
Subject: [PATCH 0639/2047] spi: microchip-core: Clean up redundant
 dev_err_probe()

Refering to platform_get_irq()'s definition, the return value has
already been checked if ret < 0, and printed via dev_err_probe().
Calling dev_err_probe() one more time outside platform_get_irq()
is obviously redundant.

Furthermore, platform_get_irq() will never return irq equals 0,
removing spi->irq == 0 checking to clean it up.

Signed-off-by: Chen Jiahao <chenjiahao16@huawei.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20230728075729.3451867-1-chenjiahao16@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-microchip-core.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-microchip-core.c b/drivers/spi/spi-microchip-core.c
index b59e8a0c5b97..b451cd4860ec 100644
--- a/drivers/spi/spi-microchip-core.c
+++ b/drivers/spi/spi-microchip-core.c
@@ -530,10 +530,8 @@ static int mchp_corespi_probe(struct platform_device *pdev)
 		return PTR_ERR(spi->regs);
 
 	spi->irq = platform_get_irq(pdev, 0);
-	if (spi->irq <= 0)
-		return dev_err_probe(&pdev->dev, -ENXIO,
-				     "invalid IRQ %d for SPI controller\n",
-				     spi->irq);
+	if (spi->irq < 0)
+		return spi->irq;
 
 	ret = devm_request_irq(&pdev->dev, spi->irq, mchp_corespi_interrupt,
 			       IRQF_SHARED, dev_name(&pdev->dev), master);

From 5daeb41a6fc9d0d81cb2291884b7410e062d8fa1 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar@cyphar.com>
Date: Fri, 28 Jul 2023 21:58:26 +1000
Subject: [PATCH 0640/2047] fchmodat2: add support for AT_EMPTY_PATH

This allows userspace to avoid going through /proc/self/fd when dealing
with all types of file descriptors for chmod(), and makes fchmodat2() a
proper superset of all other chmod syscalls.

The primary difference between fchmodat2(AT_EMPTY_PATH) and fchmod() is
that fchmod() doesn't operate on O_PATH file descriptors by design. To
quote open(2):

> O_PATH (since Linux 2.6.39)
> [...]
> The file itself is not opened, and other file operations (e.g.,
> read(2), write(2), fchmod(2), fchown(2), fgetxattr(2), ioctl(2),
> mmap(2)) fail with the error EBADF.

However, procfs has allowed userspace to do this operation ever since
the introduction of O_PATH through magic-links, so adding this feature
is only an improvement for programs that have to mess around with
/proc/self/fd/$n today to get this behaviour. In addition,
fchownat(AT_EMPTY_PATH) has existed since the introduction of O_PATH and
allows chown() operations directly on O_PATH descriptors.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
Acked-by: Alexey Gladkov <legion@kernel.org>
Message-Id: <20230728-fchmodat2-at_empty_path-v1-1-f3add31d3516@cyphar.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/open.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/open.c b/fs/open.c
index e52d78e5a333..b8883ec286f5 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -678,10 +678,12 @@ static int do_fchmodat(int dfd, const char __user *filename, umode_t mode,
 	int error;
 	unsigned int lookup_flags;
 
-	if (unlikely(flags & ~AT_SYMLINK_NOFOLLOW))
+	if (unlikely(flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)))
 		return -EINVAL;
 
 	lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+	if (flags & AT_EMPTY_PATH)
+		lookup_flags |= LOOKUP_EMPTY;
 
 retry:
 	error = user_path_at(dfd, filename, lookup_flags, &path);

From c28c07fe235ccaafe11003393de064b2a24dd2e3 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 24 Jul 2023 17:20:26 +0200
Subject: [PATCH 0641/2047] s390/mm: move pfault code to own C file

The pfault code has nothing to do with regular fault handling.

Therefore move it to an own C file. Also add an own pfault header
file. This way changes to setup.h don't cause a recompile of the
pfault code and vice versa.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/pfault.h   |  24 ++++
 arch/s390/include/asm/setup.h    |   8 --
 arch/s390/kernel/machine_kexec.c |   1 +
 arch/s390/kernel/smp.c           |   1 +
 arch/s390/mm/Makefile            |   1 +
 arch/s390/mm/fault.c             | 228 -----------------------------
 arch/s390/mm/pfault.c            | 239 +++++++++++++++++++++++++++++++
 7 files changed, 266 insertions(+), 236 deletions(-)
 create mode 100644 arch/s390/include/asm/pfault.h
 create mode 100644 arch/s390/mm/pfault.c

diff --git a/arch/s390/include/asm/pfault.h b/arch/s390/include/asm/pfault.h
new file mode 100644
index 000000000000..beabeebf2859
--- /dev/null
+++ b/arch/s390/include/asm/pfault.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 1999, 2023
+ */
+#ifndef _ASM_S390_PFAULT_H
+#define _ASM_S390_PFAULT_H
+
+int __pfault_init(void);
+void __pfault_fini(void);
+
+static inline int pfault_init(void)
+{
+	if (IS_ENABLED(CONFIG_PFAULT))
+		return __pfault_init();
+	return -1;
+}
+
+static inline void pfault_fini(void)
+{
+	if (IS_ENABLED(CONFIG_PFAULT))
+		__pfault_fini();
+}
+
+#endif /* _ASM_S390_PFAULT_H */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index e795f425627a..b30fe91166e3 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -118,14 +118,6 @@ extern unsigned int console_irq;
 #define SET_CONSOLE_VT220	do { console_mode = 4; } while (0)
 #define SET_CONSOLE_HVC		do { console_mode = 5; } while (0)
 
-#ifdef CONFIG_PFAULT
-extern int pfault_init(void);
-extern void pfault_fini(void);
-#else /* CONFIG_PFAULT */
-#define pfault_init()		({-1;})
-#define pfault_fini()		do { } while (0)
-#endif /* CONFIG_PFAULT */
-
 #ifdef CONFIG_VMCP
 void vmcp_cma_reserve(void);
 #else
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 6d9276c096a6..12a2bd4fc88c 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -13,6 +13,7 @@
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
 #include <linux/debug_locks.h>
+#include <asm/pfault.h>
 #include <asm/cio.h>
 #include <asm/setup.h>
 #include <asm/smp.h>
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index f9a2b755f510..9244130721d6 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -37,6 +37,7 @@
 #include <linux/crash_dump.h>
 #include <linux/kprobes.h>
 #include <asm/asm-offsets.h>
+#include <asm/pfault.h>
 #include <asm/diag.h>
 #include <asm/switch_to.h>
 #include <asm/facility.h>
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index d90db06a8af5..352ff520fd94 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_PTDUMP_CORE)	+= dump_pagetables.o
 obj-$(CONFIG_PGSTE)		+= gmap.o
+obj-$(CONFIG_PFAULT)		+= pfault.o
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2f123429a291..b5e1bea9194c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -43,8 +43,6 @@
 #include "../kernel/entry.h"
 
 #define __FAIL_ADDR_MASK -4096L
-#define __SUBCODE_MASK 0x0600
-#define __PF_RES_FIELD 0x8000000000000000ULL
 
 /*
  * Allocate private vm_fault_reason from top.  Please make sure it won't
@@ -583,232 +581,6 @@ void do_dat_exception(struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(do_dat_exception);
 
-#ifdef CONFIG_PFAULT 
-/*
- * 'pfault' pseudo page faults routines.
- */
-static int pfault_disable;
-
-static int __init nopfault(char *str)
-{
-	pfault_disable = 1;
-	return 1;
-}
-
-__setup("nopfault", nopfault);
-
-struct pfault_refbk {
-	u16 refdiagc;
-	u16 reffcode;
-	u16 refdwlen;
-	u16 refversn;
-	u64 refgaddr;
-	u64 refselmk;
-	u64 refcmpmk;
-	u64 reserved;
-} __attribute__ ((packed, aligned(8)));
-
-static struct pfault_refbk pfault_init_refbk = {
-	.refdiagc = 0x258,
-	.reffcode = 0,
-	.refdwlen = 5,
-	.refversn = 2,
-	.refgaddr = __LC_LPP,
-	.refselmk = 1ULL << 48,
-	.refcmpmk = 1ULL << 48,
-	.reserved = __PF_RES_FIELD
-};
-
-int pfault_init(void)
-{
-        int rc;
-
-	if (pfault_disable)
-		return -1;
-	diag_stat_inc(DIAG_STAT_X258);
-	asm volatile(
-		"	diag	%1,%0,0x258\n"
-		"0:	j	2f\n"
-		"1:	la	%0,8\n"
-		"2:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc)
-		: "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc");
-        return rc;
-}
-
-static struct pfault_refbk pfault_fini_refbk = {
-	.refdiagc = 0x258,
-	.reffcode = 1,
-	.refdwlen = 5,
-	.refversn = 2,
-};
-
-void pfault_fini(void)
-{
-
-	if (pfault_disable)
-		return;
-	diag_stat_inc(DIAG_STAT_X258);
-	asm volatile(
-		"	diag	%0,0,0x258\n"
-		"0:	nopr	%%r7\n"
-		EX_TABLE(0b,0b)
-		: : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc");
-}
-
-static DEFINE_SPINLOCK(pfault_lock);
-static LIST_HEAD(pfault_list);
-
-#define PF_COMPLETE	0x0080
-
-/*
- * The mechanism of our pfault code: if Linux is running as guest, runs a user
- * space process and the user space process accesses a page that the host has
- * paged out we get a pfault interrupt.
- *
- * This allows us, within the guest, to schedule a different process. Without
- * this mechanism the host would have to suspend the whole virtual cpu until
- * the page has been paged in.
- *
- * So when we get such an interrupt then we set the state of the current task
- * to uninterruptible and also set the need_resched flag. Both happens within
- * interrupt context(!). If we later on want to return to user space we
- * recognize the need_resched flag and then call schedule().  It's not very
- * obvious how this works...
- *
- * Of course we have a lot of additional fun with the completion interrupt (->
- * host signals that a page of a process has been paged in and the process can
- * continue to run). This interrupt can arrive on any cpu and, since we have
- * virtual cpus, actually appear before the interrupt that signals that a page
- * is missing.
- */
-static void pfault_interrupt(struct ext_code ext_code,
-			     unsigned int param32, unsigned long param64)
-{
-	struct task_struct *tsk;
-	__u16 subcode;
-	pid_t pid;
-
-	/*
-	 * Get the external interruption subcode & pfault initial/completion
-	 * signal bit. VM stores this in the 'cpu address' field associated
-	 * with the external interrupt.
-	 */
-	subcode = ext_code.subcode;
-	if ((subcode & 0xff00) != __SUBCODE_MASK)
-		return;
-	inc_irq_stat(IRQEXT_PFL);
-	/* Get the token (= pid of the affected task). */
-	pid = param64 & LPP_PID_MASK;
-	rcu_read_lock();
-	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
-	if (tsk)
-		get_task_struct(tsk);
-	rcu_read_unlock();
-	if (!tsk)
-		return;
-	spin_lock(&pfault_lock);
-	if (subcode & PF_COMPLETE) {
-		/* signal bit is set -> a page has been swapped in by VM */
-		if (tsk->thread.pfault_wait == 1) {
-			/* Initial interrupt was faster than the completion
-			 * interrupt. pfault_wait is valid. Set pfault_wait
-			 * back to zero and wake up the process. This can
-			 * safely be done because the task is still sleeping
-			 * and can't produce new pfaults. */
-			tsk->thread.pfault_wait = 0;
-			list_del(&tsk->thread.list);
-			wake_up_process(tsk);
-			put_task_struct(tsk);
-		} else {
-			/* Completion interrupt was faster than initial
-			 * interrupt. Set pfault_wait to -1 so the initial
-			 * interrupt doesn't put the task to sleep.
-			 * If the task is not running, ignore the completion
-			 * interrupt since it must be a leftover of a PFAULT
-			 * CANCEL operation which didn't remove all pending
-			 * completion interrupts. */
-			if (task_is_running(tsk))
-				tsk->thread.pfault_wait = -1;
-		}
-	} else {
-		/* signal bit not set -> a real page is missing. */
-		if (WARN_ON_ONCE(tsk != current))
-			goto out;
-		if (tsk->thread.pfault_wait == 1) {
-			/* Already on the list with a reference: put to sleep */
-			goto block;
-		} else if (tsk->thread.pfault_wait == -1) {
-			/* Completion interrupt was faster than the initial
-			 * interrupt (pfault_wait == -1). Set pfault_wait
-			 * back to zero and exit. */
-			tsk->thread.pfault_wait = 0;
-		} else {
-			/* Initial interrupt arrived before completion
-			 * interrupt. Let the task sleep.
-			 * An extra task reference is needed since a different
-			 * cpu may set the task state to TASK_RUNNING again
-			 * before the scheduler is reached. */
-			get_task_struct(tsk);
-			tsk->thread.pfault_wait = 1;
-			list_add(&tsk->thread.list, &pfault_list);
-block:
-			/* Since this must be a userspace fault, there
-			 * is no kernel task state to trample. Rely on the
-			 * return to userspace schedule() to block. */
-			__set_current_state(TASK_UNINTERRUPTIBLE);
-			set_tsk_need_resched(tsk);
-			set_preempt_need_resched();
-		}
-	}
-out:
-	spin_unlock(&pfault_lock);
-	put_task_struct(tsk);
-}
-
-static int pfault_cpu_dead(unsigned int cpu)
-{
-	struct thread_struct *thread, *next;
-	struct task_struct *tsk;
-
-	spin_lock_irq(&pfault_lock);
-	list_for_each_entry_safe(thread, next, &pfault_list, list) {
-		thread->pfault_wait = 0;
-		list_del(&thread->list);
-		tsk = container_of(thread, struct task_struct, thread);
-		wake_up_process(tsk);
-		put_task_struct(tsk);
-	}
-	spin_unlock_irq(&pfault_lock);
-	return 0;
-}
-
-static int __init pfault_irq_init(void)
-{
-	int rc;
-
-	rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
-	if (rc)
-		goto out_extint;
-	rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
-	if (rc)
-		goto out_pfault;
-	irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
-	cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
-				  NULL, pfault_cpu_dead);
-	return 0;
-
-out_pfault:
-	unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
-out_extint:
-	pfault_disable = 1;
-	return rc;
-}
-early_initcall(pfault_irq_init);
-
-#endif /* CONFIG_PFAULT */
-
 #if IS_ENABLED(CONFIG_PGSTE)
 
 void do_secure_storage_access(struct pt_regs *regs)
diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
new file mode 100644
index 000000000000..5c0547f8d5ee
--- /dev/null
+++ b/arch/s390/mm/pfault.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 1999, 2023
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/sched/task.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <asm/asm-extable.h>
+#include <asm/pfault.h>
+#include <asm/diag.h>
+
+#define __SUBCODE_MASK 0x0600
+#define __PF_RES_FIELD 0x8000000000000000ULL
+
+/*
+ * 'pfault' pseudo page faults routines.
+ */
+static int pfault_disable;
+
+static int __init nopfault(char *str)
+{
+	pfault_disable = 1;
+	return 1;
+}
+
+__setup("nopfault", nopfault);
+
+struct pfault_refbk {
+	u16 refdiagc;
+	u16 reffcode;
+	u16 refdwlen;
+	u16 refversn;
+	u64 refgaddr;
+	u64 refselmk;
+	u64 refcmpmk;
+	u64 reserved;
+} __attribute__ ((packed, aligned(8)));
+
+static struct pfault_refbk pfault_init_refbk = {
+	.refdiagc = 0x258,
+	.reffcode = 0,
+	.refdwlen = 5,
+	.refversn = 2,
+	.refgaddr = __LC_LPP,
+	.refselmk = 1ULL << 48,
+	.refcmpmk = 1ULL << 48,
+	.reserved = __PF_RES_FIELD
+};
+
+int __pfault_init(void)
+{
+        int rc;
+
+	if (pfault_disable)
+		return -1;
+	diag_stat_inc(DIAG_STAT_X258);
+	asm volatile(
+		"	diag	%1,%0,0x258\n"
+		"0:	j	2f\n"
+		"1:	la	%0,8\n"
+		"2:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc)
+		: "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc");
+        return rc;
+}
+
+static struct pfault_refbk pfault_fini_refbk = {
+	.refdiagc = 0x258,
+	.reffcode = 1,
+	.refdwlen = 5,
+	.refversn = 2,
+};
+
+void __pfault_fini(void)
+{
+
+	if (pfault_disable)
+		return;
+	diag_stat_inc(DIAG_STAT_X258);
+	asm volatile(
+		"	diag	%0,0,0x258\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b,0b)
+		: : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc");
+}
+
+static DEFINE_SPINLOCK(pfault_lock);
+static LIST_HEAD(pfault_list);
+
+#define PF_COMPLETE	0x0080
+
+/*
+ * The mechanism of our pfault code: if Linux is running as guest, runs a user
+ * space process and the user space process accesses a page that the host has
+ * paged out we get a pfault interrupt.
+ *
+ * This allows us, within the guest, to schedule a different process. Without
+ * this mechanism the host would have to suspend the whole virtual cpu until
+ * the page has been paged in.
+ *
+ * So when we get such an interrupt then we set the state of the current task
+ * to uninterruptible and also set the need_resched flag. Both happens within
+ * interrupt context(!). If we later on want to return to user space we
+ * recognize the need_resched flag and then call schedule().  It's not very
+ * obvious how this works...
+ *
+ * Of course we have a lot of additional fun with the completion interrupt (->
+ * host signals that a page of a process has been paged in and the process can
+ * continue to run). This interrupt can arrive on any cpu and, since we have
+ * virtual cpus, actually appear before the interrupt that signals that a page
+ * is missing.
+ */
+static void pfault_interrupt(struct ext_code ext_code,
+			     unsigned int param32, unsigned long param64)
+{
+	struct task_struct *tsk;
+	__u16 subcode;
+	pid_t pid;
+
+	/*
+	 * Get the external interruption subcode & pfault initial/completion
+	 * signal bit. VM stores this in the 'cpu address' field associated
+	 * with the external interrupt.
+	 */
+	subcode = ext_code.subcode;
+	if ((subcode & 0xff00) != __SUBCODE_MASK)
+		return;
+	inc_irq_stat(IRQEXT_PFL);
+	/* Get the token (= pid of the affected task). */
+	pid = param64 & LPP_PID_MASK;
+	rcu_read_lock();
+	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+	if (tsk)
+		get_task_struct(tsk);
+	rcu_read_unlock();
+	if (!tsk)
+		return;
+	spin_lock(&pfault_lock);
+	if (subcode & PF_COMPLETE) {
+		/* signal bit is set -> a page has been swapped in by VM */
+		if (tsk->thread.pfault_wait == 1) {
+			/* Initial interrupt was faster than the completion
+			 * interrupt. pfault_wait is valid. Set pfault_wait
+			 * back to zero and wake up the process. This can
+			 * safely be done because the task is still sleeping
+			 * and can't produce new pfaults. */
+			tsk->thread.pfault_wait = 0;
+			list_del(&tsk->thread.list);
+			wake_up_process(tsk);
+			put_task_struct(tsk);
+		} else {
+			/* Completion interrupt was faster than initial
+			 * interrupt. Set pfault_wait to -1 so the initial
+			 * interrupt doesn't put the task to sleep.
+			 * If the task is not running, ignore the completion
+			 * interrupt since it must be a leftover of a PFAULT
+			 * CANCEL operation which didn't remove all pending
+			 * completion interrupts. */
+			if (task_is_running(tsk))
+				tsk->thread.pfault_wait = -1;
+		}
+	} else {
+		/* signal bit not set -> a real page is missing. */
+		if (WARN_ON_ONCE(tsk != current))
+			goto out;
+		if (tsk->thread.pfault_wait == 1) {
+			/* Already on the list with a reference: put to sleep */
+			goto block;
+		} else if (tsk->thread.pfault_wait == -1) {
+			/* Completion interrupt was faster than the initial
+			 * interrupt (pfault_wait == -1). Set pfault_wait
+			 * back to zero and exit. */
+			tsk->thread.pfault_wait = 0;
+		} else {
+			/* Initial interrupt arrived before completion
+			 * interrupt. Let the task sleep.
+			 * An extra task reference is needed since a different
+			 * cpu may set the task state to TASK_RUNNING again
+			 * before the scheduler is reached. */
+			get_task_struct(tsk);
+			tsk->thread.pfault_wait = 1;
+			list_add(&tsk->thread.list, &pfault_list);
+block:
+			/* Since this must be a userspace fault, there
+			 * is no kernel task state to trample. Rely on the
+			 * return to userspace schedule() to block. */
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			set_tsk_need_resched(tsk);
+			set_preempt_need_resched();
+		}
+	}
+out:
+	spin_unlock(&pfault_lock);
+	put_task_struct(tsk);
+}
+
+static int pfault_cpu_dead(unsigned int cpu)
+{
+	struct thread_struct *thread, *next;
+	struct task_struct *tsk;
+
+	spin_lock_irq(&pfault_lock);
+	list_for_each_entry_safe(thread, next, &pfault_list, list) {
+		thread->pfault_wait = 0;
+		list_del(&thread->list);
+		tsk = container_of(thread, struct task_struct, thread);
+		wake_up_process(tsk);
+		put_task_struct(tsk);
+	}
+	spin_unlock_irq(&pfault_lock);
+	return 0;
+}
+
+static int __init pfault_irq_init(void)
+{
+	int rc;
+
+	rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+	if (rc)
+		goto out_extint;
+	rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
+	if (rc)
+		goto out_pfault;
+	irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
+	cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
+				  NULL, pfault_cpu_dead);
+	return 0;
+
+out_pfault:
+	unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+out_extint:
+	pfault_disable = 1;
+	return rc;
+}
+early_initcall(pfault_irq_init);

From b60624bb0a94126a26b9ac9653b4668a1a70ba2a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 24 Jul 2023 17:20:27 +0200
Subject: [PATCH 0642/2047] s390/pfault: use UL instead of ULL

Remove another leftover of the 31 bit area: replace the not needed
"unsigned long long" suffix with "unsigned long", and stay consistent
with the rest of the code.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/mm/pfault.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index 5c0547f8d5ee..05865e5616b2 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -13,7 +13,7 @@
 #include <asm/diag.h>
 
 #define __SUBCODE_MASK 0x0600
-#define __PF_RES_FIELD 0x8000000000000000ULL
+#define __PF_RES_FIELD 0x8000000000000000UL
 
 /*
  * 'pfault' pseudo page faults routines.
@@ -45,8 +45,8 @@ static struct pfault_refbk pfault_init_refbk = {
 	.refdwlen = 5,
 	.refversn = 2,
 	.refgaddr = __LC_LPP,
-	.refselmk = 1ULL << 48,
-	.refcmpmk = 1ULL << 48,
+	.refselmk = 1UL << 48,
+	.refcmpmk = 1UL << 48,
 	.reserved = __PF_RES_FIELD
 };
 

From c5b6eef58f88810b8e641d8d32d24041fa20e016 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 24 Jul 2023 17:20:28 +0200
Subject: [PATCH 0643/2047] s390/pfault: remove not needed packed and aligned
 attributes

struct pfault_refbk is naturally packed and aligned; remove not needed
packed and aligned attributes.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/mm/pfault.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index 05865e5616b2..e1c8cc14575d 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -37,7 +37,7 @@ struct pfault_refbk {
 	u64 refselmk;
 	u64 refcmpmk;
 	u64 reserved;
-} __attribute__ ((packed, aligned(8)));
+};
 
 static struct pfault_refbk pfault_init_refbk = {
 	.refdiagc = 0x258,

From 28254f36e2944a501e8bf440193e5c4f910cf10d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 24 Jul 2023 17:20:29 +0200
Subject: [PATCH 0644/2047] s390/pfault: use early_param() instead if __setup()

early_param() is the standard way of defining early kernel command
line parameters. Use that instead of the old __setup() variant.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/mm/pfault.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index e1c8cc14575d..64cc42d37c8b 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -25,8 +25,7 @@ static int __init nopfault(char *str)
 	pfault_disable = 1;
 	return 1;
 }
-
-__setup("nopfault", nopfault);
+early_param("nopfault", nopfault);
 
 struct pfault_refbk {
 	u16 refdiagc;

From 4c89eb874420a94680d1b842bb9c3785997713a4 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 24 Jul 2023 17:20:30 +0200
Subject: [PATCH 0645/2047] s390/pfault: cleanup inline assemblies

Cleanup the pfault inline assemblies:
- Use symbolic names for operands
- Add extra linebreaks, and whitespace to improve readability

In addition, change __pfault_init() to return -EOPNOTSUPP in case of
an exception, and don't return a made up valid diag 258 return value
(aka "8").
This allows to simplify the inline assembly, and makes debugging
easier, in case something is broken.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/pfault.h |  4 +++-
 arch/s390/mm/pfault.c          | 28 ++++++++++++++--------------
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/arch/s390/include/asm/pfault.h b/arch/s390/include/asm/pfault.h
index beabeebf2859..a1bee4a1e470 100644
--- a/arch/s390/include/asm/pfault.h
+++ b/arch/s390/include/asm/pfault.h
@@ -5,6 +5,8 @@
 #ifndef _ASM_S390_PFAULT_H
 #define _ASM_S390_PFAULT_H
 
+#include <linux/errno.h>
+
 int __pfault_init(void);
 void __pfault_fini(void);
 
@@ -12,7 +14,7 @@ static inline int pfault_init(void)
 {
 	if (IS_ENABLED(CONFIG_PFAULT))
 		return __pfault_init();
-	return -1;
+	return -EOPNOTSUPP;
 }
 
 static inline void pfault_fini(void)
diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index 64cc42d37c8b..1d65512c3351 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -51,20 +51,19 @@ static struct pfault_refbk pfault_init_refbk = {
 
 int __pfault_init(void)
 {
-        int rc;
+	int rc = -EOPNOTSUPP;
 
 	if (pfault_disable)
-		return -1;
+		return rc;
 	diag_stat_inc(DIAG_STAT_X258);
 	asm volatile(
-		"	diag	%1,%0,0x258\n"
-		"0:	j	2f\n"
-		"1:	la	%0,8\n"
-		"2:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc)
-		: "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc");
-        return rc;
+		"	diag	%[refbk],%[rc],0x258\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b, 0b)
+		: [rc] "+d" (rc)
+		: [refbk] "a" (&pfault_init_refbk), "m" (pfault_init_refbk)
+		: "cc");
+	return rc;
 }
 
 static struct pfault_refbk pfault_fini_refbk = {
@@ -76,15 +75,16 @@ static struct pfault_refbk pfault_fini_refbk = {
 
 void __pfault_fini(void)
 {
-
 	if (pfault_disable)
 		return;
 	diag_stat_inc(DIAG_STAT_X258);
 	asm volatile(
-		"	diag	%0,0,0x258\n"
+		"	diag	%[refbk],0,0x258\n"
 		"0:	nopr	%%r7\n"
-		EX_TABLE(0b,0b)
-		: : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc");
+		EX_TABLE(0b, 0b)
+		:
+		: [refbk] "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk)
+		: "cc");
 }
 
 static DEFINE_SPINLOCK(pfault_lock);

From 46a923fd86eb51acfc9e833ce0a27cc09f3e1c45 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 24 Jul 2023 17:20:31 +0200
Subject: [PATCH 0646/2047] s390/pfault: use consistent comment style

Use consistent comment style within the whole pfault C code.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/mm/pfault.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index 1d65512c3351..1aac13bb8f53 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -142,23 +142,27 @@ static void pfault_interrupt(struct ext_code ext_code,
 	if (subcode & PF_COMPLETE) {
 		/* signal bit is set -> a page has been swapped in by VM */
 		if (tsk->thread.pfault_wait == 1) {
-			/* Initial interrupt was faster than the completion
+			/*
+			 * Initial interrupt was faster than the completion
 			 * interrupt. pfault_wait is valid. Set pfault_wait
 			 * back to zero and wake up the process. This can
 			 * safely be done because the task is still sleeping
-			 * and can't produce new pfaults. */
+			 * and can't produce new pfaults.
+			 */
 			tsk->thread.pfault_wait = 0;
 			list_del(&tsk->thread.list);
 			wake_up_process(tsk);
 			put_task_struct(tsk);
 		} else {
-			/* Completion interrupt was faster than initial
+			/*
+			 * Completion interrupt was faster than initial
 			 * interrupt. Set pfault_wait to -1 so the initial
 			 * interrupt doesn't put the task to sleep.
 			 * If the task is not running, ignore the completion
 			 * interrupt since it must be a leftover of a PFAULT
 			 * CANCEL operation which didn't remove all pending
-			 * completion interrupts. */
+			 * completion interrupts.
+			 */
 			if (task_is_running(tsk))
 				tsk->thread.pfault_wait = -1;
 		}
@@ -170,23 +174,29 @@ static void pfault_interrupt(struct ext_code ext_code,
 			/* Already on the list with a reference: put to sleep */
 			goto block;
 		} else if (tsk->thread.pfault_wait == -1) {
-			/* Completion interrupt was faster than the initial
+			/*
+			 * Completion interrupt was faster than the initial
 			 * interrupt (pfault_wait == -1). Set pfault_wait
-			 * back to zero and exit. */
+			 * back to zero and exit.
+			 */
 			tsk->thread.pfault_wait = 0;
 		} else {
-			/* Initial interrupt arrived before completion
+			/*
+			 * Initial interrupt arrived before completion
 			 * interrupt. Let the task sleep.
 			 * An extra task reference is needed since a different
 			 * cpu may set the task state to TASK_RUNNING again
-			 * before the scheduler is reached. */
+			 * before the scheduler is reached.
+			 */
 			get_task_struct(tsk);
 			tsk->thread.pfault_wait = 1;
 			list_add(&tsk->thread.list, &pfault_list);
 block:
-			/* Since this must be a userspace fault, there
+			/*
+			 * Since this must be a userspace fault, there
 			 * is no kernel task state to trample. Rely on the
-			 * return to userspace schedule() to block. */
+			 * return to userspace schedule() to block.
+			 */
 			__set_current_state(TASK_UNINTERRUPTIBLE);
 			set_tsk_need_resched(tsk);
 			set_preempt_need_resched();

From 3e8fc2d492207353c5ee469241c8df36c9765471 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 16:42:28 +0800
Subject: [PATCH 0647/2047] s390/cert_store: fix error return code in
 fill_cs_keyring()

The 'rc' will be re-assigned to 0 after calling get_vcssb(), it
needs be set to error code if create_cs_keyring() fails.

[hca@linux.ibm.com: slightly changed coding style]
Fixes: 8cf57d7217c3 ("s390: add support for user-defined certificates")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728084228.3186083-1-yangyingliang@huawei.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/cert_store.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
index 1cbeb9ce0eb1..3986a044eb36 100644
--- a/arch/s390/kernel/cert_store.c
+++ b/arch/s390/kernel/cert_store.c
@@ -702,6 +702,7 @@ static int fill_cs_keyring(void)
 	if (rc)
 		goto cleanup_keys;
 
+	rc = -ENOMEM;
 	cs_keyring = create_cs_keyring();
 	if (!cs_keyring)
 		goto cleanup_keys;

From 291bc793a0083ce69acc7ceb77c9ec7af4401093 Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Thu, 27 Jul 2023 20:40:43 +0800
Subject: [PATCH 0648/2047] gpio: omap: Remove redundant dev_err_probe() and
 zero value handle code

There is no need to call the dev_err_probe() function directly to print
a custom message when handling an error from platform_get_irq() function as
it is going to display an appropriate error message in case of a failure.

And the code to handle bank->irq = 0 is redundant because
platform_get_irq() do not return 0.

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-omap.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index 2b78fde74e30..a927680c66f8 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -1412,11 +1412,8 @@ static int omap_gpio_probe(struct platform_device *pdev)
 	bank->dev = dev;
 
 	bank->irq = platform_get_irq(pdev, 0);
-	if (bank->irq <= 0) {
-		if (!bank->irq)
-			bank->irq = -ENXIO;
-		return dev_err_probe(dev, bank->irq, "can't get irq resource\n");
-	}
+	if (bank->irq < 0)
+		return bank->irq;
 
 	bank->chip.parent = dev;
 	bank->chip.owner = THIS_MODULE;

From 451c923d4c638a2f8215fea35e10f2dcd921a577 Mon Sep 17 00:00:00 2001
From: Okan Sahin <okan.sahin@analog.com>
Date: Thu, 27 Jul 2023 12:54:29 +0300
Subject: [PATCH 0649/2047] dt-bindings: gpio: ds4520: Add ADI DS4520

Add ADI DS4520 devicetree document.

Signed-off-by: Okan Sahin <okan.sahin@analog.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 .../bindings/gpio/adi,ds4520-gpio.yaml        | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/gpio/adi,ds4520-gpio.yaml

diff --git a/Documentation/devicetree/bindings/gpio/adi,ds4520-gpio.yaml b/Documentation/devicetree/bindings/gpio/adi,ds4520-gpio.yaml
new file mode 100644
index 000000000000..25b3198c4d3e
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/adi,ds4520-gpio.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/adi,ds4520-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: DS4520 I2C GPIO expander
+
+maintainers:
+  - Okan Sahin <okan.sahin@analog.com>
+
+properties:
+  compatible:
+    enum:
+      - adi,ds4520-gpio
+
+  reg:
+    maxItems: 1
+
+  gpio-controller: true
+
+  "#gpio-cells":
+    const: 2
+
+  ngpios:
+    minimum: 1
+    maximum: 9
+
+required:
+  - compatible
+  - reg
+  - gpio-controller
+  - "#gpio-cells"
+  - ngpios
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        gpio@50 {
+            compatible = "adi,ds4520-gpio";
+            reg = <0x50>;
+            ngpios = <9>;
+            gpio-controller;
+            #gpio-cells = <2>;
+        };
+    };

From 659ad5f7efece8f92213ca069c494a37507c8c67 Mon Sep 17 00:00:00 2001
From: Okan Sahin <okan.sahin@analog.com>
Date: Sat, 29 Jul 2023 15:56:33 +0200
Subject: [PATCH 0650/2047] gpio: ds4520: Add ADI DS4520 GPIO Expander Support

The DS4520 is a 9-bit nonvolatile (NV) I/O expander.
It offers users a digitally programmable alternative
to hardware jumpers and mechanical switches that are
being used to control digital logic node.

Signed-off-by: Okan Sahin <okan.sahin@analog.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig       | 11 ++++++
 drivers/gpio/Makefile      |  1 +
 drivers/gpio/gpio-ds4520.c | 80 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+)
 create mode 100644 drivers/gpio/gpio-ds4520.c

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 61a8d07e5a1c..c5324e44b74e 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -1029,6 +1029,17 @@ config GPIO_FXL6408
 	  To compile this driver as a module, choose M here: the module will
 	  be called gpio-fxl6408.
 
+config GPIO_DS4520
+	tristate "DS4520 I2C GPIO expander"
+	select REGMAP_I2C
+	select GPIO_REGMAP
+	help
+	  GPIO driver for ADI DS4520 I2C-based GPIO expander.
+	  Say yes here to enable the GPIO driver for the ADI DS4520 chip.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called gpio-ds4520.
+
 config GPIO_GW_PLD
 	tristate "Gateworks PLD GPIO Expander"
 	depends on OF_GPIO
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index c3ac51d47aa9..eb73b5d633eb 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_GPIO_DA9052)		+= gpio-da9052.o
 obj-$(CONFIG_GPIO_DA9055)		+= gpio-da9055.o
 obj-$(CONFIG_GPIO_DAVINCI)		+= gpio-davinci.o
 obj-$(CONFIG_GPIO_DLN2)			+= gpio-dln2.o
+obj-$(CONFIG_GPIO_DS4520)		+= gpio-ds4520.o
 obj-$(CONFIG_GPIO_DWAPB)		+= gpio-dwapb.o
 obj-$(CONFIG_GPIO_EIC_SPRD)		+= gpio-eic-sprd.o
 obj-$(CONFIG_GPIO_ELKHARTLAKE)		+= gpio-elkhartlake.o
diff --git a/drivers/gpio/gpio-ds4520.c b/drivers/gpio/gpio-ds4520.c
new file mode 100644
index 000000000000..1903deaef3e9
--- /dev/null
+++ b/drivers/gpio/gpio-ds4520.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2023 Analog Devices, Inc.
+ * Driver for the DS4520 I/O Expander
+ */
+
+#include <linux/device.h>
+#include <linux/gpio/driver.h>
+#include <linux/gpio/regmap.h>
+#include <linux/i2c.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+
+#define DS4520_PULLUP0		0xF0
+#define DS4520_IO_CONTROL0	0xF2
+#define DS4520_IO_STATUS0	0xF8
+
+static const struct regmap_config ds4520_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
+static int ds4520_gpio_probe(struct i2c_client *client)
+{
+	struct gpio_regmap_config config = { };
+	struct device *dev = &client->dev;
+	struct regmap *regmap;
+	u32 ngpio;
+	u32 base;
+	int ret;
+
+	ret = device_property_read_u32(dev, "reg", &base);
+	if (ret)
+		return dev_err_probe(dev, ret, "Missing 'reg' property.\n");
+
+	ret = device_property_read_u32(dev, "ngpios", &ngpio);
+	if (ret)
+		return dev_err_probe(dev, ret, "Missing 'ngpios' property.\n");
+
+	regmap = devm_regmap_init_i2c(client, &ds4520_regmap_config);
+	if (IS_ERR(regmap))
+		return dev_err_probe(dev, PTR_ERR(regmap),
+				     "Failed to allocate register map\n");
+
+	config.regmap = regmap;
+	config.parent = dev;
+	config.ngpio = ngpio;
+
+	config.reg_dat_base = base + DS4520_IO_STATUS0;
+	config.reg_set_base = base + DS4520_PULLUP0;
+	config.reg_dir_out_base = base + DS4520_IO_CONTROL0;
+
+	return PTR_ERR_OR_ZERO(devm_gpio_regmap_register(dev, &config));
+}
+
+static const struct of_device_id ds4520_gpio_of_match_table[] = {
+	{ .compatible = "adi,ds4520-gpio" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ds4520_gpio_of_match_table);
+
+static const struct i2c_device_id ds4520_gpio_id_table[] = {
+	{ "ds4520-gpio" },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, ds4520_gpio_id_table);
+
+static struct i2c_driver ds4520_gpio_driver = {
+	.driver = {
+		.name = "ds4520-gpio",
+		.of_match_table = ds4520_gpio_of_match_table,
+	},
+	.probe = ds4520_gpio_probe,
+	.id_table = ds4520_gpio_id_table,
+};
+module_i2c_driver(ds4520_gpio_driver);
+
+MODULE_DESCRIPTION("DS4520 I/O Expander");
+MODULE_AUTHOR("Okan Sahin <okan.sahin@analog.com>");
+MODULE_LICENSE("GPL");

From 73561d281631630748fd94bca120a79076b52266 Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Thu, 27 Jul 2023 19:57:26 +0800
Subject: [PATCH 0651/2047] gpio: davinci: Remove redundant dev_err_probe()

There is no need to call the dev_err_probe() function directly to print
a custom message when handling an error from platform_get_irq() function as
it is going to display an appropriate error message in case of a failure.

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-davinci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index fff510d86e31..8db5717bdabe 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -236,7 +236,7 @@ static int davinci_gpio_probe(struct platform_device *pdev)
 	for (i = 0; i < nirq; i++) {
 		chips->irqs[i] = platform_get_irq(pdev, i);
 		if (chips->irqs[i] < 0)
-			return dev_err_probe(dev, chips->irqs[i], "IRQ not populated\n");
+			return chips->irqs[i];
 	}
 
 	chips->chip.label = dev_name(dev);

From e5780d80ce1d86298e5ad5a91264276bff8c9179 Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@ew.tq-group.com>
Date: Mon, 24 Jul 2023 08:35:19 +0200
Subject: [PATCH 0652/2047] dt-bindings: gpio: Add gpio-line-names to STMPE
 GPIO

This is a gpio-controller, so gpio-line-names should be allowed as well.
stmpe2403 supports up to 24 GPIOs.

Signed-off-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 Documentation/devicetree/bindings/gpio/st,stmpe-gpio.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/gpio/st,stmpe-gpio.yaml b/Documentation/devicetree/bindings/gpio/st,stmpe-gpio.yaml
index 22c0cae73425..4555f1644a4d 100644
--- a/Documentation/devicetree/bindings/gpio/st,stmpe-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/st,stmpe-gpio.yaml
@@ -28,6 +28,10 @@ properties:
 
   gpio-controller: true
 
+  gpio-line-names:
+    minItems: 1
+    maxItems: 24
+
   interrupt-controller: true
 
   st,norequest-mask:

From 28e6c5b86ac3756235b9a0ae6b1409f6ac33cd09 Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <alex@shruggie.ro>
Date: Wed, 12 Jul 2023 12:48:57 +0300
Subject: [PATCH 0653/2047] gpio: 74xx-mmio: remove unneeded
 platform_set_drvdata() call

The platform_set_drvdata() was needed when the driver had an explicit
remove function.
That function got removed a while back, so we don't need to keep a pointer
(on 'dev->driver_data') for the private data of the driver anymore.

Signed-off-by: Alexandru Ardelean <alex@shruggie.ro>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-74xx-mmio.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-74xx-mmio.c b/drivers/gpio/gpio-74xx-mmio.c
index 0464f1ecd20d..c7ac5a9ffb1f 100644
--- a/drivers/gpio/gpio-74xx-mmio.c
+++ b/drivers/gpio/gpio-74xx-mmio.c
@@ -135,8 +135,6 @@ static int mmio_74xx_gpio_probe(struct platform_device *pdev)
 	priv->gc.ngpio = MMIO_74XX_BIT_CNT(priv->flags);
 	priv->gc.owner = THIS_MODULE;
 
-	platform_set_drvdata(pdev, priv);
-
 	return devm_gpiochip_add_data(&pdev->dev, &priv->gc, priv);
 }
 

From ba8a90e8cb8ca4db4e57be6b53aa8c364137b38c Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Wed, 19 Jul 2023 12:39:23 +0300
Subject: [PATCH 0654/2047] gpio: exar: remove unneeded platform_set_drvdata()
 call

The platform_set_drvdata() was needed when the driver had an explicit
remove function.
That function got removed a while back, so we don't need to keep a pointer
(on 'dev->driver_data') for the private data of the driver anymore.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-exar.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-exar.c b/drivers/gpio/gpio-exar.c
index df1bdaae441c..5170fe7599cd 100644
--- a/drivers/gpio/gpio-exar.c
+++ b/drivers/gpio/gpio-exar.c
@@ -217,8 +217,6 @@ static int gpio_exar_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	platform_set_drvdata(pdev, exar_gpio);
-
 	return 0;
 }
 

From 8020619ad785c87e807ca9f782238294c2a602e0 Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Wed, 19 Jul 2023 12:48:03 +0300
Subject: [PATCH 0655/2047] gpio: logicvc: remove unneeded
 platform_set_drvdata() call

The platform_set_drvdata() isn't needed for anything. The function is a
simple setter that doesn't change anything in the code. That is because
there isn't a get function and since it has no dependencies it can be
removed.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-logicvc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-logicvc.c b/drivers/gpio/gpio-logicvc.c
index 2b9876bc1383..05d62011f335 100644
--- a/drivers/gpio/gpio-logicvc.c
+++ b/drivers/gpio/gpio-logicvc.c
@@ -138,8 +138,6 @@ static int logicvc_gpio_probe(struct platform_device *pdev)
 	logicvc->chip.set = logicvc_gpio_set;
 	logicvc->chip.direction_output = logicvc_gpio_direction_output;
 
-	platform_set_drvdata(pdev, logicvc);
-
 	return devm_gpiochip_add_data(dev, &logicvc->chip, logicvc);
 }
 

From c456c4d9707a0bd484a1af30188f5c416fc394f0 Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Mon, 17 Jul 2023 15:10:04 +0300
Subject: [PATCH 0656/2047] gpio: eic-sprd: remove unneeded
 platform_set_drvdata() call

The platform_set_drvdata() call was never used, ever since the driver was
originally added.
It looks like this copy+paste left-over. Possibly the author copied from a
driver that had this line, but also had a remove hook.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-eic-sprd.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c
index 67b1e09e8985..5320cf1de89c 100644
--- a/drivers/gpio/gpio-eic-sprd.c
+++ b/drivers/gpio/gpio-eic-sprd.c
@@ -653,7 +653,6 @@ static int sprd_eic_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	platform_set_drvdata(pdev, sprd_eic);
 	return 0;
 }
 

From 566ffa3ae9649f7aa1debcf559ee7e26ed4a110f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 25 Jul 2023 14:17:39 +0200
Subject: [PATCH 0657/2047] x86/cpu: Fix amd_check_microcode() declaration

The newly added amd_check_microcode() function has two conflicting definitions
if CONFIG_CPU_SUP_AMD is enabled and CONFIG_MICROCODE_AMD is disabled. Since
the header with the stub definition is not included in cpu/amd.c, this only
causes a -Wmissing-prototype warning with W=1:

  arch/x86/kernel/cpu/amd.c:1289:6: error: no previous prototype for 'amd_check_microcode' [-Werror=missing-prototypes]

Adding the missing #include shows the other problem:

  arch/x86/kernel/cpu/amd.c:1290:6: error: redefinition of 'amd_check_microcode'
  arch/x86/include/asm/microcode_amd.h:58:20: note: previous definition of 'amd_check_microcode' with type 'void(void)'

Move the declaration into a more appropriate header that is already
included, with the #ifdef check changed to match the definition's.

Fixes: 522b1d69219d8 ("x86/cpu/amd: Add a Zenbleed fix")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230725121751.2007665-1-arnd@kernel.org
---
 arch/x86/include/asm/microcode_amd.h | 3 +--
 arch/x86/include/asm/processor.h     | 2 ++
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index 9675c621c1ca..2ed09a1da4a2 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -48,13 +48,12 @@ extern void __init load_ucode_amd_bsp(unsigned int family);
 extern void load_ucode_amd_ap(unsigned int family);
 extern int __init save_microcode_in_initrd_amd(unsigned int family);
 void reload_ucode_amd(unsigned int cpu);
-extern void amd_check_microcode(void);
 #else
 static inline void __init load_ucode_amd_bsp(unsigned int family) {}
 static inline void load_ucode_amd_ap(unsigned int family) {}
 static inline int __init
 save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
 static inline void reload_ucode_amd(unsigned int cpu) {}
-static inline void amd_check_microcode(void) {}
 #endif
+
 #endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d46300e94f85..36d52075fdad 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -682,9 +682,11 @@ extern u16 get_llc_id(unsigned int cpu);
 #ifdef CONFIG_CPU_SUP_AMD
 extern u32 amd_get_nodes_per_socket(void);
 extern u32 amd_get_highest_perf(void);
+extern void amd_check_microcode(void);
 #else
 static inline u32 amd_get_nodes_per_socket(void)	{ return 0; }
 static inline u32 amd_get_highest_perf(void)		{ return 0; }
+static inline void amd_check_microcode(void)		{ }
 #endif
 
 extern unsigned long arch_align_stack(unsigned long sp);

From ac1c6283c45a77431af33fbae435f1a29f23a9f4 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 25 Jul 2023 15:48:33 +0200
Subject: [PATCH 0658/2047] x86/apic: Hide unused safe_smp_processor_id() on
 32-bit UP

When CONFIG_SMP is disabled in a 32-bit config, the prototype for
safe_smp_processor_id() is hidden, which causes a W=1 warning:

  arch/x86/kernel/apic/ipi.c:316:5: error: no previous prototype for 'safe_smp_processor_id' [-Werror=missing-prototypes]

Since there are no callers in this configuration, just hide the definition
as well.

  [ bp: Clarify it is a 32-bit config. ]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230725134837.1534228-2-arnd@kernel.org
---
 arch/x86/kernel/apic/ipi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 2a6509e8c840..9bfd6e397384 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -301,6 +301,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_SMP
 /* must come after the send_IPI functions above for inlining */
 static int convert_apicid_to_cpu(int apic_id)
 {
@@ -329,3 +330,4 @@ int safe_smp_processor_id(void)
 	return cpuid >= 0 ? cpuid : 0;
 }
 #endif
+#endif

From 22dc02f81cddd19528fc1d4fbd7404defbf736c5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 24 Jul 2023 21:30:54 +0200
Subject: [PATCH 0659/2047] Revert "sched/fair: Move unused stub functions to
 header"

Revert commit 7aa55f2a5902 ("sched/fair: Move unused stub functions to
header"), for while it has the right Changelog, the actual patch
content a revert of the previous 4 patches:

  f7df852ad6db ("sched: Make task_vruntime_update() prototype visible")
  c0bdfd72fbfb ("sched/fair: Hide unused init_cfs_bandwidth() stub")
  378be384e01f ("sched: Add schedule_user() declaration")
  d55ebae3f312 ("sched: Hide unused sched_update_scaling()")

So in effect this is a revert of a revert and re-applies those
patches.

Fixes: 7aa55f2a5902 ("sched/fair: Move unused stub functions to header")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
---
 kernel/sched/fair.c  | 6 +++---
 kernel/sched/sched.h | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b3e25be58e2b..695f8e5c9e77 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -684,7 +684,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 /**************************************************************
  * Scheduling class statistics methods:
  */
-
+#ifdef CONFIG_SMP
 int sched_update_scaling(void)
 {
 	unsigned int factor = get_update_sysctl_factor();
@@ -702,6 +702,7 @@ int sched_update_scaling(void)
 	return 0;
 }
 #endif
+#endif
 
 /*
  * delta /= w
@@ -6186,9 +6187,8 @@ static inline int throttled_lb_pair(struct task_group *tg,
 	return 0;
 }
 
-void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
+void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 #endif
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e93e006a942b..44b540ad37b7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1245,6 +1245,7 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
 
 bool cfs_prio_less(const struct task_struct *a, const struct task_struct *b,
 			bool fi);
+void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
 
 /*
  * Helpers to check if the CPU's core cookie matches with the task's cookie
@@ -2398,6 +2399,7 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
 #endif
 
 extern void schedule_idle(void);
+asmlinkage void schedule_user(void);
 
 extern void sysrq_sched_debug_show(void);
 extern void sched_init_granularity(void);

From 893aa09ee5959533d8e51e06b1bde35286edaf70 Mon Sep 17 00:00:00 2001
From: Wang Ming <machel@vivo.com>
Date: Wed, 26 Jul 2023 18:54:47 +0800
Subject: [PATCH 0660/2047] spi: Use dev_err_probe instead of dev_err

It is possible that dma_request_chan will return EPROBE_DEFER,
which means that dev is not ready yet. In this case,
dev_err(dev), there will be no output. This patch fixes the bug.

Signed-off-by: Wang Ming <machel@vivo.com>
Link: https://lore.kernel.org/r/20230726105457.3743-1-machel@vivo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm2835.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index 83fd062fc491..c16abc2a9e9b 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -903,15 +903,15 @@ static int bcm2835_dma_init(struct spi_controller *ctlr, struct device *dev,
 	/* get tx/rx dma */
 	ctlr->dma_tx = dma_request_chan(dev, "tx");
 	if (IS_ERR(ctlr->dma_tx)) {
-		dev_err(dev, "no tx-dma configuration found - not using dma mode\n");
-		ret = PTR_ERR(ctlr->dma_tx);
+		ret = dev_err_probe(dev, PTR_ERR(ctlr->dma_tx),
+			"no tx-dma configuration found - not using dma mode\n");
 		ctlr->dma_tx = NULL;
 		goto err;
 	}
 	ctlr->dma_rx = dma_request_chan(dev, "rx");
 	if (IS_ERR(ctlr->dma_rx)) {
-		dev_err(dev, "no rx-dma configuration found - not using dma mode\n");
-		ret = PTR_ERR(ctlr->dma_rx);
+		ret = dev_err_probe(dev, PTR_ERR(ctlr->dma_rx),
+			"no rx-dma configuration found - not using dma mode\n");
 		ctlr->dma_rx = NULL;
 		goto err_release;
 	}

From b9d6012497400aa65ef0f2cc537ce7720615dea7 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 14 Jul 2023 11:40:20 -0600
Subject: [PATCH 0661/2047] arm64: Explicitly include correct DT includes

Remove unused 'of*.h' header inclusions from the arm64 arch code to
allow for the eventual untangling of 'of_device.h and 'of_platform.h',
which currently include each other.

Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230714174021.4039807-1-robh@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpuidle.c | 2 --
 arch/arm64/kernel/pci.c     | 2 --
 2 files changed, 4 deletions(-)

diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index d1f68599c29f..f372295207fb 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -9,8 +9,6 @@
 #include <linux/acpi.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/psci.h>
 
 #ifdef CONFIG_ACPI_PROCESSOR_IDLE
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 2276689b5411..f872c57e9909 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -11,8 +11,6 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/of_pci.h>
-#include <linux/of_platform.h>
 #include <linux/pci.h>
 #include <linux/pci-acpi.h>
 #include <linux/pci-ecam.h>

From 5a175d369c702ce08c9feb630125c9fc7a9e1370 Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Sat, 29 Jul 2023 09:00:03 +0800
Subject: [PATCH 0662/2047] kunit: fix wild-memory-access bug in
 kunit_filter_suites()

As for kunit_filter_suites(), When the filters arg = NULL, such as
the call of kunit_filter_suites(&suite_set, "suite2", NULL, NULL, &err)
in filter_suites_test() tese case in kunit, both filter_count and
parsed_filters will not be initialized.

So it's possible to enter kunit_filter_attr_tests(), and the use of
uninitialized parsed_filters will cause below wild-memory-access.

 RIP: 0010:kunit_filter_suites+0x780/0xa40
 Code: fe ff ff e8 42 87 4d ff 41 83 c6 01 49 83 c5 10 49 89 dc 44 39 74 24 50 0f 8e 81 fe ff ff e8 27 87 4d ff 4c 89 e8 48 c1 e8 03 <66> 42 83 3c 38 00 0f 85 af 01 00 00 49 8b 75 00 49 8b 55 08 4c 89
 RSP: 0000:ff1100010743fc38 EFLAGS: 00010203
 RAX: 03fc4400041d0ff1 RBX: ff1100010389a900 RCX: ffffffff9f940ad9
 RDX: ff11000107429740 RSI: 0000000000000000 RDI: ff110001037ec920
 RBP: ff1100010743fd50 R08: 0000000000000000 R09: ffe21c0020e87f1e
 R10: 0000000000000003 R11: 0000000000032001 R12: ff110001037ec800
 R13: 1fe2200020e87f8c R14: 0000000000000000 R15: dffffc0000000000
 FS:  0000000000000000(0000) GS:ff1100011b000000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: ff11000115201000 CR3: 0000000113066001 CR4: 0000000000771ef0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 PKRU: 55555554
 Call Trace:
  <TASK>
  ? die_addr+0x3c/0xa0
  ? exc_general_protection+0x148/0x220
  ? asm_exc_general_protection+0x26/0x30
  ? kunit_filter_suites+0x779/0xa40
  ? kunit_filter_suites+0x780/0xa40
  ? kunit_filter_suites+0x779/0xa40
  ? __pfx_kunit_filter_suites+0x10/0x10
  ? __pfx_kfree+0x10/0x10
  ? kunit_add_action_or_reset+0x3d/0x50
  filter_suites_test+0x1b7/0x440
  ? __pfx_filter_suites_test+0x10/0x10
  ? __pfx___schedule+0x10/0x10
  ? try_to_wake_up+0xa8e/0x1210
  ? _raw_spin_lock_irqsave+0x86/0xe0
  ? __pfx__raw_spin_lock_irqsave+0x10/0x10
  ? set_cpus_allowed_ptr+0x7c/0xb0
  kunit_try_run_case+0x119/0x270
  ? __kthread_parkme+0xdc/0x160
  ? __pfx_kunit_try_run_case+0x10/0x10
  kunit_generic_run_threadfn_adapter+0x4e/0xa0
  ? __pfx_kunit_generic_run_threadfn_adapter+0x10/0x10
  kthread+0x2c7/0x3c0
  ? __pfx_kthread+0x10/0x10
  ret_from_fork+0x2c/0x70
  ? __pfx_kthread+0x10/0x10
  ret_from_fork_asm+0x1b/0x30
  </TASK>
 Modules linked in:
 Dumping ftrace buffer:
    (ftrace buffer empty)
 ---[ end trace 0000000000000000 ]---
 RIP: 0010:kunit_filter_suites+0x780/0xa40
 Code: fe ff ff e8 42 87 4d ff 41 83 c6 01 49 83 c5 10 49 89 dc 44 39 74 24 50 0f 8e 81 fe ff ff e8 27 87 4d ff 4c 89 e8 48 c1 e8 03 <66> 42 83 3c 38 00 0f 85 af 01 00 00 49 8b 75 00 49 8b 55 08 4c 89
 RSP: 0000:ff1100010743fc38 EFLAGS: 00010203
 RAX: 03fc4400041d0ff1 RBX: ff1100010389a900 RCX: ffffffff9f940ad9
 RDX: ff11000107429740 RSI: 0000000000000000 RDI: ff110001037ec920
 RBP: ff1100010743fd50 R08: 0000000000000000 R09: ffe21c0020e87f1e
 R10: 0000000000000003 R11: 0000000000032001 R12: ff110001037ec800
 R13: 1fe2200020e87f8c R14: 0000000000000000 R15: dffffc0000000000
 FS:  0000000000000000(0000) GS:ff1100011b000000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: ff11000115201000 CR3: 0000000113066001 CR4: 0000000000771ef0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 PKRU: 55555554
 Kernel panic - not syncing: Fatal exception
 Dumping ftrace buffer:
    (ftrace buffer empty)
 Kernel Offset: 0x1da00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
 Rebooting in 1 seconds..

Fixes: 529534e8cba3 ("kunit: Add ability to filter attributes")
Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Reviewed-by: David Gow <davidgow@google.com>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 lib/kunit/executor.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index 483f7b7873a7..5b5bed1efb93 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -125,7 +125,8 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
 						char *filter_action,
 					    int *err)
 {
-	int i, j, k, filter_count;
+	int i, j, k;
+	int filter_count = 0;
 	struct kunit_suite **copy, *filtered_suite, *new_filtered_suite;
 	struct suite_set filtered;
 	struct kunit_glob_filter parsed_glob;

From 59bbe86bb212b618ec2b50434f54bb4cfc704f44 Mon Sep 17 00:00:00 2001
From: Praveen Talari <quic_ptalari@quicinc.com>
Date: Fri, 14 Jul 2023 09:52:02 +0530
Subject: [PATCH 0663/2047] soc: qcom: geni-se: Add SPI Device mode support for
 GENI based QuPv3

Add device mode supported registers and masks.

Signed-off-by: Praveen Talari <quic_ptalari@quicinc.com>
Reviewed-by: Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>
Link: https://lore.kernel.org/r/20230714042203.14251-2-quic_ptalari@quicinc.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/soc/qcom/geni-se.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h
index 821a19135bb6..29e06905bc1f 100644
--- a/include/linux/soc/qcom/geni-se.h
+++ b/include/linux/soc/qcom/geni-se.h
@@ -35,6 +35,7 @@ enum geni_se_protocol_type {
 	GENI_SE_UART,
 	GENI_SE_I2C,
 	GENI_SE_I3C,
+	GENI_SE_SPI_SLAVE,
 };
 
 struct geni_wrapper;
@@ -73,12 +74,14 @@ struct geni_se {
 
 /* Common SE registers */
 #define GENI_FORCE_DEFAULT_REG		0x20
+#define GENI_OUTPUT_CTRL		0x24
 #define SE_GENI_STATUS			0x40
 #define GENI_SER_M_CLK_CFG		0x48
 #define GENI_SER_S_CLK_CFG		0x4c
 #define GENI_IF_DISABLE_RO		0x64
 #define GENI_FW_REVISION_RO		0x68
 #define SE_GENI_CLK_SEL			0x7c
+#define SE_GENI_CFG_SEQ_START		0x84
 #define SE_GENI_DMA_MODE_EN		0x258
 #define SE_GENI_M_CMD0			0x600
 #define SE_GENI_M_CMD_CTRL_REG		0x604
@@ -111,6 +114,9 @@ struct geni_se {
 /* GENI_FORCE_DEFAULT_REG fields */
 #define FORCE_DEFAULT	BIT(0)
 
+/* GENI_OUTPUT_CTRL fields */
+#define GENI_IO_MUX_0_EN		BIT(0)
+
 /* GENI_STATUS fields */
 #define M_GENI_CMD_ACTIVE		BIT(0)
 #define S_GENI_CMD_ACTIVE		BIT(12)
@@ -130,6 +136,9 @@ struct geni_se {
 /* GENI_CLK_SEL fields */
 #define CLK_SEL_MSK			GENMASK(2, 0)
 
+/* SE_GENI_CFG_SEQ_START fields */
+#define START_TRIGGER			BIT(0)
+
 /* SE_GENI_DMA_MODE_EN */
 #define GENI_DMA_MODE_EN		BIT(0)
 

From d7f74cc31a89a45d4c7deaa5f759661a07a183d6 Mon Sep 17 00:00:00 2001
From: Praveen Talari <quic_ptalari@quicinc.com>
Date: Fri, 14 Jul 2023 09:52:03 +0530
Subject: [PATCH 0664/2047] spi: spi-geni-qcom: Add SPI Device mode support for
 GENI based QuPv3

Currently spi geni driver supports only master mode operation.

Add spi device mode support to GENI based QuPv3.

Signed-off-by: Praveen Talari <quic_ptalari@quicinc.com>
Reviewed-by: Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>
Link: https://lore.kernel.org/r/20230714042203.14251-3-quic_ptalari@quicinc.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-geni-qcom.c | 53 ++++++++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 6 deletions(-)

diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 1df9d4844a68..f4f376a8351b 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -12,6 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/pm_runtime.h>
+#include <linux/property.h>
 #include <linux/soc/qcom/geni-se.h>
 #include <linux/spi/spi.h>
 #include <linux/spinlock.h>
@@ -52,6 +53,9 @@
 #define SPI_CS_CLK_DELAY_MSK		GENMASK(19, 10)
 #define SPI_CS_CLK_DELAY_SHFT		10
 
+#define SE_SPI_SLAVE_EN				(0x2BC)
+#define SPI_SLAVE_EN				BIT(0)
+
 /* M_CMD OP codes for SPI */
 #define SPI_TX_ONLY		1
 #define SPI_RX_ONLY		2
@@ -99,6 +103,16 @@ struct spi_geni_master {
 	int cur_xfer_mode;
 };
 
+static void spi_slv_setup(struct spi_geni_master *mas)
+{
+	struct geni_se *se = &mas->se;
+
+	writel(SPI_SLAVE_EN, se->base + SE_SPI_SLAVE_EN);
+	writel(GENI_IO_MUX_0_EN, se->base + GENI_OUTPUT_CTRL);
+	writel(START_TRIGGER, se->base + SE_GENI_CFG_SEQ_START);
+	dev_dbg(mas->dev, "spi slave setup done\n");
+}
+
 static int get_spi_clk_cfg(unsigned int speed_hz,
 			struct spi_geni_master *mas,
 			unsigned int *clk_idx,
@@ -140,12 +154,22 @@ static void handle_se_timeout(struct spi_master *spi,
 	const struct spi_transfer *xfer;
 
 	spin_lock_irq(&mas->lock);
-	reinit_completion(&mas->cancel_done);
 	if (mas->cur_xfer_mode == GENI_SE_FIFO)
 		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
 
 	xfer = mas->cur_xfer;
 	mas->cur_xfer = NULL;
+
+	if (spi->slave) {
+		/*
+		 * skip CMD Cancel sequnece since spi slave
+		 * doesn`t support CMD Cancel sequnece
+		 */
+		spin_unlock_irq(&mas->lock);
+		goto unmap_if_dma;
+	}
+
+	reinit_completion(&mas->cancel_done);
 	geni_se_cancel_m_cmd(se);
 	spin_unlock_irq(&mas->lock);
 
@@ -542,6 +566,10 @@ static bool geni_can_dma(struct spi_controller *ctlr,
 	if (mas->cur_xfer_mode == GENI_GPI_DMA)
 		return true;
 
+	/* Set SE DMA mode for SPI slave. */
+	if (ctlr->slave)
+		return true;
+
 	len = get_xfer_len_in_words(xfer, mas);
 	fifo_size = mas->tx_fifo_depth * mas->fifo_width_bits / mas->cur_bits_per_word;
 
@@ -619,6 +647,7 @@ static void spi_geni_release_dma_chan(struct spi_geni_master *mas)
 
 static int spi_geni_init(struct spi_geni_master *mas)
 {
+	struct spi_master *spi = dev_get_drvdata(mas->dev);
 	struct geni_se *se = &mas->se;
 	unsigned int proto, major, minor, ver;
 	u32 spi_tx_cfg, fifo_disable;
@@ -627,7 +656,14 @@ static int spi_geni_init(struct spi_geni_master *mas)
 	pm_runtime_get_sync(mas->dev);
 
 	proto = geni_se_read_proto(se);
-	if (proto != GENI_SE_SPI) {
+
+	if (spi->slave) {
+		if (proto != GENI_SE_SPI_SLAVE) {
+			dev_err(mas->dev, "Invalid proto %d\n", proto);
+			goto out_pm;
+		}
+		spi_slv_setup(mas);
+	} else if (proto != GENI_SE_SPI) {
 		dev_err(mas->dev, "Invalid proto %d\n", proto);
 		goto out_pm;
 	}
@@ -679,9 +715,11 @@ static int spi_geni_init(struct spi_geni_master *mas)
 	}
 
 	/* We always control CS manually */
-	spi_tx_cfg = readl(se->base + SE_SPI_TRANS_CFG);
-	spi_tx_cfg &= ~CS_TOGGLE;
-	writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG);
+	if (!spi->slave) {
+		spi_tx_cfg = readl(se->base + SE_SPI_TRANS_CFG);
+		spi_tx_cfg &= ~CS_TOGGLE;
+		writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG);
+	}
 
 out_pm:
 	pm_runtime_put(mas->dev);
@@ -1074,6 +1112,9 @@ static int spi_geni_probe(struct platform_device *pdev)
 	pm_runtime_set_autosuspend_delay(&pdev->dev, 250);
 	pm_runtime_enable(dev);
 
+	if (device_property_read_bool(&pdev->dev, "spi-slave"))
+		spi->slave = true;
+
 	ret = geni_icc_get(&mas->se, NULL);
 	if (ret)
 		goto spi_geni_probe_runtime_disable;
@@ -1094,7 +1135,7 @@ static int spi_geni_probe(struct platform_device *pdev)
 	 * for dma (gsi) mode, the gsi will set cs based on params passed in
 	 * TRE
 	 */
-	if (mas->cur_xfer_mode == GENI_SE_FIFO)
+	if (!spi->slave && mas->cur_xfer_mode == GENI_SE_FIFO)
 		spi->set_cs = spi_geni_set_cs;
 
 	/*

From b350e6c6297aaa2533da23a21398751eeeecc101 Mon Sep 17 00:00:00 2001
From: Yinbo Zhu <zhuyinbo@loongson.cn>
Date: Tue, 13 Jun 2023 15:58:33 +0800
Subject: [PATCH 0665/2047] spi: dt-bindings: add loongson spi

Add the Loongson platform spi binding with DT schema format using
json-schema.

Signed-off-by: Yinbo Zhu <zhuyinbo@loongson.cn>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230613075834.5219-2-zhuyinbo@loongson.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/spi/loongson,ls2k-spi.yaml       | 46 +++++++++++++++++++
 MAINTAINERS                                   |  6 +++
 2 files changed, 52 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/spi/loongson,ls2k-spi.yaml

diff --git a/Documentation/devicetree/bindings/spi/loongson,ls2k-spi.yaml b/Documentation/devicetree/bindings/spi/loongson,ls2k-spi.yaml
new file mode 100644
index 000000000000..de9d32feadf5
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/loongson,ls2k-spi.yaml
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/loongson,ls2k-spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson SPI controller
+
+maintainers:
+  - Yinbo Zhu <zhuyinbo@loongson.cn>
+
+allOf:
+  - $ref: /schemas/spi/spi-controller.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - loongson,ls2k1000-spi
+      - items:
+          - enum:
+              - loongson,ls2k0500-spi
+          - const: loongson,ls2k1000-spi
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    spi0: spi@1fff0220{
+        compatible = "loongson,ls2k1000-spi";
+        reg = <0x1fff0220 0x10>;
+        clocks = <&clk 17>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 53b7ca804465..60c87a7bf709 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12271,6 +12271,12 @@ F:	Documentation/devicetree/bindings/clock/loongson,ls2k-clk.yaml
 F:	drivers/clk/clk-loongson2.c
 F:	include/dt-bindings/clock/loongson,ls2k-clk.h
 
+LOONGSON SPI DRIVER
+M:	Yinbo Zhu <zhuyinbo@loongson.cn>
+L:	linux-spi@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/spi/loongson,ls2k-spi.yaml
+
 LOONGSON-2 SOC SERIES GUTS DRIVER
 M:	Yinbo Zhu <zhuyinbo@loongson.cn>
 L:	loongarch@lists.linux.dev

From 6c7a864007b66e60a3f64858a9555efed408b048 Mon Sep 17 00:00:00 2001
From: Yinbo Zhu <zhuyinbo@loongson.cn>
Date: Tue, 13 Jun 2023 15:58:34 +0800
Subject: [PATCH 0666/2047] spi: loongson: add bus driver for the loongson spi
 controller

This bus driver supports the Loongson SPI hardware controller in the
Loongson platforms and supports the use DTS and PCI framework to
register SPI device resources.

Signed-off-by: Yinbo Zhu <zhuyinbo@loongson.cn>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Mark Brown <broonie@kernel.org>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20230613075834.5219-3-zhuyinbo@loongson.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 MAINTAINERS                     |   4 +
 drivers/spi/Kconfig             |  26 +++
 drivers/spi/Makefile            |   3 +
 drivers/spi/spi-loongson-core.c | 279 ++++++++++++++++++++++++++++++++
 drivers/spi/spi-loongson-pci.c  |  55 +++++++
 drivers/spi/spi-loongson-plat.c |  47 ++++++
 drivers/spi/spi-loongson.h      |  49 ++++++
 7 files changed, 463 insertions(+)
 create mode 100644 drivers/spi/spi-loongson-core.c
 create mode 100644 drivers/spi/spi-loongson-pci.c
 create mode 100644 drivers/spi/spi-loongson-plat.c
 create mode 100644 drivers/spi/spi-loongson.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 60c87a7bf709..6578fabb936f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12276,6 +12276,10 @@ M:	Yinbo Zhu <zhuyinbo@loongson.cn>
 L:	linux-spi@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/spi/loongson,ls2k-spi.yaml
+F:	drivers/spi/spi-loongson-core.c
+F:	drivers/spi/spi-loongson-pci.c
+F:	drivers/spi/spi-loongson-plat.c
+F:	drivers/spi/spi-loongson.h
 
 LOONGSON-2 SOC SERIES GUTS DRIVER
 M:	Yinbo Zhu <zhuyinbo@loongson.cn>
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 8962b2557615..a364fb8f1778 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -516,6 +516,32 @@ config SPI_LM70_LLP
 	  which interfaces to an LM70 temperature sensor using
 	  a parallel port.
 
+config SPI_LOONGSON_CORE
+	tristate
+	depends on LOONGARCH || COMPILE_TEST
+
+config SPI_LOONGSON_PCI
+	tristate "Loongson SPI Controller PCI Driver Support"
+	select SPI_LOONGSON_CORE
+	depends on PCI && (LOONGARCH || COMPILE_TEST)
+	help
+	  This bus driver supports the Loongson SPI hardware controller in
+	  the Loongson platforms and supports to use PCI framework to
+	  register SPI device resources.
+	  Say Y or M here if you want to use the SPI controller on
+	  Loongson platform.
+
+config SPI_LOONGSON_PLATFORM
+	tristate "Loongson SPI Controller Platform Driver Support"
+	select SPI_LOONGSON_CORE
+	depends on OF && (LOONGARCH || COMPILE_TEST)
+	help
+	  This bus driver supports the Loongson SPI hardware controller in
+	  the Loongson platforms and supports to use DTS framework to
+	  register SPI device resources.
+	  Say Y or M here if you want to use the SPI controller on
+	  Loongson platform.
+
 config SPI_LP8841_RTC
 	tristate "ICP DAS LP-8841 SPI Controller for RTC"
 	depends on MACH_PXA27X_DT || COMPILE_TEST
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 080c2c1b3ec1..b0a6669dd318 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -71,6 +71,9 @@ obj-$(CONFIG_SPI_INTEL_PLATFORM)	+= spi-intel-platform.o
 obj-$(CONFIG_SPI_LANTIQ_SSC)		+= spi-lantiq-ssc.o
 obj-$(CONFIG_SPI_JCORE)			+= spi-jcore.o
 obj-$(CONFIG_SPI_LM70_LLP)		+= spi-lm70llp.o
+obj-$(CONFIG_SPI_LOONGSON_CORE)		+= spi-loongson-core.o
+obj-$(CONFIG_SPI_LOONGSON_PCI)		+= spi-loongson-pci.o
+obj-$(CONFIG_SPI_LOONGSON_PLATFORM)	+= spi-loongson-plat.o
 obj-$(CONFIG_SPI_LP8841_RTC)		+= spi-lp8841-rtc.o
 obj-$(CONFIG_SPI_MESON_SPICC)		+= spi-meson-spicc.o
 obj-$(CONFIG_SPI_MESON_SPIFC)		+= spi-meson-spifc.o
diff --git a/drivers/spi/spi-loongson-core.c b/drivers/spi/spi-loongson-core.c
new file mode 100644
index 000000000000..f97800b6fd65
--- /dev/null
+++ b/drivers/spi/spi-loongson-core.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Loongson SPI Support
+// Copyright (C) 2023 Loongson Technology Corporation Limited
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spi/spi.h>
+
+#include "spi-loongson.h"
+
+static inline void loongson_spi_write_reg(struct loongson_spi *spi, unsigned char reg,
+					  unsigned char data)
+{
+	writeb(data, spi->base + reg);
+}
+
+static inline char loongson_spi_read_reg(struct loongson_spi *spi, unsigned char reg)
+{
+	return readb(spi->base + reg);
+}
+
+static void loongson_spi_set_cs(struct spi_device *spi, bool en)
+{
+	int cs;
+	unsigned char mask = (BIT(4) | BIT(0)) << spi_get_chipselect(spi, 0);
+	unsigned char val = en ? mask :  (BIT(0) << spi_get_chipselect(spi, 0));
+	struct loongson_spi *loongson_spi = spi_controller_get_devdata(spi->controller);
+
+	cs = loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_SFCS_REG) & ~mask;
+	loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_SFCS_REG, val | cs);
+}
+
+static void loongson_spi_set_clk(struct loongson_spi *loongson_spi, unsigned int hz)
+{
+	unsigned char val;
+	unsigned int div, div_tmp;
+	static const char rdiv[12] = {0, 1, 4, 2, 3, 5, 6, 7, 8, 9, 10, 11};
+
+	div = clamp_val(DIV_ROUND_UP_ULL(loongson_spi->clk_rate, hz), 2, 4096);
+	div_tmp = rdiv[fls(div - 1)];
+	loongson_spi->spcr = (div_tmp & GENMASK(1, 0)) >> 0;
+	loongson_spi->sper = (div_tmp & GENMASK(3, 2)) >> 2;
+	val = loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_SPCR_REG);
+	val &= ~GENMASK(1, 0);
+	loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_SPCR_REG, val |
+			       loongson_spi->spcr);
+	val = loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_SPER_REG);
+	val &= ~GENMASK(1, 0);
+	loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_SPER_REG, val |
+			       loongson_spi->sper);
+	loongson_spi->hz = hz;
+}
+
+static void loongson_spi_set_mode(struct loongson_spi *loongson_spi,
+				  struct spi_device *spi)
+{
+	unsigned char val;
+
+	val = loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_SPCR_REG);
+	val &= ~(LOONGSON_SPI_SPCR_CPOL | LOONGSON_SPI_SPCR_CPHA);
+	if (spi->mode & SPI_CPOL)
+		val |= LOONGSON_SPI_SPCR_CPOL;
+	if (spi->mode & SPI_CPHA)
+		val |= LOONGSON_SPI_SPCR_CPHA;
+
+	loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_SPCR_REG, val);
+	loongson_spi->mode |= spi->mode;
+}
+
+static int loongson_spi_update_state(struct loongson_spi *loongson_spi,
+				struct spi_device *spi, struct spi_transfer *t)
+{
+	if (t && loongson_spi->hz != t->speed_hz)
+		loongson_spi_set_clk(loongson_spi, t->speed_hz);
+
+	if ((spi->mode ^ loongson_spi->mode) & SPI_MODE_X_MASK)
+		loongson_spi_set_mode(loongson_spi, spi);
+
+	return 0;
+}
+
+static int loongson_spi_setup(struct spi_device *spi)
+{
+	struct loongson_spi *loongson_spi;
+
+	loongson_spi = spi_controller_get_devdata(spi->controller);
+	if (spi->bits_per_word % 8)
+		return -EINVAL;
+
+	if (spi_get_chipselect(spi, 0) >= spi->controller->num_chipselect)
+		return -EINVAL;
+
+	loongson_spi->hz = 0;
+	loongson_spi_set_cs(spi, true);
+
+	return 0;
+}
+
+static int loongson_spi_write_read_8bit(struct spi_device *spi, const u8 **tx_buf,
+					u8 **rx_buf, unsigned int num)
+{
+	int ret;
+	struct loongson_spi *loongson_spi = spi_controller_get_devdata(spi->controller);
+
+	if (tx_buf && *tx_buf)
+		loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_FIFO_REG, *((*tx_buf)++));
+	else
+		loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_FIFO_REG, 0);
+
+	ret = readb_poll_timeout(loongson_spi->base + LOONGSON_SPI_SPSR_REG,
+				 loongson_spi->spsr, (loongson_spi->spsr &
+				 LOONGSON_SPI_SPSR_RFEMPTY) != LOONGSON_SPI_SPSR_RFEMPTY,
+				 1, USEC_PER_MSEC);
+
+	if (rx_buf && *rx_buf)
+		*(*rx_buf)++ = loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_FIFO_REG);
+	else
+		loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_FIFO_REG);
+
+	return ret;
+}
+
+static int loongson_spi_write_read(struct spi_device *spi, struct spi_transfer *xfer)
+{
+	int ret;
+	unsigned int count;
+	const u8 *tx = xfer->tx_buf;
+	u8 *rx = xfer->rx_buf;
+
+	count = xfer->len;
+	do {
+		ret = loongson_spi_write_read_8bit(spi, &tx, &rx, count);
+		if (ret)
+			break;
+	} while (--count);
+
+	return ret;
+}
+
+static int loongson_spi_prepare_message(struct spi_controller *ctlr, struct spi_message *m)
+{
+	struct loongson_spi *loongson_spi = spi_controller_get_devdata(ctlr);
+
+	loongson_spi->para = loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_PARA_REG);
+	loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_PARA_REG, loongson_spi->para &
+			       ~LOONGSON_SPI_PARA_MEM_EN);
+
+	return 0;
+}
+
+static int loongson_spi_transfer_one(struct spi_controller *ctrl, struct spi_device *spi,
+				     struct spi_transfer *xfer)
+{
+	struct loongson_spi *loongson_spi = spi_controller_get_devdata(spi->controller);
+
+	loongson_spi_update_state(loongson_spi, spi, xfer);
+	if (xfer->len)
+		return loongson_spi_write_read(spi, xfer);
+
+	return 0;
+}
+
+static int loongson_spi_unprepare_message(struct spi_controller *ctrl, struct spi_message *m)
+{
+	struct loongson_spi *loongson_spi = spi_controller_get_devdata(ctrl);
+
+	loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_PARA_REG, loongson_spi->para);
+
+	return 0;
+}
+
+static void loongson_spi_reginit(struct loongson_spi *loongson_spi_dev)
+{
+	unsigned char val;
+
+	val = loongson_spi_read_reg(loongson_spi_dev, LOONGSON_SPI_SPCR_REG);
+	val &= ~LOONGSON_SPI_SPCR_SPE;
+	loongson_spi_write_reg(loongson_spi_dev, LOONGSON_SPI_SPCR_REG, val);
+
+	loongson_spi_write_reg(loongson_spi_dev, LOONGSON_SPI_SPSR_REG,
+			       (LOONGSON_SPI_SPSR_SPIF | LOONGSON_SPI_SPSR_WCOL));
+
+	val = loongson_spi_read_reg(loongson_spi_dev, LOONGSON_SPI_SPCR_REG);
+	val |= LOONGSON_SPI_SPCR_SPE;
+	loongson_spi_write_reg(loongson_spi_dev, LOONGSON_SPI_SPCR_REG, val);
+}
+
+int loongson_spi_init_controller(struct device *dev, void __iomem *regs)
+{
+	struct spi_controller *controller;
+	struct loongson_spi *spi;
+	struct clk *clk;
+
+	controller = devm_spi_alloc_host(dev, sizeof(struct loongson_spi));
+	if (controller == NULL)
+		return -ENOMEM;
+
+	controller->mode_bits = SPI_MODE_X_MASK | SPI_CS_HIGH;
+	controller->setup = loongson_spi_setup;
+	controller->prepare_message = loongson_spi_prepare_message;
+	controller->transfer_one = loongson_spi_transfer_one;
+	controller->unprepare_message = loongson_spi_unprepare_message;
+	controller->set_cs = loongson_spi_set_cs;
+	controller->num_chipselect = 4;
+	device_set_node(&controller->dev, dev_fwnode(dev));
+	dev_set_drvdata(dev, controller);
+
+	spi = spi_controller_get_devdata(controller);
+	spi->base = regs;
+	spi->controller = controller;
+
+	clk = devm_clk_get_optional(dev, NULL);
+	if (IS_ERR(clk))
+		return dev_err_probe(dev, PTR_ERR(clk), "unable to get clock\n");
+
+	spi->clk_rate = clk_get_rate(clk);
+	loongson_spi_reginit(spi);
+
+	spi->mode = 0;
+
+	return devm_spi_register_controller(dev, controller);
+}
+EXPORT_SYMBOL_NS_GPL(loongson_spi_init_controller, SPI_LOONGSON_CORE);
+
+static int __maybe_unused loongson_spi_suspend(struct device *dev)
+{
+	struct loongson_spi *loongson_spi;
+	struct spi_controller *controller;
+
+	controller = dev_get_drvdata(dev);
+	spi_controller_suspend(controller);
+
+	loongson_spi = spi_controller_get_devdata(controller);
+
+	loongson_spi->spcr = loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_SPCR_REG);
+	loongson_spi->sper = loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_SPER_REG);
+	loongson_spi->spsr = loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_SPSR_REG);
+	loongson_spi->para = loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_PARA_REG);
+	loongson_spi->sfcs = loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_SFCS_REG);
+	loongson_spi->timi = loongson_spi_read_reg(loongson_spi, LOONGSON_SPI_TIMI_REG);
+
+	return 0;
+}
+
+static int __maybe_unused loongson_spi_resume(struct device *dev)
+{
+	struct loongson_spi *loongson_spi;
+	struct spi_controller *controller;
+
+	controller = dev_get_drvdata(dev);
+	loongson_spi = spi_controller_get_devdata(controller);
+
+	loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_SPCR_REG, loongson_spi->spcr);
+	loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_SPER_REG, loongson_spi->sper);
+	loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_SPSR_REG, loongson_spi->spsr);
+	loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_PARA_REG, loongson_spi->para);
+	loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_SFCS_REG, loongson_spi->sfcs);
+	loongson_spi_write_reg(loongson_spi, LOONGSON_SPI_TIMI_REG, loongson_spi->timi);
+
+	spi_controller_resume(controller);
+
+	return 0;
+}
+
+const struct dev_pm_ops loongson_spi_dev_pm_ops = {
+	.suspend = loongson_spi_suspend,
+	.resume = loongson_spi_resume,
+};
+EXPORT_SYMBOL_NS_GPL(loongson_spi_dev_pm_ops, SPI_LOONGSON_CORE);
+
+MODULE_DESCRIPTION("Loongson SPI core driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-loongson-pci.c b/drivers/spi/spi-loongson-pci.c
new file mode 100644
index 000000000000..134cda0c13a5
--- /dev/null
+++ b/drivers/spi/spi-loongson-pci.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0+
+// PCI interface driver for Loongson SPI Support
+// Copyright (C) 2023 Loongson Technology Corporation Limited
+
+#include <linux/mod_devicetable.h>
+#include <linux/pci.h>
+
+#include "spi-loongson.h"
+
+static int loongson_spi_pci_register(struct pci_dev *pdev,
+			const struct pci_device_id *ent)
+{
+	int ret;
+	void __iomem *reg_base;
+	struct device *dev = &pdev->dev;
+	int pci_bar = 0;
+
+	ret = pcim_enable_device(pdev);
+	if (ret < 0)
+		return dev_err_probe(dev, ret, "cannot enable pci device\n");
+
+	ret = pcim_iomap_regions(pdev, BIT(pci_bar), pci_name(pdev));
+	if (ret)
+		return dev_err_probe(dev, ret, "failed to request and remap memory\n");
+
+	reg_base = pcim_iomap_table(pdev)[pci_bar];
+
+	ret = loongson_spi_init_controller(dev, reg_base);
+	if (ret)
+		return dev_err_probe(dev, ret, "failed to initialize controller\n");
+
+	return 0;
+}
+
+static struct pci_device_id loongson_spi_devices[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, 0x7a0b) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, 0x7a1b) },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, loongson_spi_devices);
+
+static struct pci_driver loongson_spi_pci_driver = {
+	.name       = "loongson-spi-pci",
+	.id_table   = loongson_spi_devices,
+	.probe      = loongson_spi_pci_register,
+	.driver	= {
+		.bus = &pci_bus_type,
+		.pm = &loongson_spi_dev_pm_ops,
+	},
+};
+module_pci_driver(loongson_spi_pci_driver);
+
+MODULE_DESCRIPTION("Loongson spi pci driver");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(SPI_LOONGSON_CORE);
diff --git a/drivers/spi/spi-loongson-plat.c b/drivers/spi/spi-loongson-plat.c
new file mode 100644
index 000000000000..c066e5f5891e
--- /dev/null
+++ b/drivers/spi/spi-loongson-plat.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Platform driver for Loongson SPI Support
+// Copyright (C) 2023 Loongson Technology Corporation Limited
+
+#include <linux/err.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+
+#include "spi-loongson.h"
+
+static int loongson_spi_platform_probe(struct platform_device *pdev)
+{
+	int ret;
+	void __iomem *reg_base;
+	struct device *dev = &pdev->dev;
+
+	reg_base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(reg_base))
+		return PTR_ERR(reg_base);
+
+	ret = loongson_spi_init_controller(dev, reg_base);
+	if (ret)
+		return dev_err_probe(dev, ret, "failed to initialize controller\n");
+
+	return 0;
+}
+
+static const struct of_device_id loongson_spi_id_table[] = {
+	{ .compatible = "loongson,ls2k1000-spi" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, loongson_spi_id_table);
+
+static struct platform_driver loongson_spi_plat_driver = {
+	.probe = loongson_spi_platform_probe,
+	.driver	= {
+		.name	= "loongson-spi",
+		.bus = &platform_bus_type,
+		.pm = &loongson_spi_dev_pm_ops,
+		.of_match_table = loongson_spi_id_table,
+	},
+};
+module_platform_driver(loongson_spi_plat_driver);
+
+MODULE_DESCRIPTION("Loongson spi platform driver");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(SPI_LOONGSON_CORE);
diff --git a/drivers/spi/spi-loongson.h b/drivers/spi/spi-loongson.h
new file mode 100644
index 000000000000..35f95b161842
--- /dev/null
+++ b/drivers/spi/spi-loongson.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Header File for Loongson SPI Driver. */
+/* Copyright (C) 2023 Loongson Technology Corporation Limited */
+
+#ifndef __LINUX_SPI_LOONGSON_H
+#define __LINUX_SPI_LOONGSON_H
+
+#include <linux/bits.h>
+#include <linux/pm.h>
+#include <linux/types.h>
+
+#define	LOONGSON_SPI_SPCR_REG	0x00
+#define	LOONGSON_SPI_SPSR_REG	0x01
+#define	LOONGSON_SPI_FIFO_REG	0x02
+#define	LOONGSON_SPI_SPER_REG	0x03
+#define	LOONGSON_SPI_PARA_REG	0x04
+#define	LOONGSON_SPI_SFCS_REG	0x05
+#define	LOONGSON_SPI_TIMI_REG	0x06
+
+/* Bits definition for Loongson SPI register */
+#define	LOONGSON_SPI_PARA_MEM_EN	BIT(0)
+#define	LOONGSON_SPI_SPCR_CPHA	BIT(2)
+#define	LOONGSON_SPI_SPCR_CPOL	BIT(3)
+#define	LOONGSON_SPI_SPCR_SPE	BIT(6)
+#define	LOONGSON_SPI_SPSR_RFEMPTY	BIT(0)
+#define	LOONGSON_SPI_SPSR_WCOL	BIT(6)
+#define	LOONGSON_SPI_SPSR_SPIF	BIT(7)
+
+struct device;
+struct spi_controller;
+
+struct loongson_spi {
+	struct	spi_controller	*controller;
+	void __iomem		*base;
+	int			cs_active;
+	unsigned int		hz;
+	unsigned char		spcr;
+	unsigned char		sper;
+	unsigned char		spsr;
+	unsigned char		para;
+	unsigned char		sfcs;
+	unsigned char		timi;
+	unsigned int		mode;
+	u64			clk_rate;
+};
+
+int loongson_spi_init_controller(struct device *dev, void __iomem *reg);
+extern const struct dev_pm_ops loongson_spi_dev_pm_ops;
+#endif /* __LINUX_SPI_LOONGSON_H */

From e2c12739ccf76aae12e9e949526a475b9d17adaa Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Thu, 6 Jul 2023 16:22:25 +0200
Subject: [PATCH 0667/2047] genirq: Prevent nested thread vs
 synchronize_hardirq() deadlock

There is a possibility of deadlock if synchronize_hardirq() is called
when the nested threaded interrupt is active.  The following scenario
was observed on a uniprocessor PREEMPT_NONE system:

 Thread 1                      Thread 2

 handle_nested_thread()
  Set INPROGRESS
  Call ->thread_fn()
   thread_fn goes to sleep

                              free_irq()
                               __synchronize_hardirq()
                                Busy-loop forever waiting for INPROGRESS
                                to be cleared

The INPROGRESS flag is only supposed to be used for hard interrupt
handlers.  Remove the incorrect usage in the nested threaded interrupt
case and instead re-use the threads_active / wait_for_threads mechanism
to wait for nested threaded interrupts to complete.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20230613-genirq-nested-v3-1-ae58221143eb@axis.com
---
 kernel/irq/chip.c      | 11 ++++-------
 kernel/irq/internals.h |  2 ++
 kernel/irq/manage.c    | 26 ++++++++++++++------------
 3 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index ee8c0acf39df..dc94e0bf2c94 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -473,11 +473,12 @@ void handle_nested_irq(unsigned int irq)
 	action = desc->action;
 	if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) {
 		desc->istate |= IRQS_PENDING;
-		goto out_unlock;
+		raw_spin_unlock_irq(&desc->lock);
+		return;
 	}
 
 	kstat_incr_irqs_this_cpu(desc);
-	irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
+	atomic_inc(&desc->threads_active);
 	raw_spin_unlock_irq(&desc->lock);
 
 	action_ret = IRQ_NONE;
@@ -487,11 +488,7 @@ void handle_nested_irq(unsigned int irq)
 	if (!irq_settings_no_debug(desc))
 		note_interrupt(desc, action_ret);
 
-	raw_spin_lock_irq(&desc->lock);
-	irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
-
-out_unlock:
-	raw_spin_unlock_irq(&desc->lock);
+	wake_threads_waitq(desc);
 }
 EXPORT_SYMBOL_GPL(handle_nested_irq);
 
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index bdd35bb9c735..831f38308466 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -121,6 +121,8 @@ void irq_resend_init(struct irq_desc *desc);
 bool irq_wait_for_poll(struct irq_desc *desc);
 void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action);
 
+void wake_threads_waitq(struct irq_desc *desc);
+
 #ifdef CONFIG_PROC_FS
 extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
 extern void unregister_irq_proc(unsigned int irq, struct irq_desc *desc);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d2742af0f0fd..d309ba84e08a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -108,6 +108,16 @@ bool synchronize_hardirq(unsigned int irq)
 }
 EXPORT_SYMBOL(synchronize_hardirq);
 
+static void __synchronize_irq(struct irq_desc *desc)
+{
+	__synchronize_hardirq(desc, true);
+	/*
+	 * We made sure that no hardirq handler is running. Now verify that no
+	 * threaded handlers are active.
+	 */
+	wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
+}
+
 /**
  *	synchronize_irq - wait for pending IRQ handlers (on other CPUs)
  *	@irq: interrupt number to wait for
@@ -127,16 +137,8 @@ void synchronize_irq(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
-	if (desc) {
-		__synchronize_hardirq(desc, true);
-		/*
-		 * We made sure that no hardirq handler is
-		 * running. Now verify that no threaded handlers are
-		 * active.
-		 */
-		wait_event(desc->wait_for_threads,
-			   !atomic_read(&desc->threads_active));
-	}
+	if (desc)
+		__synchronize_irq(desc);
 }
 EXPORT_SYMBOL(synchronize_irq);
 
@@ -1216,7 +1218,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc,
 	return ret;
 }
 
-static void wake_threads_waitq(struct irq_desc *desc)
+void wake_threads_waitq(struct irq_desc *desc)
 {
 	if (atomic_dec_and_test(&desc->threads_active))
 		wake_up(&desc->wait_for_threads);
@@ -1944,7 +1946,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
 	 * supports it also make sure that there is no (not yet serviced)
 	 * interrupt in flight at the hardware level.
 	 */
-	__synchronize_hardirq(desc, true);
+	__synchronize_irq(desc);
 
 #ifdef CONFIG_DEBUG_SHIRQ
 	/*

From 51a5acce7162185a9b9d695b314a12fc067562b4 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 20 Jul 2023 22:36:25 +0800
Subject: [PATCH 0668/2047] genirq: Remove unused extern declaration

commit 3795de236d67 ("genirq: Distangle kernel/irq/handle.c")
left behind this.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20230720143625.29176-1-yuehaibing@huawei.com
---
 kernel/irq/internals.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 831f38308466..bcc7f21db9ee 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -108,8 +108,6 @@ extern int __irq_get_irqchip_state(struct irq_data *data,
 				   enum irqchip_irq_state which,
 				   bool *state);
 
-extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
-
 irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc);
 irqreturn_t handle_irq_event_percpu(struct irq_desc *desc);
 irqreturn_t handle_irq_event(struct irq_desc *desc);

From 52b38b7ad589c89a8c69272497bed20034f8e322 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Fri, 28 Jul 2023 23:53:13 +0800
Subject: [PATCH 0669/2047] cpu/SMT: Fix cpu_smt_possible() comment

Commit e1572f1d08be ("cpu/SMT: create and export cpu_smt_possible()")
introduces cpu_smt_possible() to represent if SMT is theoretically
possible. It returns true when SMT is supported and not forcefully
disabled ('nosmt=force'). But the comment of it says "Returns true if
SMT is not supported of forcefully (irreversibly) disabled", which is
wrong. Fix that comment accordingly.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Link: https://lore.kernel.org/r/20230728155313.44170-1-rui.zhang@intel.com
---
 kernel/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 7e8f1b044772..f6811c857102 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -676,7 +676,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu)
 	return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
 }
 
-/* Returns true if SMT is not supported of forcefully (irreversibly) disabled */
+/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */
 bool cpu_smt_possible(void)
 {
 	return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&

From f6a756e8fb12923f0e3996a575e935e94f3594eb Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 14 Jul 2023 11:50:07 -0600
Subject: [PATCH 0670/2047] thermal: Explicitly include correct DT includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/amlogic_thermal.c           | 2 --
 drivers/thermal/broadcom/bcm2711_thermal.c  | 2 +-
 drivers/thermal/broadcom/brcmstb_thermal.c  | 2 +-
 drivers/thermal/hisi_thermal.c              | 2 +-
 drivers/thermal/imx8mm_thermal.c            | 1 -
 drivers/thermal/imx_sc_thermal.c            | 1 -
 drivers/thermal/imx_thermal.c               | 2 +-
 drivers/thermal/k3_bandgap.c                | 2 +-
 drivers/thermal/k3_j72xx_bandgap.c          | 2 +-
 drivers/thermal/mediatek/auxadc_thermal.c   | 1 -
 drivers/thermal/mediatek/lvts_thermal.c     | 2 +-
 drivers/thermal/qcom/qcom-spmi-adc-tm5.c    | 1 -
 drivers/thermal/qcom/qcom-spmi-temp-alarm.c | 1 -
 drivers/thermal/rcar_gen3_thermal.c         | 2 +-
 drivers/thermal/rcar_thermal.c              | 2 +-
 drivers/thermal/rzg2l_thermal.c             | 2 +-
 drivers/thermal/samsung/exynos_tmu.c        | 2 +-
 drivers/thermal/sprd_thermal.c              | 2 +-
 drivers/thermal/st/stm_thermal.c            | 2 --
 drivers/thermal/sun8i_thermal.c             | 2 +-
 drivers/thermal/tegra/tegra30-tsensor.c     | 2 +-
 drivers/thermal/thermal_of.c                | 3 +--
 drivers/thermal/uniphier_thermal.c          | 1 -
 23 files changed, 15 insertions(+), 26 deletions(-)

diff --git a/drivers/thermal/amlogic_thermal.c b/drivers/thermal/amlogic_thermal.c
index 756b218880a7..81ebbf6de0de 100644
--- a/drivers/thermal/amlogic_thermal.c
+++ b/drivers/thermal/amlogic_thermal.c
@@ -22,8 +22,6 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/thermal.h>
diff --git a/drivers/thermal/broadcom/bcm2711_thermal.c b/drivers/thermal/broadcom/bcm2711_thermal.c
index c243a76a3471..03ac2d02e9d4 100644
--- a/drivers/thermal/broadcom/bcm2711_thermal.c
+++ b/drivers/thermal/broadcom/bcm2711_thermal.c
@@ -15,8 +15,8 @@
 #include <linux/kernel.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/thermal.h>
 
diff --git a/drivers/thermal/broadcom/brcmstb_thermal.c b/drivers/thermal/broadcom/brcmstb_thermal.c
index 72d1dbe60b8f..0b73abdaa792 100644
--- a/drivers/thermal/broadcom/brcmstb_thermal.c
+++ b/drivers/thermal/broadcom/brcmstb_thermal.c
@@ -17,8 +17,8 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
-#include <linux/of_device.h>
 #include <linux/thermal.h>
 
 #define AVS_TMON_STATUS			0x00
diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index 3f09ef8be41a..fb54ed4bf6f0 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -13,9 +13,9 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
-#include <linux/of_device.h>
 #include <linux/thermal.h>
 
 #define HI6220_TEMP0_LAG			(0x0)
diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c
index d4b40869c7d7..e89b11b3f2b9 100644
--- a/drivers/thermal/imx8mm_thermal.c
+++ b/drivers/thermal/imx8mm_thermal.c
@@ -12,7 +12,6 @@
 #include <linux/module.h>
 #include <linux/nvmem-consumer.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/thermal.h>
diff --git a/drivers/thermal/imx_sc_thermal.c b/drivers/thermal/imx_sc_thermal.c
index 8d6b4ef23746..7224f8d21db9 100644
--- a/drivers/thermal/imx_sc_thermal.c
+++ b/drivers/thermal/imx_sc_thermal.c
@@ -8,7 +8,6 @@
 #include <linux/firmware/imx/sci.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/thermal.h>
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index a94ec0a0c9dd..826358cbe810 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -11,7 +11,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/thermal.h>
 #include <linux/nvmem-consumer.h>
diff --git a/drivers/thermal/k3_bandgap.c b/drivers/thermal/k3_bandgap.c
index 1c3e590157ec..68f59b3735d3 100644
--- a/drivers/thermal/k3_bandgap.c
+++ b/drivers/thermal/k3_bandgap.c
@@ -11,7 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/thermal.h>
 #include <linux/types.h>
diff --git a/drivers/thermal/k3_j72xx_bandgap.c b/drivers/thermal/k3_j72xx_bandgap.c
index 5be1f09eeb2c..a5a0fc9b9356 100644
--- a/drivers/thermal/k3_j72xx_bandgap.c
+++ b/drivers/thermal/k3_j72xx_bandgap.c
@@ -10,10 +10,10 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/err.h>
 #include <linux/types.h>
-#include <linux/of_platform.h>
 #include <linux/io.h>
 #include <linux/thermal.h>
 #include <linux/of.h>
diff --git a/drivers/thermal/mediatek/auxadc_thermal.c b/drivers/thermal/mediatek/auxadc_thermal.c
index f59d36de20a0..c537aed71017 100644
--- a/drivers/thermal/mediatek/auxadc_thermal.c
+++ b/drivers/thermal/mediatek/auxadc_thermal.c
@@ -15,7 +15,6 @@
 #include <linux/nvmem-consumer.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/io.h>
diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c
index b693fac2d677..054c965ae5e1 100644
--- a/drivers/thermal/mediatek/lvts_thermal.c
+++ b/drivers/thermal/mediatek/lvts_thermal.c
@@ -13,7 +13,7 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/nvmem-consumer.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/thermal.h>
diff --git a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
index 5ddc39b2be32..756ac6842ff9 100644
--- a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
+++ b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
@@ -14,7 +14,6 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/thermal.h>
diff --git a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c
index 0e8ebfcd84c5..78c5cfe6a0c0 100644
--- a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c
+++ b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c
@@ -10,7 +10,6 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/thermal.h>
diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c
index 9029d01e029b..bd2fb8c2e968 100644
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -11,7 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/thermal.h>
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index b8571f7090aa..293f8dd9fe0a 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -11,7 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reboot.h>
diff --git a/drivers/thermal/rzg2l_thermal.c b/drivers/thermal/rzg2l_thermal.c
index b56981f85306..6b2bf3426f52 100644
--- a/drivers/thermal/rzg2l_thermal.c
+++ b/drivers/thermal/rzg2l_thermal.c
@@ -9,8 +9,8 @@
 #include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/math.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index 45e5c840d130..58f4d8f7a3fd 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -15,7 +15,7 @@
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
diff --git a/drivers/thermal/sprd_thermal.c b/drivers/thermal/sprd_thermal.c
index 2fb90fdad76e..e27c4bdc8912 100644
--- a/drivers/thermal/sprd_thermal.c
+++ b/drivers/thermal/sprd_thermal.c
@@ -6,7 +6,7 @@
 #include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/nvmem-consumer.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/thermal.h>
diff --git a/drivers/thermal/st/stm_thermal.c b/drivers/thermal/st/stm_thermal.c
index 903fcf1763f1..142a7e5d12f4 100644
--- a/drivers/thermal/st/stm_thermal.c
+++ b/drivers/thermal/st/stm_thermal.c
@@ -14,8 +14,6 @@
 #include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/thermal.h>
 
diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
index 195f3c5d0b38..cca16d632d9f 100644
--- a/drivers/thermal/sun8i_thermal.c
+++ b/drivers/thermal/sun8i_thermal.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/nvmem-consumer.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
diff --git a/drivers/thermal/tegra/tegra30-tsensor.c b/drivers/thermal/tegra/tegra30-tsensor.c
index c243e9d76d3c..d911fa60f100 100644
--- a/drivers/thermal/tegra/tegra30-tsensor.c
+++ b/drivers/thermal/tegra/tegra30-tsensor.c
@@ -18,7 +18,7 @@
 #include <linux/iopoll.h>
 #include <linux/math.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/reset.h>
diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
index bc07ae1c284c..330690a3a208 100644
--- a/drivers/thermal/thermal_of.c
+++ b/drivers/thermal/thermal_of.c
@@ -10,8 +10,7 @@
 
 #include <linux/err.h>
 #include <linux/export.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/thermal.h>
 #include <linux/types.h>
diff --git a/drivers/thermal/uniphier_thermal.c b/drivers/thermal/uniphier_thermal.c
index aef6119cc004..6f32ab61d174 100644
--- a/drivers/thermal/uniphier_thermal.c
+++ b/drivers/thermal/uniphier_thermal.c
@@ -12,7 +12,6 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/thermal.h>

From 7ebf243a201856adcac240e490596bb908ee5fcc Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 14 Jul 2023 11:45:54 -0600
Subject: [PATCH 0671/2047] hsi: omap_ssi: Explicitly include correct DT
 includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230714174554.4056851-1-robh@kernel.org
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/hsi/controllers/omap_ssi_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hsi/controllers/omap_ssi_core.c b/drivers/hsi/controllers/omap_ssi_core.c
index 84ba8b875199..4c1a00f9929e 100644
--- a/drivers/hsi/controllers/omap_ssi_core.c
+++ b/drivers/hsi/controllers/omap_ssi_core.c
@@ -25,6 +25,7 @@
 #include <linux/debugfs.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/pm_runtime.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/hsi/hsi.h>
 #include <linux/idr.h>

From cb88f7f51bc6f351a529ff61d0a706c6eae1417a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 28 Jul 2023 13:33:02 -0700
Subject: [PATCH 0672/2047] rcu-tasks: Permit use of debug-objects with RCU
 Tasks flavors

Currently, cblist_init_generic() holds a raw spinlock when invoking
INIT_WORK().  This fails in kernels built with CONFIG_DEBUG_OBJECTS=y
due to memory allocation being forbidden while holding a raw spinlock.
But the only reason for holding the raw spinlock is to synchronize
with early boot calls to call_rcu_tasks(), call_rcu_tasks_rude, and,
last but not least, call_rcu_tasks_trace().  These calls also invoke
cblist_init_generic() in order to support early boot queueing of
callbacks.

Except that there are no early boot calls to either of these three
functions, and the BPF guys confirm that they have no plans to add any
such calls.

This commit therefore removes the synchronization and adds a
WARN_ON_ONCE() to catch the case of now-prohibited early boot RCU Tasks
callback queueing.

If early boot queueing is needed, an "initialized" flag may be added to
the rcu_tasks structure.  Then queueing a callback before this flag is set
would initialize the callback list (if needed) and queue the callback.
The decision as to where to queue the callback given the possibility of
non-zero boot CPUs is left as an exercise for the reader.

Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/tasks.h | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 291d536b1789..a4cd4ea2402c 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -236,7 +236,7 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
 #endif /* #ifndef CONFIG_TINY_RCU */
 
 // Initialize per-CPU callback lists for the specified flavor of
-// Tasks RCU.
+// Tasks RCU.  Do not enqueue callbacks before this function is invoked.
 static void cblist_init_generic(struct rcu_tasks *rtp)
 {
 	int cpu;
@@ -244,7 +244,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
 	int lim;
 	int shift;
 
-	raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
 	if (rcu_task_enqueue_lim < 0) {
 		rcu_task_enqueue_lim = 1;
 		rcu_task_cb_adjust = true;
@@ -267,17 +266,16 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
 		WARN_ON_ONCE(!rtpcp);
 		if (cpu)
 			raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock));
-		raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
+		local_irq_save(flags);  // serialize initialization
 		if (rcu_segcblist_empty(&rtpcp->cblist))
 			rcu_segcblist_init(&rtpcp->cblist);
+		local_irq_restore(flags);
 		INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq);
 		rtpcp->cpu = cpu;
 		rtpcp->rtpp = rtp;
 		if (!rtpcp->rtp_blkd_tasks.next)
 			INIT_LIST_HEAD(&rtpcp->rtp_blkd_tasks);
-		raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
 	}
-	raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
 
 	pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n", rtp->name,
 			data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim), rcu_task_cb_adjust);
@@ -351,11 +349,9 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 		    READ_ONCE(rtp->percpu_enqueue_lim) != nr_cpu_ids)
 			needadjust = true;  // Defer adjustment to avoid deadlock.
 	}
-	if (!rcu_segcblist_is_enabled(&rtpcp->cblist)) {
-		raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
-		cblist_init_generic(rtp);
-		raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
-	}
+	// Queuing callbacks before initialization not yet supported.
+	if (WARN_ON_ONCE(!rcu_segcblist_is_enabled(&rtpcp->cblist)))
+		rcu_segcblist_init(&rtpcp->cblist);
 	needwake = (func == wakeme_after_rcu) ||
 		   (rcu_segcblist_n_cbs(&rtpcp->cblist) == rcu_task_lazy_lim);
 	if (havekthread && !needwake && !timer_pending(&rtpcp->lazy_timer)) {

From 481daa505bc3eb4ac7991e2e7a981506639935fd Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Fri, 28 Jul 2023 12:04:30 +0200
Subject: [PATCH 0673/2047] s390/cert_store: select CRYPTO_LIB_SHA256

A build failure was reported when sha256() is not present:

gcc-13.1.0-nolibc/s390-linux/bin/s390-linux-ld: arch/s390/kernel/cert_store.o: in function `check_certificate_hash':
arch/s390/kernel/cert_store.c:267: undefined reference to `sha256'

Therefore make CONFIG_CERT_STORE select CRYPTO_LIB_SHA256.

Fixes: 8cf57d7217c3 ("s390: add support for user-defined certificates")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Closes: https://lore.kernel.org/all/8ecb57fb-4560-bdfc-9e55-63e3b0937132@infradead.org/
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Tested-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20230728100430.1567328-1-svens@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index d9d50a7a2016..18bf754e1fad 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -516,6 +516,7 @@ config KEXEC_SIG
 config CERT_STORE
 	bool "Get user certificates via DIAG320"
 	depends on KEYS
+	select CRYPTO_LIB_SHA256
 	help
 	  Enable this option if you want to access user-provided secure boot
 	  certificates via DIAG 0x320.

From 2fa00769b1e4bcf20cf0c967cb45f0cd1cea2b5c Mon Sep 17 00:00:00 2001
From: xiongxin <xiongxin@kylinos.cn>
Date: Fri, 7 Apr 2023 10:47:59 +0800
Subject: [PATCH 0674/2047] powercap: intel_rapl: Optimize rp->domains memory
 allocation

In the memory allocation of rp->domains in rapl_detect_domains(), there
is an additional memory of struct rapl_domain allocated, optimize the
code here to save sizeof(struct rapl_domain) bytes of memory.

Test in Intel NUC (i5-1135G7).

Signed-off-by: xiongxin <xiongxin@kylinos.cn>
Tested-by: xiongxin <xiongxin@kylinos.cn>
Reviewed-by: Srinivas Pandruvada<srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/intel_rapl_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 4e646e5e48f6..5e2a1c376a64 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -1485,7 +1485,7 @@ static int rapl_detect_domains(struct rapl_package *rp)
 	}
 	pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name);
 
-	rp->domains = kcalloc(rp->nr_domains + 1, sizeof(struct rapl_domain),
+	rp->domains = kcalloc(rp->nr_domains, sizeof(struct rapl_domain),
 			      GFP_KERNEL);
 	if (!rp->domains)
 		return -ENOMEM;

From fc001b36e50f4136c6b8d9fceaf17fc145ffbd0d Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Thu, 27 Jul 2023 14:33:47 +0100
Subject: [PATCH 0675/2047] ACPI: Move AMBA bus scan handling into arm64
 specific directory

Commit fcea0ccf4fd7 ("ACPI: bus: Consolidate all arm specific
initialisation into acpi_arm_init()") moved all of the ARM-specific
initialization into acpi_arm_init(). However, acpi_amba.c being outside
of drivers/acpi/arm64 got ignored and hence acpi_amba_init() was not
moved into acpi_arm_init().

Move the AMBA platform bus support into arm64 specific folder and make
acpi_amba_init() part of acpi_arm_init().

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/Makefile                      | 1 -
 drivers/acpi/arm64/Makefile                | 1 +
 drivers/acpi/{acpi_amba.c => arm64/amba.c} | 2 +-
 drivers/acpi/arm64/init.c                  | 2 ++
 drivers/acpi/arm64/init.h                  | 1 +
 drivers/acpi/internal.h                    | 5 -----
 drivers/acpi/scan.c                        | 1 -
 7 files changed, 5 insertions(+), 8 deletions(-)
 rename drivers/acpi/{acpi_amba.c => arm64/amba.c} (99%)

diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 3fc5a0d54f6e..eaa09bf52f17 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -50,7 +50,6 @@ acpi-$(CONFIG_PCI)		+= acpi_lpss.o
 acpi-y				+= acpi_apd.o
 acpi-y				+= acpi_platform.o
 acpi-y				+= acpi_pnp.o
-acpi-$(CONFIG_ARM_AMBA)	+= acpi_amba.o
 acpi-y				+= power.o
 acpi-y				+= event.o
 acpi-y				+= evged.o
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index f81fe24894b2..143debc1ba4a 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -3,4 +3,5 @@ obj-$(CONFIG_ACPI_AGDI) 	+= agdi.o
 obj-$(CONFIG_ACPI_IORT) 	+= iort.o
 obj-$(CONFIG_ACPI_GTDT) 	+= gtdt.o
 obj-$(CONFIG_ACPI_APMT) 	+= apmt.o
+obj-$(CONFIG_ARM_AMBA)		+= amba.o
 obj-y				+= dma.o init.o
diff --git a/drivers/acpi/acpi_amba.c b/drivers/acpi/arm64/amba.c
similarity index 99%
rename from drivers/acpi/acpi_amba.c
rename to drivers/acpi/arm64/amba.c
index f5b443ab01c2..b2a7631d7ac7 100644
--- a/drivers/acpi/acpi_amba.c
+++ b/drivers/acpi/arm64/amba.c
@@ -17,7 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-#include "internal.h"
+#include "init.h"
 
 static const struct acpi_device_id amba_id_list[] = {
 	{"ARMH0061", 0}, /* PL061 GPIO Device */
diff --git a/drivers/acpi/arm64/init.c b/drivers/acpi/arm64/init.c
index d3ce53dda122..d0c8aed90fd1 100644
--- a/drivers/acpi/arm64/init.c
+++ b/drivers/acpi/arm64/init.c
@@ -10,4 +10,6 @@ void __init acpi_arm_init(void)
 		acpi_apmt_init();
 	if (IS_ENABLED(CONFIG_ACPI_IORT))
 		acpi_iort_init();
+	if (IS_ENABLED(CONFIG_ARM_AMBA))
+		acpi_amba_init();
 }
diff --git a/drivers/acpi/arm64/init.h b/drivers/acpi/arm64/init.h
index a1715a2a34e9..dcc277977194 100644
--- a/drivers/acpi/arm64/init.h
+++ b/drivers/acpi/arm64/init.h
@@ -4,3 +4,4 @@
 void __init acpi_agdi_init(void);
 void __init acpi_apmt_init(void);
 void __init acpi_iort_init(void);
+void __init acpi_amba_init(void);
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index f4148dc50b9c..21ec31b78216 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -28,11 +28,6 @@ void acpi_processor_init(void);
 void acpi_platform_init(void);
 void acpi_pnp_init(void);
 void acpi_int340x_thermal_init(void);
-#ifdef CONFIG_ARM_AMBA
-void acpi_amba_init(void);
-#else
-static inline void acpi_amba_init(void) {}
-#endif
 int acpi_sysfs_init(void);
 void acpi_gpe_apply_masked_gpes(void);
 void acpi_container_init(void);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 5b145f1aaa1b..902763430d56 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2615,7 +2615,6 @@ void __init acpi_scan_init(void)
 	acpi_watchdog_init();
 	acpi_pnp_init();
 	acpi_int340x_thermal_init();
-	acpi_amba_init();
 	acpi_init_lpit();
 
 	acpi_scan_add_handler(&generic_device_handler);

From dabc621a311007d78bfcf60f8125c5e8c33ded7b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 25 Jul 2023 20:06:37 +0200
Subject: [PATCH 0676/2047] ACPI: thermal: Drop enabled flag from struct
 acpi_thermal_active

The enabled field of struct acpi_thermal_active is only updated and never
read, so drop it along with the related code.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/thermal.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 3163a40f02e3..3aab121cd410 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -115,7 +115,6 @@ struct acpi_thermal_active {
 	struct acpi_handle_list devices;
 	unsigned long temperature;
 	bool valid;
-	bool enabled;
 };
 
 struct acpi_thermal_trips {
@@ -1039,15 +1038,10 @@ static int acpi_thermal_resume(struct device *dev)
 		if (!tz->trips.active[i].valid)
 			break;
 
-		tz->trips.active[i].enabled = true;
 		for (j = 0; j < tz->trips.active[i].devices.count; j++) {
 			result = acpi_bus_update_power(
 					tz->trips.active[i].devices.handles[j],
 					&power_state);
-			if (result || (power_state != ACPI_STATE_D0)) {
-				tz->trips.active[i].enabled = false;
-				break;
-			}
 		}
 	}
 

From 39b5ba6cb56962d2ac83e4dd89f68538f7861efe Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 1 Aug 2023 21:03:54 +0800
Subject: [PATCH 0677/2047] regulator: max77857: change some variables to
 static

max77857_regmap_config and max77857_driver are only used
in max77857-regulator.c now, change them to static.

Fixes: af71cccadeced ("regulator: max77857: Add ADI MAX77857/59/MAX77831 Regulator Support")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230801130354.552243-1-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max77857-regulator.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/max77857-regulator.c b/drivers/regulator/max77857-regulator.c
index 58f825d8d2b0..f2141b5be780 100644
--- a/drivers/regulator/max77857-regulator.c
+++ b/drivers/regulator/max77857-regulator.c
@@ -67,7 +67,7 @@ static bool max77857_volatile_reg(struct device *dev, unsigned int reg)
 	}
 }
 
-struct regmap_config max77857_regmap_config = {
+static struct regmap_config max77857_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
 	.cache_type = REGCACHE_MAPLE,
@@ -443,7 +443,7 @@ static const struct of_device_id max77857_of_id[] = {
 };
 MODULE_DEVICE_TABLE(of, max77857_of_id);
 
-struct i2c_driver max77857_driver = {
+static struct i2c_driver max77857_driver = {
 	.driver = {
 		.name = "max77857",
 		.of_match_table = max77857_of_id,

From c518e7dc64573e3e3a196e8b79caa4f77c612366 Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Tue, 25 Jul 2023 13:25:32 +0300
Subject: [PATCH 0678/2047] gpio: lp3943: remove unneeded
 platform_set_drvdata() call

This function call was found to be unnecessary as there is no equivalent
platform_get_drvdata() call, to access the private data of the driver.
Also, the private data is defined in this driver, so there is no risk of
it being accessed outside of this driver file.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-lp3943.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-lp3943.c b/drivers/gpio/gpio-lp3943.c
index 79edd5db49d2..8e58242f5123 100644
--- a/drivers/gpio/gpio-lp3943.c
+++ b/drivers/gpio/gpio-lp3943.c
@@ -199,8 +199,6 @@ static int lp3943_gpio_probe(struct platform_device *pdev)
 	lp3943_gpio->chip = lp3943_gpio_chip;
 	lp3943_gpio->chip.parent = &pdev->dev;
 
-	platform_set_drvdata(pdev, lp3943_gpio);
-
 	return devm_gpiochip_add_data(&pdev->dev, &lp3943_gpio->chip,
 				      lp3943_gpio);
 }

From ceac51b1ee43532d35d0182117b326306bcfe7f4 Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Tue, 25 Jul 2023 15:36:23 +0300
Subject: [PATCH 0679/2047] gpio: max77620: remove unneeded
 platform_set_drvdata() call

This function call is not required because no counterpart
platform_get_drvdata() call is present to leverage the private data of
the driver.
Since the private data is confined to this driver file, external access
is not feasible.
The use of this function appears redundant in the current context of the
driver's implementation.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-max77620.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c
index c18b60e39a94..8c2a5609161f 100644
--- a/drivers/gpio/gpio-max77620.c
+++ b/drivers/gpio/gpio-max77620.c
@@ -331,8 +331,6 @@ static int max77620_gpio_probe(struct platform_device *pdev)
 	girq->init_hw = max77620_gpio_irq_init_hw;
 	girq->threaded = true;
 
-	platform_set_drvdata(pdev, mgpio);
-
 	ret = devm_gpiochip_add_data(&pdev->dev, &mgpio->gpio_chip, mgpio);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "gpio_init: Failed to add max77620_gpio\n");

From 5f57665a217e82ca5e5362829f64b526d2f245c5 Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Fri, 28 Jul 2023 20:59:14 +0300
Subject: [PATCH 0680/2047] gpio: palmas: remove unnecessary call to
 platform_set_drvdata()

This function call was found to be unnecessary as there is no equivalent
platform_get_drvdata() call to access the private data of the driver. Also,
the private data is defined in this driver, so there is no risk of it being
accessed outside of this driver file.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-palmas.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpio/gpio-palmas.c b/drivers/gpio/gpio-palmas.c
index 6140e87c6754..28dba7048509 100644
--- a/drivers/gpio/gpio-palmas.c
+++ b/drivers/gpio/gpio-palmas.c
@@ -183,7 +183,6 @@ static int palmas_gpio_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	platform_set_drvdata(pdev, palmas_gpio);
 	return ret;
 }
 

From b0c488615eb147d3f680ad3c2e2200a1502da1fc Mon Sep 17 00:00:00 2001
From: Stanislav Jakubek <stano.jakubek@gmail.com>
Date: Sun, 30 Jul 2023 17:23:32 +0200
Subject: [PATCH 0681/2047] dt-bindings: gpio: brcm,kona-gpio: convert to YAML

Convert Broadcom Kona family GPIO controller bindings to DT schema.

Changes during conversion:
  - add used, but previously undocumented SoC-specific compatibles

Signed-off-by: Stanislav Jakubek <stano.jakubek@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 .../bindings/gpio/brcm,kona-gpio.txt          |  52 ---------
 .../bindings/gpio/brcm,kona-gpio.yaml         | 100 ++++++++++++++++++
 MAINTAINERS                                   |   2 +-
 3 files changed, 101 insertions(+), 53 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
 create mode 100644 Documentation/devicetree/bindings/gpio/brcm,kona-gpio.yaml

diff --git a/Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt b/Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
deleted file mode 100644
index 4a63bc96b687..000000000000
--- a/Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-Broadcom Kona Family GPIO
-=========================
-
-This GPIO driver is used in the following Broadcom SoCs:
-  BCM11130, BCM11140, BCM11351, BCM28145, BCM28155
-
-The Broadcom GPIO Controller IP can be configured prior to synthesis to
-support up to 8 banks of 32 GPIOs where each bank has its own IRQ. The
-GPIO controller only supports edge, not level, triggering of interrupts.
-
-Required properties
--------------------
-
-- compatible: "brcm,bcm11351-gpio", "brcm,kona-gpio"
-- reg: Physical base address and length of the controller's registers.
-- interrupts: The interrupt outputs from the controller. There is one GPIO
-  interrupt per GPIO bank. The number of interrupts listed depends on the
-  number of GPIO banks on the SoC. The interrupts must be ordered by bank,
-  starting with bank 0. There is always a 1:1 mapping between banks and
-  IRQs.
-- #gpio-cells: Should be <2>. The first cell is the pin number, the second
-  cell is used to specify optional parameters:
-  - bit 0 specifies polarity (0 for normal, 1 for inverted)
-  See also "gpio-specifier" in .../devicetree/bindings/gpio/gpio.txt.
-- #interrupt-cells: Should be <2>. The first cell is the GPIO number. The
-  second cell is used to specify flags. The following subset of flags is
-  supported:
-  - trigger type (bits[1:0]):
-      1 = low-to-high edge triggered.
-      2 = high-to-low edge triggered.
-      3 = low-to-high or high-to-low edge triggered
-      Valid values are 1, 2, 3
-  See also .../devicetree/bindings/interrupt-controller/interrupts.txt.
-- gpio-controller: Marks the device node as a GPIO controller.
-- interrupt-controller: Marks the device node as an interrupt controller.
-
-Example:
-	gpio: gpio@35003000 {
-		compatible = "brcm,bcm11351-gpio", "brcm,kona-gpio";
-		reg = <0x35003000 0x800>;
-		interrupts =
-		       <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH
-			GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH
-			GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH
-			GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH
-			GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH
-			GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>;
-		#gpio-cells = <2>;
-		#interrupt-cells = <2>;
-		gpio-controller;
-		interrupt-controller;
-	};
diff --git a/Documentation/devicetree/bindings/gpio/brcm,kona-gpio.yaml b/Documentation/devicetree/bindings/gpio/brcm,kona-gpio.yaml
new file mode 100644
index 000000000000..296fdd6b8f38
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/brcm,kona-gpio.yaml
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/brcm,kona-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Kona family GPIO controller
+
+description:
+  The Broadcom GPIO Controller IP can be configured prior to synthesis to
+  support up to 8 banks of 32 GPIOs where each bank has its own IRQ. The
+  GPIO controller only supports edge, not level, triggering of interrupts.
+
+maintainers:
+  - Ray Jui <rjui@broadcom.com>
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - brcm,bcm11351-gpio
+          - brcm,bcm21664-gpio
+          - brcm,bcm23550-gpio
+      - const: brcm,kona-gpio
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 4
+    maxItems: 6
+    description:
+      The interrupt outputs from the controller. There is one GPIO interrupt
+      per GPIO bank. The number of interrupts listed depends on the number of
+      GPIO banks on the SoC. The interrupts must be ordered by bank, starting
+      with bank 0. There is always a 1:1 mapping between banks and IRQs.
+
+  '#gpio-cells':
+    const: 2
+
+  '#interrupt-cells':
+    const: 2
+
+  gpio-controller: true
+
+  interrupt-controller: true
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - '#gpio-cells'
+  - '#interrupt-cells'
+  - gpio-controller
+  - interrupt-controller
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: brcm,bcm11351-gpio
+    then:
+      properties:
+        interrupts:
+          minItems: 6
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - brcm,bcm21664-gpio
+              - brcm,bcm23550-gpio
+    then:
+      properties:
+        interrupts:
+          maxItems: 4
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    gpio@35003000 {
+        compatible = "brcm,bcm11351-gpio", "brcm,kona-gpio";
+        reg = <0x35003000 0x800>;
+        interrupts = <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>;
+        #gpio-cells = <2>;
+        #interrupt-cells = <2>;
+        gpio-controller;
+        interrupt-controller;
+    };
+...
diff --git a/MAINTAINERS b/MAINTAINERS
index 3be1bdfe8ecc..e423e15f48f1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4187,7 +4187,7 @@ BROADCOM KONA GPIO DRIVER
 M:	Ray Jui <rjui@broadcom.com>
 R:	Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
 S:	Supported
-F:	Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
+F:	Documentation/devicetree/bindings/gpio/brcm,kona-gpio.yaml
 F:	drivers/gpio/gpio-bcm-kona.c
 
 BROADCOM MPI3 STORAGE CONTROLLER DRIVER

From e91e8b537a6fa3ed5bbd53e0504b326edfaafb49 Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Fri, 28 Jul 2023 21:08:33 +0300
Subject: [PATCH 0682/2047] gpio: pmic-eic-sprd: remove unnecessary call to
 platform_set_drvdata()

This function call was found to be unnecessary as there is no equivalent
platform_get_drvdata() call to access the private data of the driver. Also,
the private data is defined in this driver, so there is no risk of it being
accessed outside of this driver file.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-pmic-eic-sprd.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpio/gpio-pmic-eic-sprd.c b/drivers/gpio/gpio-pmic-eic-sprd.c
index dabb0da3bd33..2b9b7be9b8fd 100644
--- a/drivers/gpio/gpio-pmic-eic-sprd.c
+++ b/drivers/gpio/gpio-pmic-eic-sprd.c
@@ -363,7 +363,6 @@ static int sprd_pmic_eic_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	platform_set_drvdata(pdev, pmic_eic);
 	return 0;
 }
 

From 8e85d6af1b3ceb619098c6b8a9f38e9e006e9c09 Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Fri, 28 Jul 2023 21:20:09 +0300
Subject: [PATCH 0683/2047] gpio: rc5t583: remove unneeded call to
 platform_set_drvdata()

This function call was found to be unnecessary as there is no equivalent
platform_get_drvdata() call to access the private data of the driver. Also,
the private data is defined in this driver, so there is no risk of it being
accessed outside of this driver file.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-rc5t583.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-rc5t583.c b/drivers/gpio/gpio-rc5t583.c
index 4fae3ebea790..c34dcadaee36 100644
--- a/drivers/gpio/gpio-rc5t583.c
+++ b/drivers/gpio/gpio-rc5t583.c
@@ -121,8 +121,6 @@ static int rc5t583_gpio_probe(struct platform_device *pdev)
 	if (pdata && pdata->gpio_base)
 		rc5t583_gpio->gpio_chip.base = pdata->gpio_base;
 
-	platform_set_drvdata(pdev, rc5t583_gpio);
-
 	return devm_gpiochip_add_data(&pdev->dev, &rc5t583_gpio->gpio_chip,
 				      rc5t583_gpio);
 }

From c975cc599eb7336e0fad3cd81bbebcbe40e24c4e Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Fri, 28 Jul 2023 21:25:22 +0300
Subject: [PATCH 0684/2047] gpio: sama5d2-piobu: remove unneeded call to
 platform_set_drvdata()

This function call was found to be unnecessary as there is no equivalent
platform_get_drvdata() call to access the private data of the driver. Also,
the private data is defined in this driver, so there is no risk of it being
accessed outside of this driver file.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-sama5d2-piobu.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpio/gpio-sama5d2-piobu.c b/drivers/gpio/gpio-sama5d2-piobu.c
index 767c33ae3213..d89da7300ddd 100644
--- a/drivers/gpio/gpio-sama5d2-piobu.c
+++ b/drivers/gpio/gpio-sama5d2-piobu.c
@@ -189,7 +189,6 @@ static int sama5d2_piobu_probe(struct platform_device *pdev)
 	if (!piobu)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, piobu);
 	piobu->chip.label = pdev->name;
 	piobu->chip.parent = &pdev->dev;
 	piobu->chip.owner = THIS_MODULE,

From 3486237c6fe8d0e5024f9c48bfe73843b1bd8284 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 19 Jul 2023 09:34:27 -0600
Subject: [PATCH 0685/2047] iomap: cleanup up iomap_dio_bio_end_io()

Make the logic a bit easier to follow:

1) Add a release_bio out path, as everybody needs to touch that, and
   have our bio ref check jump there if it's non-zero.
2) Add a kiocb local variable.
3) Add comments for each of the three conditions (sync, inline, or
   async workqueue punt).

No functional changes in this patch.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/iomap/direct-io.c | 44 ++++++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index ea3b868c8355..0ce60e80c901 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -152,27 +152,43 @@ void iomap_dio_bio_end_io(struct bio *bio)
 {
 	struct iomap_dio *dio = bio->bi_private;
 	bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
+	struct kiocb *iocb = dio->iocb;
 
 	if (bio->bi_status)
 		iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
+	if (!atomic_dec_and_test(&dio->ref))
+		goto release_bio;
 
-	if (atomic_dec_and_test(&dio->ref)) {
-		if (dio->wait_for_completion) {
-			struct task_struct *waiter = dio->submit.waiter;
-			WRITE_ONCE(dio->submit.waiter, NULL);
-			blk_wake_io_task(waiter);
-		} else if (dio->flags & IOMAP_DIO_WRITE) {
-			struct inode *inode = file_inode(dio->iocb->ki_filp);
+	/*
+	 * Synchronous dio, task itself will handle any completion work
+	 * that needs after IO. All we need to do is wake the task.
+	 */
+	if (dio->wait_for_completion) {
+		struct task_struct *waiter = dio->submit.waiter;
 
-			WRITE_ONCE(dio->iocb->private, NULL);
-			INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
-			queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
-		} else {
-			WRITE_ONCE(dio->iocb->private, NULL);
-			iomap_dio_complete_work(&dio->aio.work);
-		}
+		WRITE_ONCE(dio->submit.waiter, NULL);
+		blk_wake_io_task(waiter);
+		goto release_bio;
 	}
 
+	/* Read completion can always complete inline. */
+	if (!(dio->flags & IOMAP_DIO_WRITE)) {
+		WRITE_ONCE(iocb->private, NULL);
+		iomap_dio_complete_work(&dio->aio.work);
+		goto release_bio;
+	}
+
+	/*
+	 * Async DIO completion that requires filesystem level completion work
+	 * gets punted to a work queue to complete as the operation may require
+	 * more IO to be issued to finalise filesystem metadata changes or
+	 * guarantee data integrity.
+	 */
+	WRITE_ONCE(iocb->private, NULL);
+	INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
+	queue_work(file_inode(iocb->ki_filp)->i_sb->s_dio_done_wq,
+			&dio->aio.work);
+release_bio:
 	if (should_dirty) {
 		bio_check_pages_dirty(bio);
 	} else {

From 44842f647346cac4063b2bb8e9476fad09e363e7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 22 Jul 2023 10:28:37 -0600
Subject: [PATCH 0686/2047] iomap: use an unsigned type for IOMAP_DIO_* defines

IOMAP_DIO_DIRTY shifts by 31 bits, which makes UBSAN unhappy. Clean up
all the defines by making the shifted value an unsigned value.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reported-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/iomap/direct-io.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 0ce60e80c901..7d627d43d10b 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -20,10 +20,10 @@
  * Private flags for iomap_dio, must not overlap with the public ones in
  * iomap.h:
  */
-#define IOMAP_DIO_WRITE_FUA	(1 << 28)
-#define IOMAP_DIO_NEED_SYNC	(1 << 29)
-#define IOMAP_DIO_WRITE		(1 << 30)
-#define IOMAP_DIO_DIRTY		(1 << 31)
+#define IOMAP_DIO_WRITE_FUA	(1U << 28)
+#define IOMAP_DIO_NEED_SYNC	(1U << 29)
+#define IOMAP_DIO_WRITE		(1U << 30)
+#define IOMAP_DIO_DIRTY		(1U << 31)
 
 struct iomap_dio {
 	struct kiocb		*iocb;

From 3a0be38cc84d05e86f1c7d5b601513f31d3b24fa Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 19 Jul 2023 13:43:10 -0600
Subject: [PATCH 0687/2047] iomap: treat a write through cache the same as FUA

Whether we have a write back cache and are using FUA or don't have
a write back cache at all is the same situation. Treat them the same.

This makes the IOMAP_DIO_WRITE_FUA name a bit misleading, as we have
two cases that provide stable writes:

1) Volatile write cache with FUA writes
2) Normal write without a volatile write cache

Rename that flag to IOMAP_DIO_STABLE_WRITE to make that clearer, and
update some of the FUA comments as well.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/iomap/direct-io.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 7d627d43d10b..6b690fc22365 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -20,7 +20,7 @@
  * Private flags for iomap_dio, must not overlap with the public ones in
  * iomap.h:
  */
-#define IOMAP_DIO_WRITE_FUA	(1U << 28)
+#define IOMAP_DIO_WRITE_THROUGH	(1U << 28)
 #define IOMAP_DIO_NEED_SYNC	(1U << 29)
 #define IOMAP_DIO_WRITE		(1U << 30)
 #define IOMAP_DIO_DIRTY		(1U << 31)
@@ -219,7 +219,7 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
 /*
  * Figure out the bio's operation flags from the dio request, the
  * mapping, and whether or not we want FUA.  Note that we can end up
- * clearing the WRITE_FUA flag in the dio request.
+ * clearing the WRITE_THROUGH flag in the dio request.
  */
 static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
 		const struct iomap *iomap, bool use_fua)
@@ -233,7 +233,7 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
 	if (use_fua)
 		opflags |= REQ_FUA;
 	else
-		dio->flags &= ~IOMAP_DIO_WRITE_FUA;
+		dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
 
 	return opflags;
 }
@@ -273,11 +273,13 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 		 * Use a FUA write if we need datasync semantics, this is a pure
 		 * data IO that doesn't require any metadata updates (including
 		 * after IO completion such as unwritten extent conversion) and
-		 * the underlying device supports FUA. This allows us to avoid
-		 * cache flushes on IO completion.
+		 * the underlying device either supports FUA or doesn't have
+		 * a volatile write cache. This allows us to avoid cache flushes
+		 * on IO completion.
 		 */
 		if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
-		    (dio->flags & IOMAP_DIO_WRITE_FUA) && bdev_fua(iomap->bdev))
+		    (dio->flags & IOMAP_DIO_WRITE_THROUGH) &&
+		    (bdev_fua(iomap->bdev) || !bdev_write_cache(iomap->bdev)))
 			use_fua = true;
 	}
 
@@ -553,13 +555,16 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 			dio->flags |= IOMAP_DIO_NEED_SYNC;
 
 		       /*
-			* For datasync only writes, we optimistically try
-			* using FUA for this IO.  Any non-FUA write that
-			* occurs will clear this flag, hence we know before
-			* completion whether a cache flush is necessary.
+			* For datasync only writes, we optimistically try using
+			* WRITE_THROUGH for this IO. This flag requires either
+			* FUA writes through the device's write cache, or a
+			* normal write to a device without a volatile write
+			* cache. For the former, Any non-FUA write that occurs
+			* will clear this flag, hence we know before completion
+			* whether a cache flush is necessary.
 			*/
 			if (!(iocb->ki_flags & IOCB_SYNC))
-				dio->flags |= IOMAP_DIO_WRITE_FUA;
+				dio->flags |= IOMAP_DIO_WRITE_THROUGH;
 		}
 
 		/*
@@ -621,10 +626,11 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		iomap_dio_set_error(dio, ret);
 
 	/*
-	 * If all the writes we issued were FUA, we don't need to flush the
-	 * cache on IO completion. Clear the sync flag for this case.
+	 * If all the writes we issued were already written through to the
+	 * media, we don't need to flush the cache on IO completion. Clear the
+	 * sync flag for this case.
 	 */
-	if (dio->flags & IOMAP_DIO_WRITE_FUA)
+	if (dio->flags & IOMAP_DIO_WRITE_THROUGH)
 		dio->flags &= ~IOMAP_DIO_NEED_SYNC;
 
 	WRITE_ONCE(iocb->private, dio->submit.poll_bio);

From daa99c5a3319e371029834092ee5f7c84bb43a70 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 20 Jul 2023 11:47:56 -0600
Subject: [PATCH 0688/2047] iomap: only set iocb->private for polled bio

iocb->private is only used for polled IO, where the completer will
find the bio to poll through that field.

Assign it when we're submitting a polled bio, and get rid of the
dio->poll_bio indirection.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/iomap/direct-io.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 6b690fc22365..e4b9d9123b75 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -41,7 +41,6 @@ struct iomap_dio {
 		struct {
 			struct iov_iter		*iter;
 			struct task_struct	*waiter;
-			struct bio		*poll_bio;
 		} submit;
 
 		/* used for aio completion: */
@@ -63,12 +62,14 @@ static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter,
 static void iomap_dio_submit_bio(const struct iomap_iter *iter,
 		struct iomap_dio *dio, struct bio *bio, loff_t pos)
 {
+	struct kiocb *iocb = dio->iocb;
+
 	atomic_inc(&dio->ref);
 
 	/* Sync dio can't be polled reliably */
-	if ((dio->iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(dio->iocb)) {
-		bio_set_polled(bio, dio->iocb);
-		dio->submit.poll_bio = bio;
+	if ((iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(iocb)) {
+		bio_set_polled(bio, iocb);
+		WRITE_ONCE(iocb->private, bio);
 	}
 
 	if (dio->dops && dio->dops->submit_io)
@@ -184,7 +185,6 @@ void iomap_dio_bio_end_io(struct bio *bio)
 	 * more IO to be issued to finalise filesystem metadata changes or
 	 * guarantee data integrity.
 	 */
-	WRITE_ONCE(iocb->private, NULL);
 	INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
 	queue_work(file_inode(iocb->ki_filp)->i_sb->s_dio_done_wq,
 			&dio->aio.work);
@@ -523,7 +523,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 
 	dio->submit.iter = iter;
 	dio->submit.waiter = current;
-	dio->submit.poll_bio = NULL;
 
 	if (iocb->ki_flags & IOCB_NOWAIT)
 		iomi.flags |= IOMAP_NOWAIT;
@@ -633,8 +632,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (dio->flags & IOMAP_DIO_WRITE_THROUGH)
 		dio->flags &= ~IOMAP_DIO_NEED_SYNC;
 
-	WRITE_ONCE(iocb->private, dio->submit.poll_bio);
-
 	/*
 	 * We are about to drop our additional submission reference, which
 	 * might be the last reference to the dio.  There are three different

From 7b3c14d1a96bf63c078c3bbfe5573fb964e80b95 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 19 Jul 2023 11:21:09 -0600
Subject: [PATCH 0689/2047] iomap: add IOMAP_DIO_INLINE_COMP

Rather than gate whether or not we need to punt a dio completion to a
workqueue on whether the IO is a write or not, add an explicit flag for
it. For now we treat them the same, reads always set the flags and async
writes do not.

No functional changes in this patch.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/iomap/direct-io.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index e4b9d9123b75..b943bc5c7b18 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -20,6 +20,7 @@
  * Private flags for iomap_dio, must not overlap with the public ones in
  * iomap.h:
  */
+#define IOMAP_DIO_INLINE_COMP	(1U << 27)
 #define IOMAP_DIO_WRITE_THROUGH	(1U << 28)
 #define IOMAP_DIO_NEED_SYNC	(1U << 29)
 #define IOMAP_DIO_WRITE		(1U << 30)
@@ -172,8 +173,10 @@ void iomap_dio_bio_end_io(struct bio *bio)
 		goto release_bio;
 	}
 
-	/* Read completion can always complete inline. */
-	if (!(dio->flags & IOMAP_DIO_WRITE)) {
+	/*
+	 * Flagged with IOMAP_DIO_INLINE_COMP, we can complete it inline
+	 */
+	if (dio->flags & IOMAP_DIO_INLINE_COMP) {
 		WRITE_ONCE(iocb->private, NULL);
 		iomap_dio_complete_work(&dio->aio.work);
 		goto release_bio;
@@ -528,6 +531,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		iomi.flags |= IOMAP_NOWAIT;
 
 	if (iov_iter_rw(iter) == READ) {
+		/* reads can always complete inline */
+		dio->flags |= IOMAP_DIO_INLINE_COMP;
+
 		if (iomi.pos >= dio->i_size)
 			goto out_free_dio;
 

From 9cf3516c29e6dba95d36d7d86c03b06495107859 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 8 Jul 2023 09:55:27 -0600
Subject: [PATCH 0690/2047] fs: add IOCB flags related to passing back dio
 completions

Async dio completions generally happen from hard/soft IRQ context, which
means that users like iomap may need to defer some of the completion
handling to a workqueue. This is less efficient than having the original
issuer handle it, like we do for sync IO, and it adds latency to the
completions.

Add IOCB_DIO_CALLER_COMP, which the issuer can set if it is able to
safely punt these completions to a safe context. If the dio handler is
aware of this flag, assign a callback handler in kiocb->dio_complete and
associated data io kiocb->private. The issuer will then call this
handler with that data from task context.

No functional changes in this patch.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6867512907d6..1e6dbe309d52 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -338,6 +338,20 @@ enum rw_hint {
 #define IOCB_NOIO		(1 << 20)
 /* can use bio alloc cache */
 #define IOCB_ALLOC_CACHE	(1 << 21)
+/*
+ * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
+ * iocb completion can be passed back to the owner for execution from a safe
+ * context rather than needing to be punted through a workqueue. If this
+ * flag is set, the bio completion handling may set iocb->dio_complete to a
+ * handler function and iocb->private to context information for that handler.
+ * The issuer should call the handler with that context information from task
+ * context to complete the processing of the iocb. Note that while this
+ * provides a task context for the dio_complete() callback, it should only be
+ * used on the completion side for non-IO generating completions. It's fine to
+ * call blocking functions from this callback, but they should not wait for
+ * unrelated IO (like cache flushing, new IO generation, etc).
+ */
+#define IOCB_DIO_CALLER_COMP	(1 << 22)
 
 /* for use in trace events */
 #define TRACE_IOCB_STRINGS \
@@ -351,7 +365,8 @@ enum rw_hint {
 	{ IOCB_WRITE,		"WRITE" }, \
 	{ IOCB_WAITQ,		"WAITQ" }, \
 	{ IOCB_NOIO,		"NOIO" }, \
-	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }
+	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }, \
+	{ IOCB_DIO_CALLER_COMP,	"CALLER_COMP" }
 
 struct kiocb {
 	struct file		*ki_filp;
@@ -360,7 +375,23 @@ struct kiocb {
 	void			*private;
 	int			ki_flags;
 	u16			ki_ioprio; /* See linux/ioprio.h */
-	struct wait_page_queue	*ki_waitq; /* for async buffered IO */
+	union {
+		/*
+		 * Only used for async buffered reads, where it denotes the
+		 * page waitqueue associated with completing the read. Valid
+		 * IFF IOCB_WAITQ is set.
+		 */
+		struct wait_page_queue	*ki_waitq;
+		/*
+		 * Can be used for O_DIRECT IO, where the completion handling
+		 * is punted back to the issuer of the IO. May only be set
+		 * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
+		 * must then check for presence of this handler when ki_complete
+		 * is invoked. The data passed in to this handler must be
+		 * assigned to ->private when dio_complete is assigned.
+		 */
+		ssize_t (*dio_complete)(void *data);
+	};
 };
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)

From 099ada2c87260e5c52fbdad4ca40c165fc194a2e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 8 Jul 2023 10:03:52 -0600
Subject: [PATCH 0691/2047] io_uring/rw: add write support for
 IOCB_DIO_CALLER_COMP

If the filesystem dio handler understands IOCB_DIO_CALLER_COMP, we'll
get a kiocb->ki_complete() callback with kiocb->dio_complete set. In
that case, rather than complete the IO directly through task_work, queue
up an intermediate task_work handler that first processes this callback
and then immediately completes the request.

For XFS, this avoids a punt through a workqueue, which is a lot less
efficient and adds latency to lower queue depth (or sync) O_DIRECT
writes.

Only do this for non-polled IO, as polled IO doesn't need this kind
of deferral as it always completes within the task itself. This then
avoids a check for deferral in the polled IO completion handler.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/rw.c | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/io_uring/rw.c b/io_uring/rw.c
index 1bce2208b65c..e2166a9a7eaf 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -105,6 +105,7 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	} else {
 		rw->kiocb.ki_ioprio = get_current_ioprio();
 	}
+	rw->kiocb.dio_complete = NULL;
 
 	rw->addr = READ_ONCE(sqe->addr);
 	rw->len = READ_ONCE(sqe->len);
@@ -285,6 +286,15 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
 
 void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
 {
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+	struct kiocb *kiocb = &rw->kiocb;
+
+	if ((kiocb->ki_flags & IOCB_DIO_CALLER_COMP) && kiocb->dio_complete) {
+		long res = kiocb->dio_complete(rw->kiocb.private);
+
+		io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+	}
+
 	io_req_io_end(req);
 
 	if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
@@ -300,9 +310,11 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
 	struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
 	struct io_kiocb *req = cmd_to_io_kiocb(rw);
 
-	if (__io_complete_rw_common(req, res))
-		return;
-	io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+	if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) {
+		if (__io_complete_rw_common(req, res))
+			return;
+		io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+	}
 	req->io_task_work.func = io_req_rw_complete;
 	__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
 }
@@ -916,6 +928,15 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	}
 	kiocb->ki_flags |= IOCB_WRITE;
 
+	/*
+	 * For non-polled IO, set IOCB_DIO_CALLER_COMP, stating that our handler
+	 * groks deferring the completion to task context. This isn't
+	 * necessary and useful for polled IO as that can always complete
+	 * directly.
+	 */
+	if (!(kiocb->ki_flags & IOCB_HIPRI))
+		kiocb->ki_flags |= IOCB_DIO_CALLER_COMP;
+
 	if (likely(req->file->f_op->write_iter))
 		ret2 = call_write_iter(req->file, kiocb, &s->iter);
 	else if (req->file->f_op->write)

From 8c052fb3002e6e7eccdc138d2e459cd03f019ade Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 8 Jul 2023 10:01:50 -0600
Subject: [PATCH 0692/2047] iomap: support IOCB_DIO_CALLER_COMP

If IOCB_DIO_CALLER_COMP is set, utilize that to set kiocb->dio_complete
handler and data for that callback. Rather than punt the completion to a
workqueue, we pass back the handler and data to the issuer and will get
a callback from a safe task context.

Using the following fio job to randomly dio write 4k blocks at
queue depths of 1..16:

fio --name=dio-write --filename=/data1/file --time_based=1 \
--runtime=10 --bs=4096 --rw=randwrite --norandommap --buffered=0 \
--cpus_allowed=4 --ioengine=io_uring --iodepth=$depth

shows the following results before and after this patch:

	Stock	Patched		Diff
=======================================
QD1	155K	162K		+ 4.5%
QD2	290K	313K		+ 7.9%
QD4	533K	597K		+12.0%
QD8	604K	827K		+36.9%
QD16	615K	845K		+37.4%

which shows nice wins all around. If we factored in per-IOP efficiency,
the wins look even nicer. This becomes apparent as queue depth rises,
as the offloaded workqueue completions runs out of steam.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/iomap/direct-io.c | 62 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 60 insertions(+), 2 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index b943bc5c7b18..bcd3f8cf5ea4 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -20,6 +20,7 @@
  * Private flags for iomap_dio, must not overlap with the public ones in
  * iomap.h:
  */
+#define IOMAP_DIO_CALLER_COMP	(1U << 26)
 #define IOMAP_DIO_INLINE_COMP	(1U << 27)
 #define IOMAP_DIO_WRITE_THROUGH	(1U << 28)
 #define IOMAP_DIO_NEED_SYNC	(1U << 29)
@@ -132,6 +133,11 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
 }
 EXPORT_SYMBOL_GPL(iomap_dio_complete);
 
+static ssize_t iomap_dio_deferred_complete(void *data)
+{
+	return iomap_dio_complete(data);
+}
+
 static void iomap_dio_complete_work(struct work_struct *work)
 {
 	struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
@@ -182,6 +188,31 @@ void iomap_dio_bio_end_io(struct bio *bio)
 		goto release_bio;
 	}
 
+	/*
+	 * If this dio is flagged with IOMAP_DIO_CALLER_COMP, then schedule
+	 * our completion that way to avoid an async punt to a workqueue.
+	 */
+	if (dio->flags & IOMAP_DIO_CALLER_COMP) {
+		/* only polled IO cares about private cleared */
+		iocb->private = dio;
+		iocb->dio_complete = iomap_dio_deferred_complete;
+
+		/*
+		 * Invoke ->ki_complete() directly. We've assigned our
+		 * dio_complete callback handler, and since the issuer set
+		 * IOCB_DIO_CALLER_COMP, we know their ki_complete handler will
+		 * notice ->dio_complete being set and will defer calling that
+		 * handler until it can be done from a safe task context.
+		 *
+		 * Note that the 'res' being passed in here is not important
+		 * for this case. The actual completion value of the request
+		 * will be gotten from dio_complete when that is run by the
+		 * issuer.
+		 */
+		iocb->ki_complete(iocb, 0);
+		goto release_bio;
+	}
+
 	/*
 	 * Async DIO completion that requires filesystem level completion work
 	 * gets punted to a work queue to complete as the operation may require
@@ -278,12 +309,17 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 		 * after IO completion such as unwritten extent conversion) and
 		 * the underlying device either supports FUA or doesn't have
 		 * a volatile write cache. This allows us to avoid cache flushes
-		 * on IO completion.
+		 * on IO completion. If we can't use writethrough and need to
+		 * sync, disable in-task completions as dio completion will
+		 * need to call generic_write_sync() which will do a blocking
+		 * fsync / cache flush call.
 		 */
 		if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
 		    (dio->flags & IOMAP_DIO_WRITE_THROUGH) &&
 		    (bdev_fua(iomap->bdev) || !bdev_write_cache(iomap->bdev)))
 			use_fua = true;
+		else if (dio->flags & IOMAP_DIO_NEED_SYNC)
+			dio->flags &= ~IOMAP_DIO_CALLER_COMP;
 	}
 
 	/*
@@ -298,10 +334,23 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 		goto out;
 
 	/*
-	 * We can only poll for single bio I/Os.
+	 * We can only do deferred completion for pure overwrites that
+	 * don't require additional IO at completion. This rules out
+	 * writes that need zeroing or extent conversion, extend
+	 * the file size, or issue journal IO or cache flushes
+	 * during completion processing.
 	 */
 	if (need_zeroout ||
+	    ((dio->flags & IOMAP_DIO_NEED_SYNC) && !use_fua) ||
 	    ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode)))
+		dio->flags &= ~IOMAP_DIO_CALLER_COMP;
+
+	/*
+	 * The rules for polled IO completions follow the guidelines as the
+	 * ones we set for inline and deferred completions. If none of those
+	 * are available for this IO, clear the polled flag.
+	 */
+	if (!(dio->flags & (IOMAP_DIO_INLINE_COMP|IOMAP_DIO_CALLER_COMP)))
 		dio->iocb->ki_flags &= ~IOCB_HIPRI;
 
 	if (need_zeroout) {
@@ -547,6 +596,15 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		iomi.flags |= IOMAP_WRITE;
 		dio->flags |= IOMAP_DIO_WRITE;
 
+		/*
+		 * Flag as supporting deferred completions, if the issuer
+		 * groks it. This can avoid a workqueue punt for writes.
+		 * We may later clear this flag if we need to do other IO
+		 * as part of this IO completion.
+		 */
+		if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
+			dio->flags |= IOMAP_DIO_CALLER_COMP;
+
 		if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
 			ret = -EAGAIN;
 			if (iomi.pos >= dio->i_size ||

From ee042cdb9f0f8a802fc6497ca921e8863f090f6e Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Mon, 31 Jul 2023 19:25:33 +0800
Subject: [PATCH 0693/2047] fs/ecryptfs: remove kernel-doc warnings

Remove kernel-doc warnings:

fs/ecryptfs/mmap.c:270: warning: Excess function parameter 'flags'
description in 'ecryptfs_write_begin'

Message-ID: <20230731112533.214216-1-wangzhu9@huawei.com>
Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ecryptfs/mmap.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index cb1e998ce54d..e2483acc4366 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -255,7 +255,6 @@ out:
  * @mapping: The eCryptfs object
  * @pos: The file offset at which to start writing
  * @len: Length of the write
- * @flags: Various flags
  * @pagep: Pointer to return the page
  * @fsdata: Pointer to return fs data (unused)
  *

From 98dfdd9ee93995a408192dbbf3dd219ba23e3738 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 30 Jul 2023 20:07:40 -0700
Subject: [PATCH 0694/2047] sched/psi: Select KERNFS as needed

Users of KERNFS should select it to enforce its being built, so
do this to prevent a build error.

In file included from ../kernel/sched/build_utility.c:97:
../kernel/sched/psi.c: In function 'psi_trigger_poll':
../kernel/sched/psi.c:1479:17: error: implicit declaration of function 'kernfs_generic_poll' [-Werror=implicit-function-declaration]
 1479 |                 kernfs_generic_poll(t->of, wait);

Fixes: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Suren Baghdasaryan <surenb@google.com>
Link: lore.kernel.org/r/202307310732.r65EQFY0-lkp@intel.com
---
 init/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/init/Kconfig b/init/Kconfig
index f7f65af4ee12..5e7d4885d1bf 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -629,6 +629,7 @@ config TASK_IO_ACCOUNTING
 
 config PSI
 	bool "Pressure stall information tracking"
+	select KERNFS
 	help
 	  Collect metrics that indicate how overcommitted the CPU, memory,
 	  and IO capacity are in the system.

From 113d0a6b3954b57907d1a6e3209f4174f504e0ae Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 1 Aug 2023 09:18:21 -0400
Subject: [PATCH 0695/2047] MAINTAINERS: Add Peter explicitly to the psi
 section

Peter is kind enough to route the low-volume psi patches through the
scheduler tree, but he is frequently not CC'd on them.

While he is matched through the SCHEDULER maintainers and reviewers on
kern/sched/*, that list is long, and mostly not applicable to psi
code. Thus, patch submitters often just CC the explicit PSI entries.

Add him to that section, to make sure he gets those patches.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230801133235.GA1766885@cmpxchg.org
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index aee340630eca..f017dc6ce7ab 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17047,6 +17047,7 @@ F:	drivers/net/ppp/pptp.c
 PRESSURE STALL INFORMATION (PSI)
 M:	Johannes Weiner <hannes@cmpxchg.org>
 M:	Suren Baghdasaryan <surenb@google.com>
+R:	Peter Ziljstra <peterz@infradead.org>
 S:	Maintained
 F:	include/linux/psi*
 F:	kernel/sched/psi.c

From c98c18270be115678f4295b10a5af5dcc9c4efa0 Mon Sep 17 00:00:00 2001
From: Phil Auld <pauld@redhat.com>
Date: Fri, 14 Jul 2023 08:57:46 -0400
Subject: [PATCH 0696/2047] sched, cgroup: Restore meaning to
 hierarchical_quota

In cgroupv2 cfs_b->hierarchical_quota is set to -1 for all task
groups due to the previous fix simply taking the min.  It should
reflect a limit imposed at that level or by an ancestor. Even
though cgroupv2 does not require child quota to be less than or
equal to that of its ancestors the task group will still be
constrained by such a quota so this should be shown here. Cgroupv1
continues to set this correctly.

In both cases, add initialization when a new task group is created
based on the current parent's value (or RUNTIME_INF in the case of
root_task_group). Otherwise, the field is wrong until a quota is
changed after creation and __cfs_schedulable() is called.

Fixes: c53593e5cb69 ("sched, cgroup: Don't reject lower cpu.max on ancestors")
Signed-off-by: Phil Auld <pauld@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ben Segall <bsegall@google.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20230714125746.812891-1-pauld@redhat.com
---
 kernel/sched/core.c  | 13 +++++++++----
 kernel/sched/fair.c  |  7 ++++---
 kernel/sched/sched.h |  2 +-
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 83e36547af17..3af25caf6343 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9953,7 +9953,7 @@ void __init sched_init(void)
 		ptr += nr_cpu_ids * sizeof(void **);
 
 		root_task_group.shares = ROOT_TASK_GROUP_LOAD;
-		init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
+		init_cfs_bandwidth(&root_task_group.cfs_bandwidth, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 #ifdef CONFIG_RT_GROUP_SCHED
 		root_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -11087,11 +11087,16 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
 
 		/*
 		 * Ensure max(child_quota) <= parent_quota.  On cgroup2,
-		 * always take the min.  On cgroup1, only inherit when no
-		 * limit is set:
+		 * always take the non-RUNTIME_INF min.  On cgroup1, only
+		 * inherit when no limit is set. In both cases this is used
+		 * by the scheduler to determine if a given CFS task has a
+		 * bandwidth constraint at some higher level.
 		 */
 		if (cgroup_subsys_on_dfl(cpu_cgrp_subsys)) {
-			quota = min(quota, parent_quota);
+			if (quota == RUNTIME_INF)
+				quota = parent_quota;
+			else if (parent_quota != RUNTIME_INF)
+				quota = min(quota, parent_quota);
 		} else {
 			if (quota == RUNTIME_INF)
 				quota = parent_quota;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f55b0a72772e..26bfbb640894 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6045,13 +6045,14 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
 }
 
-void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent)
 {
 	raw_spin_lock_init(&cfs_b->lock);
 	cfs_b->runtime = 0;
 	cfs_b->quota = RUNTIME_INF;
 	cfs_b->period = ns_to_ktime(default_cfs_period());
 	cfs_b->burst = 0;
+	cfs_b->hierarchical_quota = parent ? parent->hierarchical_quota : RUNTIME_INF;
 
 	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
 	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
@@ -6217,7 +6218,7 @@ static inline int throttled_lb_pair(struct task_group *tg,
 	return 0;
 }
 
-void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
+void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent) {}
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
@@ -12599,7 +12600,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 
 	tg->shares = NICE_0_LOAD;
 
-	init_cfs_bandwidth(tg_cfs_bandwidth(tg));
+	init_cfs_bandwidth(tg_cfs_bandwidth(tg), tg_cfs_bandwidth(parent));
 
 	for_each_possible_cpu(i) {
 		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9baeb1a2dfdd..602de71b48e1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -454,7 +454,7 @@ extern void unregister_fair_sched_group(struct task_group *tg);
 extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 			struct sched_entity *se, int cpu,
 			struct sched_entity *parent);
-extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
+extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent);
 
 extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
 extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);

From 88c56cfeaec4642aee8aac58b38d5708c6aae0d3 Mon Sep 17 00:00:00 2001
From: Phil Auld <pauld@redhat.com>
Date: Wed, 12 Jul 2023 09:33:57 -0400
Subject: [PATCH 0697/2047] sched/fair: Block nohz tick_stop when cfs bandwidth
 in use

CFS bandwidth limits and NOHZ full don't play well together.  Tasks
can easily run well past their quotas before a remote tick does
accounting.  This leads to long, multi-period stalls before such
tasks can run again. Currently, when presented with these conflicting
requirements the scheduler is favoring nohz_full and letting the tick
be stopped. However, nohz tick stopping is already best-effort, there
are a number of conditions that can prevent it, whereas cfs runtime
bandwidth is expected to be enforced.

Make the scheduler favor bandwidth over stopping the tick by setting
TICK_DEP_BIT_SCHED when the only running task is a cfs task with
runtime limit enabled. We use cfs_b->hierarchical_quota to
determine if the task requires the tick.

Add check in pick_next_task_fair() as well since that is where
we have a handle on the task that is actually going to be running.

Add check in sched_can_stop_tick() to cover some edge cases such
as nr_running going from 2->1 and the 1 remains the running task.

Reviewed-By: Ben Segall <bsegall@google.com>
Signed-off-by: Phil Auld <pauld@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230712133357.381137-3-pauld@redhat.com
---
 kernel/sched/core.c     | 26 +++++++++++++++++++++
 kernel/sched/fair.c     | 52 ++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/features.h |  2 ++
 kernel/sched/sched.h    |  2 ++
 4 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3af25caf6343..614271a75525 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1194,6 +1194,20 @@ static void nohz_csd_func(void *info)
 #endif /* CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
+static inline bool __need_bw_check(struct rq *rq, struct task_struct *p)
+{
+	if (rq->nr_running != 1)
+		return false;
+
+	if (p->sched_class != &fair_sched_class)
+		return false;
+
+	if (!task_on_rq_queued(p))
+		return false;
+
+	return true;
+}
+
 bool sched_can_stop_tick(struct rq *rq)
 {
 	int fifo_nr_running;
@@ -1229,6 +1243,18 @@ bool sched_can_stop_tick(struct rq *rq)
 	if (rq->nr_running > 1)
 		return false;
 
+	/*
+	 * If there is one task and it has CFS runtime bandwidth constraints
+	 * and it's on the cpu now we don't want to stop the tick.
+	 * This check prevents clearing the bit if a newly enqueued task here is
+	 * dequeued by migrating while the constrained task continues to run.
+	 * E.g. going from 2->1 without going through pick_next_task().
+	 */
+	if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) {
+		if (cfs_task_bw_constrained(rq->curr))
+			return false;
+	}
+
 	return true;
 }
 #endif /* CONFIG_NO_HZ_FULL */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 26bfbb640894..c28206499a3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6189,6 +6189,46 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 	rq_clock_stop_loop_update(rq);
 }
 
+bool cfs_task_bw_constrained(struct task_struct *p)
+{
+	struct cfs_rq *cfs_rq = task_cfs_rq(p);
+
+	if (!cfs_bandwidth_used())
+		return false;
+
+	if (cfs_rq->runtime_enabled ||
+	    tg_cfs_bandwidth(cfs_rq->tg)->hierarchical_quota != RUNTIME_INF)
+		return true;
+
+	return false;
+}
+
+#ifdef CONFIG_NO_HZ_FULL
+/* called from pick_next_task_fair() */
+static void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p)
+{
+	int cpu = cpu_of(rq);
+
+	if (!sched_feat(HZ_BW) || !cfs_bandwidth_used())
+		return;
+
+	if (!tick_nohz_full_cpu(cpu))
+		return;
+
+	if (rq->nr_running != 1)
+		return;
+
+	/*
+	 *  We know there is only one task runnable and we've just picked it. The
+	 *  normal enqueue path will have cleared TICK_DEP_BIT_SCHED if we will
+	 *  be otherwise able to stop the tick. Just need to check if we are using
+	 *  bandwidth control.
+	 */
+	if (cfs_task_bw_constrained(p))
+		tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
+}
+#endif
+
 #else /* CONFIG_CFS_BANDWIDTH */
 
 static inline bool cfs_bandwidth_used(void)
@@ -6231,9 +6271,18 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
 static inline void update_runtime_enabled(struct rq *rq) {}
 static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
-
+#ifdef CONFIG_CGROUP_SCHED
+bool cfs_task_bw_constrained(struct task_struct *p)
+{
+	return false;
+}
+#endif
 #endif /* CONFIG_CFS_BANDWIDTH */
 
+#if !defined(CONFIG_CFS_BANDWIDTH) || !defined(CONFIG_NO_HZ_FULL)
+static inline void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p) {}
+#endif
+
 /**************************************************
  * CFS operations on tasks:
  */
@@ -8201,6 +8250,7 @@ done: __maybe_unused;
 		hrtick_start_fair(rq, p);
 
 	update_misfit_status(p, rq);
+	sched_fair_update_stop_tick(rq, p);
 
 	return p;
 
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index ee7f23c76bd3..e10074cb4be4 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -101,3 +101,5 @@ SCHED_FEAT(LATENCY_WARN, false)
 
 SCHED_FEAT(ALT_PERIOD, true)
 SCHED_FEAT(BASE_SLICE, true)
+
+SCHED_FEAT(HZ_BW, true)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 602de71b48e1..19af1766df2d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -459,6 +459,7 @@ extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth
 extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
 extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
 extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
+extern bool cfs_task_bw_constrained(struct task_struct *p);
 
 extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
 		struct sched_rt_entity *rt_se, int cpu,
@@ -494,6 +495,7 @@ static inline void set_task_rq_fair(struct sched_entity *se,
 #else /* CONFIG_CGROUP_SCHED */
 
 struct cfs_bandwidth { };
+static inline bool cfs_task_bw_constrained(struct task_struct *p) { return false; }
 
 #endif	/* CONFIG_CGROUP_SCHED */
 

From 13a0d1088c8fea1565e30a169188b59bdd77759e Mon Sep 17 00:00:00 2001
From: Yu Liao <liaoyu15@huawei.com>
Date: Wed, 2 Aug 2023 10:31:30 +0800
Subject: [PATCH 0698/2047] power: supply: qcom_pmi8998_charger: fix
 uninitialized variable

smatch warnings:
  drivers/power/supply/qcom_pmi8998_charger.c:565 smb2_status_change_work() error: uninitialized symbol 'usb_online'.

usb_online is used uninitialized whenever smb2_get_prop_usb_online()
returns a negative value.

Thus, fix the issue by initializing usb_online to 0.

Fixes: 8648aeb5d7b7 ("power: supply: add Qualcomm PMI8998 SMB2 Charger driver")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/r/202307280638.556PrzIS-lkp@intel.com/
Signed-off-by: Yu Liao <liaoyu15@huawei.com>
Reviewed-by: Caleb Connolly <caleb.connolly@linaro.org
Link: https://lore.kernel.org/r/20230802023130.2516232-1-liaoyu15@huawei.com
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/qcom_pmi8998_charger.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/power/supply/qcom_pmi8998_charger.c b/drivers/power/supply/qcom_pmi8998_charger.c
index d16c5ee17249..cac89d233c38 100644
--- a/drivers/power/supply/qcom_pmi8998_charger.c
+++ b/drivers/power/supply/qcom_pmi8998_charger.c
@@ -556,7 +556,8 @@ static int smb2_set_current_limit(struct smb2_chip *chip, unsigned int val)
 static void smb2_status_change_work(struct work_struct *work)
 {
 	unsigned int charger_type, current_ua;
-	int usb_online, count, rc;
+	int usb_online = 0;
+	int count, rc;
 	struct smb2_chip *chip;
 
 	chip = container_of(work, struct smb2_chip, status_change_work.work);

From f57f5838c044d1554b71186d3d277619eab58d31 Mon Sep 17 00:00:00 2001
From: Caleb Connolly <caleb.connolly@linaro.org>
Date: Wed, 2 Aug 2023 15:49:28 +0100
Subject: [PATCH 0699/2047] power: supply: qcom_pmi8998_charger: remove
 CHARGE_CONTROL_LIMIT_* props

These property were intended to allow the power supply to be treated as a
cooling device, however the cooling device interface has been dropped
from the psy core code. They now just duplicate the CURRENT_NOW and
CURRENT_MAX properties and are generally confusing, drop them.

Signed-off-by: Caleb Connolly <caleb.connolly@linaro.org>
Link: https://lore.kernel.org/r/20230802-pmi8998-charger-fixes-v1-1-a8f1e8b84c1e@linaro.org
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/qcom_pmi8998_charger.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/power/supply/qcom_pmi8998_charger.c b/drivers/power/supply/qcom_pmi8998_charger.c
index cac89d233c38..c7ec9aeb7a64 100644
--- a/drivers/power/supply/qcom_pmi8998_charger.c
+++ b/drivers/power/supply/qcom_pmi8998_charger.c
@@ -409,8 +409,6 @@ static enum power_supply_property smb2_properties[] = {
 	POWER_SUPPLY_PROP_HEALTH,
 	POWER_SUPPLY_PROP_ONLINE,
 	POWER_SUPPLY_PROP_USB_TYPE,
-	POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT,
-	POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX,
 };
 
 static enum power_supply_usb_type smb2_usb_types[] = {
@@ -674,11 +672,7 @@ static int smb2_get_property(struct power_supply *psy,
 		val->strval = chip->name;
 		return 0;
 	case POWER_SUPPLY_PROP_CURRENT_MAX:
-	case POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT:
 		return smb2_get_current_limit(chip, &val->intval);
-	case POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX:
-		val->intval = DCP_CURRENT_UA;
-		return 0;
 	case POWER_SUPPLY_PROP_CURRENT_NOW:
 		return smb2_get_iio_chan(chip, chip->usb_in_i_chan,
 					 &val->intval);
@@ -707,7 +701,6 @@ static int smb2_set_property(struct power_supply *psy,
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_CURRENT_MAX:
-	case POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT:
 		return smb2_set_current_limit(chip, val->intval);
 	default:
 		dev_err(chip->dev, "No setter for property: %d\n", psp);
@@ -720,7 +713,6 @@ static int smb2_property_is_writable(struct power_supply *psy,
 {
 	switch (psp) {
 	case POWER_SUPPLY_PROP_CURRENT_MAX:
-	case POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT:
 		return 1;
 	default:
 		return 0;

From 7e6311f04b41174ccc738d37dcb4b93cd4de98b0 Mon Sep 17 00:00:00 2001
From: Caleb Connolly <caleb.connolly@linaro.org>
Date: Wed, 2 Aug 2023 15:49:29 +0100
Subject: [PATCH 0700/2047] power: supply: qcom_pmi8998_charger: fix charger
 status

The INHIBIT_CHARGE status bit means the battery has reached a
pre-programmed charge limit which is some voltage offset below the
target float voltage. This should be reported as a STATUS_FULL rather
than UNKNOWN, fix it.

Signed-off-by: Caleb Connolly <caleb.connolly@linaro.org>
Link: https://lore.kernel.org/r/20230802-pmi8998-charger-fixes-v1-2-a8f1e8b84c1e@linaro.org
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/qcom_pmi8998_charger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/supply/qcom_pmi8998_charger.c b/drivers/power/supply/qcom_pmi8998_charger.c
index c7ec9aeb7a64..10f4dd0caca1 100644
--- a/drivers/power/supply/qcom_pmi8998_charger.c
+++ b/drivers/power/supply/qcom_pmi8998_charger.c
@@ -517,9 +517,9 @@ static int smb2_get_prop_status(struct smb2_chip *chip, int *val)
 		*val = POWER_SUPPLY_STATUS_NOT_CHARGING;
 		return rc;
 	case TERMINATE_CHARGE:
+	case INHIBIT_CHARGE:
 		*val = POWER_SUPPLY_STATUS_FULL;
 		return rc;
-	case INHIBIT_CHARGE:
 	default:
 		*val = POWER_SUPPLY_STATUS_UNKNOWN;
 		return rc;

From 6b6349d0d685fc3032455b43fce57f374819fb54 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@somainline.org>
Date: Wed, 2 Aug 2023 14:37:41 +0200
Subject: [PATCH 0701/2047] cpufreq: blocklist MSM8998 in cpufreq-dt-platdev

Add the MSM8998 to the blocklist since the CPU scaling on this platform
is handled by a separate driver.

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@somainline.org>
Tested-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Caleb Connolly <caleb.connolly@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq-dt-platdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index e2b20080de3a..adb3579a1fee 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -143,6 +143,7 @@ static const struct of_device_id blocklist[] __initconst = {
 
 	{ .compatible = "qcom,apq8096", },
 	{ .compatible = "qcom,msm8996", },
+	{ .compatible = "qcom,msm8998", },
 	{ .compatible = "qcom,qcs404", },
 	{ .compatible = "qcom,sa8155p" },
 	{ .compatible = "qcom,sa8540p" },

From e520d0b6be950ce3738cf4b9bd3b392be818f1dc Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 31 Jul 2023 21:15:48 -0600
Subject: [PATCH 0702/2047] cpufreq: brcmstb-avs-cpufreq: Fix -Warray-bounds
 bug

Allocate extra space for terminating element at:

drivers/cpufreq/brcmstb-avs-cpufreq.c:
449         table[i].frequency = CPUFREQ_TABLE_END;

and add code comment to make this clear.

This fixes the following -Warray-bounds warning seen after building
ARM with multi_v7_defconfig (GCC 13):
In function 'brcm_avs_get_freq_table',
    inlined from 'brcm_avs_cpufreq_init' at drivers/cpufreq/brcmstb-avs-cpufreq.c:623:15:
drivers/cpufreq/brcmstb-avs-cpufreq.c:449:28: warning: array subscript 5 is outside array bounds of 'void[60]' [-Warray-bounds=]
  449 |         table[i].frequency = CPUFREQ_TABLE_END;
In file included from include/linux/node.h:18,
                 from include/linux/cpu.h:17,
                 from include/linux/cpufreq.h:12,
                 from drivers/cpufreq/brcmstb-avs-cpufreq.c:44:
In function 'devm_kmalloc_array',
    inlined from 'devm_kcalloc' at include/linux/device.h:328:9,
    inlined from 'brcm_avs_get_freq_table' at drivers/cpufreq/brcmstb-avs-cpufreq.c:437:10,
    inlined from 'brcm_avs_cpufreq_init' at drivers/cpufreq/brcmstb-avs-cpufreq.c:623:15:
include/linux/device.h:323:16: note: at offset 60 into object of size 60 allocated by 'devm_kmalloc'
  323 |         return devm_kmalloc(dev, bytes, flags);
      |                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This helps with the ongoing efforts to tighten the FORTIFY_SOURCE
routines on memcpy() and help us make progress towards globally
enabling -Warray-bounds.

Link: https://github.com/KSPP/linux/issues/324
Fixes: de322e085995 ("cpufreq: brcmstb-avs-cpufreq: AVS CPUfreq driver for Broadcom STB SoCs")
Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/brcmstb-avs-cpufreq.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c
index 1bdd513bcd19..35fb3a559ea9 100644
--- a/drivers/cpufreq/brcmstb-avs-cpufreq.c
+++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c
@@ -434,7 +434,11 @@ brcm_avs_get_freq_table(struct device *dev, struct private_data *priv)
 	if (ret)
 		return ERR_PTR(ret);
 
-	table = devm_kcalloc(dev, AVS_PSTATE_MAX + 1, sizeof(*table),
+	/*
+	 * We allocate space for the 5 different P-STATES AVS,
+	 * plus extra space for a terminating element.
+	 */
+	table = devm_kcalloc(dev, AVS_PSTATE_MAX + 1 + 1, sizeof(*table),
 			     GFP_KERNEL);
 	if (!table)
 		return ERR_PTR(-ENOMEM);

From 65412c8d72741cc6e6b082b478b8957d7e7c0480 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 3 Aug 2023 10:26:15 +0200
Subject: [PATCH 0703/2047] x86/asm: Avoid unneeded __div64_32 function
 definition

The __div64_32() function is provided for 32-bit architectures that
don't have a custom do_div() implementation. x86_32 has one, and
does not use the header file that declares the function prototype,
so the definition causes a W=1 warning:

  lib/math/div64.c:31:32: error: no previous prototype for '__div64_32' [-Werror=missing-prototypes]

Define an empty macro to prevent the function definition from getting
built, which avoids the warning and saves a little .text space.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230803082619.1369127-4-arnd@kernel.org
---
 arch/x86/include/asm/div64.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index b8f1dc0761e4..9931e4c7d73f 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -71,6 +71,12 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
 }
 #define mul_u32_u32 mul_u32_u32
 
+/*
+ * __div64_32() is never called on x86, so prevent the
+ * generic definition from getting built.
+ */
+#define __div64_32
+
 #else
 # include <asm-generic/div64.h>
 

From 22475bcc2083196544fa55b861d76e0e7ee9da11 Mon Sep 17 00:00:00 2001
From: Naresh Solanki <naresh.solanki@9elements.com>
Date: Thu, 3 Aug 2023 13:12:25 +0200
Subject: [PATCH 0704/2047] regulator: userspace-consumer: Add regulator event
 support

Add sysfs attribute to track regulator events received from regulator
notifier block handler.

Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230803111225.107572-1-Naresh.Solanki@9elements.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/userspace-consumer.c | 52 +++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/userspace-consumer.c b/drivers/regulator/userspace-consumer.c
index 97f075ed68c9..a0b980022993 100644
--- a/drivers/regulator/userspace-consumer.c
+++ b/drivers/regulator/userspace-consumer.c
@@ -29,6 +29,10 @@ struct userspace_consumer_data {
 
 	int num_supplies;
 	struct regulator_bulk_data *supplies;
+
+	struct kobject *kobj;
+	struct notifier_block nb;
+	unsigned long events;
 };
 
 static ssize_t name_show(struct device *dev,
@@ -89,12 +93,30 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
+static DEFINE_MUTEX(events_lock);
+
+static ssize_t events_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct userspace_consumer_data *data = dev_get_drvdata(dev);
+	unsigned long e;
+
+	mutex_lock(&events_lock);
+	e = data->events;
+	data->events = 0;
+	mutex_unlock(&events_lock);
+
+	return sprintf(buf, "0x%lx\n", e);
+}
+
 static DEVICE_ATTR_RO(name);
 static DEVICE_ATTR_RW(state);
+static DEVICE_ATTR_RO(events);
 
 static struct attribute *attributes[] = {
 	&dev_attr_name.attr,
 	&dev_attr_state.attr,
+	&dev_attr_events.attr,
 	NULL,
 };
 
@@ -115,12 +137,28 @@ static const struct attribute_group attr_group = {
 	.is_visible =  attr_visible,
 };
 
+static int regulator_userspace_notify(struct notifier_block *nb,
+				      unsigned long event,
+				      void *ignored)
+{
+	struct userspace_consumer_data *data =
+		container_of(nb, struct userspace_consumer_data, nb);
+
+	mutex_lock(&events_lock);
+	data->events |= event;
+	mutex_unlock(&events_lock);
+
+	sysfs_notify(data->kobj, NULL, dev_attr_events.attr.name);
+
+	return NOTIFY_OK;
+}
+
 static int regulator_userspace_consumer_probe(struct platform_device *pdev)
 {
 	struct regulator_userspace_consumer_data tmpdata;
 	struct regulator_userspace_consumer_data *pdata;
 	struct userspace_consumer_data *drvdata;
-	int ret;
+	int i, ret;
 
 	pdata = dev_get_platdata(&pdev->dev);
 	if (!pdata) {
@@ -153,6 +191,7 @@ static int regulator_userspace_consumer_probe(struct platform_device *pdev)
 	drvdata->num_supplies = pdata->num_supplies;
 	drvdata->supplies = pdata->supplies;
 	drvdata->no_autoswitch = pdata->no_autoswitch;
+	drvdata->kobj = &pdev->dev.kobj;
 
 	mutex_init(&drvdata->lock);
 
@@ -186,6 +225,13 @@ static int regulator_userspace_consumer_probe(struct platform_device *pdev)
 	}
 	drvdata->enabled = !!ret;
 
+	drvdata->nb.notifier_call = regulator_userspace_notify;
+	for (i = 0; i < drvdata->num_supplies; i++) {
+		ret = devm_regulator_register_notifier(drvdata->supplies[i].consumer, &drvdata->nb);
+		if (ret)
+			goto err_enable;
+	}
+
 	return 0;
 
 err_enable:
@@ -197,6 +243,10 @@ err_enable:
 static int regulator_userspace_consumer_remove(struct platform_device *pdev)
 {
 	struct userspace_consumer_data *data = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < data->num_supplies; i++)
+		devm_regulator_unregister_notifier(data->supplies[i].consumer, &data->nb);
 
 	sysfs_remove_group(&pdev->dev.kobj, &attr_group);
 

From d8736266ae960504110e812994f555bf7cb8740c Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Thu, 3 Aug 2023 16:39:44 +0800
Subject: [PATCH 0705/2047] spi: fsl-spi: Do not check 0 for platform_get_irq()

Since platform_get_irq() never returned zero, so it need not to check
whether it returned zero, and we use the return error code of
platform_get_irq() to replace the current return error code, for that
platform_get_irq() may return -EINVAL or -ENXIO.

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Link: https://lore.kernel.org/r/20230803083944.21501-1-wangzhu9@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-spi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 106fe60a0a50..9933842ab727 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -740,8 +740,8 @@ static int plat_mpc8xxx_spi_probe(struct platform_device *pdev)
 		return -EINVAL;
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq <= 0)
-		return -EINVAL;
+	if (irq < 0)
+		return irq;
 
 	master = fsl_spi_probe(&pdev->dev, mem, irq);
 	return PTR_ERR_OR_ZERO(master);

From 8a132ecb6bc38f9ed0d2927acbcabeb1443613b2 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 25 Jul 2023 22:03:27 +0800
Subject: [PATCH 0706/2047] efi: Remove unused extern declaration
 efi_lookup_mapped_addr()

Since commit 50a0cb565246 ("x86/efi-bgrt: Fix kernel panic when mapping BGRT data")
this extern declaration is not used anymore.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/efi.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/efi.h b/include/linux/efi.h
index ab088c662e88..e9004358f7bd 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -726,7 +726,6 @@ static inline efi_status_t efi_query_variable_store(u32 attributes,
 	return EFI_SUCCESS;
 }
 #endif
-extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
 
 extern int __init __efi_memmap_init(struct efi_memory_map_data *data);
 extern int __init efi_memmap_init_early(struct efi_memory_map_data *data);

From 92faa821bb160e0282ea936fe664cf5f0ead86df Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Mon, 31 Jul 2023 19:20:01 +0800
Subject: [PATCH 0707/2047] efi: memmap: Remove kernel-doc warnings

Remove kernel-doc warnings:

arch/x86/platform/efi/memmap.c:94: warning: Function parameter or member
'data' not described in 'efi_memmap_install'
arch/x86/platform/efi/memmap.c:94: warning: Excess function parameter
'ctx' description in 'efi_memmap_install'

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/platform/efi/memmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/platform/efi/memmap.c b/arch/x86/platform/efi/memmap.c
index c69f8471e6d0..4ef20b49eb5e 100644
--- a/arch/x86/platform/efi/memmap.c
+++ b/arch/x86/platform/efi/memmap.c
@@ -82,7 +82,7 @@ int __init efi_memmap_alloc(unsigned int num_entries,
 
 /**
  * efi_memmap_install - Install a new EFI memory map in efi.memmap
- * @ctx: map allocation parameters (address, size, flags)
+ * @data: efi memmap installation parameters
  *
  * Unlike efi_memmap_init_*(), this function does not allow the caller
  * to switch from early to late mappings. It simply uses the existing

From f6e6e95ce16205025b7b8680a66c30a0c4ec2270 Mon Sep 17 00:00:00 2001
From: Xiao Wang <xiao.w.wang@intel.com>
Date: Thu, 3 Aug 2023 13:56:11 +0800
Subject: [PATCH 0708/2047] efi/riscv: libstub: Fix comment about absolute
 relocation

We don't want absolute symbols references in the stub, so fix the double
negation in the comment.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/libstub/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 16d64a34d1e1..6f74c7eddeb0 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -146,7 +146,7 @@ STUBCOPY_RELOC-$(CONFIG_ARM64)	:= R_AARCH64_ABS
 
 # For RISC-V, we don't need anything special other than arm64. Keep all the
 # symbols in .init section and make sure that no absolute symbols references
-# doesn't exist.
+# exist.
 STUBCOPY_FLAGS-$(CONFIG_RISCV)	+= --prefix-alloc-sections=.init \
 				   --prefix-symbols=__efistub_
 STUBCOPY_RELOC-$(CONFIG_RISCV)	:= R_RISCV_HI20

From b11eba8b02e592b69b41af01ab7325a853441908 Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Tue, 1 Aug 2023 10:28:00 +0300
Subject: [PATCH 0709/2047] gpio: sch: remove unneeded call to
 platform_set_drvdata()

This function call was found to be unnecessary as there is no equivalent
platform_get_drvdata() call to access the private data of the driver. Also,
the private data is defined in this driver, so there is no risk of it being
accessed outside of this driver file.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-sch.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c
index 8a83f7bf4382..e48392074e4b 100644
--- a/drivers/gpio/gpio-sch.c
+++ b/drivers/gpio/gpio-sch.c
@@ -380,8 +380,6 @@ static int sch_gpio_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	platform_set_drvdata(pdev, sch);
-
 	girq = &sch->chip.irq;
 	gpio_irq_chip_set_chip(girq, &sch_irqchip);
 	girq->num_parents = 0;

From 41eb8510f18250ced8bef44f0dead759f502fb9e Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Tue, 1 Aug 2023 10:30:44 +0300
Subject: [PATCH 0710/2047] gpio: syscon: remove unneeded call to
 platform_set_drvdata()

This function call was found to be unnecessary as there is no equivalent
platform_get_drvdata() call to access the private data of the driver. Also,
the private data is defined in this driver, so there is no risk of it being
accessed outside of this driver file.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-syscon.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-syscon.c b/drivers/gpio/gpio-syscon.c
index 41bf47c8c983..6e1a2581e6ae 100644
--- a/drivers/gpio/gpio-syscon.c
+++ b/drivers/gpio/gpio-syscon.c
@@ -248,8 +248,6 @@ static int syscon_gpio_probe(struct platform_device *pdev)
 		priv->chip.direction_output = syscon_gpio_dir_out;
 	}
 
-	platform_set_drvdata(pdev, priv);
-
 	return devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv);
 }
 

From 02840579a94da4fb73292464c34c47222050923d Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Tue, 1 Aug 2023 10:32:11 +0300
Subject: [PATCH 0711/2047] gpio: timberdale: remove unneeded call to
 platform_set_drvdata()

This function call was found to be unnecessary as there is no equivalent
platform_get_drvdata() call to access the private data of the driver. Also,
the private data is defined in this driver, so there is no risk of it being
accessed outside of this driver file.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-timberdale.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c
index de14949a3fe5..bbd9e9191199 100644
--- a/drivers/gpio/gpio-timberdale.c
+++ b/drivers/gpio/gpio-timberdale.c
@@ -256,8 +256,6 @@ static int timbgpio_probe(struct platform_device *pdev)
 	if (err)
 		return err;
 
-	platform_set_drvdata(pdev, tgpio);
-
 	/* make sure to disable interrupts */
 	iowrite32(0x0, tgpio->membase + TGPIO_IER);
 

From 0a5e9306b812fe3517548fab92b3d3d6ce7576e5 Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Tue, 1 Aug 2023 23:38:39 +0300
Subject: [PATCH 0712/2047] gpio: tqmx86: remove unneeded call to
 platform_set_drvdata()

This function call was found to be unnecessary as there is no equivalent
platform_get_drvdata() call to access the private data of the driver. Also,
the private data is defined in this driver, so there is no risk of it being
accessed outside of this driver file.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-tqmx86.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c
index 6f8bd1155db7..3a28c1f273c3 100644
--- a/drivers/gpio/gpio-tqmx86.c
+++ b/drivers/gpio/gpio-tqmx86.c
@@ -277,8 +277,6 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 
 	tqmx86_gpio_write(gpio, (u8)~TQMX86_DIR_INPUT_MASK, TQMX86_GPIODD);
 
-	platform_set_drvdata(pdev, gpio);
-
 	chip = &gpio->chip;
 	chip->label = "gpio-tqmx86";
 	chip->owner = THIS_MODULE;

From 872982cecd2f1f1dc67502732f5126267a22da3f Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Tue, 1 Aug 2023 23:44:45 +0300
Subject: [PATCH 0713/2047] gpio: vx855: remove unneeded call to
 platform_set_drvdata()

This function call was found to be unnecessary as there is no equivalent
platform_get_drvdata() call to access the private data of the driver. Also,
the private data is defined in this driver, so there is no risk of it being
accessed outside of this driver file.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-vx855.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-vx855.c b/drivers/gpio/gpio-vx855.c
index 69713fd5485b..8fd6c3913d69 100644
--- a/drivers/gpio/gpio-vx855.c
+++ b/drivers/gpio/gpio-vx855.c
@@ -240,8 +240,6 @@ static int vx855gpio_probe(struct platform_device *pdev)
 	if (!vg)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, vg);
-
 	dev_info(&pdev->dev, "found VX855 GPIO controller\n");
 	vg->io_gpi = res_gpi->start;
 	vg->io_gpo = res_gpo->start;

From 146bf98e00886420199d8892d3684c9736789b0c Mon Sep 17 00:00:00 2001
From: Andrei Coardos <aboutphysycs@gmail.com>
Date: Tue, 1 Aug 2023 23:46:06 +0300
Subject: [PATCH 0714/2047] gpio: zevio: remove unneeded call to
 platform_set_drvdata()

This function call was found to be unnecessary as there is no equivalent
platform_get_drvdata() call to access the private data of the driver. Also,
the private data is defined in this driver, so there is no risk of it being
accessed outside of this driver file.

Reviewed-by: Alexandru Ardelean <alex@shruggie.ro>
Signed-off-by: Andrei Coardos <aboutphysycs@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-zevio.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpio/gpio-zevio.c b/drivers/gpio/gpio-zevio.c
index f0f571b323f2..2de61337ad3b 100644
--- a/drivers/gpio/gpio-zevio.c
+++ b/drivers/gpio/gpio-zevio.c
@@ -176,8 +176,6 @@ static int zevio_gpio_probe(struct platform_device *pdev)
 	if (!controller)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, controller);
-
 	/* Copy our reference */
 	controller->chip = zevio_gpio_chip;
 	controller->chip.parent = &pdev->dev;

From 455d39ec96f0d8020adf465ab1f864a722d96eb3 Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Thu, 3 Aug 2023 11:39:37 +0800
Subject: [PATCH 0715/2047] gpio: ftgpio010: Do not check for 0 return after
 calling platform_get_irq()

Since commit a85a6c86c25b ("driver core: platform: Clarify that IRQ 0
is invalid"), there is no possible for platform_get_irq() to return 0.

And the return value of platform_get_irq() is more sensible
to show the error reason.

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-ftgpio010.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-ftgpio010.c b/drivers/gpio/gpio-ftgpio010.c
index 31e26072f6ae..5ce59dcf02e3 100644
--- a/drivers/gpio/gpio-ftgpio010.c
+++ b/drivers/gpio/gpio-ftgpio010.c
@@ -250,8 +250,8 @@ static int ftgpio_gpio_probe(struct platform_device *pdev)
 		return PTR_ERR(g->base);
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq <= 0)
-		return irq ? irq : -EINVAL;
+	if (irq < 0)
+		return irq;
 
 	g->clk = devm_clk_get(dev, NULL);
 	if (!IS_ERR(g->clk)) {

From c4dc167c684b8d9e58d3e4bde5b7eaef5d4e8cf5 Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Thu, 3 Aug 2023 10:28:44 +0800
Subject: [PATCH 0716/2047] gpio: tps65218: remove redundant of_match_ptr()

The driver depends on CONFIG_OF, so it is not necessary to use
of_match_ptr() here.

Even for drivers that do not depend on CONFIG_OF, it's almost always
better to leave out the of_match_ptr(), since the only thing it can
possibly do is to save a few bytes of .text if a driver can be used both
with and without it. Hence we remove of_match_ptr().

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-tps65218.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-tps65218.c b/drivers/gpio/gpio-tps65218.c
index e1d425a18854..d7d9d50dcddf 100644
--- a/drivers/gpio/gpio-tps65218.c
+++ b/drivers/gpio/gpio-tps65218.c
@@ -216,7 +216,7 @@ MODULE_DEVICE_TABLE(platform, tps65218_gpio_id_table);
 static struct platform_driver tps65218_gpio_driver = {
 	.driver = {
 		.name = "tps65218-gpio",
-		.of_match_table = of_match_ptr(tps65218_dt_match)
+		.of_match_table = tps65218_dt_match,
 	},
 	.probe = tps65218_gpio_probe,
 	.id_table = tps65218_gpio_id_table,

From 9c573074895f9e2da3cbe92605dce5d765b311b7 Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Thu, 3 Aug 2023 10:20:56 +0800
Subject: [PATCH 0717/2047] gpio: max732x: remove redundant CONFIG_OF and
 of_match_ptr()

The driver depends on CONFIG_OF, so it is not necessary to use
of_match_ptr() here. We remove both CONFIG_OF and of_match_ptr() here.

Even for drivers that do not depend on CONFIG_OF, it's almost always
better to leave out the of_match_ptr(), since the only thing it can
possibly do is to save a few bytes of .text if a driver can be used both
with and without it.

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-max732x.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c
index fca9ca68e387..49d362907bc7 100644
--- a/drivers/gpio/gpio-max732x.c
+++ b/drivers/gpio/gpio-max732x.c
@@ -18,8 +18,6 @@
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
 #include <linux/platform_data/max732x.h>
-#include <linux/of.h>
-
 
 /*
  * Each port of MAX732x (including MAX7319) falls into one of the
@@ -114,7 +112,6 @@ static const struct i2c_device_id max732x_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, max732x_id);
 
-#ifdef CONFIG_OF
 static const struct of_device_id max732x_of_table[] = {
 	{ .compatible = "maxim,max7319" },
 	{ .compatible = "maxim,max7320" },
@@ -128,7 +125,6 @@ static const struct of_device_id max732x_of_table[] = {
 	{ }
 };
 MODULE_DEVICE_TABLE(of, max732x_of_table);
-#endif
 
 struct max732x_chip {
 	struct gpio_chip gpio_chip;
@@ -709,7 +705,7 @@ static int max732x_probe(struct i2c_client *client)
 static struct i2c_driver max732x_driver = {
 	.driver = {
 		.name		= "max732x",
-		.of_match_table	= of_match_ptr(max732x_of_table),
+		.of_match_table	= max732x_of_table,
 	},
 	.probe		= max732x_probe,
 	.id_table	= max732x_id,

From a374467ae68c23d4c9e9d5009207c1414469c5b0 Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Wed, 2 Aug 2023 10:24:03 +0800
Subject: [PATCH 0718/2047] gpio: altera-a10sr: remove redundant of_match_ptr

The driver depends on CONFIG_OF, so it is not necessary to use
of_match_ptr() here.

Even for drivers that do not depend on CONFIG_OF, it's almost always
better to leave out the of_match_ptr(), since the only thing it can
possibly do is to save a few bytes of .text if a driver can be used both
with and without it. Hence we remove of_match_ptr().

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-altera-a10sr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c
index be1ed7ee5225..11edf1fe6c90 100644
--- a/drivers/gpio/gpio-altera-a10sr.c
+++ b/drivers/gpio/gpio-altera-a10sr.c
@@ -9,6 +9,7 @@
 
 #include <linux/gpio/driver.h>
 #include <linux/mfd/altera-a10sr.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/property.h>
 
@@ -104,7 +105,7 @@ static struct platform_driver altr_a10sr_gpio_driver = {
 	.probe = altr_a10sr_gpio_probe,
 	.driver = {
 		.name	= "altr_a10sr_gpio",
-		.of_match_table = of_match_ptr(altr_a10sr_gpio_of_match),
+		.of_match_table = altr_a10sr_gpio_of_match,
 	},
 };
 module_platform_driver(altr_a10sr_gpio_driver);

From a0d22277ba13b609a6f792b61aa1798c3c653676 Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Wed, 2 Aug 2023 10:54:31 +0800
Subject: [PATCH 0719/2047] gpio: clps711x: remove redundant of_match_ptr()

The driver depends on CONFIG_OF, so it is not necessary to use
of_match_ptr() here.

Even for drivers that do not depend on CONFIG_OF, it's almost always
better to leave out the of_match_ptr(), since the only thing it can
possibly do is to save a few bytes of .text if a driver can be used both
with and without it. Hence we remove of_match_ptr().

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-clps711x.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-clps711x.c b/drivers/gpio/gpio-clps711x.c
index 75f6f8d4323e..d69a24dd4828 100644
--- a/drivers/gpio/gpio-clps711x.c
+++ b/drivers/gpio/gpio-clps711x.c
@@ -67,7 +67,7 @@ static int clps711x_gpio_probe(struct platform_device *pdev)
 	return devm_gpiochip_add_data(&pdev->dev, gc, NULL);
 }
 
-static const struct of_device_id __maybe_unused clps711x_gpio_ids[] = {
+static const struct of_device_id clps711x_gpio_ids[] = {
 	{ .compatible = "cirrus,ep7209-gpio" },
 	{ }
 };
@@ -76,7 +76,7 @@ MODULE_DEVICE_TABLE(of, clps711x_gpio_ids);
 static struct platform_driver clps711x_gpio_driver = {
 	.driver	= {
 		.name		= "clps711x-gpio",
-		.of_match_table	= of_match_ptr(clps711x_gpio_ids),
+		.of_match_table	= clps711x_gpio_ids,
 	},
 	.probe	= clps711x_gpio_probe,
 };

From 07d93cbb3dc01e8cf2c39e996f60cbff3a77ebb2 Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Wed, 2 Aug 2023 10:44:55 +0800
Subject: [PATCH 0720/2047] gpio: ixp4xx: remove redundant of_match_ptr()

The driver depends on CONFIG_OF, so it is not necessary to use
of_match_ptr() here.

Even for drivers that do not depend on CONFIG_OF, it's almost always
better to leave out the of_match_ptr(), since the only thing it can
possibly do is to save a few bytes of .text if a driver can be used both
with and without it. Hence we remove of_match_ptr().

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-ixp4xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-ixp4xx.c b/drivers/gpio/gpio-ixp4xx.c
index 1e29de1671d4..dde6cf3a5779 100644
--- a/drivers/gpio/gpio-ixp4xx.c
+++ b/drivers/gpio/gpio-ixp4xx.c
@@ -302,7 +302,7 @@ static const struct of_device_id ixp4xx_gpio_of_match[] = {
 static struct platform_driver ixp4xx_gpio_driver = {
 	.driver = {
 		.name		= "ixp4xx-gpio",
-		.of_match_table = of_match_ptr(ixp4xx_gpio_of_match),
+		.of_match_table = ixp4xx_gpio_of_match,
 	},
 	.probe = ixp4xx_gpio_probe,
 };

From bcb6b9e50df8367696ab0704b3858790e5589983 Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Wed, 2 Aug 2023 11:17:49 +0800
Subject: [PATCH 0721/2047] gpio: lpc32xx: remove redundant CONFIG_OF and
 of_match_ptr()

The driver depends on CONFIG_OF, so it is not necessary to use
CONFIG_OF and of_match_ptr() here, so we remove them all.

Even for drivers that do not depend on CONFIG_OF, it's almost always
better to leave out the of_match_ptr(), since the only thing it can
possibly do is to save a few bytes of .text if a driver can be used both
with and without it.

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-lpc32xx.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpio/gpio-lpc32xx.c b/drivers/gpio/gpio-lpc32xx.c
index d2b65cfb336e..5ef8af824980 100644
--- a/drivers/gpio/gpio-lpc32xx.c
+++ b/drivers/gpio/gpio-lpc32xx.c
@@ -525,17 +525,15 @@ static int lpc32xx_gpio_probe(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_OF
 static const struct of_device_id lpc32xx_gpio_of_match[] = {
 	{ .compatible = "nxp,lpc3220-gpio", },
 	{ },
 };
-#endif
 
 static struct platform_driver lpc32xx_gpio_driver = {
 	.driver		= {
 		.name	= "lpc32xx-gpio",
-		.of_match_table = of_match_ptr(lpc32xx_gpio_of_match),
+		.of_match_table = lpc32xx_gpio_of_match,
 	},
 	.probe		= lpc32xx_gpio_probe,
 };

From 5878753886c30c041110368982f34a494997501d Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Wed, 2 Aug 2023 11:29:57 +0800
Subject: [PATCH 0722/2047] gpio: max3191x: remove redundant of_match_ptr()

The driver depends on CONFIG_OF, so it is not necessary to use CONFIG_OF
and of_match_ptr() here, we remove them all.

Even for drivers that do not depend on CONFIG_OF, it's almost always
better to leave out the of_match_ptr(), since the only thing it can
possibly do is to save a few bytes of .text if a driver can be used both
with and without it.

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-max3191x.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c
index 161c4751c5f7..bbacc714632b 100644
--- a/drivers/gpio/gpio-max3191x.c
+++ b/drivers/gpio/gpio-max3191x.c
@@ -457,7 +457,6 @@ static int __init max3191x_register_driver(struct spi_driver *sdrv)
 	return spi_register_driver(sdrv);
 }
 
-#ifdef CONFIG_OF
 static const struct of_device_id max3191x_of_id[] = {
 	{ .compatible = "maxim,max31910" },
 	{ .compatible = "maxim,max31911" },
@@ -468,7 +467,6 @@ static const struct of_device_id max3191x_of_id[] = {
 	{ }
 };
 MODULE_DEVICE_TABLE(of, max3191x_of_id);
-#endif
 
 static const struct spi_device_id max3191x_spi_id[] = {
 	{ "max31910" },
@@ -484,7 +482,7 @@ MODULE_DEVICE_TABLE(spi, max3191x_spi_id);
 static struct spi_driver max3191x_driver = {
 	.driver = {
 		.name		= "max3191x",
-		.of_match_table	= of_match_ptr(max3191x_of_id),
+		.of_match_table	= max3191x_of_id,
 	},
 	.probe	  = max3191x_probe,
 	.remove	  = max3191x_remove,

From 30531e14c9490f08392001f91d6a6baeae1cbacf Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Wed, 2 Aug 2023 11:48:55 +0800
Subject: [PATCH 0723/2047] gpio: raspberrypi-exp: remove redundant
 of_match_ptr()

The driver depends on CONFIG_OF, so it is not necessary to use
of_match_ptr() here, and __maybe_unused can also be removed.

Even for drivers that do not depend on CONFIG_OF, it's almost always
better to leave out the of_match_ptr(), since the only thing it can
possibly do is to save a few bytes of .text if a driver can be used both
with and without it.

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-raspberrypi-exp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-raspberrypi-exp.c b/drivers/gpio/gpio-raspberrypi-exp.c
index ecb0d3800dfe..9d1b95e429f1 100644
--- a/drivers/gpio/gpio-raspberrypi-exp.c
+++ b/drivers/gpio/gpio-raspberrypi-exp.c
@@ -234,7 +234,7 @@ static int rpi_exp_gpio_probe(struct platform_device *pdev)
 	return devm_gpiochip_add_data(dev, &rpi_gpio->gc, rpi_gpio);
 }
 
-static const struct of_device_id rpi_exp_gpio_ids[] __maybe_unused = {
+static const struct of_device_id rpi_exp_gpio_ids[] = {
 	{ .compatible = "raspberrypi,firmware-gpio" },
 	{ }
 };
@@ -243,7 +243,7 @@ MODULE_DEVICE_TABLE(of, rpi_exp_gpio_ids);
 static struct platform_driver rpi_exp_gpio_driver = {
 	.driver	= {
 		.name		= MODULE_NAME,
-		.of_match_table	= of_match_ptr(rpi_exp_gpio_ids),
+		.of_match_table	= rpi_exp_gpio_ids,
 	},
 	.probe	= rpi_exp_gpio_probe,
 };

From 87d0688483f56c748b37a5298bdc382df5cf8f74 Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Thu, 3 Aug 2023 10:13:17 +0800
Subject: [PATCH 0724/2047] gpio: xra1403: remove redundant of_match_ptr()

The driver depends on CONFIG_OF, so it is not necessary to use
of_match_ptr() here, and __maybe_unused can also be removed.

Even for drivers that do not depend on CONFIG_OF, it's almost always
better to leave out the of_match_ptr(), since the only thing it can
possibly do is to save a few bytes of .text if a driver can be used both
with and without it.

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-xra1403.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-xra1403.c b/drivers/gpio/gpio-xra1403.c
index eed8a1684830..dc2710c21c50 100644
--- a/drivers/gpio/gpio-xra1403.c
+++ b/drivers/gpio/gpio-xra1403.c
@@ -194,7 +194,7 @@ static const struct spi_device_id xra1403_ids[] = {
 };
 MODULE_DEVICE_TABLE(spi, xra1403_ids);
 
-static const struct of_device_id xra1403_spi_of_match[] __maybe_unused = {
+static const struct of_device_id xra1403_spi_of_match[] = {
 	{ .compatible = "exar,xra1403" },
 	{},
 };
@@ -205,7 +205,7 @@ static struct spi_driver xra1403_driver = {
 	.id_table = xra1403_ids,
 	.driver   = {
 		.name           = "xra1403",
-		.of_match_table = of_match_ptr(xra1403_spi_of_match),
+		.of_match_table = xra1403_spi_of_match,
 	},
 };
 

From 6d33531bc0234332572ae612232d947f353a4450 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 3 Aug 2023 10:26:16 +0200
Subject: [PATCH 0725/2047] x86/purgatory: Include header for warn()
 declaration

The purgatory code uses parts of the decompressor and provides its own
warn() function, but has to include the corresponding header file to
avoid a -Wmissing-prototypes warning.

It turns out that this function prototype actually differs from the
declaration, so change it to get a constant pointer in the declaration
and the other definition as well.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230803082619.1369127-5-arnd@kernel.org
---
 arch/x86/boot/compressed/error.c | 2 +-
 arch/x86/boot/compressed/error.h | 2 +-
 arch/x86/purgatory/purgatory.c   | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c
index 5313c5cb2b80..19a8251de506 100644
--- a/arch/x86/boot/compressed/error.c
+++ b/arch/x86/boot/compressed/error.c
@@ -7,7 +7,7 @@
 #include "misc.h"
 #include "error.h"
 
-void warn(char *m)
+void warn(const char *m)
 {
 	error_putstr("\n\n");
 	error_putstr(m);
diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h
index 86fe33b93715..31f9e080d61a 100644
--- a/arch/x86/boot/compressed/error.h
+++ b/arch/x86/boot/compressed/error.h
@@ -4,7 +4,7 @@
 
 #include <linux/compiler.h>
 
-void warn(char *m);
+void warn(const char *m);
 void error(char *m) __noreturn;
 void panic(const char *fmt, ...) __noreturn __cold;
 
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 7558139920f8..aea47e793963 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -14,6 +14,7 @@
 #include <crypto/sha2.h>
 #include <asm/purgatory.h>
 
+#include "../boot/compressed/error.h"
 #include "../boot/string.h"
 
 u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(".kexec-purgatory");

From 1a3e4b4da39bbf540a379e21869faa74bb19d16f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 3 Aug 2023 10:26:17 +0200
Subject: [PATCH 0726/2047] x86/alternative: Add a __alt_reloc_selftest()
 prototype

The newly introduced selftest function causes a warning when -Wmissing-prototypes
is enabled:

  arch/x86/kernel/alternative.c:1461:32: error: no previous prototype for '__alt_reloc_selftest' [-Werror=missing-prototypes]

Since it's only used locally, add the prototype directly in front of it.

Fixes: 270a69c4485d ("x86/alternative: Support relocations in alternatives")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230803082619.1369127-6-arnd@kernel.org
---
 arch/x86/kernel/alternative.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 2dcf3a06af09..934c23f24a3f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1531,6 +1531,7 @@ static noinline void __init int3_selftest(void)
 
 static __initdata int __alt_reloc_selftest_addr;
 
+extern void __init __alt_reloc_selftest(void *arg);
 __visible noinline void __init __alt_reloc_selftest(void *arg)
 {
 	WARN_ON(arg != &__alt_reloc_selftest_addr);

From ce0a1b608bfc709cf366f020b520310a3b3272c3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 3 Aug 2023 10:26:18 +0200
Subject: [PATCH 0727/2047] x86/paravirt: Silence unused native_pv_lock_init()
 function warning

The native_pv_lock_init() function is only used in SMP configurations
and declared in asm/qspinlock.h which is not used in UP kernels, but
the function is still defined for both, which causes a warning:

  arch/x86/kernel/paravirt.c:76:13: error: no previous prototype for 'native_pv_lock_init' [-Werror=missing-prototypes]

Move the declaration to asm/paravirt.h so it is visible even
with CONFIG_SMP but short-circuit the definition to turn it
into an empty function.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230803082619.1369127-7-arnd@kernel.org
---
 arch/x86/include/asm/paravirt.h  | 7 +++++++
 arch/x86/include/asm/qspinlock.h | 7 +------
 arch/x86/kernel/paravirt.c       | 3 ++-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index b49778664d2b..6c8ff12140ae 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -739,6 +739,7 @@ static __always_inline unsigned long arch_local_irq_save(void)
 	     ".popsection")
 
 extern void default_banner(void);
+void native_pv_lock_init(void) __init;
 
 #else  /* __ASSEMBLY__ */
 
@@ -778,6 +779,12 @@ extern void default_banner(void);
 #endif /* __ASSEMBLY__ */
 #else  /* CONFIG_PARAVIRT */
 # define default_banner x86_init_noop
+
+#ifndef __ASSEMBLY__
+static inline void native_pv_lock_init(void)
+{
+}
+#endif
 #endif /* !CONFIG_PARAVIRT */
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index d87451df480b..cde8357bb226 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -74,8 +74,6 @@ static inline bool vcpu_is_preempted(long cpu)
  */
 DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
 
-void native_pv_lock_init(void) __init;
-
 /*
  * Shortcut for the queued_spin_lock_slowpath() function that allows
  * virt to hijack it.
@@ -103,10 +101,7 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
 
 	return true;
 }
-#else
-static inline void native_pv_lock_init(void)
-{
-}
+
 #endif /* CONFIG_PARAVIRT */
 
 #include <asm-generic/qspinlock.h>
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ac10b46c5832..32e94a320554 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -75,7 +75,8 @@ DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
 
 void __init native_pv_lock_init(void)
 {
-	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
+	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) &&
+	    !boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		static_branch_disable(&virt_spin_lock_key);
 }
 

From 93083725e1ed86c07ea9846bb573b9340985853f Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 3 Aug 2023 19:36:54 +0800
Subject: [PATCH 0728/2047] regulator: max77857: fix build error in
 max77857-regulator.c

When using low verion gcc(7.5) to build the max77857-regulator driver,
got the following error:

drivers/regulator/max77857-regulator.c:312:16: error: initializer element is not constant
  .ramp_delay = max77857_ramp_table[0][0],

To fix this by introducing a macro RAMAP_DELAY_INIT_VAL to define the
value of max77857_ramp_table[0[0].

Fixes: af71cccadece ("regulator: max77857: Add ADI MAX77857/59/MAX77831 Regulator Support")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230803113654.818640-1-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max77857-regulator.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/max77857-regulator.c b/drivers/regulator/max77857-regulator.c
index f2141b5be780..44a510547318 100644
--- a/drivers/regulator/max77857-regulator.c
+++ b/drivers/regulator/max77857-regulator.c
@@ -293,8 +293,10 @@ static const unsigned int max77857_switch_freq[] = {
 	1200000, 1500000, 1800000, 2100000
 };
 
+#define RAMAP_DELAY_INIT_VAL 1333
+
 static const unsigned int max77857_ramp_table[2][4] = {
-	{ 1333, 667, 333, 227 }, /* when switch freq is 1.8MHz or 2.1MHz */
+	{ RAMAP_DELAY_INIT_VAL, 667, 333, 227 }, /* when switch freq is 1.8MHz or 2.1MHz */
 	{ 1166, 667, 333, 167 }, /* when switch freq is 1.2MHz or 1.5MHz */
 };
 
@@ -309,7 +311,7 @@ static struct regulator_desc max77857_regulator_desc = {
 	.n_ramp_values = ARRAY_SIZE(max77857_ramp_table[0]),
 	.ramp_reg = MAX77857_REG_CONT3,
 	.ramp_mask = GENMASK(1, 0),
-	.ramp_delay = max77857_ramp_table[0][0],
+	.ramp_delay = RAMAP_DELAY_INIT_VAL,
 	.owner = THIS_MODULE,
 };
 

From 8874a414f8f706daf1de467cbf2550988ebec09d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 3 Aug 2023 10:26:19 +0200
Subject: [PATCH 0729/2047] x86/qspinlock-paravirt: Fix missing-prototype
 warning

__pv_queued_spin_unlock_slowpath() is defined in a header file as
a global function, and designed to be called from inline asm, but
there is no prototype visible in the definition:

  kernel/locking/qspinlock_paravirt.h:493:1: error: no previous \
    prototype for '__pv_queued_spin_unlock_slowpath' [-Werror=missing-prototypes]

Add this to the x86 header that contains the inline asm calling it,
and ensure this gets included before the definition, rather than
after it.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230803082619.1369127-8-arnd@kernel.org
---
 arch/x86/include/asm/qspinlock_paravirt.h |  2 ++
 kernel/locking/qspinlock_paravirt.h       | 20 ++++++++++----------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 42b17cf10b10..85b6e3609cb9 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -4,6 +4,8 @@
 
 #include <asm/ibt.h>
 
+void __lockfunc __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked);
+
 /*
  * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit
  * registers. For i386, however, only 1 32-bit register needs to be saved
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 6afc249ce697..6a0184e9c234 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -485,6 +485,16 @@ gotlock:
 	return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL);
 }
 
+/*
+ * Include the architecture specific callee-save thunk of the
+ * __pv_queued_spin_unlock(). This thunk is put together with
+ * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
+ * function close to each other sharing consecutive instruction cachelines.
+ * Alternatively, architecture specific version of __pv_queued_spin_unlock()
+ * can be defined.
+ */
+#include <asm/qspinlock_paravirt.h>
+
 /*
  * PV versions of the unlock fastpath and slowpath functions to be used
  * instead of queued_spin_unlock().
@@ -533,16 +543,6 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
 	pv_kick(node->cpu);
 }
 
-/*
- * Include the architecture specific callee-save thunk of the
- * __pv_queued_spin_unlock(). This thunk is put together with
- * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
- * function close to each other sharing consecutive instruction cachelines.
- * Alternatively, architecture specific version of __pv_queued_spin_unlock()
- * can be defined.
- */
-#include <asm/qspinlock_paravirt.h>
-
 #ifndef __pv_queued_spin_unlock
 __visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
 {

From de5e92cb5cefd2968b96075995a36e28298edf71 Mon Sep 17 00:00:00 2001
From: Li Zetao <lizetao1@huawei.com>
Date: Thu, 3 Aug 2023 21:48:05 +0800
Subject: [PATCH 0730/2047] spi: mpc5xxx-psc: Fix unsigned expression compared
 with zero

There is two warnings reported by coccinelle:

./drivers/spi/spi-mpc512x-psc.c:493:5-13: WARNING:
	Unsigned expression compared with zero: mps -> irq     <     0
./drivers/spi/spi-mpc52xx-psc.c:332:5-13: WARNING:
	Unsigned expression compared with zero: mps -> irq     <     0

The commit "208ee586f862"
("spi: mpc5xxx-psc: Return immediately if IRQ resource is unavailable")
was to check whether the IRQ resource is unavailable. When the IRQ
resource is unavailable, an error code is returned, however, the type
of "mps->irq" is "unsigned int", causing the error code to flip. Modify
the type of "mps->irq" to solve this problem.

Fixes: 208ee586f862 ("spi: mpc5xxx-psc: Return immediately if IRQ resource is unavailable")
Signed-off-by: Li Zetao <lizetao1@huawei.com>
Link: https://lore.kernel.org/r/20230803134805.1037251-1-lizetao1@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mpc512x-psc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c
index 99aeef28a477..5cecca1bef02 100644
--- a/drivers/spi/spi-mpc512x-psc.c
+++ b/drivers/spi/spi-mpc512x-psc.c
@@ -53,7 +53,7 @@ struct mpc512x_psc_spi {
 	int type;
 	void __iomem *psc;
 	struct mpc512x_psc_fifo __iomem *fifo;
-	unsigned int irq;
+	int irq;
 	u8 bits_per_word;
 	u32 mclk_rate;
 

From 88362275240303455bbec05c249daa84aff07059 Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@amd.com>
Date: Thu, 3 Aug 2023 09:24:24 +0200
Subject: [PATCH 0731/2047] dt-bindings: spi: spi-cadence: Describe
 power-domains property

ZynqMP Cadence SPI IP core has own power domain that's why describe it as
optional property.

Signed-off-by: Michal Simek <michal.simek@amd.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/987430ee905fd299fe962663d94f848b341c87df.1691047461.git.michal.simek@amd.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-cadence.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/spi/spi-cadence.yaml b/Documentation/devicetree/bindings/spi/spi-cadence.yaml
index b7552739b554..9eda7f0d2869 100644
--- a/Documentation/devicetree/bindings/spi/spi-cadence.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-cadence.yaml
@@ -49,6 +49,9 @@ properties:
     enum: [ 0, 1 ]
     default: 0
 
+  power-domains:
+    maxItems: 1
+
 required:
   - compatible
   - reg

From ffae65fb1ae4738151158b4435fad822cb1ca29c Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@amd.com>
Date: Thu, 3 Aug 2023 09:24:25 +0200
Subject: [PATCH 0732/2047] dt-bindings: spi: spi-cadence: Add label property

Add a label property to allow a custom name to be used for identifying the
controller on a board. This is useful when there is more than one
controller present. Similar change was done by commit 412b7a521c30
("dt-bindings: eeprom: at24: Add label property for AT24").

Signed-off-by: Michal Simek <michal.simek@amd.com>
Link: https://lore.kernel.org/r/82cd1a57397867b5a1039cd15244344c02a3ece1.1691047461.git.michal.simek@amd.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-cadence.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/spi/spi-cadence.yaml b/Documentation/devicetree/bindings/spi/spi-cadence.yaml
index 9eda7f0d2869..d4b61b0e8301 100644
--- a/Documentation/devicetree/bindings/spi/spi-cadence.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-cadence.yaml
@@ -52,6 +52,9 @@ properties:
   power-domains:
     maxItems: 1
 
+  label:
+    description: Descriptive name of the SPI controller.
+
 required:
   - compatible
   - reg

From 3f0b0966b30982e843950b170b7a9ddfd8094428 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 31 Jul 2023 20:56:35 +0200
Subject: [PATCH 0733/2047] cpuidle: teo: Update idle duration estimate when
 choosing shallower state

The TEO governor takes CPU utilization into account by refining idle state
selection when the utilization is above a certain threshold.  This is done by
choosing an idle state shallower than the previously selected one.

However, when doing this, the idle duration estimate needs to be
adjusted so as to prevent the scheduler tick from being stopped when the
candidate idle state is shallow, which may lead to excessive energy
usage if the CPU is not woken up quickly enough going forward.
Moreover, if the scheduler tick has been stopped already and the new
idle duration estimate is too small, the replacement candidate state
cannot be used.

Modify the relevant code to take the above observations into account.

Fixes: 9ce0f7c4bc64 ("cpuidle: teo: Introduce util-awareness")
Link: https://lore.kernel.org/linux-pm/CAJZ5v0jJxHj65r2HXBTd3wfbZtsg=_StzwO1kA5STDnaPe_dWA@mail.gmail.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-and-tested-by: Kajetan Puchalski <kajetan.puchalski@arm.com>
---
 drivers/cpuidle/governors/teo.c | 40 ++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 987fc5f3997d..2cdc711679a5 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -397,13 +397,23 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * the shallowest non-polling state and exit.
 	 */
 	if (drv->state_count < 3 && cpu_data->utilized) {
-		for (i = 0; i < drv->state_count; ++i) {
-			if (!dev->states_usage[i].disable &&
-			    !(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) {
-				idx = i;
-				goto end;
-			}
-		}
+		/* The CPU is utilized, so assume a short idle duration. */
+		duration_ns = teo_middle_of_bin(0, drv);
+		/*
+		 * If state 0 is enabled and it is not a polling one, select it
+		 * right away unless the scheduler tick has been stopped, in
+		 * which case care needs to be taken to leave the CPU in a deep
+		 * enough state in case it is not woken up any time soon after
+		 * all.  If state 1 is disabled, though, state 0 must be used
+		 * anyway.
+		 */
+		if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
+		    teo_time_ok(duration_ns)) || dev->states_usage[1].disable)
+			idx = 0;
+		else /* Assume that state 1 is not a polling one and use it. */
+			idx = 1;
+
+		goto end;
 	}
 
 	/*
@@ -539,10 +549,20 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
 	/*
 	 * If the CPU is being utilized over the threshold, choose a shallower
-	 * non-polling state to improve latency
+	 * non-polling state to improve latency, unless the scheduler tick has
+	 * been stopped already and the shallower state's target residency is
+	 * not sufficiently large.
 	 */
-	if (cpu_data->utilized)
-		idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
+	if (cpu_data->utilized) {
+		s64 span_ns;
+
+		i = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
+		span_ns = teo_middle_of_bin(i, drv);
+		if (teo_time_ok(span_ns)) {
+			idx = i;
+			duration_ns = span_ns;
+		}
+	}
 
 end:
 	/*

From 04bae4e2267d49eb9cfec403acd0462b8d00d637 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 31 Jul 2023 21:03:09 +0200
Subject: [PATCH 0734/2047] cpuidle: teo: Avoid stopping the tick unnecessarily
 when bailing out

When teo_select() is going to return early in some special cases, make
it avoid stopping the tick if the idle state to be returned is shallow.

In particular, never stop the tick if state 0 is to be returned.

Link: https://lore.kernel.org/linux-pm/CAJZ5v0jJxHj65r2HXBTd3wfbZtsg=_StzwO1kA5STDnaPe_dWA@mail.gmail.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-and-tested-by: Kajetan Puchalski <kajetan.puchalski@arm.com>
---
 drivers/cpuidle/governors/teo.c | 56 +++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 23 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 2cdc711679a5..1b76db3689d0 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -382,12 +382,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	/* Check if there is any choice in the first place. */
 	if (drv->state_count < 2) {
 		idx = 0;
-		goto end;
+		goto out_tick;
 	}
+
 	if (!dev->states_usage[0].disable) {
 		idx = 0;
 		if (drv->states[1].target_residency_ns > duration_ns)
-			goto end;
+			goto out_tick;
 	}
 
 	cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
@@ -408,11 +409,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 * anyway.
 		 */
 		if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
-		    teo_time_ok(duration_ns)) || dev->states_usage[1].disable)
+		    teo_time_ok(duration_ns)) || dev->states_usage[1].disable) {
 			idx = 0;
-		else /* Assume that state 1 is not a polling one and use it. */
-			idx = 1;
-
+			goto out_tick;
+		}
+		/* Assume that state 1 is not a polling one and use it. */
+		idx = 1;
 		goto end;
 	}
 
@@ -459,8 +461,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	/* Avoid unnecessary overhead. */
 	if (idx < 0) {
 		idx = 0; /* No states enabled, must use 0. */
-		goto end;
-	} else if (idx == idx0) {
+		goto out_tick;
+	}
+
+	if (idx == idx0) {
+		/*
+		 * This is the first enabled idle state, so use it, but do not
+		 * allow the tick to be stopped it is shallow enough.
+		 */
+		duration_ns = drv->states[idx].target_residency_ns;
 		goto end;
 	}
 
@@ -566,24 +575,25 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
 end:
 	/*
-	 * Don't stop the tick if the selected state is a polling one or if the
-	 * expected idle duration is shorter than the tick period length.
+	 * Allow the tick to be stopped unless the selected state is a polling
+	 * one or the expected idle duration is shorter than the tick period
+	 * length.
 	 */
-	if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
-	    duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
-		*stop_tick = false;
+	if ((!(drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
+	    duration_ns >= TICK_NSEC) || tick_nohz_tick_stopped())
+		return idx;
 
-		/*
-		 * The tick is not going to be stopped, so if the target
-		 * residency of the state to be returned is not within the time
-		 * till the closest timer including the tick, try to correct
-		 * that.
-		 */
-		if (idx > idx0 &&
-		    drv->states[idx].target_residency_ns > delta_tick)
-			idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
-	}
+	/*
+	 * The tick is not going to be stopped, so if the target residency of
+	 * the state to be returned is not within the time till the closest
+	 * timer including the tick, try to correct that.
+	 */
+	if (idx > idx0 &&
+	    drv->states[idx].target_residency_ns > delta_tick)
+		idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
 
+out_tick:
+	*stop_tick = false;
 	return idx;
 }
 

From 9a41e16f11103c37263271ff2a433f2f3885fced Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 31 Jul 2023 21:04:41 +0200
Subject: [PATCH 0735/2047] cpuidle: teo: Drop utilized from struct teo_cpu

Because the utilized field in struct teo_cpu is only used locally in
teo_select(), replace it with a local variable in that function.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-and-tested-by: Kajetan Puchalski <kajetan.puchalski@arm.com>
---
 drivers/cpuidle/governors/teo.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 1b76db3689d0..3915e5639790 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -187,7 +187,6 @@ struct teo_bin {
  * @next_recent_idx: Index of the next @recent_idx entry to update.
  * @recent_idx: Indices of bins corresponding to recent "intercepts".
  * @util_threshold: Threshold above which the CPU is considered utilized
- * @utilized: Whether the last sleep on the CPU happened while utilized
  */
 struct teo_cpu {
 	s64 time_span_ns;
@@ -197,7 +196,6 @@ struct teo_cpu {
 	int next_recent_idx;
 	int recent_idx[NR_RECENT];
 	unsigned long util_threshold;
-	bool utilized;
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -366,6 +364,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	int idx0 = 0, idx = -1;
 	bool alt_intercepts, alt_recent;
 	ktime_t delta_tick;
+	bool cpu_utilized;
 	s64 duration_ns;
 	int i;
 
@@ -391,13 +390,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			goto out_tick;
 	}
 
-	cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
+	cpu_utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
 	/*
 	 * If the CPU is being utilized over the threshold and there are only 2
 	 * states to choose from, the metrics need not be considered, so choose
 	 * the shallowest non-polling state and exit.
 	 */
-	if (drv->state_count < 3 && cpu_data->utilized) {
+	if (drv->state_count < 3 && cpu_utilized) {
 		/* The CPU is utilized, so assume a short idle duration. */
 		duration_ns = teo_middle_of_bin(0, drv);
 		/*
@@ -562,7 +561,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * been stopped already and the shallower state's target residency is
 	 * not sufficiently large.
 	 */
-	if (cpu_data->utilized) {
+	if (cpu_utilized) {
 		s64 span_ns;
 
 		i = teo_find_shallower_state(drv, dev, idx, duration_ns, true);

From fcce1c6cb156d79debaf84f63233f146540031b1 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 26 Jul 2023 16:11:43 -0700
Subject: [PATCH 0736/2047] x86/paravirt: Fix tlb_remove_table function
 callback prototype warning

Under W=1, this warning is visible in Clang 16 and newer:

arch/x86/kernel/paravirt.c:337:4: warning: cast from 'void (*)(struct mmu_gather *, struct page *)' to 'void (*)(struct mmu_gather *, void *)' converts to incompatible function type [-Wcast-function-type-strict]
                           (void (*)(struct mmu_gather *, void *))tlb_remove_page,
                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Add a direct wrapper instead, which will make this warning (and
potential KCFI failures) go away.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202307260332.pJntWR6o-lkp@intel.com/
Cc: Juergen Gross <jgross@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Ajay Kaher <akaher@vmware.com>
Cc: Alexey Makhalov <amakhalov@vmware.com>
Cc: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: virtualization@lists.linux-foundation.org
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20230726231139.never.601-kees@kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/x86/kernel/paravirt.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ac10b46c5832..23d4d7114473 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -79,6 +79,11 @@ void __init native_pv_lock_init(void)
 		static_branch_disable(&virt_spin_lock_key);
 }
 
+static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+	tlb_remove_page(tlb, table);
+}
+
 unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr,
 			    unsigned int len)
 {
@@ -295,8 +300,7 @@ struct paravirt_patch_template pv_ops = {
 	.mmu.flush_tlb_kernel	= native_flush_tlb_global,
 	.mmu.flush_tlb_one_user	= native_flush_tlb_one_user,
 	.mmu.flush_tlb_multi	= native_flush_tlb_multi,
-	.mmu.tlb_remove_table	=
-			(void (*)(struct mmu_gather *, void *))tlb_remove_page,
+	.mmu.tlb_remove_table	= native_tlb_remove_table,
 
 	.mmu.exit_mmap		= paravirt_nop,
 	.mmu.notify_page_enc_status_changed	= paravirt_nop,

From 9a6913feb46c601e0895fc9f89b715b90a4cbb87 Mon Sep 17 00:00:00 2001
From: Franck LENORMAND <franck.lenormand@nxp.com>
Date: Mon, 24 Jul 2023 08:52:29 +0200
Subject: [PATCH 0737/2047] crypto: caam - Change structure type representing
 DECO MID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The structure partid is not suitable to represent the DECO MID register.

This patch replace partid by masterid which is more appropriate.

Signed-off-by: Franck LENORMAND <franck.lenormand@nxp.com>
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Meenakshi Aggarwal <meenakshi.aggarwal@nxp.com>
Reviewed-by: Gaurav Jain <gaurav.jain@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/regs.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 189e74c21f0c..0f87bd365582 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -459,12 +459,6 @@ struct masterid {
 	u32 liodn_ls;	/* LIODN for non-sequence and seq access */
 };
 
-/* Partition ID for DMA configuration */
-struct partid {
-	u32 rsvd1;
-	u32 pidr;	/* partition ID, DECO */
-};
-
 /* RNGB test mode (replicated twice in some configurations) */
 /* Padded out to 0x100 */
 struct rngtst {
@@ -590,7 +584,7 @@ struct caam_ctrl {
 	u32 deco_rsr;			/* DECORSR - Deco Request Source */
 	u32 rsvd11;
 	u32 deco_rq;			/* DECORR - DECO Request */
-	struct partid deco_mid[5];	/* DECOxLIODNR - 1 per DECO */
+	struct masterid deco_mid[5];	/* DECOxLIODNR - 1 per DECO */
 	u32 rsvd5[22];
 
 	/* DECO Availability/Reset Section			120-3ff */

From 322d74752c28a71fbca3650b98c21c58d25414a8 Mon Sep 17 00:00:00 2001
From: Horia Geanta <horia.geanta@nxp.com>
Date: Mon, 24 Jul 2023 08:52:30 +0200
Subject: [PATCH 0738/2047] crypto: caam - add power management support

Add support for suspend and resume operation for PM in CAAM driver.

When the CAAM goes in suspend, the hardware is considered to do nothing.

On some platforms, the power of the CAAM is not turned off so it keeps
its configuration.
On other platforms, it doesn't so it is necessary to save the state of
the CAAM:
 - JRs MID
 - Address of input and output rings

Signed-off-by: Horia Geanta <horia.geanta@nxp.com>
Signed-off-by: Victoria Milhoan <vicki.milhoan@freescale.com>
Signed-off-by: Dan Douglass <dan.douglass@nxp.com>
Signed-off-by: Vipul Kumar <vipul_kumar@mentor.com>
Signed-off-by: Franck LENORMAND <franck.lenormand@nxp.com>
Signed-off-by: Meenakshi Aggarwal <meenakshi.aggarwal@nxp.com>
Reviewed-by: Gaurav Jain <gaurav.jain@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/ctrl.c   | 106 +++++++++++++++++++
 drivers/crypto/caam/intern.h |  25 ++++-
 drivers/crypto/caam/jr.c     | 193 +++++++++++++++++++++++++++++++----
 drivers/crypto/caam/regs.h   |   3 +-
 4 files changed, 306 insertions(+), 21 deletions(-)

diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index ff9ddbbca377..a7a4583107f4 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -740,6 +740,109 @@ static int caam_ctrl_rng_init(struct device *dev)
 	return 0;
 }
 
+/* Indicate if the internal state of the CAAM is lost during PM */
+static int caam_off_during_pm(void)
+{
+	bool not_off_during_pm = of_machine_is_compatible("fsl,imx6q") ||
+				 of_machine_is_compatible("fsl,imx6qp") ||
+				 of_machine_is_compatible("fsl,imx6dl");
+
+	return not_off_during_pm ? 0 : 1;
+}
+
+static void caam_state_save(struct device *dev)
+{
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(dev);
+	struct caam_ctl_state *state = &ctrlpriv->state;
+	struct caam_ctrl __iomem *ctrl = ctrlpriv->ctrl;
+	u32 deco_inst, jr_inst;
+	int i;
+
+	state->mcr = rd_reg32(&ctrl->mcr);
+	state->scfgr = rd_reg32(&ctrl->scfgr);
+
+	deco_inst = (rd_reg32(&ctrl->perfmon.cha_num_ms) &
+		     CHA_ID_MS_DECO_MASK) >> CHA_ID_MS_DECO_SHIFT;
+	for (i = 0; i < deco_inst; i++) {
+		state->deco_mid[i].liodn_ms =
+			rd_reg32(&ctrl->deco_mid[i].liodn_ms);
+		state->deco_mid[i].liodn_ls =
+			rd_reg32(&ctrl->deco_mid[i].liodn_ls);
+	}
+
+	jr_inst = (rd_reg32(&ctrl->perfmon.cha_num_ms) &
+		   CHA_ID_MS_JR_MASK) >> CHA_ID_MS_JR_SHIFT;
+	for (i = 0; i < jr_inst; i++) {
+		state->jr_mid[i].liodn_ms =
+			rd_reg32(&ctrl->jr_mid[i].liodn_ms);
+		state->jr_mid[i].liodn_ls =
+			rd_reg32(&ctrl->jr_mid[i].liodn_ls);
+	}
+}
+
+static void caam_state_restore(const struct device *dev)
+{
+	const struct caam_drv_private *ctrlpriv = dev_get_drvdata(dev);
+	const struct caam_ctl_state *state = &ctrlpriv->state;
+	struct caam_ctrl __iomem *ctrl = ctrlpriv->ctrl;
+	u32 deco_inst, jr_inst;
+	int i;
+
+	wr_reg32(&ctrl->mcr, state->mcr);
+	wr_reg32(&ctrl->scfgr, state->scfgr);
+
+	deco_inst = (rd_reg32(&ctrl->perfmon.cha_num_ms) &
+		     CHA_ID_MS_DECO_MASK) >> CHA_ID_MS_DECO_SHIFT;
+	for (i = 0; i < deco_inst; i++) {
+		wr_reg32(&ctrl->deco_mid[i].liodn_ms,
+			 state->deco_mid[i].liodn_ms);
+		wr_reg32(&ctrl->deco_mid[i].liodn_ls,
+			 state->deco_mid[i].liodn_ls);
+	}
+
+	jr_inst = (rd_reg32(&ctrl->perfmon.cha_num_ms) &
+		   CHA_ID_MS_JR_MASK) >> CHA_ID_MS_JR_SHIFT;
+	for (i = 0; i < jr_inst; i++) {
+		wr_reg32(&ctrl->jr_mid[i].liodn_ms,
+			 state->jr_mid[i].liodn_ms);
+		wr_reg32(&ctrl->jr_mid[i].liodn_ls,
+			 state->jr_mid[i].liodn_ls);
+	}
+
+	if (ctrlpriv->virt_en == 1)
+		clrsetbits_32(&ctrl->jrstart, 0, JRSTART_JR0_START |
+			      JRSTART_JR1_START | JRSTART_JR2_START |
+			      JRSTART_JR3_START);
+}
+
+static int caam_ctrl_suspend(struct device *dev)
+{
+	const struct caam_drv_private *ctrlpriv = dev_get_drvdata(dev);
+
+	if (ctrlpriv->caam_off_during_pm && !ctrlpriv->optee_en)
+		caam_state_save(dev);
+
+	return 0;
+}
+
+static int caam_ctrl_resume(struct device *dev)
+{
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(dev);
+	int ret = 0;
+
+	if (ctrlpriv->caam_off_during_pm && !ctrlpriv->optee_en) {
+		caam_state_restore(dev);
+
+		/* HW and rng will be reset so deinstantiation can be removed */
+		devm_remove_action(dev, devm_deinstantiate_rng, dev);
+		ret = caam_ctrl_rng_init(dev);
+	}
+
+	return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(caam_ctrl_pm_ops, caam_ctrl_suspend, caam_ctrl_resume);
+
 /* Probe routine for CAAM top (controller) level */
 static int caam_probe(struct platform_device *pdev)
 {
@@ -771,6 +874,8 @@ static int caam_probe(struct platform_device *pdev)
 
 	caam_imx = (bool)imx_soc_match;
 
+	ctrlpriv->caam_off_during_pm = caam_imx && caam_off_during_pm();
+
 	if (imx_soc_match) {
 		/*
 		 * Until Layerscape and i.MX OP-TEE get in sync,
@@ -1033,6 +1138,7 @@ static struct platform_driver caam_driver = {
 	.driver = {
 		.name = "caam",
 		.of_match_table = caam_match,
+		.pm = &caam_ctrl_pm_ops,
 	},
 	.probe       = caam_probe,
 };
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index b4f7bf77f487..e51320150872 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -4,7 +4,7 @@
  * Private/internal definitions between modules
  *
  * Copyright 2008-2011 Freescale Semiconductor, Inc.
- * Copyright 2019 NXP
+ * Copyright 2019, 2023 NXP
  */
 
 #ifndef INTERN_H
@@ -47,6 +47,16 @@ struct caam_jrentry_info {
 	u32 desc_size;	/* Stored size for postprocessing, header derived */
 };
 
+struct caam_jr_state {
+	dma_addr_t inpbusaddr;
+	dma_addr_t outbusaddr;
+};
+
+struct caam_jr_dequeue_params {
+	struct device *dev;
+	int enable_itr;
+};
+
 /* Private sub-storage for a single JobR */
 struct caam_drv_private_jr {
 	struct list_head	list_node;	/* Job Ring device list */
@@ -54,6 +64,7 @@ struct caam_drv_private_jr {
 	int ridx;
 	struct caam_job_ring __iomem *rregs;	/* JobR's register space */
 	struct tasklet_struct irqtask;
+	struct caam_jr_dequeue_params tasklet_params;
 	int irq;			/* One per queue */
 	bool hwrng;
 
@@ -71,6 +82,15 @@ struct caam_drv_private_jr {
 	int tail;			/* entinfo (s/w ring) tail index */
 	void *outring;			/* Base of output ring, DMA-safe */
 	struct crypto_engine *engine;
+
+	struct caam_jr_state state;	/* State of the JR during PM */
+};
+
+struct caam_ctl_state {
+	struct masterid deco_mid[16];
+	struct masterid jr_mid[4];
+	u32 mcr;
+	u32 scfgr;
 };
 
 /*
@@ -116,6 +136,9 @@ struct caam_drv_private {
 	struct dentry *ctl; /* controller dir */
 	struct debugfs_blob_wrapper ctl_kek_wrap, ctl_tkek_wrap, ctl_tdsk_wrap;
 #endif
+
+	int caam_off_during_pm;		/* If the CAAM is reset after suspend */
+	struct caam_ctl_state state;	/* State of the CTL during PM */
 };
 
 #ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 96dea5304d22..316180d26f8a 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -117,6 +117,23 @@ static int caam_jr_flush(struct device *dev)
 	return caam_jr_stop_processing(dev, JRCR_RESET);
 }
 
+/* The resume can be used after a park or a flush if CAAM has not been reset */
+static int caam_jr_restart_processing(struct device *dev)
+{
+	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
+	u32 halt_status = rd_reg32(&jrp->rregs->jrintstatus) &
+			  JRINT_ERR_HALT_MASK;
+
+	/* Check that the flush/park is completed */
+	if (halt_status != JRINT_ERR_HALT_COMPLETE)
+		return -1;
+
+	/* Resume processing of jobs */
+	clrsetbits_32(&jrp->rregs->jrintstatus, 0, JRINT_ERR_HALT_COMPLETE);
+
+	return 0;
+}
+
 static int caam_reset_hw_jr(struct device *dev)
 {
 	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
@@ -245,7 +262,8 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
 static void caam_jr_dequeue(unsigned long devarg)
 {
 	int hw_idx, sw_idx, i, head, tail;
-	struct device *dev = (struct device *)devarg;
+	struct caam_jr_dequeue_params *params = (void *)devarg;
+	struct device *dev = params->dev;
 	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
 	void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
 	u32 *userdesc, userstatus;
@@ -319,8 +337,9 @@ static void caam_jr_dequeue(unsigned long devarg)
 		outring_used--;
 	}
 
-	/* reenable / unmask IRQs */
-	clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0);
+	if (params->enable_itr)
+		/* reenable / unmask IRQs */
+		clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0);
 }
 
 /**
@@ -470,6 +489,29 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
 }
 EXPORT_SYMBOL(caam_jr_enqueue);
 
+static void caam_jr_init_hw(struct device *dev, dma_addr_t inpbusaddr,
+			    dma_addr_t outbusaddr)
+{
+	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
+
+	wr_reg64(&jrp->rregs->inpring_base, inpbusaddr);
+	wr_reg64(&jrp->rregs->outring_base, outbusaddr);
+	wr_reg32(&jrp->rregs->inpring_size, JOBR_DEPTH);
+	wr_reg32(&jrp->rregs->outring_size, JOBR_DEPTH);
+
+	/* Select interrupt coalescing parameters */
+	clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JOBR_INTC |
+		      (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) |
+		      (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT));
+}
+
+static void caam_jr_reset_index(struct caam_drv_private_jr *jrp)
+{
+	jrp->out_ring_read_index = 0;
+	jrp->head = 0;
+	jrp->tail = 0;
+}
+
 /*
  * Init JobR independent of platform property detection
  */
@@ -506,25 +548,16 @@ static int caam_jr_init(struct device *dev)
 		jrp->entinfo[i].desc_addr_dma = !0;
 
 	/* Setup rings */
-	jrp->out_ring_read_index = 0;
-	jrp->head = 0;
-	jrp->tail = 0;
-
-	wr_reg64(&jrp->rregs->inpring_base, inpbusaddr);
-	wr_reg64(&jrp->rregs->outring_base, outbusaddr);
-	wr_reg32(&jrp->rregs->inpring_size, JOBR_DEPTH);
-	wr_reg32(&jrp->rregs->outring_size, JOBR_DEPTH);
-
+	caam_jr_reset_index(jrp);
 	jrp->inpring_avail = JOBR_DEPTH;
+	caam_jr_init_hw(dev, inpbusaddr, outbusaddr);
 
 	spin_lock_init(&jrp->inplock);
 
-	/* Select interrupt coalescing parameters */
-	clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JOBR_INTC |
-		      (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) |
-		      (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT));
-
-	tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev);
+	jrp->tasklet_params.dev = dev;
+	jrp->tasklet_params.enable_itr = 1;
+	tasklet_init(&jrp->irqtask, caam_jr_dequeue,
+		     (unsigned long)&jrp->tasklet_params);
 
 	/* Connect job ring interrupt handler. */
 	error = devm_request_irq(dev, jrp->irq, caam_jr_interrupt, IRQF_SHARED,
@@ -635,11 +668,134 @@ static int caam_jr_probe(struct platform_device *pdev)
 
 	atomic_set(&jrpriv->tfm_count, 0);
 
+	device_init_wakeup(&pdev->dev, 1);
+	device_set_wakeup_enable(&pdev->dev, false);
+
 	register_algs(jrpriv, jrdev->parent);
 
 	return 0;
 }
 
+static void caam_jr_get_hw_state(struct device *dev)
+{
+	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
+
+	jrp->state.inpbusaddr = rd_reg64(&jrp->rregs->inpring_base);
+	jrp->state.outbusaddr = rd_reg64(&jrp->rregs->outring_base);
+}
+
+static int caam_jr_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct caam_drv_private_jr *jrpriv = platform_get_drvdata(pdev);
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(dev->parent);
+	struct caam_jr_dequeue_params suspend_params = {
+		.dev = dev,
+		.enable_itr = 0,
+	};
+
+	/* Remove the node from Physical JobR list maintained by driver */
+	spin_lock(&driver_data.jr_alloc_lock);
+	list_del(&jrpriv->list_node);
+	spin_unlock(&driver_data.jr_alloc_lock);
+
+	if (jrpriv->hwrng)
+		caam_rng_exit(dev->parent);
+
+	if (ctrlpriv->caam_off_during_pm) {
+		int err;
+
+		tasklet_disable(&jrpriv->irqtask);
+
+		/* mask itr to call flush */
+		clrsetbits_32(&jrpriv->rregs->rconfig_lo, 0, JRCFG_IMSK);
+
+		/* Invalid job in process */
+		err = caam_jr_flush(dev);
+		if (err) {
+			dev_err(dev, "Failed to flush\n");
+			return err;
+		}
+
+		/* Dequeing jobs flushed */
+		caam_jr_dequeue((unsigned long)&suspend_params);
+
+		/* Save state */
+		caam_jr_get_hw_state(dev);
+	} else if (device_may_wakeup(&pdev->dev)) {
+		enable_irq_wake(jrpriv->irq);
+	}
+
+	return 0;
+}
+
+static int caam_jr_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct caam_drv_private_jr *jrpriv = platform_get_drvdata(pdev);
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(dev->parent);
+
+	if (ctrlpriv->caam_off_during_pm) {
+		u64 inp_addr;
+		int err;
+
+		/*
+		 * Check if the CAAM has been resetted checking the address of
+		 * the input ring
+		 */
+		inp_addr = rd_reg64(&jrpriv->rregs->inpring_base);
+		if (inp_addr != 0) {
+			/* JR still has some configuration */
+			if (inp_addr == jrpriv->state.inpbusaddr) {
+				/* JR has not been resetted */
+				err = caam_jr_restart_processing(dev);
+				if (err) {
+					dev_err(dev,
+						"Restart processing failed\n");
+					return err;
+				}
+
+				tasklet_enable(&jrpriv->irqtask);
+
+				clrsetbits_32(&jrpriv->rregs->rconfig_lo,
+					      JRCFG_IMSK, 0);
+
+				goto add_jr;
+			} else if (ctrlpriv->optee_en) {
+				/* JR has been used by OPTEE, reset it */
+				err = caam_reset_hw_jr(dev);
+				if (err) {
+					dev_err(dev, "Failed to reset JR\n");
+					return err;
+				}
+			} else {
+				/* No explanation, return error */
+				return -EIO;
+			}
+		}
+
+		caam_jr_reset_index(jrpriv);
+		caam_jr_init_hw(dev, jrpriv->state.inpbusaddr,
+				jrpriv->state.outbusaddr);
+
+		tasklet_enable(&jrpriv->irqtask);
+	} else if (device_may_wakeup(&pdev->dev)) {
+		disable_irq_wake(jrpriv->irq);
+	}
+
+add_jr:
+	spin_lock(&driver_data.jr_alloc_lock);
+	list_add_tail(&jrpriv->list_node, &driver_data.jr_list);
+	spin_unlock(&driver_data.jr_alloc_lock);
+
+	if (jrpriv->hwrng)
+		jrpriv->hwrng = !caam_rng_init(dev->parent);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(caam_jr_pm_ops, caam_jr_suspend, caam_jr_resume);
+
 static const struct of_device_id caam_jr_match[] = {
 	{
 		.compatible = "fsl,sec-v4.0-job-ring",
@@ -655,6 +811,7 @@ static struct platform_driver caam_jr_driver = {
 	.driver = {
 		.name = "caam_jr",
 		.of_match_table = caam_jr_match,
+		.pm = &caam_jr_pm_ops,
 	},
 	.probe       = caam_jr_probe,
 	.remove      = caam_jr_remove,
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 0f87bd365582..873df9de9890 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -584,8 +584,7 @@ struct caam_ctrl {
 	u32 deco_rsr;			/* DECORSR - Deco Request Source */
 	u32 rsvd11;
 	u32 deco_rq;			/* DECORR - DECO Request */
-	struct masterid deco_mid[5];	/* DECOxLIODNR - 1 per DECO */
-	u32 rsvd5[22];
+	struct masterid deco_mid[16];	/* DECOxLIODNR - 1 per DECO */
 
 	/* DECO Availability/Reset Section			120-3ff */
 	u32 deco_avail;		/* DAR - DECO availability */

From 355bf65080399b0c1aba34368331802ce1998aef Mon Sep 17 00:00:00 2001
From: Wang Ming <machel@vivo.com>
Date: Wed, 26 Jul 2023 20:04:41 +0800
Subject: [PATCH 0739/2047] crypto: atmel - Use dev_err_probe instead of
 dev_err

It is possible that dma_request_chan will return EPROBE_DEFER,
which means that dd->dev is not ready yet. In this case,
dev_err(dd->dev), there will be no output. This patch fixes the bug.

Signed-off-by: Wang Ming <machel@vivo.com>
Reviewed-by: Ryan Wanner <Ryan.Wanner@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-sha.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index f2031f934be9..54fec72dfba2 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -2499,8 +2499,8 @@ static int atmel_sha_dma_init(struct atmel_sha_dev *dd)
 {
 	dd->dma_lch_in.chan = dma_request_chan(dd->dev, "tx");
 	if (IS_ERR(dd->dma_lch_in.chan)) {
-		dev_err(dd->dev, "DMA channel is not available\n");
-		return PTR_ERR(dd->dma_lch_in.chan);
+		return dev_err_probe(dd->dev, PTR_ERR(dd->dma_lch_in.chan),
+			"DMA channel is not available\n");
 	}
 
 	dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base +

From 6a4b8aa0a916b39a39175584c07222434fa6c6ef Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 26 Jul 2023 22:53:19 +0100
Subject: [PATCH 0740/2047] crypto: af_alg - Fix missing initialisation
 affecting gcm-aes-s390
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix af_alg_alloc_areq() to initialise areq->first_rsgl.sgl.sgt.sgl to point
to the scatterlist array in areq->first_rsgl.sgl.sgl.

Without this, the gcm-aes-s390 driver will oops when it tries to do
gcm_walk_start() on req->dst because req->dst is set to the value of
areq->first_rsgl.sgl.sgl by _aead_recvmsg() calling
aead_request_set_crypt().

The problem comes if an empty ciphertext is passed: the loop in
af_alg_get_rsgl() just passes straight out and doesn't set areq->first_rsgl
up.

This isn't a problem on x86_64 using gcmaes_crypt_by_sg() because, as far
as I can tell, that ignores req->dst and only uses req->src[*].

[*] Is this a bug in aesni-intel_glue.c?

The s390x oops looks something like:

 Unable to handle kernel pointer dereference in virtual kernel address space
 Failing address: 0000000a00000000 TEID: 0000000a00000803
 Fault in home space mode while using kernel ASCE.
 AS:00000000a43a0007 R3:0000000000000024
 Oops: 003b ilc:2 [#1] SMP
 ...
 Call Trace:
  [<000003ff7fc3d47e>] gcm_walk_start+0x16/0x28 [aes_s390]
  [<00000000a2a342f2>] crypto_aead_decrypt+0x9a/0xb8
  [<00000000a2a60888>] aead_recvmsg+0x478/0x698
  [<00000000a2e519a0>] sock_recvmsg+0x70/0xb0
  [<00000000a2e51a56>] sock_read_iter+0x76/0xa0
  [<00000000a273e066>] vfs_read+0x26e/0x2a8
  [<00000000a273e8c4>] ksys_read+0xbc/0x100
  [<00000000a311d808>] __do_syscall+0x1d0/0x1f8
  [<00000000a312ff30>] system_call+0x70/0x98
 Last Breaking-Event-Address:
  [<000003ff7fc3e6b4>] gcm_aes_crypt+0x104/0xa68 [aes_s390]

Fixes: c1abe6f570af ("crypto: af_alg: Use extract_iter_to_sg() to create scatterlists")
Reported-by: Ondrej Mosnáček <omosnacek@gmail.com>
Link: https://lore.kernel.org/r/CAAUqJDuRkHE8fPgZJGaKjUjd3QfGwzfumuJBmStPqBhubxyk_A@mail.gmail.com/
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Herbert Xu <herbert@gondor.apana.org.au>
cc: Sven Schnelle <svens@linux.ibm.com>
cc: Harald Freudenberger <freude@linux.vnet.ibm.com>
cc: "David S. Miller" <davem@davemloft.net>
cc: Paolo Abeni <pabeni@redhat.com>
cc: linux-crypto@vger.kernel.org
cc: linux-s390@vger.kernel.org
cc: regressions@lists.linux.dev
Tested-by: Sven Schnelle <svens@linux.ibm.com>
Tested-by: Ondrej Mosnáček <omosnacek@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/af_alg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 06b15b9f661c..9ee8575d3b1a 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -1192,6 +1192,7 @@ struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
 
 	areq->areqlen = areqlen;
 	areq->sk = sk;
+	areq->first_rsgl.sgl.sgt.sgl = areq->first_rsgl.sgl.sgl;
 	areq->last_rsgl = NULL;
 	INIT_LIST_HEAD(&areq->rsgl_list);
 	areq->tsgl = NULL;

From 0788257aeebee9b8413c1c29e7d2029329b1a82e Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 28 Jul 2023 07:48:27 -0600
Subject: [PATCH 0741/2047] hwrng: Explicitly include correct DT includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it was merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/atmel-rng.c     | 2 +-
 drivers/char/hw_random/bcm2835-rng.c   | 3 +--
 drivers/char/hw_random/ingenic-trng.c  | 2 +-
 drivers/char/hw_random/iproc-rng200.c  | 3 +--
 drivers/char/hw_random/npcm-rng.c      | 3 +--
 drivers/char/hw_random/omap-rng.c      | 2 --
 drivers/char/hw_random/omap3-rom-rng.c | 1 -
 drivers/char/hw_random/pasemi-rng.c    | 3 +--
 drivers/char/hw_random/pic32-rng.c     | 5 ++---
 drivers/char/hw_random/stm32-rng.c     | 3 ++-
 drivers/char/hw_random/xgene-rng.c     | 5 ++---
 drivers/char/hw_random/xiphera-trng.c  | 1 -
 12 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index b8effe77d80f..a37367ebcbac 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -15,7 +15,7 @@
 #include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/hw_random.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c
index e98fcac578d6..e19b0f9f48b9 100644
--- a/drivers/char/hw_random/bcm2835-rng.c
+++ b/drivers/char/hw_random/bcm2835-rng.c
@@ -8,8 +8,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
 #include <linux/clk.h>
diff --git a/drivers/char/hw_random/ingenic-trng.c b/drivers/char/hw_random/ingenic-trng.c
index 3967a8dbe967..1672320e7d3d 100644
--- a/drivers/char/hw_random/ingenic-trng.c
+++ b/drivers/char/hw_random/ingenic-trng.c
@@ -11,8 +11,8 @@
 #include <linux/hw_random.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
diff --git a/drivers/char/hw_random/iproc-rng200.c b/drivers/char/hw_random/iproc-rng200.c
index 06bc060534d8..34df3f0d3e45 100644
--- a/drivers/char/hw_random/iproc-rng200.c
+++ b/drivers/char/hw_random/iproc-rng200.c
@@ -12,8 +12,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 
diff --git a/drivers/char/hw_random/npcm-rng.c b/drivers/char/hw_random/npcm-rng.c
index 9903d0357e06..8a304b754217 100644
--- a/drivers/char/hw_random/npcm-rng.c
+++ b/drivers/char/hw_random/npcm-rng.c
@@ -8,12 +8,11 @@
 #include <linux/init.h>
 #include <linux/random.h>
 #include <linux/err.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/hw_random.h>
 #include <linux/delay.h>
-#include <linux/of_irq.h>
 #include <linux/pm_runtime.h>
-#include <linux/of_device.h>
 
 #define NPCM_RNGCS_REG		0x00	/* Control and status register */
 #define NPCM_RNGD_REG		0x04	/* Data register */
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index 00ff96703dd2..be03f76a2a80 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -26,8 +26,6 @@
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
 #include <linux/interrupt.h>
 #include <linux/clk.h>
 #include <linux/io.h>
diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c
index f06e4f95114f..18dc46b1b58e 100644
--- a/drivers/char/hw_random/omap3-rom-rng.c
+++ b/drivers/char/hw_random/omap3-rom-rng.c
@@ -20,7 +20,6 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
diff --git a/drivers/char/hw_random/pasemi-rng.c b/drivers/char/hw_random/pasemi-rng.c
index 2498d4ef9fe2..6959d6edd44c 100644
--- a/drivers/char/hw_random/pasemi-rng.c
+++ b/drivers/char/hw_random/pasemi-rng.c
@@ -9,11 +9,10 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/hw_random.h>
 #include <linux/delay.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/io.h>
 
 #define SDCRNG_CTL_REG			0x00
diff --git a/drivers/char/hw_random/pic32-rng.c b/drivers/char/hw_random/pic32-rng.c
index 1902f4389a3f..888e6f5cec1f 100644
--- a/drivers/char/hw_random/pic32-rng.c
+++ b/drivers/char/hw_random/pic32-rng.c
@@ -12,9 +12,8 @@
 #include <linux/hw_random.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -110,7 +109,7 @@ static struct platform_driver pic32_rng_driver = {
 	.probe		= pic32_rng_probe,
 	.driver		= {
 		.name	= "pic32-rng",
-		.of_match_table = of_match_ptr(pic32_rng_of_match),
+		.of_match_table = pic32_rng_of_match,
 	},
 };
 
diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c
index a6731cf0627a..efb6a9f9a11b 100644
--- a/drivers/char/hw_random/stm32-rng.c
+++ b/drivers/char/hw_random/stm32-rng.c
@@ -10,8 +10,9 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
 #include <linux/slab.h>
diff --git a/drivers/char/hw_random/xgene-rng.c b/drivers/char/hw_random/xgene-rng.c
index 7c8f3cb7c6af..c25bb169563d 100644
--- a/drivers/char/hw_random/xgene-rng.c
+++ b/drivers/char/hw_random/xgene-rng.c
@@ -15,9 +15,8 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
-#include <linux/of_irq.h>
-#include <linux/of_address.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 #include <linux/timer.h>
 
 #define RNG_MAX_DATUM			4
diff --git a/drivers/char/hw_random/xiphera-trng.c b/drivers/char/hw_random/xiphera-trng.c
index 2a9fea72b2e0..2c586d1fe8a9 100644
--- a/drivers/char/hw_random/xiphera-trng.c
+++ b/drivers/char/hw_random/xiphera-trng.c
@@ -7,7 +7,6 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/hw_random.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 

From 1ce1cd8208ad6060e4fcf6e09068c8954687c127 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 28 Jul 2023 13:50:15 -0600
Subject: [PATCH 0742/2047] hwrng: Enable COMPILE_TEST for more drivers

There's quite a few hwrng drivers which are easily enabled for
COMPILE_TEST, so let's enable them.

The dependency on HW_RANDOM is redundant, so drop that while we're here.

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/Kconfig       | 32 +++++++++++++++-------------
 drivers/char/hw_random/amd-rng.c     |  1 +
 drivers/char/hw_random/ingenic-rng.c |  2 +-
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 5d72b8da3c36..8de74dcfa18c 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -37,7 +37,7 @@ config HW_RANDOM_TIMERIOMEM
 
 config HW_RANDOM_INTEL
 	tristate "Intel HW Random Number Generator support"
-	depends on (X86 || IA64) && PCI
+	depends on (X86 || IA64 || COMPILE_TEST) && PCI
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number
@@ -50,7 +50,8 @@ config HW_RANDOM_INTEL
 
 config HW_RANDOM_AMD
 	tristate "AMD HW Random Number Generator support"
-	depends on (X86 || PPC_MAPLE) && PCI
+	depends on (X86 || PPC_MAPLE || COMPILE_TEST)
+	depends on PCI && HAS_IOPORT_MAP
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number
@@ -63,7 +64,7 @@ config HW_RANDOM_AMD
 
 config HW_RANDOM_ATMEL
 	tristate "Atmel Random Number Generator support"
-	depends on (ARCH_AT91 || COMPILE_TEST) && HAVE_CLK && OF
+	depends on (ARCH_AT91 || COMPILE_TEST)
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number
@@ -113,7 +114,8 @@ config HW_RANDOM_IPROC_RNG200
 
 config HW_RANDOM_GEODE
 	tristate "AMD Geode HW Random Number Generator support"
-	depends on X86_32 && PCI
+	depends on (X86_32 || COMPILE_TEST)
+	depends on PCI
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number
@@ -205,7 +207,7 @@ config HW_RANDOM_OCTEON
 
 config HW_RANDOM_PASEMI
 	tristate "PA Semi HW Random Number Generator support"
-	depends on PPC_PASEMI
+	depends on PPC_PASEMI || (PPC && COMPILE_TEST)
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number
@@ -228,7 +230,7 @@ config HW_RANDOM_VIRTIO
 
 config HW_RANDOM_MXC_RNGA
 	tristate "Freescale i.MX RNGA Random Number Generator"
-	depends on SOC_IMX31
+	depends on SOC_IMX31 || COMPILE_TEST
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number
@@ -241,7 +243,7 @@ config HW_RANDOM_MXC_RNGA
 
 config HW_RANDOM_IMX_RNGC
 	tristate "Freescale i.MX RNGC Random Number Generator"
-	depends on HAS_IOMEM && HAVE_CLK
+	depends on HAS_IOMEM
 	depends on SOC_IMX25 || SOC_IMX6SL || SOC_IMX6SLL || SOC_IMX6UL || COMPILE_TEST
 	default HW_RANDOM
 	help
@@ -256,8 +258,7 @@ config HW_RANDOM_IMX_RNGC
 
 config HW_RANDOM_INGENIC_RNG
 	tristate "Ingenic Random Number Generator support"
-	depends on HW_RANDOM
-	depends on MACH_JZ4780 || MACH_X1000
+	depends on MACH_JZ4780 || MACH_X1000 || COMPILE_TEST
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number Generator
@@ -323,7 +324,7 @@ config HW_RANDOM_POWERNV
 
 config HW_RANDOM_HISI
 	tristate "Hisilicon Random Number Generator support"
-	depends on HW_RANDOM && ARCH_HISI
+	depends on ARCH_HISI || COMPILE_TEST
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number
@@ -347,7 +348,7 @@ config HW_RANDOM_HISTB
 
 config HW_RANDOM_ST
 	tristate "ST Microelectronics HW Random Number Generator support"
-	depends on HW_RANDOM && (ARCH_STI || COMPILE_TEST)
+	depends on ARCH_STI || COMPILE_TEST
 	help
 	  This driver provides kernel-side support for the Random Number
 	  Generator hardware found on STi series of SoCs.
@@ -357,7 +358,7 @@ config HW_RANDOM_ST
 
 config HW_RANDOM_XGENE
 	tristate "APM X-Gene True Random Number Generator (TRNG) support"
-	depends on HW_RANDOM && ARCH_XGENE
+	depends on ARCH_XGENE || COMPILE_TEST
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number
@@ -370,7 +371,7 @@ config HW_RANDOM_XGENE
 
 config HW_RANDOM_STM32
 	tristate "STMicroelectronics STM32 random number generator"
-	depends on HW_RANDOM && (ARCH_STM32 || COMPILE_TEST)
+	depends on ARCH_STM32 || COMPILE_TEST
 	depends on HAS_IOMEM
 	default HW_RANDOM
 	help
@@ -385,7 +386,7 @@ config HW_RANDOM_STM32
 config HW_RANDOM_PIC32
 	tristate "Microchip PIC32 Random Number Generator support"
 	depends on MACH_PIC32 || COMPILE_TEST
-	default y
+	default HW_RANDOM if MACH_PIC32
 	help
 	  This driver provides kernel-side support for the Random Number
 	  Generator hardware found on a PIC32.
@@ -424,7 +425,8 @@ config HW_RANDOM_MESON
 
 config HW_RANDOM_CAVIUM
 	tristate "Cavium ThunderX Random Number Generator support"
-	depends on HW_RANDOM && PCI && ARCH_THUNDER
+	depends on PCI
+	depends on ARCH_THUNDER || (ARM64 && COMPILE_TEST)
 	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number
diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c
index 0555e3838bce..86162a13681e 100644
--- a/drivers/char/hw_random/amd-rng.c
+++ b/drivers/char/hw_random/amd-rng.c
@@ -26,6 +26,7 @@
 
 #include <linux/delay.h>
 #include <linux/hw_random.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
diff --git a/drivers/char/hw_random/ingenic-rng.c b/drivers/char/hw_random/ingenic-rng.c
index 055cfe59f519..4f18c3fa5427 100644
--- a/drivers/char/hw_random/ingenic-rng.c
+++ b/drivers/char/hw_random/ingenic-rng.c
@@ -95,7 +95,7 @@ static int ingenic_rng_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->base);
 	}
 
-	priv->version = (enum ingenic_rng_version)of_device_get_match_data(&pdev->dev);
+	priv->version = (enum ingenic_rng_version)(uintptr_t)of_device_get_match_data(&pdev->dev);
 
 	priv->rng.name = pdev->name;
 	priv->rng.init = ingenic_rng_init;

From ccbe77f7e45dfb4420f7f531b650c00c6e9c7507 Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin@ispras.ru>
Date: Fri, 4 Aug 2023 13:33:12 +0800
Subject: [PATCH 0743/2047] autofs: fix memory leak of waitqueues in
 autofs_catatonic_mode

Syzkaller reports a memory leak:

BUG: memory leak
unreferenced object 0xffff88810b279e00 (size 96):
  comm "syz-executor399", pid 3631, jiffies 4294964921 (age 23.870s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 08 9e 27 0b 81 88 ff ff  ..........'.....
    08 9e 27 0b 81 88 ff ff 00 00 00 00 00 00 00 00  ..'.............
  backtrace:
    [<ffffffff814cfc90>] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046
    [<ffffffff81bb75ca>] kmalloc include/linux/slab.h:576 [inline]
    [<ffffffff81bb75ca>] autofs_wait+0x3fa/0x9a0 fs/autofs/waitq.c:378
    [<ffffffff81bb88a7>] autofs_do_expire_multi+0xa7/0x3e0 fs/autofs/expire.c:593
    [<ffffffff81bb8c33>] autofs_expire_multi+0x53/0x80 fs/autofs/expire.c:619
    [<ffffffff81bb6972>] autofs_root_ioctl_unlocked+0x322/0x3b0 fs/autofs/root.c:897
    [<ffffffff81bb6a95>] autofs_root_ioctl+0x25/0x30 fs/autofs/root.c:910
    [<ffffffff81602a9c>] vfs_ioctl fs/ioctl.c:51 [inline]
    [<ffffffff81602a9c>] __do_sys_ioctl fs/ioctl.c:870 [inline]
    [<ffffffff81602a9c>] __se_sys_ioctl fs/ioctl.c:856 [inline]
    [<ffffffff81602a9c>] __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:856
    [<ffffffff84608225>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
    [<ffffffff84608225>] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
    [<ffffffff84800087>] entry_SYSCALL_64_after_hwframe+0x63/0xcd

autofs_wait_queue structs should be freed if their wait_ctr becomes zero.
Otherwise they will be lost.

In this case an AUTOFS_IOC_EXPIRE_MULTI ioctl is done, then a new
waitqueue struct is allocated in autofs_wait(), its initial wait_ctr
equals 2. After that wait_event_killable() is interrupted (it returns
-ERESTARTSYS), so that 'wq->name.name == NULL' condition may be not
satisfied. Actually, this condition can be satisfied when
autofs_wait_release() or autofs_catatonic_mode() is called and, what is
also important, wait_ctr is decremented in those places. Upon the exit of
autofs_wait(), wait_ctr is decremented to 1. Then the unmounting process
begins: kill_sb calls autofs_catatonic_mode(), which should have freed the
waitqueues, but it only decrements its usage counter to zero which is not
a correct behaviour.

edit:imk
This description is of course not correct. The umount performed as a result
of an expire is a umount of a mount that has been automounted, it's not the
autofs mount itself. They happen independently, usually after everything
mounted within the autofs file system has been expired away. If everything
hasn't been expired away the automount daemon can still exit leaving mounts
in place. But expires done in both cases will result in a notification that
calls autofs_wait_release() with a result status. The problem case is the
summary execution of of the automount daemon. In this case any waiting
processes won't be woken up until either they are terminated or the mount
is umounted.
end edit: imk

So in catatonic mode we should free waitqueues which counter becomes zero.

edit: imk
Initially I was concerned that the calling of autofs_wait_release() and
autofs_catatonic_mode() was not mutually exclusive but that can't be the
case (obviously) because the queue entry (or entries) is removed from the
list when either of these two functions are called. Consequently the wait
entry will be freed by only one of these functions or by the woken process
in autofs_wait() depending on the order of the calls.
end edit: imk

Reported-by: syzbot+5e53f70e69ff0c0a1c0c@syzkaller.appspotmail.com
Suggested-by: Takeshi Misawa <jeliantsurux@gmail.com>
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Signed-off-by: Ian Kent <raven@themaw.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: autofs@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Message-Id: <169112719161.7590.6700123246297365841.stgit@donald.themaw.net>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/autofs/waitq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 54c1f8b8b075..efdc76732fae 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -32,8 +32,9 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
 		wq->status = -ENOENT; /* Magic is gone - report failure */
 		kfree(wq->name.name - wq->offset);
 		wq->name.name = NULL;
-		wq->wait_ctr--;
 		wake_up_interruptible(&wq->queue);
+		if (!--wq->wait_ctr)
+			kfree(wq);
 		wq = nwq;
 	}
 	fput(sbi->pipe);	/* Close the pipe */

From 17fce12e7c0a53f0bed26af231a2a98a34d34c60 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Fri, 4 Aug 2023 13:33:18 +0800
Subject: [PATCH 0744/2047] autofs: use wake_up() instead of
 wake_up_interruptible(()

In autofs_wait_release() wake_up() is used to wake up processes waiting
on a mount callback to complete which matches the wait_event_killable()
in autofs_wait().

But in autofs_catatonic_mode() the wake_up_interruptible() was not also
changed at the time autofs_wait_release() was changed.

Signed-off-by: Ian Kent <raven@themaw.net>
Message-Id: <169112719813.7590.4971499386839952992.stgit@donald.themaw.net>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/autofs/waitq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index efdc76732fae..33dd4660d82f 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -32,7 +32,7 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
 		wq->status = -ENOENT; /* Magic is gone - report failure */
 		kfree(wq->name.name - wq->offset);
 		wq->name.name = NULL;
-		wake_up_interruptible(&wq->queue);
+		wake_up(&wq->queue);
 		if (!--wq->wait_ctr)
 			kfree(wq);
 		wq = nwq;

From 0d2b6a1b8515204924b9174ae0135e1f4ff29b21 Mon Sep 17 00:00:00 2001
From: William Qiu <william.qiu@starfivetech.com>
Date: Fri, 4 Aug 2023 10:02:52 +0800
Subject: [PATCH 0745/2047] dt-bindings: qspi: cdns,qspi-nor: Add clocks for
 StarFive JH7110 SoC

The QSPI controller needs three clock items to work properly on StarFive
JH7110 SoC, so there is need to change the maxItems's value to 3.

Signed-off-by: William Qiu <william.qiu@starfivetech.com>
Reviewed-by: Hal Feng <hal.feng@starfivetech.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20230804020254.291239-2-william.qiu@starfivetech.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/spi/cdns,qspi-nor.yaml       | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml b/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml
index 4f15f9a0cc34..cca81f89e252 100644
--- a/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml
+++ b/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml
@@ -86,7 +86,17 @@ properties:
     maxItems: 1
 
   clocks:
-    maxItems: 1
+    minItems: 1
+    maxItems: 3
+
+  clock-names:
+    oneOf:
+      - items:
+          - const: ref
+      - items:
+          - const: ref
+          - const: ahb
+          - const: apb
 
   cdns,fifo-depth:
     description:

From 33f1ef6d4eb6bca726608ed939c9fd94d96ceefd Mon Sep 17 00:00:00 2001
From: William Qiu <william.qiu@starfivetech.com>
Date: Fri, 4 Aug 2023 10:02:53 +0800
Subject: [PATCH 0746/2047] spi: cadence-quadspi: Add clock configuration for
 StarFive JH7110 QSPI

Add JH7110's clock initialization code to the driver.

Signed-off-by: William Qiu <william.qiu@starfivetech.com>
Reviewed-by: Hal Feng <hal.feng@starfivetech.com>
Link: https://lore.kernel.org/r/20230804020254.291239-3-william.qiu@starfivetech.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cadence-quadspi.c | 67 +++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index e1a8cf08ef66..6064b8e0319e 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -46,6 +46,12 @@
 
 #define CQSPI_OP_WIDTH(part) ((part).nbytes ? ilog2((part).buswidth) : 0)
 
+enum {
+	CLK_QSPI_APB = 0,
+	CLK_QSPI_AHB,
+	CLK_QSPI_NUM,
+};
+
 struct cqspi_st;
 
 struct cqspi_flash_pdata {
@@ -63,6 +69,7 @@ struct cqspi_st {
 	struct platform_device	*pdev;
 	struct spi_master	*master;
 	struct clk		*clk;
+	struct clk		*clks[CLK_QSPI_NUM];
 	unsigned int		sclk;
 
 	void __iomem		*iobase;
@@ -91,6 +98,8 @@ struct cqspi_st {
 	bool			wr_completion;
 	bool			slow_sram;
 	bool			apb_ahb_hazard;
+
+	bool			is_jh7110; /* Flag for StarFive JH7110 SoC */
 };
 
 struct cqspi_driver_platdata {
@@ -99,6 +108,8 @@ struct cqspi_driver_platdata {
 	int (*indirect_read_dma)(struct cqspi_flash_pdata *f_pdata,
 				 u_char *rxbuf, loff_t from_addr, size_t n_rx);
 	u32 (*get_dma_status)(struct cqspi_st *cqspi);
+	int (*jh7110_clk_init)(struct platform_device *pdev,
+			       struct cqspi_st *cqspi);
 };
 
 /* Operation timeout value */
@@ -1629,6 +1640,51 @@ static int cqspi_setup_flash(struct cqspi_st *cqspi)
 	return 0;
 }
 
+static int cqspi_jh7110_clk_init(struct platform_device *pdev, struct cqspi_st *cqspi)
+{
+	static struct clk_bulk_data qspiclk[] = {
+		{ .id = "apb" },
+		{ .id = "ahb" },
+	};
+
+	int ret = 0;
+
+	ret = devm_clk_bulk_get(&pdev->dev, ARRAY_SIZE(qspiclk), qspiclk);
+	if (ret) {
+		dev_err(&pdev->dev, "%s: failed to get qspi clocks\n", __func__);
+		return ret;
+	}
+
+	cqspi->clks[CLK_QSPI_APB] = qspiclk[0].clk;
+	cqspi->clks[CLK_QSPI_AHB] = qspiclk[1].clk;
+
+	ret = clk_prepare_enable(cqspi->clks[CLK_QSPI_APB]);
+	if (ret) {
+		dev_err(&pdev->dev, "%s: failed to enable CLK_QSPI_APB\n", __func__);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(cqspi->clks[CLK_QSPI_AHB]);
+	if (ret) {
+		dev_err(&pdev->dev, "%s: failed to enable CLK_QSPI_AHB\n", __func__);
+		goto disable_apb_clk;
+	}
+
+	cqspi->is_jh7110 = true;
+
+	return 0;
+
+disable_apb_clk:
+	clk_disable_unprepare(cqspi->clks[CLK_QSPI_APB]);
+
+	return ret;
+}
+
+static void cqspi_jh7110_disable_clk(struct platform_device *pdev, struct cqspi_st *cqspi)
+{
+	clk_disable_unprepare(cqspi->clks[CLK_QSPI_AHB]);
+	clk_disable_unprepare(cqspi->clks[CLK_QSPI_APB]);
+}
 static int cqspi_probe(struct platform_device *pdev)
 {
 	const struct cqspi_driver_platdata *ddata;
@@ -1654,6 +1710,7 @@ static int cqspi_probe(struct platform_device *pdev)
 
 	cqspi->pdev = pdev;
 	cqspi->master = master;
+	cqspi->is_jh7110 = false;
 	platform_set_drvdata(pdev, cqspi);
 
 	/* Obtain configuration from OF. */
@@ -1765,6 +1822,12 @@ static int cqspi_probe(struct platform_device *pdev)
 		if (ddata->quirks & CQSPI_NEEDS_APB_AHB_HAZARD_WAR)
 			cqspi->apb_ahb_hazard = true;
 
+		if (ddata->jh7110_clk_init) {
+			ret = cqspi_jh7110_clk_init(pdev, cqspi);
+			if (ret)
+				goto probe_clk_failed;
+		}
+
 		if (of_device_is_compatible(pdev->dev.of_node,
 					    "xlnx,versal-ospi-1.0")) {
 			ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
@@ -1829,6 +1892,9 @@ static void cqspi_remove(struct platform_device *pdev)
 
 	clk_disable_unprepare(cqspi->clk);
 
+	if (cqspi->is_jh7110)
+		cqspi_jh7110_disable_clk(pdev, cqspi);
+
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 }
@@ -1896,6 +1962,7 @@ static const struct cqspi_driver_platdata versal_ospi = {
 
 static const struct cqspi_driver_platdata jh7110_qspi = {
 	.quirks = CQSPI_DISABLE_DAC_MODE,
+	.jh7110_clk_init = cqspi_jh7110_clk_init,
 };
 
 static const struct cqspi_driver_platdata pensando_cdns_qspi = {

From ce85a1e04645b1ed386b074297df27ab5b8801c0 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Fri, 4 Aug 2023 08:20:57 -0700
Subject: [PATCH 0747/2047] xfs: stabilize fs summary counters for online fsck

If the fscounters scrubber notices incorrect summary counters, it's
entirely possible that scrub is simply racing with other threads that
are updating the incore counters.  There isn't a good way to stabilize
percpu counters or set ourselves up to observe live updates with hooks
like we do for the quotacheck or nlinks scanners, so we instead choose
to freeze the filesystem long enough to walk the incore per-AG
structures.

Past me thought that it was going to be commonplace to have to freeze
the filesystem to perform some kind of repair and set up a whole
separate infrastructure to freeze the filesystem in such a way that
userspace could not unfreeze while we were running.  This involved
adding a mutex and freeze_super/thaw_super functions and dealing with
the fact that the VFS freeze/thaw functions can free the VFS superblock
references on return.

This was all very overwrought, since fscounters turned out to be the
only user of scrub freezes, and it doesn't require the log to quiesce,
only the incore superblock counters.  We prevent other threads from
changing the freeze level by calling freeze_super_excl with a custom
freeze cookie to keep everyone else out of the filesystem.

The end result is that fscounters should be much more efficient.  When
we're checking a busy system and we can't stabilize the counters, the
custom freeze will do less work, which should result in less downtime.
Repair should be similarly speedy, but that's in a later patch.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/fscounters.c | 188 ++++++++++++++++++++++++++++++--------
 fs/xfs/scrub/scrub.c      |   6 +-
 fs/xfs/scrub/scrub.h      |   1 +
 fs/xfs/scrub/trace.h      |  26 ++++++
 4 files changed, 183 insertions(+), 38 deletions(-)

diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index e382a35e98d8..05be757668bb 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2019-2023 Oracle.  All Rights Reserved.
  * Author: Darrick J. Wong <djwong@kernel.org>
@@ -8,6 +8,8 @@
 #include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
 #include "xfs_mount.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
@@ -16,6 +18,7 @@
 #include "xfs_ag.h"
 #include "xfs_rtalloc.h"
 #include "xfs_inode.h"
+#include "xfs_icache.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
@@ -53,6 +56,7 @@ struct xchk_fscounters {
 	uint64_t		frextents;
 	unsigned long long	icount_min;
 	unsigned long long	icount_max;
+	bool			frozen;
 };
 
 /*
@@ -123,6 +127,82 @@ xchk_fscount_warmup(
 	return error;
 }
 
+static inline int
+xchk_fsfreeze(
+	struct xfs_scrub	*sc)
+{
+	int			error;
+
+	error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+	trace_xchk_fsfreeze(sc, error);
+	return error;
+}
+
+static inline int
+xchk_fsthaw(
+	struct xfs_scrub	*sc)
+{
+	int			error;
+
+	/* This should always succeed, we have a kernel freeze */
+	error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+	trace_xchk_fsthaw(sc, error);
+	return error;
+}
+
+/*
+ * We couldn't stabilize the filesystem long enough to sample all the variables
+ * that comprise the summary counters and compare them to the percpu counters.
+ * We need to disable all writer threads, which means taking the first two
+ * freeze levels to put userspace to sleep, and the third freeze level to
+ * prevent background threads from starting new transactions.  Take one level
+ * more to prevent other callers from unfreezing the filesystem while we run.
+ */
+STATIC int
+xchk_fscounters_freeze(
+	struct xfs_scrub	*sc)
+{
+	struct xchk_fscounters	*fsc = sc->buf;
+	int			error = 0;
+
+	if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
+		sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
+		mnt_drop_write_file(sc->file);
+	}
+
+	/* Try to grab a kernel freeze. */
+	while ((error = xchk_fsfreeze(sc)) == -EBUSY) {
+		if (xchk_should_terminate(sc, &error))
+			return error;
+
+		delay(HZ / 10);
+	}
+	if (error)
+		return error;
+
+	fsc->frozen = true;
+	return 0;
+}
+
+/* Thaw the filesystem after checking or repairing fscounters. */
+STATIC void
+xchk_fscounters_cleanup(
+	void			*buf)
+{
+	struct xchk_fscounters	*fsc = buf;
+	struct xfs_scrub	*sc = fsc->sc;
+	int			error;
+
+	if (!fsc->frozen)
+		return;
+
+	error = xchk_fsthaw(sc);
+	if (error)
+		xfs_emerg(sc->mp, "still frozen after scrub, err=%d", error);
+	else
+		fsc->frozen = false;
+}
+
 int
 xchk_setup_fscounters(
 	struct xfs_scrub	*sc)
@@ -140,6 +220,7 @@ xchk_setup_fscounters(
 	sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
 	if (!sc->buf)
 		return -ENOMEM;
+	sc->buf_cleanup = xchk_fscounters_cleanup;
 	fsc = sc->buf;
 	fsc->sc = sc;
 
@@ -150,7 +231,18 @@ xchk_setup_fscounters(
 	if (error)
 		return error;
 
-	return xchk_trans_alloc(sc, 0);
+	/*
+	 * Pause all writer activity in the filesystem while we're scrubbing to
+	 * reduce the likelihood of background perturbations to the counters
+	 * throwing off our calculations.
+	 */
+	if (sc->flags & XCHK_TRY_HARDER) {
+		error = xchk_fscounters_freeze(sc);
+		if (error)
+			return error;
+	}
+
+	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
 }
 
 /*
@@ -290,8 +382,7 @@ retry:
 	if (fsc->ifree > fsc->icount) {
 		if (tries--)
 			goto retry;
-		xchk_set_incomplete(sc);
-		return 0;
+		return -EDEADLOCK;
 	}
 
 	return 0;
@@ -367,6 +458,8 @@ xchk_fscount_count_frextents(
  * Otherwise, we /might/ have a problem.  If the change in the summations is
  * more than we want to tolerate, the filesystem is probably busy and we should
  * just send back INCOMPLETE and see if userspace will try again.
+ *
+ * If we're repairing then we require an exact match.
  */
 static inline bool
 xchk_fscount_within_range(
@@ -396,21 +489,7 @@ xchk_fscount_within_range(
 	if (expected >= min_value && expected <= max_value)
 		return true;
 
-	/*
-	 * If the difference between the two summations is too large, the fs
-	 * might just be busy and so we'll mark the scrub incomplete.  Return
-	 * true here so that we don't mark the counter corrupt.
-	 *
-	 * XXX: In the future when userspace can grant scrub permission to
-	 * quiesce the filesystem to solve the outsized variance problem, this
-	 * check should be moved up and the return code changed to signal to
-	 * userspace that we need quiesce permission.
-	 */
-	if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) {
-		xchk_set_incomplete(sc);
-		return true;
-	}
-
+	/* Everything else is bad. */
 	return false;
 }
 
@@ -422,6 +501,7 @@ xchk_fscounters(
 	struct xfs_mount	*mp = sc->mp;
 	struct xchk_fscounters	*fsc = sc->buf;
 	int64_t			icount, ifree, fdblocks, frextents;
+	bool			try_again = false;
 	int			error;
 
 	/* Snapshot the percpu counters. */
@@ -431,9 +511,26 @@ xchk_fscounters(
 	frextents = percpu_counter_sum(&mp->m_frextents);
 
 	/* No negative values, please! */
-	if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0)
+	if (icount < 0 || ifree < 0)
 		xchk_set_corrupt(sc);
 
+	/*
+	 * If the filesystem is not frozen, the counter summation calls above
+	 * can race with xfs_mod_freecounter, which subtracts a requested space
+	 * reservation from the counter and undoes the subtraction if that made
+	 * the counter go negative.  Therefore, it's possible to see negative
+	 * values here, and we should only flag that as a corruption if we
+	 * froze the fs.  This is much more likely to happen with frextents
+	 * since there are no reserved pools.
+	 */
+	if (fdblocks < 0 || frextents < 0) {
+		if (!fsc->frozen)
+			return -EDEADLOCK;
+
+		xchk_set_corrupt(sc);
+		return 0;
+	}
+
 	/* See if icount is obviously wrong. */
 	if (icount < fsc->icount_min || icount > fsc->icount_max)
 		xchk_set_corrupt(sc);
@@ -446,12 +543,6 @@ xchk_fscounters(
 	if (frextents > mp->m_sb.sb_rextents)
 		xchk_set_corrupt(sc);
 
-	/*
-	 * XXX: We can't quiesce percpu counter updates, so exit early.
-	 * This can be re-enabled when we gain exclusive freeze functionality.
-	 */
-	return 0;
-
 	/*
 	 * If ifree exceeds icount by more than the minimum variance then
 	 * something's probably wrong with the counters.
@@ -463,8 +554,6 @@ xchk_fscounters(
 	error = xchk_fscount_aggregate_agcounts(sc, fsc);
 	if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
 		return error;
-	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
-		return 0;
 
 	/* Count the free extents counter for rt volumes. */
 	error = xchk_fscount_count_frextents(sc, fsc);
@@ -473,20 +562,45 @@ xchk_fscounters(
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
 		return 0;
 
-	/* Compare the in-core counters with whatever we counted. */
-	if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
-		xchk_set_corrupt(sc);
+	/*
+	 * Compare the in-core counters with whatever we counted.  If the fs is
+	 * frozen, we treat the discrepancy as a corruption because the freeze
+	 * should have stabilized the counter values.  Otherwise, we need
+	 * userspace to call us back having granted us freeze permission.
+	 */
+	if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
+				fsc->icount)) {
+		if (fsc->frozen)
+			xchk_set_corrupt(sc);
+		else
+			try_again = true;
+	}
 
-	if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree))
-		xchk_set_corrupt(sc);
+	if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
+		if (fsc->frozen)
+			xchk_set_corrupt(sc);
+		else
+			try_again = true;
+	}
 
 	if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
-			fsc->fdblocks))
-		xchk_set_corrupt(sc);
+			fsc->fdblocks)) {
+		if (fsc->frozen)
+			xchk_set_corrupt(sc);
+		else
+			try_again = true;
+	}
 
 	if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
-			fsc->frextents))
-		xchk_set_corrupt(sc);
+			fsc->frextents)) {
+		if (fsc->frozen)
+			xchk_set_corrupt(sc);
+		else
+			try_again = true;
+	}
+
+	if (try_again)
+		return -EDEADLOCK;
 
 	return 0;
 }
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 3d98f604765e..a0fffbcd022b 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -184,8 +184,10 @@ xchk_teardown(
 			xchk_irele(sc, sc->ip);
 		sc->ip = NULL;
 	}
-	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+	if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
+		sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
 		mnt_drop_write_file(sc->file);
+	}
 	if (sc->buf) {
 		if (sc->buf_cleanup)
 			sc->buf_cleanup(sc->buf);
@@ -505,6 +507,8 @@ retry_op:
 		error = mnt_want_write_file(sc->file);
 		if (error)
 			goto out_sc;
+
+		sc->flags |= XCHK_HAVE_FREEZE_PROT;
 	}
 
 	/* Set up for the operation. */
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index e113f2f5c254..f8ba00e51ca9 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -106,6 +106,7 @@ struct xfs_scrub {
 
 /* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
 #define XCHK_TRY_HARDER		(1U << 0)  /* can't get resources, try again */
+#define XCHK_HAVE_FREEZE_PROT	(1U << 1)  /* do we have freeze protection? */
 #define XCHK_FSGATES_DRAIN	(1U << 2)  /* defer ops draining enabled */
 #define XCHK_NEED_DRAIN		(1U << 3)  /* scrub needs to drain defer ops */
 #define XREP_ALREADY_FIXED	(1U << 31) /* checking our repair work */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index b3894daeb86a..0b54f1a1cf0c 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -98,6 +98,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
 
 #define XFS_SCRUB_STATE_STRINGS \
 	{ XCHK_TRY_HARDER,			"try_harder" }, \
+	{ XCHK_HAVE_FREEZE_PROT,		"nofreeze" }, \
 	{ XCHK_FSGATES_DRAIN,			"fsgates_drain" }, \
 	{ XCHK_NEED_DRAIN,			"need_drain" }, \
 	{ XREP_ALREADY_FIXED,			"already_fixed" }
@@ -693,6 +694,31 @@ TRACE_EVENT(xchk_fscounters_within_range,
 		  __entry->old_value)
 )
 
+DECLARE_EVENT_CLASS(xchk_fsfreeze_class,
+	TP_PROTO(struct xfs_scrub *sc, int error),
+	TP_ARGS(sc, error),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned int, type)
+		__field(int, error)
+	),
+	TP_fast_assign(
+		__entry->dev = sc->mp->m_super->s_dev;
+		__entry->type = sc->sm->sm_type;
+		__entry->error = error;
+	),
+	TP_printk("dev %d:%d type %s error %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+		  __entry->error)
+);
+#define DEFINE_XCHK_FSFREEZE_EVENT(name) \
+DEFINE_EVENT(xchk_fsfreeze_class, name, \
+	TP_PROTO(struct xfs_scrub *sc, int error), \
+	TP_ARGS(sc, error))
+DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsfreeze);
+DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsthaw);
+
 TRACE_EVENT(xchk_refcount_incorrect,
 	TP_PROTO(struct xfs_perag *pag, const struct xfs_refcount_irec *irec,
 		 xfs_nlink_t seen),

From 3b816601e279756e781e6c4d9b3f3bd21a72ac67 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 4 Aug 2023 10:52:20 -0400
Subject: [PATCH 0748/2047] nfsd: Fix race to FREE_STATEID and cl_revoked

We have some reports of linux NFS clients that cannot satisfy a linux knfsd
server that always sets SEQ4_STATUS_RECALLABLE_STATE_REVOKED even though
those clients repeatedly walk all their known state using TEST_STATEID and
receive NFS4_OK for all.

Its possible for revoke_delegation() to set NFS4_REVOKED_DELEG_STID, then
nfsd4_free_stateid() finds the delegation and returns NFS4_OK to
FREE_STATEID.  Afterward, revoke_delegation() moves the same delegation to
cl_revoked.  This would produce the observed client/server effect.

Fix this by ensuring that the setting of sc_type to NFS4_REVOKED_DELEG_STID
and move to cl_revoked happens within the same cl_lock.  This will allow
nfsd4_free_stateid() to properly remove the delegation from cl_revoked.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=2217103
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2176575
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Cc: stable@vger.kernel.org # v4.17+
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3aefbad4cc09..daf305daa751 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1354,9 +1354,9 @@ static void revoke_delegation(struct nfs4_delegation *dp)
 	trace_nfsd_stid_revoke(&dp->dl_stid);
 
 	if (clp->cl_minorversion) {
+		spin_lock(&clp->cl_lock);
 		dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
 		refcount_inc(&dp->dl_stid.sc_count);
-		spin_lock(&clp->cl_lock);
 		list_add(&dp->dl_recall_lru, &clp->cl_revoked);
 		spin_unlock(&clp->cl_lock);
 	}

From ed192c59f86910f089d061e635f6c1129bb14064 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Thu, 27 Jul 2023 13:38:09 +0200
Subject: [PATCH 0749/2047] file: mostly eliminate spurious relocking in
 __range_close

Stock code takes a lock trip for every fd in range, but this can be
trivially avoided and real-world consumers do have plenty of already
closed cases.

Just booting Debian 12 with a debug printk shows:
(sh) min 3 max 17 closed 15 empty 0
(sh) min 19 max 63 closed 31 empty 14
(sh) min 4 max 63 closed 0 empty 60
(spawn) min 3 max 63 closed 13 empty 48
(spawn) min 3 max 63 closed 13 empty 48
(mount) min 3 max 17 closed 15 empty 0
(mount) min 19 max 63 closed 32 empty 13

and so on.

While here use more idiomatic naming.

An avoidable relock is left in place to avoid uglifying the code.
The code was not switched to bitmap traversal for the same reason.

Tested with ltp kernel/syscalls/close_range

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Message-Id: <20230727113809.800067-1-mjguzik@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/file.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 7893ea161d77..7bb2cddbd82b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -693,29 +693,30 @@ static inline void __range_cloexec(struct files_struct *cur_fds,
 	spin_unlock(&cur_fds->file_lock);
 }
 
-static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
+static inline void __range_close(struct files_struct *files, unsigned int fd,
 				 unsigned int max_fd)
 {
+	struct file *file;
 	unsigned n;
 
-	rcu_read_lock();
-	n = last_fd(files_fdtable(cur_fds));
-	rcu_read_unlock();
+	spin_lock(&files->file_lock);
+	n = last_fd(files_fdtable(files));
 	max_fd = min(max_fd, n);
 
-	while (fd <= max_fd) {
-		struct file *file;
-
-		spin_lock(&cur_fds->file_lock);
-		file = pick_file(cur_fds, fd++);
-		spin_unlock(&cur_fds->file_lock);
-
+	for (; fd <= max_fd; fd++) {
+		file = pick_file(files, fd);
 		if (file) {
-			/* found a valid file to close */
-			filp_close(file, cur_fds);
+			spin_unlock(&files->file_lock);
+			filp_close(file, files);
 			cond_resched();
+			spin_lock(&files->file_lock);
+		} else if (need_resched()) {
+			spin_unlock(&files->file_lock);
+			cond_resched();
+			spin_lock(&files->file_lock);
 		}
 	}
+	spin_unlock(&files->file_lock);
 }
 
 /**

From 4e0bacd65e72f7f9b1b60e48e85e6d4187474644 Mon Sep 17 00:00:00 2001
From: Zhang Jianhua <chris.zjh@huawei.com>
Date: Fri, 4 Aug 2023 15:56:15 +0800
Subject: [PATCH 0750/2047] arm64: fix build warning for ARM64_MEMSTART_SHIFT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When building with W=1, the following warning occurs.

arch/arm64/include/asm/kernel-pgtable.h:129:41: error: "PUD_SHIFT" is not defined, evaluates to 0 [-Werror=undef]
  129 | #define ARM64_MEMSTART_SHIFT            PUD_SHIFT
      |                                         ^~~~~~~~~
arch/arm64/include/asm/kernel-pgtable.h:142:5: note: in expansion of macro ‘ARM64_MEMSTART_SHIFT’
  142 | #if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
      |     ^~~~~~~~~~~~~~~~~~~~

The generic PUD_SHIFT was defined in include/asm-generic/pgtable-nopud.h,
however the #ifndef __ASSEMBLY__ guard in this header file makes it unavailable
for assembly files. While someone .S file include the <asm/kernel-pgtable.h>,
the build warning would occur. Now move the macro ARM64_MEMSTART_SHIFT and
ARM64_MEMSTART_ALIGN to arch/arm64/mm/init.c where it is used only, to avoid
this issue.

Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20230804075615.3334756-1-chris.zjh@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kernel-pgtable.h | 27 -------------------------
 arch/arm64/mm/init.c                    | 27 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 577773870b66..85d26143faa5 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -118,31 +118,4 @@
 #define SWAPPER_RX_MMUFLAGS	(SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
 #endif
 
-/*
- * To make optimal use of block mappings when laying out the linear
- * mapping, round down the base of physical memory to a size that can
- * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
- * (64k granule), or a multiple that can be mapped using contiguous bits
- * in the page tables: 32 * PMD_SIZE (16k granule)
- */
-#if defined(CONFIG_ARM64_4K_PAGES)
-#define ARM64_MEMSTART_SHIFT		PUD_SHIFT
-#elif defined(CONFIG_ARM64_16K_PAGES)
-#define ARM64_MEMSTART_SHIFT		CONT_PMD_SHIFT
-#else
-#define ARM64_MEMSTART_SHIFT		PMD_SHIFT
-#endif
-
-/*
- * sparsemem vmemmap imposes an additional requirement on the alignment of
- * memstart_addr, due to the fact that the base of the vmemmap region
- * has a direct correspondence, and needs to appear sufficiently aligned
- * in the virtual address space.
- */
-#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
-#define ARM64_MEMSTART_ALIGN	(1UL << SECTION_SIZE_BITS)
-#else
-#define ARM64_MEMSTART_ALIGN	(1UL << ARM64_MEMSTART_SHIFT)
-#endif
-
 #endif	/* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index d31c3a9290c5..4fcb88a445ef 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -73,6 +73,33 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit;
 
 #define DEFAULT_CRASH_KERNEL_LOW_SIZE	(128UL << 20)
 
+/*
+ * To make optimal use of block mappings when laying out the linear
+ * mapping, round down the base of physical memory to a size that can
+ * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
+ * (64k granule), or a multiple that can be mapped using contiguous bits
+ * in the page tables: 32 * PMD_SIZE (16k granule)
+ */
+#if defined(CONFIG_ARM64_4K_PAGES)
+#define ARM64_MEMSTART_SHIFT		PUD_SHIFT
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define ARM64_MEMSTART_SHIFT		CONT_PMD_SHIFT
+#else
+#define ARM64_MEMSTART_SHIFT		PMD_SHIFT
+#endif
+
+/*
+ * sparsemem vmemmap imposes an additional requirement on the alignment of
+ * memstart_addr, due to the fact that the base of the vmemmap region
+ * has a direct correspondence, and needs to appear sufficiently aligned
+ * in the virtual address space.
+ */
+#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
+#define ARM64_MEMSTART_ALIGN	(1UL << SECTION_SIZE_BITS)
+#else
+#define ARM64_MEMSTART_ALIGN	(1UL << ARM64_MEMSTART_SHIFT)
+#endif
+
 static int __init reserve_crashkernel_low(unsigned long long low_size)
 {
 	unsigned long long low_base;

From 90d68677226ac7cf344648919df2016686b3e2ab Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Wed, 2 Aug 2023 14:38:53 +0530
Subject: [PATCH 0751/2047] perf: pmuv3: Remove comments from
 armv8pmu_[enable|disable]_event()

The comments in armv8pmu_[enable|disable]_event() are blindingly obvious,
and does not contribute in making things any better. Let's drop them off.
Functional change is not intended.

Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/20230802090853.1190391-1-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmuv3.c | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index d2dffb4e9d07..e5a2ac4155f6 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -721,38 +721,15 @@ static void armv8pmu_enable_event(struct perf_event *event)
 	 * Enable counter and interrupt, and set the counter to count
 	 * the event that we're interested in.
 	 */
-
-	/*
-	 * Disable counter
-	 */
 	armv8pmu_disable_event_counter(event);
-
-	/*
-	 * Set event.
-	 */
 	armv8pmu_write_event_type(event);
-
-	/*
-	 * Enable interrupt for this counter
-	 */
 	armv8pmu_enable_event_irq(event);
-
-	/*
-	 * Enable counter
-	 */
 	armv8pmu_enable_event_counter(event);
 }
 
 static void armv8pmu_disable_event(struct perf_event *event)
 {
-	/*
-	 * Disable counter
-	 */
 	armv8pmu_disable_event_counter(event);
-
-	/*
-	 * Disable interrupt for this counter
-	 */
 	armv8pmu_disable_event_irq(event);
 }
 

From 7f86d128e437990fd08d9e66ae7c1571666cff8a Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Fri, 4 Aug 2023 15:37:45 +0100
Subject: [PATCH 0752/2047] arm64: add HWCAP for FEAT_HBC (hinted conditional
 branches)

Add a HWCAP for FEAT_HBC, so that userspace can make a decision on using
this feature.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20230804143746.3900803-2-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/hwcap.h      | 1 +
 arch/arm64/include/uapi/asm/hwcap.h | 1 +
 arch/arm64/kernel/cpufeature.c      | 3 ++-
 arch/arm64/kernel/cpuinfo.c         | 1 +
 4 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 692b1ec663b2..521267478d18 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -138,6 +138,7 @@
 #define KERNEL_HWCAP_SME_B16B16		__khwcap2_feature(SME_B16B16)
 #define KERNEL_HWCAP_SME_F16F16		__khwcap2_feature(SME_F16F16)
 #define KERNEL_HWCAP_MOPS		__khwcap2_feature(MOPS)
+#define KERNEL_HWCAP_HBC		__khwcap2_feature(HBC)
 
 /*
  * This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index a2cac4305b1e..53026f45a509 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -103,5 +103,6 @@
 #define HWCAP2_SME_B16B16	(1UL << 41)
 #define HWCAP2_SME_F16F16	(1UL << 42)
 #define HWCAP2_MOPS		(1UL << 43)
+#define HWCAP2_HBC		(1UL << 44)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index bcb006390e55..a5f533f63b60 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -222,7 +222,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
 		       FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0),
@@ -2840,6 +2840,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES),
 	HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
 	HWCAP_CAP(ID_AA64ISAR2_EL1, MOPS, IMP, CAP_HWCAP, KERNEL_HWCAP_MOPS),
+	HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC),
 #ifdef CONFIG_ARM64_SME
 	HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
 	HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 58622dc85917..98fda8500535 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -126,6 +126,7 @@ static const char *const hwcap_str[] = {
 	[KERNEL_HWCAP_SME_B16B16]	= "smeb16b16",
 	[KERNEL_HWCAP_SME_F16F16]	= "smef16f16",
 	[KERNEL_HWCAP_MOPS]		= "mops",
+	[KERNEL_HWCAP_HBC]		= "hbc",
 };
 
 #ifdef CONFIG_COMPAT

From d70175b1470c4bdc8643fd744d722a00c7f0fb17 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Fri, 4 Aug 2023 15:37:46 +0100
Subject: [PATCH 0753/2047] selftests/arm64: add HWCAP2_HBC test

Add a test for the newly added HWCAP2_HBC.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20230804143746.3900803-3-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/hwcap.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index d4ad813fed10..fabeac9a1b5e 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -208,6 +208,13 @@ static void svebf16_sigill(void)
 	asm volatile(".inst 0x658aa000" : : : "z0");
 }
 
+static void hbc_sigill(void)
+{
+	/* BC.EQ +4 */
+	asm volatile("cmp xzr, xzr\n"
+		     ".inst 0x54000030" : : : "cc");
+}
+
 static const struct hwcap_data {
 	const char *name;
 	unsigned long at_hwcap;
@@ -386,6 +393,14 @@ static const struct hwcap_data {
 		.hwcap_bit = HWCAP2_SVE_EBF16,
 		.cpuinfo = "sveebf16",
 	},
+	{
+		.name = "HBC",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_HBC,
+		.cpuinfo = "hbc",
+		.sigill_fn = hbc_sigill,
+		.sigill_reliable = true,
+	},
 };
 
 static bool seen_sigill;

From d1890517ac751a427b962caddda91ecc2bbf21d0 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Thu, 3 Aug 2023 21:39:05 +0800
Subject: [PATCH 0754/2047] kselftest/arm64: add RCpc load-acquire to hwcap
 test

Add the RCpc and various features check in the set of hwcap tests.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230803133905.971697-1-zengheng4@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/hwcap.c | 26 +++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index d4ad813fed10..6a0adf916028 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -39,6 +39,18 @@ static void cssc_sigill(void)
 	asm volatile(".inst 0xdac01c00" : : : "x0");
 }
 
+static void ilrcpc_sigill(void)
+{
+	/* LDAPUR W0, [SP, #8] */
+	asm volatile(".inst 0x994083e0" : : : );
+}
+
+static void lrcpc_sigill(void)
+{
+	/* LDAPR W0, [SP, #0] */
+	asm volatile(".inst 0xb8bfc3e0" : : : );
+}
+
 static void mops_sigill(void)
 {
 	char dst[1], src[1];
@@ -223,6 +235,20 @@ static const struct hwcap_data {
 		.cpuinfo = "cssc",
 		.sigill_fn = cssc_sigill,
 	},
+	{
+		.name = "LRCPC",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_LRCPC,
+		.cpuinfo = "lrcpc",
+		.sigill_fn = lrcpc_sigill,
+	},
+	{
+		.name = "LRCPC2",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_ILRCPC,
+		.cpuinfo = "ilrcpc",
+		.sigill_fn = ilrcpc_sigill,
+	},
 	{
 		.name = "MOPS",
 		.at_hwcap = AT_HWCAP2,

From 5cd474e57368f0957c343bb21e309cf82826b1ef Mon Sep 17 00:00:00 2001
From: D Scott Phillips <scott@os.amperecomputing.com>
Date: Mon, 26 Jun 2023 17:29:39 -0700
Subject: [PATCH 0755/2047] arm64: sdei: abort running SDEI handlers during
 crash

Interrupts are blocked in SDEI context, per the SDEI spec: "The client
interrupts cannot preempt the event handler." If we crashed in the SDEI
handler-running context (as with ACPI's AGDI) then we need to clean up the
SDEI state before proceeding to the crash kernel so that the crash kernel
can have working interrupts.

Track the active SDEI handler per-cpu so that we can COMPLETE_AND_RESUME
the handler, discarding the interrupted context.

Fixes: f5df26961853 ("arm64: kernel: Add arch-specific SDEI entry code and CPU masking")
Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
Cc: stable@vger.kernel.org
Reviewed-by: James Morse <james.morse@arm.com>
Tested-by: Mihai Carabas <mihai.carabas@oracle.com>
Link: https://lore.kernel.org/r/20230627002939.2758-1-scott@os.amperecomputing.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/sdei.h |  6 ++++++
 arch/arm64/kernel/entry.S     | 27 +++++++++++++++++++++++++--
 arch/arm64/kernel/sdei.c      |  3 +++
 arch/arm64/kernel/smp.c       |  8 ++++----
 drivers/firmware/arm_sdei.c   | 19 +++++++++++++++++++
 include/linux/arm_sdei.h      |  2 ++
 6 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
index 4292d9bafb9d..484cb6972e99 100644
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -17,6 +17,9 @@
 
 #include <asm/virt.h>
 
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
+
 extern unsigned long sdei_exit_mode;
 
 /* Software Delegated Exception entry point from firmware*/
@@ -29,6 +32,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num,
 						   unsigned long pc,
 						   unsigned long pstate);
 
+/* Abort a running handler. Context is discarded. */
+void __sdei_handler_abort(void);
+
 /*
  * The above entry point does the minimum to call C code. This function does
  * anything else, before calling the driver.
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a40e5e50fa55..6ad61de03d0a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -986,9 +986,13 @@ SYM_CODE_START(__sdei_asm_handler)
 
 	mov	x19, x1
 
-#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+	/* Store the registered-event for crash_smp_send_stop() */
 	ldrb	w4, [x19, #SDEI_EVENT_PRIORITY]
-#endif
+	cbnz	w4, 1f
+	adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
+	b	2f
+1:	adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
+2:	str	x19, [x5]
 
 #ifdef CONFIG_VMAP_STACK
 	/*
@@ -1055,6 +1059,14 @@ SYM_CODE_START(__sdei_asm_handler)
 
 	ldr_l	x2, sdei_exit_mode
 
+	/* Clear the registered-event seen by crash_smp_send_stop() */
+	ldrb	w3, [x4, #SDEI_EVENT_PRIORITY]
+	cbnz	w3, 1f
+	adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
+	b	2f
+1:	adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
+2:	str	xzr, [x5]
+
 alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
 	sdei_handler_exit exit_mode=x2
 alternative_else_nop_endif
@@ -1065,4 +1077,15 @@ alternative_else_nop_endif
 #endif
 SYM_CODE_END(__sdei_asm_handler)
 NOKPROBE(__sdei_asm_handler)
+
+SYM_CODE_START(__sdei_handler_abort)
+	mov_q	x0, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
+	adr	x1, 1f
+	ldr_l	x2, sdei_exit_mode
+	sdei_handler_exit exit_mode=x2
+	// exit the handler and jump to the next instruction.
+	// Exit will stomp x0-x17, PSTATE, ELR_ELx, and SPSR_ELx.
+1:	ret
+SYM_CODE_END(__sdei_handler_abort)
+NOKPROBE(__sdei_handler_abort)
 #endif /* CONFIG_ARM_SDE_INTERFACE */
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 830be01af32d..255d12f881c2 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -47,6 +47,9 @@ DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
 DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
 #endif
 
+DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
+DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
+
 static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
 {
 	unsigned long *p;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index edd63894d61e..960b98b43506 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -1044,10 +1044,8 @@ void crash_smp_send_stop(void)
 	 * If this cpu is the only one alive at this point in time, online or
 	 * not, there are no stop messages to be sent around, so just back out.
 	 */
-	if (num_other_online_cpus() == 0) {
-		sdei_mask_local_cpu();
-		return;
-	}
+	if (num_other_online_cpus() == 0)
+		goto skip_ipi;
 
 	cpumask_copy(&mask, cpu_online_mask);
 	cpumask_clear_cpu(smp_processor_id(), &mask);
@@ -1066,7 +1064,9 @@ void crash_smp_send_stop(void)
 		pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
 			cpumask_pr_args(&mask));
 
+skip_ipi:
 	sdei_mask_local_cpu();
+	sdei_handler_abort();
 }
 
 bool smp_crash_stop_failed(void)
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index f9040bd61081..285fe7ad490d 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1095,3 +1095,22 @@ int sdei_event_handler(struct pt_regs *regs,
 	return err;
 }
 NOKPROBE_SYMBOL(sdei_event_handler);
+
+void sdei_handler_abort(void)
+{
+	/*
+	 * If the crash happened in an SDEI event handler then we need to
+	 * finish the handler with the firmware so that we can have working
+	 * interrupts in the crash kernel.
+	 */
+	if (__this_cpu_read(sdei_active_critical_event)) {
+	        pr_warn("still in SDEI critical event context, attempting to finish handler.\n");
+	        __sdei_handler_abort();
+	        __this_cpu_write(sdei_active_critical_event, NULL);
+	}
+	if (__this_cpu_read(sdei_active_normal_event)) {
+	        pr_warn("still in SDEI normal event context, attempting to finish handler.\n");
+	        __sdei_handler_abort();
+	        __this_cpu_write(sdei_active_normal_event, NULL);
+	}
+}
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 14dc461b0e82..255701e1251b 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -47,10 +47,12 @@ int sdei_unregister_ghes(struct ghes *ghes);
 int sdei_mask_local_cpu(void);
 int sdei_unmask_local_cpu(void);
 void __init sdei_init(void);
+void sdei_handler_abort(void);
 #else
 static inline int sdei_mask_local_cpu(void) { return 0; }
 static inline int sdei_unmask_local_cpu(void) { return 0; }
 static inline void sdei_init(void) { }
+static inline void sdei_handler_abort(void) { }
 #endif /* CONFIG_ARM_SDE_INTERFACE */
 
 
From d6da04b6fbabf4b464bfe29e34ff10c62024d1e4 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 28 Jul 2023 00:26:12 +0100
Subject: [PATCH 0756/2047] kselftest/arm64: Exit streaming mode after
 collecting signal context

When we collect a signal context with one of the SME modes enabled we will
have enabled that mode behind the compiler and libc's back so they may
issue some instructions not valid in streaming mode, causing spurious
failures.

For the code prior to issuing the BRK to trigger signal handling we need to
stay in streaming mode if we were already there since that's a part of the
signal context the caller is trying to collect. Unfortunately this code
includes a memset() which is likely to be heavily optimised and is likely
to use FP instructions incompatible with streaming mode. We can avoid this
happening by open coding the memset(), inserting a volatile assembly
statement to avoid the compiler recognising what's being done and doing
something in optimisation. This code is not performance critical so the
inefficiency should not be an issue.

After collecting the context we can simply exit streaming mode, avoiding
these issues. Use a full SMSTOP for safety to prevent any issues appearing
with ZA.

Reported-by: Will Deacon <will@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230728-arm64-signal-memcpy-fix-v4-1-0c1290db5d46@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../arm64/signal/test_signals_utils.h         | 25 ++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
index 222093f51b67..c7f5627171dd 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
@@ -60,13 +60,25 @@ static __always_inline bool get_current_context(struct tdescr *td,
 						size_t dest_sz)
 {
 	static volatile bool seen_already;
+	int i;
+	char *uc = (char *)dest_uc;
 
 	assert(td && dest_uc);
 	/* it's a genuine invocation..reinit */
 	seen_already = 0;
 	td->live_uc_valid = 0;
 	td->live_sz = dest_sz;
-	memset(dest_uc, 0x00, td->live_sz);
+
+	/*
+	 * This is a memset() but we don't want the compiler to
+	 * optimise it into either instructions or a library call
+	 * which might be incompatible with streaming mode.
+	 */
+	for (i = 0; i < td->live_sz; i++) {
+		uc[i] = 0;
+		__asm__ ("" : "=r" (uc[i]) : "0" (uc[i]));
+	}
+
 	td->live_uc = dest_uc;
 	/*
 	 * Grab ucontext_t triggering a SIGTRAP.
@@ -103,6 +115,17 @@ static __always_inline bool get_current_context(struct tdescr *td,
 		      :
 		      : "memory");
 
+	/*
+	 * If we were grabbing a streaming mode context then we may
+	 * have entered streaming mode behind the system's back and
+	 * libc or compiler generated code might decide to do
+	 * something invalid in streaming mode, or potentially even
+	 * the state of ZA.  Issue a SMSTOP to exit both now we have
+	 * grabbed the state.
+	 */
+	if (td->feats_supported & FEAT_SME)
+		asm volatile("msr S0_3_C4_C6_3, xzr");
+
 	/*
 	 * If we get here with seen_already==1 it implies the td->live_uc
 	 * context has been used to get back here....this probably means

From e5d51a6650211baf38daa894d06c47748978e141 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 28 Jul 2023 00:26:13 +0100
Subject: [PATCH 0757/2047] tools compiler.h: Add OPTIMIZER_HIDE_VAR()

Port over the definition of OPTIMIZER_HIDE_VAR() so we can use it in
kselftests.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230728-arm64-signal-memcpy-fix-v4-2-0c1290db5d46@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/include/linux/compiler.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 9d36c8ce1fe7..f75cced41d59 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -190,4 +190,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 #define ___PASTE(a, b) a##b
 #define __PASTE(a, b) ___PASTE(a, b)
 
+#ifndef OPTIMIZER_HIDE_VAR
+/* Make the optimizer believe the variable can be manipulated arbitrarily. */
+#define OPTIMIZER_HIDE_VAR(var)						\
+	__asm__ ("" : "=r" (var) : "0" (var))
+#endif
+
 #endif /* _TOOLS_LINUX_COMPILER_H */

From 51e6ac1fa45160d400321fa06d7808b30d9d453c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 28 Jul 2023 00:26:14 +0100
Subject: [PATCH 0758/2047] tools include: Add some common function attributes

We don't have definitions of __always_unused or __noreturn in the tools
version of compiler.h, add them so we can use them in kselftests.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230728-arm64-signal-memcpy-fix-v4-3-0c1290db5d46@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/include/linux/compiler.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index f75cced41d59..1684216e826a 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -42,6 +42,18 @@
 # define __always_inline	inline __attribute__((always_inline))
 #endif
 
+#ifndef __always_unused
+#define __always_unused __attribute__((__unused__))
+#endif
+
+#ifndef __noreturn
+#define __noreturn __attribute__((__noreturn__))
+#endif
+
+#ifndef unreachable
+#define unreachable() __builtin_unreachable()
+#endif
+
 #ifndef noinline
 #define noinline
 #endif

From 35d7bc983a74db7067d874be2708a7d3bc269f13 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 28 Jul 2023 00:26:15 +0100
Subject: [PATCH 0759/2047] kselftest/arm64: Make the tools/include headers
 available

Make the generic tools/include headers available to the arm64 selftests so
we can reduce some duplication.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230728-arm64-signal-memcpy-fix-v4-4-0c1290db5d46@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index ace8b67fb22d..28b93cab8c0d 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -19,6 +19,8 @@ CFLAGS += -I$(top_srcdir)/tools/testing/selftests/
 
 CFLAGS += $(KHDR_INCLUDES)
 
+CFLAGS += -I$(top_srcdir)/tools/include
+
 export CFLAGS
 export top_srcdir
 

From db7a89f706d6080121b61099b1d69a752ce0c61e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 28 Jul 2023 00:26:16 +0100
Subject: [PATCH 0760/2047] kselftest/arm64: Use shared OPTIMZER_HIDE_VAR()
 definiton

We had open coded the definition of OPTIMIZER_HIDE_VAR() as a fix but now
that we have the generic tools/include available and that has had a
definition of OPTIMIZER_HIDE_VAR() we can switch to the define.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230728-arm64-signal-memcpy-fix-v4-5-0c1290db5d46@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/signal/test_signals_utils.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
index c7f5627171dd..762c8fe9c54a 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
@@ -8,6 +8,8 @@
 #include <stdio.h>
 #include <string.h>
 
+#include <linux/compiler.h>
+
 #include "test_signals.h"
 
 int test_init(struct tdescr *td);
@@ -76,7 +78,7 @@ static __always_inline bool get_current_context(struct tdescr *td,
 	 */
 	for (i = 0; i < td->live_sz; i++) {
 		uc[i] = 0;
-		__asm__ ("" : "=r" (uc[i]) : "0" (uc[i]));
+		OPTIMIZER_HIDE_VAR(uc[0]);
 	}
 
 	td->live_uc = dest_uc;

From 672dbf97f6123ffe61ee46bd8bfbbe1cae6de815 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 28 Jul 2023 00:26:17 +0100
Subject: [PATCH 0761/2047] kselftest/arm64: Use the tools/include compiler.h
 rather than our own

The BTI test program started life as standalone programs outside the
kselftest suite so provided it's own compiler.h. Now that we have updated
the tools/include compiler.h to have all the definitions that we are using
and the arm64 selftsets pull in tools/includes let's drop our custom
version.

__unreachable() is named unreachable() there requiring an update in the
code.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230728-arm64-signal-memcpy-fix-v4-6-0c1290db5d46@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/bti/compiler.h | 21 --------------------
 tools/testing/selftests/arm64/bti/system.c   |  4 +---
 tools/testing/selftests/arm64/bti/system.h   |  4 ++--
 tools/testing/selftests/arm64/bti/test.c     |  1 -
 4 files changed, 3 insertions(+), 27 deletions(-)
 delete mode 100644 tools/testing/selftests/arm64/bti/compiler.h

diff --git a/tools/testing/selftests/arm64/bti/compiler.h b/tools/testing/selftests/arm64/bti/compiler.h
deleted file mode 100644
index ebb6204f447a..000000000000
--- a/tools/testing/selftests/arm64/bti/compiler.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2019  Arm Limited
- * Original author: Dave Martin <Dave.Martin@arm.com>
- */
-
-#ifndef COMPILER_H
-#define COMPILER_H
-
-#define __always_unused __attribute__((__unused__))
-#define __noreturn __attribute__((__noreturn__))
-#define __unreachable() __builtin_unreachable()
-
-/* curse(e) has value e, but the compiler cannot assume so */
-#define curse(e) ({				\
-	__typeof__(e) __curse_e = (e);		\
-	asm ("" : "+r" (__curse_e));		\
-	__curse_e;				\
-})
-
-#endif /* ! COMPILER_H */
diff --git a/tools/testing/selftests/arm64/bti/system.c b/tools/testing/selftests/arm64/bti/system.c
index 6385d8d4973b..93d772b00bfe 100644
--- a/tools/testing/selftests/arm64/bti/system.c
+++ b/tools/testing/selftests/arm64/bti/system.c
@@ -8,12 +8,10 @@
 
 #include <asm/unistd.h>
 
-#include "compiler.h"
-
 void __noreturn exit(int n)
 {
 	syscall(__NR_exit, n);
-	__unreachable();
+	unreachable();
 }
 
 ssize_t write(int fd, const void *buf, size_t size)
diff --git a/tools/testing/selftests/arm64/bti/system.h b/tools/testing/selftests/arm64/bti/system.h
index aca118589705..2e9ee1284a0c 100644
--- a/tools/testing/selftests/arm64/bti/system.h
+++ b/tools/testing/selftests/arm64/bti/system.h
@@ -14,12 +14,12 @@ typedef __kernel_size_t size_t;
 typedef __kernel_ssize_t ssize_t;
 
 #include <linux/errno.h>
+#include <linux/compiler.h>
+
 #include <asm/hwcap.h>
 #include <asm/ptrace.h>
 #include <asm/unistd.h>
 
-#include "compiler.h"
-
 long syscall(int nr, ...);
 
 void __noreturn exit(int n);
diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c
index 2cd8dcee5aec..28a8e8a28a84 100644
--- a/tools/testing/selftests/arm64/bti/test.c
+++ b/tools/testing/selftests/arm64/bti/test.c
@@ -17,7 +17,6 @@
 typedef struct ucontext ucontext_t;
 
 #include "btitest.h"
-#include "compiler.h"
 #include "signal.h"
 
 #define EXPECTED_TESTS 18

From fe8c3623ab06603eb760444a032d426542212021 Mon Sep 17 00:00:00 2001
From: Enlin Mu <enlin.mu@unisoc.com>
Date: Tue, 1 Aug 2023 14:04:32 +0800
Subject: [PATCH 0762/2047] pstore/ram: Check start of empty przs during init

After commit 30696378f68a ("pstore/ram: Do not treat empty buffers as
valid"), initialization would assume a prz was valid after seeing that
the buffer_size is zero (regardless of the buffer start position). This
unchecked start value means it could be outside the bounds of the buffer,
leading to future access panics when written to:

 sysdump_panic_event+0x3b4/0x5b8
 atomic_notifier_call_chain+0x54/0x90
 panic+0x1c8/0x42c
 die+0x29c/0x2a8
 die_kernel_fault+0x68/0x78
 __do_kernel_fault+0x1c4/0x1e0
 do_bad_area+0x40/0x100
 do_translation_fault+0x68/0x80
 do_mem_abort+0x68/0xf8
 el1_da+0x1c/0xc0
 __raw_writeb+0x38/0x174
 __memcpy_toio+0x40/0xac
 persistent_ram_update+0x44/0x12c
 persistent_ram_write+0x1a8/0x1b8
 ramoops_pstore_write+0x198/0x1e8
 pstore_console_write+0x94/0xe0
 ...

To avoid this, also check if the prz start is 0 during the initialization
phase. If not, the next prz sanity check case will discover it (start >
size) and zap the buffer back to a sane state.

Fixes: 30696378f68a ("pstore/ram: Do not treat empty buffers as valid")
Cc: Yunlong Xing <yunlong.xing@unisoc.com>
Cc: stable@vger.kernel.org
Signed-off-by: Enlin Mu <enlin.mu@unisoc.com>
Link: https://lore.kernel.org/r/20230801060432.1307717-1-yunlong.xing@unisoc.com
[kees: update commit log with backtrace and clarifications]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/ram_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 85aaf0fc6d7d..eb6df190d752 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -519,7 +519,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
 	sig ^= PERSISTENT_RAM_SIG;
 
 	if (prz->buffer->sig == sig) {
-		if (buffer_size(prz) == 0) {
+		if (buffer_size(prz) == 0 && buffer_start(prz) == 0) {
 			pr_debug("found existing empty buffer\n");
 			return 0;
 		}

From abbf73816b6f5f4268fbfb3b3505003c2356d4a9 Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Tue, 1 Aug 2023 15:37:00 +0800
Subject: [PATCH 0763/2047] kunit: fix possible memory leak in
 kunit_filter_suites()

Inject fault while probing drm_kunit_helpers.ko, if one of
kunit_next_attr_filter(), kunit_filter_glob_tests() and
kunit_filter_attr_tests() fails, parsed_filters,
parsed_glob.suite_glob/test_glob alloced in
kunit_parse_glob_filter() is leaked.
And the filtered_suite->test_cases alloced in kunit_filter_glob_tests()
or kunit_filter_attr_tests() may also be leaked.

unreferenced object 0xff110001067e4800 (size 1024):
  comm "kunit_try_catch", pid 96, jiffies 4294671796 (age 763.547s)
  hex dump (first 32 bytes):
    73 75 69 74 65 32 00 00 00 00 00 00 00 00 00 00  suite2..........
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<00000000116e8eba>] __kmalloc_node_track_caller+0x4e/0x140
    [<00000000e2f9cce9>] kmemdup+0x2c/0x60
    [<000000002a36710b>] kunit_filter_suites+0x3e4/0xa50
    [<0000000045779fb9>] filter_suites_test+0x1b7/0x440
    [<00000000cd1104a7>] kunit_try_run_case+0x119/0x270
    [<00000000c654c917>] kunit_generic_run_threadfn_adapter+0x4e/0xa0
    [<00000000d195ac13>] kthread+0x2c7/0x3c0
    [<00000000b79c1ee9>] ret_from_fork+0x2c/0x70
    [<000000001167f7e6>] ret_from_fork_asm+0x1b/0x30
unreferenced object 0xff11000105d79b00 (size 192):
  comm "kunit_try_catch", pid 96, jiffies 4294671796 (age 763.547s)
  hex dump (first 32 bytes):
    f0 e1 5a 88 ff ff ff ff 60 59 bb 8a ff ff ff ff  ..Z.....`Y......
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<000000000d6e4891>] __kmalloc+0x4d/0x140
    [<000000006afe50bd>] kunit_filter_suites+0x424/0xa50
    [<0000000045779fb9>] filter_suites_test+0x1b7/0x440
    [<00000000cd1104a7>] kunit_try_run_case+0x119/0x270
    [<00000000c654c917>] kunit_generic_run_threadfn_adapter+0x4e/0xa0
    [<00000000d195ac13>] kthread+0x2c7/0x3c0
    [<00000000b79c1ee9>] ret_from_fork+0x2c/0x70
    [<000000001167f7e6>] ret_from_fork_asm+0x1b/0x30
unreferenced object 0xff110001067e6000 (size 1024):
  comm "kunit_try_catch", pid 98, jiffies 4294671798 (age 763.545s)
  hex dump (first 32 bytes):
    73 75 69 74 65 32 00 00 00 00 00 00 00 00 00 00  suite2..........
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<00000000116e8eba>] __kmalloc_node_track_caller+0x4e/0x140
    [<00000000e2f9cce9>] kmemdup+0x2c/0x60
    [<000000002a36710b>] kunit_filter_suites+0x3e4/0xa50
    [<00000000f452f130>] filter_suites_test_glob_test+0x1b7/0x660
    [<00000000cd1104a7>] kunit_try_run_case+0x119/0x270
    [<00000000c654c917>] kunit_generic_run_threadfn_adapter+0x4e/0xa0
    [<00000000d195ac13>] kthread+0x2c7/0x3c0
    [<00000000b79c1ee9>] ret_from_fork+0x2c/0x70
    [<000000001167f7e6>] ret_from_fork_asm+0x1b/0x30
unreferenced object 0xff11000103f3a800 (size 96):
  comm "kunit_try_catch", pid 98, jiffies 4294671798 (age 763.545s)
  hex dump (first 32 bytes):
    f0 e1 5a 88 ff ff ff ff 40 39 bb 8a ff ff ff ff  ..Z.....@9......
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<000000000d6e4891>] __kmalloc+0x4d/0x140
    [<000000006afe50bd>] kunit_filter_suites+0x424/0xa50
    [<00000000f452f130>] filter_suites_test_glob_test+0x1b7/0x660
    [<00000000cd1104a7>] kunit_try_run_case+0x119/0x270
    [<00000000c654c917>] kunit_generic_run_threadfn_adapter+0x4e/0xa0
    [<00000000d195ac13>] kthread+0x2c7/0x3c0
    [<00000000b79c1ee9>] ret_from_fork+0x2c/0x70
    [<000000001167f7e6>] ret_from_fork_asm+0x1b/0x30
unreferenced object 0xff11000101a72ac0 (size 16):
  comm "kunit_try_catch", pid 104, jiffies 4294671814 (age 763.529s)
  hex dump (first 16 bytes):
    00 00 00 00 00 00 00 00 e0 2a a7 01 01 00 11 ff  .........*......
  backtrace:
    [<000000000d6e4891>] __kmalloc+0x4d/0x140
    [<00000000c7b724e7>] kunit_filter_suites+0x108/0xa50
    [<00000000bad5427d>] filter_attr_test+0x1e9/0x6a0
    [<00000000cd1104a7>] kunit_try_run_case+0x119/0x270
    [<00000000c654c917>] kunit_generic_run_threadfn_adapter+0x4e/0xa0
    [<00000000d195ac13>] kthread+0x2c7/0x3c0
    [<00000000b79c1ee9>] ret_from_fork+0x2c/0x70
    [<000000001167f7e6>] ret_from_fork_asm+0x1b/0x30
unreferenced object 0xff11000103caf880 (size 32):
  comm "kunit_try_catch", pid 104, jiffies 4294671814 (age 763.547s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<000000000d6e4891>] __kmalloc+0x4d/0x140
    [<00000000c47b0f75>] kunit_filter_suites+0x189/0xa50
    [<00000000bad5427d>] filter_attr_test+0x1e9/0x6a0
    [<00000000cd1104a7>] kunit_try_run_case+0x119/0x270
    [<00000000c654c917>] kunit_generic_run_threadfn_adapter+0x4e/0xa0
    [<00000000d195ac13>] kthread+0x2c7/0x3c0
    [<00000000b79c1ee9>] ret_from_fork+0x2c/0x70
    [<000000001167f7e6>] ret_from_fork_asm+0x1b/0x30
unreferenced object 0xff11000101a72ae0 (size 16):
  comm "kunit_try_catch", pid 106, jiffies 4294671823 (age 763.538s)
  hex dump (first 16 bytes):
    00 00 00 00 00 00 00 00 00 2b a7 01 01 00 11 ff  .........+......
  backtrace:
    [<000000000d6e4891>] __kmalloc+0x4d/0x140
    [<00000000c7b724e7>] kunit_filter_suites+0x108/0xa50
    [<0000000096255c51>] filter_attr_empty_test+0x1b0/0x310
    [<00000000cd1104a7>] kunit_try_run_case+0x119/0x270
    [<00000000c654c917>] kunit_generic_run_threadfn_adapter+0x4e/0xa0
    [<00000000d195ac13>] kthread+0x2c7/0x3c0
    [<00000000b79c1ee9>] ret_from_fork+0x2c/0x70
    [<000000001167f7e6>] ret_from_fork_asm+0x1b/0x30
unreferenced object 0xff11000103caf9c0 (size 32):
  comm "kunit_try_catch", pid 106, jiffies 4294671823 (age 763.538s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<000000000d6e4891>] __kmalloc+0x4d/0x140
    [<00000000c47b0f75>] kunit_filter_suites+0x189/0xa50
    [<0000000096255c51>] filter_attr_empty_test+0x1b0/0x310
    [<00000000cd1104a7>] kunit_try_run_case+0x119/0x270
    [<00000000c654c917>] kunit_generic_run_threadfn_adapter+0x4e/0xa0
    [<00000000d195ac13>] kthread+0x2c7/0x3c0
    [<00000000b79c1ee9>] ret_from_fork+0x2c/0x70
    [<000000001167f7e6>] ret_from_fork_asm+0x1b/0x30
unreferenced object 0xff11000101a72b00 (size 16):
  comm "kunit_try_catch", pid 108, jiffies 4294671832 (age 763.529s)
  hex dump (first 16 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<000000000d6e4891>] __kmalloc+0x4d/0x140
    [<00000000c47b0f75>] kunit_filter_suites+0x189/0xa50
    [<00000000881258cc>] filter_attr_skip_test+0x148/0x770
    [<00000000cd1104a7>] kunit_try_run_case+0x119/0x270
    [<00000000c654c917>] kunit_generic_run_threadfn_adapter+0x4e/0xa0
    [<00000000d195ac13>] kthread+0x2c7/0x3c0
    [<00000000b79c1ee9>] ret_from_fork+0x2c/0x70
    [<000000001167f7e6>] ret_from_fork_asm+0x1b/0x30

Fixes: 5d31f71efcb6 ("kunit: add kunit.filter_glob cmdline option to filter suites")
Fixes: 529534e8cba3 ("kunit: Add ability to filter attributes")
Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Reviewed-by: Rae Moar <rmoar@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 lib/kunit/executor.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index 5b5bed1efb93..481901d245d0 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -151,7 +151,7 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
 		for (j = 0; j < filter_count; j++)
 			parsed_filters[j] = kunit_next_attr_filter(&filters, err);
 		if (*err)
-			return filtered;
+			goto err;
 	}
 
 	for (i = 0; &suite_set->start[i] != suite_set->end; i++) {
@@ -163,7 +163,7 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
 					parsed_glob.test_glob);
 			if (IS_ERR(filtered_suite)) {
 				*err = PTR_ERR(filtered_suite);
-				return filtered;
+				goto err;
 			}
 		}
 		if (filter_count) {
@@ -172,15 +172,18 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
 						parsed_filters[k], filter_action, err);
 
 				/* Free previous copy of suite */
-				if (k > 0 || filter_glob)
+				if (k > 0 || filter_glob) {
+					kfree(filtered_suite->test_cases);
 					kfree(filtered_suite);
+				}
+
 				filtered_suite = new_filtered_suite;
 
 				if (*err)
-					return filtered;
+					goto err;
 				if (IS_ERR(filtered_suite)) {
 					*err = PTR_ERR(filtered_suite);
-					return filtered;
+					goto err;
 				}
 				if (!filtered_suite)
 					break;
@@ -194,6 +197,10 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
 	}
 	filtered.end = copy;
 
+err:
+	if (*err)
+		kfree(copy);
+
 	if (filter_glob) {
 		kfree(parsed_glob.suite_glob);
 		kfree(parsed_glob.test_glob);

From 1c9fd080dffe5e5ad763527fbc2aa3f6f8c653e9 Mon Sep 17 00:00:00 2001
From: Rae Moar <rmoar@google.com>
Date: Thu, 3 Aug 2023 19:36:35 +0000
Subject: [PATCH 0764/2047] kunit: fix uninitialized variables bug in
 attributes filtering

Fix smatch warnings regarding uninitialized variables in the filtering
patch of the new KUnit Attributes feature.

Fixes: 529534e8cba3 ("kunit: Add ability to filter attributes")

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/r/202307270610.s0w4NKEn-lkp@intel.com/

Signed-off-by: Rae Moar <rmoar@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 lib/kunit/attributes.c    | 40 +++++++++++++++++----------------------
 lib/kunit/executor.c      | 18 +++++++++++-------
 lib/kunit/executor_test.c |  2 +-
 3 files changed, 29 insertions(+), 31 deletions(-)

diff --git a/lib/kunit/attributes.c b/lib/kunit/attributes.c
index d37c40c0ce4f..5e3034b6be99 100644
--- a/lib/kunit/attributes.c
+++ b/lib/kunit/attributes.c
@@ -102,7 +102,7 @@ static int int_filter(long val, const char *op, int input, int *err)
 static int attr_enum_filter(void *attr, const char *input, int *err,
 		const char * const str_list[], int max)
 {
-	int i, j, input_int;
+	int i, j, input_int = -1;
 	long test_val = (long)attr;
 	const char *input_val = NULL;
 
@@ -124,7 +124,7 @@ static int attr_enum_filter(void *attr, const char *input, int *err,
 			input_int = j;
 	}
 
-	if (!input_int) {
+	if (input_int < 0) {
 		*err = -EINVAL;
 		pr_err("kunit executor: invalid filter input: %s\n", input);
 		return false;
@@ -186,8 +186,10 @@ static void *attr_module_get(void *test_or_suite, bool is_test)
 	// Suites get their module attribute from their first test_case
 	if (test)
 		return ((void *) test->module_name);
-	else
+	else if (kunit_suite_num_test_cases(suite) > 0)
 		return ((void *) suite->test_cases[0].module_name);
+	else
+		return (void *) "";
 }
 
 /* List of all Test Attributes */
@@ -221,7 +223,7 @@ const char *kunit_attr_filter_name(struct kunit_attr_filter filter)
 void kunit_print_attr(void *test_or_suite, bool is_test, unsigned int test_level)
 {
 	int i;
-	bool to_free;
+	bool to_free = false;
 	void *attr;
 	const char *attr_name, *attr_str;
 	struct kunit_suite *suite = is_test ? NULL : test_or_suite;
@@ -255,7 +257,7 @@ void kunit_print_attr(void *test_or_suite, bool is_test, unsigned int test_level
 
 int kunit_get_filter_count(char *input)
 {
-	int i, comma_index, count = 0;
+	int i, comma_index = 0, count = 0;
 
 	for (i = 0; input[i]; i++) {
 		if (input[i] == ',') {
@@ -272,7 +274,7 @@ int kunit_get_filter_count(char *input)
 struct kunit_attr_filter kunit_next_attr_filter(char **filters, int *err)
 {
 	struct kunit_attr_filter filter = {};
-	int i, j, comma_index, new_start_index;
+	int i, j, comma_index = 0, new_start_index = 0;
 	int op_index = -1, attr_index = -1;
 	char op;
 	char *input = *filters;
@@ -316,7 +318,7 @@ struct kunit_attr_filter kunit_next_attr_filter(char **filters, int *err)
 		filter.attr = &kunit_attr_list[attr_index];
 	}
 
-	if (comma_index) {
+	if (comma_index > 0) {
 		input[comma_index] = '\0';
 		filter.input = input + op_index;
 		input = input + new_start_index;
@@ -356,31 +358,22 @@ struct kunit_suite *kunit_filter_attr_tests(const struct kunit_suite *const suit
 
 	/* Save filtering result on default value */
 	default_result = filter.attr->filter(filter.attr->attr_default, filter.input, err);
-	if (*err) {
-		kfree(copy);
-		kfree(filtered);
-		return NULL;
-	}
+	if (*err)
+		goto err;
 
 	/* Save suite attribute value and filtering result on that value */
 	suite_val = filter.attr->get_attr((void *)suite, false);
 	suite_result = filter.attr->filter(suite_val, filter.input, err);
-	if (*err) {
-		kfree(copy);
-		kfree(filtered);
-		return NULL;
-	}
+	if (*err)
+		goto err;
 
 	/* For each test case, save test case if passes filtering. */
 	kunit_suite_for_each_test_case(suite, test_case) {
 		test_val = filter.attr->get_attr((void *) test_case, true);
 		test_result = filter.attr->filter(filter.attr->get_attr(test_case, true),
 				filter.input, err);
-		if (*err) {
-			kfree(copy);
-			kfree(filtered);
-			return NULL;
-		}
+		if (*err)
+			goto err;
 
 		/*
 		 * If attribute value of test case is set, filter on that value.
@@ -406,7 +399,8 @@ struct kunit_suite *kunit_filter_attr_tests(const struct kunit_suite *const suit
 		}
 	}
 
-	if (n == 0) {
+err:
+	if (n == 0 || *err) {
 		kfree(copy);
 		kfree(filtered);
 		return NULL;
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index 481901d245d0..dc295150c4e5 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -127,19 +127,18 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
 {
 	int i, j, k;
 	int filter_count = 0;
-	struct kunit_suite **copy, *filtered_suite, *new_filtered_suite;
-	struct suite_set filtered;
+	struct kunit_suite **copy, **copy_start, *filtered_suite, *new_filtered_suite;
+	struct suite_set filtered = {NULL, NULL};
 	struct kunit_glob_filter parsed_glob;
-	struct kunit_attr_filter *parsed_filters;
+	struct kunit_attr_filter *parsed_filters = NULL;
 
 	const size_t max = suite_set->end - suite_set->start;
 
 	copy = kmalloc_array(max, sizeof(*filtered.start), GFP_KERNEL);
-	filtered.start = copy;
 	if (!copy) { /* won't be able to run anything, return an empty set */
-		filtered.end = copy;
 		return filtered;
 	}
+	copy_start = copy;
 
 	if (filter_glob)
 		kunit_parse_glob_filter(&parsed_glob, filter_glob);
@@ -147,7 +146,11 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
 	/* Parse attribute filters */
 	if (filters) {
 		filter_count = kunit_get_filter_count(filters);
-		parsed_filters = kcalloc(filter_count + 1, sizeof(*parsed_filters), GFP_KERNEL);
+		parsed_filters = kcalloc(filter_count, sizeof(*parsed_filters), GFP_KERNEL);
+		if (!parsed_filters) {
+			kfree(copy);
+			return filtered;
+		}
 		for (j = 0; j < filter_count; j++)
 			parsed_filters[j] = kunit_next_attr_filter(&filters, err);
 		if (*err)
@@ -166,7 +169,7 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
 				goto err;
 			}
 		}
-		if (filter_count) {
+		if (filter_count > 0 && parsed_filters != NULL) {
 			for (k = 0; k < filter_count; k++) {
 				new_filtered_suite = kunit_filter_attr_tests(filtered_suite,
 						parsed_filters[k], filter_action, err);
@@ -195,6 +198,7 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
 
 		*copy++ = filtered_suite;
 	}
+	filtered.start = copy_start;
 	filtered.end = copy;
 
 err:
diff --git a/lib/kunit/executor_test.c b/lib/kunit/executor_test.c
index 01280cb8d451..3e0a1c99cb4e 100644
--- a/lib/kunit/executor_test.c
+++ b/lib/kunit/executor_test.c
@@ -119,7 +119,7 @@ static void parse_filter_attr_test(struct kunit *test)
 	filter_count = kunit_get_filter_count(filters);
 	KUNIT_EXPECT_EQ(test, filter_count, 2);
 
-	parsed_filters = kunit_kcalloc(test, filter_count + 1, sizeof(*parsed_filters),
+	parsed_filters = kunit_kcalloc(test, filter_count, sizeof(*parsed_filters),
 			GFP_KERNEL);
 	for (j = 0; j < filter_count; j++) {
 		parsed_filters[j] = kunit_next_attr_filter(&filters, &err);

From dd1df82ce27a05c7cf0bc5387978412e35fbdaaf Mon Sep 17 00:00:00 2001
From: Francesco Dolcini <francesco.dolcini@toradex.com>
Date: Sat, 29 Jul 2023 00:26:39 +0200
Subject: [PATCH 0765/2047] clk: keystone: syscon-clk: Fix audio refclk

Audio REFCLK's are not working correctly, trying to use them lead to the
following errors:

[    6.575277] of_clk_hw_onecell_get: invalid index 4294934528
[    6.581515] wm8904 1-001a: Failed to get MCLK
[    6.586290] wm8904: probe of 1-001a failed with error -2

The issue is that Audio REFCLK has #clock-cells = 0 [1], while the driver
is registering those clocks assuming they have one cells. Fix this by
registering the clock with of_clk_hw_simple_get() when there is only one
instance, e.g. "audio_refclk".

[1] Documentation/devicetree/bindings/clock/ti,am62-audio-refclk.yaml

Fixes: 6acab96ee337 ("clk: keystone: syscon-clk: Add support for audio refclk")
Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
Link: https://lore.kernel.org/r/20230728222639.110409-1-francesco@dolcini.it
[sboyd@kernel.org: Simplify if-return-else logic]
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/keystone/syscon-clk.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/keystone/syscon-clk.c b/drivers/clk/keystone/syscon-clk.c
index d33f74119488..935d9a2d8c2b 100644
--- a/drivers/clk/keystone/syscon-clk.c
+++ b/drivers/clk/keystone/syscon-clk.c
@@ -151,8 +151,10 @@ static int ti_syscon_gate_clk_probe(struct platform_device *pdev)
 				 data[i].name);
 	}
 
-	return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
-					   hw_data);
+	if (num_clks == 1)
+		return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get,
+						   hw_data->hws[0]);
+	return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, hw_data);
 }
 
 #define TI_SYSCON_CLK_GATE(_name, _offset, _bit_idx)	\

From 2746f13f6f1df7999001d6595b16f789ecc28ad1 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Tue, 25 Jul 2023 18:51:40 +0100
Subject: [PATCH 0766/2047] clk: Fix undefined reference to
 `clk_rate_exclusive_{get,put}'

The COMMON_CLK config is not enabled in some of the architectures.
This causes below build issues:

pwm-rz-mtu3.c:(.text+0x114):
undefined reference to `clk_rate_exclusive_put'
pwm-rz-mtu3.c:(.text+0x32c):
undefined reference to `clk_rate_exclusive_get'

Fix these issues by moving clk_rate_exclusive_{get,put} inside COMMON_CLK
code block, as clk.c is enabled by COMMON_CLK.

Fixes: 55e9b8b7b806 ("clk: add clk_rate_exclusive api")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/all/202307251752.vLfmmhYm-lkp@intel.com/
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Link: https://lore.kernel.org/r/20230725175140.361479-1-biju.das.jz@bp.renesas.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/clk.h | 80 ++++++++++++++++++++++-----------------------
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/include/linux/clk.h b/include/linux/clk.h
index 1ef013324237..06f1b292f8a0 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -183,6 +183,39 @@ int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale);
  */
 bool clk_is_match(const struct clk *p, const struct clk *q);
 
+/**
+ * clk_rate_exclusive_get - get exclusivity over the rate control of a
+ *                          producer
+ * @clk: clock source
+ *
+ * This function allows drivers to get exclusive control over the rate of a
+ * provider. It prevents any other consumer to execute, even indirectly,
+ * opereation which could alter the rate of the provider or cause glitches
+ *
+ * If exlusivity is claimed more than once on clock, even by the same driver,
+ * the rate effectively gets locked as exclusivity can't be preempted.
+ *
+ * Must not be called from within atomic context.
+ *
+ * Returns success (0) or negative errno.
+ */
+int clk_rate_exclusive_get(struct clk *clk);
+
+/**
+ * clk_rate_exclusive_put - release exclusivity over the rate control of a
+ *                          producer
+ * @clk: clock source
+ *
+ * This function allows drivers to release the exclusivity it previously got
+ * from clk_rate_exclusive_get()
+ *
+ * The caller must balance the number of clk_rate_exclusive_get() and
+ * clk_rate_exclusive_put() calls.
+ *
+ * Must not be called from within atomic context.
+ */
+void clk_rate_exclusive_put(struct clk *clk);
+
 #else
 
 static inline int clk_notifier_register(struct clk *clk,
@@ -236,6 +269,13 @@ static inline bool clk_is_match(const struct clk *p, const struct clk *q)
 	return p == q;
 }
 
+static inline int clk_rate_exclusive_get(struct clk *clk)
+{
+	return 0;
+}
+
+static inline void clk_rate_exclusive_put(struct clk *clk) {}
+
 #endif
 
 #ifdef CONFIG_HAVE_CLK_PREPARE
@@ -583,38 +623,6 @@ struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id);
  */
 struct clk *devm_get_clk_from_child(struct device *dev,
 				    struct device_node *np, const char *con_id);
-/**
- * clk_rate_exclusive_get - get exclusivity over the rate control of a
- *                          producer
- * @clk: clock source
- *
- * This function allows drivers to get exclusive control over the rate of a
- * provider. It prevents any other consumer to execute, even indirectly,
- * opereation which could alter the rate of the provider or cause glitches
- *
- * If exlusivity is claimed more than once on clock, even by the same driver,
- * the rate effectively gets locked as exclusivity can't be preempted.
- *
- * Must not be called from within atomic context.
- *
- * Returns success (0) or negative errno.
- */
-int clk_rate_exclusive_get(struct clk *clk);
-
-/**
- * clk_rate_exclusive_put - release exclusivity over the rate control of a
- *                          producer
- * @clk: clock source
- *
- * This function allows drivers to release the exclusivity it previously got
- * from clk_rate_exclusive_get()
- *
- * The caller must balance the number of clk_rate_exclusive_get() and
- * clk_rate_exclusive_put() calls.
- *
- * Must not be called from within atomic context.
- */
-void clk_rate_exclusive_put(struct clk *clk);
 
 /**
  * clk_enable - inform the system when the clock source should be running.
@@ -974,14 +982,6 @@ static inline void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks) {}
 
 static inline void devm_clk_put(struct device *dev, struct clk *clk) {}
 
-
-static inline int clk_rate_exclusive_get(struct clk *clk)
-{
-	return 0;
-}
-
-static inline void clk_rate_exclusive_put(struct clk *clk) {}
-
 static inline int clk_enable(struct clk *clk)
 {
 	return 0;

From b9a281f1f72b9786a4cb04b4105c121f3333aabf Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 4 Aug 2023 08:48:44 -0600
Subject: [PATCH 0767/2047] hwrng: xgene: Add explicit io.h include

Commit 0788257aeebe ("hwrng: Explicitly include correct DT includes")
removed an implicit include of io.h. On most architectures, there's
still an implicit include of it, but not on s390. Enabling COMPILE_TEST
in commit 1ce1cd8208ad ("hwrng: Enable COMPILE_TEST for more drivers")
exposed this.

Fixes: 0788257aeebe ("hwrng: Explicitly include correct DT includes")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202308042049.8R2tNRoo-lkp@intel.com/
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/xgene-rng.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/char/hw_random/xgene-rng.c b/drivers/char/hw_random/xgene-rng.c
index c25bb169563d..99f4e86ac3e9 100644
--- a/drivers/char/hw_random/xgene-rng.c
+++ b/drivers/char/hw_random/xgene-rng.c
@@ -14,6 +14,7 @@
 #include <linux/hw_random.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>

From 71214379532794b5a05ea760524cdfb1c4ddbfcb Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Sat, 5 Aug 2023 12:38:02 +0500
Subject: [PATCH 0768/2047] selftests: fchmodat2: remove duplicate unneeded
 defines

These duplicate defines should automatically be picked up from kernel
headers. Use KHDR_INCLUDES to add kernel header files.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Message-Id: <20230805073809.1753462-4-usama.anjum@collabora.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 tools/testing/selftests/fchmodat2/Makefile    |  2 +-
 .../selftests/fchmodat2/fchmodat2_test.c      | 20 -------------------
 2 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile
index 45b519eab851..20839f8e43f2 100644
--- a/tools/testing/selftests/fchmodat2/Makefile
+++ b/tools/testing/selftests/fchmodat2/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-or-later
 
-CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES)
 TEST_GEN_PROGS := fchmodat2_test
 
 include ../lib.mk
diff --git a/tools/testing/selftests/fchmodat2/fchmodat2_test.c b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
index 2d98eb215bc6..e0319417124d 100644
--- a/tools/testing/selftests/fchmodat2/fchmodat2_test.c
+++ b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
@@ -9,26 +9,6 @@
 
 #include "../kselftest.h"
 
-#ifndef __NR_fchmodat2
-	#if defined __alpha__
-		#define __NR_fchmodat2 562
-	#elif defined _MIPS_SIM
-		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
-			#define __NR_fchmodat2 (452 + 4000)
-		#endif
-		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
-			#define __NR_fchmodat2 (452 + 6000)
-		#endif
-		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
-			#define __NR_fchmodat2 (452 + 5000)
-		#endif
-	#elif defined __ia64__
-		#define __NR_fchmodat2 (452 + 1024)
-	#else
-		#define __NR_fchmodat2 452
-	#endif
-#endif
-
 int sys_fchmodat2(int dfd, const char *filename, mode_t mode, int flags)
 {
 	int ret = syscall(__NR_fchmodat2, dfd, filename, mode, flags);

From 5ef956634bc894ab1145bf7cc8e500063fda614d Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Mon, 19 Jun 2023 14:56:22 +0800
Subject: [PATCH 0769/2047] selftests/nolibc: add a standalone test report
 macro

The run-user, run and rerun targets use the same test report script,
let's add a standalone test report macro for them.

This shrinks code lines and simplify the future maintainability.

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/ZIB792FtG6ibOudp@1wt.eu/
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 1b7b3c82f8ad..262a9f21d1b4 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -84,6 +84,10 @@ CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 \
 		$(CFLAGS_$(ARCH)) $(CFLAGS_STACKPROTECTOR)
 LDFLAGS := -s
 
+REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++} /\[SKIPPED\][\r]*$$/{s++} \
+		END{ printf("%d test(s) passed, %d skipped, %d failed.", p, s, f); \
+		if (s+f > 0) printf(" See all results in %s\n", ARGV[1]); else print; }'
+
 help:
 	@echo "Supported targets under selftests/nolibc:"
 	@echo "  all          call the \"run\" target below"
@@ -131,10 +135,7 @@ libc-test: nolibc-test.c
 # qemu user-land test
 run-user: nolibc-test
 	$(Q)qemu-$(QEMU_ARCH) ./nolibc-test > "$(CURDIR)/run.out" || :
-	$(Q)awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++} /\[SKIPPED\][\r]*$$/{s++} \
-	         END{ printf("%d test(s) passed, %d skipped, %d failed.", p, s, f); \
-	         if (s+f > 0) printf(" See all results in %s\n", ARGV[1]); else print; }' \
-	         $(CURDIR)/run.out
+	$(Q)$(REPORT) $(CURDIR)/run.out
 
 initramfs: nolibc-test
 	$(QUIET_MKDIR)mkdir -p initramfs
@@ -150,18 +151,12 @@ kernel: initramfs
 # run the tests after building the kernel
 run: kernel
 	$(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
-	$(Q)awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++} /\[SKIPPED\][\r]*$$/{s++} \
-	         END{ printf("%d test(s) passed, %d skipped, %d failed.", p, s, f); \
-	         if (s+f > 0) printf(" See all results in %s\n", ARGV[1]); else print; }' \
-	         $(CURDIR)/run.out
+	$(Q)$(REPORT) $(CURDIR)/run.out
 
 # re-run the tests from an existing kernel
 rerun:
 	$(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
-	$(Q)awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++} /\[SKIPPED\][\r]*$$/{s++} \
-	         END{ printf("%d test(s) passed, %d skipped, %d failed.", p, s, f); \
-	         if (s+f > 0) printf(" See all results in %s\n", ARGV[1]); else print; }' \
-	         $(CURDIR)/run.out
+	$(Q)$(REPORT) $(CURDIR)/run.out
 
 clean:
 	$(call QUIET_CLEAN, sysroot)

From b3389e48bf3199607a96240487c78a1fdd4b3e61 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Mon, 19 Jun 2023 14:57:34 +0800
Subject: [PATCH 0770/2047] selftests/nolibc: always print the path to test log
 file

Even when there is no failure, developers may be still interested in the
test log file, especially, string alignment, duplicated print, kernel
message and so forth, so, always print the path to test log file.

A new line is added for such a print to avoid annoying people who don't
care about it when the test pass completely.

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/ZIB792FtG6ibOudp@1wt.eu/
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 262a9f21d1b4..2a0c3f4fa204 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -85,8 +85,8 @@ CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 \
 LDFLAGS := -s
 
 REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++} /\[SKIPPED\][\r]*$$/{s++} \
-		END{ printf("%d test(s) passed, %d skipped, %d failed.", p, s, f); \
-		if (s+f > 0) printf(" See all results in %s\n", ARGV[1]); else print; }'
+		END{ printf("%d test(s) passed, %d skipped, %d failed.\n", p, s, f); \
+		printf("See all results in %s\n", ARGV[1]); }'
 
 help:
 	@echo "Supported targets under selftests/nolibc:"

From 5163b8d31eae909f17184cd020ef5785b36c714e Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Mon, 19 Jun 2023 15:01:43 +0800
Subject: [PATCH 0771/2047] selftests/nolibc: restore the failed tests print

The commit fa0df56a804b ("selftests/nolibc: also count skipped and
failed tests in output") added counting for the skipped and failed
tests, but also removed the 'FAIL' results print, let's restore it for
it really allow users to learn the failed details without opening the
log file.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 2a0c3f4fa204..000621f21adc 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -84,7 +84,7 @@ CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 \
 		$(CFLAGS_$(ARCH)) $(CFLAGS_STACKPROTECTOR)
 LDFLAGS := -s
 
-REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++} /\[SKIPPED\][\r]*$$/{s++} \
+REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++;print} /\[SKIPPED\][\r]*$$/{s++} \
 		END{ printf("%d test(s) passed, %d skipped, %d failed.\n", p, s, f); \
 		printf("See all results in %s\n", ARGV[1]); }'
 

From ca50df3098939578d577477a9464a46a2e9fea66 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Mon, 19 Jun 2023 20:24:15 +0800
Subject: [PATCH 0772/2047] tools/nolibc: fix up #error compile failures with
 -ENOSYS

Compiling nolibc for rv32 got such errors:

    In file included from nolibc/sysroot/riscv/include/nolibc.h:99,
                     from nolibc/sysroot/riscv/include/errno.h:26,
                     from nolibc/sysroot/riscv/include/stdio.h:14,
                     from tools/testing/selftests/nolibc/nolibc-test.c:12:
    nolibc/sysroot/riscv/include/sys.h:946:2: error: #error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
      946 | #error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
          |  ^~~~~
    nolibc/sysroot/riscv/include/sys.h:1062:2: error: #error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()
     1062 | #error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()

If a syscall is not supported by a target platform, 'return -ENOSYS' is
better than '#error', which lets the other syscalls work as-is and
allows developers to fix up the test failures reported by nolibc-test
one by one later.

This converts all of the '#error' to 'return -ENOSYS', so, all of the
'#error' failures are fixed.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/linux-riscv/5e7d2adf-e96f-41ca-a4c6-5c87a25d4c9c@app.fastmail.com/
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 856249a11890..78c86f124335 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -124,7 +124,7 @@ int sys_chmod(const char *path, mode_t mode)
 #elif defined(__NR_chmod)
 	return my_syscall2(__NR_chmod, path, mode);
 #else
-#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod()
+	return -ENOSYS;
 #endif
 }
 
@@ -153,7 +153,7 @@ int sys_chown(const char *path, uid_t owner, gid_t group)
 #elif defined(__NR_chown)
 	return my_syscall3(__NR_chown, path, owner, group);
 #else
-#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown()
+	return -ENOSYS;
 #endif
 }
 
@@ -251,7 +251,7 @@ int sys_dup2(int old, int new)
 #elif defined(__NR_dup2)
 	return my_syscall2(__NR_dup2, old, new);
 #else
-#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2()
+	return -ENOSYS;
 #endif
 }
 
@@ -351,7 +351,7 @@ pid_t sys_fork(void)
 #elif defined(__NR_fork)
 	return my_syscall0(__NR_fork);
 #else
-#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork()
+	return -ENOSYS;
 #endif
 }
 #endif
@@ -648,7 +648,7 @@ int sys_link(const char *old, const char *new)
 #elif defined(__NR_link)
 	return my_syscall2(__NR_link, old, new);
 #else
-#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link()
+	return -ENOSYS;
 #endif
 }
 
@@ -700,7 +700,7 @@ int sys_mkdir(const char *path, mode_t mode)
 #elif defined(__NR_mkdir)
 	return my_syscall2(__NR_mkdir, path, mode);
 #else
-#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir()
+	return -ENOSYS;
 #endif
 }
 
@@ -729,7 +729,7 @@ long sys_mknod(const char *path, mode_t mode, dev_t dev)
 #elif defined(__NR_mknod)
 	return my_syscall3(__NR_mknod, path, mode, dev);
 #else
-#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod()
+	return -ENOSYS;
 #endif
 }
 
@@ -848,7 +848,7 @@ int sys_open(const char *path, int flags, mode_t mode)
 #elif defined(__NR_open)
 	return my_syscall3(__NR_open, path, flags, mode);
 #else
-#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open()
+	return -ENOSYS;
 #endif
 }
 
@@ -943,7 +943,7 @@ int sys_poll(struct pollfd *fds, int nfds, int timeout)
 #elif defined(__NR_poll)
 	return my_syscall3(__NR_poll, fds, nfds, timeout);
 #else
-#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
+	return -ENOSYS;
 #endif
 }
 
@@ -1059,7 +1059,7 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva
 #endif
 	return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
 #else
-#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()
+	return -ENOSYS;
 #endif
 }
 
@@ -1196,7 +1196,7 @@ int sys_stat(const char *path, struct stat *buf)
 #elif defined(__NR_stat)
 	ret = my_syscall2(__NR_stat, path, &stat);
 #else
-#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat()
+	return -ENOSYS;
 #endif
 	buf->st_dev          = stat.st_dev;
 	buf->st_ino          = stat.st_ino;
@@ -1243,7 +1243,7 @@ int sys_symlink(const char *old, const char *new)
 #elif defined(__NR_symlink)
 	return my_syscall2(__NR_symlink, old, new);
 #else
-#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink()
+	return -ENOSYS;
 #endif
 }
 
@@ -1312,7 +1312,7 @@ int sys_unlink(const char *path)
 #elif defined(__NR_unlink)
 	return my_syscall1(__NR_unlink, path);
 #else
-#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink()
+	return -ENOSYS;
 #endif
 }
 

From 2f98aca8aaef0fca36adff5c478af1788997debd Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Mon, 19 Jun 2023 20:25:47 +0800
Subject: [PATCH 0773/2047] tools/nolibc: fix up undeclared syscall macros with
 #ifdef and -ENOSYS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Compiling nolibc for rv32 got such errors:

    nolibc/sysroot/riscv/include/sys.h: In function ‘sys_gettimeofday’:
    nolibc/sysroot/riscv/include/sys.h:557:21: error: ‘__NR_gettimeofday’ undeclared (first use in this function); did you mean ‘sys_gettimeofday’?
      557 |  return my_syscall2(__NR_gettimeofday, tv, tz);
          |                     ^~~~~~~~~~~~~~~~~
    nolibc/sysroot/riscv/include/sys.h: In function ‘sys_lseek’:
    nolibc/sysroot/riscv/include/sys.h:675:21: error: ‘__NR_lseek’ undeclared (first use in this function)
      675 |  return my_syscall3(__NR_lseek, fd, offset, whence);
          |                     ^~~~~~~~~~
    nolibc/sysroot/riscv/include/sys.h: In function ‘sys_wait4’:
    nolibc/sysroot/riscv/include/sys.h:1341:21: error: ‘__NR_wait4’ undeclared (first use in this function)
     1341 |  return my_syscall4(__NR_wait4, pid, status, options, rusage);

If a syscall macro is not supported by a target platform, wrap it with
'#ifdef' and 'return -ENOSYS' for the '#else' branch, which lets the
other syscalls work as-is and allows developers to fix up the test
failures reported by nolibc-test one by one later.

This wraps all of the failed syscall macros with '#ifdef' and 'return
-ENOSYS' for the '#else' branch, so, all of the undeclared failures are
fixed.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/linux-riscv/5e7d2adf-e96f-41ca-a4c6-5c87a25d4c9c@app.fastmail.com/
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 78c86f124335..5464f93e863e 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -554,7 +554,11 @@ long getpagesize(void)
 static __attribute__((unused))
 int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
+#ifdef __NR_gettimeofday
 	return my_syscall2(__NR_gettimeofday, tv, tz);
+#else
+	return -ENOSYS;
+#endif
 }
 
 static __attribute__((unused))
@@ -672,7 +676,11 @@ int link(const char *old, const char *new)
 static __attribute__((unused))
 off_t sys_lseek(int fd, off_t offset, int whence)
 {
+#ifdef __NR_lseek
 	return my_syscall3(__NR_lseek, fd, offset, whence);
+#else
+	return -ENOSYS;
+#endif
 }
 
 static __attribute__((unused))
@@ -1338,7 +1346,11 @@ int unlink(const char *path)
 static __attribute__((unused))
 pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
 {
+#ifdef __NR_wait4
 	return my_syscall4(__NR_wait4, pid, status, options, rusage);
+#else
+	return -ENOSYS;
+#endif
 }
 
 static __attribute__((unused))

From 428905da6ec4a02d08dfcc09013035594f666a31 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Wed, 28 Jun 2023 21:08:32 +0800
Subject: [PATCH 0774/2047] tools/nolibc: sys.h: add a syscall return helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most of the library routines share the same syscall return logic:

  In general, a 0 return value indicates success.  A -1 return value
  indicates an error, and an error number is stored in errno. [1]

Let's add a __sysret() helper for the above logic to simplify the coding
and shrink the code lines too.

Thomas suggested to use inline function instead of macro for __sysret().

Willy suggested to make __sysret() be always inline.

[1]: https://man7.org/linux/man-pages/man2/syscall.2.html

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/linux-riscv/ZH1+hkhiA2+ItSvX@1wt.eu/
Suggested-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/linux-riscv/ea4e7442-7223-4211-ba29-70821e907888@t-8ch.de/
Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 5464f93e863e..097eef88cf7e 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -28,6 +28,16 @@
 #include "errno.h"
 #include "types.h"
 
+/* Syscall return helper, set errno as -ret when ret < 0 */
+static __inline__ __attribute__((unused, always_inline))
+long __sysret(long ret)
+{
+	if (ret < 0) {
+		SET_ERRNO(-ret);
+		ret = -1;
+	}
+	return ret;
+}
 
 /* Functions in this file only describe syscalls. They're declared static so
  * that the compiler usually decides to inline them while still being allowed

From c8d54fa37c5390cfbb63f63900a2466469eafb3a Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Wed, 28 Jun 2023 21:11:44 +0800
Subject: [PATCH 0775/2047] tools/nolibc: unistd.h: apply __sysret() helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use __sysret() to shrink the whole _syscall() to oneline code.

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/unistd.h | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
index 0e832e10a0b2..fabc846f797b 100644
--- a/tools/include/nolibc/unistd.h
+++ b/tools/include/nolibc/unistd.h
@@ -56,16 +56,7 @@ int tcsetpgrp(int fd, pid_t pid)
 	return ioctl(fd, TIOCSPGRP, &pid);
 }
 
-#define _syscall(N, ...)                                                      \
-({                                                                            \
-	long _ret = my_syscall##N(__VA_ARGS__);                               \
-	if (_ret < 0) {                                                       \
-		SET_ERRNO(-_ret);                                             \
-		_ret = -1;                                                    \
-	}                                                                     \
-	_ret;                                                                 \
-})
-
+#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__))
 #define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0)
 #define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N
 #define _syscall_n(N, ...) _syscall(N, __VA_ARGS__)

From d27447bc2e0a0c120fb84c33e46885a14a9216eb Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Wed, 28 Jun 2023 21:13:25 +0800
Subject: [PATCH 0776/2047] tools/nolibc: sys.h: apply __sysret() helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use __sysret() to shrink most of the library routines to oneline code.

Removed 266 lines of duplicated code.

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h | 354 +++++--------------------------------
 1 file changed, 44 insertions(+), 310 deletions(-)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 097eef88cf7e..53bc3ad6593e 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -76,13 +76,7 @@ void *sys_brk(void *addr)
 static __attribute__((unused))
 int brk(void *addr)
 {
-	void *ret = sys_brk(addr);
-
-	if (!ret) {
-		SET_ERRNO(ENOMEM);
-		return -1;
-	}
-	return 0;
+	return __sysret(sys_brk(addr) ? 0 : -ENOMEM);
 }
 
 static __attribute__((unused))
@@ -112,13 +106,7 @@ int sys_chdir(const char *path)
 static __attribute__((unused))
 int chdir(const char *path)
 {
-	int ret = sys_chdir(path);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_chdir(path));
 }
 
 
@@ -141,13 +129,7 @@ int sys_chmod(const char *path, mode_t mode)
 static __attribute__((unused))
 int chmod(const char *path, mode_t mode)
 {
-	int ret = sys_chmod(path, mode);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_chmod(path, mode));
 }
 
 
@@ -170,13 +152,7 @@ int sys_chown(const char *path, uid_t owner, gid_t group)
 static __attribute__((unused))
 int chown(const char *path, uid_t owner, gid_t group)
 {
-	int ret = sys_chown(path, owner, group);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_chown(path, owner, group));
 }
 
 
@@ -193,13 +169,7 @@ int sys_chroot(const char *path)
 static __attribute__((unused))
 int chroot(const char *path)
 {
-	int ret = sys_chroot(path);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_chroot(path));
 }
 
 
@@ -216,13 +186,7 @@ int sys_close(int fd)
 static __attribute__((unused))
 int close(int fd)
 {
-	int ret = sys_close(fd);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_close(fd));
 }
 
 
@@ -239,13 +203,7 @@ int sys_dup(int fd)
 static __attribute__((unused))
 int dup(int fd)
 {
-	int ret = sys_dup(fd);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_dup(fd));
 }
 
 
@@ -268,13 +226,7 @@ int sys_dup2(int old, int new)
 static __attribute__((unused))
 int dup2(int old, int new)
 {
-	int ret = sys_dup2(old, new);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_dup2(old, new));
 }
 
 
@@ -292,13 +244,7 @@ int sys_dup3(int old, int new, int flags)
 static __attribute__((unused))
 int dup3(int old, int new, int flags)
 {
-	int ret = sys_dup3(old, new, flags);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_dup3(old, new, flags));
 }
 #endif
 
@@ -316,13 +262,7 @@ int sys_execve(const char *filename, char *const argv[], char *const envp[])
 static __attribute__((unused))
 int execve(const char *filename, char *const argv[], char *const envp[])
 {
-	int ret = sys_execve(filename, argv, envp);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_execve(filename, argv, envp));
 }
 
 
@@ -369,13 +309,7 @@ pid_t sys_fork(void)
 static __attribute__((unused))
 pid_t fork(void)
 {
-	pid_t ret = sys_fork();
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_fork());
 }
 
 
@@ -392,13 +326,7 @@ int sys_fsync(int fd)
 static __attribute__((unused))
 int fsync(int fd)
 {
-	int ret = sys_fsync(fd);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_fsync(fd));
 }
 
 
@@ -415,13 +343,7 @@ int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count)
 static __attribute__((unused))
 int getdents64(int fd, struct linux_dirent64 *dirp, int count)
 {
-	int ret = sys_getdents64(fd, dirp, count);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_getdents64(fd, dirp, count));
 }
 
 
@@ -459,13 +381,7 @@ pid_t sys_getpgid(pid_t pid)
 static __attribute__((unused))
 pid_t getpgid(pid_t pid)
 {
-	pid_t ret = sys_getpgid(pid);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_getpgid(pid));
 }
 
 
@@ -545,15 +461,7 @@ static unsigned long getauxval(unsigned long key);
 static __attribute__((unused))
 long getpagesize(void)
 {
-	long ret;
-
-	ret = getauxval(AT_PAGESZ);
-	if (!ret) {
-		SET_ERRNO(ENOENT);
-		return -1;
-	}
-
-	return ret;
+	return __sysret(getauxval(AT_PAGESZ) ?: -ENOENT);
 }
 
 
@@ -574,13 +482,7 @@ int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
 static __attribute__((unused))
 int gettimeofday(struct timeval *tv, struct timezone *tz)
 {
-	int ret = sys_gettimeofday(tv, tz);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_gettimeofday(tv, tz));
 }
 
 
@@ -618,13 +520,7 @@ int sys_ioctl(int fd, unsigned long req, void *value)
 static __attribute__((unused))
 int ioctl(int fd, unsigned long req, void *value)
 {
-	int ret = sys_ioctl(fd, req, value);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_ioctl(fd, req, value));
 }
 
 /*
@@ -640,13 +536,7 @@ int sys_kill(pid_t pid, int signal)
 static __attribute__((unused))
 int kill(pid_t pid, int signal)
 {
-	int ret = sys_kill(pid, signal);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_kill(pid, signal));
 }
 
 
@@ -669,13 +559,7 @@ int sys_link(const char *old, const char *new)
 static __attribute__((unused))
 int link(const char *old, const char *new)
 {
-	int ret = sys_link(old, new);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_link(old, new));
 }
 
 
@@ -696,13 +580,7 @@ off_t sys_lseek(int fd, off_t offset, int whence)
 static __attribute__((unused))
 off_t lseek(int fd, off_t offset, int whence)
 {
-	off_t ret = sys_lseek(fd, offset, whence);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_lseek(fd, offset, whence));
 }
 
 
@@ -725,13 +603,7 @@ int sys_mkdir(const char *path, mode_t mode)
 static __attribute__((unused))
 int mkdir(const char *path, mode_t mode)
 {
-	int ret = sys_mkdir(path, mode);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_mkdir(path, mode));
 }
 
 
@@ -754,13 +626,7 @@ long sys_mknod(const char *path, mode_t mode, dev_t dev)
 static __attribute__((unused))
 int mknod(const char *path, mode_t mode, dev_t dev)
 {
-	int ret = sys_mknod(path, mode, dev);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_mknod(path, mode, dev));
 }
 
 #ifndef MAP_SHARED
@@ -818,13 +684,7 @@ int sys_munmap(void *addr, size_t length)
 static __attribute__((unused))
 int munmap(void *addr, size_t length)
 {
-	int ret = sys_munmap(addr, length);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_munmap(addr, length));
 }
 
 /*
@@ -844,13 +704,7 @@ int mount(const char *src, const char *tgt,
           const char *fst, unsigned long flags,
           const void *data)
 {
-	int ret = sys_mount(src, tgt, fst, flags, data);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_mount(src, tgt, fst, flags, data));
 }
 
 
@@ -884,13 +738,7 @@ int open(const char *path, int flags, ...)
 		va_end(args);
 	}
 
-	ret = sys_open(path, flags, mode);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_open(path, flags, mode));
 }
 
 
@@ -910,13 +758,7 @@ static __attribute__((unused))
 int prctl(int option, unsigned long arg2, unsigned long arg3,
 		      unsigned long arg4, unsigned long arg5)
 {
-	int ret = sys_prctl(option, arg2, arg3, arg4, arg5);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5));
 }
 
 
@@ -933,13 +775,7 @@ int sys_pivot_root(const char *new, const char *old)
 static __attribute__((unused))
 int pivot_root(const char *new, const char *old)
 {
-	int ret = sys_pivot_root(new, old);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_pivot_root(new, old));
 }
 
 
@@ -968,13 +804,7 @@ int sys_poll(struct pollfd *fds, int nfds, int timeout)
 static __attribute__((unused))
 int poll(struct pollfd *fds, int nfds, int timeout)
 {
-	int ret = sys_poll(fds, nfds, timeout);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_poll(fds, nfds, timeout));
 }
 
 
@@ -991,13 +821,7 @@ ssize_t sys_read(int fd, void *buf, size_t count)
 static __attribute__((unused))
 ssize_t read(int fd, void *buf, size_t count)
 {
-	ssize_t ret = sys_read(fd, buf, count);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_read(fd, buf, count));
 }
 
 
@@ -1015,13 +839,7 @@ ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
 static __attribute__((unused))
 int reboot(int cmd)
 {
-	int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0));
 }
 
 
@@ -1038,13 +856,7 @@ int sys_sched_yield(void)
 static __attribute__((unused))
 int sched_yield(void)
 {
-	int ret = sys_sched_yield();
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_sched_yield());
 }
 
 
@@ -1084,13 +896,7 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva
 static __attribute__((unused))
 int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
 {
-	int ret = sys_select(nfds, rfds, wfds, efds, timeout);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_select(nfds, rfds, wfds, efds, timeout));
 }
 
 
@@ -1107,13 +913,7 @@ int sys_setpgid(pid_t pid, pid_t pgid)
 static __attribute__((unused))
 int setpgid(pid_t pid, pid_t pgid)
 {
-	int ret = sys_setpgid(pid, pgid);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_setpgid(pid, pgid));
 }
 
 
@@ -1130,13 +930,7 @@ pid_t sys_setsid(void)
 static __attribute__((unused))
 pid_t setsid(void)
 {
-	pid_t ret = sys_setsid();
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_setsid());
 }
 
 #if defined(__NR_statx)
@@ -1153,13 +947,7 @@ int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct sta
 static __attribute__((unused))
 int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
 {
-	int ret = sys_statx(fd, path, flags, mask, buf);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_statx(fd, path, flags, mask, buf));
 }
 #endif
 
@@ -1239,13 +1027,7 @@ int sys_stat(const char *path, struct stat *buf)
 static __attribute__((unused))
 int stat(const char *path, struct stat *buf)
 {
-	int ret = sys_stat(path, buf);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_stat(path, buf));
 }
 
 
@@ -1268,13 +1050,7 @@ int sys_symlink(const char *old, const char *new)
 static __attribute__((unused))
 int symlink(const char *old, const char *new)
 {
-	int ret = sys_symlink(old, new);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_symlink(old, new));
 }
 
 
@@ -1308,13 +1084,7 @@ int sys_umount2(const char *path, int flags)
 static __attribute__((unused))
 int umount2(const char *path, int flags)
 {
-	int ret = sys_umount2(path, flags);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_umount2(path, flags));
 }
 
 
@@ -1337,13 +1107,7 @@ int sys_unlink(const char *path)
 static __attribute__((unused))
 int unlink(const char *path)
 {
-	int ret = sys_unlink(path);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_unlink(path));
 }
 
 
@@ -1366,38 +1130,20 @@ pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
 static __attribute__((unused))
 pid_t wait(int *status)
 {
-	pid_t ret = sys_wait4(-1, status, 0, NULL);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_wait4(-1, status, 0, NULL));
 }
 
 static __attribute__((unused))
 pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
 {
-	pid_t ret = sys_wait4(pid, status, options, rusage);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_wait4(pid, status, options, rusage));
 }
 
 
 static __attribute__((unused))
 pid_t waitpid(pid_t pid, int *status, int options)
 {
-	pid_t ret = sys_wait4(pid, status, options, NULL);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_wait4(pid, status, options, NULL));
 }
 
 
@@ -1414,13 +1160,7 @@ ssize_t sys_write(int fd, const void *buf, size_t count)
 static __attribute__((unused))
 ssize_t write(int fd, const void *buf, size_t count)
 {
-	ssize_t ret = sys_write(fd, buf, count);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_write(fd, buf, count));
 }
 
 
@@ -1437,13 +1177,7 @@ int sys_memfd_create(const char *name, unsigned int flags)
 static __attribute__((unused))
 int memfd_create(const char *name, unsigned int flags)
 {
-	ssize_t ret = sys_memfd_create(name, flags);
-
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
-	}
-	return ret;
+	return __sysret(sys_memfd_create(name, flags));
 }
 
 /* make sure to include all global symbols */

From 2d7481eb5d885ffdc1b9d948151e983341e45c38 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Wed, 28 Jun 2023 21:14:53 +0800
Subject: [PATCH 0777/2047] tools/nolibc: unistd.h: reorder the syscall macros

Tune the macros in the using order and align most of them.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/unistd.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
index fabc846f797b..e38f3660c051 100644
--- a/tools/include/nolibc/unistd.h
+++ b/tools/include/nolibc/unistd.h
@@ -56,9 +56,9 @@ int tcsetpgrp(int fd, pid_t pid)
 	return ioctl(fd, TIOCSPGRP, &pid);
 }
 
-#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__))
-#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0)
 #define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N
+#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0)
+#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__))
 #define _syscall_n(N, ...) _syscall(N, __VA_ARGS__)
 #define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__)
 

From 5008b864b6a68ce993bc0d67258c697d1eee1f92 Mon Sep 17 00:00:00 2001
From: Tzung-Bi Shih <tzungbi@kernel.org>
Date: Thu, 3 Aug 2023 18:08:46 +0800
Subject: [PATCH 0778/2047] MAINTAINERS: update maintainers of chrome-platform

Add Tzung-Bi as a maintainer of chrome-platform.

Reviewed-by: Guenter Roeck <groeck@chromium.org>
Link: https://lore.kernel.org/r/20230803100846.3223564-1-tzungbi@kernel.org
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3be1bdfe8ecc..76299dc9a04c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4812,6 +4812,7 @@ F:	drivers/input/touchscreen/chipone_icn8505.c
 
 CHROME HARDWARE PLATFORM SUPPORT
 M:	Benson Leung <bleung@chromium.org>
+M:	Tzung-Bi Shih <tzungbi@kernel.org>
 L:	chrome-platform@lists.linux.dev
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/chrome-platform/linux.git

From f2d4dced9a584612b25adb559c1350243d2bb544 Mon Sep 17 00:00:00 2001
From: Rob Barnes <robbarnes@google.com>
Date: Wed, 2 Aug 2023 17:58:48 +0000
Subject: [PATCH 0779/2047] platform/chrome: cros_ec_lpc: Remove EC panic
 shutdown timeout

Remove the 1 second timeout applied to hw_protection_shutdown after an
EC panic. On some platforms this 1 second timeout is insufficient to
allow the filesystem to fully sync. Independently the EC will force a
full system reset after a short period. So this backup timeout is
unnecessary.

Signed-off-by: Rob Barnes <robbarnes@google.com>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Link: https://lore.kernel.org/r/20230802175847.1.Ie9fc53b6a1f4c6661c5376286a50e0cf51b3e961@changeid
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 drivers/platform/chrome/cros_ec_lpc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 500a61b093e4..356572452898 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -327,8 +327,8 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
 		dev_emerg(ec_dev->dev, "CrOS EC Panic Reported. Shutdown is imminent!");
 		blocking_notifier_call_chain(&ec_dev->panic_notifier, 0, ec_dev);
 		kobject_uevent_env(&ec_dev->dev->kobj, KOBJ_CHANGE, (char **)env);
-		/* Begin orderly shutdown. Force shutdown after 1 second. */
-		hw_protection_shutdown("CrOS EC Panic", 1000);
+		/* Begin orderly shutdown. EC will force reset after a short period. */
+		hw_protection_shutdown("CrOS EC Panic", -1);
 		/* Do not query for other events after a panic is reported */
 		return;
 	}

From 703e77134ebf7d99acbd0f85f7c94cfb408f4bd0 Mon Sep 17 00:00:00 2001
From: Tzung-Bi Shih <tzungbi@kernel.org>
Date: Mon, 31 Jul 2023 10:42:14 +0800
Subject: [PATCH 0780/2047] platform/chrome: chromeos_acpi: support official
 HID GOOG0016

Support official HID GOOG0016 for ChromeOS ACPI (see [1]).

[1]: https://crrev.com/c/2266713

Reviewed-by: Simon Glass <sjg@chromium.org>
Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Link: https://lore.kernel.org/r/20230731024214.908235-1-tzungbi@kernel.org
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 .../ABI/testing/sysfs-driver-chromeos-acpi        | 15 +++++++++++++++
 .../firmware-guide/acpi/chromeos-acpi-device.rst  |  5 ++---
 drivers/platform/chrome/chromeos_acpi.c           |  2 +-
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-chromeos-acpi b/Documentation/ABI/testing/sysfs-driver-chromeos-acpi
index c308926e1568..22082f9a7922 100644
--- a/Documentation/ABI/testing/sysfs-driver-chromeos-acpi
+++ b/Documentation/ABI/testing/sysfs-driver-chromeos-acpi
@@ -1,4 +1,5 @@
 What:		/sys/bus/platform/devices/GGL0001:*/BINF.2
+		/sys/bus/platform/devices/GOOG0016:*/BINF.2
 Date:		May 2022
 KernelVersion:	5.19
 Description:
@@ -10,6 +11,7 @@ Description:
 		== ===============================
 
 What:		/sys/bus/platform/devices/GGL0001:*/BINF.3
+		/sys/bus/platform/devices/GOOG0016:*/BINF.3
 Date:		May 2022
 KernelVersion:	5.19
 Description:
@@ -23,6 +25,7 @@ Description:
 		== =====================================
 
 What:		/sys/bus/platform/devices/GGL0001:*/CHSW
+		/sys/bus/platform/devices/GOOG0016:*/CHSW
 Date:		May 2022
 KernelVersion:	5.19
 Description:
@@ -38,6 +41,7 @@ Description:
 		==== ===========================================
 
 What:		/sys/bus/platform/devices/GGL0001:*/FMAP
+		/sys/bus/platform/devices/GOOG0016:*/FMAP
 Date:		May 2022
 KernelVersion:	5.19
 Description:
@@ -45,6 +49,7 @@ Description:
 		processor firmware flashmap.
 
 What:		/sys/bus/platform/devices/GGL0001:*/FRID
+		/sys/bus/platform/devices/GOOG0016:*/FRID
 Date:		May 2022
 KernelVersion:	5.19
 Description:
@@ -52,6 +57,7 @@ Description:
 		main processor firmware.
 
 What:		/sys/bus/platform/devices/GGL0001:*/FWID
+		/sys/bus/platform/devices/GOOG0016:*/FWID
 Date:		May 2022
 KernelVersion:	5.19
 Description:
@@ -59,6 +65,7 @@ Description:
 		main processor firmware.
 
 What:		/sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.0
+		/sys/bus/platform/devices/GOOG0016:*/GPIO.X/GPIO.0
 Date:		May 2022
 KernelVersion:	5.19
 Description:
@@ -73,6 +80,7 @@ Description:
 		=========== ==================================
 
 What:		/sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.1
+		/sys/bus/platform/devices/GOOG0016:*/GPIO.X/GPIO.1
 Date:		May 2022
 KernelVersion:	5.19
 Description:
@@ -84,6 +92,7 @@ Description:
 		== =======================
 
 What:		/sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.2
+		/sys/bus/platform/devices/GOOG0016:*/GPIO.X/GPIO.2
 Date:		May 2022
 KernelVersion:	5.19
 Description:
@@ -91,18 +100,21 @@ Description:
 		controller.
 
 What:		/sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.3
+		/sys/bus/platform/devices/GOOG0016:*/GPIO.X/GPIO.3
 Date:		May 2022
 KernelVersion:	5.19
 Description:
 		Returns name of the GPIO controller.
 
 What:		/sys/bus/platform/devices/GGL0001:*/HWID
+		/sys/bus/platform/devices/GOOG0016:*/HWID
 Date:		May 2022
 KernelVersion:	5.19
 Description:
 		Returns hardware ID for the Chromebook.
 
 What:		/sys/bus/platform/devices/GGL0001:*/MECK
+		/sys/bus/platform/devices/GOOG0016:*/MECK
 Date:		May 2022
 KernelVersion:	5.19
 Description:
@@ -113,6 +125,7 @@ Description:
 		present, or if the firmware was unable to read the extended registers, this buffer size can be zero.
 
 What:		/sys/bus/platform/devices/GGL0001:*/VBNV.0
+		/sys/bus/platform/devices/GOOG0016:*/VBNV.0
 Date:		May 2022
 KernelVersion:	5.19
 Description:
@@ -122,6 +135,7 @@ Description:
 		clock data).
 
 What:		/sys/bus/platform/devices/GGL0001:*/VBNV.1
+		/sys/bus/platform/devices/GOOG0016:*/VBNV.1
 Date:		May 2022
 KernelVersion:	5.19
 Description:
@@ -129,6 +143,7 @@ Description:
 		storage block.
 
 What:		/sys/bus/platform/devices/GGL0001:*/VDAT
+		/sys/bus/platform/devices/GOOG0016:*/VDAT
 Date:		May 2022
 KernelVersion:	5.19
 Description:
diff --git a/Documentation/firmware-guide/acpi/chromeos-acpi-device.rst b/Documentation/firmware-guide/acpi/chromeos-acpi-device.rst
index f37fc90ce340..89419e116413 100644
--- a/Documentation/firmware-guide/acpi/chromeos-acpi-device.rst
+++ b/Documentation/firmware-guide/acpi/chromeos-acpi-device.rst
@@ -5,9 +5,8 @@ Chrome OS ACPI Device
 =====================
 
 Hardware functionality specific to Chrome OS is exposed through a Chrome OS ACPI device.
-The plug and play ID of a Chrome OS ACPI device is GGL0001. GGL is a valid PNP ID of Google.
-PNP ID can be used with the ACPI devices according to the guidelines. The following ACPI
-objects are supported:
+The plug and play ID of a Chrome OS ACPI device is GGL0001 and the hardware ID is
+GOOG0016.  The following ACPI objects are supported:
 
 .. flat-table:: Supported ACPI Objects
    :widths: 1 2
diff --git a/drivers/platform/chrome/chromeos_acpi.c b/drivers/platform/chrome/chromeos_acpi.c
index 50d8a4d4352d..1cc01d893ada 100644
--- a/drivers/platform/chrome/chromeos_acpi.c
+++ b/drivers/platform/chrome/chromeos_acpi.c
@@ -235,9 +235,9 @@ static int chromeos_acpi_device_probe(struct platform_device *pdev)
 	return 0;
 }
 
-/* GGL is valid PNP ID of Google. PNP ID can be used with the ACPI devices. */
 static const struct acpi_device_id chromeos_device_ids[] = {
 	{ "GGL0001", 0 },
+	{ "GOOG0016", 0 },
 	{}
 };
 MODULE_DEVICE_TABLE(acpi, chromeos_device_ids);

From db7193a5c9db4babbfc99798f2e636b12419c12b Mon Sep 17 00:00:00 2001
From: Ben Gooding <ben.gooding.dev@gmail.com>
Date: Sun, 7 May 2023 17:27:39 +0100
Subject: [PATCH 0781/2047] rust: lock: Add intra-doc links to the Backend
 trait

Add missing intra-doc links to the Backend trait to make navigating the
documentation easier.

Suggested-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/rust-for-linux/94625fe6-b87a-a8f0-5b2a-a8152d5f7436@proton.me/
Link: https://github.com/Rust-for-Linux/linux/issues/1001
Signed-off-by: Ben Gooding <ben.gooding.dev@gmail.com>
Link: https://lore.kernel.org/r/20230509202314.8248-1-ben.gooding.dev@gmail.com
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/sync/lock.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rust/kernel/sync/lock.rs b/rust/kernel/sync/lock.rs
index a2216325632d..70a785f04754 100644
--- a/rust/kernel/sync/lock.rs
+++ b/rust/kernel/sync/lock.rs
@@ -72,8 +72,8 @@ pub unsafe trait Backend {
 
 /// A mutual exclusion primitive.
 ///
-/// Exposes one of the kernel locking primitives. Which one is exposed depends on the lock backend
-/// specified as the generic parameter `B`.
+/// Exposes one of the kernel locking primitives. Which one is exposed depends on the lock
+/// [`Backend`] specified as the generic parameter `B`.
 #[pin_data]
 pub struct Lock<T: ?Sized, B: Backend> {
     /// The kernel lock object.
@@ -126,7 +126,7 @@ impl<T: ?Sized, B: Backend> Lock<T, B> {
 
 /// A lock guard.
 ///
-/// Allows mutual exclusion primitives that implement the `Backend` trait to automatically unlock
+/// Allows mutual exclusion primitives that implement the [`Backend`] trait to automatically unlock
 /// when a guard goes out of scope. It also provides a safe and convenient way to access the data
 /// protected by the lock.
 #[must_use = "the lock unlocks immediately when the guard is unused"]

From 917b2e00b90f7a0f21ab3f28e7cf165825a7aba7 Mon Sep 17 00:00:00 2001
From: Ariel Miculas <amiculas@cisco.com>
Date: Wed, 26 Apr 2023 23:49:23 +0300
Subject: [PATCH 0782/2047] rust: helpers: sort includes alphabetically in
 rust/helpers.c

Sort the #include directives of rust/helpers.c alphabetically and add a
comment specifying this. The reason for this is to improve readability
and to be consistent with the other files with a similar approach within
'rust/'.

Suggested-by: Miguel Ojeda <ojeda@kernel.org>
Link: https://github.com/Rust-for-Linux/linux/issues/1003
Signed-off-by: Ariel Miculas <amiculas@cisco.com>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Link: https://lore.kernel.org/r/20230426204923.16195-1-amiculas@cisco.com
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/helpers.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/rust/helpers.c b/rust/helpers.c
index bb594da56137..f946f2ea640a 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -16,16 +16,18 @@
  *
  * All symbols are exported as GPL-only to guarantee no GPL-only feature is
  * accidentally exposed.
+ *
+ * Sorted alphabetically.
  */
 
 #include <linux/bug.h>
 #include <linux/build_bug.h>
 #include <linux/err.h>
 #include <linux/errname.h>
-#include <linux/refcount.h>
 #include <linux/mutex.h>
-#include <linux/spinlock.h>
+#include <linux/refcount.h>
 #include <linux/sched/signal.h>
+#include <linux/spinlock.h>
 #include <linux/wait.h>
 
 __noreturn void rust_helper_BUG(void)

From f39a97d0d8a765268b8de17f2e34393e7cb2cd2d Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Sat, 29 Jul 2023 18:29:03 -0700
Subject: [PATCH 0783/2047] rust: allocator: Use krealloc_aligned() in
 KernelAllocator::alloc

This fixes the potential issue that when KernelAllocator is used, the
allocation may be mis-aligned due to SLAB's alignment guarantee.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Reviewed-by: Andreas Hindborg <a.hindborg@samsung.com>
Link: https://lore.kernel.org/r/20230730012905.643822-3-boqun.feng@gmail.com
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/allocator.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rust/kernel/allocator.rs b/rust/kernel/allocator.rs
index 9363b527be66..d4a3670ef405 100644
--- a/rust/kernel/allocator.rs
+++ b/rust/kernel/allocator.rs
@@ -41,9 +41,9 @@ unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: bindings::gf
 
 unsafe impl GlobalAlloc for KernelAllocator {
     unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
-        // `krealloc()` is used instead of `kmalloc()` because the latter is
-        // an inline function and cannot be bound to as a result.
-        unsafe { bindings::krealloc(ptr::null(), layout.size(), bindings::GFP_KERNEL) as *mut u8 }
+        // SAFETY: `ptr::null_mut()` is null and `layout` has a non-zero size by the function safety
+        // requirement.
+        unsafe { krealloc_aligned(ptr::null_mut(), layout, bindings::GFP_KERNEL) }
     }
 
     unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) {

From 0beaf546b4f198dd767b5a01a73f24db269ff6e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Roy=20Baron?= <bjorn3_gh@protonmail.com>
Date: Sat, 29 Jul 2023 18:29:04 -0700
Subject: [PATCH 0784/2047] rust: alloc: Add realloc and alloc_zeroed to the
 GlobalAlloc impl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While there are default impls for these methods, using the respective C
api's is faster. Currently neither the existing nor these new
GlobalAlloc method implementations are actually called. Instead the
__rust_* function defined below the GlobalAlloc impl are used. With
rustc 1.71 these functions will be gone and all allocation calls will go
through the GlobalAlloc implementation.

Link: https://github.com/Rust-for-Linux/linux/issues/68
Signed-off-by: Björn Roy Baron <bjorn3_gh@protonmail.com>
[boqun: add size adjustment for alignment requirement]
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Reviewed-by: Andreas Hindborg <a.hindborg@samsung.com>
Link: https://lore.kernel.org/r/20230730012905.643822-4-boqun.feng@gmail.com
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/allocator.rs | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/rust/kernel/allocator.rs b/rust/kernel/allocator.rs
index d4a3670ef405..72d66c23883c 100644
--- a/rust/kernel/allocator.rs
+++ b/rust/kernel/allocator.rs
@@ -51,6 +51,33 @@ unsafe impl GlobalAlloc for KernelAllocator {
             bindings::kfree(ptr as *const core::ffi::c_void);
         }
     }
+
+    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
+        // SAFETY:
+        // - `new_size`, when rounded up to the nearest multiple of `layout.align()`, will not
+        //   overflow `isize` by the function safety requirement.
+        // - `layout.align()` is a proper alignment (i.e. not zero and must be a power of two).
+        let layout = unsafe { Layout::from_size_align_unchecked(new_size, layout.align()) };
+
+        // SAFETY:
+        // - `ptr` is either null or a pointer allocated by this allocator by the function safety
+        //   requirement.
+        // - the size of `layout` is not zero because `new_size` is not zero by the function safety
+        //   requirement.
+        unsafe { krealloc_aligned(ptr, layout, bindings::GFP_KERNEL) }
+    }
+
+    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
+        // SAFETY: `ptr::null_mut()` is null and `layout` has a non-zero size by the function safety
+        // requirement.
+        unsafe {
+            krealloc_aligned(
+                ptr::null_mut(),
+                layout,
+                bindings::GFP_KERNEL | bindings::__GFP_ZERO,
+            )
+        }
+    }
 }
 
 #[global_allocator]

From 49a9ef76740206d52e7393f6fe25fc764de8df32 Mon Sep 17 00:00:00 2001
From: Vinay Varma <varmavinaym@gmail.com>
Date: Tue, 11 Apr 2023 17:17:15 +0800
Subject: [PATCH 0785/2047] scripts: `make rust-analyzer` for out-of-tree
 modules

Adds support for out-of-tree rust modules to use the `rust-analyzer`
make target to generate the rust-project.json file.

The change involves adding an optional parameter `external_src` to the
`generate_rust_analyzer.py` which expects the path to the out-of-tree
module's source directory. When this parameter is passed, I have chosen
not to add the non-core modules (samples and drivers) into the result
since these are not expected to be used in third party modules. Related
changes are also made to the Makefile and rust/Makefile allowing the
`rust-analyzer` target to be used for out-of-tree modules as well.

Link: https://github.com/Rust-for-Linux/linux/pull/914
Link: https://github.com/Rust-for-Linux/rust-out-of-tree-module/pull/2
Signed-off-by: Vinay Varma <varmavinaym@gmail.com>
Link: https://lore.kernel.org/r/20230411091714.130525-1-varmavinaym@gmail.com
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 Makefile                          | 11 ++++++-----
 rust/Makefile                     |  6 ++++--
 scripts/generate_rust_analyzer.py | 27 ++++++++++++++++++---------
 3 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/Makefile b/Makefile
index 6bbf9db6b414..64ec209b26f0 100644
--- a/Makefile
+++ b/Makefile
@@ -1859,11 +1859,6 @@ rustfmt:
 rustfmtcheck: rustfmt_flags = --check
 rustfmtcheck: rustfmt
 
-# IDE support targets
-PHONY += rust-analyzer
-rust-analyzer:
-	$(Q)$(MAKE) $(build)=rust $@
-
 # Misc
 # ---------------------------------------------------------------------------
 
@@ -1924,6 +1919,7 @@ help:
 	@echo  '  modules         - default target, build the module(s)'
 	@echo  '  modules_install - install the module'
 	@echo  '  clean           - remove generated files in module directory only'
+	@echo  '  rust-analyzer	  - generate rust-project.json rust-analyzer support file'
 	@echo  ''
 
 __external_modules_error:
@@ -2065,6 +2061,11 @@ quiet_cmd_tags = GEN     $@
 tags TAGS cscope gtags: FORCE
 	$(call cmd,tags)
 
+# IDE support targets
+PHONY += rust-analyzer
+rust-analyzer:
+	$(Q)$(MAKE) $(build)=rust $@
+
 # Script to generate missing namespace dependencies
 # ---------------------------------------------------------------------------
 
diff --git a/rust/Makefile b/rust/Makefile
index 4124bfa01798..1ffdff6c2a34 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -373,8 +373,10 @@ quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L
 	$(if $(rustc_objcopy),;$(OBJCOPY) $(rustc_objcopy) $@)
 
 rust-analyzer:
-	$(Q)$(srctree)/scripts/generate_rust_analyzer.py $(srctree) $(objtree) \
-		$(RUST_LIB_SRC) > $(objtree)/rust-project.json
+	$(Q)$(srctree)/scripts/generate_rust_analyzer.py \
+		$(abs_srctree) $(abs_objtree) \
+		$(RUST_LIB_SRC) $(KBUILD_EXTMOD) > \
+		$(if $(KBUILD_EXTMOD),$(extmod_prefix),$(objtree))/rust-project.json
 
 redirect-intrinsics = \
 	__eqsf2 __gesf2 __lesf2 __nesf2 __unordsf2 \
diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py
index 946e250c1b2a..848fa1ad92ba 100755
--- a/scripts/generate_rust_analyzer.py
+++ b/scripts/generate_rust_analyzer.py
@@ -6,10 +6,11 @@
 import argparse
 import json
 import logging
+import os
 import pathlib
 import sys
 
-def generate_crates(srctree, objtree, sysroot_src):
+def generate_crates(srctree, objtree, sysroot_src, external_src):
     # Generate the configuration list.
     cfg = []
     with open(objtree / "include" / "generated" / "rustc_cfg") as fd:
@@ -65,7 +66,7 @@ def generate_crates(srctree, objtree, sysroot_src):
         [],
         is_proc_macro=True,
     )
-    crates[-1]["proc_macro_dylib_path"] = "rust/libmacros.so"
+    crates[-1]["proc_macro_dylib_path"] = f"{objtree}/rust/libmacros.so"
 
     append_crate(
         "build_error",
@@ -95,19 +96,26 @@ def generate_crates(srctree, objtree, sysroot_src):
         "exclude_dirs": [],
     }
 
+    def is_root_crate(build_file, target):
+        try:
+            return f"{target}.o" in open(build_file).read()
+        except FileNotFoundError:
+            return False
+
     # Then, the rest outside of `rust/`.
     #
     # We explicitly mention the top-level folders we want to cover.
-    for folder in ("samples", "drivers"):
-        for path in (srctree / folder).rglob("*.rs"):
+    extra_dirs = map(lambda dir: srctree / dir, ("samples", "drivers"))
+    if external_src is not None:
+        extra_dirs = [external_src]
+    for folder in extra_dirs:
+        for path in folder.rglob("*.rs"):
             logging.info("Checking %s", path)
             name = path.name.replace(".rs", "")
 
             # Skip those that are not crate roots.
-            try:
-                if f"{name}.o" not in open(path.parent / "Makefile").read():
-                    continue
-            except FileNotFoundError:
+            if not is_root_crate(path.parent / "Makefile", name) and \
+               not is_root_crate(path.parent / "Kbuild", name):
                 continue
 
             logging.info("Adding %s", name)
@@ -126,6 +134,7 @@ def main():
     parser.add_argument("srctree", type=pathlib.Path)
     parser.add_argument("objtree", type=pathlib.Path)
     parser.add_argument("sysroot_src", type=pathlib.Path)
+    parser.add_argument("exttree", type=pathlib.Path, nargs="?")
     args = parser.parse_args()
 
     logging.basicConfig(
@@ -134,7 +143,7 @@ def main():
     )
 
     rust_project = {
-        "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src),
+        "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree),
         "sysroot_src": str(args.sysroot_src),
     }
 

From 4b591ed6971191134e331f02300b4ec4dee188ea Mon Sep 17 00:00:00 2001
From: Alina Yu <alina_yu@richtek.com>
Date: Mon, 7 Aug 2023 14:28:06 +0800
Subject: [PATCH 0786/2047] regulator: dt-bindings: rtq2208: Add Richtek
 RTQ2208 SubPMIC

Add bindings for Richtek RTQ2208 IC controlled SubPMIC

Signed-off-by: Alina Yu <alina_yu@richtek.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/1691389687-31211-2-git-send-email-alina_yu@richtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/richtek,rtq2208.yaml   | 197 ++++++++++++++++++
 1 file changed, 197 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/richtek,rtq2208.yaml

diff --git a/Documentation/devicetree/bindings/regulator/richtek,rtq2208.yaml b/Documentation/devicetree/bindings/regulator/richtek,rtq2208.yaml
new file mode 100644
index 000000000000..609c06615bdc
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/richtek,rtq2208.yaml
@@ -0,0 +1,197 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/richtek,rtq2208.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Richtek RTQ2208 SubPMIC Regulator
+
+maintainers:
+  - Alina Yu <alina_yu@richtek.com>
+
+description: |
+  RTQ2208 is a highly integrated power converter that offers functional safety dual
+  multi-configurable synchronous buck converters and two LDOs.
+
+  Bucks support "regulator-allowed-modes" and "regulator-mode". The former defines the permitted
+  switching operation in normal mode; the latter defines the operation in suspend to RAM mode.
+
+  No matter the RTQ2208 is configured to normal or suspend to RAM mode, there are two switching
+  operation modes for all buck rails, automatic power saving mode (Auto mode) and forced continuous
+  conduction mode (FCCM).
+
+  The definition of modes is in the datasheet which is available in below link
+  and their meaning is::
+    0 - Auto mode for power saving, which reducing the switching frequency at light load condition
+    to maintain high frequency.
+    1 - FCCM to meet the strict voltage regulation accuracy, which keeping constant switching frequency.
+
+  Datasheet will be available soon at
+  https://www.richtek.com/assets/Products
+
+properties:
+  compatible:
+    enum:
+      - richtek,rtq2208
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+    
+  richtek,mtp-sel-high:
+    type: boolean
+    description:
+      vout register selection based on this boolean value.
+      false - Using DVS0 register setting to adjust vout
+      true - Using DVS1 register setting to adjust vout
+
+  regulators:
+    type: object
+    additionalProperties: false
+
+    patternProperties:
+      "^buck-[a-h]$":
+        type: object
+        $ref: regulator.yaml#
+        unevaluatedProperties: false
+        description:
+          description for buck-[a-h] regulator.
+
+        properties:
+          regulator-allowed-modes:
+            description:
+              two buck modes in different switching accuracy.
+              0 - Auto mode
+              1 - FCCM
+            items:
+              enum: [0, 1]
+
+      "^ldo[1-2]$":
+        type: object
+        $ref: regulator.yaml#
+        unevaluatedProperties: false
+        description:
+          regulator description for ldo[1-2].
+
+required:
+  - compatible
+  - reg
+  - regulators
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      pmic@10 {
+        compatible = "richtek,rtq2208";
+        reg = <0x10>;
+        interrupts-extended = <&gpio26 0 IRQ_TYPE_LEVEL_LOW>;
+        richtek,mtp-sel-high;
+
+        regulators {
+          buck-a {
+            regulator-min-microvolt = <400000>;
+            regulator-max-microvolt = <2050000>;
+            regulator-allowed-modes = <0 1>;
+            regulator-always-on;
+            regulator-state-mem {
+              regulator-on-in-suspend;
+              regulator-mode = <1>;
+            };
+          };
+          buck-b {
+            regulator-min-microvolt = <400000>;
+            regulator-max-microvolt = <2050000>;
+            regulator-allowed-modes = <0 1>;
+            regulator-always-on;
+            regulator-state-mem {
+              regulator-on-in-suspend;
+              regulator-mode = <1>;
+            };
+          };
+          buck-c {
+            regulator-min-microvolt = <400000>;
+            regulator-max-microvolt = <2050000>;
+            regulator-allowed-modes = <0 1>;
+            regulator-always-on;
+            regulator-state-mem {
+              regulator-on-in-suspend;
+              regulator-mode = <1>;
+            };
+          };
+          buck-d {
+            regulator-min-microvolt = <400000>;
+            regulator-max-microvolt = <2050000>;
+            regulator-allowed-modes = <0 1>;
+            regulator-always-on;
+            regulator-state-mem {
+              regulator-on-in-suspend;
+              regulator-mode = <1>;
+            };
+          };
+          buck-e {
+            regulator-min-microvolt = <400000>;
+            regulator-max-microvolt = <2050000>;
+            regulator-allowed-modes = <0 1>;
+            regulator-always-on;
+            regulator-state-mem {
+              regulator-on-in-suspend;
+              regulator-mode = <1>;
+            };
+          };
+          buck-f {
+            regulator-min-microvolt = <400000>;
+            regulator-max-microvolt = <2050000>;
+            regulator-allowed-modes = <0 1>;
+            regulator-always-on;
+            regulator-state-mem {
+              regulator-on-in-suspend;
+              regulator-mode = <1>;
+            };
+          };
+          buck-g {
+            regulator-min-microvolt = <400000>;
+            regulator-max-microvolt = <2050000>;
+            regulator-allowed-modes = <0 1>;
+            regulator-always-on;
+            regulator-state-mem {
+              regulator-on-in-suspend;
+              regulator-mode = <1>;
+            };
+          };
+          buck-h {
+            regulator-min-microvolt = <400000>;
+            regulator-max-microvolt = <2050000>;
+            regulator-allowed-modes = <0 1>;
+            regulator-always-on;
+            regulator-state-mem {
+              regulator-on-in-suspend;
+              regulator-mode = <1>;
+            };
+          };
+          ldo1 {
+            regulator-min-microvolt = <1200000>;
+            regulator-max-microvolt = <1200000>;
+            regulator-always-on;
+            regulator-state-mem {
+              regulator-on-in-suspend;
+            };
+          };
+          ldo2 {
+            regulator-min-microvolt = <3300000>;
+            regulator-max-microvolt = <3300000>;
+            regulator-always-on;
+            regulator-state-mem {
+              regulator-on-in-suspend;
+            };
+          };
+        };
+      };
+    };

From 85a11f55621a0c18b22b43ab4219450ac1d19386 Mon Sep 17 00:00:00 2001
From: Alina Yu <alina_yu@richtek.com>
Date: Mon, 7 Aug 2023 14:28:07 +0800
Subject: [PATCH 0787/2047] regulator: rtq2208: Add Richtek RTQ2208 SubPMIC
 driver

Add support for the RTQ2208 SubPMIC
This ic integrates with configurable, synchrnous buck converters and two ldos.

Signed-off-by: Alina Yu <alina_yu@richtek.com>
Link: https://lore.kernel.org/r/1691389687-31211-3-git-send-email-alina_yu@richtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig             |  11 +
 drivers/regulator/Makefile            |   1 +
 drivers/regulator/rtq2208-regulator.c | 583 ++++++++++++++++++++++++++
 3 files changed, 595 insertions(+)
 create mode 100644 drivers/regulator/rtq2208-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 94f44736473e..bf4b5b07e09e 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1254,6 +1254,17 @@ config REGULATOR_RTQ6752
 	  synchronous boost converters for PAVDD, and one synchronous NAVDD
 	  buck-boost. This device is suitable for automotive TFT-LCD panel.
 
+config REGULATOR_RTQ2208
+	tristate "Richtek RTQ2208 SubPMIC Regulator"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  This driver adds support for RTQ2208 SubPMIC regulators.
+	  The RTQ2208 is a multi-phase, programmable power management IC that
+	  integrate with dual multi-configurable, synchronous buck converters
+	  and two ldos. It features wide output voltage range from 0.4V to 2.05V
+	  and the capability to configure the corresponding power stages.
+
 config REGULATOR_S2MPA01
 	tristate "Samsung S2MPA01 voltage regulator"
 	depends on MFD_SEC_CORE || COMPILE_TEST
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 77b22bdd2791..67468cfcc4f3 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -147,6 +147,7 @@ obj-$(CONFIG_REGULATOR_RT6245)	+= rt6245-regulator.o
 obj-$(CONFIG_REGULATOR_RTMV20)	+= rtmv20-regulator.o
 obj-$(CONFIG_REGULATOR_RTQ2134) += rtq2134-regulator.o
 obj-$(CONFIG_REGULATOR_RTQ6752)	+= rtq6752-regulator.o
+obj-$(CONFIG_REGULATOR_RTQ2208) += rtq2208-regulator.o
 obj-$(CONFIG_REGULATOR_S2MPA01) += s2mpa01.o
 obj-$(CONFIG_REGULATOR_S2MPS11) += s2mps11.o
 obj-$(CONFIG_REGULATOR_S5M8767) += s5m8767.o
diff --git a/drivers/regulator/rtq2208-regulator.c b/drivers/regulator/rtq2208-regulator.c
new file mode 100644
index 000000000000..2463aea4192c
--- /dev/null
+++ b/drivers/regulator/rtq2208-regulator.c
@@ -0,0 +1,583 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/bitops.h>
+#include <linux/bitfield.h>
+#include <linux/util_macros.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
+#include <linux/mod_devicetable.h>
+
+/* Register */
+#define RTQ2208_REG_GLOBAL_INT1			0x12
+#define RTQ2208_REG_FLT_RECORDBUCK_CB		0x18
+#define RTQ2208_REG_GLOBAL_INT1_MASK		0x1D
+#define RTQ2208_REG_FLT_MASKBUCK_CB		0x1F
+#define RTQ2208_REG_BUCK_C_CFG0			0x32
+#define RTQ2208_REG_BUCK_B_CFG0			0x42
+#define RTQ2208_REG_BUCK_A_CFG0			0x52
+#define RTQ2208_REG_BUCK_D_CFG0			0x62
+#define RTQ2208_REG_BUCK_G_CFG0			0x72
+#define RTQ2208_REG_BUCK_F_CFG0			0x82
+#define RTQ2208_REG_BUCK_E_CFG0			0x92
+#define RTQ2208_REG_BUCK_H_CFG0			0xA2
+#define RTQ2208_REG_LDO1_CFG			0xB1
+#define RTQ2208_REG_LDO2_CFG			0xC1
+
+/* Mask */
+#define RTQ2208_BUCK_NR_MTP_SEL_MASK		GENMASK(7, 0)
+#define RTQ2208_BUCK_EN_NR_MTP_SEL0_MASK	BIT(0)
+#define RTQ2208_BUCK_EN_NR_MTP_SEL1_MASK	BIT(1)
+#define RTQ2208_BUCK_RSPUP_MASK			GENMASK(6, 4)
+#define RTQ2208_BUCK_RSPDN_MASK			GENMASK(2, 0)
+#define RTQ2208_BUCK_NRMODE_MASK		BIT(5)
+#define RTQ2208_BUCK_STRMODE_MASK		BIT(5)
+#define RTQ2208_BUCK_EN_STR_MASK		BIT(0)
+#define RTQ2208_LDO_EN_STR_MASK			BIT(7)
+#define RTQ2208_EN_DIS_MASK			BIT(0)
+#define RTQ2208_BUCK_RAMP_SEL_MASK		GENMASK(2, 0)
+#define RTQ2208_HD_INT_MASK			BIT(0)
+
+/* Size */
+#define RTQ2208_VOUT_MAXNUM			256
+#define RTQ2208_BUCK_NUM_IRQ_REGS		5
+#define RTQ2208_STS_NUM_IRQ_REGS		2
+
+/* Value */
+#define RTQ2208_RAMP_VALUE_MIN_uV		500
+#define RTQ2208_RAMP_VALUE_MAX_uV		64000
+
+#define RTQ2208_BUCK_MASK(uv_irq, ov_irq)	(1 << ((uv_irq) % 8) | 1 << ((ov_irq) % 8))
+
+enum {
+	RTQ2208_BUCK_B = 0,
+	RTQ2208_BUCK_C,
+	RTQ2208_BUCK_D,
+	RTQ2208_BUCK_A,
+	RTQ2208_BUCK_F,
+	RTQ2208_BUCK_G,
+	RTQ2208_BUCK_H,
+	RTQ2208_BUCK_E,
+	RTQ2208_LDO2,
+	RTQ2208_LDO1,
+	RTQ2208_LDO_MAX,
+};
+
+enum {
+	RTQ2208_AUTO_MODE = 0,
+	RTQ2208_FCCM,
+};
+
+struct rtq2208_regulator_desc {
+	struct regulator_desc desc;
+	unsigned int mtp_sel_reg;
+	unsigned int mtp_sel_mask;
+	unsigned int mode_reg;
+	unsigned int mode_mask;
+	unsigned int suspend_config_reg;
+	unsigned int suspend_enable_mask;
+	unsigned int suspend_mode_mask;
+};
+
+struct rtq2208_rdev_map {
+	struct regulator_dev *rdev[RTQ2208_LDO_MAX];
+	struct regmap *regmap;
+	struct device *dev;
+};
+
+/* set Normal Auto/FCCM mode */
+static int rtq2208_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+	const struct rtq2208_regulator_desc *rdesc =
+		(const struct rtq2208_regulator_desc *)rdev->desc;
+	unsigned int val, shift;
+
+	switch (mode) {
+	case REGULATOR_MODE_NORMAL:
+		val = RTQ2208_AUTO_MODE;
+		break;
+	case REGULATOR_MODE_FAST:
+		val = RTQ2208_FCCM;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	shift = ffs(rdesc->mode_mask) - 1;
+	return regmap_update_bits(rdev->regmap, rdesc->mode_reg,
+				  rdesc->mode_mask, val << shift);
+}
+
+static unsigned int rtq2208_get_mode(struct regulator_dev *rdev)
+{
+	const struct rtq2208_regulator_desc *rdesc =
+		(const struct rtq2208_regulator_desc *)rdev->desc;
+	unsigned int mode_val;
+	int ret;
+
+	ret = regmap_read(rdev->regmap, rdesc->mode_reg, &mode_val);
+	if (ret)
+		return REGULATOR_MODE_INVALID;
+
+	return (mode_val & rdesc->mode_mask) ? REGULATOR_MODE_FAST : REGULATOR_MODE_NORMAL;
+}
+
+static int rtq2208_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
+{
+	const struct regulator_desc *desc = rdev->desc;
+	unsigned int sel = 0, val;
+
+	ramp_delay = max(ramp_delay, RTQ2208_RAMP_VALUE_MIN_uV);
+	ramp_delay = min(ramp_delay, RTQ2208_RAMP_VALUE_MAX_uV);
+
+	ramp_delay /= RTQ2208_RAMP_VALUE_MIN_uV;
+
+	/*
+	 * fls(ramp_delay) - 1: doing LSB shift, let it starts from 0
+	 *
+	 * RTQ2208_BUCK_RAMP_SEL_MASK - sel: doing descending order shifting.
+	 * Because the relation of seleltion and value is like that
+	 *
+	 * seletion: value
+	 * 000: 64mv
+	 * 001: 32mv
+	 * ...
+	 * 111: 0.5mv
+	 *
+	 * For example, if I would like to select 64mv, the fls(ramp_delay) - 1 will be 0b111,
+	 * and I need to use 0b111 - sel to do the shifting
+	 */
+
+	sel = fls(ramp_delay) - 1;
+	sel = RTQ2208_BUCK_RAMP_SEL_MASK - sel;
+
+	val = FIELD_PREP(RTQ2208_BUCK_RSPUP_MASK, sel) | FIELD_PREP(RTQ2208_BUCK_RSPDN_MASK, sel);
+
+	return regmap_update_bits(rdev->regmap, desc->ramp_reg,
+				  RTQ2208_BUCK_RSPUP_MASK | RTQ2208_BUCK_RSPDN_MASK, val);
+}
+
+static int rtq2208_set_suspend_enable(struct regulator_dev *rdev)
+{
+	const struct rtq2208_regulator_desc *rdesc =
+		(const struct rtq2208_regulator_desc *)rdev->desc;
+
+	return regmap_set_bits(rdev->regmap, rdesc->suspend_config_reg, rdesc->suspend_enable_mask);
+}
+
+static int rtq2208_set_suspend_disable(struct regulator_dev *rdev)
+{
+	const struct rtq2208_regulator_desc *rdesc =
+		(const struct rtq2208_regulator_desc *)rdev->desc;
+
+	return regmap_update_bits(rdev->regmap, rdesc->suspend_config_reg, rdesc->suspend_enable_mask, 0);
+}
+
+static int rtq2208_set_suspend_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+	const struct rtq2208_regulator_desc *rdesc =
+		(const struct rtq2208_regulator_desc *)rdev->desc;
+	unsigned int val, shift;
+
+	switch (mode) {
+	case REGULATOR_MODE_NORMAL:
+		val = RTQ2208_AUTO_MODE;
+		break;
+	case REGULATOR_MODE_FAST:
+		val = RTQ2208_FCCM;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	shift = ffs(rdesc->suspend_mode_mask) - 1;
+
+	return regmap_update_bits(rdev->regmap, rdesc->suspend_config_reg,
+			rdesc->suspend_mode_mask, val << shift);
+}
+
+static const struct regulator_ops rtq2208_regulator_buck_ops = {
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.list_voltage = regulator_list_voltage_linear_range,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.set_mode = rtq2208_set_mode,
+	.get_mode = rtq2208_get_mode,
+	.set_ramp_delay = rtq2208_set_ramp_delay,
+	.set_active_discharge = regulator_set_active_discharge_regmap,
+	.set_suspend_enable = rtq2208_set_suspend_enable,
+	.set_suspend_disable = rtq2208_set_suspend_disable,
+	.set_suspend_mode = rtq2208_set_suspend_mode,
+};
+
+static const struct regulator_ops rtq2208_regulator_ldo_ops = {
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.set_active_discharge = regulator_set_active_discharge_regmap,
+	.set_suspend_enable = rtq2208_set_suspend_enable,
+	.set_suspend_disable = rtq2208_set_suspend_disable,
+};
+
+static unsigned int rtq2208_of_map_mode(unsigned int mode)
+{
+	switch (mode) {
+	case RTQ2208_AUTO_MODE:
+		return REGULATOR_MODE_NORMAL;
+	case RTQ2208_FCCM:
+		return REGULATOR_MODE_FAST;
+	default:
+		return REGULATOR_MODE_INVALID;
+	}
+}
+
+static int rtq2208_init_irq_mask(struct rtq2208_rdev_map *rdev_map, unsigned int *buck_masks)
+{
+	unsigned char buck_clr_masks[5] = {0x33, 0x33, 0x33, 0x33, 0x33},
+		      sts_clr_masks[2] = {0xE7, 0xF7}, sts_masks[2] = {0xE6, 0xF6};
+	int ret;
+
+	/* write clear all buck irq once */
+	ret = regmap_bulk_write(rdev_map->regmap, RTQ2208_REG_FLT_RECORDBUCK_CB, buck_clr_masks, 5);
+	if (ret)
+		return dev_err_probe(rdev_map->dev, ret, "Failed to clr buck irqs\n");
+
+	/* write clear general irq once */
+	ret = regmap_bulk_write(rdev_map->regmap, RTQ2208_REG_GLOBAL_INT1, sts_clr_masks, 2);
+	if (ret)
+		return dev_err_probe(rdev_map->dev, ret, "Failed to clr general irqs\n");
+
+	/* unmask buck ov/uv irq */
+	ret = regmap_bulk_write(rdev_map->regmap, RTQ2208_REG_FLT_MASKBUCK_CB, buck_masks, 5);
+	if (ret)
+		return dev_err_probe(rdev_map->dev, ret, "Failed to unmask buck irqs\n");
+
+	/* unmask needed general irq */
+	return regmap_bulk_write(rdev_map->regmap, RTQ2208_REG_GLOBAL_INT1_MASK, sts_masks, 2);
+}
+
+static irqreturn_t rtq2208_irq_handler(int irqno, void *devid)
+{
+	unsigned char buck_flags[RTQ2208_BUCK_NUM_IRQ_REGS], sts_flags[RTQ2208_STS_NUM_IRQ_REGS];
+	int ret = 0, i, uv_bit, ov_bit;
+	struct rtq2208_rdev_map *rdev_map = devid;
+	struct regulator_dev *rdev;
+
+	if (!rdev_map)
+		return IRQ_NONE;
+
+	/* read irq event */
+	ret = regmap_bulk_read(rdev_map->regmap, RTQ2208_REG_FLT_RECORDBUCK_CB,
+				buck_flags, ARRAY_SIZE(buck_flags));
+	if (ret)
+		return IRQ_NONE;
+
+	ret = regmap_bulk_read(rdev_map->regmap, RTQ2208_REG_GLOBAL_INT1,
+				sts_flags, ARRAY_SIZE(sts_flags));
+	if (ret)
+		return IRQ_NONE;
+
+	/* clear irq event */
+	ret = regmap_bulk_write(rdev_map->regmap, RTQ2208_REG_FLT_RECORDBUCK_CB,
+				buck_flags, ARRAY_SIZE(buck_flags));
+	if (ret)
+		return IRQ_NONE;
+
+	ret = regmap_bulk_write(rdev_map->regmap, RTQ2208_REG_GLOBAL_INT1,
+				sts_flags, ARRAY_SIZE(sts_flags));
+	if (ret)
+		return IRQ_NONE;
+
+	for (i = 0; i < RTQ2208_LDO_MAX; i++) {
+		if (!rdev_map->rdev[i])
+			continue;
+
+		rdev = rdev_map->rdev[i];
+		/* uv irq */
+		uv_bit = (i & 1) ? 4 : 0;
+		if (buck_flags[i >> 1] & (1 << uv_bit))
+			regulator_notifier_call_chain(rdev,
+					REGULATOR_EVENT_UNDER_VOLTAGE, NULL);
+		/* ov irq */
+		ov_bit = uv_bit + 1;
+		if (buck_flags[i >> 1] & (1 << ov_bit))
+			regulator_notifier_call_chain(rdev,
+					REGULATOR_EVENT_REGULATION_OUT, NULL);
+
+		/* hd irq */
+		if (sts_flags[1] & RTQ2208_HD_INT_MASK)
+			regulator_notifier_call_chain(rdev,
+					REGULATOR_EVENT_OVER_TEMP, NULL);
+	}
+
+	return IRQ_HANDLED;
+}
+
+#define RTQ2208_REGULATOR_INFO(_name, _base) \
+{ \
+	.name = #_name, \
+	.base = _base, \
+}
+#define BUCK_RG_BASE(_id)	RTQ2208_REG_BUCK_##_id##_CFG0
+#define BUCK_RG_SHIFT(_base, _shift)	(_base + _shift)
+#define LDO_RG_BASE(_id)	RTQ2208_REG_LDO##_id##_CFG
+#define LDO_RG_SHIFT(_base, _shift)	(_base + _shift)
+#define	VSEL_SHIFT(_sel)	(_sel ? 3 : 1)
+#define MTP_SEL_MASK(_sel)	RTQ2208_BUCK_EN_NR_MTP_SEL##_sel##_MASK
+
+static const struct linear_range rtq2208_vout_range[] = {
+	REGULATOR_LINEAR_RANGE(400000, 0, 180, 5000),
+	REGULATOR_LINEAR_RANGE(1310000, 181, 255, 10000),
+};
+
+static int rtq2208_of_get_fixed_voltage(struct device *dev,
+					struct of_regulator_match *rtq2208_ldo_match, int n_fixed)
+{
+	struct device_node *np;
+	struct of_regulator_match *match;
+	struct rtq2208_regulator_desc *rdesc;
+	struct regulator_init_data *init_data;
+	int ret, i;
+
+	if (!dev->of_node)
+		return -ENODEV;
+
+	np = of_get_child_by_name(dev->of_node, "regulators");
+	if (!np)
+		np = dev->of_node;
+
+	ret = of_regulator_match(dev, np, rtq2208_ldo_match, n_fixed);
+
+	of_node_put(np);
+
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < n_fixed; i++) {
+		match = rtq2208_ldo_match + i;
+		init_data = match->init_data;
+		rdesc = (struct rtq2208_regulator_desc *)match->driver_data;
+
+		if (!init_data || !rdesc)
+			continue;
+
+		if (init_data->constraints.min_uV == init_data->constraints.max_uV)
+			rdesc->desc.fixed_uV = init_data->constraints.min_uV;
+	}
+
+	return 0;
+}
+
+static void rtq2208_init_regulator_desc(struct rtq2208_regulator_desc *rdesc, int mtp_sel,
+					int idx, struct of_regulator_match *rtq2208_ldo_match, int *ldo_idx)
+{
+	struct regulator_desc *desc;
+	static const struct {
+		char *name;
+		int base;
+	} regulator_info[] = {
+		RTQ2208_REGULATOR_INFO(buck-b, BUCK_RG_BASE(B)),
+		RTQ2208_REGULATOR_INFO(buck-c, BUCK_RG_BASE(C)),
+		RTQ2208_REGULATOR_INFO(buck-d, BUCK_RG_BASE(D)),
+		RTQ2208_REGULATOR_INFO(buck-a, BUCK_RG_BASE(A)),
+		RTQ2208_REGULATOR_INFO(buck-f, BUCK_RG_BASE(F)),
+		RTQ2208_REGULATOR_INFO(buck-g, BUCK_RG_BASE(G)),
+		RTQ2208_REGULATOR_INFO(buck-h, BUCK_RG_BASE(H)),
+		RTQ2208_REGULATOR_INFO(buck-e, BUCK_RG_BASE(E)),
+		RTQ2208_REGULATOR_INFO(ldo2, LDO_RG_BASE(2)),
+		RTQ2208_REGULATOR_INFO(ldo1, LDO_RG_BASE(1)),
+	}, *curr_info;
+
+	curr_info = regulator_info + idx;
+	desc = &rdesc->desc;
+	desc->name = curr_info->name;
+	desc->of_match = of_match_ptr(curr_info->name);
+	desc->regulators_node = of_match_ptr("regulators");
+	desc->id = idx;
+	desc->owner = THIS_MODULE;
+	desc->type = REGULATOR_VOLTAGE;
+	desc->enable_mask = mtp_sel ? MTP_SEL_MASK(1) : MTP_SEL_MASK(0);
+	desc->active_discharge_on = RTQ2208_EN_DIS_MASK;
+	desc->active_discharge_off = 0;
+	desc->active_discharge_mask = RTQ2208_EN_DIS_MASK;
+
+	rdesc->mode_mask = RTQ2208_BUCK_NRMODE_MASK;
+
+	if (idx >= RTQ2208_BUCK_B && idx <= RTQ2208_BUCK_E) {
+		/* init buck desc */
+		desc->enable_reg = BUCK_RG_SHIFT(curr_info->base, 2);
+		desc->ops = &rtq2208_regulator_buck_ops;
+		desc->vsel_reg = curr_info->base + VSEL_SHIFT(mtp_sel);
+		desc->vsel_mask = RTQ2208_BUCK_NR_MTP_SEL_MASK;
+		desc->n_voltages = RTQ2208_VOUT_MAXNUM;
+		desc->linear_ranges = rtq2208_vout_range;
+		desc->n_linear_ranges = ARRAY_SIZE(rtq2208_vout_range);
+		desc->ramp_reg = BUCK_RG_SHIFT(curr_info->base, 5);
+		desc->active_discharge_reg = curr_info->base;
+		desc->of_map_mode = rtq2208_of_map_mode;
+
+		rdesc->mode_reg = BUCK_RG_SHIFT(curr_info->base, 2);
+		rdesc->suspend_config_reg = BUCK_RG_SHIFT(curr_info->base, 4);
+		rdesc->suspend_enable_mask = RTQ2208_BUCK_EN_STR_MASK;
+		rdesc->suspend_mode_mask = RTQ2208_BUCK_STRMODE_MASK;
+	} else {
+		/* init ldo desc */
+		desc->enable_reg = curr_info->base;
+		desc->ops = &rtq2208_regulator_ldo_ops;
+		desc->n_voltages = 1;
+		desc->active_discharge_reg = LDO_RG_SHIFT(curr_info->base, 2);
+
+		rtq2208_ldo_match[*ldo_idx].name = desc->name;
+		rtq2208_ldo_match[*ldo_idx].driver_data = rdesc;
+		rtq2208_ldo_match[(*ldo_idx)++].desc = desc;
+
+		rdesc->suspend_config_reg = curr_info->base;
+		rdesc->suspend_enable_mask = RTQ2208_LDO_EN_STR_MASK;
+	}
+}
+
+static int rtq2208_parse_regulator_dt_data(int n_regulator, const unsigned int *regulator_idx_table,
+		struct rtq2208_regulator_desc *rdesc[RTQ2208_LDO_MAX], struct device *dev)
+{
+	struct of_regulator_match rtq2208_ldo_match[2];
+	int mtp_sel, ret, i, idx, ldo_idx = 0;
+
+	/* get mtp_sel0 or mtp_sel1 */
+	mtp_sel = device_property_read_bool(dev, "richtek,mtp-sel-high");
+
+	for (i = 0; i < n_regulator; i++) {
+		idx = regulator_idx_table[i];
+
+		rdesc[i] = devm_kcalloc(dev, 1, sizeof(*rdesc[0]), GFP_KERNEL);
+		if (!rdesc[i])
+			return -ENOMEM;
+
+		rtq2208_init_regulator_desc(rdesc[i], mtp_sel, idx, rtq2208_ldo_match, &ldo_idx);
+	}
+
+	/* init ldo fixed_uV */
+	ret = rtq2208_of_get_fixed_voltage(dev, rtq2208_ldo_match, ldo_idx);
+	if (ret)
+		return dev_err_probe(dev, ret, "Failed to get ldo fixed_uV\n");
+
+	return 0;
+
+}
+
+/** different slave address corresponds different used bucks
+ * slave address 0x10: BUCK[BCA FGE]
+ * slave address 0x20: BUCK[BC FGHE]
+ * slave address 0x40: BUCK[C G]
+ */
+static int rtq2208_regulator_check(int slave_addr, int *num,
+				int *regulator_idx_table, unsigned int *buck_masks)
+{
+	static bool rtq2208_used_table[3][RTQ2208_LDO_MAX] = {
+		/* BUCK[BCA FGE], LDO[12] */
+		{1, 1, 0, 1, 1, 1, 0, 1, 1, 1},
+		/* BUCK[BC FGHE], LDO[12]*/
+		{1, 1, 0, 0, 1, 1, 1, 1, 1, 1},
+		/* BUCK[C G], LDO[12] */
+		{0, 1, 0, 0, 0, 1, 0, 0, 1, 1},
+	};
+	int i, idx = ffs(slave_addr >> 4) - 1;
+	u8 mask;
+
+	for (i = 0; i < RTQ2208_LDO_MAX; i++) {
+		if (!rtq2208_used_table[idx][i])
+			continue;
+
+		regulator_idx_table[(*num)++] = i;
+
+		mask = RTQ2208_BUCK_MASK(4 * i, 4 * i + 1);
+		buck_masks[i >> 1] &= ~mask;
+	}
+
+	return 0;
+}
+
+static const struct regmap_config rtq2208_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = 0xEF,
+};
+
+static int rtq2208_probe(struct i2c_client *i2c)
+{
+	struct device *dev = &i2c->dev;
+	struct regmap *regmap;
+	struct rtq2208_regulator_desc *rdesc[RTQ2208_LDO_MAX];
+	struct regulator_dev *rdev;
+	struct regulator_config cfg;
+	struct rtq2208_rdev_map *rdev_map;
+	int i, ret = 0, idx, n_regulator = 0;
+	unsigned int regulator_idx_table[RTQ2208_LDO_MAX],
+		     buck_masks[RTQ2208_BUCK_NUM_IRQ_REGS] = {0x33, 0x33, 0x33, 0x33, 0x33};
+
+	rdev_map = devm_kzalloc(dev, sizeof(struct rtq2208_rdev_map), GFP_KERNEL);
+	if (!rdev_map)
+		return -ENOMEM;
+
+	regmap = devm_regmap_init_i2c(i2c, &rtq2208_regmap_config);
+	if (IS_ERR(regmap))
+		return dev_err_probe(dev, PTR_ERR(regmap), "Failed to allocate regmap\n");
+
+	/* get needed regulator */
+	ret = rtq2208_regulator_check(i2c->addr, &n_regulator, regulator_idx_table, buck_masks);
+	if (ret)
+		return dev_err_probe(dev, ret, "Failed to check used regulators\n");
+
+	rdev_map->regmap = regmap;
+	rdev_map->dev = dev;
+
+	cfg.dev = dev;
+
+	/* init regulator desc */
+	ret = rtq2208_parse_regulator_dt_data(n_regulator, regulator_idx_table, rdesc, dev);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < n_regulator; i++) {
+		idx = regulator_idx_table[i];
+
+		/* register regulator */
+		rdev = devm_regulator_register(dev, &rdesc[i]->desc, &cfg);
+		if (IS_ERR(rdev))
+			return PTR_ERR(rdev);
+
+		rdev_map->rdev[idx] = rdev;
+	}
+
+	/* init interrupt mask */
+	ret = rtq2208_init_irq_mask(rdev_map, buck_masks);
+	if (ret)
+		return ret;
+
+	/* register interrupt */
+	return devm_request_threaded_irq(dev, i2c->irq, NULL, rtq2208_irq_handler,
+					IRQF_ONESHOT, dev_name(dev), rdev_map);
+}
+
+static const struct of_device_id rtq2208_device_tables[] = {
+	{ .compatible = "richtek,rtq2208" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, rtq2208_device_tables);
+
+static struct i2c_driver rtq2208_driver = {
+	.driver = {
+		.name = "rtq2208",
+		.of_match_table = rtq2208_device_tables,
+	},
+	.probe_new = rtq2208_probe,
+};
+module_i2c_driver(rtq2208_driver);
+
+MODULE_AUTHOR("Alina Yu <alina_yu@richtek.com>");
+MODULE_DESCRIPTION("Richtek RTQ2208 Regulator Driver");
+MODULE_LICENSE("GPL");

From ed2f4c745fb2bc51bb1b402ff2294587cb0b207a Mon Sep 17 00:00:00 2001
From: Naresh Solanki <naresh.solanki@9elements.com>
Date: Tue, 1 Aug 2023 12:24:52 +0200
Subject: [PATCH 0788/2047] regulator: max5970: Rename driver and remove
 wildcard

The previous version of this driver included wildcards in file names and
descriptions. This patch renames the driver to only support MAX5970 and
MAX5978, which are the only chips that the driver actually supports.

Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230801102453.1798292-1-Naresh.Solanki@9elements.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig                     |  6 ++--
 drivers/regulator/Makefile                    |  2 +-
 ...ax597x-regulator.c => max5970-regulator.c} | 28 +++++++++----------
 3 files changed, 18 insertions(+), 18 deletions(-)
 rename drivers/regulator/{max597x-regulator.c => max5970-regulator.c} (94%)

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 94f44736473e..a5c9de1cab2b 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -546,11 +546,11 @@ config REGULATOR_MAX1586
 	  regulator via I2C bus. The provided regulator is suitable
 	  for PXA27x chips to control VCC_CORE and VCC_USIM voltages.
 
-config REGULATOR_MAX597X
-	tristate "Maxim 597x power switch and monitor"
+config REGULATOR_MAX5970
+	tristate "Maxim 5970/5978 power switch and monitor"
 	depends on I2C
 	depends on OF
-	depends on MFD_MAX597X
+	depends on MFD_MAX5970
 	help
 	  This driver controls a Maxim 5970/5978 switch via I2C bus.
 	  The MAX5970/5978 is a smart switch with no output regulation, but
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 77b22bdd2791..453289cdd444 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -67,7 +67,7 @@ obj-$(CONFIG_REGULATOR_LTC3589) += ltc3589.o
 obj-$(CONFIG_REGULATOR_LTC3676) += ltc3676.o
 obj-$(CONFIG_REGULATOR_MAX14577) += max14577-regulator.o
 obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
-obj-$(CONFIG_REGULATOR_MAX597X) += max597x-regulator.o
+obj-$(CONFIG_REGULATOR_MAX5970) += max5970-regulator.o
 obj-$(CONFIG_REGULATOR_MAX77541) += max77541-regulator.o
 obj-$(CONFIG_REGULATOR_MAX77620) += max77620-regulator.o
 obj-$(CONFIG_REGULATOR_MAX77650) += max77650-regulator.o
diff --git a/drivers/regulator/max597x-regulator.c b/drivers/regulator/max5970-regulator.c
similarity index 94%
rename from drivers/regulator/max597x-regulator.c
rename to drivers/regulator/max5970-regulator.c
index 7873a5267555..b56a174cde3d 100644
--- a/drivers/regulator/max597x-regulator.c
+++ b/drivers/regulator/max5970-regulator.c
@@ -20,9 +20,9 @@
 #include <linux/regulator/of_regulator.h>
 #include <linux/platform_device.h>
 
-#include <linux/mfd/max597x.h>
+#include <linux/mfd/max5970.h>
 
-struct max597x_regulator {
+struct max5970_regulator {
 	int num_switches, mon_rng, irng, shunt_micro_ohms, lim_uA;
 	struct regmap *regmap;
 };
@@ -58,7 +58,7 @@ static int max597x_set_vp(struct regulator_dev *rdev, int lim_uV, int severity,
 			  bool enable, bool overvoltage)
 {
 	int off_h, off_l, reg, ret;
-	struct max597x_regulator *data = rdev_get_drvdata(rdev);
+	struct max5970_regulator *data = rdev_get_drvdata(rdev);
 	int channel = rdev_get_id(rdev);
 
 	if (overvoltage) {
@@ -140,7 +140,7 @@ static int max597x_set_ocp(struct regulator_dev *rdev, int lim_uA,
 	int val, reg;
 	unsigned int vthst, vthfst;
 
-	struct max597x_regulator *data = rdev_get_drvdata(rdev);
+	struct max5970_regulator *data = rdev_get_drvdata(rdev);
 	int rdev_id = rdev_get_id(rdev);
 	/*
 	 * MAX5970 doesn't has enable control for ocp.
@@ -222,7 +222,7 @@ static int max597x_dt_parse(struct device_node *np,
 			    const struct regulator_desc *desc,
 			    struct regulator_config *cfg)
 {
-	struct max597x_regulator *data = cfg->driver_data;
+	struct max5970_regulator *data = cfg->driver_data;
 	int ret = 0;
 
 	ret =
@@ -274,7 +274,7 @@ static int max597x_irq_handler(int irq, struct regulator_irq_data *rid,
 			       unsigned long *dev_mask)
 {
 	struct regulator_err_state *stat;
-	struct max597x_regulator *d = (struct max597x_regulator *)rid->data;
+	struct max5970_regulator *d = (struct max5970_regulator *)rid->data;
 	int val, ret, i;
 
 	ret = max597x_regmap_read_clear(d->regmap, MAX5970_REG_FAULT0, &val);
@@ -394,7 +394,7 @@ static int max597x_adc_range(struct regmap *regmap, const int ch,
 static int max597x_setup_irq(struct device *dev,
 			     int irq,
 			     struct regulator_dev *rdevs[MAX5970_NUM_SWITCHES],
-			     int num_switches, struct max597x_regulator *data)
+			     int num_switches, struct max5970_regulator *data)
 {
 	struct regulator_irq_desc max597x_notif = {
 		.name = "max597x-irq",
@@ -425,9 +425,9 @@ static int max597x_setup_irq(struct device *dev,
 
 static int max597x_regulator_probe(struct platform_device *pdev)
 {
-	struct max597x_data *max597x;
+	struct max5970_data *max597x;
 	struct regmap *regmap = dev_get_regmap(pdev->dev.parent, NULL);
-	struct max597x_regulator *data;
+	struct max5970_regulator *data;
 	struct i2c_client *i2c = to_i2c_client(pdev->dev.parent);
 	struct regulator_config config = { };
 	struct regulator_dev *rdev;
@@ -438,16 +438,16 @@ static int max597x_regulator_probe(struct platform_device *pdev)
 	if (!regmap)
 		return -EPROBE_DEFER;
 
-	max597x = devm_kzalloc(&i2c->dev, sizeof(struct max597x_data), GFP_KERNEL);
+	max597x = devm_kzalloc(&i2c->dev, sizeof(struct max5970_data), GFP_KERNEL);
 	if (!max597x)
 		return -ENOMEM;
 
 	i2c_set_clientdata(i2c, max597x);
 
 	if (of_device_is_compatible(i2c->dev.of_node, "maxim,max5978"))
-		max597x->num_switches = MAX597x_TYPE_MAX5978;
+		max597x->num_switches = MAX5978_NUM_SWITCHES;
 	else if (of_device_is_compatible(i2c->dev.of_node, "maxim,max5970"))
-		max597x->num_switches = MAX597x_TYPE_MAX5970;
+		max597x->num_switches = MAX5970_NUM_SWITCHES;
 	else
 		return -ENODEV;
 
@@ -456,7 +456,7 @@ static int max597x_regulator_probe(struct platform_device *pdev)
 
 	for (i = 0; i < num_switches; i++) {
 		data =
-		    devm_kzalloc(&i2c->dev, sizeof(struct max597x_regulator),
+		    devm_kzalloc(&i2c->dev, sizeof(struct max5970_regulator),
 				 GFP_KERNEL);
 		if (!data)
 			return -ENOMEM;
@@ -500,7 +500,7 @@ static int max597x_regulator_probe(struct platform_device *pdev)
 
 static struct platform_driver max597x_regulator_driver = {
 	.driver = {
-		.name = "max597x-regulator",
+		.name = "max5970-regulator",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 	},
 	.probe = max597x_regulator_probe,

From e9759d403baaeda9ea4d61d64c3151e214561217 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:01 +0800
Subject: [PATCH 0789/2047] spi: amd: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-2-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-amd.c | 50 +++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
index b19766571f28..e4345ad5cd36 100644
--- a/drivers/spi/spi-amd.c
+++ b/drivers/spi/spi-amd.c
@@ -215,9 +215,9 @@ static int amd_spi_execute_opcode(struct amd_spi *amd_spi)
 	}
 }
 
-static int amd_spi_master_setup(struct spi_device *spi)
+static int amd_spi_host_setup(struct spi_device *spi)
 {
-	struct amd_spi *amd_spi = spi_master_get_devdata(spi->master);
+	struct amd_spi *amd_spi = spi_controller_get_devdata(spi->controller);
 
 	amd_spi_clear_fifo_ptr(amd_spi);
 
@@ -272,7 +272,7 @@ static int amd_set_spi_freq(struct amd_spi *amd_spi, u32 speed_hz)
 }
 
 static inline int amd_spi_fifo_xfer(struct amd_spi *amd_spi,
-				    struct spi_master *master,
+				    struct spi_controller *host,
 				    struct spi_message *message)
 {
 	struct spi_transfer *xfer = NULL;
@@ -353,15 +353,15 @@ fin_msg:
 		return -ENODEV;
 	}
 
-	spi_finalize_current_message(master);
+	spi_finalize_current_message(host);
 
 	return message->status;
 }
 
-static int amd_spi_master_transfer(struct spi_master *master,
+static int amd_spi_host_transfer(struct spi_controller *host,
 				   struct spi_message *msg)
 {
-	struct amd_spi *amd_spi = spi_master_get_devdata(master);
+	struct amd_spi *amd_spi = spi_controller_get_devdata(host);
 	struct spi_device *spi = msg->spi;
 
 	amd_spi_select_chip(amd_spi, spi_get_chipselect(spi, 0));
@@ -370,7 +370,7 @@ static int amd_spi_master_transfer(struct spi_master *master,
 	 * Extract spi_transfers from the spi message and
 	 * program the controller.
 	 */
-	return amd_spi_fifo_xfer(amd_spi, master, msg);
+	return amd_spi_fifo_xfer(amd_spi, host, msg);
 }
 
 static size_t amd_spi_max_transfer_size(struct spi_device *spi)
@@ -381,16 +381,16 @@ static size_t amd_spi_max_transfer_size(struct spi_device *spi)
 static int amd_spi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct amd_spi *amd_spi;
 	int err;
 
-	/* Allocate storage for spi_master and driver private data */
-	master = devm_spi_alloc_master(dev, sizeof(struct amd_spi));
-	if (!master)
-		return dev_err_probe(dev, -ENOMEM, "Error allocating SPI master\n");
+	/* Allocate storage for host and driver private data */
+	host = devm_spi_alloc_host(dev, sizeof(struct amd_spi));
+	if (!host)
+		return dev_err_probe(dev, -ENOMEM, "Error allocating SPI host\n");
 
-	amd_spi = spi_master_get_devdata(master);
+	amd_spi = spi_controller_get_devdata(host);
 	amd_spi->io_remap_addr = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(amd_spi->io_remap_addr))
 		return dev_err_probe(dev, PTR_ERR(amd_spi->io_remap_addr),
@@ -400,20 +400,20 @@ static int amd_spi_probe(struct platform_device *pdev)
 
 	amd_spi->version = (enum amd_spi_versions) device_get_match_data(dev);
 
-	/* Initialize the spi_master fields */
-	master->bus_num = 0;
-	master->num_chipselect = 4;
-	master->mode_bits = 0;
-	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
-	master->max_speed_hz = AMD_SPI_MAX_HZ;
-	master->min_speed_hz = AMD_SPI_MIN_HZ;
-	master->setup = amd_spi_master_setup;
-	master->transfer_one_message = amd_spi_master_transfer;
-	master->max_transfer_size = amd_spi_max_transfer_size;
-	master->max_message_size = amd_spi_max_transfer_size;
+	/* Initialize the spi_controller fields */
+	host->bus_num = 0;
+	host->num_chipselect = 4;
+	host->mode_bits = 0;
+	host->flags = SPI_CONTROLLER_HALF_DUPLEX;
+	host->max_speed_hz = AMD_SPI_MAX_HZ;
+	host->min_speed_hz = AMD_SPI_MIN_HZ;
+	host->setup = amd_spi_host_setup;
+	host->transfer_one_message = amd_spi_host_transfer;
+	host->max_transfer_size = amd_spi_max_transfer_size;
+	host->max_message_size = amd_spi_max_transfer_size;
 
 	/* Register the controller with SPI framework */
-	err = devm_spi_register_master(dev, master);
+	err = devm_spi_register_controller(dev, host);
 	if (err)
 		return dev_err_probe(dev, err, "error registering SPI controller\n");
 

From 21ac58f59125f1c45fdd3791cbc5cc3683514564 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:02 +0800
Subject: [PATCH 0790/2047] spi: aspeed: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-3-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-aspeed-smc.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/spi/spi-aspeed-smc.c b/drivers/spi/spi-aspeed-smc.c
index e75b0d51f06a..21b0fa646c7d 100644
--- a/drivers/spi/spi-aspeed-smc.c
+++ b/drivers/spi/spi-aspeed-smc.c
@@ -295,7 +295,7 @@ static const struct aspeed_spi_data ast2400_spi_data;
 
 static int do_aspeed_spi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
 {
-	struct aspeed_spi *aspi = spi_controller_get_devdata(mem->spi->master);
+	struct aspeed_spi *aspi = spi_controller_get_devdata(mem->spi->controller);
 	struct aspeed_spi_chip *chip = &aspi->chips[spi_get_chipselect(mem->spi, 0)];
 	u32 addr_mode, addr_mode_backup;
 	u32 ctl_val;
@@ -374,7 +374,7 @@ static int aspeed_spi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
 
 static const char *aspeed_spi_get_name(struct spi_mem *mem)
 {
-	struct aspeed_spi *aspi = spi_controller_get_devdata(mem->spi->master);
+	struct aspeed_spi *aspi = spi_controller_get_devdata(mem->spi->controller);
 	struct device *dev = aspi->dev;
 
 	return devm_kasprintf(dev, GFP_KERNEL, "%s.%d", dev_name(dev),
@@ -553,7 +553,7 @@ static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip);
 
 static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)
 {
-	struct aspeed_spi *aspi = spi_controller_get_devdata(desc->mem->spi->master);
+	struct aspeed_spi *aspi = spi_controller_get_devdata(desc->mem->spi->controller);
 	struct aspeed_spi_chip *chip = &aspi->chips[spi_get_chipselect(desc->mem->spi, 0)];
 	struct spi_mem_op *op = &desc->info.op_tmpl;
 	u32 ctl_val;
@@ -620,7 +620,7 @@ static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)
 static ssize_t aspeed_spi_dirmap_read(struct spi_mem_dirmap_desc *desc,
 				      u64 offset, size_t len, void *buf)
 {
-	struct aspeed_spi *aspi = spi_controller_get_devdata(desc->mem->spi->master);
+	struct aspeed_spi *aspi = spi_controller_get_devdata(desc->mem->spi->controller);
 	struct aspeed_spi_chip *chip = &aspi->chips[spi_get_chipselect(desc->mem->spi, 0)];
 
 	/* Switch to USER command mode if mapping window is too small */
@@ -669,7 +669,7 @@ static void aspeed_spi_chip_enable(struct aspeed_spi *aspi, unsigned int cs, boo
 
 static int aspeed_spi_setup(struct spi_device *spi)
 {
-	struct aspeed_spi *aspi = spi_controller_get_devdata(spi->master);
+	struct aspeed_spi *aspi = spi_controller_get_devdata(spi->controller);
 	const struct aspeed_spi_data *data = aspi->data;
 	unsigned int cs = spi_get_chipselect(spi, 0);
 	struct aspeed_spi_chip *chip = &aspi->chips[cs];
@@ -697,7 +697,7 @@ static int aspeed_spi_setup(struct spi_device *spi)
 
 static void aspeed_spi_cleanup(struct spi_device *spi)
 {
-	struct aspeed_spi *aspi = spi_controller_get_devdata(spi->master);
+	struct aspeed_spi *aspi = spi_controller_get_devdata(spi->controller);
 	unsigned int cs = spi_get_chipselect(spi, 0);
 
 	aspeed_spi_chip_enable(aspi, cs, false);
@@ -726,7 +726,7 @@ static int aspeed_spi_probe(struct platform_device *pdev)
 	if (!data)
 		return -ENODEV;
 
-	ctlr = devm_spi_alloc_master(dev, sizeof(*aspi));
+	ctlr = devm_spi_alloc_host(dev, sizeof(*aspi));
 	if (!ctlr)
 		return -ENOMEM;
 

From 9d5920b37ab4a970f658a6a30b54cc6d6a7d2d3d Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:03 +0800
Subject: [PATCH 0791/2047] spi: spi-axi-spi-engine: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-4-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-axi-spi-engine.c | 58 ++++++++++++++++----------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index 89661f3b0d44..c5b568543149 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -356,8 +356,8 @@ static bool spi_engine_read_rx_fifo(struct spi_engine *spi_engine)
 
 static irqreturn_t spi_engine_irq(int irq, void *devid)
 {
-	struct spi_master *master = devid;
-	struct spi_engine *spi_engine = spi_master_get_devdata(master);
+	struct spi_controller *host = devid;
+	struct spi_engine *spi_engine = spi_controller_get_devdata(host);
 	unsigned int disable_int = 0;
 	unsigned int pending;
 
@@ -396,7 +396,7 @@ static irqreturn_t spi_engine_irq(int irq, void *devid)
 			msg->status = 0;
 			msg->actual_length = msg->frame_length;
 			spi_engine->msg = NULL;
-			spi_finalize_current_message(master);
+			spi_finalize_current_message(host);
 			disable_int |= SPI_ENGINE_INT_SYNC;
 		}
 	}
@@ -412,11 +412,11 @@ static irqreturn_t spi_engine_irq(int irq, void *devid)
 	return IRQ_HANDLED;
 }
 
-static int spi_engine_transfer_one_message(struct spi_master *master,
+static int spi_engine_transfer_one_message(struct spi_controller *host,
 	struct spi_message *msg)
 {
 	struct spi_engine_program p_dry, *p;
-	struct spi_engine *spi_engine = spi_master_get_devdata(master);
+	struct spi_engine *spi_engine = spi_controller_get_devdata(host);
 	unsigned int int_enable = 0;
 	unsigned long flags;
 	size_t size;
@@ -464,7 +464,7 @@ static int spi_engine_transfer_one_message(struct spi_master *master,
 static int spi_engine_probe(struct platform_device *pdev)
 {
 	struct spi_engine *spi_engine;
-	struct spi_master *master;
+	struct spi_controller *host;
 	unsigned int version;
 	int irq;
 	int ret;
@@ -477,29 +477,29 @@ static int spi_engine_probe(struct platform_device *pdev)
 	if (!spi_engine)
 		return -ENOMEM;
 
-	master = spi_alloc_master(&pdev->dev, 0);
-	if (!master)
+	host = spi_alloc_host(&pdev->dev, 0);
+	if (!host)
 		return -ENOMEM;
 
-	spi_master_set_devdata(master, spi_engine);
+	spi_controller_set_devdata(host, spi_engine);
 
 	spin_lock_init(&spi_engine->lock);
 
 	spi_engine->clk = devm_clk_get(&pdev->dev, "s_axi_aclk");
 	if (IS_ERR(spi_engine->clk)) {
 		ret = PTR_ERR(spi_engine->clk);
-		goto err_put_master;
+		goto err_put_host;
 	}
 
 	spi_engine->ref_clk = devm_clk_get(&pdev->dev, "spi_clk");
 	if (IS_ERR(spi_engine->ref_clk)) {
 		ret = PTR_ERR(spi_engine->ref_clk);
-		goto err_put_master;
+		goto err_put_host;
 	}
 
 	ret = clk_prepare_enable(spi_engine->clk);
 	if (ret)
-		goto err_put_master;
+		goto err_put_host;
 
 	ret = clk_prepare_enable(spi_engine->ref_clk);
 	if (ret)
@@ -525,46 +525,46 @@ static int spi_engine_probe(struct platform_device *pdev)
 	writel_relaxed(0xff, spi_engine->base + SPI_ENGINE_REG_INT_PENDING);
 	writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_INT_ENABLE);
 
-	ret = request_irq(irq, spi_engine_irq, 0, pdev->name, master);
+	ret = request_irq(irq, spi_engine_irq, 0, pdev->name, host);
 	if (ret)
 		goto err_ref_clk_disable;
 
-	master->dev.of_node = pdev->dev.of_node;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_3WIRE;
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->max_speed_hz = clk_get_rate(spi_engine->ref_clk) / 2;
-	master->transfer_one_message = spi_engine_transfer_one_message;
-	master->num_chipselect = 8;
+	host->dev.of_node = pdev->dev.of_node;
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_3WIRE;
+	host->bits_per_word_mask = SPI_BPW_MASK(8);
+	host->max_speed_hz = clk_get_rate(spi_engine->ref_clk) / 2;
+	host->transfer_one_message = spi_engine_transfer_one_message;
+	host->num_chipselect = 8;
 
-	ret = spi_register_master(master);
+	ret = spi_register_controller(host);
 	if (ret)
 		goto err_free_irq;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
 	return 0;
 err_free_irq:
-	free_irq(irq, master);
+	free_irq(irq, host);
 err_ref_clk_disable:
 	clk_disable_unprepare(spi_engine->ref_clk);
 err_clk_disable:
 	clk_disable_unprepare(spi_engine->clk);
-err_put_master:
-	spi_master_put(master);
+err_put_host:
+	spi_controller_put(host);
 	return ret;
 }
 
 static void spi_engine_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = spi_master_get(platform_get_drvdata(pdev));
-	struct spi_engine *spi_engine = spi_master_get_devdata(master);
+	struct spi_controller *host = spi_controller_get(platform_get_drvdata(pdev));
+	struct spi_engine *spi_engine = spi_controller_get_devdata(host);
 	int irq = platform_get_irq(pdev, 0);
 
-	spi_unregister_master(master);
+	spi_unregister_controller(host);
 
-	free_irq(irq, master);
+	free_irq(irq, host);
 
-	spi_master_put(master);
+	spi_controller_put(host);
 
 	writel_relaxed(0xff, spi_engine->base + SPI_ENGINE_REG_INT_PENDING);
 	writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_INT_ENABLE);

From ec271c04ae93b8b5ec392f9aa1cb062714157a7b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:04 +0800
Subject: [PATCH 0792/2047] spi: bcm-qspi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-5-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm-qspi.c | 64 +++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index d91dfbe47aa5..ef08fcac2f6d 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -220,7 +220,7 @@ struct qspi_trans {
 
 struct bcm_qspi {
 	struct platform_device *pdev;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct clk *clk;
 	u32 base_clk;
 	u32 max_speed_hz;
@@ -732,7 +732,7 @@ static bool bcm_qspi_mspi_transfer_is_last(struct bcm_qspi *qspi,
 					   struct qspi_trans *qt)
 {
 	if (qt->mspi_last_trans &&
-	    spi_transfer_is_last(qspi->master, qt->trans))
+	    spi_transfer_is_last(qspi->host, qt->trans))
 		return true;
 	else
 		return false;
@@ -979,7 +979,7 @@ static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi)
 		mspi_cdram |= ((tp.trans->bits_per_word <= 8) ? 0 :
 			       MSPI_CDRAM_BITSE_BIT);
 
-		/* set 3wrire halfduplex mode data from master to slave */
+		/* set 3wrire halfduplex mode data from host to target */
 		if ((spi->mode & SPI_3WIRE) && tp.trans->tx_buf)
 			mspi_cdram |= MSPI_CDRAM_OUTP;
 
@@ -1035,7 +1035,7 @@ done:
 static int bcm_qspi_bspi_exec_mem_op(struct spi_device *spi,
 				     const struct spi_mem_op *op)
 {
-	struct bcm_qspi *qspi = spi_master_get_devdata(spi->master);
+	struct bcm_qspi *qspi = spi_controller_get_devdata(spi->controller);
 	u32 addr = 0, len, rdlen, len_words, from = 0;
 	int ret = 0;
 	unsigned long timeo = msecs_to_jiffies(100);
@@ -1118,11 +1118,11 @@ static int bcm_qspi_bspi_exec_mem_op(struct spi_device *spi,
 	return ret;
 }
 
-static int bcm_qspi_transfer_one(struct spi_master *master,
+static int bcm_qspi_transfer_one(struct spi_controller *host,
 				 struct spi_device *spi,
 				 struct spi_transfer *trans)
 {
-	struct bcm_qspi *qspi = spi_master_get_devdata(master);
+	struct bcm_qspi *qspi = spi_controller_get_devdata(host);
 	int slots;
 	unsigned long timeo = msecs_to_jiffies(100);
 
@@ -1150,8 +1150,8 @@ static int bcm_qspi_transfer_one(struct spi_master *master,
 static int bcm_qspi_mspi_exec_mem_op(struct spi_device *spi,
 				     const struct spi_mem_op *op)
 {
-	struct spi_master *master = spi->master;
-	struct bcm_qspi *qspi = spi_master_get_devdata(master);
+	struct spi_controller *host = spi->controller;
+	struct bcm_qspi *qspi = spi_controller_get_devdata(host);
 	struct spi_transfer t[2];
 	u8 cmd[6] = { };
 	int ret, i;
@@ -1171,7 +1171,7 @@ static int bcm_qspi_mspi_exec_mem_op(struct spi_device *spi,
 	t[0].tx_nbits = op->cmd.buswidth;
 	/* lets mspi know that this is not last transfer */
 	qspi->trans_pos.mspi_last_trans = false;
-	ret = bcm_qspi_transfer_one(master, spi, &t[0]);
+	ret = bcm_qspi_transfer_one(host, spi, &t[0]);
 
 	/* rx */
 	qspi->trans_pos.mspi_last_trans = true;
@@ -1181,7 +1181,7 @@ static int bcm_qspi_mspi_exec_mem_op(struct spi_device *spi,
 		t[1].len = op->data.nbytes;
 		t[1].rx_nbits =  op->data.buswidth;
 		t[1].bits_per_word = spi->bits_per_word;
-		ret = bcm_qspi_transfer_one(master, spi, &t[1]);
+		ret = bcm_qspi_transfer_one(host, spi, &t[1]);
 	}
 
 	return ret;
@@ -1191,7 +1191,7 @@ static int bcm_qspi_exec_mem_op(struct spi_mem *mem,
 				const struct spi_mem_op *op)
 {
 	struct spi_device *spi = mem->spi;
-	struct bcm_qspi *qspi = spi_master_get_devdata(spi->master);
+	struct bcm_qspi *qspi = spi_controller_get_devdata(spi->controller);
 	int ret = 0;
 	bool mspi_read = false;
 	u32 addr = 0, len;
@@ -1486,7 +1486,7 @@ int bcm_qspi_probe(struct platform_device *pdev,
 	const struct bcm_qspi_data *data;
 	struct device *dev = &pdev->dev;
 	struct bcm_qspi *qspi;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct resource *res;
 	int irq, ret = 0, num_ints = 0;
 	u32 val;
@@ -1504,13 +1504,13 @@ int bcm_qspi_probe(struct platform_device *pdev,
 
 	data = of_id->data;
 
-	master = devm_spi_alloc_master(dev, sizeof(struct bcm_qspi));
-	if (!master) {
-		dev_err(dev, "error allocating spi_master\n");
+	host = devm_spi_alloc_host(dev, sizeof(struct bcm_qspi));
+	if (!host) {
+		dev_err(dev, "error allocating spi_controller\n");
 		return -ENOMEM;
 	}
 
-	qspi = spi_master_get_devdata(master);
+	qspi = spi_controller_get_devdata(host);
 
 	qspi->clk = devm_clk_get_optional(&pdev->dev, NULL);
 	if (IS_ERR(qspi->clk))
@@ -1520,23 +1520,23 @@ int bcm_qspi_probe(struct platform_device *pdev,
 	qspi->trans_pos.trans = NULL;
 	qspi->trans_pos.byte = 0;
 	qspi->trans_pos.mspi_last_trans = true;
-	qspi->master = master;
+	qspi->host = host;
 
-	master->bus_num = -1;
-	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_RX_DUAL | SPI_RX_QUAD |
+	host->bus_num = -1;
+	host->mode_bits = SPI_CPHA | SPI_CPOL | SPI_RX_DUAL | SPI_RX_QUAD |
 				SPI_3WIRE;
-	master->setup = bcm_qspi_setup;
-	master->transfer_one = bcm_qspi_transfer_one;
-	master->mem_ops = &bcm_qspi_mem_ops;
-	master->cleanup = bcm_qspi_cleanup;
-	master->dev.of_node = dev->of_node;
-	master->num_chipselect = NUM_CHIPSELECT;
-	master->use_gpio_descriptors = true;
+	host->setup = bcm_qspi_setup;
+	host->transfer_one = bcm_qspi_transfer_one;
+	host->mem_ops = &bcm_qspi_mem_ops;
+	host->cleanup = bcm_qspi_cleanup;
+	host->dev.of_node = dev->of_node;
+	host->num_chipselect = NUM_CHIPSELECT;
+	host->use_gpio_descriptors = true;
 
 	qspi->big_endian = of_device_is_big_endian(dev->of_node);
 
 	if (!of_property_read_u32(dev->of_node, "num-cs", &val))
-		master->num_chipselect = val;
+		host->num_chipselect = val;
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "hif_mspi");
 	if (!res)
@@ -1659,9 +1659,9 @@ int bcm_qspi_probe(struct platform_device *pdev,
 	qspi->xfer_mode.addrlen = -1;
 	qspi->xfer_mode.hp = -1;
 
-	ret = spi_register_master(master);
+	ret = spi_register_controller(host);
 	if (ret < 0) {
-		dev_err(dev, "can't register master\n");
+		dev_err(dev, "can't register host\n");
 		goto qspi_reg_err;
 	}
 
@@ -1682,7 +1682,7 @@ void bcm_qspi_remove(struct platform_device *pdev)
 {
 	struct bcm_qspi *qspi = platform_get_drvdata(pdev);
 
-	spi_unregister_master(qspi->master);
+	spi_unregister_controller(qspi->host);
 	bcm_qspi_hw_uninit(qspi);
 	clk_disable_unprepare(qspi->clk);
 	kfree(qspi->dev_ids);
@@ -1700,7 +1700,7 @@ static int __maybe_unused bcm_qspi_suspend(struct device *dev)
 		qspi->s3_strap_override_ctrl =
 			bcm_qspi_read(qspi, BSPI, BSPI_STRAP_OVERRIDE_CTRL);
 
-	spi_master_suspend(qspi->master);
+	spi_controller_suspend(qspi->host);
 	clk_disable_unprepare(qspi->clk);
 	bcm_qspi_hw_uninit(qspi);
 
@@ -1721,7 +1721,7 @@ static int __maybe_unused bcm_qspi_resume(struct device *dev)
 
 	ret = clk_prepare_enable(qspi->clk);
 	if (!ret)
-		spi_master_resume(qspi->master);
+		spi_controller_resume(qspi->host);
 
 	return ret;
 }

From 00be843bc1c3c77574f566ef78b1b7ec1ef92be5 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:05 +0800
Subject: [PATCH 0793/2047] spi: bcm2835: switch to use modern name

Change legacy name master/slave to modern name host/target.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-6-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm2835.c | 112 +++++++++++++++++++-------------------
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index c16abc2a9e9b..1116e7d20034 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -105,7 +105,7 @@ MODULE_PARM_DESC(polling_limit_us,
  *      These are counted as well in @count_transfer_polling and
  *      @count_transfer_irq
  * @count_transfer_dma: count how often dma mode is used
- * @slv: SPI slave currently selected
+ * @target: SPI target currently selected
  *	(used by bcm2835_spi_dma_tx_done() to write @clear_rx_cs)
  * @tx_dma_active: whether a TX DMA descriptor is in progress
  * @rx_dma_active: whether a RX DMA descriptor is in progress
@@ -135,7 +135,7 @@ struct bcm2835_spi {
 	u64 count_transfer_irq_after_polling;
 	u64 count_transfer_dma;
 
-	struct bcm2835_spidev *slv;
+	struct bcm2835_spidev *target;
 	unsigned int tx_dma_active;
 	unsigned int rx_dma_active;
 	struct dma_async_tx_descriptor *fill_tx_desc;
@@ -143,14 +143,14 @@ struct bcm2835_spi {
 };
 
 /**
- * struct bcm2835_spidev - BCM2835 SPI slave
+ * struct bcm2835_spidev - BCM2835 SPI target
  * @prepare_cs: precalculated CS register value for ->prepare_message()
- *	(uses slave-specific clock polarity and phase settings)
+ *	(uses target-specific clock polarity and phase settings)
  * @clear_rx_desc: preallocated RX DMA descriptor used for TX-only transfers
  *	(cyclically clears RX FIFO by writing @clear_rx_cs to CS register)
  * @clear_rx_addr: bus address of @clear_rx_cs
  * @clear_rx_cs: precalculated CS register value to clear RX FIFO
- *	(uses slave-specific clock polarity and phase settings)
+ *	(uses target-specific clock polarity and phase settings)
  */
 struct bcm2835_spidev {
 	u32 prepare_cs;
@@ -434,7 +434,7 @@ static int bcm2835_spi_transfer_one_irq(struct spi_controller *ctlr,
 
 /**
  * bcm2835_spi_transfer_prologue() - transfer first few bytes without DMA
- * @ctlr: SPI master controller
+ * @ctlr: SPI host controller
  * @tfr: SPI transfer
  * @bs: BCM2835 SPI controller
  * @cs: CS register
@@ -596,7 +596,7 @@ out:
 
 /**
  * bcm2835_spi_dma_rx_done() - callback for DMA RX channel
- * @data: SPI master controller
+ * @data: SPI host controller
  *
  * Used for bidirectional and RX-only transfers.
  */
@@ -624,7 +624,7 @@ static void bcm2835_spi_dma_rx_done(void *data)
 
 /**
  * bcm2835_spi_dma_tx_done() - callback for DMA TX channel
- * @data: SPI master controller
+ * @data: SPI host controller
  *
  * Used for TX-only transfers.
  */
@@ -635,7 +635,7 @@ static void bcm2835_spi_dma_tx_done(void *data)
 
 	/* busy-wait for TX FIFO to empty */
 	while (!(bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE))
-		bcm2835_wr(bs, BCM2835_SPI_CS, bs->slv->clear_rx_cs);
+		bcm2835_wr(bs, BCM2835_SPI_CS, bs->target->clear_rx_cs);
 
 	bs->tx_dma_active = false;
 	smp_wmb();
@@ -655,10 +655,10 @@ static void bcm2835_spi_dma_tx_done(void *data)
 
 /**
  * bcm2835_spi_prepare_sg() - prepare and submit DMA descriptor for sglist
- * @ctlr: SPI master controller
+ * @ctlr: SPI host controller
  * @tfr: SPI transfer
  * @bs: BCM2835 SPI controller
- * @slv: BCM2835 SPI slave
+ * @target: BCM2835 SPI target
  * @is_tx: whether to submit DMA descriptor for TX or RX sglist
  *
  * Prepare and submit a DMA descriptor for the TX or RX sglist of @tfr.
@@ -667,7 +667,7 @@ static void bcm2835_spi_dma_tx_done(void *data)
 static int bcm2835_spi_prepare_sg(struct spi_controller *ctlr,
 				  struct spi_transfer *tfr,
 				  struct bcm2835_spi *bs,
-				  struct bcm2835_spidev *slv,
+				  struct bcm2835_spidev *target,
 				  bool is_tx)
 {
 	struct dma_chan *chan;
@@ -707,7 +707,7 @@ static int bcm2835_spi_prepare_sg(struct spi_controller *ctlr,
 	} else if (!tfr->rx_buf) {
 		desc->callback = bcm2835_spi_dma_tx_done;
 		desc->callback_param = ctlr;
-		bs->slv = slv;
+		bs->target = target;
 	}
 
 	/* submit it to DMA-engine */
@@ -718,9 +718,9 @@ static int bcm2835_spi_prepare_sg(struct spi_controller *ctlr,
 
 /**
  * bcm2835_spi_transfer_one_dma() - perform SPI transfer using DMA engine
- * @ctlr: SPI master controller
+ * @ctlr: SPI host controller
  * @tfr: SPI transfer
- * @slv: BCM2835 SPI slave
+ * @target: BCM2835 SPI target
  * @cs: CS register
  *
  * For *bidirectional* transfers (both tx_buf and rx_buf are non-%NULL), set up
@@ -732,7 +732,7 @@ static int bcm2835_spi_prepare_sg(struct spi_controller *ctlr,
  * clear the RX FIFO by setting the CLEAR_RX bit in the CS register.
  *
  * The CS register value is precalculated in bcm2835_spi_setup().  Normally
- * this is called only once, on slave registration.  A DMA descriptor to write
+ * this is called only once, on target registration.  A DMA descriptor to write
  * this value is preallocated in bcm2835_dma_init().  All that's left to do
  * when performing a TX-only transfer is to submit this descriptor to the RX
  * DMA channel.  Latency is thereby minimized.  The descriptor does not
@@ -765,7 +765,7 @@ static int bcm2835_spi_prepare_sg(struct spi_controller *ctlr,
  */
 static int bcm2835_spi_transfer_one_dma(struct spi_controller *ctlr,
 					struct spi_transfer *tfr,
-					struct bcm2835_spidev *slv,
+					struct bcm2835_spidev *target,
 					u32 cs)
 {
 	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
@@ -783,7 +783,7 @@ static int bcm2835_spi_transfer_one_dma(struct spi_controller *ctlr,
 
 	/* setup tx-DMA */
 	if (bs->tx_buf) {
-		ret = bcm2835_spi_prepare_sg(ctlr, tfr, bs, slv, true);
+		ret = bcm2835_spi_prepare_sg(ctlr, tfr, bs, target, true);
 	} else {
 		cookie = dmaengine_submit(bs->fill_tx_desc);
 		ret = dma_submit_error(cookie);
@@ -809,9 +809,9 @@ static int bcm2835_spi_transfer_one_dma(struct spi_controller *ctlr,
 	 * this saves 10us or more.
 	 */
 	if (bs->rx_buf) {
-		ret = bcm2835_spi_prepare_sg(ctlr, tfr, bs, slv, false);
+		ret = bcm2835_spi_prepare_sg(ctlr, tfr, bs, target, false);
 	} else {
-		cookie = dmaengine_submit(slv->clear_rx_desc);
+		cookie = dmaengine_submit(target->clear_rx_desc);
 		ret = dma_submit_error(cookie);
 	}
 	if (ret) {
@@ -1050,10 +1050,10 @@ static int bcm2835_spi_transfer_one(struct spi_controller *ctlr,
 				    struct spi_transfer *tfr)
 {
 	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
-	struct bcm2835_spidev *slv = spi_get_ctldata(spi);
+	struct bcm2835_spidev *target = spi_get_ctldata(spi);
 	unsigned long spi_hz, cdiv;
 	unsigned long hz_per_byte, byte_limit;
-	u32 cs = slv->prepare_cs;
+	u32 cs = target->prepare_cs;
 
 	/* set clock */
 	spi_hz = tfr->speed_hz;
@@ -1101,7 +1101,7 @@ static int bcm2835_spi_transfer_one(struct spi_controller *ctlr,
 	 * this 1 idle clock cycle pattern but runs the spi clock without gaps
 	 */
 	if (ctlr->can_dma && bcm2835_spi_can_dma(ctlr, spi, tfr))
-		return bcm2835_spi_transfer_one_dma(ctlr, tfr, slv, cs);
+		return bcm2835_spi_transfer_one_dma(ctlr, tfr, target, cs);
 
 	/* run in interrupt-mode */
 	return bcm2835_spi_transfer_one_irq(ctlr, spi, tfr, cs, true);
@@ -1112,7 +1112,7 @@ static int bcm2835_spi_prepare_message(struct spi_controller *ctlr,
 {
 	struct spi_device *spi = msg->spi;
 	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
-	struct bcm2835_spidev *slv = spi_get_ctldata(spi);
+	struct bcm2835_spidev *target = spi_get_ctldata(spi);
 	int ret;
 
 	if (ctlr->can_dma) {
@@ -1131,7 +1131,7 @@ static int bcm2835_spi_prepare_message(struct spi_controller *ctlr,
 	 * Set up clock polarity before spi_transfer_one_message() asserts
 	 * chip select to avoid a gratuitous clock signal edge.
 	 */
-	bcm2835_wr(bs, BCM2835_SPI_CS, slv->prepare_cs);
+	bcm2835_wr(bs, BCM2835_SPI_CS, target->prepare_cs);
 
 	return 0;
 }
@@ -1163,51 +1163,51 @@ static int chip_match_name(struct gpio_chip *chip, void *data)
 
 static void bcm2835_spi_cleanup(struct spi_device *spi)
 {
-	struct bcm2835_spidev *slv = spi_get_ctldata(spi);
+	struct bcm2835_spidev *target = spi_get_ctldata(spi);
 	struct spi_controller *ctlr = spi->controller;
 
-	if (slv->clear_rx_desc)
-		dmaengine_desc_free(slv->clear_rx_desc);
+	if (target->clear_rx_desc)
+		dmaengine_desc_free(target->clear_rx_desc);
 
-	if (slv->clear_rx_addr)
+	if (target->clear_rx_addr)
 		dma_unmap_single(ctlr->dma_rx->device->dev,
-				 slv->clear_rx_addr,
+				 target->clear_rx_addr,
 				 sizeof(u32),
 				 DMA_TO_DEVICE);
 
-	kfree(slv);
+	kfree(target);
 }
 
 static int bcm2835_spi_setup_dma(struct spi_controller *ctlr,
 				 struct spi_device *spi,
 				 struct bcm2835_spi *bs,
-				 struct bcm2835_spidev *slv)
+				 struct bcm2835_spidev *target)
 {
 	int ret;
 
 	if (!ctlr->dma_rx)
 		return 0;
 
-	slv->clear_rx_addr = dma_map_single(ctlr->dma_rx->device->dev,
-					    &slv->clear_rx_cs,
-					    sizeof(u32),
-					    DMA_TO_DEVICE);
-	if (dma_mapping_error(ctlr->dma_rx->device->dev, slv->clear_rx_addr)) {
+	target->clear_rx_addr = dma_map_single(ctlr->dma_rx->device->dev,
+					       &target->clear_rx_cs,
+					       sizeof(u32),
+					       DMA_TO_DEVICE);
+	if (dma_mapping_error(ctlr->dma_rx->device->dev, target->clear_rx_addr)) {
 		dev_err(&spi->dev, "cannot map clear_rx_cs\n");
-		slv->clear_rx_addr = 0;
+		target->clear_rx_addr = 0;
 		return -ENOMEM;
 	}
 
-	slv->clear_rx_desc = dmaengine_prep_dma_cyclic(ctlr->dma_rx,
-						       slv->clear_rx_addr,
-						       sizeof(u32), 0,
-						       DMA_MEM_TO_DEV, 0);
-	if (!slv->clear_rx_desc) {
+	target->clear_rx_desc = dmaengine_prep_dma_cyclic(ctlr->dma_rx,
+						          target->clear_rx_addr,
+						          sizeof(u32), 0,
+						          DMA_MEM_TO_DEV, 0);
+	if (!target->clear_rx_desc) {
 		dev_err(&spi->dev, "cannot prepare clear_rx_desc\n");
 		return -ENOMEM;
 	}
 
-	ret = dmaengine_desc_set_reuse(slv->clear_rx_desc);
+	ret = dmaengine_desc_set_reuse(target->clear_rx_desc);
 	if (ret) {
 		dev_err(&spi->dev, "cannot reuse clear_rx_desc\n");
 		return ret;
@@ -1220,26 +1220,26 @@ static int bcm2835_spi_setup(struct spi_device *spi)
 {
 	struct spi_controller *ctlr = spi->controller;
 	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
-	struct bcm2835_spidev *slv = spi_get_ctldata(spi);
+	struct bcm2835_spidev *target = spi_get_ctldata(spi);
 	struct gpio_chip *chip;
 	int ret;
 	u32 cs;
 
-	if (!slv) {
-		slv = kzalloc(ALIGN(sizeof(*slv), dma_get_cache_alignment()),
+	if (!target) {
+		target = kzalloc(ALIGN(sizeof(*target), dma_get_cache_alignment()),
 			      GFP_KERNEL);
-		if (!slv)
+		if (!target)
 			return -ENOMEM;
 
-		spi_set_ctldata(spi, slv);
+		spi_set_ctldata(spi, target);
 
-		ret = bcm2835_spi_setup_dma(ctlr, spi, bs, slv);
+		ret = bcm2835_spi_setup_dma(ctlr, spi, bs, target);
 		if (ret)
 			goto err_cleanup;
 	}
 
 	/*
-	 * Precalculate SPI slave's CS register value for ->prepare_message():
+	 * Precalculate SPI target's CS register value for ->prepare_message():
 	 * The driver always uses software-controlled GPIO chip select, hence
 	 * set the hardware-controlled native chip select to an invalid value
 	 * to prevent it from interfering.
@@ -1249,18 +1249,18 @@ static int bcm2835_spi_setup(struct spi_device *spi)
 		cs |= BCM2835_SPI_CS_CPOL;
 	if (spi->mode & SPI_CPHA)
 		cs |= BCM2835_SPI_CS_CPHA;
-	slv->prepare_cs = cs;
+	target->prepare_cs = cs;
 
 	/*
-	 * Precalculate SPI slave's CS register value to clear RX FIFO
+	 * Precalculate SPI target's CS register value to clear RX FIFO
 	 * in case of a TX-only DMA transfer.
 	 */
 	if (ctlr->dma_rx) {
-		slv->clear_rx_cs = cs | BCM2835_SPI_CS_TA |
+		target->clear_rx_cs = cs | BCM2835_SPI_CS_TA |
 					BCM2835_SPI_CS_DMAEN |
 					BCM2835_SPI_CS_CLEAR_RX;
 		dma_sync_single_for_device(ctlr->dma_rx->device->dev,
-					   slv->clear_rx_addr,
+					   target->clear_rx_addr,
 					   sizeof(u32),
 					   DMA_TO_DEVICE);
 	}
@@ -1328,7 +1328,7 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
 	struct bcm2835_spi *bs;
 	int err;
 
-	ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*bs));
+	ctlr = devm_spi_alloc_host(&pdev->dev, sizeof(*bs));
 	if (!ctlr)
 		return -ENOMEM;
 

From 901fcd0740df2f907d56709e208325088265c662 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:06 +0800
Subject: [PATCH 0794/2047] spi: bcm2835aux: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-7-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm2835aux.c | 84 ++++++++++++++++++------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index 8ace417c0a29..172b534390b2 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -231,8 +231,8 @@ static void bcm2835aux_spi_transfer_helper(struct bcm2835aux_spi *bs)
 
 static irqreturn_t bcm2835aux_spi_interrupt(int irq, void *dev_id)
 {
-	struct spi_master *master = dev_id;
-	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_id;
+	struct bcm2835aux_spi *bs = spi_controller_get_devdata(host);
 
 	/* IRQ may be shared, so return if our interrupts are disabled */
 	if (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_CNTL1) &
@@ -251,17 +251,17 @@ static irqreturn_t bcm2835aux_spi_interrupt(int irq, void *dev_id)
 	/* and if rx_len is 0 then disable interrupts and wake up completion */
 	if (!bs->rx_len) {
 		bcm2835aux_wr(bs, BCM2835_AUX_SPI_CNTL1, bs->cntl[1]);
-		spi_finalize_current_transfer(master);
+		spi_finalize_current_transfer(host);
 	}
 
 	return IRQ_HANDLED;
 }
 
-static int __bcm2835aux_spi_transfer_one_irq(struct spi_master *master,
+static int __bcm2835aux_spi_transfer_one_irq(struct spi_controller *host,
 					     struct spi_device *spi,
 					     struct spi_transfer *tfr)
 {
-	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
+	struct bcm2835aux_spi *bs = spi_controller_get_devdata(host);
 
 	/* enable interrupts */
 	bcm2835aux_wr(bs, BCM2835_AUX_SPI_CNTL1, bs->cntl[1] |
@@ -272,11 +272,11 @@ static int __bcm2835aux_spi_transfer_one_irq(struct spi_master *master,
 	return 1;
 }
 
-static int bcm2835aux_spi_transfer_one_irq(struct spi_master *master,
+static int bcm2835aux_spi_transfer_one_irq(struct spi_controller *host,
 					   struct spi_device *spi,
 					   struct spi_transfer *tfr)
 {
-	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
+	struct bcm2835aux_spi *bs = spi_controller_get_devdata(host);
 
 	/* update statistics */
 	bs->count_transfer_irq++;
@@ -294,14 +294,14 @@ static int bcm2835aux_spi_transfer_one_irq(struct spi_master *master,
 	}
 
 	/* now run the interrupt mode */
-	return __bcm2835aux_spi_transfer_one_irq(master, spi, tfr);
+	return __bcm2835aux_spi_transfer_one_irq(host, spi, tfr);
 }
 
-static int bcm2835aux_spi_transfer_one_poll(struct spi_master *master,
+static int bcm2835aux_spi_transfer_one_poll(struct spi_controller *host,
 					    struct spi_device *spi,
 					struct spi_transfer *tfr)
 {
-	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
+	struct bcm2835aux_spi *bs = spi_controller_get_devdata(host);
 	unsigned long timeout;
 
 	/* update statistics */
@@ -328,7 +328,7 @@ static int bcm2835aux_spi_transfer_one_poll(struct spi_master *master,
 					    bs->tx_len, bs->rx_len);
 			/* forward to interrupt handler */
 			bs->count_transfer_irq_after_poll++;
-			return __bcm2835aux_spi_transfer_one_irq(master,
+			return __bcm2835aux_spi_transfer_one_irq(host,
 							       spi, tfr);
 		}
 	}
@@ -337,11 +337,11 @@ static int bcm2835aux_spi_transfer_one_poll(struct spi_master *master,
 	return 0;
 }
 
-static int bcm2835aux_spi_transfer_one(struct spi_master *master,
+static int bcm2835aux_spi_transfer_one(struct spi_controller *host,
 				       struct spi_device *spi,
 				       struct spi_transfer *tfr)
 {
-	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
+	struct bcm2835aux_spi *bs = spi_controller_get_devdata(host);
 	unsigned long spi_hz, clk_hz, speed;
 	unsigned long hz_per_byte, byte_limit;
 
@@ -392,17 +392,17 @@ static int bcm2835aux_spi_transfer_one(struct spi_master *master,
 
 	/* run in polling mode for short transfers */
 	if (tfr->len < byte_limit)
-		return bcm2835aux_spi_transfer_one_poll(master, spi, tfr);
+		return bcm2835aux_spi_transfer_one_poll(host, spi, tfr);
 
 	/* run in interrupt mode for all others */
-	return bcm2835aux_spi_transfer_one_irq(master, spi, tfr);
+	return bcm2835aux_spi_transfer_one_irq(host, spi, tfr);
 }
 
-static int bcm2835aux_spi_prepare_message(struct spi_master *master,
+static int bcm2835aux_spi_prepare_message(struct spi_controller *host,
 					  struct spi_message *msg)
 {
 	struct spi_device *spi = msg->spi;
-	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
+	struct bcm2835aux_spi *bs = spi_controller_get_devdata(host);
 
 	bs->cntl[0] = BCM2835_AUX_SPI_CNTL0_ENABLE |
 		      BCM2835_AUX_SPI_CNTL0_VAR_WIDTH |
@@ -422,20 +422,20 @@ static int bcm2835aux_spi_prepare_message(struct spi_master *master,
 	return 0;
 }
 
-static int bcm2835aux_spi_unprepare_message(struct spi_master *master,
+static int bcm2835aux_spi_unprepare_message(struct spi_controller *host,
 					    struct spi_message *msg)
 {
-	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
+	struct bcm2835aux_spi *bs = spi_controller_get_devdata(host);
 
 	bcm2835aux_spi_reset_hw(bs);
 
 	return 0;
 }
 
-static void bcm2835aux_spi_handle_err(struct spi_master *master,
+static void bcm2835aux_spi_handle_err(struct spi_controller *host,
 				      struct spi_message *msg)
 {
-	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
+	struct bcm2835aux_spi *bs = spi_controller_get_devdata(host);
 
 	bcm2835aux_spi_reset_hw(bs);
 }
@@ -473,18 +473,18 @@ static int bcm2835aux_spi_setup(struct spi_device *spi)
 
 static int bcm2835aux_spi_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct bcm2835aux_spi *bs;
 	unsigned long clk_hz;
 	int err;
 
-	master = devm_spi_alloc_master(&pdev->dev, sizeof(*bs));
-	if (!master)
+	host = devm_spi_alloc_host(&pdev->dev, sizeof(*bs));
+	if (!host)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, master);
-	master->mode_bits = (SPI_CPOL | SPI_CS_HIGH | SPI_NO_CS);
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
+	platform_set_drvdata(pdev, host);
+	host->mode_bits = (SPI_CPOL | SPI_CS_HIGH | SPI_NO_CS);
+	host->bits_per_word_mask = SPI_BPW_MASK(8);
 	/* even though the driver never officially supported native CS
 	 * allow a single native CS for legacy DT support purposes when
 	 * no cs-gpio is configured.
@@ -496,16 +496,16 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev)
 	 * * cs_delay_usec: cs is always deasserted one SCK cycle after
 	 *     a spi_transfer
 	 */
-	master->num_chipselect = 1;
-	master->setup = bcm2835aux_spi_setup;
-	master->transfer_one = bcm2835aux_spi_transfer_one;
-	master->handle_err = bcm2835aux_spi_handle_err;
-	master->prepare_message = bcm2835aux_spi_prepare_message;
-	master->unprepare_message = bcm2835aux_spi_unprepare_message;
-	master->dev.of_node = pdev->dev.of_node;
-	master->use_gpio_descriptors = true;
+	host->num_chipselect = 1;
+	host->setup = bcm2835aux_spi_setup;
+	host->transfer_one = bcm2835aux_spi_transfer_one;
+	host->handle_err = bcm2835aux_spi_handle_err;
+	host->prepare_message = bcm2835aux_spi_prepare_message;
+	host->unprepare_message = bcm2835aux_spi_unprepare_message;
+	host->dev.of_node = pdev->dev.of_node;
+	host->use_gpio_descriptors = true;
 
-	bs = spi_master_get_devdata(master);
+	bs = spi_controller_get_devdata(host);
 
 	/* the main area */
 	bs->regs = devm_platform_ioremap_resource(pdev, 0);
@@ -544,15 +544,15 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev)
 	err = devm_request_irq(&pdev->dev, bs->irq,
 			       bcm2835aux_spi_interrupt,
 			       IRQF_SHARED,
-			       dev_name(&pdev->dev), master);
+			       dev_name(&pdev->dev), host);
 	if (err) {
 		dev_err(&pdev->dev, "could not request IRQ: %d\n", err);
 		goto out_clk_disable;
 	}
 
-	err = spi_register_master(master);
+	err = spi_register_controller(host);
 	if (err) {
-		dev_err(&pdev->dev, "could not register SPI master: %d\n", err);
+		dev_err(&pdev->dev, "could not register SPI host: %d\n", err);
 		goto out_clk_disable;
 	}
 
@@ -567,12 +567,12 @@ out_clk_disable:
 
 static void bcm2835aux_spi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct bcm2835aux_spi *bs = spi_controller_get_devdata(host);
 
 	bcm2835aux_debugfs_remove(bs);
 
-	spi_unregister_master(master);
+	spi_unregister_controller(host);
 
 	bcm2835aux_spi_reset_hw(bs);
 

From 2c40be6bc95bd2b9681b4acc434feda05b16eb00 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:07 +0800
Subject: [PATCH 0795/2047] spi: bcm63xx-hsspi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-8-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm63xx-hsspi.c | 86 ++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 43 deletions(-)

diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c
index 9e218e143263..1ca857c2a4aa 100644
--- a/drivers/spi/spi-bcm63xx-hsspi.c
+++ b/drivers/spi/spi-bcm63xx-hsspi.c
@@ -149,7 +149,7 @@ static ssize_t wait_mode_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
 	struct spi_controller *ctrl = dev_get_drvdata(dev);
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(ctrl);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(ctrl);
 
 	return sprintf(buf, "%d\n", bs->wait_mode);
 }
@@ -158,7 +158,7 @@ static ssize_t wait_mode_store(struct device *dev, struct device_attribute *attr
 			  const char *buf, size_t count)
 {
 	struct spi_controller *ctrl = dev_get_drvdata(dev);
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(ctrl);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(ctrl);
 	u32 val;
 
 	if (kstrtou32(buf, 10, &val))
@@ -185,7 +185,7 @@ static ssize_t xfer_mode_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
 	struct spi_controller *ctrl = dev_get_drvdata(dev);
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(ctrl);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(ctrl);
 
 	return sprintf(buf, "%d\n", bs->xfer_mode);
 }
@@ -194,7 +194,7 @@ static ssize_t xfer_mode_store(struct device *dev, struct device_attribute *attr
 			  const char *buf, size_t count)
 {
 	struct spi_controller *ctrl = dev_get_drvdata(dev);
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(ctrl);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(ctrl);
 	u32 val;
 
 	if (kstrtou32(buf, 10, &val))
@@ -262,12 +262,12 @@ static int bcm63xx_hsspi_wait_cmd(struct bcm63xx_hsspi *bs)
 	return rc;
 }
 
-static bool bcm63xx_prepare_prepend_transfer(struct spi_master *master,
+static bool bcm63xx_prepare_prepend_transfer(struct spi_controller *host,
 					  struct spi_message *msg,
 					  struct spi_transfer *t_prepend)
 {
 
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(master);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(host);
 	bool tx_only = false;
 	struct spi_transfer *t;
 
@@ -348,7 +348,7 @@ static bool bcm63xx_prepare_prepend_transfer(struct spi_master *master,
 static int bcm63xx_hsspi_do_prepend_txrx(struct spi_device *spi,
 					 struct spi_transfer *t)
 {
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(spi->master);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(spi->controller);
 	unsigned int chip_select = spi_get_chipselect(spi, 0);
 	u16 opcode = 0, val;
 	const u8 *tx = t->tx_buf;
@@ -467,7 +467,7 @@ static void bcm63xx_hsspi_set_clk(struct bcm63xx_hsspi *bs,
 
 static int bcm63xx_hsspi_do_txrx(struct spi_device *spi, struct spi_transfer *t)
 {
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(spi->master);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(spi->controller);
 	unsigned int chip_select = spi_get_chipselect(spi, 0);
 	u16 opcode = 0, val;
 	int pending = t->len;
@@ -541,7 +541,7 @@ static int bcm63xx_hsspi_do_txrx(struct spi_device *spi, struct spi_transfer *t)
 
 static int bcm63xx_hsspi_setup(struct spi_device *spi)
 {
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(spi->master);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(spi->controller);
 	u32 reg;
 
 	reg = __raw_readl(bs->regs +
@@ -579,7 +579,7 @@ static int bcm63xx_hsspi_setup(struct spi_device *spi)
 static int bcm63xx_hsspi_do_dummy_cs_txrx(struct spi_device *spi,
 				      struct spi_message *msg)
 {
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(spi->master);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(spi->controller);
 	int status = -EINVAL;
 	int dummy_cs;
 	bool keep_cs = false;
@@ -653,10 +653,10 @@ static int bcm63xx_hsspi_do_dummy_cs_txrx(struct spi_device *spi,
 	return status;
 }
 
-static int bcm63xx_hsspi_transfer_one(struct spi_master *master,
+static int bcm63xx_hsspi_transfer_one(struct spi_controller *host,
 				      struct spi_message *msg)
 {
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(master);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(host);
 	struct spi_device *spi = msg->spi;
 	int status = -EINVAL;
 	bool prependable = false;
@@ -665,7 +665,7 @@ static int bcm63xx_hsspi_transfer_one(struct spi_master *master,
 	mutex_lock(&bs->msg_mutex);
 
 	if (bs->xfer_mode != HSSPI_XFER_MODE_DUMMYCS)
-		prependable = bcm63xx_prepare_prepend_transfer(master, msg, &t_prepend);
+		prependable = bcm63xx_prepare_prepend_transfer(host, msg, &t_prepend);
 
 	if (prependable) {
 		status = bcm63xx_hsspi_do_prepend_txrx(spi, &t_prepend);
@@ -681,7 +681,7 @@ static int bcm63xx_hsspi_transfer_one(struct spi_master *master,
 
 	mutex_unlock(&bs->msg_mutex);
 	msg->status = status;
-	spi_finalize_current_message(master);
+	spi_finalize_current_message(host);
 
 	return 0;
 }
@@ -723,7 +723,7 @@ static irqreturn_t bcm63xx_hsspi_interrupt(int irq, void *dev_id)
 
 static int bcm63xx_hsspi_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct bcm63xx_hsspi *bs;
 	void __iomem *regs;
 	struct device *dev = &pdev->dev;
@@ -779,13 +779,13 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev)
 		}
 	}
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*bs));
-	if (!master) {
+	host = spi_alloc_host(&pdev->dev, sizeof(*bs));
+	if (!host) {
 		ret = -ENOMEM;
 		goto out_disable_pll_clk;
 	}
 
-	bs = spi_master_get_devdata(master);
+	bs = spi_controller_get_devdata(host);
 	bs->pdev = pdev;
 	bs->clk = clk;
 	bs->pll_clk = pll_clk;
@@ -796,17 +796,17 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev)
 	bs->prepend_buf = devm_kzalloc(dev, HSSPI_BUFFER_LEN, GFP_KERNEL);
 	if (!bs->prepend_buf) {
 		ret = -ENOMEM;
-		goto out_put_master;
+		goto out_put_host;
 	}
 
 	mutex_init(&bs->bus_mutex);
 	mutex_init(&bs->msg_mutex);
 	init_completion(&bs->done);
 
-	master->mem_ops = &bcm63xx_hsspi_mem_ops;
-	master->dev.of_node = dev->of_node;
+	host->mem_ops = &bcm63xx_hsspi_mem_ops;
+	host->dev.of_node = dev->of_node;
 	if (!dev->of_node)
-		master->bus_num = HSSPI_BUS_NUM;
+		host->bus_num = HSSPI_BUS_NUM;
 
 	of_property_read_u32(dev->of_node, "num-cs", &num_cs);
 	if (num_cs > 8) {
@@ -814,18 +814,18 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev)
 			 num_cs);
 		num_cs = HSSPI_SPI_MAX_CS;
 	}
-	master->num_chipselect = num_cs;
-	master->setup = bcm63xx_hsspi_setup;
-	master->transfer_one_message = bcm63xx_hsspi_transfer_one;
-	master->max_transfer_size = bcm63xx_hsspi_max_message_size;
-	master->max_message_size = bcm63xx_hsspi_max_message_size;
+	host->num_chipselect = num_cs;
+	host->setup = bcm63xx_hsspi_setup;
+	host->transfer_one_message = bcm63xx_hsspi_transfer_one;
+	host->max_transfer_size = bcm63xx_hsspi_max_message_size;
+	host->max_message_size = bcm63xx_hsspi_max_message_size;
 
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH |
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH |
 			    SPI_RX_DUAL | SPI_TX_DUAL;
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->auto_runtime_pm = true;
+	host->bits_per_word_mask = SPI_BPW_MASK(8);
+	host->auto_runtime_pm = true;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
 	/* Initialize the hardware */
 	__raw_writel(0, bs->regs + HSSPI_INT_MASK_REG);
@@ -844,7 +844,7 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev)
 				       pdev->name, bs);
 
 		if (ret)
-			goto out_put_master;
+			goto out_put_host;
 	}
 
 	pm_runtime_enable(&pdev->dev);
@@ -856,7 +856,7 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev)
 	}
 
 	/* register and we are done */
-	ret = devm_spi_register_master(dev, master);
+	ret = devm_spi_register_controller(dev, host);
 	if (ret)
 		goto out_sysgroup_disable;
 
@@ -868,8 +868,8 @@ out_sysgroup_disable:
 	sysfs_remove_group(&pdev->dev.kobj, &bcm63xx_hsspi_group);
 out_pm_disable:
 	pm_runtime_disable(&pdev->dev);
-out_put_master:
-	spi_master_put(master);
+out_put_host:
+	spi_controller_put(host);
 out_disable_pll_clk:
 	clk_disable_unprepare(pll_clk);
 out_disable_clk:
@@ -880,8 +880,8 @@ out_disable_clk:
 
 static void bcm63xx_hsspi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(host);
 
 	/* reset the hardware and block queue progress */
 	__raw_writel(0, bs->regs + HSSPI_INT_MASK_REG);
@@ -893,10 +893,10 @@ static void bcm63xx_hsspi_remove(struct platform_device *pdev)
 #ifdef CONFIG_PM_SLEEP
 static int bcm63xx_hsspi_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(host);
 
-	spi_master_suspend(master);
+	spi_controller_suspend(host);
 	clk_disable_unprepare(bs->pll_clk);
 	clk_disable_unprepare(bs->clk);
 
@@ -905,8 +905,8 @@ static int bcm63xx_hsspi_suspend(struct device *dev)
 
 static int bcm63xx_hsspi_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct bcm63xx_hsspi *bs = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct bcm63xx_hsspi *bs = spi_controller_get_devdata(host);
 	int ret;
 
 	ret = clk_prepare_enable(bs->clk);
@@ -921,7 +921,7 @@ static int bcm63xx_hsspi_resume(struct device *dev)
 		}
 	}
 
-	spi_master_resume(master);
+	spi_controller_resume(host);
 
 	return 0;
 }

From 1a9e76190f72e4e3aaf7eb89da22876846ff9b3d Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:08 +0800
Subject: [PATCH 0796/2047] spi: bcm63xx: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-9-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm63xx.c | 68 +++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
index 07b5b71b2352..aac41bd05f98 100644
--- a/drivers/spi/spi-bcm63xx.c
+++ b/drivers/spi/spi-bcm63xx.c
@@ -186,7 +186,7 @@ static const unsigned int bcm63xx_spi_freq_table[SPI_CLK_MASK][2] = {
 static void bcm63xx_spi_setup_transfer(struct spi_device *spi,
 				      struct spi_transfer *t)
 {
-	struct bcm63xx_spi *bs = spi_master_get_devdata(spi->master);
+	struct bcm63xx_spi *bs = spi_controller_get_devdata(spi->controller);
 	u8 clk_cfg, reg;
 	int i;
 
@@ -217,7 +217,7 @@ static void bcm63xx_spi_setup_transfer(struct spi_device *spi,
 static int bcm63xx_txrx_bufs(struct spi_device *spi, struct spi_transfer *first,
 				unsigned int num_transfers)
 {
-	struct bcm63xx_spi *bs = spi_master_get_devdata(spi->master);
+	struct bcm63xx_spi *bs = spi_controller_get_devdata(spi->controller);
 	u16 msg_ctl;
 	u16 cmd;
 	unsigned int i, timeout = 0, prepend_len = 0, len = 0;
@@ -312,10 +312,10 @@ static int bcm63xx_txrx_bufs(struct spi_device *spi, struct spi_transfer *first,
 	return 0;
 }
 
-static int bcm63xx_spi_transfer_one(struct spi_master *master,
+static int bcm63xx_spi_transfer_one(struct spi_controller *host,
 					struct spi_message *m)
 {
-	struct bcm63xx_spi *bs = spi_master_get_devdata(master);
+	struct bcm63xx_spi *bs = spi_controller_get_devdata(host);
 	struct spi_transfer *t, *first = NULL;
 	struct spi_device *spi = m->spi;
 	int status = 0;
@@ -385,18 +385,18 @@ static int bcm63xx_spi_transfer_one(struct spi_master *master,
 	}
 exit:
 	m->status = status;
-	spi_finalize_current_message(master);
+	spi_finalize_current_message(host);
 
 	return 0;
 }
 
-/* This driver supports single master mode only. Hence
+/* This driver supports single host mode only. Hence
  * CMD_DONE is the only interrupt we care about
  */
 static irqreturn_t bcm63xx_spi_interrupt(int irq, void *dev_id)
 {
-	struct spi_master *master = (struct spi_master *)dev_id;
-	struct bcm63xx_spi *bs = spi_master_get_devdata(master);
+	struct spi_controller *host = (struct spi_controller *)dev_id;
+	struct bcm63xx_spi *bs = spi_controller_get_devdata(host);
 	u8 intr;
 
 	/* Read interupts and clear them immediately */
@@ -413,7 +413,7 @@ static irqreturn_t bcm63xx_spi_interrupt(int irq, void *dev_id)
 
 static size_t bcm63xx_spi_max_length(struct spi_device *spi)
 {
-	struct bcm63xx_spi *bs = spi_master_get_devdata(spi->master);
+	struct bcm63xx_spi *bs = spi_controller_get_devdata(spi->controller);
 
 	return bs->fifo_size;
 }
@@ -479,7 +479,7 @@ static int bcm63xx_spi_probe(struct platform_device *pdev)
 	const unsigned long *bcm63xx_spireg;
 	struct device *dev = &pdev->dev;
 	int irq, bus_num;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct clk *clk;
 	struct bcm63xx_spi *bs;
 	int ret;
@@ -525,16 +525,16 @@ static int bcm63xx_spi_probe(struct platform_device *pdev)
 	if (IS_ERR(reset))
 		return PTR_ERR(reset);
 
-	master = spi_alloc_master(dev, sizeof(*bs));
-	if (!master) {
+	host = spi_alloc_host(dev, sizeof(*bs));
+	if (!host) {
 		dev_err(dev, "out of memory\n");
 		return -ENOMEM;
 	}
 
-	bs = spi_master_get_devdata(master);
+	bs = spi_controller_get_devdata(host);
 	init_completion(&bs->done);
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 	bs->pdev = pdev;
 
 	bs->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &r);
@@ -549,21 +549,21 @@ static int bcm63xx_spi_probe(struct platform_device *pdev)
 	bs->fifo_size = bs->reg_offsets[SPI_MSG_DATA_SIZE];
 
 	ret = devm_request_irq(&pdev->dev, irq, bcm63xx_spi_interrupt, 0,
-							pdev->name, master);
+			       pdev->name, host);
 	if (ret) {
 		dev_err(dev, "unable to request irq\n");
 		goto out_err;
 	}
 
-	master->dev.of_node = dev->of_node;
-	master->bus_num = bus_num;
-	master->num_chipselect = num_cs;
-	master->transfer_one_message = bcm63xx_spi_transfer_one;
-	master->mode_bits = MODEBITS;
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->max_transfer_size = bcm63xx_spi_max_length;
-	master->max_message_size = bcm63xx_spi_max_length;
-	master->auto_runtime_pm = true;
+	host->dev.of_node = dev->of_node;
+	host->bus_num = bus_num;
+	host->num_chipselect = num_cs;
+	host->transfer_one_message = bcm63xx_spi_transfer_one;
+	host->mode_bits = MODEBITS;
+	host->bits_per_word_mask = SPI_BPW_MASK(8);
+	host->max_transfer_size = bcm63xx_spi_max_length;
+	host->max_message_size = bcm63xx_spi_max_length;
+	host->auto_runtime_pm = true;
 	bs->msg_type_shift = bs->reg_offsets[SPI_MSG_TYPE_SHIFT];
 	bs->msg_ctl_width = bs->reg_offsets[SPI_MSG_CTL_WIDTH];
 	bs->tx_io = (u8 *)(bs->regs + bs->reg_offsets[SPI_MSG_DATA]);
@@ -585,7 +585,7 @@ static int bcm63xx_spi_probe(struct platform_device *pdev)
 	pm_runtime_enable(&pdev->dev);
 
 	/* register and we are done */
-	ret = devm_spi_register_master(dev, master);
+	ret = devm_spi_register_controller(dev, host);
 	if (ret) {
 		dev_err(dev, "spi register failed\n");
 		goto out_pm_disable;
@@ -601,14 +601,14 @@ out_pm_disable:
 out_clk_disable:
 	clk_disable_unprepare(clk);
 out_err:
-	spi_master_put(master);
+	spi_controller_put(host);
 	return ret;
 }
 
 static void bcm63xx_spi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct bcm63xx_spi *bs = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct bcm63xx_spi *bs = spi_controller_get_devdata(host);
 
 	/* reset spi block */
 	bcm_spi_writeb(bs, 0, SPI_INT_MASK);
@@ -619,10 +619,10 @@ static void bcm63xx_spi_remove(struct platform_device *pdev)
 
 static int bcm63xx_spi_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct bcm63xx_spi *bs = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct bcm63xx_spi *bs = spi_controller_get_devdata(host);
 
-	spi_master_suspend(master);
+	spi_controller_suspend(host);
 
 	clk_disable_unprepare(bs->clk);
 
@@ -631,15 +631,15 @@ static int bcm63xx_spi_suspend(struct device *dev)
 
 static int bcm63xx_spi_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct bcm63xx_spi *bs = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct bcm63xx_spi *bs = spi_controller_get_devdata(host);
 	int ret;
 
 	ret = clk_prepare_enable(bs->clk);
 	if (ret)
 		return ret;
 
-	spi_master_resume(master);
+	spi_controller_resume(host);
 
 	return 0;
 }

From 5a59b9a1070da555f44e81d09e3d096694deb1c0 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:09 +0800
Subject: [PATCH 0797/2047] spi: butterfly: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-10-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-butterfly.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/spi/spi-butterfly.c b/drivers/spi/spi-butterfly.c
index cceae816cebc..289b4454242a 100644
--- a/drivers/spi/spi-butterfly.c
+++ b/drivers/spi/spi-butterfly.c
@@ -178,7 +178,7 @@ static void butterfly_attach(struct parport *p)
 	struct pardevice	*pd;
 	int			status;
 	struct butterfly	*pp;
-	struct spi_master	*master;
+	struct spi_controller	*host;
 	struct device		*dev = p->physport->dev;
 	struct pardev_cb	butterfly_cb;
 
@@ -189,12 +189,12 @@ static void butterfly_attach(struct parport *p)
 	 * and no way to be selective about what it binds to.
 	 */
 
-	master = spi_alloc_master(dev, sizeof(*pp));
-	if (!master) {
+	host = spi_alloc_host(dev, sizeof(*pp));
+	if (!host) {
 		status = -ENOMEM;
 		goto done;
 	}
-	pp = spi_master_get_devdata(master);
+	pp = spi_controller_get_devdata(host);
 
 	/*
 	 * SPI and bitbang hookup
@@ -202,10 +202,10 @@ static void butterfly_attach(struct parport *p)
 	 * use default setup(), cleanup(), and transfer() methods; and
 	 * only bother implementing mode 0.  Start it later.
 	 */
-	master->bus_num = 42;
-	master->num_chipselect = 2;
+	host->bus_num = 42;
+	host->num_chipselect = 2;
 
-	pp->bitbang.master = master;
+	pp->bitbang.master = host;
 	pp->bitbang.chipselect = butterfly_chipselect;
 	pp->bitbang.txrx_word[SPI_MODE_0] = butterfly_txrx_word_mode0;
 
@@ -280,7 +280,7 @@ clean2:
 clean1:
 	parport_unregister_device(pd);
 clean0:
-	spi_master_put(pp->bitbang.master);
+	spi_controller_put(host);
 done:
 	pr_debug("%s: butterfly probe, fail %d\n", p->name, status);
 }
@@ -308,7 +308,7 @@ static void butterfly_detach(struct parport *p)
 	parport_release(pp->pd);
 	parport_unregister_device(pp->pd);
 
-	spi_master_put(pp->bitbang.master);
+	spi_controller_put(pp->bitbang.master);
 }
 
 static struct parport_driver butterfly_driver = {

From ec7cfadff25c1177dadeb1b2fef29f14d530f0df Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:11 +0800
Subject: [PATCH 0798/2047] spi: cadence-xspi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-12-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cadence-xspi.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/spi/spi-cadence-xspi.c b/drivers/spi/spi-cadence-xspi.c
index 11d623cbba2e..b7e04b03be58 100644
--- a/drivers/spi/spi-cadence-xspi.c
+++ b/drivers/spi/spi-cadence-xspi.c
@@ -419,7 +419,7 @@ static int cdns_xspi_mem_op_execute(struct spi_mem *mem,
 				    const struct spi_mem_op *op)
 {
 	struct cdns_xspi_dev *cdns_xspi =
-		spi_master_get_devdata(mem->spi->master);
+		spi_controller_get_devdata(mem->spi->controller);
 	int ret = 0;
 
 	ret = cdns_xspi_mem_op(cdns_xspi, mem, op);
@@ -430,7 +430,7 @@ static int cdns_xspi_mem_op_execute(struct spi_mem *mem,
 static int cdns_xspi_adjust_mem_op_size(struct spi_mem *mem, struct spi_mem_op *op)
 {
 	struct cdns_xspi_dev *cdns_xspi =
-		spi_master_get_devdata(mem->spi->master);
+		spi_controller_get_devdata(mem->spi->controller);
 
 	op->data.nbytes = clamp_val(op->data.nbytes, 0, cdns_xspi->sdmasize);
 
@@ -527,26 +527,26 @@ static void cdns_xspi_print_phy_config(struct cdns_xspi_dev *cdns_xspi)
 static int cdns_xspi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct spi_master *master = NULL;
+	struct spi_controller *host = NULL;
 	struct cdns_xspi_dev *cdns_xspi = NULL;
 	struct resource *res;
 	int ret;
 
-	master = devm_spi_alloc_master(dev, sizeof(*cdns_xspi));
-	if (!master)
+	host = devm_spi_alloc_host(dev, sizeof(*cdns_xspi));
+	if (!host)
 		return -ENOMEM;
 
-	master->mode_bits = SPI_3WIRE | SPI_TX_DUAL  | SPI_TX_QUAD  |
+	host->mode_bits = SPI_3WIRE | SPI_TX_DUAL  | SPI_TX_QUAD  |
 		SPI_RX_DUAL | SPI_RX_QUAD | SPI_TX_OCTAL | SPI_RX_OCTAL |
 		SPI_MODE_0  | SPI_MODE_3;
 
-	master->mem_ops = &cadence_xspi_mem_ops;
-	master->dev.of_node = pdev->dev.of_node;
-	master->bus_num = -1;
+	host->mem_ops = &cadence_xspi_mem_ops;
+	host->dev.of_node = pdev->dev.of_node;
+	host->bus_num = -1;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
-	cdns_xspi = spi_master_get_devdata(master);
+	cdns_xspi = spi_controller_get_devdata(host);
 	cdns_xspi->pdev = pdev;
 	cdns_xspi->dev = &pdev->dev;
 	cdns_xspi->cur_cs = 0;
@@ -596,15 +596,15 @@ static int cdns_xspi_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	master->num_chipselect = 1 << cdns_xspi->hw_num_banks;
+	host->num_chipselect = 1 << cdns_xspi->hw_num_banks;
 
-	ret = devm_spi_register_master(dev, master);
+	ret = devm_spi_register_controller(dev, host);
 	if (ret) {
-		dev_err(dev, "Failed to register SPI master\n");
+		dev_err(dev, "Failed to register SPI host\n");
 		return ret;
 	}
 
-	dev_info(dev, "Successfully registered SPI master\n");
+	dev_info(dev, "Successfully registered SPI host\n");
 
 	return 0;
 }

From f75c04a99100b27749bf0d13b96e4fe0cbda6e87 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:12 +0800
Subject: [PATCH 0799/2047] spi: cadence: switch to use modern name

Change legacy name master/slave to modern name host/target or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-13-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cadence.c | 70 +++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index 42f101d357c3..2554d06a650c 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * Cadence SPI controller driver (master and slave mode)
+ * Cadence SPI controller driver (host and target mode)
  *
  * Copyright (C) 2008 - 2014 Xilinx, Inc.
  *
@@ -59,10 +59,10 @@
 					CDNS_SPI_CR_BAUD_DIV_4)
 
 /*
- * SPI Configuration Register - Baud rate and slave select
+ * SPI Configuration Register - Baud rate and target select
  *
  * These are the values used in the calculation of baud rate divisor and
- * setting the slave select.
+ * setting the target select.
  */
 
 #define CDNS_SPI_BAUD_DIV_MAX		7 /* Baud rate divisor maximum */
@@ -141,20 +141,20 @@ static inline void cdns_spi_write(struct cdns_spi *xspi, u32 offset, u32 val)
 /**
  * cdns_spi_init_hw - Initialize the hardware and configure the SPI controller
  * @xspi:	Pointer to the cdns_spi structure
- * @is_slave:	Flag to indicate slave or master mode
- * * On reset the SPI controller is configured to slave or  master mode.
- * In master mode baud rate divisor is set to 4, threshold value for TX FIFO
+ * @is_target:	Flag to indicate target or host mode
+ * * On reset the SPI controller is configured to target or host mode.
+ * In host mode baud rate divisor is set to 4, threshold value for TX FIFO
  * not full interrupt is set to 1 and size of the word to be transferred as 8 bit.
  *
  * This function initializes the SPI controller to disable and clear all the
- * interrupts, enable manual slave select and manual start, deselect all the
+ * interrupts, enable manual target select and manual start, deselect all the
  * chip select lines, and enable the SPI controller.
  */
-static void cdns_spi_init_hw(struct cdns_spi *xspi, bool is_slave)
+static void cdns_spi_init_hw(struct cdns_spi *xspi, bool is_target)
 {
 	u32 ctrl_reg = 0;
 
-	if (!is_slave)
+	if (!is_target)
 		ctrl_reg |= CDNS_SPI_CR_DEFAULT;
 
 	if (xspi->is_decoded_cs)
@@ -185,10 +185,10 @@ static void cdns_spi_chipselect(struct spi_device *spi, bool is_high)
 	ctrl_reg = cdns_spi_read(xspi, CDNS_SPI_CR);
 
 	if (is_high) {
-		/* Deselect the slave */
+		/* Deselect the target */
 		ctrl_reg |= CDNS_SPI_CR_SSCTRL;
 	} else {
-		/* Select the slave */
+		/* Select the target */
 		ctrl_reg &= ~CDNS_SPI_CR_SSCTRL;
 		if (!(xspi->is_decoded_cs))
 			ctrl_reg |= ((~(CDNS_SPI_SS0 << spi_get_chipselect(spi, 0))) <<
@@ -227,7 +227,7 @@ static void cdns_spi_config_clock_mode(struct spi_device *spi)
 		/*
 		 * Just writing the CR register does not seem to apply the clock
 		 * setting changes. This is problematic when changing the clock
-		 * polarity as it will cause the SPI slave to see spurious clock
+		 * polarity as it will cause the SPI target to see spurious clock
 		 * transitions. To workaround the issue toggle the ER register.
 		 */
 		cdns_spi_write(xspi, CDNS_SPI_ER, CDNS_SPI_ER_DISABLE);
@@ -406,7 +406,7 @@ static irqreturn_t cdns_spi_irq(int irq, void *dev_id)
 static int cdns_prepare_message(struct spi_controller *ctlr,
 				struct spi_message *msg)
 {
-	if (!spi_controller_is_slave(ctlr))
+	if (!spi_controller_is_target(ctlr))
 		cdns_spi_config_clock_mode(msg->spi);
 	return 0;
 }
@@ -418,9 +418,9 @@ static int cdns_prepare_message(struct spi_controller *ctlr,
  * @transfer:	Pointer to the spi_transfer structure which provides
  *		information about next transfer parameters
  *
- * This function in master mode fills the TX FIFO, starts the SPI transfer and
+ * This function in host mode fills the TX FIFO, starts the SPI transfer and
  * returns a positive transfer count so that core will wait for completion.
- * This function in slave mode fills the TX FIFO and wait for transfer trigger.
+ * This function in target mode fills the TX FIFO and wait for transfer trigger.
  *
  * Return:	Number of bytes transferred in the last transfer
  */
@@ -435,7 +435,7 @@ static int cdns_transfer_one(struct spi_controller *ctlr,
 	xspi->tx_bytes = transfer->len;
 	xspi->rx_bytes = transfer->len;
 
-	if (!spi_controller_is_slave(ctlr)) {
+	if (!spi_controller_is_target(ctlr)) {
 		cdns_spi_setup_transfer(spi, transfer);
 	} else {
 		/* Set TX empty threshold to half of FIFO depth
@@ -457,7 +457,7 @@ static int cdns_transfer_one(struct spi_controller *ctlr,
  * @ctlr:	Pointer to the spi_controller structure which provides
  *		information about the controller.
  *
- * This function enables SPI master controller.
+ * This function enables SPI host controller.
  *
  * Return:	0 always
  */
@@ -475,7 +475,7 @@ static int cdns_prepare_transfer_hardware(struct spi_controller *ctlr)
  * @ctlr:	Pointer to the spi_controller structure which provides
  *		information about the controller.
  *
- * This function disables the SPI master controller when no slave selected.
+ * This function disables the SPI host controller when no target selected.
  * This function flush out if any pending data in FIFO.
  *
  * Return:	0 always
@@ -486,15 +486,15 @@ static int cdns_unprepare_transfer_hardware(struct spi_controller *ctlr)
 	u32 ctrl_reg;
 	unsigned int cnt = xspi->tx_fifo_depth;
 
-	if (spi_controller_is_slave(ctlr)) {
+	if (spi_controller_is_target(ctlr)) {
 		while (cnt--)
 			cdns_spi_read(xspi, CDNS_SPI_RXD);
 	}
 
-	/* Disable the SPI if slave is deselected */
+	/* Disable the SPI if target is deselected */
 	ctrl_reg = cdns_spi_read(xspi, CDNS_SPI_CR);
 	ctrl_reg = (ctrl_reg & CDNS_SPI_CR_SSCTRL) >>  CDNS_SPI_SS_SHIFT;
-	if (ctrl_reg == CDNS_SPI_NOSS || spi_controller_is_slave(ctlr))
+	if (ctrl_reg == CDNS_SPI_NOSS || spi_controller_is_target(ctlr))
 		cdns_spi_write(xspi, CDNS_SPI_ER, CDNS_SPI_ER_DISABLE);
 
 	/* Reset to default */
@@ -521,14 +521,14 @@ static void cdns_spi_detect_fifo_depth(struct cdns_spi *xspi)
 }
 
 /**
- * cdns_slave_abort - Abort slave transfer
+ * cdns_target_abort - Abort target transfer
  * @ctlr:	Pointer to the spi_controller structure
  *
- * This function abort slave transfer if there any transfer timeout.
+ * This function abort target transfer if there any transfer timeout.
  *
  * Return:      0 always
  */
-static int cdns_slave_abort(struct spi_controller *ctlr)
+static int cdns_target_abort(struct spi_controller *ctlr)
 {
 	struct cdns_spi *xspi = spi_controller_get_devdata(ctlr);
 	u32 intr_status;
@@ -555,13 +555,13 @@ static int cdns_spi_probe(struct platform_device *pdev)
 	struct spi_controller *ctlr;
 	struct cdns_spi *xspi;
 	u32 num_cs;
-	bool slave;
+	bool target;
 
-	slave = of_property_read_bool(pdev->dev.of_node, "spi-slave");
-	if (slave)
-		ctlr = spi_alloc_slave(&pdev->dev, sizeof(*xspi));
+	target = of_property_read_bool(pdev->dev.of_node, "spi-slave");
+	if (target)
+		ctlr = spi_alloc_target(&pdev->dev, sizeof(*xspi));
 	else
-		ctlr = spi_alloc_master(&pdev->dev, sizeof(*xspi));
+		ctlr = spi_alloc_host(&pdev->dev, sizeof(*xspi));
 
 	if (!ctlr)
 		return -ENOMEM;
@@ -589,7 +589,7 @@ static int cdns_spi_probe(struct platform_device *pdev)
 		goto remove_ctlr;
 	}
 
-	if (!spi_controller_is_slave(ctlr)) {
+	if (!spi_controller_is_target(ctlr)) {
 		xspi->ref_clk = devm_clk_get(&pdev->dev, "ref_clk");
 		if (IS_ERR(xspi->ref_clk)) {
 			dev_err(&pdev->dev, "ref_clk clock not found.\n");
@@ -624,7 +624,7 @@ static int cdns_spi_probe(struct platform_device *pdev)
 	cdns_spi_detect_fifo_depth(xspi);
 
 	/* SPI controller initializations */
-	cdns_spi_init_hw(xspi, spi_controller_is_slave(ctlr));
+	cdns_spi_init_hw(xspi, spi_controller_is_target(ctlr));
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq <= 0) {
@@ -648,7 +648,7 @@ static int cdns_spi_probe(struct platform_device *pdev)
 	ctlr->mode_bits = SPI_CPOL | SPI_CPHA;
 	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
 
-	if (!spi_controller_is_slave(ctlr)) {
+	if (!spi_controller_is_target(ctlr)) {
 		ctlr->mode_bits |=  SPI_CS_HIGH;
 		ctlr->set_cs = cdns_spi_chipselect;
 		ctlr->auto_runtime_pm = true;
@@ -660,7 +660,7 @@ static int cdns_spi_probe(struct platform_device *pdev)
 		pm_runtime_put_autosuspend(&pdev->dev);
 	} else {
 		ctlr->mode_bits |= SPI_NO_CS;
-		ctlr->slave_abort = cdns_slave_abort;
+		ctlr->target_abort = cdns_target_abort;
 	}
 	ret = spi_register_controller(ctlr);
 	if (ret) {
@@ -671,7 +671,7 @@ static int cdns_spi_probe(struct platform_device *pdev)
 	return ret;
 
 clk_dis_all:
-	if (!spi_controller_is_slave(ctlr)) {
+	if (!spi_controller_is_target(ctlr)) {
 		pm_runtime_set_suspended(&pdev->dev);
 		pm_runtime_disable(&pdev->dev);
 		clk_disable_unprepare(xspi->ref_clk);
@@ -735,7 +735,7 @@ static int __maybe_unused cdns_spi_resume(struct device *dev)
 	struct spi_controller *ctlr = dev_get_drvdata(dev);
 	struct cdns_spi *xspi = spi_controller_get_devdata(ctlr);
 
-	cdns_spi_init_hw(xspi, spi_controller_is_slave(ctlr));
+	cdns_spi_init_hw(xspi, spi_controller_is_target(ctlr));
 	return spi_controller_resume(ctlr);
 }
 

From 1baba77b1f2c6143866463e11749d8dd4c544181 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:13 +0800
Subject: [PATCH 0800/2047] spi: clps711x: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-14-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-clps711x.c | 42 +++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/spi/spi-clps711x.c b/drivers/spi/spi-clps711x.c
index c005ed26a3e1..5552ccd716fc 100644
--- a/drivers/spi/spi-clps711x.c
+++ b/drivers/spi/spi-clps711x.c
@@ -33,10 +33,10 @@ struct spi_clps711x_data {
 	int			len;
 };
 
-static int spi_clps711x_prepare_message(struct spi_master *master,
+static int spi_clps711x_prepare_message(struct spi_controller *host,
 					struct spi_message *msg)
 {
-	struct spi_clps711x_data *hw = spi_master_get_devdata(master);
+	struct spi_clps711x_data *hw = spi_controller_get_devdata(host);
 	struct spi_device *spi = msg->spi;
 
 	/* Setup mode for transfer */
@@ -45,11 +45,11 @@ static int spi_clps711x_prepare_message(struct spi_master *master,
 				  SYSCON3_ADCCKNSEN : 0);
 }
 
-static int spi_clps711x_transfer_one(struct spi_master *master,
+static int spi_clps711x_transfer_one(struct spi_controller *host,
 				     struct spi_device *spi,
 				     struct spi_transfer *xfer)
 {
-	struct spi_clps711x_data *hw = spi_master_get_devdata(master);
+	struct spi_clps711x_data *hw = spi_controller_get_devdata(host);
 	u8 data;
 
 	clk_set_rate(hw->spi_clk, xfer->speed_hz ? : spi->max_speed_hz);
@@ -68,8 +68,8 @@ static int spi_clps711x_transfer_one(struct spi_master *master,
 
 static irqreturn_t spi_clps711x_isr(int irq, void *dev_id)
 {
-	struct spi_master *master = dev_id;
-	struct spi_clps711x_data *hw = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_id;
+	struct spi_clps711x_data *hw = spi_controller_get_devdata(host);
 	u8 data;
 
 	/* Handle RX */
@@ -83,7 +83,7 @@ static irqreturn_t spi_clps711x_isr(int irq, void *dev_id)
 		writel(data | SYNCIO_FRMLEN(hw->bpw) | SYNCIO_TXFRMEN,
 		       hw->syncio);
 	} else
-		spi_finalize_current_transfer(master);
+		spi_finalize_current_transfer(host);
 
 	return IRQ_HANDLED;
 }
@@ -92,26 +92,26 @@ static int spi_clps711x_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct spi_clps711x_data *hw;
-	struct spi_master *master;
+	struct spi_controller *host;
 	int irq, ret;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
 		return irq;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*hw));
-	if (!master)
+	host = spi_alloc_host(&pdev->dev, sizeof(*hw));
+	if (!host)
 		return -ENOMEM;
 
-	master->use_gpio_descriptors = true;
-	master->bus_num = -1;
-	master->mode_bits = SPI_CPHA | SPI_CS_HIGH;
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 8);
-	master->dev.of_node = pdev->dev.of_node;
-	master->prepare_message = spi_clps711x_prepare_message;
-	master->transfer_one = spi_clps711x_transfer_one;
+	host->use_gpio_descriptors = true;
+	host->bus_num = -1;
+	host->mode_bits = SPI_CPHA | SPI_CS_HIGH;
+	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 8);
+	host->dev.of_node = pdev->dev.of_node;
+	host->prepare_message = spi_clps711x_prepare_message;
+	host->transfer_one = spi_clps711x_transfer_one;
 
-	hw = spi_master_get_devdata(master);
+	hw = spi_controller_get_devdata(host);
 
 	hw->spi_clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(hw->spi_clk)) {
@@ -138,16 +138,16 @@ static int spi_clps711x_probe(struct platform_device *pdev)
 	readl(hw->syncio);
 
 	ret = devm_request_irq(&pdev->dev, irq, spi_clps711x_isr, 0,
-			       dev_name(&pdev->dev), master);
+			       dev_name(&pdev->dev), host);
 	if (ret)
 		goto err_out;
 
-	ret = devm_spi_register_master(&pdev->dev, master);
+	ret = devm_spi_register_controller(&pdev->dev, host);
 	if (!ret)
 		return 0;
 
 err_out:
-	spi_master_put(master);
+	spi_controller_put(host);
 
 	return ret;
 }

From d2a44ae8d2cf9142f5555a2007cb370dc0a2efa5 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:14 +0800
Subject: [PATCH 0801/2047] spi: octeon: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-15-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cavium-octeon.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/spi/spi-cavium-octeon.c b/drivers/spi/spi-cavium-octeon.c
index 58060be33106..4511c3b31223 100644
--- a/drivers/spi/spi-cavium-octeon.c
+++ b/drivers/spi/spi-cavium-octeon.c
@@ -19,15 +19,15 @@
 static int octeon_spi_probe(struct platform_device *pdev)
 {
 	void __iomem *reg_base;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct octeon_spi *p;
 	int err = -ENOENT;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(struct octeon_spi));
-	if (!master)
+	host = spi_alloc_host(&pdev->dev, sizeof(struct octeon_spi));
+	if (!host)
 		return -ENOMEM;
-	p = spi_master_get_devdata(master);
-	platform_set_drvdata(pdev, master);
+	p = spi_controller_get_devdata(host);
+	platform_set_drvdata(pdev, host);
 
 	reg_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(reg_base)) {
@@ -43,21 +43,21 @@ static int octeon_spi_probe(struct platform_device *pdev)
 	p->regs.tx = 0x10;
 	p->regs.data = 0x80;
 
-	master->num_chipselect = 4;
-	master->mode_bits = SPI_CPHA |
+	host->num_chipselect = 4;
+	host->mode_bits = SPI_CPHA |
 			    SPI_CPOL |
 			    SPI_CS_HIGH |
 			    SPI_LSB_FIRST |
 			    SPI_3WIRE;
 
-	master->transfer_one_message = octeon_spi_transfer_one_message;
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->max_speed_hz = OCTEON_SPI_MAX_CLOCK_HZ;
+	host->transfer_one_message = octeon_spi_transfer_one_message;
+	host->bits_per_word_mask = SPI_BPW_MASK(8);
+	host->max_speed_hz = OCTEON_SPI_MAX_CLOCK_HZ;
 
-	master->dev.of_node = pdev->dev.of_node;
-	err = devm_spi_register_master(&pdev->dev, master);
+	host->dev.of_node = pdev->dev.of_node;
+	err = devm_spi_register_controller(&pdev->dev, host);
 	if (err) {
-		dev_err(&pdev->dev, "register master failed: %d\n", err);
+		dev_err(&pdev->dev, "register host failed: %d\n", err);
 		goto fail;
 	}
 
@@ -65,14 +65,14 @@ static int octeon_spi_probe(struct platform_device *pdev)
 
 	return 0;
 fail:
-	spi_master_put(master);
+	spi_controller_put(host);
 	return err;
 }
 
 static void octeon_spi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct octeon_spi *p = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct octeon_spi *p = spi_controller_get_devdata(host);
 
 	/* Clear the CSENA* and put everything in a known state. */
 	writeq(0, p->register_base + OCTEON_SPI_CFG(p));

From 556aca5be04c83c25ff4e96f53cb82b44cd7f8ff Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:15 +0800
Subject: [PATCH 0802/2047] spi: spi-cavium-thunderx: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-16-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cavium-thunderx.c | 32 +++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/spi/spi-cavium-thunderx.c b/drivers/spi/spi-cavium-thunderx.c
index 535f7eb9fa69..f7c378a5f1bc 100644
--- a/drivers/spi/spi-cavium-thunderx.c
+++ b/drivers/spi/spi-cavium-thunderx.c
@@ -20,15 +20,15 @@ static int thunderx_spi_probe(struct pci_dev *pdev,
 			      const struct pci_device_id *ent)
 {
 	struct device *dev = &pdev->dev;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct octeon_spi *p;
 	int ret;
 
-	master = spi_alloc_master(dev, sizeof(struct octeon_spi));
-	if (!master)
+	host = spi_alloc_host(dev, sizeof(struct octeon_spi));
+	if (!host)
 		return -ENOMEM;
 
-	p = spi_master_get_devdata(master);
+	p = spi_controller_get_devdata(host);
 
 	ret = pcim_enable_device(pdev);
 	if (ret)
@@ -64,18 +64,18 @@ static int thunderx_spi_probe(struct pci_dev *pdev,
 		p->sys_freq = SYS_FREQ_DEFAULT;
 	dev_info(dev, "Set system clock to %u\n", p->sys_freq);
 
-	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
-	master->num_chipselect = 4;
-	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH |
+	host->flags = SPI_CONTROLLER_HALF_DUPLEX;
+	host->num_chipselect = 4;
+	host->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH |
 			    SPI_LSB_FIRST | SPI_3WIRE;
-	master->transfer_one_message = octeon_spi_transfer_one_message;
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->max_speed_hz = OCTEON_SPI_MAX_CLOCK_HZ;
-	master->dev.of_node = pdev->dev.of_node;
+	host->transfer_one_message = octeon_spi_transfer_one_message;
+	host->bits_per_word_mask = SPI_BPW_MASK(8);
+	host->max_speed_hz = OCTEON_SPI_MAX_CLOCK_HZ;
+	host->dev.of_node = pdev->dev.of_node;
 
-	pci_set_drvdata(pdev, master);
+	pci_set_drvdata(pdev, host);
 
-	ret = devm_spi_register_master(dev, master);
+	ret = devm_spi_register_controller(dev, host);
 	if (ret)
 		goto error;
 
@@ -84,16 +84,16 @@ static int thunderx_spi_probe(struct pci_dev *pdev,
 error:
 	clk_disable_unprepare(p->clk);
 	pci_release_regions(pdev);
-	spi_master_put(master);
+	spi_controller_put(host);
 	return ret;
 }
 
 static void thunderx_spi_remove(struct pci_dev *pdev)
 {
-	struct spi_master *master = pci_get_drvdata(pdev);
+	struct spi_controller *host = pci_get_drvdata(pdev);
 	struct octeon_spi *p;
 
-	p = spi_master_get_devdata(master);
+	p = spi_controller_get_devdata(host);
 	if (!p)
 		return;
 

From cd9fdf5ae81c01d7d927bb090e25419e9f5165f1 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:16 +0800
Subject: [PATCH 0803/2047] spi: coldfire-qspi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-17-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-coldfire-qspi.c | 66 ++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/drivers/spi/spi-coldfire-qspi.c b/drivers/spi/spi-coldfire-qspi.c
index 31174e7ca7a6..f0b630fe16c3 100644
--- a/drivers/spi/spi-coldfire-qspi.c
+++ b/drivers/spi/spi-coldfire-qspi.c
@@ -286,7 +286,7 @@ static void mcfqspi_transfer_msg16(struct mcfqspi *mcfqspi, unsigned count,
 
 static void mcfqspi_set_cs(struct spi_device *spi, bool enable)
 {
-	struct mcfqspi *mcfqspi = spi_master_get_devdata(spi->master);
+	struct mcfqspi *mcfqspi = spi_controller_get_devdata(spi->controller);
 	bool cs_high = spi->mode & SPI_CS_HIGH;
 
 	if (enable)
@@ -295,11 +295,11 @@ static void mcfqspi_set_cs(struct spi_device *spi, bool enable)
 		mcfqspi_cs_deselect(mcfqspi, spi_get_chipselect(spi, 0), cs_high);
 }
 
-static int mcfqspi_transfer_one(struct spi_master *master,
+static int mcfqspi_transfer_one(struct spi_controller *host,
 				struct spi_device *spi,
 				struct spi_transfer *t)
 {
-	struct mcfqspi *mcfqspi = spi_master_get_devdata(master);
+	struct mcfqspi *mcfqspi = spi_controller_get_devdata(host);
 	u16 qmr = MCFQSPI_QMR_MSTR;
 
 	qmr |= t->bits_per_word << 10;
@@ -323,7 +323,7 @@ static int mcfqspi_transfer_one(struct spi_master *master,
 
 static int mcfqspi_setup(struct spi_device *spi)
 {
-	mcfqspi_cs_deselect(spi_master_get_devdata(spi->master),
+	mcfqspi_cs_deselect(spi_controller_get_devdata(spi->controller),
 			    spi_get_chipselect(spi, 0), spi->mode & SPI_CS_HIGH);
 
 	dev_dbg(&spi->dev,
@@ -337,7 +337,7 @@ static int mcfqspi_setup(struct spi_device *spi)
 
 static int mcfqspi_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct mcfqspi *mcfqspi;
 	struct mcfqspi_platform_data *pdata;
 	int status;
@@ -353,13 +353,13 @@ static int mcfqspi_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*mcfqspi));
-	if (master == NULL) {
-		dev_dbg(&pdev->dev, "spi_alloc_master failed\n");
+	host = spi_alloc_host(&pdev->dev, sizeof(*mcfqspi));
+	if (host == NULL) {
+		dev_dbg(&pdev->dev, "spi_alloc_host failed\n");
 		return -ENOMEM;
 	}
 
-	mcfqspi = spi_master_get_devdata(master);
+	mcfqspi = spi_controller_get_devdata(host);
 
 	mcfqspi->iobase = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(mcfqspi->iobase)) {
@@ -388,8 +388,8 @@ static int mcfqspi_probe(struct platform_device *pdev)
 		goto fail0;
 	}
 
-	master->bus_num = pdata->bus_num;
-	master->num_chipselect = pdata->num_chipselect;
+	host->bus_num = pdata->bus_num;
+	host->num_chipselect = pdata->num_chipselect;
 
 	mcfqspi->cs_control = pdata->cs_control;
 	status = mcfqspi_cs_setup(mcfqspi);
@@ -400,19 +400,19 @@ static int mcfqspi_probe(struct platform_device *pdev)
 
 	init_waitqueue_head(&mcfqspi->waitq);
 
-	master->mode_bits = SPI_CS_HIGH | SPI_CPOL | SPI_CPHA;
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 16);
-	master->setup = mcfqspi_setup;
-	master->set_cs = mcfqspi_set_cs;
-	master->transfer_one = mcfqspi_transfer_one;
-	master->auto_runtime_pm = true;
+	host->mode_bits = SPI_CS_HIGH | SPI_CPOL | SPI_CPHA;
+	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 16);
+	host->setup = mcfqspi_setup;
+	host->set_cs = mcfqspi_set_cs;
+	host->transfer_one = mcfqspi_transfer_one;
+	host->auto_runtime_pm = true;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 	pm_runtime_enable(&pdev->dev);
 
-	status = devm_spi_register_master(&pdev->dev, master);
+	status = devm_spi_register_controller(&pdev->dev, host);
 	if (status) {
-		dev_dbg(&pdev->dev, "spi_register_master failed\n");
+		dev_dbg(&pdev->dev, "devm_spi_register_controller failed\n");
 		goto fail1;
 	}
 
@@ -424,7 +424,7 @@ fail1:
 	pm_runtime_disable(&pdev->dev);
 	mcfqspi_cs_teardown(mcfqspi);
 fail0:
-	spi_master_put(master);
+	spi_controller_put(host);
 
 	dev_dbg(&pdev->dev, "Coldfire QSPI probe failed\n");
 
@@ -433,8 +433,8 @@ fail0:
 
 static void mcfqspi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct mcfqspi *mcfqspi = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct mcfqspi *mcfqspi = spi_controller_get_devdata(host);
 
 	pm_runtime_disable(&pdev->dev);
 	/* disable the hardware (set the baud rate to 0) */
@@ -447,11 +447,11 @@ static void mcfqspi_remove(struct platform_device *pdev)
 #ifdef CONFIG_PM_SLEEP
 static int mcfqspi_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct mcfqspi *mcfqspi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct mcfqspi *mcfqspi = spi_controller_get_devdata(host);
 	int ret;
 
-	ret = spi_master_suspend(master);
+	ret = spi_controller_suspend(host);
 	if (ret)
 		return ret;
 
@@ -462,20 +462,20 @@ static int mcfqspi_suspend(struct device *dev)
 
 static int mcfqspi_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct mcfqspi *mcfqspi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct mcfqspi *mcfqspi = spi_controller_get_devdata(host);
 
 	clk_enable(mcfqspi->clk);
 
-	return spi_master_resume(master);
+	return spi_controller_resume(host);
 }
 #endif
 
 #ifdef CONFIG_PM
 static int mcfqspi_runtime_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct mcfqspi *mcfqspi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct mcfqspi *mcfqspi = spi_controller_get_devdata(host);
 
 	clk_disable(mcfqspi->clk);
 
@@ -484,8 +484,8 @@ static int mcfqspi_runtime_suspend(struct device *dev)
 
 static int mcfqspi_runtime_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct mcfqspi *mcfqspi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct mcfqspi *mcfqspi = spi_controller_get_devdata(host);
 
 	clk_enable(mcfqspi->clk);
 

From ad5602ba3113b99318a726d6452228b0ce137804 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:17 +0800
Subject: [PATCH 0804/2047] spi: davinci: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-18-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-davinci.c | 74 +++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 9a9f3bc0e2d5..c457b550d3ad 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -201,7 +201,7 @@ static void davinci_spi_chipselect(struct spi_device *spi, int value)
 	u8 chip_sel = spi_get_chipselect(spi, 0);
 	u16 spidat1 = CS_DEFAULT;
 
-	dspi = spi_master_get_devdata(spi->master);
+	dspi = spi_controller_get_devdata(spi->controller);
 
 	/* program delay transfers if tx_delay is non zero */
 	if (spicfg && spicfg->wdelay)
@@ -271,7 +271,7 @@ static int davinci_spi_setup_transfer(struct spi_device *spi,
 	u32 hz = 0, spifmt = 0;
 	int prescale;
 
-	dspi = spi_master_get_devdata(spi->master);
+	dspi = spi_controller_get_devdata(spi->controller);
 	spicfg = spi->controller_data;
 	if (!spicfg)
 		spicfg = &davinci_spi_default_cfg;
@@ -379,7 +379,7 @@ static int davinci_spi_of_setup(struct spi_device *spi)
 {
 	struct davinci_spi_config *spicfg = spi->controller_data;
 	struct device_node *np = spi->dev.of_node;
-	struct davinci_spi *dspi = spi_master_get_devdata(spi->master);
+	struct davinci_spi *dspi = spi_controller_get_devdata(spi->controller);
 	u32 prop;
 
 	if (spicfg == NULL && np) {
@@ -411,7 +411,7 @@ static int davinci_spi_setup(struct spi_device *spi)
 	struct device_node *np = spi->dev.of_node;
 	bool internal_cs = true;
 
-	dspi = spi_master_get_devdata(spi->master);
+	dspi = spi_controller_get_devdata(spi->controller);
 
 	if (!(spi->mode & SPI_NO_CS)) {
 		if (np && spi_get_csgpiod(spi, 0))
@@ -441,7 +441,7 @@ static void davinci_spi_cleanup(struct spi_device *spi)
 		kfree(spicfg);
 }
 
-static bool davinci_spi_can_dma(struct spi_master *master,
+static bool davinci_spi_can_dma(struct spi_controller *host,
 				struct spi_device *spi,
 				struct spi_transfer *xfer)
 {
@@ -571,7 +571,7 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 	struct davinci_spi_config *spicfg;
 	struct davinci_spi_platform_data *pdata;
 
-	dspi = spi_master_get_devdata(spi->master);
+	dspi = spi_controller_get_devdata(spi->controller);
 	pdata = &dspi->pdata;
 	spicfg = (struct davinci_spi_config *)spi->controller_data;
 	if (!spicfg)
@@ -592,7 +592,7 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 
 	reinit_completion(&dspi->done);
 
-	if (!davinci_spi_can_dma(spi->master, spi, t)) {
+	if (!davinci_spi_can_dma(spi->controller, spi, t)) {
 		if (spicfg->io_type != SPI_IO_TYPE_POLL)
 			set_io_bits(dspi->base + SPIINT, SPIINT_MASKINT);
 		/* start the transfer */
@@ -673,7 +673,7 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 	}
 
 	clear_io_bits(dspi->base + SPIINT, SPIINT_MASKALL);
-	if (davinci_spi_can_dma(spi->master, spi, t))
+	if (davinci_spi_can_dma(spi->controller, spi, t))
 		clear_io_bits(dspi->base + SPIINT, SPIINT_DMA_REQ_EN);
 
 	clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_SPIENA_MASK);
@@ -855,22 +855,22 @@ static int spi_davinci_get_pdata(struct platform_device *pdev,
  */
 static int davinci_spi_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct davinci_spi *dspi;
 	struct davinci_spi_platform_data *pdata;
 	struct resource *r;
 	int ret = 0;
 	u32 spipc0;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(struct davinci_spi));
-	if (master == NULL) {
+	host = spi_alloc_host(&pdev->dev, sizeof(struct davinci_spi));
+	if (host == NULL) {
 		ret = -ENOMEM;
 		goto err;
 	}
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
-	dspi = spi_master_get_devdata(master);
+	dspi = spi_controller_get_devdata(host);
 
 	if (dev_get_platdata(&pdev->dev)) {
 		pdata = dev_get_platdata(&pdev->dev);
@@ -879,7 +879,7 @@ static int davinci_spi_probe(struct platform_device *pdev)
 		/* update dspi pdata with that from the DT */
 		ret = spi_davinci_get_pdata(pdev, dspi);
 		if (ret < 0)
-			goto free_master;
+			goto free_host;
 	}
 
 	/* pdata in dspi is now updated and point pdata to that */
@@ -891,13 +891,13 @@ static int davinci_spi_probe(struct platform_device *pdev)
 					    GFP_KERNEL);
 	if (dspi->bytes_per_word == NULL) {
 		ret = -ENOMEM;
-		goto free_master;
+		goto free_host;
 	}
 
 	dspi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &r);
 	if (IS_ERR(dspi->base)) {
 		ret = PTR_ERR(dspi->base);
-		goto free_master;
+		goto free_host;
 	}
 	dspi->pbase = r->start;
 
@@ -905,34 +905,34 @@ static int davinci_spi_probe(struct platform_device *pdev)
 
 	ret = platform_get_irq(pdev, 0);
 	if (ret < 0)
-		goto free_master;
+		goto free_host;
 	dspi->irq = ret;
 
 	ret = devm_request_threaded_irq(&pdev->dev, dspi->irq, davinci_spi_irq,
 				dummy_thread_fn, 0, dev_name(&pdev->dev), dspi);
 	if (ret)
-		goto free_master;
+		goto free_host;
 
-	dspi->bitbang.master = master;
+	dspi->bitbang.master = host;
 
 	dspi->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dspi->clk)) {
 		ret = -ENODEV;
-		goto free_master;
+		goto free_host;
 	}
 	ret = clk_prepare_enable(dspi->clk);
 	if (ret)
-		goto free_master;
+		goto free_host;
 
-	master->use_gpio_descriptors = true;
-	master->dev.of_node = pdev->dev.of_node;
-	master->bus_num = pdev->id;
-	master->num_chipselect = pdata->num_chipselect;
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 16);
-	master->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_GPIO_SS;
-	master->setup = davinci_spi_setup;
-	master->cleanup = davinci_spi_cleanup;
-	master->can_dma = davinci_spi_can_dma;
+	host->use_gpio_descriptors = true;
+	host->dev.of_node = pdev->dev.of_node;
+	host->bus_num = pdev->id;
+	host->num_chipselect = pdata->num_chipselect;
+	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 16);
+	host->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_GPIO_SS;
+	host->setup = davinci_spi_setup;
+	host->cleanup = davinci_spi_cleanup;
+	host->can_dma = davinci_spi_can_dma;
 
 	dspi->bitbang.chipselect = davinci_spi_chipselect;
 	dspi->bitbang.setup_transfer = davinci_spi_setup_transfer;
@@ -973,7 +973,7 @@ static int davinci_spi_probe(struct platform_device *pdev)
 
 	iowrite32(CS_DEFAULT, dspi->base + SPIDEF);
 
-	/* master mode default */
+	/* host mode default */
 	set_io_bits(dspi->base + SPIGCR1, SPIGCR1_CLKMOD_MASK);
 	set_io_bits(dspi->base + SPIGCR1, SPIGCR1_MASTER_MASK);
 	set_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK);
@@ -993,8 +993,8 @@ free_dma:
 	}
 free_clk:
 	clk_disable_unprepare(dspi->clk);
-free_master:
-	spi_master_put(master);
+free_host:
+	spi_controller_put(host);
 err:
 	return ret;
 }
@@ -1011,10 +1011,10 @@ err:
 static void davinci_spi_remove(struct platform_device *pdev)
 {
 	struct davinci_spi *dspi;
-	struct spi_master *master;
+	struct spi_controller *host;
 
-	master = platform_get_drvdata(pdev);
-	dspi = spi_master_get_devdata(master);
+	host = platform_get_drvdata(pdev);
+	dspi = spi_controller_get_devdata(host);
 
 	spi_bitbang_stop(&dspi->bitbang);
 
@@ -1025,7 +1025,7 @@ static void davinci_spi_remove(struct platform_device *pdev)
 		dma_release_channel(dspi->dma_tx);
 	}
 
-	spi_master_put(master);
+	spi_controller_put(host);
 }
 
 static struct platform_driver davinci_spi_driver = {

From 5ab7a7e37d2fe73465f521155672ccd3b138c35a Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:18 +0800
Subject: [PATCH 0805/2047] spi: dln2: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-19-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-dln2.c | 94 +++++++++++++++++++++---------------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/drivers/spi/spi-dln2.c b/drivers/spi/spi-dln2.c
index 6bd93c47853c..d319dc357fef 100644
--- a/drivers/spi/spi-dln2.c
+++ b/drivers/spi/spi-dln2.c
@@ -79,7 +79,7 @@
 
 struct dln2_spi {
 	struct platform_device *pdev;
-	struct spi_master *master;
+	struct spi_controller *host;
 	u8 port;
 
 	/*
@@ -176,7 +176,7 @@ static int dln2_spi_cs_enable(struct dln2_spi *dln2, u8 cs_mask, bool enable)
 
 static int dln2_spi_cs_enable_all(struct dln2_spi *dln2, bool enable)
 {
-	u8 cs_mask = GENMASK(dln2->master->num_chipselect - 1, 0);
+	u8 cs_mask = GENMASK(dln2->host->num_chipselect - 1, 0);
 
 	return dln2_spi_cs_enable(dln2, cs_mask, enable);
 }
@@ -589,11 +589,11 @@ static int dln2_spi_rdwr(struct dln2_spi *dln2, const u8 *tx_data,
 	return 0;
 }
 
-static int dln2_spi_prepare_message(struct spi_master *master,
+static int dln2_spi_prepare_message(struct spi_controller *host,
 				    struct spi_message *message)
 {
 	int ret;
-	struct dln2_spi *dln2 = spi_master_get_devdata(master);
+	struct dln2_spi *dln2 = spi_controller_get_devdata(host);
 	struct spi_device *spi = message->spi;
 
 	if (dln2->cs != spi_get_chipselect(spi, 0)) {
@@ -650,11 +650,11 @@ static int dln2_spi_transfer_setup(struct dln2_spi *dln2, u32 speed,
 	return dln2_spi_enable(dln2, true);
 }
 
-static int dln2_spi_transfer_one(struct spi_master *master,
+static int dln2_spi_transfer_one(struct spi_controller *host,
 				 struct spi_device *spi,
 				 struct spi_transfer *xfer)
 {
-	struct dln2_spi *dln2 = spi_master_get_devdata(master);
+	struct dln2_spi *dln2 = spi_controller_get_devdata(host);
 	int status;
 	u8 attr = 0;
 
@@ -666,7 +666,7 @@ static int dln2_spi_transfer_one(struct spi_master *master,
 		return status;
 	}
 
-	if (!xfer->cs_change && !spi_transfer_is_last(master, xfer))
+	if (!xfer->cs_change && !spi_transfer_is_last(host, xfer))
 		attr = DLN2_SPI_ATTR_LEAVE_SS_LOW;
 
 	status = dln2_spi_rdwr(dln2, xfer->tx_buf, xfer->rx_buf,
@@ -679,29 +679,29 @@ static int dln2_spi_transfer_one(struct spi_master *master,
 
 static int dln2_spi_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct dln2_spi *dln2;
 	struct dln2_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct device *dev = &pdev->dev;
 	int ret;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*dln2));
-	if (!master)
+	host = spi_alloc_host(&pdev->dev, sizeof(*dln2));
+	if (!host)
 		return -ENOMEM;
 
-	device_set_node(&master->dev, dev_fwnode(dev));
+	device_set_node(&host->dev, dev_fwnode(dev));
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
-	dln2 = spi_master_get_devdata(master);
+	dln2 = spi_controller_get_devdata(host);
 
 	dln2->buf = devm_kmalloc(&pdev->dev, DLN2_SPI_BUF_SIZE, GFP_KERNEL);
 	if (!dln2->buf) {
 		ret = -ENOMEM;
-		goto exit_free_master;
+		goto exit_free_host;
 	}
 
-	dln2->master = master;
+	dln2->host = host;
 	dln2->pdev = pdev;
 	dln2->port = pdata->port;
 	/* cs/mode can never be 0xff, so the first transfer will set them */
@@ -712,47 +712,47 @@ static int dln2_spi_probe(struct platform_device *pdev)
 	ret = dln2_spi_enable(dln2, false);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to disable SPI module\n");
-		goto exit_free_master;
+		goto exit_free_host;
 	}
 
-	ret = dln2_spi_get_cs_num(dln2, &master->num_chipselect);
+	ret = dln2_spi_get_cs_num(dln2, &host->num_chipselect);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to get number of CS pins\n");
-		goto exit_free_master;
+		goto exit_free_host;
 	}
 
 	ret = dln2_spi_get_speed_range(dln2,
-				       &master->min_speed_hz,
-				       &master->max_speed_hz);
+				       &host->min_speed_hz,
+				       &host->max_speed_hz);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to read bus min/max freqs\n");
-		goto exit_free_master;
+		goto exit_free_host;
 	}
 
 	ret = dln2_spi_get_supported_frame_sizes(dln2,
-						 &master->bits_per_word_mask);
+						 &host->bits_per_word_mask);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to read supported frame sizes\n");
-		goto exit_free_master;
+		goto exit_free_host;
 	}
 
 	ret = dln2_spi_cs_enable_all(dln2, true);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to enable CS pins\n");
-		goto exit_free_master;
+		goto exit_free_host;
 	}
 
-	master->bus_num = -1;
-	master->mode_bits = SPI_CPOL | SPI_CPHA;
-	master->prepare_message = dln2_spi_prepare_message;
-	master->transfer_one = dln2_spi_transfer_one;
-	master->auto_runtime_pm = true;
+	host->bus_num = -1;
+	host->mode_bits = SPI_CPOL | SPI_CPHA;
+	host->prepare_message = dln2_spi_prepare_message;
+	host->transfer_one = dln2_spi_transfer_one;
+	host->auto_runtime_pm = true;
 
 	/* enable SPI module, we're good to go */
 	ret = dln2_spi_enable(dln2, true);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to enable SPI module\n");
-		goto exit_free_master;
+		goto exit_free_host;
 	}
 
 	pm_runtime_set_autosuspend_delay(&pdev->dev,
@@ -761,9 +761,9 @@ static int dln2_spi_probe(struct platform_device *pdev)
 	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 
-	ret = devm_spi_register_master(&pdev->dev, master);
+	ret = devm_spi_register_controller(&pdev->dev, host);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "Failed to register master\n");
+		dev_err(&pdev->dev, "Failed to register host\n");
 		goto exit_register;
 	}
 
@@ -775,16 +775,16 @@ exit_register:
 
 	if (dln2_spi_enable(dln2, false) < 0)
 		dev_err(&pdev->dev, "Failed to disable SPI module\n");
-exit_free_master:
-	spi_master_put(master);
+exit_free_host:
+	spi_controller_put(host);
 
 	return ret;
 }
 
 static void dln2_spi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct dln2_spi *dln2 = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct dln2_spi *dln2 = spi_controller_get_devdata(host);
 
 	pm_runtime_disable(&pdev->dev);
 
@@ -796,10 +796,10 @@ static void dln2_spi_remove(struct platform_device *pdev)
 static int dln2_spi_suspend(struct device *dev)
 {
 	int ret;
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct dln2_spi *dln2 = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct dln2_spi *dln2 = spi_controller_get_devdata(host);
 
-	ret = spi_master_suspend(master);
+	ret = spi_controller_suspend(host);
 	if (ret < 0)
 		return ret;
 
@@ -824,8 +824,8 @@ static int dln2_spi_suspend(struct device *dev)
 static int dln2_spi_resume(struct device *dev)
 {
 	int ret;
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct dln2_spi *dln2 = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct dln2_spi *dln2 = spi_controller_get_devdata(host);
 
 	if (!pm_runtime_suspended(dev)) {
 		ret = dln2_spi_cs_enable_all(dln2, true);
@@ -837,23 +837,23 @@ static int dln2_spi_resume(struct device *dev)
 			return ret;
 	}
 
-	return spi_master_resume(master);
+	return spi_controller_resume(host);
 }
 #endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_PM
 static int dln2_spi_runtime_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct dln2_spi *dln2 = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct dln2_spi *dln2 = spi_controller_get_devdata(host);
 
 	return dln2_spi_enable(dln2, false);
 }
 
 static int dln2_spi_runtime_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct dln2_spi *dln2 = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct dln2_spi *dln2 = spi_controller_get_devdata(host);
 
 	return  dln2_spi_enable(dln2, true);
 }
@@ -875,7 +875,7 @@ static struct platform_driver spi_dln2_driver = {
 };
 module_platform_driver(spi_dln2_driver);
 
-MODULE_DESCRIPTION("Driver for the Diolan DLN2 SPI master interface");
+MODULE_DESCRIPTION("Driver for the Diolan DLN2 SPI host interface");
 MODULE_AUTHOR("Laurentiu Palcu <laurentiu.palcu@intel.com>");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("platform:dln2-spi");

From eefc6c5c2451126c27f4098536d659d07635a1b1 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:19 +0800
Subject: [PATCH 0806/2047] spi: dw: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-20-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-dw-core.c | 112 +++++++++++++++++++-------------------
 drivers/spi/spi-dw-dma.c  |  22 ++++----
 drivers/spi/spi-dw-mmio.c |  10 ++--
 drivers/spi/spi-dw.h      |   4 +-
 4 files changed, 74 insertions(+), 74 deletions(-)

diff --git a/drivers/spi/spi-dw-core.c b/drivers/spi/spi-dw-core.c
index 45f5acc26b1d..0274c9295514 100644
--- a/drivers/spi/spi-dw-core.c
+++ b/drivers/spi/spi-dw-core.c
@@ -61,7 +61,7 @@ static void dw_spi_debugfs_init(struct dw_spi *dws)
 {
 	char name[32];
 
-	snprintf(name, 32, "dw_spi%d", dws->master->bus_num);
+	snprintf(name, 32, "dw_spi%d", dws->host->bus_num);
 	dws->debugfs = debugfs_create_dir(name, NULL);
 
 	dws->regset.regs = dw_spi_dbgfs_regs;
@@ -183,25 +183,25 @@ int dw_spi_check_status(struct dw_spi *dws, bool raw)
 		irq_status = dw_readl(dws, DW_SPI_ISR);
 
 	if (irq_status & DW_SPI_INT_RXOI) {
-		dev_err(&dws->master->dev, "RX FIFO overflow detected\n");
+		dev_err(&dws->host->dev, "RX FIFO overflow detected\n");
 		ret = -EIO;
 	}
 
 	if (irq_status & DW_SPI_INT_RXUI) {
-		dev_err(&dws->master->dev, "RX FIFO underflow detected\n");
+		dev_err(&dws->host->dev, "RX FIFO underflow detected\n");
 		ret = -EIO;
 	}
 
 	if (irq_status & DW_SPI_INT_TXOI) {
-		dev_err(&dws->master->dev, "TX FIFO overflow detected\n");
+		dev_err(&dws->host->dev, "TX FIFO overflow detected\n");
 		ret = -EIO;
 	}
 
 	/* Generically handle the erroneous situation */
 	if (ret) {
 		dw_spi_reset_chip(dws);
-		if (dws->master->cur_msg)
-			dws->master->cur_msg->status = ret;
+		if (dws->host->cur_msg)
+			dws->host->cur_msg->status = ret;
 	}
 
 	return ret;
@@ -213,7 +213,7 @@ static irqreturn_t dw_spi_transfer_handler(struct dw_spi *dws)
 	u16 irq_status = dw_readl(dws, DW_SPI_ISR);
 
 	if (dw_spi_check_status(dws, false)) {
-		spi_finalize_current_transfer(dws->master);
+		spi_finalize_current_transfer(dws->host);
 		return IRQ_HANDLED;
 	}
 
@@ -227,7 +227,7 @@ static irqreturn_t dw_spi_transfer_handler(struct dw_spi *dws)
 	dw_reader(dws);
 	if (!dws->rx_len) {
 		dw_spi_mask_intr(dws, 0xff);
-		spi_finalize_current_transfer(dws->master);
+		spi_finalize_current_transfer(dws->host);
 	} else if (dws->rx_len <= dw_readl(dws, DW_SPI_RXFTLR)) {
 		dw_writel(dws, DW_SPI_RXFTLR, dws->rx_len - 1);
 	}
@@ -248,14 +248,14 @@ static irqreturn_t dw_spi_transfer_handler(struct dw_spi *dws)
 
 static irqreturn_t dw_spi_irq(int irq, void *dev_id)
 {
-	struct spi_controller *master = dev_id;
-	struct dw_spi *dws = spi_controller_get_devdata(master);
+	struct spi_controller *host = dev_id;
+	struct dw_spi *dws = spi_controller_get_devdata(host);
 	u16 irq_status = dw_readl(dws, DW_SPI_ISR) & DW_SPI_INT_MASK;
 
 	if (!irq_status)
 		return IRQ_NONE;
 
-	if (!master->cur_msg) {
+	if (!host->cur_msg) {
 		dw_spi_mask_intr(dws, 0xff);
 		return IRQ_HANDLED;
 	}
@@ -408,11 +408,11 @@ static int dw_spi_poll_transfer(struct dw_spi *dws,
 	return 0;
 }
 
-static int dw_spi_transfer_one(struct spi_controller *master,
+static int dw_spi_transfer_one(struct spi_controller *host,
 			       struct spi_device *spi,
 			       struct spi_transfer *transfer)
 {
-	struct dw_spi *dws = spi_controller_get_devdata(master);
+	struct dw_spi *dws = spi_controller_get_devdata(host);
 	struct dw_spi_cfg cfg = {
 		.tmode = DW_SPI_CTRLR0_TMOD_TR,
 		.dfs = transfer->bits_per_word,
@@ -440,8 +440,8 @@ static int dw_spi_transfer_one(struct spi_controller *master,
 	transfer->effective_speed_hz = dws->current_freq;
 
 	/* Check if current transfer is a DMA transaction */
-	if (master->can_dma && master->can_dma(master, spi, transfer))
-		dws->dma_mapped = master->cur_msg_mapped;
+	if (host->can_dma && host->can_dma(host, spi, transfer))
+		dws->dma_mapped = host->cur_msg_mapped;
 
 	/* For poll mode just disable all interrupts */
 	dw_spi_mask_intr(dws, 0xff);
@@ -464,10 +464,10 @@ static int dw_spi_transfer_one(struct spi_controller *master,
 	return 1;
 }
 
-static void dw_spi_handle_err(struct spi_controller *master,
+static void dw_spi_handle_err(struct spi_controller *host,
 			      struct spi_message *msg)
 {
-	struct dw_spi *dws = spi_controller_get_devdata(master);
+	struct dw_spi *dws = spi_controller_get_devdata(host);
 
 	if (dws->dma_mapped)
 		dws->dma_ops->dma_stop(dws);
@@ -576,7 +576,7 @@ static int dw_spi_write_then_read(struct dw_spi *dws, struct spi_device *spi)
 	while (len) {
 		entries = readl_relaxed(dws->regs + DW_SPI_TXFLR);
 		if (!entries) {
-			dev_err(&dws->master->dev, "CS de-assertion on Tx\n");
+			dev_err(&dws->host->dev, "CS de-assertion on Tx\n");
 			return -EIO;
 		}
 		room = min(dws->fifo_len - entries, len);
@@ -596,7 +596,7 @@ static int dw_spi_write_then_read(struct dw_spi *dws, struct spi_device *spi)
 		if (!entries) {
 			sts = readl_relaxed(dws->regs + DW_SPI_RISR);
 			if (sts & DW_SPI_INT_RXOI) {
-				dev_err(&dws->master->dev, "FIFO overflow on Rx\n");
+				dev_err(&dws->host->dev, "FIFO overflow on Rx\n");
 				return -EIO;
 			}
 			continue;
@@ -637,7 +637,7 @@ static int dw_spi_wait_mem_op_done(struct dw_spi *dws)
 		spi_delay_exec(&delay, NULL);
 
 	if (retry < 0) {
-		dev_err(&dws->master->dev, "Mem op hanged up\n");
+		dev_err(&dws->host->dev, "Mem op hanged up\n");
 		return -EIO;
 	}
 
@@ -884,56 +884,56 @@ static void dw_spi_hw_init(struct device *dev, struct dw_spi *dws)
 
 int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
 {
-	struct spi_controller *master;
+	struct spi_controller *host;
 	int ret;
 
 	if (!dws)
 		return -EINVAL;
 
-	master = spi_alloc_master(dev, 0);
-	if (!master)
+	host = spi_alloc_host(dev, 0);
+	if (!host)
 		return -ENOMEM;
 
-	device_set_node(&master->dev, dev_fwnode(dev));
+	device_set_node(&host->dev, dev_fwnode(dev));
 
-	dws->master = master;
+	dws->host = host;
 	dws->dma_addr = (dma_addr_t)(dws->paddr + DW_SPI_DR);
 
-	spi_controller_set_devdata(master, dws);
+	spi_controller_set_devdata(host, dws);
 
 	/* Basic HW init */
 	dw_spi_hw_init(dev, dws);
 
 	ret = request_irq(dws->irq, dw_spi_irq, IRQF_SHARED, dev_name(dev),
-			  master);
+			  host);
 	if (ret < 0 && ret != -ENOTCONN) {
 		dev_err(dev, "can not get IRQ\n");
-		goto err_free_master;
+		goto err_free_host;
 	}
 
 	dw_spi_init_mem_ops(dws);
 
-	master->use_gpio_descriptors = true;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP;
+	host->use_gpio_descriptors = true;
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP;
 	if (dws->caps & DW_SPI_CAP_DFS32)
-		master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
+		host->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
 	else
-		master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
-	master->bus_num = dws->bus_num;
-	master->num_chipselect = dws->num_cs;
-	master->setup = dw_spi_setup;
-	master->cleanup = dw_spi_cleanup;
+		host->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
+	host->bus_num = dws->bus_num;
+	host->num_chipselect = dws->num_cs;
+	host->setup = dw_spi_setup;
+	host->cleanup = dw_spi_cleanup;
 	if (dws->set_cs)
-		master->set_cs = dws->set_cs;
+		host->set_cs = dws->set_cs;
 	else
-		master->set_cs = dw_spi_set_cs;
-	master->transfer_one = dw_spi_transfer_one;
-	master->handle_err = dw_spi_handle_err;
+		host->set_cs = dw_spi_set_cs;
+	host->transfer_one = dw_spi_transfer_one;
+	host->handle_err = dw_spi_handle_err;
 	if (dws->mem_ops.exec_op)
-		master->mem_ops = &dws->mem_ops;
-	master->max_speed_hz = dws->max_freq;
-	master->flags = SPI_CONTROLLER_GPIO_SS;
-	master->auto_runtime_pm = true;
+		host->mem_ops = &dws->mem_ops;
+	host->max_speed_hz = dws->max_freq;
+	host->flags = SPI_CONTROLLER_GPIO_SS;
+	host->auto_runtime_pm = true;
 
 	/* Get default rx sample delay */
 	device_property_read_u32(dev, "rx-sample-delay-ns",
@@ -946,14 +946,14 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
 		} else if (ret) {
 			dev_warn(dev, "DMA init failed\n");
 		} else {
-			master->can_dma = dws->dma_ops->can_dma;
-			master->flags |= SPI_CONTROLLER_MUST_TX;
+			host->can_dma = dws->dma_ops->can_dma;
+			host->flags |= SPI_CONTROLLER_MUST_TX;
 		}
 	}
 
-	ret = spi_register_controller(master);
+	ret = spi_register_controller(host);
 	if (ret) {
-		dev_err_probe(dev, ret, "problem registering spi master\n");
+		dev_err_probe(dev, ret, "problem registering spi host\n");
 		goto err_dma_exit;
 	}
 
@@ -965,9 +965,9 @@ err_dma_exit:
 		dws->dma_ops->dma_exit(dws);
 	dw_spi_enable_chip(dws, 0);
 err_free_irq:
-	free_irq(dws->irq, master);
-err_free_master:
-	spi_controller_put(master);
+	free_irq(dws->irq, host);
+err_free_host:
+	spi_controller_put(host);
 	return ret;
 }
 EXPORT_SYMBOL_NS_GPL(dw_spi_add_host, SPI_DW_CORE);
@@ -976,14 +976,14 @@ void dw_spi_remove_host(struct dw_spi *dws)
 {
 	dw_spi_debugfs_remove(dws);
 
-	spi_unregister_controller(dws->master);
+	spi_unregister_controller(dws->host);
 
 	if (dws->dma_ops && dws->dma_ops->dma_exit)
 		dws->dma_ops->dma_exit(dws);
 
 	dw_spi_shutdown_chip(dws);
 
-	free_irq(dws->irq, dws->master);
+	free_irq(dws->irq, dws->host);
 }
 EXPORT_SYMBOL_NS_GPL(dw_spi_remove_host, SPI_DW_CORE);
 
@@ -991,7 +991,7 @@ int dw_spi_suspend_host(struct dw_spi *dws)
 {
 	int ret;
 
-	ret = spi_controller_suspend(dws->master);
+	ret = spi_controller_suspend(dws->host);
 	if (ret)
 		return ret;
 
@@ -1002,8 +1002,8 @@ EXPORT_SYMBOL_NS_GPL(dw_spi_suspend_host, SPI_DW_CORE);
 
 int dw_spi_resume_host(struct dw_spi *dws)
 {
-	dw_spi_hw_init(&dws->master->dev, dws);
-	return spi_controller_resume(dws->master);
+	dw_spi_hw_init(&dws->host->dev, dws);
+	return spi_controller_resume(dws->host);
 }
 EXPORT_SYMBOL_NS_GPL(dw_spi_resume_host, SPI_DW_CORE);
 
diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c
index df819652901a..0ecbb6c36e23 100644
--- a/drivers/spi/spi-dw-dma.c
+++ b/drivers/spi/spi-dw-dma.c
@@ -139,8 +139,8 @@ static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws)
 	if (!dws->txchan)
 		goto free_rxchan;
 
-	dws->master->dma_rx = dws->rxchan;
-	dws->master->dma_tx = dws->txchan;
+	dws->host->dma_rx = dws->rxchan;
+	dws->host->dma_tx = dws->txchan;
 
 	init_completion(&dws->dma_completion);
 
@@ -183,8 +183,8 @@ static int dw_spi_dma_init_generic(struct device *dev, struct dw_spi *dws)
 		goto free_rxchan;
 	}
 
-	dws->master->dma_rx = dws->rxchan;
-	dws->master->dma_tx = dws->txchan;
+	dws->host->dma_rx = dws->rxchan;
+	dws->host->dma_tx = dws->txchan;
 
 	init_completion(&dws->dma_completion);
 
@@ -242,10 +242,10 @@ static enum dma_slave_buswidth dw_spi_dma_convert_width(u8 n_bytes)
 	}
 }
 
-static bool dw_spi_can_dma(struct spi_controller *master,
+static bool dw_spi_can_dma(struct spi_controller *host,
 			   struct spi_device *spi, struct spi_transfer *xfer)
 {
-	struct dw_spi *dws = spi_controller_get_devdata(master);
+	struct dw_spi *dws = spi_controller_get_devdata(host);
 	enum dma_slave_buswidth dma_bus_width;
 
 	if (xfer->len <= dws->fifo_len)
@@ -271,7 +271,7 @@ static int dw_spi_dma_wait(struct dw_spi *dws, unsigned int len, u32 speed)
 					 msecs_to_jiffies(ms));
 
 	if (ms == 0) {
-		dev_err(&dws->master->cur_msg->spi->dev,
+		dev_err(&dws->host->cur_msg->spi->dev,
 			"DMA transaction timed out\n");
 		return -ETIMEDOUT;
 	}
@@ -299,7 +299,7 @@ static int dw_spi_dma_wait_tx_done(struct dw_spi *dws,
 		spi_delay_exec(&delay, xfer);
 
 	if (retry < 0) {
-		dev_err(&dws->master->dev, "Tx hanged up\n");
+		dev_err(&dws->host->dev, "Tx hanged up\n");
 		return -EIO;
 	}
 
@@ -400,7 +400,7 @@ static int dw_spi_dma_wait_rx_done(struct dw_spi *dws)
 		spi_delay_exec(&delay, NULL);
 
 	if (retry < 0) {
-		dev_err(&dws->master->dev, "Rx hanged up\n");
+		dev_err(&dws->host->dev, "Rx hanged up\n");
 		return -EIO;
 	}
 
@@ -656,13 +656,13 @@ static int dw_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
 	if (ret)
 		return ret;
 
-	if (dws->master->cur_msg->status == -EINPROGRESS) {
+	if (dws->host->cur_msg->status == -EINPROGRESS) {
 		ret = dw_spi_dma_wait_tx_done(dws, xfer);
 		if (ret)
 			return ret;
 	}
 
-	if (xfer->rx_buf && dws->master->cur_msg->status == -EINPROGRESS)
+	if (xfer->rx_buf && dws->host->cur_msg->status == -EINPROGRESS)
 		ret = dw_spi_dma_wait_rx_done(dws);
 
 	return ret;
diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
index a963bc96c223..3a31ad74a538 100644
--- a/drivers/spi/spi-dw-mmio.c
+++ b/drivers/spi/spi-dw-mmio.c
@@ -68,7 +68,7 @@ struct dw_spi_mscc {
 		((((val) << 1) | BIT(0)) << ELBA_SPICS_OFFSET(cs))
 
 /*
- * The Designware SPI controller (referred to as master in the documentation)
+ * The Designware SPI controller (referred to as host in the documentation)
  * automatically deasserts chip select when the tx fifo is empty. The chip
  * selects then needs to be either driven as GPIOs or, for the first 4 using
  * the SPI boot controller registers. the final chip select is an OR gate
@@ -76,7 +76,7 @@ struct dw_spi_mscc {
  */
 static void dw_spi_mscc_set_cs(struct spi_device *spi, bool enable)
 {
-	struct dw_spi *dws = spi_master_get_devdata(spi->master);
+	struct dw_spi *dws = spi_controller_get_devdata(spi->controller);
 	struct dw_spi_mmio *dwsmmio = container_of(dws, struct dw_spi_mmio, dws);
 	struct dw_spi_mscc *dwsmscc = dwsmmio->priv;
 	u32 cs = spi_get_chipselect(spi, 0);
@@ -142,14 +142,14 @@ static int dw_spi_mscc_jaguar2_init(struct platform_device *pdev,
 }
 
 /*
- * The Designware SPI controller (referred to as master in the
+ * The Designware SPI controller (referred to as host in the
  * documentation) automatically deasserts chip select when the tx fifo
  * is empty. The chip selects then needs to be driven by a CS override
  * register. enable is an active low signal.
  */
 static void dw_spi_sparx5_set_cs(struct spi_device *spi, bool enable)
 {
-	struct dw_spi *dws = spi_master_get_devdata(spi->master);
+	struct dw_spi *dws = spi_controller_get_devdata(spi->controller);
 	struct dw_spi_mmio *dwsmmio = container_of(dws, struct dw_spi_mmio, dws);
 	struct dw_spi_mscc *dwsmscc = dwsmmio->priv;
 	u8 cs = spi_get_chipselect(spi, 0);
@@ -277,7 +277,7 @@ static void dw_spi_elba_override_cs(struct regmap *syscon, int cs, int enable)
 
 static void dw_spi_elba_set_cs(struct spi_device *spi, bool enable)
 {
-	struct dw_spi *dws = spi_master_get_devdata(spi->master);
+	struct dw_spi *dws = spi_controller_get_devdata(spi->controller);
 	struct dw_spi_mmio *dwsmmio = container_of(dws, struct dw_spi_mmio, dws);
 	struct regmap *syscon = dwsmmio->priv;
 	u8 cs;
diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h
index 3962e6dcf880..6cafeee8ee2a 100644
--- a/drivers/spi/spi-dw.h
+++ b/drivers/spi/spi-dw.h
@@ -142,14 +142,14 @@ struct dw_spi_dma_ops {
 	int (*dma_init)(struct device *dev, struct dw_spi *dws);
 	void (*dma_exit)(struct dw_spi *dws);
 	int (*dma_setup)(struct dw_spi *dws, struct spi_transfer *xfer);
-	bool (*can_dma)(struct spi_controller *master, struct spi_device *spi,
+	bool (*can_dma)(struct spi_controller *host, struct spi_device *spi,
 			struct spi_transfer *xfer);
 	int (*dma_transfer)(struct dw_spi *dws, struct spi_transfer *xfer);
 	void (*dma_stop)(struct dw_spi *dws);
 };
 
 struct dw_spi {
-	struct spi_controller	*master;
+	struct spi_controller	*host;
 
 	u32			ip;		/* Synopsys DW SSI IP-core ID */
 	u32			ver;		/* Synopsys component version */

From 9f5890466e93a7392c618e50de53cdd3a0dd1fd5 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:20 +0800
Subject: [PATCH 0807/2047] spi: hisi-kunpeng: switch to use modern name

Change legacy name master to modern name host.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-21-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-hisi-kunpeng.c | 82 +++++++++++++++++-----------------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c
index 2b4b3d2a22b8..35ef5e8e2ffd 100644
--- a/drivers/spi/spi-hisi-kunpeng.c
+++ b/drivers/spi/spi-hisi-kunpeng.c
@@ -164,10 +164,10 @@ static int hisi_spi_debugfs_init(struct hisi_spi *hs)
 {
 	char name[32];
 
-	struct spi_controller *master;
+	struct spi_controller *host;
 
-	master = container_of(hs->dev, struct spi_controller, dev);
-	snprintf(name, 32, "hisi_spi%d", master->bus_num);
+	host = container_of(hs->dev, struct spi_controller, dev);
+	snprintf(name, 32, "hisi_spi%d", host->bus_num);
 	hs->debugfs = debugfs_create_dir(name, NULL);
 	if (IS_ERR(hs->debugfs))
 		return -ENOMEM;
@@ -291,18 +291,18 @@ static void __hisi_calc_div_reg(struct hisi_chip_data *chip)
 	chip->div_post = (chip->clk_div / chip->div_pre) - 1;
 }
 
-static u32 hisi_calc_effective_speed(struct spi_controller *master,
+static u32 hisi_calc_effective_speed(struct spi_controller *host,
 			struct hisi_chip_data *chip, u32 speed_hz)
 {
 	u32 effective_speed;
 
 	/* Note clock divider doesn't support odd numbers */
-	chip->clk_div = DIV_ROUND_UP(master->max_speed_hz, speed_hz) + 1;
+	chip->clk_div = DIV_ROUND_UP(host->max_speed_hz, speed_hz) + 1;
 	chip->clk_div &= 0xfffe;
 	if (chip->clk_div > CLK_DIV_MAX)
 		chip->clk_div = CLK_DIV_MAX;
 
-	effective_speed = master->max_speed_hz / chip->clk_div;
+	effective_speed = host->max_speed_hz / chip->clk_div;
 	if (chip->speed_hz != effective_speed) {
 		__hisi_calc_div_reg(chip);
 		chip->speed_hz = effective_speed;
@@ -336,20 +336,20 @@ static void hisi_spi_hw_init(struct hisi_spi *hs)
 
 static irqreturn_t hisi_spi_irq(int irq, void *dev_id)
 {
-	struct spi_controller *master = dev_id;
-	struct hisi_spi *hs = spi_controller_get_devdata(master);
+	struct spi_controller *host = dev_id;
+	struct hisi_spi *hs = spi_controller_get_devdata(host);
 	u32 irq_status = readl(hs->regs + HISI_SPI_ISR) & ISR_MASK;
 
 	if (!irq_status)
 		return IRQ_NONE;
 
-	if (!master->cur_msg)
+	if (!host->cur_msg)
 		return IRQ_HANDLED;
 
 	/* Error handling */
 	if (irq_status & ISR_RXOF) {
 		dev_err(hs->dev, "interrupt_transfer: fifo overflow\n");
-		master->cur_msg->status = -EIO;
+		host->cur_msg->status = -EIO;
 		goto finalize_transfer;
 	}
 
@@ -369,20 +369,20 @@ static irqreturn_t hisi_spi_irq(int irq, void *dev_id)
 
 finalize_transfer:
 	hisi_spi_disable(hs);
-	spi_finalize_current_transfer(master);
+	spi_finalize_current_transfer(host);
 	return IRQ_HANDLED;
 }
 
-static int hisi_spi_transfer_one(struct spi_controller *master,
+static int hisi_spi_transfer_one(struct spi_controller *host,
 		struct spi_device *spi, struct spi_transfer *transfer)
 {
-	struct hisi_spi *hs = spi_controller_get_devdata(master);
+	struct hisi_spi *hs = spi_controller_get_devdata(host);
 	struct hisi_chip_data *chip = spi_get_ctldata(spi);
 	u32 cr = chip->cr;
 
 	/* Update per transfer options for speed and bpw */
 	transfer->effective_speed_hz =
-		hisi_calc_effective_speed(master, chip, transfer->speed_hz);
+		hisi_calc_effective_speed(host, chip, transfer->speed_hz);
 	cr |= FIELD_PREP(CR_DIV_PRE_MASK, chip->div_pre);
 	cr |= FIELD_PREP(CR_DIV_POST_MASK, chip->div_post);
 	cr |= FIELD_PREP(CR_BPW_MASK, transfer->bits_per_word - 1);
@@ -409,10 +409,10 @@ static int hisi_spi_transfer_one(struct spi_controller *master,
 	return 1;
 }
 
-static void hisi_spi_handle_err(struct spi_controller *master,
+static void hisi_spi_handle_err(struct spi_controller *host,
 		struct spi_message *msg)
 {
-	struct hisi_spi *hs = spi_controller_get_devdata(master);
+	struct hisi_spi *hs = spi_controller_get_devdata(host);
 
 	hisi_spi_disable(hs);
 
@@ -452,7 +452,7 @@ static void hisi_spi_cleanup(struct spi_device *spi)
 static int hisi_spi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct spi_controller *master;
+	struct spi_controller *host;
 	struct hisi_spi *hs;
 	int ret, irq;
 
@@ -460,13 +460,13 @@ static int hisi_spi_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	master = devm_spi_alloc_master(dev, sizeof(*hs));
-	if (!master)
+	host = devm_spi_alloc_host(dev, sizeof(*hs));
+	if (!host)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
-	hs = spi_controller_get_devdata(master);
+	hs = spi_controller_get_devdata(host);
 	hs->dev = dev;
 	hs->irq = irq;
 
@@ -474,9 +474,9 @@ static int hisi_spi_probe(struct platform_device *pdev)
 	if (IS_ERR(hs->regs))
 		return PTR_ERR(hs->regs);
 
-	/* Specify maximum SPI clocking speed (master only) by firmware */
+	/* Specify maximum SPI clocking speed (host only) by firmware */
 	ret = device_property_read_u32(dev, "spi-max-frequency",
-					&master->max_speed_hz);
+					&host->max_speed_hz);
 	if (ret) {
 		dev_err(dev, "failed to get max SPI clocking speed, ret=%d\n",
 			ret);
@@ -484,32 +484,32 @@ static int hisi_spi_probe(struct platform_device *pdev)
 	}
 
 	ret = device_property_read_u16(dev, "num-cs",
-					&master->num_chipselect);
+					&host->num_chipselect);
 	if (ret)
-		master->num_chipselect = DEFAULT_NUM_CS;
+		host->num_chipselect = DEFAULT_NUM_CS;
 
-	master->use_gpio_descriptors = true;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
-	master->bus_num = pdev->id;
-	master->setup = hisi_spi_setup;
-	master->cleanup = hisi_spi_cleanup;
-	master->transfer_one = hisi_spi_transfer_one;
-	master->handle_err = hisi_spi_handle_err;
-	master->dev.fwnode = dev->fwnode;
+	host->use_gpio_descriptors = true;
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
+	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
+	host->bus_num = pdev->id;
+	host->setup = hisi_spi_setup;
+	host->cleanup = hisi_spi_cleanup;
+	host->transfer_one = hisi_spi_transfer_one;
+	host->handle_err = hisi_spi_handle_err;
+	host->dev.fwnode = dev->fwnode;
 
 	hisi_spi_hw_init(hs);
 
 	ret = devm_request_irq(dev, hs->irq, hisi_spi_irq, 0, dev_name(dev),
-			master);
+			       host);
 	if (ret < 0) {
 		dev_err(dev, "failed to get IRQ=%d, ret=%d\n", hs->irq, ret);
 		return ret;
 	}
 
-	ret = spi_register_controller(master);
+	ret = spi_register_controller(host);
 	if (ret) {
-		dev_err(dev, "failed to register spi master, ret=%d\n", ret);
+		dev_err(dev, "failed to register spi host, ret=%d\n", ret);
 		return ret;
 	}
 
@@ -518,18 +518,18 @@ static int hisi_spi_probe(struct platform_device *pdev)
 
 	dev_info(dev, "hw version:0x%x max-freq:%u kHz\n",
 		readl(hs->regs + HISI_SPI_VERSION),
-		master->max_speed_hz / 1000);
+		host->max_speed_hz / 1000);
 
 	return 0;
 }
 
 static void hisi_spi_remove(struct platform_device *pdev)
 {
-	struct spi_controller *master = platform_get_drvdata(pdev);
-	struct hisi_spi *hs = spi_controller_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct hisi_spi *hs = spi_controller_get_devdata(host);
 
 	debugfs_remove_recursive(hs->debugfs);
-	spi_unregister_controller(master);
+	spi_unregister_controller(host);
 }
 
 static const struct acpi_device_id hisi_spi_acpi_match[] = {

From 1f6c80a74cd4686b5b0b9aed55d7195009c96330 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 28 Jul 2023 17:32:21 +0800
Subject: [PATCH 0808/2047] spi: npcm-fiu: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230728093221.3312026-22-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-npcm-fiu.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c
index 9e8c914ff26c..0ca21ff0e9cc 100644
--- a/drivers/spi/spi-npcm-fiu.c
+++ b/drivers/spi/spi-npcm-fiu.c
@@ -287,7 +287,7 @@ static ssize_t npcm_fiu_direct_read(struct spi_mem_dirmap_desc *desc,
 				    u64 offs, size_t len, void *buf)
 {
 	struct npcm_fiu_spi *fiu =
-		spi_controller_get_devdata(desc->mem->spi->master);
+		spi_controller_get_devdata(desc->mem->spi->controller);
 	struct npcm_fiu_chip *chip = &fiu->chip[spi_get_chipselect(desc->mem->spi, 0)];
 	void __iomem *src = (void __iomem *)(chip->flash_region_mapped_ptr +
 					     offs);
@@ -314,7 +314,7 @@ static ssize_t npcm_fiu_direct_write(struct spi_mem_dirmap_desc *desc,
 				     u64 offs, size_t len, const void *buf)
 {
 	struct npcm_fiu_spi *fiu =
-		spi_controller_get_devdata(desc->mem->spi->master);
+		spi_controller_get_devdata(desc->mem->spi->controller);
 	struct npcm_fiu_chip *chip = &fiu->chip[spi_get_chipselect(desc->mem->spi, 0)];
 	void __iomem *dst = (void __iomem *)(chip->flash_region_mapped_ptr +
 					     offs);
@@ -335,7 +335,7 @@ static int npcm_fiu_uma_read(struct spi_mem *mem,
 			      bool is_address_size, u8 *data, u32 data_size)
 {
 	struct npcm_fiu_spi *fiu =
-		spi_controller_get_devdata(mem->spi->master);
+		spi_controller_get_devdata(mem->spi->controller);
 	u32 uma_cfg = BIT(10);
 	u32 data_reg[4];
 	int ret;
@@ -390,7 +390,7 @@ static int npcm_fiu_uma_write(struct spi_mem *mem,
 			      bool is_address_size, u8 *data, u32 data_size)
 {
 	struct npcm_fiu_spi *fiu =
-		spi_controller_get_devdata(mem->spi->master);
+		spi_controller_get_devdata(mem->spi->controller);
 	u32 uma_cfg = BIT(10);
 	u32 data_reg[4] = {0};
 	u32 val;
@@ -439,7 +439,7 @@ static int npcm_fiu_manualwrite(struct spi_mem *mem,
 				const struct spi_mem_op *op)
 {
 	struct npcm_fiu_spi *fiu =
-		spi_controller_get_devdata(mem->spi->master);
+		spi_controller_get_devdata(mem->spi->controller);
 	u8 *data = (u8 *)op->data.buf.out;
 	u32 num_data_chunks;
 	u32 remain_data;
@@ -544,7 +544,7 @@ static void npcm_fiux_set_direct_rd(struct npcm_fiu_spi *fiu)
 static int npcm_fiu_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
 {
 	struct npcm_fiu_spi *fiu =
-		spi_controller_get_devdata(mem->spi->master);
+		spi_controller_get_devdata(mem->spi->controller);
 	struct npcm_fiu_chip *chip = &fiu->chip[spi_get_chipselect(mem->spi, 0)];
 	int ret = 0;
 	u8 *buf;
@@ -604,7 +604,7 @@ static int npcm_fiu_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
 static int npcm_fiu_dirmap_create(struct spi_mem_dirmap_desc *desc)
 {
 	struct npcm_fiu_spi *fiu =
-		spi_controller_get_devdata(desc->mem->spi->master);
+		spi_controller_get_devdata(desc->mem->spi->controller);
 	struct npcm_fiu_chip *chip = &fiu->chip[spi_get_chipselect(desc->mem->spi, 0)];
 	struct regmap *gcr_regmap;
 
@@ -665,7 +665,7 @@ static int npcm_fiu_dirmap_create(struct spi_mem_dirmap_desc *desc)
 
 static int npcm_fiu_setup(struct spi_device *spi)
 {
-	struct spi_controller *ctrl = spi->master;
+	struct spi_controller *ctrl = spi->controller;
 	struct npcm_fiu_spi *fiu = spi_controller_get_devdata(ctrl);
 	struct npcm_fiu_chip *chip;
 
@@ -701,7 +701,7 @@ static int npcm_fiu_probe(struct platform_device *pdev)
 	void __iomem *regbase;
 	int id, ret;
 
-	ctrl = devm_spi_alloc_master(dev, sizeof(*fiu));
+	ctrl = devm_spi_alloc_host(dev, sizeof(*fiu));
 	if (!ctrl)
 		return -ENOMEM;
 
@@ -755,7 +755,7 @@ static int npcm_fiu_probe(struct platform_device *pdev)
 	ctrl->num_chipselect = fiu->info->max_cs;
 	ctrl->dev.of_node = dev->of_node;
 
-	ret = devm_spi_register_master(dev, ctrl);
+	ret = devm_spi_register_controller(dev, ctrl);
 	if (ret)
 		clk_disable_unprepare(fiu->clk);
 

From 3182d49aad5f1cd8acdcf7de0c5b651772edd32e Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Wed, 2 Aug 2023 17:43:57 +0800
Subject: [PATCH 0809/2047] spi: spi-zynq: Do not check for 0 return after
 calling platform_get_irq()

It is not possible for platform_get_irq() to return 0. Use the
return value from platform_get_irq().

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Link: https://lore.kernel.org/r/20230802094357.981100-1-ruanjinjie@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-zynq-qspi.c    | 4 ++--
 drivers/spi/spi-zynqmp-gqspi.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c
index ee1995b91287..0db69a2a72ff 100644
--- a/drivers/spi/spi-zynq-qspi.c
+++ b/drivers/spi/spi-zynq-qspi.c
@@ -679,8 +679,8 @@ static int zynq_qspi_probe(struct platform_device *pdev)
 	}
 
 	xqspi->irq = platform_get_irq(pdev, 0);
-	if (xqspi->irq <= 0) {
-		ret = -ENXIO;
+	if (xqspi->irq < 0) {
+		ret = xqspi->irq;
 		goto clk_dis_all;
 	}
 	ret = devm_request_irq(&pdev->dev, xqspi->irq, zynq_qspi_irq,
diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
index a6b892d01038..94d9a33d9af5 100644
--- a/drivers/spi/spi-zynqmp-gqspi.c
+++ b/drivers/spi/spi-zynqmp-gqspi.c
@@ -1293,8 +1293,8 @@ static int zynqmp_qspi_probe(struct platform_device *pdev)
 	zynqmp_qspi_init_hw(xqspi);
 
 	xqspi->irq = platform_get_irq(pdev, 0);
-	if (xqspi->irq <= 0) {
-		ret = -ENXIO;
+	if (xqspi->irq < 0) {
+		ret = xqspi->irq;
 		goto clk_dis_all;
 	}
 	ret = devm_request_irq(&pdev->dev, xqspi->irq, zynqmp_qspi_irq,

From 8102d64c04e8ead02c30bb07ff7dd5c41ed61bce Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Wed, 2 Aug 2023 17:32:38 +0800
Subject: [PATCH 0810/2047] spi: Do not check for 0 return after calling
 platform_get_irq()

It is not possible for platform_get_irq() to return 0. Use the
return value from platform_get_irq().

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com> #
Link: https://lore.kernel.org/r/20230802093238.975906-1-ruanjinjie@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-axi-spi-engine.c | 4 ++--
 drivers/spi/spi-bcm2835.c        | 4 ++--
 drivers/spi/spi-bcm2835aux.c     | 4 ++--
 drivers/spi/spi-cadence.c        | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index 89661f3b0d44..c3224b85bc79 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -470,8 +470,8 @@ static int spi_engine_probe(struct platform_device *pdev)
 	int ret;
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq <= 0)
-		return -ENXIO;
+	if (irq < 0)
+		return irq;
 
 	spi_engine = devm_kzalloc(&pdev->dev, sizeof(*spi_engine), GFP_KERNEL);
 	if (!spi_engine)
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index c16abc2a9e9b..3f7f61ba66d5 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -1360,8 +1360,8 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
 	ctlr->max_speed_hz = clk_get_rate(bs->clk) / 2;
 
 	bs->irq = platform_get_irq(pdev, 0);
-	if (bs->irq <= 0)
-		return bs->irq ? bs->irq : -ENODEV;
+	if (bs->irq < 0)
+		return bs->irq;
 
 	err = clk_prepare_enable(bs->clk);
 	if (err)
diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index 8ace417c0a29..843dac847584 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -520,8 +520,8 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev)
 	}
 
 	bs->irq = platform_get_irq(pdev, 0);
-	if (bs->irq <= 0)
-		return bs->irq ? bs->irq : -ENODEV;
+	if (bs->irq < 0)
+		return bs->irq;
 
 	/* this also enables the HW block */
 	err = clk_prepare_enable(bs->clk);
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index 42f101d357c3..7b0136536cfb 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -627,8 +627,8 @@ static int cdns_spi_probe(struct platform_device *pdev)
 	cdns_spi_init_hw(xspi, spi_controller_is_slave(ctlr));
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq <= 0) {
-		ret = -ENXIO;
+	if (irq < 0) {
+		ret = irq;
 		goto clk_dis_all;
 	}
 

From 33af38d85b5c9e8ffe26f9712319ed28c5ded64f Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Sat, 5 Aug 2023 19:04:06 +0800
Subject: [PATCH 0811/2047] cpu/hotplug: Remove unused function declaration
 cpu_set_state_online()

Commit 5356297d12d9 ("cpu/hotplug: Remove cpu_report_state() and related unused cruft")
removed function but leave the declaration.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20230805110406.45900-1-yuehaibing@huawei.com
---
 include/linux/cpu.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 6b326a9e8191..ed56b2534500 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -191,7 +191,6 @@ void arch_cpu_finalize_init(void);
 static inline void arch_cpu_finalize_init(void) { }
 #endif
 
-void cpu_set_state_online(int cpu);
 void play_idle_precise(u64 duration_ns, u64 latency_ns);
 
 static inline void play_idle(unsigned long duration_us)

From 264b82fdb4989cf6a44a2bcd0c6ea05e8026b2ac Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:26:58 +0200
Subject: [PATCH 0812/2047] x86/decompressor: Don't rely on upper 32 bits of
 GPRs being preserved
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 4-to-5 level mode switch trampoline disables long mode and paging in
order to be able to flick the LA57 bit. According to section 3.4.1.1 of
the x86 architecture manual [0], 64-bit GPRs might not retain the upper
32 bits of their contents across such a mode switch.

Given that RBP, RBX and RSI are live at this point, preserve them on the
stack, along with the return address that might be above 4G as well.

[0] Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1: Basic Architecture

  "Because the upper 32 bits of 64-bit general-purpose registers are
   undefined in 32-bit modes, the upper 32 bits of any general-purpose
   register are not preserved when switching from 64-bit mode to a 32-bit
   mode (to protected mode or compatibility mode). Software must not
   depend on these bits to maintain a value after a 64-bit to 32-bit
   mode switch."

Fixes: 194a9749c73d650c ("x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G")
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-2-ardb@kernel.org
---
 arch/x86/boot/compressed/head_64.S | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 03c4328a88cb..f732426d3b48 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -459,11 +459,25 @@ SYM_CODE_START(startup_64)
 	/* Save the trampoline address in RCX */
 	movq	%rax, %rcx
 
+	/* Set up 32-bit addressable stack */
+	leaq	TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp
+
 	/*
-	 * Load the address of trampoline_return() into RDI.
-	 * It will be used by the trampoline to return to the main code.
+	 * Preserve live 64-bit registers on the stack: this is necessary
+	 * because the architecture does not guarantee that GPRs will retain
+	 * their full 64-bit values across a 32-bit mode switch.
+	 */
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%rsi
+
+	/*
+	 * Push the 64-bit address of trampoline_return() onto the new stack.
+	 * It will be used by the trampoline to return to the main code. Due to
+	 * the 32-bit mode switch, it cannot be kept it in a register either.
 	 */
 	leaq	trampoline_return(%rip), %rdi
+	pushq	%rdi
 
 	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
 	pushq	$__KERNEL32_CS
@@ -471,6 +485,11 @@ SYM_CODE_START(startup_64)
 	pushq	%rax
 	lretq
 trampoline_return:
+	/* Restore live 64-bit registers */
+	popq	%rsi
+	popq	%rbx
+	popq	%rbp
+
 	/* Restore the stack, the 32-bit trampoline uses its own stack */
 	leaq	rva(boot_stack_end)(%rbx), %rsp
 
@@ -582,7 +601,7 @@ SYM_FUNC_END(.Lrelocated)
 /*
  * This is the 32-bit trampoline that will be copied over to low memory.
  *
- * RDI contains the return address (might be above 4G).
+ * Return address is at the top of the stack (might be above 4G).
  * ECX contains the base address of the trampoline memory.
  * Non zero RDX means trampoline needs to enable 5-level paging.
  */
@@ -592,9 +611,6 @@ SYM_CODE_START(trampoline_32bit_src)
 	movl	%eax, %ds
 	movl	%eax, %ss
 
-	/* Set up new stack */
-	leal	TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
-
 	/* Disable paging */
 	movl	%cr0, %eax
 	btrl	$X86_CR0_PG_BIT, %eax
@@ -671,7 +687,7 @@ SYM_CODE_END(trampoline_32bit_src)
 	.code64
 SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
 	/* Return from the trampoline */
-	jmp	*%rdi
+	retq
 SYM_FUNC_END(.Lpaging_enabled)
 
 	/*

From 2f69a81ad6873231a2ae78d5ade3263ec54a4c9c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:26:59 +0200
Subject: [PATCH 0813/2047] x86/head_64: Store boot_params pointer in callee
 save register

Instead of pushing/popping %RSI to/from the stack every time a function
is called from startup_64(), store it in a callee preserved register
and grab it from there when its value is actually needed.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-3-ardb@kernel.org
---
 arch/x86/kernel/head_64.S | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c5b9289837dc..ea6995920b7a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -51,7 +51,9 @@ SYM_CODE_START_NOALIGN(startup_64)
 	 * for us.  These identity mapped page tables map all of the
 	 * kernel pages and possibly all of memory.
 	 *
-	 * %rsi holds a physical pointer to real_mode_data.
+	 * %RSI holds the physical address of the boot_params structure
+	 * provided by the bootloader. Preserve it in %R15 so C function calls
+	 * will not clobber it.
 	 *
 	 * We come here either directly from a 64bit bootloader, or from
 	 * arch/x86/boot/compressed/head_64.S.
@@ -62,6 +64,7 @@ SYM_CODE_START_NOALIGN(startup_64)
 	 * compiled to run at we first fixup the physical addresses in our page
 	 * tables and then reload them.
 	 */
+	mov	%rsi, %r15
 
 	/* Set up the stack for verify_cpu() */
 	leaq	(__end_init_task - PTREGS_SIZE)(%rip), %rsp
@@ -75,9 +78,7 @@ SYM_CODE_START_NOALIGN(startup_64)
 	shrq	$32,  %rdx
 	wrmsr
 
-	pushq	%rsi
 	call	startup_64_setup_env
-	popq	%rsi
 
 	/* Now switch to __KERNEL_CS so IRET works reliably */
 	pushq	$__KERNEL_CS
@@ -93,12 +94,10 @@ SYM_CODE_START_NOALIGN(startup_64)
 	 * Activate SEV/SME memory encryption if supported/enabled. This needs to
 	 * be done now, since this also includes setup of the SEV-SNP CPUID table,
 	 * which needs to be done before any CPUID instructions are executed in
-	 * subsequent code.
+	 * subsequent code. Pass the boot_params pointer as the first argument.
 	 */
-	movq	%rsi, %rdi
-	pushq	%rsi
+	movq	%r15, %rdi
 	call	sme_enable
-	popq	%rsi
 #endif
 
 	/* Sanitize CPU configuration */
@@ -111,9 +110,8 @@ SYM_CODE_START_NOALIGN(startup_64)
 	 * programmed into CR3.
 	 */
 	leaq	_text(%rip), %rdi
-	pushq	%rsi
+	movq	%r15, %rsi
 	call	__startup_64
-	popq	%rsi
 
 	/* Form the CR3 value being sure to include the CR3 modifier */
 	addq	$(early_top_pgt - __START_KERNEL_map), %rax
@@ -127,8 +125,6 @@ SYM_CODE_START(secondary_startup_64)
 	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
 	 * and someone has loaded a mapped page table.
 	 *
-	 * %rsi holds a physical pointer to real_mode_data.
-	 *
 	 * We come here either from startup_64 (using physical addresses)
 	 * or from trampoline.S (using virtual addresses).
 	 *
@@ -153,6 +149,9 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	UNWIND_HINT_END_OF_STACK
 	ANNOTATE_NOENDBR
 
+	/* Clear %R15 which holds the boot_params pointer on the boot CPU */
+	xorq	%r15, %r15
+
 	/*
 	 * Retrieve the modifier (SME encryption mask if SME is active) to be
 	 * added to the initial pgdir entry that will be programmed into CR3.
@@ -199,13 +198,9 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	 * hypervisor could lie about the C-bit position to perform a ROP
 	 * attack on the guest by writing to the unencrypted stack and wait for
 	 * the next RET instruction.
-	 * %rsi carries pointer to realmode data and is callee-clobbered. Save
-	 * and restore it.
 	 */
-	pushq	%rsi
 	movq	%rax, %rdi
 	call	sev_verify_cbit
-	popq	%rsi
 
 	/*
 	 * Switch to new page-table
@@ -365,9 +360,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	wrmsr
 
 	/* Setup and Load IDT */
-	pushq	%rsi
 	call	early_setup_idt
-	popq	%rsi
 
 	/* Check if nx is implemented */
 	movl	$0x80000001, %eax
@@ -403,9 +396,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	pushq $0
 	popfq
 
-	/* rsi is pointer to real mode structure with interesting info.
-	   pass it to C */
-	movq	%rsi, %rdi
+	/* Pass the boot_params pointer as first argument */
+	movq	%r15, %rdi
 
 .Ljump_to_C_code:
 	/*

From 1c75d749b5ebd2352cc72ed33d4fb522c624d974 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 22:07:06 +0800
Subject: [PATCH 0814/2047] spi: cadence-quadspi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807140717.3484180-11-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cadence-quadspi.c | 44 +++++++++++++++----------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index 6064b8e0319e..b50db71ac4cc 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -67,7 +67,7 @@ struct cqspi_flash_pdata {
 
 struct cqspi_st {
 	struct platform_device	*pdev;
-	struct spi_master	*master;
+	struct spi_controller	*host;
 	struct clk		*clk;
 	struct clk		*clks[CLK_QSPI_NUM];
 	unsigned int		sclk;
@@ -1379,7 +1379,7 @@ static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata,
 
 static int cqspi_mem_process(struct spi_mem *mem, const struct spi_mem_op *op)
 {
-	struct cqspi_st *cqspi = spi_master_get_devdata(mem->spi->master);
+	struct cqspi_st *cqspi = spi_controller_get_devdata(mem->spi->controller);
 	struct cqspi_flash_pdata *f_pdata;
 
 	f_pdata = &cqspi->f_pdata[spi_get_chipselect(mem->spi, 0)];
@@ -1585,7 +1585,7 @@ static int cqspi_request_mmap_dma(struct cqspi_st *cqspi)
 
 static const char *cqspi_get_name(struct spi_mem *mem)
 {
-	struct cqspi_st *cqspi = spi_master_get_devdata(mem->spi->master);
+	struct cqspi_st *cqspi = spi_controller_get_devdata(mem->spi->controller);
 	struct device *dev = &cqspi->pdev->dev;
 
 	return devm_kasprintf(dev, GFP_KERNEL, "%s.%d", dev_name(dev),
@@ -1690,26 +1690,26 @@ static int cqspi_probe(struct platform_device *pdev)
 	const struct cqspi_driver_platdata *ddata;
 	struct reset_control *rstc, *rstc_ocp, *rstc_ref;
 	struct device *dev = &pdev->dev;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct resource *res_ahb;
 	struct cqspi_st *cqspi;
 	int ret;
 	int irq;
 
-	master = devm_spi_alloc_master(&pdev->dev, sizeof(*cqspi));
-	if (!master) {
-		dev_err(&pdev->dev, "spi_alloc_master failed\n");
+	host = devm_spi_alloc_host(&pdev->dev, sizeof(*cqspi));
+	if (!host) {
+		dev_err(&pdev->dev, "devm_spi_alloc_host failed\n");
 		return -ENOMEM;
 	}
-	master->mode_bits = SPI_RX_QUAD | SPI_RX_DUAL;
-	master->mem_ops = &cqspi_mem_ops;
-	master->mem_caps = &cqspi_mem_caps;
-	master->dev.of_node = pdev->dev.of_node;
+	host->mode_bits = SPI_RX_QUAD | SPI_RX_DUAL;
+	host->mem_ops = &cqspi_mem_ops;
+	host->mem_caps = &cqspi_mem_caps;
+	host->dev.of_node = pdev->dev.of_node;
 
-	cqspi = spi_master_get_devdata(master);
+	cqspi = spi_controller_get_devdata(host);
 
 	cqspi->pdev = pdev;
-	cqspi->master = master;
+	cqspi->host = host;
 	cqspi->is_jh7110 = false;
 	platform_set_drvdata(pdev, cqspi);
 
@@ -1797,7 +1797,7 @@ static int cqspi_probe(struct platform_device *pdev)
 	reset_control_deassert(rstc_ocp);
 
 	cqspi->master_ref_clk_hz = clk_get_rate(cqspi->clk);
-	master->max_speed_hz = cqspi->master_ref_clk_hz;
+	host->max_speed_hz = cqspi->master_ref_clk_hz;
 
 	/* write completion is supported by default */
 	cqspi->wr_completion = true;
@@ -1808,7 +1808,7 @@ static int cqspi_probe(struct platform_device *pdev)
 			cqspi->wr_delay = 50 * DIV_ROUND_UP(NSEC_PER_SEC,
 						cqspi->master_ref_clk_hz);
 		if (ddata->hwcaps_mask & CQSPI_SUPPORTS_OCTAL)
-			master->mode_bits |= SPI_RX_OCTAL | SPI_TX_OCTAL;
+			host->mode_bits |= SPI_RX_OCTAL | SPI_TX_OCTAL;
 		if (!(ddata->quirks & CQSPI_DISABLE_DAC_MODE)) {
 			cqspi->use_direct_mode = true;
 			cqspi->use_direct_mode_wr = true;
@@ -1848,7 +1848,7 @@ static int cqspi_probe(struct platform_device *pdev)
 	cqspi->current_cs = -1;
 	cqspi->sclk = 0;
 
-	master->num_chipselect = cqspi->num_chipselect;
+	host->num_chipselect = cqspi->num_chipselect;
 
 	ret = cqspi_setup_flash(cqspi);
 	if (ret) {
@@ -1862,7 +1862,7 @@ static int cqspi_probe(struct platform_device *pdev)
 			goto probe_setup_failed;
 	}
 
-	ret = spi_register_master(master);
+	ret = spi_register_controller(host);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to register SPI ctlr %d\n", ret);
 		goto probe_setup_failed;
@@ -1884,7 +1884,7 @@ static void cqspi_remove(struct platform_device *pdev)
 {
 	struct cqspi_st *cqspi = platform_get_drvdata(pdev);
 
-	spi_unregister_master(cqspi->master);
+	spi_unregister_controller(cqspi->host);
 	cqspi_controller_enable(cqspi, 0);
 
 	if (cqspi->rx_chan)
@@ -1902,10 +1902,10 @@ static void cqspi_remove(struct platform_device *pdev)
 static int cqspi_suspend(struct device *dev)
 {
 	struct cqspi_st *cqspi = dev_get_drvdata(dev);
-	struct spi_master *master = dev_get_drvdata(dev);
+	struct spi_controller *host = dev_get_drvdata(dev);
 	int ret;
 
-	ret = spi_master_suspend(master);
+	ret = spi_controller_suspend(host);
 	cqspi_controller_enable(cqspi, 0);
 
 	clk_disable_unprepare(cqspi->clk);
@@ -1916,7 +1916,7 @@ static int cqspi_suspend(struct device *dev)
 static int cqspi_resume(struct device *dev)
 {
 	struct cqspi_st *cqspi = dev_get_drvdata(dev);
-	struct spi_master *master = dev_get_drvdata(dev);
+	struct spi_controller *host = dev_get_drvdata(dev);
 
 	clk_prepare_enable(cqspi->clk);
 	cqspi_wait_idle(cqspi);
@@ -1925,7 +1925,7 @@ static int cqspi_resume(struct device *dev)
 	cqspi->current_cs = -1;
 	cqspi->sclk = 0;
 
-	return spi_master_resume(master);
+	return spi_controller_resume(host);
 }
 
 static DEFINE_SIMPLE_DEV_PM_OPS(cqspi_dev_pm_ops, cqspi_suspend, cqspi_resume);

From 4f5177458ba97734149897c5d4d44f85b8fe0348 Mon Sep 17 00:00:00 2001
From: Rajeshwar R Shinde <coolrrsh@gmail.com>
Date: Mon, 7 Aug 2023 20:19:42 +0530
Subject: [PATCH 0815/2047] spi: spi-mpc52xx-psc: Fix an unsigned comparison
 that can never be negative

In struct mpc52xx_psc_spi, the variable 'irq' is declared as an unsigned int.
The comparison of variable 'irq'  with signed int operand is incorrect. Also,
the return value from the call to platform_get_irq(pdev,0) is int and it is
assigned to an unsigned int variable 'irq', thus redeclaring the type of
variable 'irq' to signed int.

This fixes warning such as:
drivers/spi/spi-mpc52xx-psc.c:332:5-13:
WARNING: Unsigned expression compared with zero: mps -> irq < 0

Signed-off-by: Rajeshwar R Shinde <coolrrsh@gmail.com>
Link: https://lore.kernel.org/r/20230807144942.30317-2-coolrrsh@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mpc52xx-psc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-mpc52xx-psc.c b/drivers/spi/spi-mpc52xx-psc.c
index 9a1a080fb688..795c08594a4d 100644
--- a/drivers/spi/spi-mpc52xx-psc.c
+++ b/drivers/spi/spi-mpc52xx-psc.c
@@ -29,7 +29,7 @@ struct mpc52xx_psc_spi {
 	/* driver internal data */
 	struct mpc52xx_psc __iomem *psc;
 	struct mpc52xx_psc_fifo __iomem *fifo;
-	unsigned int irq;
+	int irq;
 	u8 bits_per_word;
 
 	struct completion done;

From 142c61a5fddeb755c420cb2e23b4bc0c0901308f Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Tue, 1 Aug 2023 21:54:42 +0800
Subject: [PATCH 0816/2047] spi: omap2-mcspi: remove redundant dev_err_probe()

When platform_get_irq() is called, the error message has been printed,
so it need not to call dev_err_probe() to print error, we remove the
redundant platform_get_irq().

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Link: https://lore.kernel.org/r/20230801135442.255604-1-wangzhu9@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-omap2-mcspi.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 8331e247bf5c..e5cd82eb9e54 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -1508,10 +1508,8 @@ static int omap2_mcspi_probe(struct platform_device *pdev)
 	}
 
 	status = platform_get_irq(pdev, 0);
-	if (status < 0) {
-		dev_err_probe(&pdev->dev, status, "no irq resource found\n");
+	if (status < 0)
 		goto free_master;
-	}
 	init_completion(&mcspi->txdone);
 	status = devm_request_irq(&pdev->dev, status,
 				  omap2_mcspi_irq_handler, 0, pdev->name,

From 0e19118ab24beea86dac16ce3c550c47d31f777d Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jonas.gorski@gmail.com>
Date: Thu, 27 Jul 2023 09:08:06 +0200
Subject: [PATCH 0817/2047] dt-bindings: spi: convert spi-brcm63xx.txt to YAML

Changes done during conversion:

* added a description, lifting and adapting the limitation sentence from
  brcm,bcm63xx-hsspi.yml
* added appropriate compatibles for all SoCs that are supported by
  bcm63xx/bmips

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
Link: https://lore.kernel.org/r/20230727070806.12205-1-jonas.gorski@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/spi/brcm,bcm63xx-spi.yaml        | 71 +++++++++++++++++++
 .../devicetree/bindings/spi/spi-bcm63xx.txt   | 33 ---------
 MAINTAINERS                                   |  2 +-
 3 files changed, 72 insertions(+), 34 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/spi/brcm,bcm63xx-spi.yaml
 delete mode 100644 Documentation/devicetree/bindings/spi/spi-bcm63xx.txt

diff --git a/Documentation/devicetree/bindings/spi/brcm,bcm63xx-spi.yaml b/Documentation/devicetree/bindings/spi/brcm,bcm63xx-spi.yaml
new file mode 100644
index 000000000000..fa03cdd68e70
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/brcm,bcm63xx-spi.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/brcm,bcm63xx-spi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM6348/BCM6358 SPI controller
+
+maintainers:
+  - Jonas Gorski <jonas.gorski@gmail.com>
+
+description: |
+  Broadcom "Low Speed" SPI controller found in many older MIPS based Broadband
+  SoCs.
+
+  This controller has a limitation that can not keep the chip select line active
+  between the SPI transfers within the same SPI message. This can terminate the
+  transaction to some SPI devices prematurely. The issue can be worked around by
+  the controller's prepend mode.
+
+allOf:
+  - $ref: spi-controller.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - brcm,bcm6368-spi
+              - brcm,bcm6362-spi
+              - brcm,bcm63268-spi
+          - const: brcm,bcm6358-spi
+      - enum:
+          - brcm,bcm6348-spi
+          - brcm,bcm6358-spi
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: SPI master reference clock
+
+  clock-names:
+    items:
+      - const: spi
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - interrupts
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    spi@10000800 {
+        compatible = "brcm,bcm6368-spi", "brcm,bcm6358-spi";
+        reg = <0x10000800 0x70c>;
+        interrupts = <1>;
+        clocks = <&clkctl 9>;
+        clock-names = "spi";
+        num-cs = <5>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+    };
diff --git a/Documentation/devicetree/bindings/spi/spi-bcm63xx.txt b/Documentation/devicetree/bindings/spi/spi-bcm63xx.txt
deleted file mode 100644
index 1c16f6692613..000000000000
--- a/Documentation/devicetree/bindings/spi/spi-bcm63xx.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Binding for Broadcom BCM6348/BCM6358 SPI controller
-
-Required properties:
-- compatible: must contain one of "brcm,bcm6348-spi", "brcm,bcm6358-spi".
-- reg: Base address and size of the controllers memory area.
-- interrupts: Interrupt for the SPI block.
-- clocks: phandle of the SPI clock.
-- clock-names: has to be "spi".
-- #address-cells: <1>, as required by generic SPI binding.
-- #size-cells: <0>, also as required by generic SPI binding.
-
-Optional properties:
-- num-cs: some controllers have less than 8 cs signals. Defaults to 8
-  if absent.
-
-Child nodes as per the generic SPI binding.
-
-Example:
-
-	spi@10000800 {
-		compatible = "brcm,bcm6368-spi", "brcm,bcm6358-spi";
-		reg = <0x10000800 0x70c>;
-
-		interrupts = <1>;
-
-		clocks = <&clkctl 9>;
-		clock-names = "spi";
-
-		num-cs = <5>;
-
-		#address-cells = <1>;
-		#size-cells = <0>;
-	};
diff --git a/MAINTAINERS b/MAINTAINERS
index 6578fabb936f..172b53e37080 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4127,7 +4127,7 @@ BROADCOM BCM6348/BCM6358 SPI controller DRIVER
 M:	Jonas Gorski <jonas.gorski@gmail.com>
 L:	linux-spi@vger.kernel.org
 S:	Odd Fixes
-F:	Documentation/devicetree/bindings/spi/spi-bcm63xx.txt
+F:	Documentation/devicetree/bindings/spi/brcm,bcm63xx-spi.yaml
 F:	drivers/spi/spi-bcm63xx.c
 
 BROADCOM ETHERNET PHY DRIVERS

From d2d7a54f69b67cd0a30e0ebb5307cb2de625baac Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:00 +0200
Subject: [PATCH 0818/2047] x86/efistub: Branch straight to kernel entry point
 from C code

Instead of returning to the calling code in assembler that does nothing
more than perform an indirect call with the boot_params pointer in
register ESI/RSI, perform the jump directly from the EFI stub C code.
This will allow the asm entrypoint code to be dropped entirely in
subsequent patches.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-4-ardb@kernel.org
---
 drivers/firmware/efi/libstub/x86-stub.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 220be75a5cdc..40a10db2d85e 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -290,7 +290,7 @@ adjust_memory_range_protection(unsigned long start, unsigned long size)
 #define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024)
 #define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024)
 
-void startup_32(struct boot_params *boot_params);
+extern const u8 startup_32[], startup_64[];
 
 static void
 setup_memory_protection(unsigned long image_base, unsigned long image_size)
@@ -803,10 +803,19 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
 	return EFI_SUCCESS;
 }
 
+static void __noreturn enter_kernel(unsigned long kernel_addr,
+				    struct boot_params *boot_params)
+{
+	/* enter decompressed kernel with boot_params pointer in RSI/ESI */
+	asm("jmp *%0"::"r"(kernel_addr), "S"(boot_params));
+
+	unreachable();
+}
+
 /*
- * On success, we return the address of startup_32, which has potentially been
- * relocated by efi_relocate_kernel.
- * On failure, we exit to the firmware via efi_exit instead of returning.
+ * On success, this routine will jump to the relocated image directly and never
+ * return.  On failure, it will exit to the firmware via efi_exit() instead of
+ * returning.
  */
 asmlinkage unsigned long efi_main(efi_handle_t handle,
 				  efi_system_table_t *sys_table_arg,
@@ -950,7 +959,10 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
 		goto fail;
 	}
 
-	return bzimage_addr;
+	if (IS_ENABLED(CONFIG_X86_64))
+		bzimage_addr += startup_64 - startup_32;
+
+	enter_kernel(bzimage_addr, boot_params);
 fail:
 	efi_err("efi_main() failed!\n");
 

From df9215f15206c2a81909ccf60f21d170801dce38 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:01 +0200
Subject: [PATCH 0819/2047] x86/efistub: Simplify and clean up handover entry
 code

Now that the EFI entry code in assembler is only used by the optional
and deprecated EFI handover protocol, and given that the EFI stub C code
no longer returns to it, most of it can simply be dropped.

While at it, clarify the symbol naming, by merging efi_main() and
efi_stub_entry(), making the latter the shared entry point for all
different boot modes that enter via the EFI stub.

The efi32_stub_entry() and efi64_stub_entry() names are referenced
explicitly by the tooling that populates the setup header, so these must
be retained, but can be emitted as aliases of efi_stub_entry() where
appropriate.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-5-ardb@kernel.org
---
 Documentation/arch/x86/boot.rst         |  2 +-
 arch/x86/boot/compressed/efi_mixed.S    | 22 ++++++++++++----------
 arch/x86/boot/compressed/head_32.S      | 11 -----------
 arch/x86/boot/compressed/head_64.S      | 12 ++----------
 drivers/firmware/efi/libstub/x86-stub.c | 20 ++++++++++++++++----
 5 files changed, 31 insertions(+), 36 deletions(-)

diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst
index 33520ecdb37a..cdbca15a4fc2 100644
--- a/Documentation/arch/x86/boot.rst
+++ b/Documentation/arch/x86/boot.rst
@@ -1417,7 +1417,7 @@ execution context provided by the EFI firmware.
 
 The function prototype for the handover entry point looks like this::
 
-    efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp)
+    efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp)
 
 'handle' is the EFI image handle passed to the boot loader by the EFI
 firmware, 'table' is the EFI system table - these are the first two
diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S
index 4ca70bf93dc0..dcc562c8f7f3 100644
--- a/arch/x86/boot/compressed/efi_mixed.S
+++ b/arch/x86/boot/compressed/efi_mixed.S
@@ -26,8 +26,8 @@
  * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode()
  * is the first thing that runs after switching to long mode. Depending on
  * whether the EFI handover protocol or the compat entry point was used to
- * enter the kernel, it will either branch to the 64-bit EFI handover
- * entrypoint at offset 0x390 in the image, or to the 64-bit EFI PE/COFF
+ * enter the kernel, it will either branch to the common 64-bit EFI stub
+ * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF
  * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a
  * struct bootparams pointer as the third argument, so the presence of such a
  * pointer is used to disambiguate.
@@ -37,21 +37,23 @@
  *  | efi32_pe_entry   |---->|            |            |       +-----------+--+
  *  +------------------+     |            |     +------+----------------+  |
  *                           | startup_32 |---->| startup_64_mixed_mode |  |
- *  +------------------+     |            |     +------+----------------+  V
- *  | efi32_stub_entry |---->|            |            |     +------------------+
- *  +------------------+     +------------+            +---->| efi64_stub_entry |
- *                                                           +-------------+----+
- *                           +------------+     +----------+               |
- *                           | startup_64 |<----| efi_main |<--------------+
- *                           +------------+     +----------+
+ *  +------------------+     |            |     +------+----------------+  |
+ *  | efi32_stub_entry |---->|            |            |                   |
+ *  +------------------+     +------------+            |                   |
+ *                                                     V                   |
+ *                           +------------+     +----------------+         |
+ *                           | startup_64 |<----| efi_stub_entry |<--------+
+ *                           +------------+     +----------------+
  */
 SYM_FUNC_START(startup_64_mixed_mode)
 	lea	efi32_boot_args(%rip), %rdx
 	mov	0(%rdx), %edi
 	mov	4(%rdx), %esi
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
 	mov	8(%rdx), %edx		// saved bootparams pointer
 	test	%edx, %edx
-	jnz	efi64_stub_entry
+	jnz	efi_stub_entry
+#endif
 	/*
 	 * efi_pe_entry uses MS calling convention, which requires 32 bytes of
 	 * shadow space on the stack even if all arguments are passed in
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 987ae727cf9f..8876ffe30e9a 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -150,17 +150,6 @@ SYM_FUNC_START(startup_32)
 	jmp	*%eax
 SYM_FUNC_END(startup_32)
 
-#ifdef CONFIG_EFI_STUB
-SYM_FUNC_START(efi32_stub_entry)
-	add	$0x4, %esp
-	movl	8(%esp), %esi	/* save boot_params pointer */
-	call	efi_main
-	/* efi_main returns the possibly relocated address of startup_32 */
-	jmp	*%eax
-SYM_FUNC_END(efi32_stub_entry)
-SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
-#endif
-
 	.text
 SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
 
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f732426d3b48..e6880208b9a1 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -542,19 +542,11 @@ trampoline_return:
 	jmp	*%rax
 SYM_CODE_END(startup_64)
 
-#ifdef CONFIG_EFI_STUB
-#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL)
 	.org 0x390
-#endif
 SYM_FUNC_START(efi64_stub_entry)
-	and	$~0xf, %rsp			/* realign the stack */
-	movq	%rdx, %rbx			/* save boot_params pointer */
-	call	efi_main
-	movq	%rbx,%rsi
-	leaq	rva(startup_64)(%rax), %rax
-	jmp	*%rax
+	jmp	efi_stub_entry
 SYM_FUNC_END(efi64_stub_entry)
-SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
 #endif
 
 	.text
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 40a10db2d85e..3f3b3edf7a38 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -817,9 +817,9 @@ static void __noreturn enter_kernel(unsigned long kernel_addr,
  * return.  On failure, it will exit to the firmware via efi_exit() instead of
  * returning.
  */
-asmlinkage unsigned long efi_main(efi_handle_t handle,
-				  efi_system_table_t *sys_table_arg,
-				  struct boot_params *boot_params)
+void __noreturn efi_stub_entry(efi_handle_t handle,
+			       efi_system_table_t *sys_table_arg,
+			       struct boot_params *boot_params)
 {
 	unsigned long bzimage_addr = (unsigned long)startup_32;
 	unsigned long buffer_start, buffer_end;
@@ -964,7 +964,19 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
 
 	enter_kernel(bzimage_addr, boot_params);
 fail:
-	efi_err("efi_main() failed!\n");
+	efi_err("efi_stub_entry() failed!\n");
 
 	efi_exit(handle, status);
 }
+
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+#ifndef CONFIG_EFI_MIXED
+extern __alias(efi_stub_entry)
+void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
+		      struct boot_params *boot_params);
+
+extern __alias(efi_stub_entry)
+void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
+		      struct boot_params *boot_params);
+#endif
+#endif

From 12792064587623065250069d1df980e2c9ac3e67 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:02 +0200
Subject: [PATCH 0820/2047] x86/decompressor: Avoid magic offsets for EFI
 handover entrypoint

The native 32-bit or 64-bit EFI handover protocol entrypoint offset
relative to the respective startup_32/64 address is described in
boot_params as handover_offset, so that the special Linux/x86 aware EFI
loader can find it there.

When mixed mode is enabled, this single field has to describe this
offset for both the 32-bit and 64-bit entrypoints, so their respective
relative offsets have to be identical. Given that startup_32 and
startup_64 are 0x200 bytes apart, and the EFI handover entrypoint
resides at a fixed offset, the 32-bit and 64-bit versions of those
entrypoints must be exactly 0x200 bytes apart as well.

Currently, hard-coded fixed offsets are used to ensure this, but it is
sufficient to emit the 64-bit entrypoint 0x200 bytes after the 32-bit
one, wherever it happens to reside. This allows this code (which is now
EFI mixed mode specific) to be moved into efi_mixed.S and out of the
startup code in head_64.S.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-6-ardb@kernel.org
---
 arch/x86/boot/compressed/efi_mixed.S | 20 +++++++++++++++++++-
 arch/x86/boot/compressed/head_64.S   | 18 ------------------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S
index dcc562c8f7f3..9308b595f6f0 100644
--- a/arch/x86/boot/compressed/efi_mixed.S
+++ b/arch/x86/boot/compressed/efi_mixed.S
@@ -140,6 +140,16 @@ SYM_FUNC_START(__efi64_thunk)
 SYM_FUNC_END(__efi64_thunk)
 
 	.code32
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+SYM_FUNC_START(efi32_stub_entry)
+	add	$0x4, %esp		/* Discard return address */
+	popl	%ecx
+	popl	%edx
+	popl	%esi
+	jmp	efi32_entry
+SYM_FUNC_END(efi32_stub_entry)
+#endif
+
 /*
  * EFI service pointer must be in %edi.
  *
@@ -220,7 +230,7 @@ SYM_FUNC_END(efi_enter32)
  * stub may still exit and return to the firmware using the Exit() EFI boot
  * service.]
  */
-SYM_FUNC_START(efi32_entry)
+SYM_FUNC_START_LOCAL(efi32_entry)
 	call	1f
 1:	pop	%ebx
 
@@ -320,6 +330,14 @@ SYM_FUNC_START(efi32_pe_entry)
 	RET
 SYM_FUNC_END(efi32_pe_entry)
 
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+	.org	efi32_stub_entry + 0x200
+	.code64
+SYM_FUNC_START_NOALIGN(efi64_stub_entry)
+	jmp	efi_stub_entry
+SYM_FUNC_END(efi64_stub_entry)
+#endif
+
 	.section ".rodata"
 	/* EFI loaded image protocol GUID */
 	.balign 4
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index e6880208b9a1..a3f764daf3a3 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -294,17 +294,6 @@ SYM_FUNC_START(startup_32)
 	lret
 SYM_FUNC_END(startup_32)
 
-#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL)
-	.org 0x190
-SYM_FUNC_START(efi32_stub_entry)
-	add	$0x4, %esp		/* Discard return address */
-	popl	%ecx
-	popl	%edx
-	popl	%esi
-	jmp	efi32_entry
-SYM_FUNC_END(efi32_stub_entry)
-#endif
-
 	.code64
 	.org 0x200
 SYM_CODE_START(startup_64)
@@ -542,13 +531,6 @@ trampoline_return:
 	jmp	*%rax
 SYM_CODE_END(startup_64)
 
-#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL)
-	.org 0x390
-SYM_FUNC_START(efi64_stub_entry)
-	jmp	efi_stub_entry
-SYM_FUNC_END(efi64_stub_entry)
-#endif
-
 	.text
 SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
 

From d7156b986d4cc0657fa6dc05c9fcf51c3d55a0fe Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:03 +0200
Subject: [PATCH 0821/2047] x86/efistub: Clear BSS in EFI handover protocol
 entrypoint

The so-called EFI handover protocol is value-add from the distros that
permits a loader to simply copy a PE kernel image into memory and call
an alternative entrypoint that is described by an embedded boot_params
structure.

Most implementations of this protocol do not bother to check the PE
header for minimum alignment, section placement, etc, and therefore also
don't clear the image's BSS, or even allocate enough memory for it.

Allocating more memory on the fly is rather difficult, but at least
clear the BSS region explicitly when entering in this manner, so that
the EFI stub code does not get confused by global variables that were
not zero-initialized correctly.

When booting in mixed mode, this BSS clearing must occur before any
global state is created, so clear it in the 32-bit asm entry point.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-7-ardb@kernel.org
---
 arch/x86/boot/compressed/efi_mixed.S    | 14 +++++++++++++-
 drivers/firmware/efi/libstub/x86-stub.c | 13 +++++++++++--
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S
index 9308b595f6f0..8a02a151806d 100644
--- a/arch/x86/boot/compressed/efi_mixed.S
+++ b/arch/x86/boot/compressed/efi_mixed.S
@@ -142,6 +142,18 @@ SYM_FUNC_END(__efi64_thunk)
 	.code32
 #ifdef CONFIG_EFI_HANDOVER_PROTOCOL
 SYM_FUNC_START(efi32_stub_entry)
+	call	1f
+1:	popl	%ecx
+
+	/* Clear BSS */
+	xorl	%eax, %eax
+	leal	(_bss - 1b)(%ecx), %edi
+	leal	(_ebss - 1b)(%ecx), %ecx
+	subl	%edi, %ecx
+	shrl	$2, %ecx
+	cld
+	rep	stosl
+
 	add	$0x4, %esp		/* Discard return address */
 	popl	%ecx
 	popl	%edx
@@ -334,7 +346,7 @@ SYM_FUNC_END(efi32_pe_entry)
 	.org	efi32_stub_entry + 0x200
 	.code64
 SYM_FUNC_START_NOALIGN(efi64_stub_entry)
-	jmp	efi_stub_entry
+	jmp	efi_handover_entry
 SYM_FUNC_END(efi64_stub_entry)
 #endif
 
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 3f3b3edf7a38..9247dbc7dbbd 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -970,12 +970,21 @@ fail:
 }
 
 #ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
+			struct boot_params *boot_params)
+{
+	extern char _bss[], _ebss[];
+
+	memset(_bss, 0, _ebss - _bss);
+	efi_stub_entry(handle, sys_table_arg, boot_params);
+}
+
 #ifndef CONFIG_EFI_MIXED
-extern __alias(efi_stub_entry)
+extern __alias(efi_handover_entry)
 void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
 		      struct boot_params *boot_params);
 
-extern __alias(efi_stub_entry)
+extern __alias(efi_handover_entry)
 void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
 		      struct boot_params *boot_params);
 #endif

From 8b63cba746f86a754d66e302c43209cc9b9b6e39 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:04 +0200
Subject: [PATCH 0822/2047] x86/decompressor: Store boot_params pointer in
 callee save register

Instead of pushing and popping %RSI several times to preserve the struct
boot_params pointer across the execution of the startup code, move it
into a callee save register before the first call into C, and copy it
back when needed.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-8-ardb@kernel.org
---
 arch/x86/boot/compressed/head_64.S | 42 ++++++++++++------------------
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index a3f764daf3a3..19bf810409e2 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -405,10 +405,14 @@ SYM_CODE_START(startup_64)
 	lretq
 
 .Lon_kernel_cs:
+	/*
+	 * RSI holds a pointer to a boot_params structure provided by the
+	 * loader, and this needs to be preserved across C function calls. So
+	 * move it into a callee saved register.
+	 */
+	movq	%rsi, %r15
 
-	pushq	%rsi
 	call	load_stage1_idt
-	popq	%rsi
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 	/*
@@ -419,12 +423,10 @@ SYM_CODE_START(startup_64)
 	 * CPUID instructions being issued, so go ahead and do that now via
 	 * sev_enable(), which will also handle the rest of the SEV-related
 	 * detection/setup to ensure that has been done in advance of any dependent
-	 * code.
+	 * code. Pass the boot_params pointer as the first argument.
 	 */
-	pushq	%rsi
-	movq	%rsi, %rdi		/* real mode address */
+	movq	%r15, %rdi
 	call	sev_enable
-	popq	%rsi
 #endif
 
 	/*
@@ -437,13 +439,10 @@ SYM_CODE_START(startup_64)
 	 *   - Non zero RDX means trampoline needs to enable 5-level
 	 *     paging.
 	 *
-	 * RSI holds real mode data and needs to be preserved across
-	 * this function call.
+	 * Pass the boot_params pointer as the first argument.
 	 */
-	pushq	%rsi
-	movq	%rsi, %rdi		/* real mode address */
+	movq	%r15, %rdi
 	call	paging_prepare
-	popq	%rsi
 
 	/* Save the trampoline address in RCX */
 	movq	%rax, %rcx
@@ -456,9 +455,9 @@ SYM_CODE_START(startup_64)
 	 * because the architecture does not guarantee that GPRs will retain
 	 * their full 64-bit values across a 32-bit mode switch.
 	 */
+	pushq	%r15
 	pushq	%rbp
 	pushq	%rbx
-	pushq	%rsi
 
 	/*
 	 * Push the 64-bit address of trampoline_return() onto the new stack.
@@ -475,9 +474,9 @@ SYM_CODE_START(startup_64)
 	lretq
 trampoline_return:
 	/* Restore live 64-bit registers */
-	popq	%rsi
 	popq	%rbx
 	popq	%rbp
+	popq	%r15
 
 	/* Restore the stack, the 32-bit trampoline uses its own stack */
 	leaq	rva(boot_stack_end)(%rbx), %rsp
@@ -487,14 +486,9 @@ trampoline_return:
 	 *
 	 * RDI is address of the page table to use instead of page table
 	 * in trampoline memory (if required).
-	 *
-	 * RSI holds real mode data and needs to be preserved across
-	 * this function call.
 	 */
-	pushq	%rsi
 	leaq	rva(top_pgtable)(%rbx), %rdi
 	call	cleanup_trampoline
-	popq	%rsi
 
 	/* Zero EFLAGS */
 	pushq	$0
@@ -504,7 +498,6 @@ trampoline_return:
  * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
  */
-	pushq	%rsi
 	leaq	(_bss-8)(%rip), %rsi
 	leaq	rva(_bss-8)(%rbx), %rdi
 	movl	$(_bss - startup_32), %ecx
@@ -512,7 +505,6 @@ trampoline_return:
 	std
 	rep	movsq
 	cld
-	popq	%rsi
 
 	/*
 	 * The GDT may get overwritten either during the copy we just did or
@@ -544,30 +536,28 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
 	shrq	$3, %rcx
 	rep	stosq
 
-	pushq	%rsi
 	call	load_stage2_idt
 
 	/* Pass boot_params to initialize_identity_maps() */
-	movq	(%rsp), %rdi
+	movq	%r15, %rdi
 	call	initialize_identity_maps
-	popq	%rsi
 
 /*
  * Do the extraction, and jump to the new kernel..
  */
-	pushq	%rsi			/* Save the real mode argument */
-	movq	%rsi, %rdi		/* real mode address */
+	/* pass struct boot_params pointer */
+	movq	%r15, %rdi
 	leaq	boot_heap(%rip), %rsi	/* malloc area for uncompression */
 	leaq	input_data(%rip), %rdx  /* input_data */
 	movl	input_len(%rip), %ecx	/* input_len */
 	movq	%rbp, %r8		/* output target address */
 	movl	output_len(%rip), %r9d	/* decompressed length, end of relocs */
 	call	extract_kernel		/* returns kernel entry point in %rax */
-	popq	%rsi
 
 /*
  * Jump to the decompressed kernel.
  */
+	movq	%r15, %rsi
 	jmp	*%rax
 SYM_FUNC_END(.Lrelocated)
 

From 00c6b0978ec182f1a672095930872168b9d5b1e2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:05 +0200
Subject: [PATCH 0823/2047] x86/decompressor: Assign paging related global
 variables earlier

There is no need to defer the assignment of the paging related global
variables 'pgdir_shift' and 'ptrs_per_p4d' until after the trampoline is
cleaned up, so assign them as soon as it is clear that 5-level paging
will be enabled.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-9-ardb@kernel.org
---
 arch/x86/boot/compressed/misc.h       |  2 --
 arch/x86/boot/compressed/pgtable_64.c | 14 +++++---------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 964fe903a1cd..cc70d3fb9049 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -179,9 +179,7 @@ static inline int count_immovable_mem_regions(void) { return 0; }
 #endif
 
 /* ident_map_64.c */
-#ifdef CONFIG_X86_5LEVEL
 extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
-#endif
 extern void kernel_add_identity_map(unsigned long start, unsigned long end);
 
 /* Used by PAGE_KERN* macros: */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 2ac12ff4111b..f8092d3244c9 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -130,6 +130,11 @@ struct paging_config paging_prepare(void *rmode)
 			native_cpuid_eax(0) >= 7 &&
 			(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
 		paging_config.l5_required = 1;
+
+		/* Initialize variables for 5-level paging */
+		__pgtable_l5_enabled = 1;
+		pgdir_shift = 48;
+		ptrs_per_p4d = 512;
 	}
 
 	paging_config.trampoline_start = find_trampoline_placement();
@@ -206,13 +211,4 @@ void cleanup_trampoline(void *pgtable)
 
 	/* Restore trampoline memory */
 	memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
-
-	/* Initialize variables for 5-level paging */
-#ifdef CONFIG_X86_5LEVEL
-	if (__read_cr4() & X86_CR4_LA57) {
-		__pgtable_l5_enabled = 1;
-		pgdir_shift = 48;
-		ptrs_per_p4d = 512;
-	}
-#endif
 }

From e8972a76aa90c05a0078043413f806c02fcb3487 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:06 +0200
Subject: [PATCH 0824/2047] x86/decompressor: Call trampoline as a normal
 function

Move the long return to switch to 32-bit mode into the trampoline code
so it can be called as an ordinary function. This will allow it to be
called directly from C code in a subsequent patch.

While at it, reorganize the code somewhat to keep the prologue and
epilogue of the function together, making the code a bit easier to
follow. Also, given that the trampoline is now entered in 64-bit mode, a
simple RIP-relative reference can be used to take the address of the
exit point.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20230807162720.545787-10-ardb@kernel.org
---
 arch/x86/boot/compressed/head_64.S | 79 +++++++++++++-----------------
 arch/x86/boot/compressed/pgtable.h |  2 +-
 2 files changed, 36 insertions(+), 45 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 19bf810409e2..91b5eee30614 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -447,39 +447,8 @@ SYM_CODE_START(startup_64)
 	/* Save the trampoline address in RCX */
 	movq	%rax, %rcx
 
-	/* Set up 32-bit addressable stack */
-	leaq	TRAMPOLINE_32BIT_STACK_END(%rcx), %rsp
-
-	/*
-	 * Preserve live 64-bit registers on the stack: this is necessary
-	 * because the architecture does not guarantee that GPRs will retain
-	 * their full 64-bit values across a 32-bit mode switch.
-	 */
-	pushq	%r15
-	pushq	%rbp
-	pushq	%rbx
-
-	/*
-	 * Push the 64-bit address of trampoline_return() onto the new stack.
-	 * It will be used by the trampoline to return to the main code. Due to
-	 * the 32-bit mode switch, it cannot be kept it in a register either.
-	 */
-	leaq	trampoline_return(%rip), %rdi
-	pushq	%rdi
-
-	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
-	pushq	$__KERNEL32_CS
 	leaq	TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
-	pushq	%rax
-	lretq
-trampoline_return:
-	/* Restore live 64-bit registers */
-	popq	%rbx
-	popq	%rbp
-	popq	%r15
-
-	/* Restore the stack, the 32-bit trampoline uses its own stack */
-	leaq	rva(boot_stack_end)(%rbx), %rsp
+	call	*%rax
 
 	/*
 	 * cleanup_trampoline() would restore trampoline memory.
@@ -561,7 +530,6 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
 	jmp	*%rax
 SYM_FUNC_END(.Lrelocated)
 
-	.code32
 /*
  * This is the 32-bit trampoline that will be copied over to low memory.
  *
@@ -570,6 +538,39 @@ SYM_FUNC_END(.Lrelocated)
  * Non zero RDX means trampoline needs to enable 5-level paging.
  */
 SYM_CODE_START(trampoline_32bit_src)
+	/*
+	 * Preserve live 64-bit registers on the stack: this is necessary
+	 * because the architecture does not guarantee that GPRs will retain
+	 * their full 64-bit values across a 32-bit mode switch.
+	 */
+	pushq	%r15
+	pushq	%rbp
+	pushq	%rbx
+
+	/* Set up 32-bit addressable stack and push the old RSP value */
+	leaq	(TRAMPOLINE_32BIT_STACK_END - 8)(%rcx), %rbx
+	movq	%rsp, (%rbx)
+	movq	%rbx, %rsp
+
+	/* Take the address of the trampoline exit code */
+	leaq	.Lret(%rip), %rbx
+
+	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+	pushq	$__KERNEL32_CS
+	leaq	0f(%rip), %rax
+	pushq	%rax
+	lretq
+
+.Lret:
+	/* Restore the preserved 64-bit registers */
+	movq	(%rsp), %rsp
+	popq	%rbx
+	popq	%rbp
+	popq	%r15
+	retq
+
+	.code32
+0:
 	/* Set up data and stack segments */
 	movl	$__KERNEL_DS, %eax
 	movl	%eax, %ds
@@ -633,12 +634,9 @@ SYM_CODE_START(trampoline_32bit_src)
 1:
 	movl	%eax, %cr4
 
-	/* Calculate address of paging_enabled() once we are executing in the trampoline */
-	leal	.Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
-
 	/* Prepare the stack for far return to Long Mode */
 	pushl	$__KERNEL_CS
-	pushl	%eax
+	pushl	%ebx
 
 	/* Enable paging again. */
 	movl	%cr0, %eax
@@ -648,12 +646,6 @@ SYM_CODE_START(trampoline_32bit_src)
 	lret
 SYM_CODE_END(trampoline_32bit_src)
 
-	.code64
-SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
-	/* Return from the trampoline */
-	retq
-SYM_FUNC_END(.Lpaging_enabled)
-
 	/*
          * The trampoline code has a size limit.
          * Make sure we fail to compile if the trampoline code grows
@@ -661,7 +653,6 @@ SYM_FUNC_END(.Lpaging_enabled)
 	 */
 	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
 
-	.code32
 SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
 	/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
 1:
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index cc9b2529a086..91dbb99203fb 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -6,7 +6,7 @@
 #define TRAMPOLINE_32BIT_PGTABLE_OFFSET	0
 
 #define TRAMPOLINE_32BIT_CODE_OFFSET	PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE	0x80
+#define TRAMPOLINE_32BIT_CODE_SIZE	0xA0
 
 #define TRAMPOLINE_32BIT_STACK_END	TRAMPOLINE_32BIT_SIZE
 

From 918a7a04e71745e99a0efc6753e587439b794b29 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:07 +0200
Subject: [PATCH 0825/2047] x86/decompressor: Use standard calling convention
 for trampoline

Update the trampoline code so its arguments are passed via RDI and RSI,
which matches the ordinary SysV calling convention for x86_64. This will
allow this code to be called directly from C.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20230807162720.545787-11-ardb@kernel.org
---
 arch/x86/boot/compressed/head_64.S | 27 +++++++++++++--------------
 arch/x86/boot/compressed/pgtable.h |  2 +-
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 91b5eee30614..c47504208105 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -444,9 +444,9 @@ SYM_CODE_START(startup_64)
 	movq	%r15, %rdi
 	call	paging_prepare
 
-	/* Save the trampoline address in RCX */
-	movq	%rax, %rcx
-
+	/* Pass the trampoline address and boolean flag as args #1 and #2 */
+	movq	%rax, %rdi
+	movq	%rdx, %rsi
 	leaq	TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
 	call	*%rax
 
@@ -531,11 +531,14 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
 SYM_FUNC_END(.Lrelocated)
 
 /*
- * This is the 32-bit trampoline that will be copied over to low memory.
+ * This is the 32-bit trampoline that will be copied over to low memory. It
+ * will be called using the ordinary 64-bit calling convention from code
+ * running in 64-bit mode.
  *
  * Return address is at the top of the stack (might be above 4G).
- * ECX contains the base address of the trampoline memory.
- * Non zero RDX means trampoline needs to enable 5-level paging.
+ * The first argument (EDI) contains the 32-bit addressable base of the
+ * trampoline memory. A non-zero second argument (ESI) means that the
+ * trampoline needs to enable 5-level paging.
  */
 SYM_CODE_START(trampoline_32bit_src)
 	/*
@@ -582,7 +585,7 @@ SYM_CODE_START(trampoline_32bit_src)
 	movl	%eax, %cr0
 
 	/* Check what paging mode we want to be in after the trampoline */
-	testl	%edx, %edx
+	testl	%esi, %esi
 	jz	1f
 
 	/* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
@@ -597,21 +600,17 @@ SYM_CODE_START(trampoline_32bit_src)
 	jz	3f
 2:
 	/* Point CR3 to the trampoline's new top level page table */
-	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
+	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%edi), %eax
 	movl	%eax, %cr3
 3:
 	/* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
-	pushl	%ecx
-	pushl	%edx
 	movl	$MSR_EFER, %ecx
 	rdmsr
 	btsl	$_EFER_LME, %eax
 	/* Avoid writing EFER if no change was made (for TDX guest) */
 	jc	1f
 	wrmsr
-1:	popl	%edx
-	popl	%ecx
-
+1:
 #ifdef CONFIG_X86_MCE
 	/*
 	 * Preserve CR4.MCE if the kernel will enable #MC support.
@@ -628,7 +627,7 @@ SYM_CODE_START(trampoline_32bit_src)
 
 	/* Enable PAE and LA57 (if required) paging modes */
 	orl	$X86_CR4_PAE, %eax
-	testl	%edx, %edx
+	testl	%esi, %esi
 	jz	1f
 	orl	$X86_CR4_LA57, %eax
 1:
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index 91dbb99203fb..4e8cef135226 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -14,7 +14,7 @@
 
 extern unsigned long *trampoline_32bit;
 
-extern void trampoline_32bit_src(void *return_ptr);
+extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
 
 #endif /* __ASSEMBLER__ */
 #endif /* BOOT_COMPRESSED_PAGETABLE_H */

From bd328aa01ff77a45aeffea5fc4521854291db11f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:08 +0200
Subject: [PATCH 0826/2047] x86/decompressor: Avoid the need for a stack in the
 32-bit trampoline

The 32-bit trampoline no longer uses the stack for anything except
performing a far return back to long mode, and preserving the caller's
stack pointer value. Currently, the trampoline stack is placed in the
same page that carries the trampoline code, which means this page must
be mapped writable and executable, and the stack is therefore executable
as well.

Replace the far return with a far jump, so that the return address can
be pre-calculated and patched into the code before it is called. This
removes the need for a 32-bit addressable stack entirely, and in a later
patch, this will be taken advantage of by removing writable permissions
from (and adding executable permissions to) the trampoline code page
when booting via the EFI stub.

Note that the value of RSP still needs to be preserved explicitly across
the switch into 32-bit mode, as the register may get truncated to 32
bits.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20230807162720.545787-12-ardb@kernel.org
---
 arch/x86/boot/compressed/head_64.S    | 45 ++++++++++++++++-----------
 arch/x86/boot/compressed/pgtable.h    |  4 +--
 arch/x86/boot/compressed/pgtable_64.c | 12 ++++++-
 3 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index c47504208105..37fd7b7d683d 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -540,6 +540,7 @@ SYM_FUNC_END(.Lrelocated)
  * trampoline memory. A non-zero second argument (ESI) means that the
  * trampoline needs to enable 5-level paging.
  */
+	.section ".rodata", "a", @progbits
 SYM_CODE_START(trampoline_32bit_src)
 	/*
 	 * Preserve live 64-bit registers on the stack: this is necessary
@@ -550,13 +551,9 @@ SYM_CODE_START(trampoline_32bit_src)
 	pushq	%rbp
 	pushq	%rbx
 
-	/* Set up 32-bit addressable stack and push the old RSP value */
-	leaq	(TRAMPOLINE_32BIT_STACK_END - 8)(%rcx), %rbx
-	movq	%rsp, (%rbx)
-	movq	%rbx, %rsp
-
-	/* Take the address of the trampoline exit code */
-	leaq	.Lret(%rip), %rbx
+	/* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
+	movq	%rsp, %rbx
+	shrq	$32, %rbx
 
 	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
 	pushq	$__KERNEL32_CS
@@ -564,9 +561,17 @@ SYM_CODE_START(trampoline_32bit_src)
 	pushq	%rax
 	lretq
 
+	/*
+	 * The 32-bit code below will do a far jump back to long mode and end
+	 * up here after reconfiguring the number of paging levels. First, the
+	 * stack pointer needs to be restored to its full 64-bit value before
+	 * the callee save register contents can be popped from the stack.
+	 */
 .Lret:
+	shlq	$32, %rbx
+	orq	%rbx, %rsp
+
 	/* Restore the preserved 64-bit registers */
-	movq	(%rsp), %rsp
 	popq	%rbx
 	popq	%rbp
 	popq	%r15
@@ -574,11 +579,6 @@ SYM_CODE_START(trampoline_32bit_src)
 
 	.code32
 0:
-	/* Set up data and stack segments */
-	movl	$__KERNEL_DS, %eax
-	movl	%eax, %ds
-	movl	%eax, %ss
-
 	/* Disable paging */
 	movl	%cr0, %eax
 	btrl	$X86_CR0_PG_BIT, %eax
@@ -633,18 +633,26 @@ SYM_CODE_START(trampoline_32bit_src)
 1:
 	movl	%eax, %cr4
 
-	/* Prepare the stack for far return to Long Mode */
-	pushl	$__KERNEL_CS
-	pushl	%ebx
-
 	/* Enable paging again. */
 	movl	%cr0, %eax
 	btsl	$X86_CR0_PG_BIT, %eax
 	movl	%eax, %cr0
 
-	lret
+	/*
+	 * Return to the 64-bit calling code using LJMP rather than LRET, to
+	 * avoid the need for a 32-bit addressable stack. The destination
+	 * address will be adjusted after the template code is copied into a
+	 * 32-bit addressable buffer.
+	 */
+.Ljmp:	ljmpl	$__KERNEL_CS, $(.Lret - trampoline_32bit_src)
 SYM_CODE_END(trampoline_32bit_src)
 
+/*
+ * This symbol is placed right after trampoline_32bit_src() so its address can
+ * be used to infer the size of the trampoline code.
+ */
+SYM_DATA(trampoline_ljmp_imm_offset, .word  .Ljmp + 1 - trampoline_32bit_src)
+
 	/*
          * The trampoline code has a size limit.
          * Make sure we fail to compile if the trampoline code grows
@@ -652,6 +660,7 @@ SYM_CODE_END(trampoline_32bit_src)
 	 */
 	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
 
+	.text
 SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
 	/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
 1:
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index 4e8cef135226..c6b0903aded0 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -8,13 +8,13 @@
 #define TRAMPOLINE_32BIT_CODE_OFFSET	PAGE_SIZE
 #define TRAMPOLINE_32BIT_CODE_SIZE	0xA0
 
-#define TRAMPOLINE_32BIT_STACK_END	TRAMPOLINE_32BIT_SIZE
-
 #ifndef __ASSEMBLER__
 
 extern unsigned long *trampoline_32bit;
 
 extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
 
+extern const u16 trampoline_ljmp_imm_offset;
+
 #endif /* __ASSEMBLER__ */
 #endif /* BOOT_COMPRESSED_PAGETABLE_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index f8092d3244c9..5198a05aefa8 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -109,6 +109,7 @@ static unsigned long find_trampoline_placement(void)
 struct paging_config paging_prepare(void *rmode)
 {
 	struct paging_config paging_config = {};
+	void *tramp_code;
 
 	/* Initialize boot_params. Required for cmdline_find_option_bool(). */
 	boot_params = rmode;
@@ -148,9 +149,18 @@ struct paging_config paging_prepare(void *rmode)
 	memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
 
 	/* Copy trampoline code in place */
-	memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+	tramp_code = memcpy(trampoline_32bit +
+			TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
 			&trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
 
+	/*
+	 * Avoid the need for a stack in the 32-bit trampoline code, by using
+	 * LJMP rather than LRET to return back to long mode. LJMP takes an
+	 * immediate absolute address, which needs to be adjusted based on the
+	 * placement of the trampoline.
+	 */
+	*(u32 *)(tramp_code + trampoline_ljmp_imm_offset) += (unsigned long)tramp_code;
+
 	/*
 	 * The code below prepares page table in trampoline memory.
 	 *

From 64ef578b6b6866bec012544416946533444036c8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:09 +0200
Subject: [PATCH 0827/2047] x86/decompressor: Call trampoline directly from C
 code

Instead of returning to the asm calling code to invoke the trampoline,
call it straight from the C code that sets it up. That way, the struct
return type is no longer needed for returning two values, and the call
can be made conditional more cleanly in a subsequent patch.

This means that all callee save 64-bit registers need to be preserved
and restored, as their contents may not survive the legacy mode switch.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20230807162720.545787-13-ardb@kernel.org
---
 arch/x86/boot/compressed/head_64.S    | 31 +++++++++++---------------
 arch/x86/boot/compressed/pgtable_64.c | 32 +++++++++++----------------
 2 files changed, 26 insertions(+), 37 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 37fd7b7d683d..cd6e3e175389 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -430,25 +430,14 @@ SYM_CODE_START(startup_64)
 #endif
 
 	/*
-	 * paging_prepare() sets up the trampoline and checks if we need to
-	 * enable 5-level paging.
-	 *
-	 * paging_prepare() returns a two-quadword structure which lands
-	 * into RDX:RAX:
-	 *   - Address of the trampoline is returned in RAX.
-	 *   - Non zero RDX means trampoline needs to enable 5-level
-	 *     paging.
+	 * configure_5level_paging() updates the number of paging levels using
+	 * a trampoline in 32-bit addressable memory if the current number does
+	 * not match the desired number.
 	 *
 	 * Pass the boot_params pointer as the first argument.
 	 */
 	movq	%r15, %rdi
-	call	paging_prepare
-
-	/* Pass the trampoline address and boolean flag as args #1 and #2 */
-	movq	%rax, %rdi
-	movq	%rdx, %rsi
-	leaq	TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
-	call	*%rax
+	call	configure_5level_paging
 
 	/*
 	 * cleanup_trampoline() would restore trampoline memory.
@@ -543,11 +532,14 @@ SYM_FUNC_END(.Lrelocated)
 	.section ".rodata", "a", @progbits
 SYM_CODE_START(trampoline_32bit_src)
 	/*
-	 * Preserve live 64-bit registers on the stack: this is necessary
-	 * because the architecture does not guarantee that GPRs will retain
-	 * their full 64-bit values across a 32-bit mode switch.
+	 * Preserve callee save 64-bit registers on the stack: this is
+	 * necessary because the architecture does not guarantee that GPRs will
+	 * retain their full 64-bit values across a 32-bit mode switch.
 	 */
 	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
 	pushq	%rbp
 	pushq	%rbx
 
@@ -574,6 +566,9 @@ SYM_CODE_START(trampoline_32bit_src)
 	/* Restore the preserved 64-bit registers */
 	popq	%rbx
 	popq	%rbp
+	popq	%r12
+	popq	%r13
+	popq	%r14
 	popq	%r15
 	retq
 
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 5198a05aefa8..f9cc86b2ee55 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39;
 unsigned int __section(".data") ptrs_per_p4d = 1;
 #endif
 
-struct paging_config {
-	unsigned long trampoline_start;
-	unsigned long l5_required;
-};
-
 /* Buffer to preserve trampoline memory */
 static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
 
@@ -29,7 +24,7 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
  * purposes.
  *
  * Avoid putting the pointer into .bss as it will be cleared between
- * paging_prepare() and extract_kernel().
+ * configure_5level_paging() and extract_kernel().
  */
 unsigned long *trampoline_32bit __section(".data");
 
@@ -106,13 +101,13 @@ static unsigned long find_trampoline_placement(void)
 	return bios_start - TRAMPOLINE_32BIT_SIZE;
 }
 
-struct paging_config paging_prepare(void *rmode)
+asmlinkage void configure_5level_paging(struct boot_params *bp)
 {
-	struct paging_config paging_config = {};
-	void *tramp_code;
+	void (*toggle_la57)(void *trampoline, bool enable_5lvl);
+	bool l5_required = false;
 
 	/* Initialize boot_params. Required for cmdline_find_option_bool(). */
-	boot_params = rmode;
+	boot_params = bp;
 
 	/*
 	 * Check if LA57 is desired and supported.
@@ -130,7 +125,7 @@ struct paging_config paging_prepare(void *rmode)
 			!cmdline_find_option_bool("no5lvl") &&
 			native_cpuid_eax(0) >= 7 &&
 			(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
-		paging_config.l5_required = 1;
+		l5_required = true;
 
 		/* Initialize variables for 5-level paging */
 		__pgtable_l5_enabled = 1;
@@ -138,9 +133,7 @@ struct paging_config paging_prepare(void *rmode)
 		ptrs_per_p4d = 512;
 	}
 
-	paging_config.trampoline_start = find_trampoline_placement();
-
-	trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
+	trampoline_32bit = (unsigned long *)find_trampoline_placement();
 
 	/* Preserve trampoline memory */
 	memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
@@ -149,7 +142,7 @@ struct paging_config paging_prepare(void *rmode)
 	memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
 
 	/* Copy trampoline code in place */
-	tramp_code = memcpy(trampoline_32bit +
+	toggle_la57 = memcpy(trampoline_32bit +
 			TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
 			&trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
 
@@ -159,7 +152,8 @@ struct paging_config paging_prepare(void *rmode)
 	 * immediate absolute address, which needs to be adjusted based on the
 	 * placement of the trampoline.
 	 */
-	*(u32 *)(tramp_code + trampoline_ljmp_imm_offset) += (unsigned long)tramp_code;
+	*(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) +=
+						(unsigned long)toggle_la57;
 
 	/*
 	 * The code below prepares page table in trampoline memory.
@@ -175,10 +169,10 @@ struct paging_config paging_prepare(void *rmode)
 	 * We are not going to use the page table in trampoline memory if we
 	 * are already in the desired paging mode.
 	 */
-	if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
+	if (l5_required == !!(native_read_cr4() & X86_CR4_LA57))
 		goto out;
 
-	if (paging_config.l5_required) {
+	if (l5_required) {
 		/*
 		 * For 4- to 5-level paging transition, set up current CR3 as
 		 * the first and the only entry in a new top-level page table.
@@ -201,7 +195,7 @@ struct paging_config paging_prepare(void *rmode)
 	}
 
 out:
-	return paging_config;
+	toggle_la57(trampoline_32bit, l5_required);
 }
 
 void cleanup_trampoline(void *pgtable)

From f97b67a773cd84bd8b55c0a0ec32448a87fc56bb Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:10 +0200
Subject: [PATCH 0828/2047] x86/decompressor: Only call the trampoline when
 changing paging levels

Since the current and desired number of paging levels are known when the
trampoline is being prepared, avoid calling the trampoline at all if it
is clear that calling it is not going to result in a change to the
number of paging levels.

Given that the CPU is already running in long mode, the PAE and LA57
settings are necessarily consistent with the currently active page
tables, and other fields in CR4 will be initialized by the startup code
in the kernel proper. So limit the manipulation of CR4 to toggling the
LA57 bit, which is the only thing that really needs doing at this point
in the boot. This also means that there is no need to pass the value of
l5_required to toggle_la57(), as it will not be called unless CR4.LA57
needs to toggle.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20230807162720.545787-14-ardb@kernel.org
---
 arch/x86/boot/compressed/head_64.S    | 45 +++------------------------
 arch/x86/boot/compressed/pgtable_64.c | 22 ++++++-------
 2 files changed, 13 insertions(+), 54 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index cd6e3e175389..8730b1d58e2b 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -387,10 +387,6 @@ SYM_CODE_START(startup_64)
 	 * For the trampoline, we need the top page table to reside in lower
 	 * memory as we don't have a way to load 64-bit values into CR3 in
 	 * 32-bit mode.
-	 *
-	 * We go though the trampoline even if we don't have to: if we're
-	 * already in a desired paging mode. This way the trampoline code gets
-	 * tested on every boot.
 	 */
 
 	/* Make sure we have GDT with 32-bit code segment */
@@ -526,8 +522,7 @@ SYM_FUNC_END(.Lrelocated)
  *
  * Return address is at the top of the stack (might be above 4G).
  * The first argument (EDI) contains the 32-bit addressable base of the
- * trampoline memory. A non-zero second argument (ESI) means that the
- * trampoline needs to enable 5-level paging.
+ * trampoline memory.
  */
 	.section ".rodata", "a", @progbits
 SYM_CODE_START(trampoline_32bit_src)
@@ -579,25 +574,10 @@ SYM_CODE_START(trampoline_32bit_src)
 	btrl	$X86_CR0_PG_BIT, %eax
 	movl	%eax, %cr0
 
-	/* Check what paging mode we want to be in after the trampoline */
-	testl	%esi, %esi
-	jz	1f
-
-	/* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
-	movl	%cr4, %eax
-	testl	$X86_CR4_LA57, %eax
-	jnz	3f
-	jmp	2f
-1:
-	/* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
-	movl	%cr4, %eax
-	testl	$X86_CR4_LA57, %eax
-	jz	3f
-2:
 	/* Point CR3 to the trampoline's new top level page table */
 	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%edi), %eax
 	movl	%eax, %cr3
-3:
+
 	/* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
 	movl	$MSR_EFER, %ecx
 	rdmsr
@@ -606,26 +586,9 @@ SYM_CODE_START(trampoline_32bit_src)
 	jc	1f
 	wrmsr
 1:
-#ifdef CONFIG_X86_MCE
-	/*
-	 * Preserve CR4.MCE if the kernel will enable #MC support.
-	 * Clearing MCE may fault in some environments (that also force #MC
-	 * support). Any machine check that occurs before #MC support is fully
-	 * configured will crash the system regardless of the CR4.MCE value set
-	 * here.
-	 */
+	/* Toggle CR4.LA57 */
 	movl	%cr4, %eax
-	andl	$X86_CR4_MCE, %eax
-#else
-	movl	$0, %eax
-#endif
-
-	/* Enable PAE and LA57 (if required) paging modes */
-	orl	$X86_CR4_PAE, %eax
-	testl	%esi, %esi
-	jz	1f
-	orl	$X86_CR4_LA57, %eax
-1:
+	btcl	$X86_CR4_LA57_BIT, %eax
 	movl	%eax, %cr4
 
 	/* Enable paging again. */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index f9cc86b2ee55..4213473ae548 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -103,7 +103,7 @@ static unsigned long find_trampoline_placement(void)
 
 asmlinkage void configure_5level_paging(struct boot_params *bp)
 {
-	void (*toggle_la57)(void *trampoline, bool enable_5lvl);
+	void (*toggle_la57)(void *trampoline);
 	bool l5_required = false;
 
 	/* Initialize boot_params. Required for cmdline_find_option_bool(). */
@@ -133,6 +133,13 @@ asmlinkage void configure_5level_paging(struct boot_params *bp)
 		ptrs_per_p4d = 512;
 	}
 
+	/*
+	 * The trampoline will not be used if the paging mode is already set to
+	 * the desired one.
+	 */
+	if (l5_required == !!(native_read_cr4() & X86_CR4_LA57))
+		return;
+
 	trampoline_32bit = (unsigned long *)find_trampoline_placement();
 
 	/* Preserve trampoline memory */
@@ -160,18 +167,8 @@ asmlinkage void configure_5level_paging(struct boot_params *bp)
 	 *
 	 * The new page table will be used by trampoline code for switching
 	 * from 4- to 5-level paging or vice versa.
-	 *
-	 * If switching is not required, the page table is unused: trampoline
-	 * code wouldn't touch CR3.
 	 */
 
-	/*
-	 * We are not going to use the page table in trampoline memory if we
-	 * are already in the desired paging mode.
-	 */
-	if (l5_required == !!(native_read_cr4() & X86_CR4_LA57))
-		goto out;
-
 	if (l5_required) {
 		/*
 		 * For 4- to 5-level paging transition, set up current CR3 as
@@ -194,8 +191,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp)
 		       (void *)src, PAGE_SIZE);
 	}
 
-out:
-	toggle_la57(trampoline_32bit, l5_required);
+	toggle_la57(trampoline_32bit);
 }
 
 void cleanup_trampoline(void *pgtable)

From cb83cece57e1889109dd73ea08ee338668c9d1b8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:11 +0200
Subject: [PATCH 0829/2047] x86/decompressor: Pass pgtable address to
 trampoline directly

The only remaining use of the trampoline address by the trampoline
itself is deriving the page table address from it, and this involves
adding an offset of 0x0. So simplify this, and pass the new CR3 value
directly.

This makes the fact that the page table happens to be at the start of
the trampoline allocation an implementation detail of the caller.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-15-ardb@kernel.org
---
 arch/x86/boot/compressed/head_64.S    | 8 ++++----
 arch/x86/boot/compressed/pgtable.h    | 2 --
 arch/x86/boot/compressed/pgtable_64.c | 9 ++++-----
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 8730b1d58e2b..afdaf8cb8bb9 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -521,8 +521,9 @@ SYM_FUNC_END(.Lrelocated)
  * running in 64-bit mode.
  *
  * Return address is at the top of the stack (might be above 4G).
- * The first argument (EDI) contains the 32-bit addressable base of the
- * trampoline memory.
+ * The first argument (EDI) contains the address of the temporary PGD level
+ * page table in 32-bit addressable memory which will be programmed into
+ * register CR3.
  */
 	.section ".rodata", "a", @progbits
 SYM_CODE_START(trampoline_32bit_src)
@@ -575,8 +576,7 @@ SYM_CODE_START(trampoline_32bit_src)
 	movl	%eax, %cr0
 
 	/* Point CR3 to the trampoline's new top level page table */
-	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%edi), %eax
-	movl	%eax, %cr3
+	movl	%edi, %cr3
 
 	/* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */
 	movl	$MSR_EFER, %ecx
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index c6b0903aded0..6d595abe06b3 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -3,8 +3,6 @@
 
 #define TRAMPOLINE_32BIT_SIZE		(2 * PAGE_SIZE)
 
-#define TRAMPOLINE_32BIT_PGTABLE_OFFSET	0
-
 #define TRAMPOLINE_32BIT_CODE_OFFSET	PAGE_SIZE
 #define TRAMPOLINE_32BIT_CODE_SIZE	0xA0
 
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 4213473ae548..eab4e6b568ae 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -103,7 +103,7 @@ static unsigned long find_trampoline_placement(void)
 
 asmlinkage void configure_5level_paging(struct boot_params *bp)
 {
-	void (*toggle_la57)(void *trampoline);
+	void (*toggle_la57)(void *cr3);
 	bool l5_required = false;
 
 	/* Initialize boot_params. Required for cmdline_find_option_bool(). */
@@ -174,7 +174,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp)
 		 * For 4- to 5-level paging transition, set up current CR3 as
 		 * the first and the only entry in a new top-level page table.
 		 */
-		trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
+		*trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC;
 	} else {
 		unsigned long src;
 
@@ -187,8 +187,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp)
 		 * may be above 4G.
 		 */
 		src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
-		memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
-		       (void *)src, PAGE_SIZE);
+		memcpy(trampoline_32bit, (void *)src, PAGE_SIZE);
 	}
 
 	toggle_la57(trampoline_32bit);
@@ -198,7 +197,7 @@ void cleanup_trampoline(void *pgtable)
 {
 	void *trampoline_pgtable;
 
-	trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
+	trampoline_pgtable = trampoline_32bit;
 
 	/*
 	 * Move the top level page table out of trampoline memory,

From 03dda95137d3247564854ad9032c0354273a159d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:12 +0200
Subject: [PATCH 0830/2047] x86/decompressor: Merge trampoline cleanup with
 switching code

Now that the trampoline setup code and the actual invocation of it are
all done from the C routine, the trampoline cleanup can be merged into
it as well, instead of returning to asm just to call another C function.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20230807162720.545787-16-ardb@kernel.org
---
 arch/x86/boot/compressed/head_64.S    | 14 ++++----------
 arch/x86/boot/compressed/pgtable_64.c | 18 ++++--------------
 2 files changed, 8 insertions(+), 24 deletions(-)

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index afdaf8cb8bb9..fb0e562c26f6 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -430,20 +430,14 @@ SYM_CODE_START(startup_64)
 	 * a trampoline in 32-bit addressable memory if the current number does
 	 * not match the desired number.
 	 *
-	 * Pass the boot_params pointer as the first argument.
+	 * Pass the boot_params pointer as the first argument. The second
+	 * argument is the relocated address of the page table to use instead
+	 * of the page table in trampoline memory (if required).
 	 */
 	movq	%r15, %rdi
+	leaq	rva(top_pgtable)(%rbx), %rsi
 	call	configure_5level_paging
 
-	/*
-	 * cleanup_trampoline() would restore trampoline memory.
-	 *
-	 * RDI is address of the page table to use instead of page table
-	 * in trampoline memory (if required).
-	 */
-	leaq	rva(top_pgtable)(%rbx), %rdi
-	call	cleanup_trampoline
-
 	/* Zero EFLAGS */
 	pushq	$0
 	popfq
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index eab4e6b568ae..7939eb6e6ce9 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -101,7 +101,7 @@ static unsigned long find_trampoline_placement(void)
 	return bios_start - TRAMPOLINE_32BIT_SIZE;
 }
 
-asmlinkage void configure_5level_paging(struct boot_params *bp)
+asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
 {
 	void (*toggle_la57)(void *cr3);
 	bool l5_required = false;
@@ -191,22 +191,12 @@ asmlinkage void configure_5level_paging(struct boot_params *bp)
 	}
 
 	toggle_la57(trampoline_32bit);
-}
-
-void cleanup_trampoline(void *pgtable)
-{
-	void *trampoline_pgtable;
-
-	trampoline_pgtable = trampoline_32bit;
 
 	/*
-	 * Move the top level page table out of trampoline memory,
-	 * if it's there.
+	 * Move the top level page table out of trampoline memory.
 	 */
-	if ((void *)__native_read_cr3() == trampoline_pgtable) {
-		memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
-		native_write_cr3((unsigned long)pgtable);
-	}
+	memcpy(pgtable, trampoline_32bit, PAGE_SIZE);
+	native_write_cr3((unsigned long)pgtable);
 
 	/* Restore trampoline memory */
 	memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);

From cb1c9e02b0c13032c3aec325643453ba48e96e17 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:13 +0200
Subject: [PATCH 0831/2047] x86/efistub: Perform 4/5 level paging switch from
 the stub

In preparation for updating the EFI stub boot flow to avoid the bare
metal decompressor code altogether, implement the support code for
switching between 4 and 5 levels of paging before jumping to the kernel
proper.

Reuse the newly refactored trampoline that the bare metal decompressor
uses, but relies on EFI APIs to allocate 32-bit addressable memory and
remap it with the appropriate permissions. Given that the bare metal
decompressor will no longer call into the trampoline if the number of
paging levels is already set correctly, it is no longer needed to remove
NX restrictions from the memory range where this trampoline may end up.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20230807162720.545787-17-ardb@kernel.org
---
 drivers/firmware/efi/libstub/Makefile         |  1 +
 .../firmware/efi/libstub/efi-stub-helper.c    |  2 +
 drivers/firmware/efi/libstub/efistub.h        |  1 +
 drivers/firmware/efi/libstub/x86-5lvl.c       | 95 +++++++++++++++++++
 drivers/firmware/efi/libstub/x86-stub.c       | 40 +++-----
 drivers/firmware/efi/libstub/x86-stub.h       | 17 ++++
 6 files changed, 130 insertions(+), 26 deletions(-)
 create mode 100644 drivers/firmware/efi/libstub/x86-5lvl.c
 create mode 100644 drivers/firmware/efi/libstub/x86-stub.h

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 16d64a34d1e1..ae8874401a9f 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -88,6 +88,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB)	+= efi-stub.o string.o intrinsics.o systable.o \
 lib-$(CONFIG_ARM)		+= arm32-stub.o
 lib-$(CONFIG_ARM64)		+= arm64.o arm64-stub.o smbios.o
 lib-$(CONFIG_X86)		+= x86-stub.o
+lib-$(CONFIG_X86_64)		+= x86-5lvl.o
 lib-$(CONFIG_RISCV)		+= riscv.o riscv-stub.o
 lib-$(CONFIG_LOONGARCH)		+= loongarch.o loongarch-stub.o
 
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 732984295295..bfa30625f5d0 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -73,6 +73,8 @@ efi_status_t efi_parse_options(char const *cmdline)
 			efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
 		} else if (!strcmp(param, "noinitrd")) {
 			efi_noinitrd = true;
+		} else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
+			efi_no5lvl = true;
 		} else if (!strcmp(param, "efi") && val) {
 			efi_nochunk = parse_option_str(val, "nochunk");
 			efi_novamap |= parse_option_str(val, "novamap");
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 6aa38a1bf126..06b7abc92ced 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -33,6 +33,7 @@
 #define EFI_ALLOC_LIMIT		ULONG_MAX
 #endif
 
+extern bool efi_no5lvl;
 extern bool efi_nochunk;
 extern bool efi_nokaslr;
 extern int efi_loglevel;
diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c
new file mode 100644
index 000000000000..479dd445acdc
--- /dev/null
+++ b/drivers/firmware/efi/libstub/x86-5lvl.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/efi.h>
+
+#include <asm/boot.h>
+#include <asm/desc.h>
+#include <asm/efi.h>
+
+#include "efistub.h"
+#include "x86-stub.h"
+
+bool efi_no5lvl;
+
+static void (*la57_toggle)(void *cr3);
+
+static const struct desc_struct gdt[] = {
+	[GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+	[GDT_ENTRY_KERNEL_CS]   = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+};
+
+/*
+ * Enabling (or disabling) 5 level paging is tricky, because it can only be
+ * done from 32-bit mode with paging disabled. This means not only that the
+ * code itself must be running from 32-bit addressable physical memory, but
+ * also that the root page table must be 32-bit addressable, as programming
+ * a 64-bit value into CR3 when running in 32-bit mode is not supported.
+ */
+efi_status_t efi_setup_5level_paging(void)
+{
+	u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src;
+	efi_status_t status;
+	u8 *la57_code;
+
+	if (!efi_is_64bit())
+		return EFI_SUCCESS;
+
+	/* check for 5 level paging support */
+	if (native_cpuid_eax(0) < 7 ||
+	    !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+		return EFI_SUCCESS;
+
+	/* allocate some 32-bit addressable memory for code and a page table */
+	status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code,
+				    U32_MAX);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size);
+	memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size);
+
+	/*
+	 * To avoid the need to allocate a 32-bit addressable stack, the
+	 * trampoline uses a LJMP instruction to switch back to long mode.
+	 * LJMP takes an absolute destination address, which needs to be
+	 * fixed up at runtime.
+	 */
+	*(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code;
+
+	efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE);
+
+	return EFI_SUCCESS;
+}
+
+void efi_5level_switch(void)
+{
+	bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
+	bool have_la57 = native_read_cr4() & X86_CR4_LA57;
+	bool need_toggle = want_la57 ^ have_la57;
+	u64 *pgt = (void *)la57_toggle + PAGE_SIZE;
+	u64 *cr3 = (u64 *)__native_read_cr3();
+	u64 *new_cr3;
+
+	if (!la57_toggle || !need_toggle)
+		return;
+
+	if (!have_la57) {
+		/*
+		 * 5 level paging will be enabled, so a root level page needs
+		 * to be allocated from the 32-bit addressable physical region,
+		 * with its first entry referring to the existing hierarchy.
+		 */
+		new_cr3 = memset(pgt, 0, PAGE_SIZE);
+		new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC;
+	} else {
+		/* take the new root table pointer from the current entry #0 */
+		new_cr3 = (u64 *)(cr3[0] & PAGE_MASK);
+
+		/* copy the new root table if it is not 32-bit addressable */
+		if ((u64)new_cr3 > U32_MAX)
+			new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE);
+	}
+
+	native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt });
+
+	la57_toggle(new_cr3);
+}
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 9247dbc7dbbd..af5f50617a5b 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -17,6 +17,7 @@
 #include <asm/boot.h>
 
 #include "efistub.h"
+#include "x86-stub.h"
 
 /* Maximum physical address for 64-bit kernel with 4-level paging */
 #define MAXMEM_X86_64_4LEVEL (1ull << 46)
@@ -223,8 +224,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
 	}
 }
 
-static void
-adjust_memory_range_protection(unsigned long start, unsigned long size)
+void efi_adjust_memory_range_protection(unsigned long start,
+					unsigned long size)
 {
 	efi_status_t status;
 	efi_gcd_memory_space_desc_t desc;
@@ -278,35 +279,14 @@ adjust_memory_range_protection(unsigned long start, unsigned long size)
 	}
 }
 
-/*
- * Trampoline takes 2 pages and can be loaded in first megabyte of memory
- * with its end placed between 128k and 640k where BIOS might start.
- * (see arch/x86/boot/compressed/pgtable_64.c)
- *
- * We cannot find exact trampoline placement since memory map
- * can be modified by UEFI, and it can alter the computed address.
- */
-
-#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024)
-#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024)
-
 extern const u8 startup_32[], startup_64[];
 
 static void
 setup_memory_protection(unsigned long image_base, unsigned long image_size)
 {
-	/*
-	 * Allow execution of possible trampoline used
-	 * for switching between 4- and 5-level page tables
-	 * and relocated kernel image.
-	 */
-
-	adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE,
-				       TRAMPOLINE_PLACEMENT_SIZE);
-
 #ifdef CONFIG_64BIT
 	if (image_base != (unsigned long)startup_32)
-		adjust_memory_range_protection(image_base, image_size);
+		efi_adjust_memory_range_protection(image_base, image_size);
 #else
 	/*
 	 * Clear protection flags on a whole range of possible
@@ -316,8 +296,8 @@ setup_memory_protection(unsigned long image_base, unsigned long image_size)
 	 * need to remove possible protection on relocated image
 	 * itself disregarding further relocations.
 	 */
-	adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
-				       KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
+	efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
+					   KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
 #endif
 }
 
@@ -839,6 +819,12 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
 		efi_dxe_table = NULL;
 	}
 
+	status = efi_setup_5level_paging();
+	if (status != EFI_SUCCESS) {
+		efi_err("efi_setup_5level_paging() failed!\n");
+		goto fail;
+	}
+
 	/*
 	 * If the kernel isn't already loaded at a suitable address,
 	 * relocate it.
@@ -959,6 +945,8 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
 		goto fail;
 	}
 
+	efi_5level_switch();
+
 	if (IS_ENABLED(CONFIG_X86_64))
 		bzimage_addr += startup_64 - startup_32;
 
diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h
new file mode 100644
index 000000000000..37c5a36b9d8c
--- /dev/null
+++ b/drivers/firmware/efi/libstub/x86-stub.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/efi.h>
+
+extern void trampoline_32bit_src(void *, bool);
+extern const u16 trampoline_ljmp_imm_offset;
+
+void efi_adjust_memory_range_protection(unsigned long start,
+					unsigned long size);
+
+#ifdef CONFIG_X86_64
+efi_status_t efi_setup_5level_paging(void);
+void efi_5level_switch(void);
+#else
+static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; }
+static inline void efi_5level_switch(void) {}
+#endif

From 11078876b7a6a1b7226344fecab968945c806832 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:14 +0200
Subject: [PATCH 0832/2047] x86/efistub: Prefer EFI memory attributes protocol
 over DXE services

Currently, the EFI stub relies on DXE services in some cases to clear
non-execute restrictions from page allocations that need to be
executable. This is dodgy, because DXE services are not specified by
UEFI but by PI, and they are not intended for consumption by OS loaders.
However, no alternative existed at the time.

Now, there is a new UEFI protocol that should be used instead, so if it
exists, prefer it over the DXE services calls.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-18-ardb@kernel.org
---
 drivers/firmware/efi/libstub/x86-stub.c | 29 ++++++++++++++++++-------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index af5f50617a5b..acb1c65bf8ac 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -26,6 +26,7 @@ const efi_system_table_t *efi_system_table;
 const efi_dxe_services_table_t *efi_dxe_table;
 u32 image_offset __section(".data");
 static efi_loaded_image_t *image = NULL;
+static efi_memory_attribute_protocol_t *memattr;
 
 typedef union sev_memory_acceptance_protocol sev_memory_acceptance_protocol_t;
 union sev_memory_acceptance_protocol {
@@ -233,12 +234,18 @@ void efi_adjust_memory_range_protection(unsigned long start,
 	unsigned long rounded_start, rounded_end;
 	unsigned long unprotect_start, unprotect_size;
 
-	if (efi_dxe_table == NULL)
-		return;
-
 	rounded_start = rounddown(start, EFI_PAGE_SIZE);
 	rounded_end = roundup(start + size, EFI_PAGE_SIZE);
 
+	if (memattr != NULL) {
+		efi_call_proto(memattr, clear_memory_attributes, rounded_start,
+			       rounded_end - rounded_start, EFI_MEMORY_XP);
+		return;
+	}
+
+	if (efi_dxe_table == NULL)
+		return;
+
 	/*
 	 * Don't modify memory region attributes, they are
 	 * already suitable, to lower the possibility to
@@ -801,6 +808,7 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
 			       efi_system_table_t *sys_table_arg,
 			       struct boot_params *boot_params)
 {
+	efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID;
 	unsigned long bzimage_addr = (unsigned long)startup_32;
 	unsigned long buffer_start, buffer_end;
 	struct setup_header *hdr = &boot_params->hdr;
@@ -812,13 +820,18 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
 	if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
 		efi_exit(handle, EFI_INVALID_PARAMETER);
 
-	efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
-	if (efi_dxe_table &&
-	    efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) {
-		efi_warn("Ignoring DXE services table: invalid signature\n");
-		efi_dxe_table = NULL;
+	if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) {
+		efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
+		if (efi_dxe_table &&
+		    efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) {
+			efi_warn("Ignoring DXE services table: invalid signature\n");
+			efi_dxe_table = NULL;
+		}
 	}
 
+	/* grab the memory attributes protocol if it exists */
+	efi_bs_call(locate_protocol, &guid, NULL, (void **)&memattr);
+
 	status = efi_setup_5level_paging();
 	if (status != EFI_SUCCESS) {
 		efi_err("efi_setup_5level_paging() failed!\n");

From 8217ad0a435ff06d651d7298ea8ae8d72388179e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:15 +0200
Subject: [PATCH 0833/2047] decompress: Use 8 byte alignment

The ZSTD decompressor requires malloc() allocations to be 8 byte
aligned, so ensure that this the case.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-19-ardb@kernel.org
---
 include/linux/decompress/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h
index 9192986b1a73..ac862422df15 100644
--- a/include/linux/decompress/mm.h
+++ b/include/linux/decompress/mm.h
@@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size)
 	if (!malloc_ptr)
 		malloc_ptr = free_mem_ptr;
 
-	malloc_ptr = (malloc_ptr + 3) & ~3;     /* Align */
+	malloc_ptr = (malloc_ptr + 7) & ~7;     /* Align */
 
 	p = (void *)malloc_ptr;
 	malloc_ptr += size;

From 24388292e2d7fae79a0d4183cc91716b851299cf Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:16 +0200
Subject: [PATCH 0834/2047] x86/decompressor: Move global symbol references to
 C code

It is no longer necessary to be cautious when referring to global
variables in the position independent decompressor code, now that it is
built using PIE codegen and makes an assertion in the linker script that
no GOT entries exist (which would require adjustment for the actual
runtime load address of the decompressor binary).

This means global variables can be referenced directly from C code,
instead of having to pass their runtime addresses into C routines from
asm code, which needs to happen at each call site. Do so for the code
that will be called directly from the EFI stub after a subsequent patch,
and avoid the need to duplicate this logic a third time.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-20-ardb@kernel.org
---
 arch/x86/boot/compressed/head_32.S |  8 --------
 arch/x86/boot/compressed/head_64.S | 10 ++--------
 arch/x86/boot/compressed/misc.c    | 16 +++++++++-------
 3 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 8876ffe30e9a..3af4a383615b 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -168,13 +168,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
  */
 	/* push arguments for extract_kernel: */
 
-	pushl	output_len@GOTOFF(%ebx)	/* decompressed length, end of relocs */
 	pushl	%ebp			/* output address */
-	pushl	input_len@GOTOFF(%ebx)	/* input_len */
-	leal	input_data@GOTOFF(%ebx), %eax
-	pushl	%eax			/* input_data */
-	leal	boot_heap@GOTOFF(%ebx), %eax
-	pushl	%eax			/* heap area */
 	pushl	%esi			/* real mode pointer */
 	call	extract_kernel		/* returns kernel entry point in %eax */
 	addl	$24, %esp
@@ -202,8 +196,6 @@ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
  */
 	.bss
 	.balign 4
-boot_heap:
-	.fill BOOT_HEAP_SIZE, 1, 0
 boot_stack:
 	.fill BOOT_STACK_SIZE, 1, 0
 boot_stack_end:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fb0e562c26f6..28f46051c706 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -493,13 +493,9 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
 /*
  * Do the extraction, and jump to the new kernel..
  */
-	/* pass struct boot_params pointer */
+	/* pass struct boot_params pointer and output target address */
 	movq	%r15, %rdi
-	leaq	boot_heap(%rip), %rsi	/* malloc area for uncompression */
-	leaq	input_data(%rip), %rdx  /* input_data */
-	movl	input_len(%rip), %ecx	/* input_len */
-	movq	%rbp, %r8		/* output target address */
-	movl	output_len(%rip), %r9d	/* decompressed length, end of relocs */
+	movq	%rbp, %rsi
 	call	extract_kernel		/* returns kernel entry point in %rax */
 
 /*
@@ -657,8 +653,6 @@ SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
  */
 	.bss
 	.balign 4
-SYM_DATA_LOCAL(boot_heap,	.fill BOOT_HEAP_SIZE, 1, 0)
-
 SYM_DATA_START_LOCAL(boot_stack)
 	.fill BOOT_STACK_SIZE, 1, 0
 	.balign 16
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 94b7abcf624b..2d91d56b59e1 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -330,6 +330,11 @@ static size_t parse_elf(void *output)
 	return ehdr.e_entry - LOAD_PHYSICAL_ADDR;
 }
 
+static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4);
+
+extern unsigned char input_data[];
+extern unsigned int input_len, output_len;
+
 /*
  * The compressed kernel image (ZO), has been moved so that its position
  * is against the end of the buffer used to hold the uncompressed kernel
@@ -347,14 +352,11 @@ static size_t parse_elf(void *output)
  *             |-------uncompressed kernel image---------|
  *
  */
-asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
-				  unsigned char *input_data,
-				  unsigned long input_len,
-				  unsigned char *output,
-				  unsigned long output_len)
+asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
 {
 	const unsigned long kernel_total_size = VO__end - VO__text;
 	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+	memptr heap = (memptr)boot_heap;
 	unsigned long needed_size;
 	size_t entry_offset;
 
@@ -412,7 +414,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	 * entries. This ensures the full mapped area is usable RAM
 	 * and doesn't include any reserved areas.
 	 */
-	needed_size = max(output_len, kernel_total_size);
+	needed_size = max_t(unsigned long, output_len, kernel_total_size);
 #ifdef CONFIG_X86_64
 	needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);
 #endif
@@ -443,7 +445,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 #ifdef CONFIG_X86_64
 	if (heap > 0x3fffffffffffUL)
 		error("Destination address too large");
-	if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+	if (virt_addr + needed_size > KERNEL_IMAGE_SIZE)
 		error("Destination virtual address is beyond the kernel mapping area");
 #else
 	if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))

From 83381519352d6b5b3e429bf72aaab907480cb6b6 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:17 +0200
Subject: [PATCH 0835/2047] x86/decompressor: Factor out kernel decompression
 and relocation

Factor out the decompressor sequence that invokes the decompressor,
parses the ELF and applies the relocations so that it can be called
directly from the EFI stub.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-21-ardb@kernel.org
---
 arch/x86/boot/compressed/misc.c | 28 +++++++++++++++++++++++-----
 arch/x86/include/asm/boot.h     |  8 ++++++++
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 2d91d56b59e1..f711f2a85862 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -330,11 +330,33 @@ static size_t parse_elf(void *output)
 	return ehdr.e_entry - LOAD_PHYSICAL_ADDR;
 }
 
+const unsigned long kernel_total_size = VO__end - VO__text;
+
 static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4);
 
 extern unsigned char input_data[];
 extern unsigned int input_len, output_len;
 
+unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+				void (*error)(char *x))
+{
+	unsigned long entry;
+
+	if (!free_mem_ptr) {
+		free_mem_ptr     = (unsigned long)boot_heap;
+		free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap);
+	}
+
+	if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len,
+			 NULL, error) < 0)
+		return ULONG_MAX;
+
+	entry = parse_elf(outbuf);
+	handle_relocations(outbuf, output_len, virt_addr);
+
+	return entry;
+}
+
 /*
  * The compressed kernel image (ZO), has been moved so that its position
  * is against the end of the buffer used to hold the uncompressed kernel
@@ -354,7 +376,6 @@ extern unsigned int input_len, output_len;
  */
 asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
 {
-	const unsigned long kernel_total_size = VO__end - VO__text;
 	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
 	memptr heap = (memptr)boot_heap;
 	unsigned long needed_size;
@@ -463,10 +484,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
 		accept_memory(__pa(output), __pa(output) + needed_size);
 	}
 
-	__decompress(input_data, input_len, NULL, NULL, output, output_len,
-			NULL, error);
-	entry_offset = parse_elf(output);
-	handle_relocations(output, output_len, virt_addr);
+	entry_offset = decompress_kernel(output, virt_addr, error);
 
 	debug_putstr("done.\nBooting the kernel (entry_offset: 0x");
 	debug_puthex(entry_offset);
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 9191280d9ea3..4ae14339cb8c 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -62,4 +62,12 @@
 # define BOOT_STACK_SIZE	0x1000
 #endif
 
+#ifndef __ASSEMBLY__
+extern unsigned int output_len;
+extern const unsigned long kernel_total_size;
+
+unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+				void (*error)(char *x));
+#endif
+
 #endif /* _ASM_X86_BOOT_H */

From bc5ddceff4c14494d83449ad45c985e6cd353fce Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:18 +0200
Subject: [PATCH 0836/2047] efi/libstub: Add limit argument to
 efi_random_alloc()

x86 will need to limit the kernel memory allocation to the lowest 512
MiB of memory, to match the behavior of the existing bare metal KASLR
physical randomization logic. So in preparation for that, add a limit
parameter to efi_random_alloc() and wire it up.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-22-ardb@kernel.org
---
 drivers/firmware/efi/libstub/arm64-stub.c  |  2 +-
 drivers/firmware/efi/libstub/efistub.h     |  2 +-
 drivers/firmware/efi/libstub/randomalloc.c | 10 ++++++----
 drivers/firmware/efi/libstub/zboot.c       |  2 +-
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index 770b8ecb7398..8c40fc89f5f9 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -106,7 +106,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
 		 */
 		status = efi_random_alloc(*reserve_size, min_kimg_align,
 					  reserve_addr, phys_seed,
-					  EFI_LOADER_CODE);
+					  EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
 		if (status != EFI_SUCCESS)
 			efi_warn("efi_random_alloc() failed: 0x%lx\n", status);
 	} else {
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 06b7abc92ced..9823f6fb3e01 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -956,7 +956,7 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out);
 
 efi_status_t efi_random_alloc(unsigned long size, unsigned long align,
 			      unsigned long *addr, unsigned long random_seed,
-			      int memory_type);
+			      int memory_type, unsigned long alloc_limit);
 
 efi_status_t efi_random_get_seed(void);
 
diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c
index 32c7a54923b4..674a064b8f7a 100644
--- a/drivers/firmware/efi/libstub/randomalloc.c
+++ b/drivers/firmware/efi/libstub/randomalloc.c
@@ -16,7 +16,8 @@
  */
 static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
 					 unsigned long size,
-					 unsigned long align_shift)
+					 unsigned long align_shift,
+					 u64 alloc_limit)
 {
 	unsigned long align = 1UL << align_shift;
 	u64 first_slot, last_slot, region_end;
@@ -29,7 +30,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
 		return 0;
 
 	region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1,
-			 (u64)EFI_ALLOC_LIMIT);
+			 alloc_limit);
 	if (region_end < size)
 		return 0;
 
@@ -54,7 +55,8 @@ efi_status_t efi_random_alloc(unsigned long size,
 			      unsigned long align,
 			      unsigned long *addr,
 			      unsigned long random_seed,
-			      int memory_type)
+			      int memory_type,
+			      unsigned long alloc_limit)
 {
 	unsigned long total_slots = 0, target_slot;
 	unsigned long total_mirrored_slots = 0;
@@ -76,7 +78,7 @@ efi_status_t efi_random_alloc(unsigned long size,
 		efi_memory_desc_t *md = (void *)map->map + map_offset;
 		unsigned long slots;
 
-		slots = get_entry_num_slots(md, size, ilog2(align));
+		slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit);
 		MD_NUM_SLOTS(md) = slots;
 		total_slots += slots;
 		if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
diff --git a/drivers/firmware/efi/libstub/zboot.c b/drivers/firmware/efi/libstub/zboot.c
index e5d7fa1f1d8f..bdb17eac0cb4 100644
--- a/drivers/firmware/efi/libstub/zboot.c
+++ b/drivers/firmware/efi/libstub/zboot.c
@@ -119,7 +119,7 @@ efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab)
 		}
 
 		status = efi_random_alloc(alloc_size, min_kimg_align, &image_base,
-					  seed, EFI_LOADER_CODE);
+					  seed, EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
 		if (status != EFI_SUCCESS) {
 			efi_err("Failed to allocate memory\n");
 			goto free_cmdline;

From 31c77a50992e8dd136feed7b67073bb5f1f978cc Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:19 +0200
Subject: [PATCH 0837/2047] x86/efistub: Perform SNP feature test while running
 in the firmware

Before refactoring the EFI stub boot flow to avoid the legacy bare metal
decompressor, duplicate the SNP feature check in the EFI stub before
handing over to the kernel proper.

The SNP feature check can be performed while running under the EFI boot
services, which means it can force the boot to fail gracefully and
return an error to the bootloader if the loaded kernel does not
implement support for all the features that the hypervisor enabled.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-23-ardb@kernel.org
---
 arch/x86/boot/compressed/sev.c          | 112 ++++++++++++++----------
 arch/x86/include/asm/sev.h              |   4 +
 drivers/firmware/efi/libstub/x86-stub.c |  17 ++++
 3 files changed, 87 insertions(+), 46 deletions(-)

diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index c3e343bd4760..199155b8af3b 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -367,20 +367,25 @@ static void enforce_vmpl0(void)
  */
 #define SNP_FEATURES_PRESENT (0)
 
+u64 snp_get_unsupported_features(u64 status)
+{
+	if (!(status & MSR_AMD64_SEV_SNP_ENABLED))
+		return 0;
+
+	return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+}
+
 void snp_check_features(void)
 {
 	u64 unsupported;
 
-	if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
-		return;
-
 	/*
 	 * Terminate the boot if hypervisor has enabled any feature lacking
 	 * guest side implementation. Pass on the unsupported features mask through
 	 * EXIT_INFO_2 of the GHCB protocol so that those features can be reported
 	 * as part of the guest boot failure.
 	 */
-	unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+	unsupported = snp_get_unsupported_features(sev_status);
 	if (unsupported) {
 		if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb()))
 			sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
@@ -390,10 +395,45 @@ void snp_check_features(void)
 	}
 }
 
-void sev_enable(struct boot_params *bp)
+/*
+ * sev_check_cpu_support - Check for SEV support in the CPU capabilities
+ *
+ * Returns < 0 if SEV is not supported, otherwise the position of the
+ * encryption bit in the page table descriptors.
+ */
+static int sev_check_cpu_support(void)
 {
 	unsigned int eax, ebx, ecx, edx;
+
+	/* Check for the SME/SEV support leaf */
+	eax = 0x80000000;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+	if (eax < 0x8000001f)
+		return -ENODEV;
+
+	/*
+	 * Check for the SME/SEV feature:
+	 *   CPUID Fn8000_001F[EAX]
+	 *   - Bit 0 - Secure Memory Encryption support
+	 *   - Bit 1 - Secure Encrypted Virtualization support
+	 *   CPUID Fn8000_001F[EBX]
+	 *   - Bits 5:0 - Pagetable bit position used to indicate encryption
+	 */
+	eax = 0x8000001f;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+	/* Check whether SEV is supported */
+	if (!(eax & BIT(1)))
+		return -ENODEV;
+
+	return ebx & 0x3f;
+}
+
+void sev_enable(struct boot_params *bp)
+{
 	struct msr m;
+	int bitpos;
 	bool snp;
 
 	/*
@@ -413,26 +453,7 @@ void sev_enable(struct boot_params *bp)
 	 * which is good enough.
 	 */
 
-	/* Check for the SME/SEV support leaf */
-	eax = 0x80000000;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-	if (eax < 0x8000001f)
-		return;
-
-	/*
-	 * Check for the SME/SEV feature:
-	 *   CPUID Fn8000_001F[EAX]
-	 *   - Bit 0 - Secure Memory Encryption support
-	 *   - Bit 1 - Secure Encrypted Virtualization support
-	 *   CPUID Fn8000_001F[EBX]
-	 *   - Bits 5:0 - Pagetable bit position used to indicate encryption
-	 */
-	eax = 0x8000001f;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-	/* Check whether SEV is supported */
-	if (!(eax & BIT(1)))
+	if (sev_check_cpu_support() < 0)
 		return;
 
 	/*
@@ -443,26 +464,8 @@ void sev_enable(struct boot_params *bp)
 
 	/* Now repeat the checks with the SNP CPUID table. */
 
-	/* Recheck the SME/SEV support leaf */
-	eax = 0x80000000;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-	if (eax < 0x8000001f)
-		return;
-
-	/*
-	 * Recheck for the SME/SEV feature:
-	 *   CPUID Fn8000_001F[EAX]
-	 *   - Bit 0 - Secure Memory Encryption support
-	 *   - Bit 1 - Secure Encrypted Virtualization support
-	 *   CPUID Fn8000_001F[EBX]
-	 *   - Bits 5:0 - Pagetable bit position used to indicate encryption
-	 */
-	eax = 0x8000001f;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-	/* Check whether SEV is supported */
-	if (!(eax & BIT(1))) {
+	bitpos = sev_check_cpu_support();
+	if (bitpos < 0) {
 		if (snp)
 			error("SEV-SNP support indicated by CC blob, but not CPUID.");
 		return;
@@ -494,7 +497,24 @@ void sev_enable(struct boot_params *bp)
 	if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
 		error("SEV-SNP supported indicated by CC blob, but not SEV status MSR.");
 
-	sme_me_mask = BIT_ULL(ebx & 0x3f);
+	sme_me_mask = BIT_ULL(bitpos);
+}
+
+/*
+ * sev_get_status - Retrieve the SEV status mask
+ *
+ * Returns 0 if the CPU is not SEV capable, otherwise the value of the
+ * AMD64_SEV MSR.
+ */
+u64 sev_get_status(void)
+{
+	struct msr m;
+
+	if (sev_check_cpu_support() < 0)
+		return 0;
+
+	boot_rdmsr(MSR_AMD64_SEV, &m);
+	return m.q;
 }
 
 /* Search for Confidential Computing blob in the EFI config table. */
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 66c806784c52..b97d239e18ea 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -210,6 +210,8 @@ bool snp_init(struct boot_params *bp);
 void __init __noreturn snp_abort(void);
 int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
 void snp_accept_memory(phys_addr_t start, phys_addr_t end);
+u64 snp_get_unsupported_features(u64 status);
+u64 sev_get_status(void);
 #else
 static inline void sev_es_ist_enter(struct pt_regs *regs) { }
 static inline void sev_es_ist_exit(void) { }
@@ -235,6 +237,8 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
 }
 
 static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
+static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
+static inline u64 sev_get_status(void) { return 0; }
 #endif
 
 #endif
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index acb1c65bf8ac..b4685da2b8d5 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -15,6 +15,7 @@
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/boot.h>
+#include <asm/sev.h>
 
 #include "efistub.h"
 #include "x86-stub.h"
@@ -790,6 +791,19 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
 	return EFI_SUCCESS;
 }
 
+static bool have_unsupported_snp_features(void)
+{
+	u64 unsupported;
+
+	unsupported = snp_get_unsupported_features(sev_get_status());
+	if (unsupported) {
+		efi_err("Unsupported SEV-SNP features detected: 0x%llx\n",
+			unsupported);
+		return true;
+	}
+	return false;
+}
+
 static void __noreturn enter_kernel(unsigned long kernel_addr,
 				    struct boot_params *boot_params)
 {
@@ -820,6 +834,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
 	if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
 		efi_exit(handle, EFI_INVALID_PARAMETER);
 
+	if (have_unsupported_snp_features())
+		efi_exit(handle, EFI_UNSUPPORTED);
+
 	if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) {
 		efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
 		if (efi_dxe_table &&

From a1b87d54f4e45ff5e0d081fb1d9db3bf1a8fb39a Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 7 Aug 2023 18:27:20 +0200
Subject: [PATCH 0838/2047] x86/efistub: Avoid legacy decompressor when doing
 EFI boot

The bare metal decompressor code was never really intended to run in a
hosted environment such as the EFI boot services, and does a few things
that are becoming problematic in the context of EFI boot now that the
logo requirements are getting tighter: EFI executables will no longer be
allowed to consist of a single executable section that is mapped with
read, write and execute permissions if they are intended for use in a
context where Secure Boot is enabled (and where Microsoft's set of
certificates is used, i.e., every x86 PC built to run Windows).

To avoid stepping on reserved memory before having inspected the E820
tables, and to ensure the correct placement when running a kernel build
that is non-relocatable, the bare metal decompressor moves its own
executable image to the end of the allocation that was reserved for it,
in order to perform the decompression in place. This means the region in
question requires both write and execute permissions, which either need
to be given upfront (which EFI will no longer permit), or need to be
applied on demand using the existing page fault handling framework.

However, the physical placement of the kernel is usually randomized
anyway, and even if it isn't, a dedicated decompression output buffer
can be allocated anywhere in memory using EFI APIs when still running in
the boot services, given that EFI support already implies a relocatable
kernel. This means that decompression in place is never necessary, nor
is moving the compressed image from one end to the other.

Since EFI already maps all of memory 1:1, it is also unnecessary to
create new page tables or handle page faults when decompressing the
kernel. That means there is also no need to replace the special
exception handlers for SEV. Generally, there is little need to do
any of the things that the decompressor does beyond

- initialize SEV encryption, if needed,
- perform the 4/5 level paging switch, if needed,
- decompress the kernel
- relocate the kernel

So do all of this from the EFI stub code, and avoid the bare metal
decompressor altogether.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230807162720.545787-24-ardb@kernel.org
---
 arch/x86/boot/compressed/Makefile       |   5 +
 arch/x86/boot/compressed/efi_mixed.S    |  55 --------
 arch/x86/boot/compressed/head_32.S      |  13 --
 arch/x86/boot/compressed/head_64.S      |  27 ----
 arch/x86/include/asm/efi.h              |   7 +-
 arch/x86/include/asm/sev.h              |   2 +
 drivers/firmware/efi/libstub/x86-stub.c | 166 ++++++++++--------------
 7 files changed, 84 insertions(+), 191 deletions(-)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 40d2ff503079..71fc531b95b4 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack
 ifeq ($(CONFIG_LD_IS_BFD),y)
 LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments)
 endif
+ifeq ($(CONFIG_EFI_STUB),y)
+# ensure that the static EFI stub library will be pulled in, even if it is
+# never referenced explicitly from the startup code
+LDFLAGS_vmlinux += -u efi_pe_entry
+endif
 LDFLAGS_vmlinux += -T
 
 hostprogs	:= mkpiggy
diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S
index 8a02a151806d..f4e22ef774ab 100644
--- a/arch/x86/boot/compressed/efi_mixed.S
+++ b/arch/x86/boot/compressed/efi_mixed.S
@@ -269,10 +269,6 @@ SYM_FUNC_START_LOCAL(efi32_entry)
 	jmp	startup_32
 SYM_FUNC_END(efi32_entry)
 
-#define ST32_boottime		60 // offsetof(efi_system_table_32_t, boottime)
-#define BS32_handle_protocol	88 // offsetof(efi_boot_services_32_t, handle_protocol)
-#define LI32_image_base		32 // offsetof(efi_loaded_image_32_t, image_base)
-
 /*
  * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
  *			       efi_system_table_32_t *sys_table)
@@ -280,8 +276,6 @@ SYM_FUNC_END(efi32_entry)
 SYM_FUNC_START(efi32_pe_entry)
 	pushl	%ebp
 	movl	%esp, %ebp
-	pushl	%eax				// dummy push to allocate loaded_image
-
 	pushl	%ebx				// save callee-save registers
 	pushl	%edi
 
@@ -290,48 +284,8 @@ SYM_FUNC_START(efi32_pe_entry)
 	movl	$0x80000003, %eax		// EFI_UNSUPPORTED
 	jnz	2f
 
-	call	1f
-1:	pop	%ebx
-
-	/* Get the loaded image protocol pointer from the image handle */
-	leal	-4(%ebp), %eax
-	pushl	%eax				// &loaded_image
-	leal	(loaded_image_proto - 1b)(%ebx), %eax
-	pushl	%eax				// pass the GUID address
-	pushl	8(%ebp)				// pass the image handle
-
-	/*
-	 * Note the alignment of the stack frame.
-	 *   sys_table
-	 *   handle             <-- 16-byte aligned on entry by ABI
-	 *   return address
-	 *   frame pointer
-	 *   loaded_image       <-- local variable
-	 *   saved %ebx		<-- 16-byte aligned here
-	 *   saved %edi
-	 *   &loaded_image
-	 *   &loaded_image_proto
-	 *   handle             <-- 16-byte aligned for call to handle_protocol
-	 */
-
-	movl	12(%ebp), %eax			// sys_table
-	movl	ST32_boottime(%eax), %eax	// sys_table->boottime
-	call	*BS32_handle_protocol(%eax)	// sys_table->boottime->handle_protocol
-	addl	$12, %esp			// restore argument space
-	testl	%eax, %eax
-	jnz	2f
-
 	movl	8(%ebp), %ecx			// image_handle
 	movl	12(%ebp), %edx			// sys_table
-	movl	-4(%ebp), %esi			// loaded_image
-	movl	LI32_image_base(%esi), %esi	// loaded_image->image_base
-	leal	(startup_32 - 1b)(%ebx), %ebp	// runtime address of startup_32
-	/*
-	 * We need to set the image_offset variable here since startup_32() will
-	 * use it before we get to the 64-bit efi_pe_entry() in C code.
-	 */
-	subl	%esi, %ebp			// calculate image_offset
-	movl	%ebp, (image_offset - 1b)(%ebx)	// save image_offset
 	xorl	%esi, %esi
 	jmp	efi32_entry			// pass %ecx, %edx, %esi
 						// no other registers remain live
@@ -350,15 +304,6 @@ SYM_FUNC_START_NOALIGN(efi64_stub_entry)
 SYM_FUNC_END(efi64_stub_entry)
 #endif
 
-	.section ".rodata"
-	/* EFI loaded image protocol GUID */
-	.balign 4
-SYM_DATA_START_LOCAL(loaded_image_proto)
-	.long	0x5b1b31a1
-	.word	0x9562, 0x11d2
-	.byte	0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
-SYM_DATA_END(loaded_image_proto)
-
 	.data
 	.balign	8
 SYM_DATA_START_LOCAL(efi32_boot_gdt)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 3af4a383615b..1cfe9802a42f 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32)
 
 #ifdef CONFIG_RELOCATABLE
 	leal	startup_32@GOTOFF(%edx), %ebx
-
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32() will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry() will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- *	image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
-	subl    image_offset@GOTOFF(%edx), %ebx
-#endif
-
 	movl	BP_kernel_alignment(%esi), %eax
 	decl	%eax
 	addl    %eax, %ebx
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 28f46051c706..bf4a10a5794f 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -146,19 +146,6 @@ SYM_FUNC_START(startup_32)
 
 #ifdef CONFIG_RELOCATABLE
 	movl	%ebp, %ebx
-
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32 will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- *	image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
-	subl    rva(image_offset)(%ebp), %ebx
-#endif
-
 	movl	BP_kernel_alignment(%esi), %eax
 	decl	%eax
 	addl	%eax, %ebx
@@ -335,20 +322,6 @@ SYM_CODE_START(startup_64)
 	/* Start with the delta to where the kernel will run at. */
 #ifdef CONFIG_RELOCATABLE
 	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
-
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32 will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- *	image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
-	movl    image_offset(%rip), %eax
-	subq	%rax, %rbp
-#endif
-
 	movl	BP_kernel_alignment(%rsi), %eax
 	decl	%eax
 	addq	%rax, %rbp
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 8b4be7cecdb8..b0994ae3bc23 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,8 @@ static inline void efi_fpu_end(void)
 }
 
 #ifdef CONFIG_X86_32
+#define EFI_X86_KERNEL_ALLOC_LIMIT		(SZ_512M - 1)
+
 #define arch_efi_call_virt_setup()					\
 ({									\
 	efi_fpu_begin();						\
@@ -103,8 +105,7 @@ static inline void efi_fpu_end(void)
 })
 
 #else /* !CONFIG_X86_32 */
-
-#define EFI_LOADER_SIGNATURE	"EL64"
+#define EFI_X86_KERNEL_ALLOC_LIMIT		EFI_ALLOC_LIMIT
 
 extern asmlinkage u64 __efi_call(void *fp, ...);
 
@@ -218,6 +219,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
 
 #ifdef CONFIG_EFI_MIXED
 
+#define EFI_ALLOC_LIMIT		(efi_is_64bit() ? ULONG_MAX : U32_MAX)
+
 #define ARCH_HAS_EFISTUB_WRAPPERS
 
 static inline bool efi_is_64bit(void)
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index b97d239e18ea..5b4a1ce3d368 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -164,6 +164,7 @@ static __always_inline void sev_es_nmi_complete(void)
 		__sev_es_nmi_complete();
 }
 extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
+extern void sev_enable(struct boot_params *bp);
 
 static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
 {
@@ -218,6 +219,7 @@ static inline void sev_es_ist_exit(void) { }
 static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
 static inline void sev_es_nmi_complete(void) { }
 static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+static inline void sev_enable(struct boot_params *bp) { }
 static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
 static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
 static inline void setup_ghcb(void) { }
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index b4685da2b8d5..e976288728e9 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -15,17 +15,14 @@
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/boot.h>
+#include <asm/kaslr.h>
 #include <asm/sev.h>
 
 #include "efistub.h"
 #include "x86-stub.h"
 
-/* Maximum physical address for 64-bit kernel with 4-level paging */
-#define MAXMEM_X86_64_4LEVEL (1ull << 46)
-
 const efi_system_table_t *efi_system_table;
 const efi_dxe_services_table_t *efi_dxe_table;
-u32 image_offset __section(".data");
 static efi_loaded_image_t *image = NULL;
 static efi_memory_attribute_protocol_t *memattr;
 
@@ -287,28 +284,6 @@ void efi_adjust_memory_range_protection(unsigned long start,
 	}
 }
 
-extern const u8 startup_32[], startup_64[];
-
-static void
-setup_memory_protection(unsigned long image_base, unsigned long image_size)
-{
-#ifdef CONFIG_64BIT
-	if (image_base != (unsigned long)startup_32)
-		efi_adjust_memory_range_protection(image_base, image_size);
-#else
-	/*
-	 * Clear protection flags on a whole range of possible
-	 * addresses used for KASLR. We don't need to do that
-	 * on x86_64, since KASLR/extraction is performed after
-	 * dedicated identity page tables are built and we only
-	 * need to remove possible protection on relocated image
-	 * itself disregarding further relocations.
-	 */
-	efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
-					   KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
-#endif
-}
-
 static void setup_unaccepted_memory(void)
 {
 	efi_guid_t mem_acceptance_proto = OVMF_SEV_MEMORY_ACCEPTANCE_PROTOCOL_GUID;
@@ -334,9 +309,7 @@ static void setup_unaccepted_memory(void)
 
 static const efi_char16_t apple[] = L"Apple";
 
-static void setup_quirks(struct boot_params *boot_params,
-			 unsigned long image_base,
-			 unsigned long image_size)
+static void setup_quirks(struct boot_params *boot_params)
 {
 	efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
 		efi_table_attr(efi_system_table, fw_vendor);
@@ -345,9 +318,6 @@ static void setup_quirks(struct boot_params *boot_params,
 		if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
 			retrieve_apple_device_properties(boot_params);
 	}
-
-	if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES))
-		setup_memory_protection(image_base, image_size);
 }
 
 /*
@@ -500,7 +470,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
 	}
 
 	image_base = efi_table_attr(image, image_base);
-	image_offset = (void *)startup_32 - image_base;
 
 	status = efi_allocate_pages(sizeof(struct boot_params),
 				    (unsigned long *)&boot_params, ULONG_MAX);
@@ -804,6 +773,61 @@ static bool have_unsupported_snp_features(void)
 	return false;
 }
 
+static void efi_get_seed(void *seed, int size)
+{
+	efi_get_random_bytes(size, seed);
+
+	/*
+	 * This only updates seed[0] when running on 32-bit, but in that case,
+	 * seed[1] is not used anyway, as there is no virtual KASLR on 32-bit.
+	 */
+	*(unsigned long *)seed ^= kaslr_get_random_long("EFI");
+}
+
+static void error(char *str)
+{
+	efi_warn("Decompression failed: %s\n", str);
+}
+
+static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
+{
+	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+	unsigned long addr, alloc_size, entry;
+	efi_status_t status;
+	u32 seed[2] = {};
+
+	/* determine the required size of the allocation */
+	alloc_size = ALIGN(max_t(unsigned long, output_len, kernel_total_size),
+			   MIN_KERNEL_ALIGN);
+
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) {
+		u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size;
+
+		efi_get_seed(seed, sizeof(seed));
+
+		virt_addr += (range * seed[1]) >> 32;
+		virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1);
+	}
+
+	status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr,
+				  seed[0], EFI_LOADER_CODE,
+				  EFI_X86_KERNEL_ALLOC_LIMIT);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	entry = decompress_kernel((void *)addr, virt_addr, error);
+	if (entry == ULONG_MAX) {
+		efi_free(alloc_size, addr);
+		return EFI_LOAD_ERROR;
+	}
+
+	*kernel_entry = addr + entry;
+
+	efi_adjust_memory_range_protection(addr, kernel_total_size);
+
+	return EFI_SUCCESS;
+}
+
 static void __noreturn enter_kernel(unsigned long kernel_addr,
 				    struct boot_params *boot_params)
 {
@@ -823,10 +847,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
 			       struct boot_params *boot_params)
 {
 	efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID;
-	unsigned long bzimage_addr = (unsigned long)startup_32;
-	unsigned long buffer_start, buffer_end;
 	struct setup_header *hdr = &boot_params->hdr;
 	const struct linux_efi_initrd *initrd = NULL;
+	unsigned long kernel_entry;
 	efi_status_t status;
 
 	efi_system_table = sys_table_arg;
@@ -855,60 +878,6 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
 		goto fail;
 	}
 
-	/*
-	 * If the kernel isn't already loaded at a suitable address,
-	 * relocate it.
-	 *
-	 * It must be loaded above LOAD_PHYSICAL_ADDR.
-	 *
-	 * The maximum address for 64-bit is 1 << 46 for 4-level paging. This
-	 * is defined as the macro MAXMEM, but unfortunately that is not a
-	 * compile-time constant if 5-level paging is configured, so we instead
-	 * define our own macro for use here.
-	 *
-	 * For 32-bit, the maximum address is complicated to figure out, for
-	 * now use KERNEL_IMAGE_SIZE, which will be 512MiB, the same as what
-	 * KASLR uses.
-	 *
-	 * Also relocate it if image_offset is zero, i.e. the kernel wasn't
-	 * loaded by LoadImage, but rather by a bootloader that called the
-	 * handover entry. The reason we must always relocate in this case is
-	 * to handle the case of systemd-boot booting a unified kernel image,
-	 * which is a PE executable that contains the bzImage and an initrd as
-	 * COFF sections. The initrd section is placed after the bzImage
-	 * without ensuring that there are at least init_size bytes available
-	 * for the bzImage, and thus the compressed kernel's startup code may
-	 * overwrite the initrd unless it is moved out of the way.
-	 */
-
-	buffer_start = ALIGN(bzimage_addr - image_offset,
-			     hdr->kernel_alignment);
-	buffer_end = buffer_start + hdr->init_size;
-
-	if ((buffer_start < LOAD_PHYSICAL_ADDR)				     ||
-	    (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE)    ||
-	    (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) ||
-	    (image_offset == 0)) {
-		extern char _bss[];
-
-		status = efi_relocate_kernel(&bzimage_addr,
-					     (unsigned long)_bss - bzimage_addr,
-					     hdr->init_size,
-					     hdr->pref_address,
-					     hdr->kernel_alignment,
-					     LOAD_PHYSICAL_ADDR);
-		if (status != EFI_SUCCESS) {
-			efi_err("efi_relocate_kernel() failed!\n");
-			goto fail;
-		}
-		/*
-		 * Now that we've copied the kernel elsewhere, we no longer
-		 * have a set up block before startup_32(), so reset image_offset
-		 * to zero in case it was set earlier.
-		 */
-		image_offset = 0;
-	}
-
 #ifdef CONFIG_CMDLINE_BOOL
 	status = efi_parse_options(CONFIG_CMDLINE);
 	if (status != EFI_SUCCESS) {
@@ -926,6 +895,12 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
 		}
 	}
 
+	status = efi_decompress_kernel(&kernel_entry);
+	if (status != EFI_SUCCESS) {
+		efi_err("Failed to decompress kernel\n");
+		goto fail;
+	}
+
 	/*
 	 * At this point, an initrd may already have been loaded by the
 	 * bootloader and passed via bootparams. We permit an initrd loaded
@@ -965,7 +940,7 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
 
 	setup_efi_pci(boot_params);
 
-	setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start);
+	setup_quirks(boot_params);
 
 	setup_unaccepted_memory();
 
@@ -975,12 +950,15 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
 		goto fail;
 	}
 
+	/*
+	 * Call the SEV init code while still running with the firmware's
+	 * GDT/IDT, so #VC exceptions will be handled by EFI.
+	 */
+	sev_enable(boot_params);
+
 	efi_5level_switch();
 
-	if (IS_ENABLED(CONFIG_X86_64))
-		bzimage_addr += startup_64 - startup_32;
-
-	enter_kernel(bzimage_addr, boot_params);
+	enter_kernel(kernel_entry, boot_params);
 fail:
 	efi_err("efi_stub_entry() failed!\n");
 

From 200ee464f7a9a4e8d7a51a2083b28cf240e7cb91 Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Mon, 7 Aug 2023 21:41:27 +0800
Subject: [PATCH 0839/2047] regulator: rpi-panel-attiny-regulator: Remove
 redundant of_match_ptr()

The driver depends on CONFIG_OF, so it is not necessary to use
of_match_ptr() here, and __maybe_unused can also be removed.

Even for drivers that do not depend on CONFIG_OF, it's almost always
better to leave out the of_match_ptr(), since the only thing it can
possibly do is to save a few bytes of .text if a driver can be used both
with and without it.

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Link: https://lore.kernel.org/r/20230807134127.2380390-1-ruanjinjie@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rpi-panel-attiny-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index e9719a378a0b..ed046cc998c6 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -397,7 +397,7 @@ static struct i2c_driver attiny_regulator_driver = {
 	.driver = {
 		.name = "rpi_touchscreen_attiny",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
-		.of_match_table = of_match_ptr(attiny_dt_ids),
+		.of_match_table = attiny_dt_ids,
 	},
 	.probe = attiny_i2c_probe,
 	.remove	= attiny_i2c_remove,

From e0a99a839f04c90bf9f16919997c4b34f9c8f1f0 Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen <anna-maria@linutronix.de>
Date: Mon, 15 May 2023 18:20:38 +0200
Subject: [PATCH 0840/2047] Documentation: core-api/cpuhotplug: Fix state names

Dynamic allocated hotplug states in documentation and the comment above
cpuhp_state enum do not match the code. To not get confused by wrong
documentation, change to proper state names.

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20230515162038.62703-1-anna-maria@linutronix.de
---
 Documentation/core-api/cpu_hotplug.rst | 10 +++++-----
 include/linux/cpuhotplug.h             |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst
index e6f5bc39cf5c..b9ae591d0b18 100644
--- a/Documentation/core-api/cpu_hotplug.rst
+++ b/Documentation/core-api/cpu_hotplug.rst
@@ -395,8 +395,8 @@ multi-instance state the following function is available:
 * cpuhp_setup_state_multi(state, name, startup, teardown)
 
 The @state argument is either a statically allocated state or one of the
-constants for dynamically allocated states - CPUHP_PREPARE_DYN,
-CPUHP_ONLINE_DYN - depending on the state section (PREPARE, ONLINE) for
+constants for dynamically allocated states - CPUHP_BP_PREPARE_DYN,
+CPUHP_AP_ONLINE_DYN - depending on the state section (PREPARE, ONLINE) for
 which a dynamic state should be allocated.
 
 The @name argument is used for sysfs output and for instrumentation. The
@@ -588,7 +588,7 @@ notifications on online and offline operations::
 Setup and teardown a dynamically allocated state in the ONLINE section
 for notifications on offline operations::
 
-   state = cpuhp_setup_state(CPUHP_ONLINE_DYN, "subsys:offline", NULL, subsys_cpu_offline);
+   state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys:offline", NULL, subsys_cpu_offline);
    if (state < 0)
        return state;
    ....
@@ -597,7 +597,7 @@ for notifications on offline operations::
 Setup and teardown a dynamically allocated state in the ONLINE section
 for notifications on online operations without invoking the callbacks::
 
-   state = cpuhp_setup_state_nocalls(CPUHP_ONLINE_DYN, "subsys:online", subsys_cpu_online, NULL);
+   state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "subsys:online", subsys_cpu_online, NULL);
    if (state < 0)
        return state;
    ....
@@ -606,7 +606,7 @@ for notifications on online operations without invoking the callbacks::
 Setup, use and teardown a dynamically allocated multi-instance state in the
 ONLINE section for notifications on online and offline operation::
 
-   state = cpuhp_setup_state_multi(CPUHP_ONLINE_DYN, "subsys:online", subsys_cpu_online, subsys_cpu_offline);
+   state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "subsys:online", subsys_cpu_online, subsys_cpu_offline);
    if (state < 0)
        return state;
    ....
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 25b6e6e6ba6b..06dda85f0424 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -48,7 +48,7 @@
  *    same section.
  *
  * If neither #1 nor #2 apply, please use the dynamic state space when
- * setting up a state by using CPUHP_PREPARE_DYN or CPUHP_PREPARE_ONLINE
+ * setting up a state by using CPUHP_BP_PREPARE_DYN or CPUHP_AP_ONLINE_DYN
  * for the @state argument of the setup function.
  *
  * See Documentation/core-api/cpu_hotplug.rst for further information and

From 2ca03ecc9c8556e913aef6f381154721dec4e44b Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Mon, 7 Aug 2023 09:16:21 +0900
Subject: [PATCH 0841/2047] spi: dw: Set default value if reg-io-width isn't
 specified

According to the dt-bindings, the default value of reg-io-width is 4.
However, the value becomes zero when reg-io-width isn't specified.

Should set the actual value to dws->reg_io_width, considering it
referenced.

Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Link: https://lore.kernel.org/r/20230807001621.196776-1-hayashi.kunihiko@socionext.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-dw-mmio.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
index 3a31ad74a538..9e041a28ed47 100644
--- a/drivers/spi/spi-dw-mmio.c
+++ b/drivers/spi/spi-dw-mmio.c
@@ -369,7 +369,9 @@ static int dw_spi_mmio_probe(struct platform_device *pdev)
 
 	dws->max_freq = clk_get_rate(dwsmmio->clk);
 
-	device_property_read_u32(&pdev->dev, "reg-io-width", &dws->reg_io_width);
+	if (device_property_read_u32(&pdev->dev, "reg-io-width",
+				     &dws->reg_io_width))
+		dws->reg_io_width = 4;
 
 	num_cs = 4;
 

From 9b966639b0cc742a9c4b6329a8e27128e4424cf1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 8 Aug 2023 15:46:28 +0200
Subject: [PATCH 0842/2047] regulator: tps65910: Drop useless header

The TPS65910 includes the legacy header <linux/gpio.h> for no
reason, drop the include.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230808-descriptors-regulator-v1-1-939b5e84dd18@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/tps65910-regulator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c
index 2a0965ba1570..3a3027e0b94e 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -17,7 +17,6 @@
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/slab.h>
-#include <linux/gpio.h>
 #include <linux/mfd/tps65910.h>
 #include <linux/regulator/of_regulator.h>
 

From d0d58fe27b344fb6d0edb5fd2038372b5e5ed95b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 8 Aug 2023 15:46:29 +0200
Subject: [PATCH 0843/2047] regulator: s2mpa01: Drop useless header

The TPS65910 includes the legacy header <linux/gpio.h> for no
reason, drop the include.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230808-descriptors-regulator-v1-2-939b5e84dd18@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/s2mpa01.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/s2mpa01.c b/drivers/regulator/s2mpa01.c
index b147ff6a16b1..c22fdde67f9c 100644
--- a/drivers/regulator/s2mpa01.c
+++ b/drivers/regulator/s2mpa01.c
@@ -5,7 +5,6 @@
 
 #include <linux/bug.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/of.h>

From 052eff402fb754f3472833cb679ceef954ebf2a0 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 8 Aug 2023 15:46:30 +0200
Subject: [PATCH 0844/2047] regulator: rpi-panel-attiny: Drop useless header

The RPI panel regulator driver includes the legacy header
<linux/gpio.h> for no reason, this is a driver and <linux/gpio/driver.h>
is already included. Drop the include.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230808-descriptors-regulator-v1-3-939b5e84dd18@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rpi-panel-attiny-regulator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c
index ed046cc998c6..f52c3d47ecea 100644
--- a/drivers/regulator/rpi-panel-attiny-regulator.c
+++ b/drivers/regulator/rpi-panel-attiny-regulator.c
@@ -7,7 +7,6 @@
 
 #include <linux/backlight.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/gpio/driver.h>
 #include <linux/i2c.h>
 #include <linux/init.h>

From 2f26d97863f05b83b8f7872aff81ecb9d6b76b50 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 8 Aug 2023 15:46:32 +0200
Subject: [PATCH 0845/2047] regulator: rc5t583: Drop useless header

The RC5T583 includes the legacy header <linux/gpio.h> for no
reason, drop the include.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230808-descriptors-regulator-v1-5-939b5e84dd18@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rc5t583-regulator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/rc5t583-regulator.c b/drivers/regulator/rc5t583-regulator.c
index a5afca73715d..a25a141e86c4 100644
--- a/drivers/regulator/rc5t583-regulator.c
+++ b/drivers/regulator/rc5t583-regulator.c
@@ -16,7 +16,6 @@
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
-#include <linux/gpio.h>
 #include <linux/mfd/rc5t583.h>
 
 struct rc5t583_regulator_info {

From 4eb351fb89d68efeaca3625dccbbf492f5450801 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 8 Aug 2023 15:46:33 +0200
Subject: [PATCH 0846/2047] regulator: mt6311: Drop useless header

The mt6311 includes the legacy header <linux/gpio.h> for no
reason, drop the include.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230808-descriptors-regulator-v1-6-939b5e84dd18@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mt6311-regulator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/mt6311-regulator.c b/drivers/regulator/mt6311-regulator.c
index b0771770cc26..63a51485f2cc 100644
--- a/drivers/regulator/mt6311-regulator.c
+++ b/drivers/regulator/mt6311-regulator.c
@@ -4,7 +4,6 @@
 // Author: Henry Chen <henryc.chen@mediatek.com>
 
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>

From 2e903eac35ec0ea1f44af5a53d87d98309295fc3 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 8 Aug 2023 15:46:34 +0200
Subject: [PATCH 0847/2047] regulator: mcp16502: Drop useless header

The mcp16502 regulator driver includes the legacy header
<linux/gpio.h> for no reason, it is already using the proper
<linux/gpio/consumer.h> include. Drop the include.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230808-descriptors-regulator-v1-7-939b5e84dd18@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mcp16502.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/mcp16502.c b/drivers/regulator/mcp16502.c
index 6c6f5a21362b..7a472a2ef48a 100644
--- a/drivers/regulator/mcp16502.c
+++ b/drivers/regulator/mcp16502.c
@@ -8,7 +8,6 @@
 //
 // Inspired from tps65086-regulator.c
 
-#include <linux/gpio.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/kernel.h>

From d150c73aa233d6469392282ef648dba5fd4b4821 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 8 Aug 2023 15:46:35 +0200
Subject: [PATCH 0848/2047] regulator: max20086: Drop useless header

The max20086 regulator driver includes the legacy header
<linux/gpio.h> for no reason, it is already using the proper
<linux/gpio/consumer.h> include. Drop the include.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230808-descriptors-regulator-v1-8-939b5e84dd18@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max20086-regulator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c
index fad31f5f435e..32f47b896fd1 100644
--- a/drivers/regulator/max20086-regulator.c
+++ b/drivers/regulator/max20086-regulator.c
@@ -6,7 +6,6 @@
 // Copyright (C) 2018 Avnet, Inc.
 
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/module.h>

From e4d48f64fcd469feeb09fc452f8cd1dfc00b43f6 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 8 Aug 2023 15:46:36 +0200
Subject: [PATCH 0849/2047] regulator: lp8755: Drop useless header

The lp8755 includes the legacy header <linux/gpio.h> for no
reason, drop the include.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230808-descriptors-regulator-v1-9-939b5e84dd18@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/lp8755.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/lp8755.c b/drivers/regulator/lp8755.c
index 4bc310f972ed..8d01e18046f3 100644
--- a/drivers/regulator/lp8755.c
+++ b/drivers/regulator/lp8755.c
@@ -13,7 +13,6 @@
 #include <linux/err.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
-#include <linux/gpio.h>
 #include <linux/regmap.h>
 #include <linux/uaccess.h>
 #include <linux/regulator/driver.h>

From a5c9a1444088099c6d52939ed2f34049d5d00b5f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 8 Aug 2023 15:46:37 +0200
Subject: [PATCH 0850/2047] regulator: bd71828: Drop useless header

The bd71828 includes the legacy header <linux/gpio.h> for no
reason, drop the include. The documentation mentions GPIO but there
is no usage of the GPIO namespace.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230808-descriptors-regulator-v1-10-939b5e84dd18@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd71828-regulator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/bd71828-regulator.c b/drivers/regulator/bd71828-regulator.c
index f3205dc9d4fc..08d4ee369287 100644
--- a/drivers/regulator/bd71828-regulator.c
+++ b/drivers/regulator/bd71828-regulator.c
@@ -5,7 +5,6 @@
 
 #include <linux/delay.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/mfd/rohm-bd71828.h>

From f321708da4db6b15a8691dc64b2d5169234937bc Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 8 Aug 2023 15:46:38 +0200
Subject: [PATCH 0851/2047] regulator: bd71815: Drop useless header

The bd71815 regulator driver includes the legacy header
<linux/gpio.h> for no reason, it is already using the proper
<linux/gpio/consumer.h> include. Drop the include.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230808-descriptors-regulator-v1-11-939b5e84dd18@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd71815-regulator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/bd71815-regulator.c b/drivers/regulator/bd71815-regulator.c
index 475b1e0110e7..26192d55a685 100644
--- a/drivers/regulator/bd71815-regulator.c
+++ b/drivers/regulator/bd71815-regulator.c
@@ -18,7 +18,6 @@
 #include <linux/regulator/driver.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
-#include <linux/gpio.h>
 #include <linux/mfd/rohm-generic.h>
 #include <linux/mfd/rohm-bd71815.h>
 #include <linux/regulator/of_regulator.h>

From 80347cd515ca149f1da31786ec9a59b0dfd1e579 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 4 Aug 2023 09:58:53 +0200
Subject: [PATCH 0852/2047] x86/microcode: Remove microcode_mutex

microcode_mutex is only used by reload_store(). It has a comment saying
"to synchronize with each other". Other user of this mutex have been
removed in the commits

  181b6f40e9ea8 ("x86/microcode: Rip out the OLD_INTERFACE").
  b6f86689d5b74 ("x86/microcode: Rip out the subsys interface gunk")

The sysfs interface does not need additional synchronisation vs itself
because it is provided as kernfs_ops::mutex which is acquired in
kernfs_fop_write_iter().

Remove the superfluous microcode_mutex.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Sohil Mehta <sohil.mehta@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20230804075853.JF_n6GXC@linutronix.de
---
 arch/x86/kernel/cpu/common.c         | 3 +--
 arch/x86/kernel/cpu/microcode/core.c | 6 ------
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 52683fddafaf..06015f304cb8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2276,8 +2276,7 @@ void store_cpu_caps(struct cpuinfo_x86 *curr_info)
  * @prev_info:	CPU capabilities stored before an update.
  *
  * The microcode loader calls this upon late microcode load to recheck features,
- * only when microcode has been updated. Caller holds microcode_mutex and CPU
- * hotplug lock.
+ * only when microcode has been updated. Caller holds and CPU hotplug lock.
  *
  * Return: None
  */
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 192adf5f948e..c9a53e330df3 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -54,15 +54,12 @@ LIST_HEAD(microcode_cache);
  *
  * All non cpu-hotplug-callback call sites use:
  *
- * - microcode_mutex to synchronize with each other;
  * - cpus_read_lock/unlock() to synchronize with
  *   the cpu-hotplug-callback call sites.
  *
  * We guarantee that only a single cpu is being
  * updated at any particular moment of time.
  */
-static DEFINE_MUTEX(microcode_mutex);
-
 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
 
 struct cpu_info_ctx {
@@ -488,10 +485,7 @@ static ssize_t reload_store(struct device *dev,
 	if (tmp_ret != UCODE_NEW)
 		goto put;
 
-	mutex_lock(&microcode_mutex);
 	ret = microcode_reload_late();
-	mutex_unlock(&microcode_mutex);
-
 put:
 	cpus_read_unlock();
 

From 021a160abf62c19aff36c920566efb4f690e964a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 8 Aug 2023 19:26:35 +0200
Subject: [PATCH 0853/2047] fs: use __fput_sync in close(2)

close(2) is a special case which guarantees a shallow kernel stack,
making delegation to task_work machinery unnecessary. Said delegation is
problematic as it involves atomic ops and interrupt masking trips, none
of which are cheap on x86-64. Forcing close(2) to do it looks like an
oversight in the original work.

Moreover presence of CONFIG_RSEQ adds an additional overhead as fput()
-> task_work_add(..., TWA_RESUME) -> set_notify_resume() makes the
thread returning to userspace land in resume_user_mode_work(), where
rseq_handle_notify_resume takes a SMAP round-trip if rseq is enabled for
the thread (and it is by default with contemporary glibc).

Sample result when benchmarking open1_processes -t 1 from will-it-scale
(that's an open + close loop) + tmpfs on /tmp, running on the Sapphire
Rapid CPU (ops/s):
stock+RSEQ:     1329857
stock-RSEQ:     1421667 (+7%)
patched:        1523521 (+14.5% / +7%) (with / without rseq)

Patched result is the same regardless of rseq as the codepath is avoided.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/file_table.c |  5 +----
 fs/open.c       | 27 ++++++++++++++++++++++++---
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index fc7d677ff5ad..ee21b3da9d08 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -461,11 +461,8 @@ void fput(struct file *file)
  */
 void __fput_sync(struct file *file)
 {
-	if (atomic_long_dec_and_test(&file->f_count)) {
-		struct task_struct *task = current;
-		BUG_ON(!(task->flags & PF_KTHREAD));
+	if (atomic_long_dec_and_test(&file->f_count))
 		__fput(file);
-	}
 }
 
 EXPORT_SYMBOL(fput);
diff --git a/fs/open.c b/fs/open.c
index 0c55c8e7f837..455fdddbe9aa 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1503,7 +1503,7 @@ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
  * "id" is the POSIX thread ID. We use the
  * files pointer for this..
  */
-int filp_close(struct file *filp, fl_owner_t id)
+static int filp_flush(struct file *filp, fl_owner_t id)
 {
 	int retval = 0;
 
@@ -1520,10 +1520,18 @@ int filp_close(struct file *filp, fl_owner_t id)
 		dnotify_flush(filp, id);
 		locks_remove_posix(filp, id);
 	}
-	fput(filp);
 	return retval;
 }
 
+int filp_close(struct file *filp, fl_owner_t id)
+{
+	int retval;
+
+	retval = filp_flush(filp, id);
+	fput(filp);
+
+	return retval;
+}
 EXPORT_SYMBOL(filp_close);
 
 /*
@@ -1533,7 +1541,20 @@ EXPORT_SYMBOL(filp_close);
  */
 SYSCALL_DEFINE1(close, unsigned int, fd)
 {
-	int retval = close_fd(fd);
+	int retval;
+	struct file *file;
+
+	file = close_fd_get_file(fd);
+	if (!file)
+		return -EBADF;
+
+	retval = filp_flush(file, current->files);
+
+	/*
+	 * We're returning to user space. Don't bother
+	 * with any delayed fput() cases.
+	 */
+	__fput_sync(file);
 
 	/* can't restart close syscall because file table entry was cleared */
 	if (unlikely(retval == -ERESTARTSYS ||

From c95e7c05c139b1a8a51d368bde57cf20ce931a98 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Date: Mon, 7 Aug 2023 12:23:54 +0200
Subject: [PATCH 0854/2047] kunit: Report the count of test suites in a module

According to KTAP specification[1], results should always start from a
header that provides a TAP protocol version, followed by a test plan with
a count of items to be executed.  That pattern should be followed at each
nesting level.  In the current implementation of the top-most, i.e., test
suite level, those rules apply only for test suites built into the kernel,
executed and reported on boot.  Results submitted to dmesg from kunit test
modules loaded later are missing those top-level headers.

As a consequence, if a kunit test module provides more than one test suite
then, without the top level test plan, external tools that are parsing
dmesg for kunit test output are not able to tell how many test suites
should be expected and whether to continue parsing after complete output
from the first test suite is collected.

Submit the top-level headers also from the kunit test module notifier
initialization callback.

v3: Fix new name of a structure moved to kunit namespace not updated in
    executor_test functions (lkp@intel.com).
v2: Use kunit_exec_run_tests() (Mauro, Rae), but prevent it from
    emitting the headers when called on load of non-test modules.

[1] https://docs.kernel.org/dev-tools/ktap.html#

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Rae Moar <rmoar@google.com>
Reviewed-by: Rae Moar <rmoar@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/test.h      |  8 ++++++++
 lib/kunit/executor.c      | 42 +++++++++++++++++++++------------------
 lib/kunit/executor_test.c | 36 ++++++++++++++++++++++-----------
 lib/kunit/test.c          |  6 +++++-
 4 files changed, 60 insertions(+), 32 deletions(-)

diff --git a/include/kunit/test.h b/include/kunit/test.h
index 011e0d6bb506..3d002e6b252f 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -256,6 +256,12 @@ struct kunit_suite {
 	int suite_init_err;
 };
 
+/* Stores an array of suites, end points one past the end */
+struct kunit_suite_set {
+	struct kunit_suite * const *start;
+	struct kunit_suite * const *end;
+};
+
 /**
  * struct kunit - represents a running instance of a test.
  *
@@ -317,6 +323,8 @@ int __kunit_test_suites_init(struct kunit_suite * const * const suites, int num_
 
 void __kunit_test_suites_exit(struct kunit_suite **suites, int num_suites);
 
+void kunit_exec_run_tests(struct kunit_suite_set *suite_set, bool builtin);
+
 #if IS_BUILTIN(CONFIG_KUNIT)
 int kunit_run_all_tests(void);
 #else
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index dc295150c4e5..5ef90c334eb0 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -104,13 +104,7 @@ kunit_filter_glob_tests(const struct kunit_suite *const suite, const char *test_
 static char *kunit_shutdown;
 core_param(kunit_shutdown, kunit_shutdown, charp, 0644);
 
-/* Stores an array of suites, end points one past the end */
-struct suite_set {
-	struct kunit_suite * const *start;
-	struct kunit_suite * const *end;
-};
-
-static void kunit_free_suite_set(struct suite_set suite_set)
+static void kunit_free_suite_set(struct kunit_suite_set suite_set)
 {
 	struct kunit_suite * const *suites;
 
@@ -119,16 +113,17 @@ static void kunit_free_suite_set(struct suite_set suite_set)
 	kfree(suite_set.start);
 }
 
-static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,
-					    const char *filter_glob,
-						char *filters,
-						char *filter_action,
-					    int *err)
+static struct kunit_suite_set
+kunit_filter_suites(const struct kunit_suite_set *suite_set,
+		    const char *filter_glob,
+		    char *filters,
+		    char *filter_action,
+		    int *err)
 {
 	int i, j, k;
 	int filter_count = 0;
 	struct kunit_suite **copy, **copy_start, *filtered_suite, *new_filtered_suite;
-	struct suite_set filtered = {NULL, NULL};
+	struct kunit_suite_set filtered = {NULL, NULL};
 	struct kunit_glob_filter parsed_glob;
 	struct kunit_attr_filter *parsed_filters = NULL;
 
@@ -230,17 +225,24 @@ static void kunit_handle_shutdown(void)
 
 }
 
-static void kunit_exec_run_tests(struct suite_set *suite_set)
+#endif
+
+void kunit_exec_run_tests(struct kunit_suite_set *suite_set, bool builtin)
 {
 	size_t num_suites = suite_set->end - suite_set->start;
 
-	pr_info("KTAP version 1\n");
-	pr_info("1..%zu\n", num_suites);
+	if (builtin || num_suites) {
+		pr_info("KTAP version 1\n");
+		pr_info("1..%zu\n", num_suites);
+	}
 
 	__kunit_test_suites_init(suite_set->start, num_suites);
 }
 
-static void kunit_exec_list_tests(struct suite_set *suite_set, bool include_attr)
+#if IS_BUILTIN(CONFIG_KUNIT)
+
+static void kunit_exec_list_tests(struct kunit_suite_set *suite_set,
+				  bool include_attr)
 {
 	struct kunit_suite * const *suites;
 	struct kunit_case *test_case;
@@ -265,7 +267,9 @@ static void kunit_exec_list_tests(struct suite_set *suite_set, bool include_attr
 
 int kunit_run_all_tests(void)
 {
-	struct suite_set suite_set = {__kunit_suites_start, __kunit_suites_end};
+	struct kunit_suite_set suite_set = {
+		__kunit_suites_start, __kunit_suites_end,
+	};
 	int err = 0;
 	if (!kunit_enabled()) {
 		pr_info("kunit: disabled\n");
@@ -282,7 +286,7 @@ int kunit_run_all_tests(void)
 	}
 
 	if (!action_param)
-		kunit_exec_run_tests(&suite_set);
+		kunit_exec_run_tests(&suite_set, true);
 	else if (strcmp(action_param, "list") == 0)
 		kunit_exec_list_tests(&suite_set, false);
 	else if (strcmp(action_param, "list_attr") == 0)
diff --git a/lib/kunit/executor_test.c b/lib/kunit/executor_test.c
index 3e0a1c99cb4e..4084071d0eb5 100644
--- a/lib/kunit/executor_test.c
+++ b/lib/kunit/executor_test.c
@@ -43,8 +43,10 @@ static void parse_filter_test(struct kunit *test)
 static void filter_suites_test(struct kunit *test)
 {
 	struct kunit_suite *subsuite[3] = {NULL, NULL};
-	struct suite_set suite_set = {.start = subsuite, .end = &subsuite[2]};
-	struct suite_set got;
+	struct kunit_suite_set suite_set = {
+		.start = subsuite, .end = &subsuite[2],
+	};
+	struct kunit_suite_set got;
 	int err = 0;
 
 	subsuite[0] = alloc_fake_suite(test, "suite1", dummy_test_cases);
@@ -67,8 +69,10 @@ static void filter_suites_test(struct kunit *test)
 static void filter_suites_test_glob_test(struct kunit *test)
 {
 	struct kunit_suite *subsuite[3] = {NULL, NULL};
-	struct suite_set suite_set = {.start = subsuite, .end = &subsuite[2]};
-	struct suite_set got;
+	struct kunit_suite_set suite_set = {
+		.start = subsuite, .end = &subsuite[2],
+	};
+	struct kunit_suite_set got;
 	int err = 0;
 
 	subsuite[0] = alloc_fake_suite(test, "suite1", dummy_test_cases);
@@ -94,8 +98,10 @@ static void filter_suites_test_glob_test(struct kunit *test)
 static void filter_suites_to_empty_test(struct kunit *test)
 {
 	struct kunit_suite *subsuite[3] = {NULL, NULL};
-	struct suite_set suite_set = {.start = subsuite, .end = &subsuite[2]};
-	struct suite_set got;
+	struct kunit_suite_set suite_set = {
+		.start = subsuite, .end = &subsuite[2],
+	};
+	struct kunit_suite_set got;
 	int err = 0;
 
 	subsuite[0] = alloc_fake_suite(test, "suite1", dummy_test_cases);
@@ -144,8 +150,10 @@ static struct kunit_case dummy_attr_test_cases[] = {
 static void filter_attr_test(struct kunit *test)
 {
 	struct kunit_suite *subsuite[3] = {NULL, NULL};
-	struct suite_set suite_set = {.start = subsuite, .end = &subsuite[2]};
-	struct suite_set got;
+	struct kunit_suite_set suite_set = {
+		.start = subsuite, .end = &subsuite[2],
+	};
+	struct kunit_suite_set got;
 	int err = 0;
 
 	subsuite[0] = alloc_fake_suite(test, "normal_suite", dummy_attr_test_cases);
@@ -179,8 +187,10 @@ static void filter_attr_test(struct kunit *test)
 static void filter_attr_empty_test(struct kunit *test)
 {
 	struct kunit_suite *subsuite[3] = {NULL, NULL};
-	struct suite_set suite_set = {.start = subsuite, .end = &subsuite[2]};
-	struct suite_set got;
+	struct kunit_suite_set suite_set = {
+		.start = subsuite, .end = &subsuite[2],
+	};
+	struct kunit_suite_set got;
 	int err = 0;
 
 	subsuite[0] = alloc_fake_suite(test, "suite1", dummy_attr_test_cases);
@@ -197,8 +207,10 @@ static void filter_attr_empty_test(struct kunit *test)
 static void filter_attr_skip_test(struct kunit *test)
 {
 	struct kunit_suite *subsuite[2] = {NULL};
-	struct suite_set suite_set = {.start = subsuite, .end = &subsuite[1]};
-	struct suite_set got;
+	struct kunit_suite_set suite_set = {
+		.start = subsuite, .end = &subsuite[1],
+	};
+	struct kunit_suite_set got;
 	int err = 0;
 
 	subsuite[0] = alloc_fake_suite(test, "suite", dummy_attr_test_cases);
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index cb9797fa6303..8b2808068497 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -736,7 +736,11 @@ EXPORT_SYMBOL_GPL(__kunit_test_suites_exit);
 #ifdef CONFIG_MODULES
 static void kunit_module_init(struct module *mod)
 {
-	__kunit_test_suites_init(mod->kunit_suites, mod->num_kunit_suites);
+	struct kunit_suite_set suite_set = {
+		mod->kunit_suites, mod->kunit_suites + mod->num_kunit_suites,
+	};
+
+	kunit_exec_run_tests(&suite_set, false);
 }
 
 static void kunit_module_exit(struct module *mod)

From 18258c60f8a74b9c39b593d118f13f3265d44cd6 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Date: Mon, 7 Aug 2023 12:23:55 +0200
Subject: [PATCH 0855/2047] kunit: Make 'list' action available to kunit test
 modules

Results from kunit tests reported via dmesg may be interleaved with other
kernel messages.  When parsing dmesg for modular kunit results in real
time, external tools, e.g., Intel GPU tools (IGT), may want to insert
their own test name markers into dmesg at the start of each test, before
any kernel message related to that test appears there, so existing upper
level test result parsers have no doubt which test to blame for a specific
kernel message.  Unfortunately, kunit reports names of tests only at their
completion (with the exeption of a not standarized "# Subtest: <name>"
header above a test plan of each test suite or parametrized test).

External tools could be able to insert their own "start of the test"
markers with test names included if they new those names in advance.
Test names could be learned from a list if provided by a kunit test
module.

There exists a feature of listing kunit tests without actually executing
them, but it is now limited to configurations with the kunit module built
in and covers only built-in tests, already available at boot time.
Moreover, switching from list to normal mode requires reboot.  If that
feature was also available when kunit is built as a module, userspace
could load the module with action=list parameter, load some kunit test
modules they are interested in and learn about the list of tests provided
by those modules, then unload them, reload the kunit module in normal mode
and execute the tests with their lists already known.

Extend kunit module notifier initialization callback with a processing
path for only listing the tests provided by a module if the kunit action
parameter is set to "list" or "list_attr".  For user convenience, make the
kunit.action parameter visible in sysfs.

v2: Don't use a different format, use kunit_exec_list_tests() (Rae),
  - refresh on top of new attributes patches, handle newly introduced
    kunit.action=list_attr case (Rae).

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Cc: Rae Moar <rmoar@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/test.h |  2 ++
 lib/kunit/executor.c | 28 +++++++++++++++++-----------
 lib/kunit/test.c     | 16 ++++++++++++++--
 3 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/include/kunit/test.h b/include/kunit/test.h
index 3d002e6b252f..6a338a3267ed 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -309,6 +309,7 @@ static inline void kunit_set_failure(struct kunit *test)
 }
 
 bool kunit_enabled(void);
+const char *kunit_action(void);
 
 void kunit_init_test(struct kunit *test, const char *name, char *log);
 
@@ -324,6 +325,7 @@ int __kunit_test_suites_init(struct kunit_suite * const * const suites, int num_
 void __kunit_test_suites_exit(struct kunit_suite **suites, int num_suites);
 
 void kunit_exec_run_tests(struct kunit_suite_set *suite_set, bool builtin);
+void kunit_exec_list_tests(struct kunit_suite_set *suite_set, bool include_attr);
 
 #if IS_BUILTIN(CONFIG_KUNIT)
 int kunit_run_all_tests(void);
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index 5ef90c334eb0..e877c1f1e75c 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -13,22 +13,29 @@
 extern struct kunit_suite * const __kunit_suites_start[];
 extern struct kunit_suite * const __kunit_suites_end[];
 
+static char *action_param;
+
+module_param_named(action, action_param, charp, 0400);
+MODULE_PARM_DESC(action,
+		 "Changes KUnit executor behavior, valid values are:\n"
+		 "<none>: run the tests like normal\n"
+		 "'list' to list test names instead of running them.\n"
+		 "'list_attr' to list test names and attributes instead of running them.\n");
+
+const char *kunit_action(void)
+{
+	return action_param;
+}
+
 #if IS_BUILTIN(CONFIG_KUNIT)
 
 static char *filter_glob_param;
-static char *action_param;
 static char *filter_param;
 static char *filter_action_param;
 
 module_param_named(filter_glob, filter_glob_param, charp, 0);
 MODULE_PARM_DESC(filter_glob,
 		"Filter which KUnit test suites/tests run at boot-time, e.g. list* or list*.*del_test");
-module_param_named(action, action_param, charp, 0);
-MODULE_PARM_DESC(action,
-		 "Changes KUnit executor behavior, valid values are:\n"
-		 "<none>: run the tests like normal\n"
-		 "'list' to list test names instead of running them.\n"
-		 "'list_attr' to list test names and attributes instead of running them.\n");
 module_param_named(filter, filter_param, charp, 0);
 MODULE_PARM_DESC(filter,
 		"Filter which KUnit test suites/tests run at boot-time using attributes, e.g. speed>slow");
@@ -239,10 +246,7 @@ void kunit_exec_run_tests(struct kunit_suite_set *suite_set, bool builtin)
 	__kunit_test_suites_init(suite_set->start, num_suites);
 }
 
-#if IS_BUILTIN(CONFIG_KUNIT)
-
-static void kunit_exec_list_tests(struct kunit_suite_set *suite_set,
-				  bool include_attr)
+void kunit_exec_list_tests(struct kunit_suite_set *suite_set, bool include_attr)
 {
 	struct kunit_suite * const *suites;
 	struct kunit_case *test_case;
@@ -265,6 +269,8 @@ static void kunit_exec_list_tests(struct kunit_suite_set *suite_set,
 	}
 }
 
+#if IS_BUILTIN(CONFIG_KUNIT)
+
 int kunit_run_all_tests(void)
 {
 	struct kunit_suite_set suite_set = {
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index 8b2808068497..5232a4373782 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -739,13 +739,25 @@ static void kunit_module_init(struct module *mod)
 	struct kunit_suite_set suite_set = {
 		mod->kunit_suites, mod->kunit_suites + mod->num_kunit_suites,
 	};
+	const char *action = kunit_action();
 
-	kunit_exec_run_tests(&suite_set, false);
+	if (!action)
+		kunit_exec_run_tests(&suite_set, false);
+	else if (!strcmp(action, "list"))
+		kunit_exec_list_tests(&suite_set, false);
+	else if (!strcmp(action, "list_attr"))
+		kunit_exec_list_tests(&suite_set, true);
+	else
+		pr_err("kunit: unknown action '%s'\n", action);
 }
 
 static void kunit_module_exit(struct module *mod)
 {
-	__kunit_test_suites_exit(mod->kunit_suites, mod->num_kunit_suites);
+	const char *action = kunit_action();
+
+	if (!action)
+		__kunit_test_suites_exit(mod->kunit_suites,
+					 mod->num_kunit_suites);
 }
 
 static int kunit_module_notify(struct notifier_block *nb, unsigned long val,

From b67abaad4d25b5d9364a1d4f6bc18286ebaaa013 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Date: Mon, 7 Aug 2023 12:23:56 +0200
Subject: [PATCH 0856/2047] kunit: Allow kunit test modules to use test
 filtering

External tools, e.g., Intel GPU tools (IGT), support execution of
individual selftests provided by kernel modules.  That could be also
applicable to kunit test modules if they provided test filtering.  But
test filtering is now possible only when kunit code is built into the
kernel.  Moreover, a filter can be specified only at boot time, then
reboot is required each time a different filter is needed.

Build the test filtering code also when kunit is configured as a module,
expose test filtering functions to other kunit source files, and use them
in kunit module notifier callback functions.  Userspace can then reload
the kunit module with a value of the filter_glob parameter tuned to a
specific kunit test module every time it wants to limit the scope of tests
executed on that module load.  Make the kunit.filter* parameters visible
in sysfs for user convenience.

v5: Refresh on tpp of attributes filtering fix
v4: Refresh on top of newly applied attributes patches and changes
    introdced by new versions of other patches submitted in series with
    this one.
v3: Fix CONFIG_GLOB, required by filtering functions, not selected when
    building as a module (lkp@intel.com).
v2: Fix new name of a structure moved to kunit namespace not updated
    across all uses (lkp@intel.com).

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/test.h | 11 ++++++++
 lib/kunit/Kconfig    |  2 +-
 lib/kunit/executor.c | 63 ++++++++++++++++++++++++++------------------
 lib/kunit/test.c     | 17 ++++++++++++
 4 files changed, 66 insertions(+), 27 deletions(-)

diff --git a/include/kunit/test.h b/include/kunit/test.h
index 6a338a3267ed..d33114097d0d 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -310,6 +310,9 @@ static inline void kunit_set_failure(struct kunit *test)
 
 bool kunit_enabled(void);
 const char *kunit_action(void);
+const char *kunit_filter_glob(void);
+char *kunit_filter(void);
+char *kunit_filter_action(void);
 
 void kunit_init_test(struct kunit *test, const char *name, char *log);
 
@@ -320,6 +323,14 @@ size_t kunit_suite_num_test_cases(struct kunit_suite *suite);
 unsigned int kunit_test_case_num(struct kunit_suite *suite,
 				 struct kunit_case *test_case);
 
+struct kunit_suite_set
+kunit_filter_suites(const struct kunit_suite_set *suite_set,
+		    const char *filter_glob,
+		    char *filters,
+		    char *filter_action,
+		    int *err);
+void kunit_free_suite_set(struct kunit_suite_set suite_set);
+
 int __kunit_test_suites_init(struct kunit_suite * const * const suites, int num_suites);
 
 void __kunit_test_suites_exit(struct kunit_suite **suites, int num_suites);
diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig
index 626719b95bad..68a6daec0aef 100644
--- a/lib/kunit/Kconfig
+++ b/lib/kunit/Kconfig
@@ -4,7 +4,7 @@
 
 menuconfig KUNIT
 	tristate "KUnit - Enable support for unit tests"
-	select GLOB if KUNIT=y
+	select GLOB
 	help
 	  Enables support for kernel unit tests (KUnit), a lightweight unit
 	  testing and mocking framework for the Linux kernel. These tests are
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index e877c1f1e75c..5181aa2e760b 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -27,24 +27,37 @@ const char *kunit_action(void)
 	return action_param;
 }
 
-#if IS_BUILTIN(CONFIG_KUNIT)
-
 static char *filter_glob_param;
 static char *filter_param;
 static char *filter_action_param;
 
-module_param_named(filter_glob, filter_glob_param, charp, 0);
+module_param_named(filter_glob, filter_glob_param, charp, 0400);
 MODULE_PARM_DESC(filter_glob,
 		"Filter which KUnit test suites/tests run at boot-time, e.g. list* or list*.*del_test");
-module_param_named(filter, filter_param, charp, 0);
+module_param_named(filter, filter_param, charp, 0400);
 MODULE_PARM_DESC(filter,
 		"Filter which KUnit test suites/tests run at boot-time using attributes, e.g. speed>slow");
-module_param_named(filter_action, filter_action_param, charp, 0);
+module_param_named(filter_action, filter_action_param, charp, 0400);
 MODULE_PARM_DESC(filter_action,
 		"Changes behavior of filtered tests using attributes, valid values are:\n"
 		"<none>: do not run filtered tests as normal\n"
 		"'skip': skip all filtered tests instead so tests will appear in output\n");
 
+const char *kunit_filter_glob(void)
+{
+	return filter_glob_param;
+}
+
+char *kunit_filter(void)
+{
+	return filter_param;
+}
+
+char *kunit_filter_action(void)
+{
+	return filter_action_param;
+}
+
 /* glob_match() needs NULL terminated strings, so we need a copy of filter_glob_param. */
 struct kunit_glob_filter {
 	char *suite_glob;
@@ -108,10 +121,7 @@ kunit_filter_glob_tests(const struct kunit_suite *const suite, const char *test_
 	return copy;
 }
 
-static char *kunit_shutdown;
-core_param(kunit_shutdown, kunit_shutdown, charp, 0644);
-
-static void kunit_free_suite_set(struct kunit_suite_set suite_set)
+void kunit_free_suite_set(struct kunit_suite_set suite_set)
 {
 	struct kunit_suite * const *suites;
 
@@ -120,7 +130,7 @@ static void kunit_free_suite_set(struct kunit_suite_set suite_set)
 	kfree(suite_set.start);
 }
 
-static struct kunit_suite_set
+struct kunit_suite_set
 kunit_filter_suites(const struct kunit_suite_set *suite_set,
 		    const char *filter_glob,
 		    char *filters,
@@ -218,22 +228,6 @@ err:
 	return filtered;
 }
 
-static void kunit_handle_shutdown(void)
-{
-	if (!kunit_shutdown)
-		return;
-
-	if (!strcmp(kunit_shutdown, "poweroff"))
-		kernel_power_off();
-	else if (!strcmp(kunit_shutdown, "halt"))
-		kernel_halt();
-	else if (!strcmp(kunit_shutdown, "reboot"))
-		kernel_restart(NULL);
-
-}
-
-#endif
-
 void kunit_exec_run_tests(struct kunit_suite_set *suite_set, bool builtin)
 {
 	size_t num_suites = suite_set->end - suite_set->start;
@@ -271,6 +265,23 @@ void kunit_exec_list_tests(struct kunit_suite_set *suite_set, bool include_attr)
 
 #if IS_BUILTIN(CONFIG_KUNIT)
 
+static char *kunit_shutdown;
+core_param(kunit_shutdown, kunit_shutdown, charp, 0644);
+
+static void kunit_handle_shutdown(void)
+{
+	if (!kunit_shutdown)
+		return;
+
+	if (!strcmp(kunit_shutdown, "poweroff"))
+		kernel_power_off();
+	else if (!strcmp(kunit_shutdown, "halt"))
+		kernel_halt();
+	else if (!strcmp(kunit_shutdown, "reboot"))
+		kernel_restart(NULL);
+
+}
+
 int kunit_run_all_tests(void)
 {
 	struct kunit_suite_set suite_set = {
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index 5232a4373782..49698a168437 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -740,6 +740,17 @@ static void kunit_module_init(struct module *mod)
 		mod->kunit_suites, mod->kunit_suites + mod->num_kunit_suites,
 	};
 	const char *action = kunit_action();
+	int err = 0;
+
+	suite_set = kunit_filter_suites(&suite_set,
+					kunit_filter_glob() ?: "*.*",
+					kunit_filter(), kunit_filter_action(),
+					&err);
+	if (err)
+		pr_err("kunit module: error filtering suites: %d\n", err);
+
+	mod->kunit_suites = (struct kunit_suite **)suite_set.start;
+	mod->num_kunit_suites = suite_set.end - suite_set.start;
 
 	if (!action)
 		kunit_exec_run_tests(&suite_set, false);
@@ -753,11 +764,17 @@ static void kunit_module_init(struct module *mod)
 
 static void kunit_module_exit(struct module *mod)
 {
+	struct kunit_suite_set suite_set = {
+		mod->kunit_suites, mod->kunit_suites + mod->num_kunit_suites,
+	};
 	const char *action = kunit_action();
 
 	if (!action)
 		__kunit_test_suites_exit(mod->kunit_suites,
 					 mod->num_kunit_suites);
+
+	if (suite_set.start)
+		kunit_free_suite_set(suite_set);
 }
 
 static int kunit_module_notify(struct notifier_block *nb, unsigned long val,

From c0d87e43a6627b7ef3dcaf4ac5fb9996e3ac31a9 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 27 Jun 2023 11:29:20 -0400
Subject: [PATCH 0857/2047] selftests/rseq: Fix CID_ID typo in Makefile

Ensure that the basic percpu ops tests are effectively built against
mm_cid.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/rseq/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index b357ba24af06..8f023b7cab60 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -31,7 +31,7 @@ $(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h
 	$(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
 
 $(OUTPUT)/basic_percpu_ops_mm_cid_test: basic_percpu_ops_test.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h
-	$(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID_ID $< $(LDLIBS) -lrseq -o $@
+	$(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID $< $(LDLIBS) -lrseq -o $@
 
 $(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
 					rseq.h rseq-*.h

From d6aaa23a2b33d81502e18dfe25ee1847b76b7bf6 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 27 Jun 2023 11:29:21 -0400
Subject: [PATCH 0858/2047] selftests/rseq: Implement rseq_unqual_scalar_typeof

Allow defining variables and perform cast with a typeof which removes
the volatile and const qualifiers.

This prevents declaring a stack variable with a volatile qualifier
within a macro, which would generate sub-optimal assembler.

This is imported from the "librseq" project.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/rseq/compiler.h | 26 +++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/tools/testing/selftests/rseq/compiler.h b/tools/testing/selftests/rseq/compiler.h
index f47092bddeba..49d62fbd6dda 100644
--- a/tools/testing/selftests/rseq/compiler.h
+++ b/tools/testing/selftests/rseq/compiler.h
@@ -33,4 +33,30 @@
 #define RSEQ_COMBINE_TOKENS(_tokena, _tokenb)	\
 	RSEQ__COMBINE_TOKENS(_tokena, _tokenb)
 
+#ifdef __cplusplus
+#define rseq_unqual_scalar_typeof(x)					\
+	std::remove_cv<std::remove_reference<decltype(x)>::type>::type
+#else
+#define rseq_scalar_type_to_expr(type)					\
+	unsigned type: (unsigned type)0,				\
+	signed type: (signed type)0
+
+/*
+ * Use C11 _Generic to express unqualified type from expression. This removes
+ * volatile qualifier from expression type.
+ */
+#define rseq_unqual_scalar_typeof(x)					\
+	__typeof__(							\
+		_Generic((x),						\
+			char: (char)0,					\
+			rseq_scalar_type_to_expr(char),			\
+			rseq_scalar_type_to_expr(short),		\
+			rseq_scalar_type_to_expr(int),			\
+			rseq_scalar_type_to_expr(long),			\
+			rseq_scalar_type_to_expr(long long),		\
+			default: (x)					\
+		)							\
+	)
+#endif
+
 #endif  /* RSEQ_COMPILER_H_ */

From 0fbbf07ce67d2edb869572bf41b507a5f6851c72 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 27 Jun 2023 11:29:22 -0400
Subject: [PATCH 0859/2047] selftests/rseq: Fix arm64 buggy
 load-acquire/store-release macros

The arm64 load-acquire/store-release macros from the Linux kernel rseq
selftests are buggy. Remplace them by a working implementation.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/rseq/rseq-arm64.h | 58 ++++++++++++-----------
 1 file changed, 30 insertions(+), 28 deletions(-)

diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
index 85b90977e7e6..21e1626a7235 100644
--- a/tools/testing/selftests/rseq/rseq-arm64.h
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -27,59 +27,61 @@
 
 #define rseq_smp_load_acquire(p)						\
 __extension__ ({								\
-	__typeof(*p) ____p1;							\
-	switch (sizeof(*p)) {							\
+	union { rseq_unqual_scalar_typeof(*(p)) __val; char __c[sizeof(*(p))]; } __u; \
+	switch (sizeof(*(p))) {							\
 	case 1:									\
-		asm volatile ("ldarb %w0, %1"					\
-			: "=r" (*(__u8 *)p)					\
-			: "Q" (*p) : "memory");					\
+		__asm__ __volatile__ ("ldarb %w0, %1"				\
+			: "=r" (*(__u8 *)__u.__c)				\
+			: "Q" (*(p)) : "memory");				\
 		break;								\
 	case 2:									\
-		asm volatile ("ldarh %w0, %1"					\
-			: "=r" (*(__u16 *)p)					\
-			: "Q" (*p) : "memory");					\
+		__asm__ __volatile__ ("ldarh %w0, %1"				\
+			: "=r" (*(__u16 *)__u.__c)				\
+			: "Q" (*(p)) : "memory");				\
 		break;								\
 	case 4:									\
-		asm volatile ("ldar %w0, %1"					\
-			: "=r" (*(__u32 *)p)					\
-			: "Q" (*p) : "memory");					\
+		__asm__ __volatile__ ("ldar %w0, %1"				\
+			: "=r" (*(__u32 *)__u.__c)				\
+			: "Q" (*(p)) : "memory");				\
 		break;								\
 	case 8:									\
-		asm volatile ("ldar %0, %1"					\
-			: "=r" (*(__u64 *)p)					\
-			: "Q" (*p) : "memory");					\
+		__asm__ __volatile__ ("ldar %0, %1"				\
+			: "=r" (*(__u64 *)__u.__c)				\
+			: "Q" (*(p)) : "memory");				\
 		break;								\
 	}									\
-	____p1;									\
+	(rseq_unqual_scalar_typeof(*(p)))__u.__val;				\
 })
 
 #define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
 
 #define rseq_smp_store_release(p, v)						\
 do {										\
-	switch (sizeof(*p)) {							\
+	union { rseq_unqual_scalar_typeof(*(p)) __val; char __c[sizeof(*(p))]; } __u = \
+		{ .__val = (rseq_unqual_scalar_typeof(*(p))) (v) };		\
+	switch (sizeof(*(p))) {							\
 	case 1:									\
-		asm volatile ("stlrb %w1, %0"					\
-				: "=Q" (*p)					\
-				: "r" ((__u8)v)					\
+		__asm__ __volatile__ ("stlrb %w1, %0"				\
+				: "=Q" (*(p))					\
+				: "r" (*(__u8 *)__u.__c)			\
 				: "memory");					\
 		break;								\
 	case 2:									\
-		asm volatile ("stlrh %w1, %0"					\
-				: "=Q" (*p)					\
-				: "r" ((__u16)v)				\
+		__asm__ __volatile__ ("stlrh %w1, %0"				\
+				: "=Q" (*(p))					\
+				: "r" (*(__u16 *)__u.__c)			\
 				: "memory");					\
 		break;								\
 	case 4:									\
-		asm volatile ("stlr %w1, %0"					\
-				: "=Q" (*p)					\
-				: "r" ((__u32)v)				\
+		__asm__ __volatile__ ("stlr %w1, %0"				\
+				: "=Q" (*(p))					\
+				: "r" (*(__u32 *)__u.__c)			\
 				: "memory");					\
 		break;								\
 	case 8:									\
-		asm volatile ("stlr %1, %0"					\
-				: "=Q" (*p)					\
-				: "r" ((__u64)v)				\
+		__asm__ __volatile__ ("stlr %1, %0"				\
+				: "=Q" (*(p))					\
+				: "r" (*(__u64 *)__u.__c)			\
 				: "memory");					\
 		break;								\
 	}									\

From 2b2fe6052dd01fdb4e9a31031c2c9d8f03cf7753 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 27 Jun 2023 11:29:23 -0400
Subject: [PATCH 0860/2047] selftests/rseq: Use rseq_unqual_scalar_typeof in
 macros

Use rseq_unqual_scalar_typeof() rather than typeof() in macros to remove
the volatile qualifier (if there is one in the input argument), thus
generating better assembly code in those scenarios.

Also add extra brackets around the "p" parameter in RSEQ_READ_ONCE(),
RSEQ_WRITE_ONCE(), and rseq_unqual_scalar_typeof() across architectures
to preserve expectations of operator priority. Here is an example that
shows how operator priority may be an issue with missing parentheses:

    #define m(p) \
    do { \
            __typeof__(*p) v = 0; \
    } while (0)

    void fct(unsigned long long *p1)
    {
            m(p1 + 1);      /* works */
            m(1 + p1);      /* broken */
    }

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/rseq/rseq-arm.h   | 4 ++--
 tools/testing/selftests/rseq/rseq-mips.h  | 4 ++--
 tools/testing/selftests/rseq/rseq-ppc.h   | 4 ++--
 tools/testing/selftests/rseq/rseq-riscv.h | 6 +++---
 tools/testing/selftests/rseq/rseq-s390.h  | 4 ++--
 tools/testing/selftests/rseq/rseq-x86.h   | 4 ++--
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
index 8414fc3eac15..d887b3bbe257 100644
--- a/tools/testing/selftests/rseq/rseq-arm.h
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -66,7 +66,7 @@
 
 #define rseq_smp_load_acquire(p)					\
 __extension__ ({							\
-	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p));	\
 	rseq_smp_mb();							\
 	____p1;								\
 })
@@ -76,7 +76,7 @@ __extension__ ({							\
 #define rseq_smp_store_release(p, v)					\
 do {									\
 	rseq_smp_mb();							\
-	RSEQ_WRITE_ONCE(*p, v);						\
+	RSEQ_WRITE_ONCE(*(p), v);					\
 } while (0)
 
 #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip,	\
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
index 50b950cf9585..42ef8e946693 100644
--- a/tools/testing/selftests/rseq/rseq-mips.h
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -45,7 +45,7 @@
 
 #define rseq_smp_load_acquire(p)					\
 __extension__ ({							\
-	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p));	\
 	rseq_smp_mb();							\
 	____p1;								\
 })
@@ -55,7 +55,7 @@ __extension__ ({							\
 #define rseq_smp_store_release(p, v)					\
 do {									\
 	rseq_smp_mb();							\
-	RSEQ_WRITE_ONCE(*p, v);						\
+	RSEQ_WRITE_ONCE(*(p), v);					\
 } while (0)
 
 #if _MIPS_SZLONG == 64
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
index dc9190facee9..57b160597189 100644
--- a/tools/testing/selftests/rseq/rseq-ppc.h
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -23,7 +23,7 @@
 
 #define rseq_smp_load_acquire(p)					\
 __extension__ ({							\
-	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p));	\
 	rseq_smp_lwsync();						\
 	____p1;								\
 })
@@ -33,7 +33,7 @@ __extension__ ({							\
 #define rseq_smp_store_release(p, v)					\
 do {									\
 	rseq_smp_lwsync();						\
-	RSEQ_WRITE_ONCE(*p, v);						\
+	RSEQ_WRITE_ONCE(*(p), v);					\
 } while (0)
 
 /*
diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h
index 17932a79e066..37e598d0a365 100644
--- a/tools/testing/selftests/rseq/rseq-riscv.h
+++ b/tools/testing/selftests/rseq/rseq-riscv.h
@@ -36,8 +36,8 @@
 
 #define rseq_smp_load_acquire(p)					\
 __extension__ ({							\
-	__typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p));			\
-	RISCV_FENCE(r, rw)						\
+	rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p));	\
+	RISCV_FENCE(r, rw);						\
 	____p1;								\
 })
 
@@ -46,7 +46,7 @@ __extension__ ({							\
 #define rseq_smp_store_release(p, v)					\
 do {									\
 	RISCV_FENCE(rw, w);						\
-	RSEQ_WRITE_ONCE(*(p), v);						\
+	RSEQ_WRITE_ONCE(*(p), v);					\
 } while (0)
 
 #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip,	\
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
index 46c92598acc7..33baaa9f9997 100644
--- a/tools/testing/selftests/rseq/rseq-s390.h
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -15,7 +15,7 @@
 
 #define rseq_smp_load_acquire(p)					\
 __extension__ ({							\
-	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p));	\
 	rseq_barrier();							\
 	____p1;								\
 })
@@ -25,7 +25,7 @@ __extension__ ({							\
 #define rseq_smp_store_release(p, v)					\
 do {									\
 	rseq_barrier();							\
-	RSEQ_WRITE_ONCE(*p, v);						\
+	RSEQ_WRITE_ONCE(*(p), v);					\
 } while (0)
 
 #ifdef __s390x__
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index fb65ef54b0fb..a2aa428ba151 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -42,7 +42,7 @@
 
 #define rseq_smp_load_acquire(p)					\
 __extension__ ({							\
-	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p));	\
 	rseq_barrier();							\
 	____p1;								\
 })
@@ -52,7 +52,7 @@ __extension__ ({							\
 #define rseq_smp_store_release(p, v)					\
 do {									\
 	rseq_barrier();							\
-	RSEQ_WRITE_ONCE(*p, v);						\
+	RSEQ_WRITE_ONCE(*(p), v);					\
 } while (0)
 
 #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,			\

From 0aea7a2f88a551d9dd38a2b405063cf5f97bfc29 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Wed, 9 Aug 2023 02:07:59 +0200
Subject: [PATCH 0861/2047] cpufreq: blocklist more Qualcomm platforms in
 cpufreq-dt-platdev

All Qualcomm platforms utilizing the qcom-cpufreq-hw driver have no
business in using cpufreq-dt. Prevent that from happening.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq-dt-platdev.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index adb3579a1fee..fb2875ce1fdd 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -144,14 +144,18 @@ static const struct of_device_id blocklist[] __initconst = {
 	{ .compatible = "qcom,apq8096", },
 	{ .compatible = "qcom,msm8996", },
 	{ .compatible = "qcom,msm8998", },
+	{ .compatible = "qcom,qcm2290", },
 	{ .compatible = "qcom,qcs404", },
+	{ .compatible = "qcom,qdu1000", },
 	{ .compatible = "qcom,sa8155p" },
 	{ .compatible = "qcom,sa8540p" },
+	{ .compatible = "qcom,sa8775p" },
 	{ .compatible = "qcom,sc7180", },
 	{ .compatible = "qcom,sc7280", },
 	{ .compatible = "qcom,sc8180x", },
 	{ .compatible = "qcom,sc8280xp", },
 	{ .compatible = "qcom,sdm845", },
+	{ .compatible = "qcom,sdx75", },
 	{ .compatible = "qcom,sm6115", },
 	{ .compatible = "qcom,sm6350", },
 	{ .compatible = "qcom,sm6375", },
@@ -159,6 +163,8 @@ static const struct of_device_id blocklist[] __initconst = {
 	{ .compatible = "qcom,sm8150", },
 	{ .compatible = "qcom,sm8250", },
 	{ .compatible = "qcom,sm8350", },
+	{ .compatible = "qcom,sm8450", },
+	{ .compatible = "qcom,sm8550", },
 
 	{ .compatible = "st,stih407", },
 	{ .compatible = "st,stih410", },

From 4352b8cd66e22952210e8205312d2b9ecd70be54 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Aug 2023 09:17:04 -0700
Subject: [PATCH 0862/2047] fs: unexport d_genocide

d_genocide is only used by built-in code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Message-Id: <20230808161704.1099680-1-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/dcache.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 52e6d5fdab6b..3aaf2c7ba110 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3247,8 +3247,6 @@ void d_genocide(struct dentry *parent)
 	d_walk(parent, parent, d_genocide_kill);
 }
 
-EXPORT_SYMBOL(d_genocide);
-
 void d_tmpfile(struct file *file, struct inode *inode)
 {
 	struct dentry *dentry = file->f_path.dentry;

From 4b2201dad26742c92decd920471b7185088624f5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 7 Aug 2023 12:26:22 +0100
Subject: [PATCH 0863/2047] fs: stop using bdev->bd_super in
 mark_buffer_write_io_error

bdev->bd_super is a somewhat awkward backpointer from a block device to
an owning file system with unclear rules.

For the buffer_head code we already have a good backpointer for the
inode that the buffer_head is associated with, even if it lives on the
block device mapping: b_assoc_map. It is used track dirty buffers
associated with an inode but living on the block device mapping like
directory buffers in ext4.

mark_buffer_write_io_error already uses it for the call to
mapping_set_error, and should be doing the same for the per-sb error
sequence.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Message-Id: <20230807112625.652089-2-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/buffer.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index bd091329026c..f36ef03c078a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1225,19 +1225,14 @@ EXPORT_SYMBOL(mark_buffer_dirty);
 
 void mark_buffer_write_io_error(struct buffer_head *bh)
 {
-	struct super_block *sb;
-
 	set_buffer_write_io_error(bh);
 	/* FIXME: do we need to set this in both places? */
 	if (bh->b_folio && bh->b_folio->mapping)
 		mapping_set_error(bh->b_folio->mapping, -EIO);
-	if (bh->b_assoc_map)
+	if (bh->b_assoc_map) {
 		mapping_set_error(bh->b_assoc_map, -EIO);
-	rcu_read_lock();
-	sb = READ_ONCE(bh->b_bdev->bd_super);
-	if (sb)
-		errseq_set(&sb->s_wb_err, -EIO);
-	rcu_read_unlock();
+		errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO);
+	}
 }
 EXPORT_SYMBOL(mark_buffer_write_io_error);
 

From 01efe93a5aa20a19b390426718dc214898a7c2ec Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 7 Aug 2023 12:26:23 +0100
Subject: [PATCH 0864/2047] ext4: don't use bdev->bd_super in
 __ext4_journal_get_write_access

__ext4_journal_get_write_access already has a super_block available,
and there is no need to go from that to the bdev to go back to the
owning super_block.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Message-Id: <20230807112625.652089-3-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ext4/ext4_jbd2.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 77f318ec8abb..b38d59581411 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -234,8 +234,7 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
 
 	might_sleep();
 
-	if (bh->b_bdev->bd_super)
-		ext4_check_bdev_write_error(bh->b_bdev->bd_super);
+	ext4_check_bdev_write_error(sb);
 
 	if (ext4_handle_valid(handle)) {
 		err = jbd2_journal_get_write_access(handle, bh);

From 8887b94d93224e0ef7e1bc6369640e313b8b12f4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 7 Aug 2023 12:26:24 +0100
Subject: [PATCH 0865/2047] ocfs2: stop using bdev->bd_super for journal error
 logging

All ocfs2 journal error handling and logging is based on buffer_heads,
and the owning inode and thus super_block can be retrieved through
bh->b_assoc_map->host.  Switch to using that to remove the last users
of bdev->bd_super.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Message-Id: <20230807112625.652089-4-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ocfs2/journal.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 25d8072ccfce..c19c730c26e2 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -557,7 +557,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
 	     (unsigned long)bh,
 	     (unsigned long long)bh->b_blocknr);
 
-	ocfs2_error(bh->b_bdev->bd_super,
+	ocfs2_error(bh->b_assoc_map->host->i_sb,
 		    "JBD2 has aborted our journal, ocfs2 cannot continue\n");
 }
 
@@ -780,14 +780,14 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
 		mlog_errno(status);
 		if (!is_handle_aborted(handle)) {
 			journal_t *journal = handle->h_transaction->t_journal;
-			struct super_block *sb = bh->b_bdev->bd_super;
 
 			mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
 					"Aborting transaction and journal.\n");
 			handle->h_err = status;
 			jbd2_journal_abort_handle(handle);
 			jbd2_journal_abort(journal, status);
-			ocfs2_abort(sb, "Journal already aborted.\n");
+			ocfs2_abort(bh->b_assoc_map->host->i_sb,
+				    "Journal already aborted.\n");
 		}
 	}
 }

From 6a3207395563f724d91231ec0aa7c4d95bf9591d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 7 Aug 2023 12:26:25 +0100
Subject: [PATCH 0866/2047] fs, block: remove bdev->bd_super

bdev->bd_super is unused now, remove it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Message-Id: <20230807112625.652089-5-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ext4/super.c           | 1 -
 fs/super.c                | 3 ---
 include/linux/blk_types.h | 1 -
 3 files changed, 5 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c94ebf704616..b24f5b9f63e0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5572,7 +5572,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 	spin_lock_init(&sbi->s_bdev_wb_lock);
 	errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
 				 &sbi->s_bdev_wb_err);
-	sb->s_bdev->bd_super = sb;
 	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
 	ext4_orphan_cleanup(sb, es);
 	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
diff --git a/fs/super.c b/fs/super.c
index e781226e2880..7755cc2a3607 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1322,7 +1322,6 @@ int get_tree_bdev(struct fs_context *fc,
 		}
 
 		s->s_flags |= SB_ACTIVE;
-		bdev->bd_super = s;
 	}
 
 	BUG_ON(fc->root);
@@ -1395,7 +1394,6 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
 		}
 
 		s->s_flags |= SB_ACTIVE;
-		bdev->bd_super = s;
 	}
 
 	return dget(s->s_root);
@@ -1413,7 +1411,6 @@ void kill_block_super(struct super_block *sb)
 {
 	struct block_device *bdev = sb->s_bdev;
 
-	bdev->bd_super = NULL;
 	generic_shutdown_super(sb);
 	sync_blockdev(bdev);
 	blkdev_put(bdev, sb->s_type);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0bad62cca3d0..d5c5e59ddbd2 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -52,7 +52,6 @@ struct block_device {
 	atomic_t		bd_openers;
 	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
 	struct inode *		bd_inode;	/* will die */
-	struct super_block *	bd_super;
 	void *			bd_claiming;
 	void *			bd_holder;
 	const struct blk_holder_ops *bd_holder_ops;

From b3030e4f23441347cd5d18b2610fb299ed98d739 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 7 Aug 2023 15:38:32 -0400
Subject: [PATCH 0867/2047] fs: remove silly warning from current_time

An inode with no superblock? Unpossible!

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230807-mgctime-v7-1-d1dec143a704@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/inode.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index d4ab92233062..3fc251bfaf73 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2495,12 +2495,6 @@ struct timespec64 current_time(struct inode *inode)
 	struct timespec64 now;
 
 	ktime_get_coarse_real_ts64(&now);
-
-	if (unlikely(!inode->i_sb)) {
-		WARN(1, "current_time() called with uninitialized super_block in the inode");
-		return now;
-	}
-
 	return timestamp_truncate(now, inode);
 }
 EXPORT_SYMBOL(current_time);

From 0d72b92883c651a11059d93335f33d65c6eb653b Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 7 Aug 2023 15:38:33 -0400
Subject: [PATCH 0868/2047] fs: pass the request_mask to generic_fillattr

generic_fillattr just fills in the entire stat struct indiscriminately
today, copying data from the inode. There is at least one attribute
(STATX_CHANGE_COOKIE) that can have side effects when it is reported,
and we're looking at adding more with the addition of multigrain
timestamps.

Add a request_mask argument to generic_fillattr and have most callers
just pass in the value that is passed to getattr. Have other callers
(e.g. ksmbd) just pass in STATX_BASIC_STATS. Also move the setting of
STATX_CHANGE_COOKIE into generic_fillattr.

Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: "Paulo Alcantara (SUSE)" <pc@manguebit.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230807-mgctime-v7-2-d1dec143a704@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/9p/vfs_inode.c       |  4 ++--
 fs/9p/vfs_inode_dotl.c  |  4 ++--
 fs/afs/inode.c          |  2 +-
 fs/btrfs/inode.c        |  2 +-
 fs/ceph/inode.c         |  2 +-
 fs/coda/inode.c         |  3 ++-
 fs/ecryptfs/inode.c     |  5 +++--
 fs/erofs/inode.c        |  2 +-
 fs/exfat/file.c         |  2 +-
 fs/ext2/inode.c         |  2 +-
 fs/ext4/inode.c         |  2 +-
 fs/f2fs/file.c          |  2 +-
 fs/fat/file.c           |  2 +-
 fs/fuse/dir.c           |  2 +-
 fs/gfs2/inode.c         |  2 +-
 fs/hfsplus/inode.c      |  2 +-
 fs/kernfs/inode.c       |  2 +-
 fs/libfs.c              |  4 ++--
 fs/minix/inode.c        |  2 +-
 fs/nfs/inode.c          |  2 +-
 fs/nfs/namespace.c      |  3 ++-
 fs/ntfs3/file.c         |  2 +-
 fs/ocfs2/file.c         |  2 +-
 fs/orangefs/inode.c     |  2 +-
 fs/proc/base.c          |  4 ++--
 fs/proc/fd.c            |  2 +-
 fs/proc/generic.c       |  2 +-
 fs/proc/proc_net.c      |  2 +-
 fs/proc/proc_sysctl.c   |  2 +-
 fs/proc/root.c          |  3 ++-
 fs/smb/client/inode.c   |  2 +-
 fs/smb/server/smb2pdu.c | 22 +++++++++++-----------
 fs/smb/server/vfs.c     |  3 ++-
 fs/stat.c               | 24 +++++++++++++-----------
 fs/sysv/itree.c         |  3 ++-
 fs/ubifs/dir.c          |  2 +-
 fs/udf/symlink.c        |  2 +-
 fs/vboxsf/utils.c       |  2 +-
 include/linux/fs.h      |  2 +-
 mm/shmem.c              |  2 +-
 40 files changed, 73 insertions(+), 65 deletions(-)

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 16d85e6033a3..d24d1f20e922 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1016,7 +1016,7 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path,
 	p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
 	v9ses = v9fs_dentry2v9ses(dentry);
 	if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
-		generic_fillattr(&nop_mnt_idmap, inode, stat);
+		generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 		return 0;
 	} else if (v9ses->cache & CACHE_WRITEBACK) {
 		if (S_ISREG(inode->i_mode)) {
@@ -1037,7 +1037,7 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path,
 		return PTR_ERR(st);
 
 	v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0);
-	generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
 
 	p9stat_free(st);
 	kfree(st);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 464ea73d1bf8..8e8d5d2a13d8 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -451,7 +451,7 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
 	p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
 	v9ses = v9fs_dentry2v9ses(dentry);
 	if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
-		generic_fillattr(&nop_mnt_idmap, inode, stat);
+		generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 		return 0;
 	} else if (v9ses->cache) {
 		if (S_ISREG(inode->i_mode)) {
@@ -476,7 +476,7 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
 		return PTR_ERR(st);
 
 	v9fs_stat2inode_dotl(st, d_inode(dentry), 0);
-	generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
 	/* Change block size to what the server returned */
 	stat->blksize = st->st_blksize;
 
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 6b636f43f548..1c794a1896aa 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -773,7 +773,7 @@ int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
 
 	do {
 		read_seqbegin_or_lock(&vnode->cb_lock, &seq);
-		generic_fillattr(&nop_mnt_idmap, inode, stat);
+		generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 		if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
 		    stat->nlink > 0)
 			stat->nlink -= 1;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ceac62c1cbfc..29a20f828dda 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8746,7 +8746,7 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
 				  STATX_ATTR_IMMUTABLE |
 				  STATX_ATTR_NODUMP);
 
-	generic_fillattr(idmap, inode, stat);
+	generic_fillattr(idmap, request_mask, inode, stat);
 	stat->dev = BTRFS_I(inode)->root->anon_dev;
 
 	spin_lock(&BTRFS_I(inode)->lock);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5f6e93714f5a..fd05d68e2990 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2467,7 +2467,7 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path,
 			return err;
 	}
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	stat->ino = ceph_present_inode(inode);
 
 	/*
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 3e64679c1620..0c7c2528791e 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -256,7 +256,8 @@ int coda_getattr(struct mnt_idmap *idmap, const struct path *path,
 {
 	int err = coda_revalidate_inode(d_inode(path->dentry));
 	if (!err)
-		generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
+		generic_fillattr(&nop_mnt_idmap, request_mask,
+				 d_inode(path->dentry), stat);
 	return err;
 }
 
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index b491bb239c8f..992d9c7e64ae 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -982,7 +982,7 @@ static int ecryptfs_getattr_link(struct mnt_idmap *idmap,
 
 	mount_crypt_stat = &ecryptfs_superblock_to_private(
 						dentry->d_sb)->mount_crypt_stat;
-	generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
 	if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
 		char *target;
 		size_t targetsiz;
@@ -1011,7 +1011,8 @@ static int ecryptfs_getattr(struct mnt_idmap *idmap,
 	if (!rc) {
 		fsstack_copy_attr_all(d_inode(dentry),
 				      ecryptfs_inode_to_lower(d_inode(dentry)));
-		generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+		generic_fillattr(&nop_mnt_idmap, request_mask,
+				 d_inode(dentry), stat);
 		stat->blocks = lower_stat.blocks;
 	}
 	return rc;
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 567c0d305ea4..f3053f0dd6e1 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -368,7 +368,7 @@ int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
 	stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
 				  STATX_ATTR_IMMUTABLE);
 
-	generic_fillattr(idmap, inode, stat);
+	generic_fillattr(idmap, request_mask, inode, stat);
 	return 0;
 }
 
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index f40ecfeee3a4..32395ef686a2 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -232,7 +232,7 @@ int exfat_getattr(struct mnt_idmap *idmap, const struct path *path,
 	struct inode *inode = d_backing_inode(path->dentry);
 	struct exfat_inode_info *ei = EXFAT_I(inode);
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	exfat_truncate_atime(&stat->atime);
 	stat->result_mask |= STATX_BTIME;
 	stat->btime.tv_sec = ei->i_crtime.tv_sec;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 1259995977d2..acbab27fe957 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1628,7 +1628,7 @@ int ext2_getattr(struct mnt_idmap *idmap, const struct path *path,
 			STATX_ATTR_IMMUTABLE |
 			STATX_ATTR_NODUMP);
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	return 0;
 }
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 86696b40c58f..6683076ecb2f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5535,7 +5535,7 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
 				  STATX_ATTR_NODUMP |
 				  STATX_ATTR_VERITY);
 
-	generic_fillattr(idmap, inode, stat);
+	generic_fillattr(idmap, request_mask, inode, stat);
 	return 0;
 }
 
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index b018800223c4..35886a52edfb 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -882,7 +882,7 @@ int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path,
 				  STATX_ATTR_NODUMP |
 				  STATX_ATTR_VERITY);
 
-	generic_fillattr(idmap, inode, stat);
+	generic_fillattr(idmap, request_mask, inode, stat);
 
 	/* we need to show initial sectors used for inline_data/dentries */
 	if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 456477946dd9..e887e9ab7472 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -401,7 +401,7 @@ int fat_getattr(struct mnt_idmap *idmap, const struct path *path,
 	struct inode *inode = d_inode(path->dentry);
 	struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
 
-	generic_fillattr(idmap, inode, stat);
+	generic_fillattr(idmap, request_mask, inode, stat);
 	stat->blksize = sbi->cluster_size;
 
 	if (sbi->options.nfs == FAT_NFS_NOSTALE_RO) {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index a210c231c7d3..645fae48dc6b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1224,7 +1224,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
 		forget_all_cached_acls(inode);
 		err = fuse_do_getattr(inode, stat, file);
 	} else if (stat) {
-		generic_fillattr(&nop_mnt_idmap, inode, stat);
+		generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 		stat->mode = fi->orig_i_mode;
 		stat->ino = fi->orig_ino;
 	}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2ded6c813f20..200cabf3b393 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -2071,7 +2071,7 @@ static int gfs2_getattr(struct mnt_idmap *idmap,
 				  STATX_ATTR_IMMUTABLE |
 				  STATX_ATTR_NODUMP);
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 
 	if (gfs2_holder_initialized(&gh))
 		gfs2_glock_dq_uninit(&gh);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 40c61ab4a918..c65c8c4b03dd 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -298,7 +298,7 @@ int hfsplus_getattr(struct mnt_idmap *idmap, const struct path *path,
 	stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE |
 				 STATX_ATTR_NODUMP;
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	return 0;
 }
 
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 89a9b4dcf109..af37be68bf06 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -190,7 +190,7 @@ int kernfs_iop_getattr(struct mnt_idmap *idmap,
 
 	down_read(&root->kernfs_iattr_rwsem);
 	kernfs_refresh_inode(kn, inode);
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	up_read(&root->kernfs_iattr_rwsem);
 
 	return 0;
diff --git a/fs/libfs.c b/fs/libfs.c
index 1f5245e8bfdc..a61878469dcd 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -33,7 +33,7 @@ int simple_getattr(struct mnt_idmap *idmap, const struct path *path,
 		   unsigned int query_flags)
 {
 	struct inode *inode = d_inode(path->dentry);
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
 	return 0;
 }
@@ -1334,7 +1334,7 @@ static int empty_dir_getattr(struct mnt_idmap *idmap,
 			     u32 request_mask, unsigned int query_flags)
 {
 	struct inode *inode = d_inode(path->dentry);
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	return 0;
 }
 
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 8a4fc9420b36..df575473c1cc 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -656,7 +656,7 @@ int minix_getattr(struct mnt_idmap *idmap, const struct path *path,
 	struct super_block *sb = path->dentry->d_sb;
 	struct inode *inode = d_inode(path->dentry);
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	if (INODE_VERSION(inode) == MINIX_V1)
 		stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
 	else
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1283fdfa4b0a..e21c073158e5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -912,7 +912,7 @@ out_no_revalidate:
 	/* Only return attributes that were revalidated. */
 	stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
 	stat->change_cookie = inode_peek_iversion_raw(inode);
 	stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 19d51ebf842c..e7494cdd957e 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -215,7 +215,8 @@ nfs_namespace_getattr(struct mnt_idmap *idmap,
 	if (NFS_FH(d_inode(path->dentry))->size != 0)
 		return nfs_getattr(idmap, path, stat, request_mask,
 				   query_flags);
-	generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry),
+			 stat);
 	return 0;
 }
 
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 12788601dc84..962f12ce6c0a 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -85,7 +85,7 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
 
 	stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED;
 
-	generic_fillattr(idmap, inode, stat);
+	generic_fillattr(idmap, request_mask, inode, stat);
 
 	stat->result_mask |= STATX_BTIME;
 	stat->btime = ni->i_crtime;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1b337ebce4df..8184499ae7a5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1319,7 +1319,7 @@ int ocfs2_getattr(struct mnt_idmap *idmap, const struct path *path,
 		goto bail;
 	}
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	/*
 	 * If there is inline data in the inode, the inode will normally not
 	 * have data blocks allocated (it may have an external xattr block).
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 9014bbcc8031..a52c30e80f45 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -871,7 +871,7 @@ int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path,
 	ret = orangefs_inode_getattr(inode,
 	    request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0);
 	if (ret == 0) {
-		generic_fillattr(&nop_mnt_idmap, inode, stat);
+		generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 
 		/* override block size reported to stat */
 		if (!(request_mask & STATX_SIZE))
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3c644a822bca..2fcb393836ab 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1966,7 +1966,7 @@ int pid_getattr(struct mnt_idmap *idmap, const struct path *path,
 	struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
 	struct task_struct *task;
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 
 	stat->uid = GLOBAL_ROOT_UID;
 	stat->gid = GLOBAL_ROOT_GID;
@@ -3899,7 +3899,7 @@ static int proc_task_getattr(struct mnt_idmap *idmap,
 {
 	struct inode *inode = d_inode(path->dentry);
 	struct task_struct *p = get_proc_task(inode);
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 
 	if (p) {
 		stat->nlink += get_nr_threads(p);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index b3140deebbbf..6276b3938842 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -352,7 +352,7 @@ static int proc_fd_getattr(struct mnt_idmap *idmap,
 	struct inode *inode = d_inode(path->dentry);
 	int rv = 0;
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 
 	/* If it's a directory, put the number of open fds there */
 	if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 42ae38ff6e7e..775ce0bcf08c 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -146,7 +146,7 @@ static int proc_getattr(struct mnt_idmap *idmap,
 		}
 	}
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	return 0;
 }
 
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index a0c0419872e3..75f35f128e63 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -308,7 +308,7 @@ static int proc_tgid_net_getattr(struct mnt_idmap *idmap,
 
 	net = get_proc_task_net(inode);
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 
 	if (net != NULL) {
 		stat->nlink = net->proc_net->nlink;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 6bc10e7e0ff7..bf06344a42cc 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -849,7 +849,7 @@ static int proc_sys_getattr(struct mnt_idmap *idmap,
 	if (IS_ERR(head))
 		return PTR_ERR(head);
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	if (table)
 		stat->mode = (stat->mode & S_IFMT) | table->mode;
 
diff --git a/fs/proc/root.c b/fs/proc/root.c
index a86e65a608da..9191248f2dac 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -314,7 +314,8 @@ static int proc_root_getattr(struct mnt_idmap *idmap,
 			     const struct path *path, struct kstat *stat,
 			     u32 request_mask, unsigned int query_flags)
 {
-	generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry),
+			 stat);
 	stat->nlink = proc_root.nlink + nr_processes();
 	return 0;
 }
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 218f03dd3f52..93fe43789d7a 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -2540,7 +2540,7 @@ int cifs_getattr(struct mnt_idmap *idmap, const struct path *path,
 			return rc;
 	}
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	stat->blksize = cifs_sb->ctx->bsize;
 	stat->ino = CIFS_I(inode)->uniqueid;
 
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index f9099831c8ff..2a084d35233a 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -4391,8 +4391,8 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
 	}
 
 	basic_info = (struct smb2_file_basic_info *)rsp->Buffer;
-	generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
-			 &stat);
+	generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
+			 file_inode(fp->filp), &stat);
 	basic_info->CreationTime = cpu_to_le64(fp->create_time);
 	time = ksmbd_UnixTimeToNT(stat.atime);
 	basic_info->LastAccessTime = cpu_to_le64(time);
@@ -4417,7 +4417,7 @@ static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
 	struct kstat stat;
 
 	inode = file_inode(fp->filp);
-	generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat);
+	generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat);
 
 	sinfo = (struct smb2_file_standard_info *)rsp->Buffer;
 	delete_pending = ksmbd_inode_pending_delete(fp);
@@ -4471,7 +4471,7 @@ static int get_file_all_info(struct ksmbd_work *work,
 		return PTR_ERR(filename);
 
 	inode = file_inode(fp->filp);
-	generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat);
+	generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat);
 
 	ksmbd_debug(SMB, "filename = %s\n", filename);
 	delete_pending = ksmbd_inode_pending_delete(fp);
@@ -4548,8 +4548,8 @@ static void get_file_stream_info(struct ksmbd_work *work,
 	int buf_free_len;
 	struct smb2_query_info_req *req = ksmbd_req_buf_next(work);
 
-	generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
-			 &stat);
+	generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
+			 file_inode(fp->filp), &stat);
 	file_info = (struct smb2_file_stream_info *)rsp->Buffer;
 
 	buf_free_len =
@@ -4639,8 +4639,8 @@ static void get_file_internal_info(struct smb2_query_info_rsp *rsp,
 	struct smb2_file_internal_info *file_info;
 	struct kstat stat;
 
-	generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
-			 &stat);
+	generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
+			 file_inode(fp->filp), &stat);
 	file_info = (struct smb2_file_internal_info *)rsp->Buffer;
 	file_info->IndexNumber = cpu_to_le64(stat.ino);
 	rsp->OutputBufferLength =
@@ -4665,7 +4665,7 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
 	file_info = (struct smb2_file_ntwrk_info *)rsp->Buffer;
 
 	inode = file_inode(fp->filp);
-	generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat);
+	generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat);
 
 	file_info->CreationTime = cpu_to_le64(fp->create_time);
 	time = ksmbd_UnixTimeToNT(stat.atime);
@@ -4726,8 +4726,8 @@ static void get_file_compression_info(struct smb2_query_info_rsp *rsp,
 	struct smb2_file_comp_info *file_info;
 	struct kstat stat;
 
-	generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
-			 &stat);
+	generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
+			 file_inode(fp->filp), &stat);
 
 	file_info = (struct smb2_file_comp_info *)rsp->Buffer;
 	file_info->CompressedFileSize = cpu_to_le64(stat.blocks << 9);
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index e35914457350..d0e94b73931a 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -1650,7 +1650,8 @@ int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
 	u64 time;
 	int rc;
 
-	generic_fillattr(idmap, d_inode(dentry), ksmbd_kstat->kstat);
+	generic_fillattr(idmap, STATX_BASIC_STATS, d_inode(dentry),
+			 ksmbd_kstat->kstat);
 
 	time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->ctime);
 	ksmbd_kstat->create_time = time;
diff --git a/fs/stat.c b/fs/stat.c
index 8c2b30af19f5..7644e5997035 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -28,9 +28,10 @@
 
 /**
  * generic_fillattr - Fill in the basic attributes from the inode struct
- * @idmap:	idmap of the mount the inode was found from
- * @inode:	Inode to use as the source
- * @stat:	Where to fill in the attributes
+ * @idmap:		idmap of the mount the inode was found from
+ * @request_mask:	statx request_mask
+ * @inode:		Inode to use as the source
+ * @stat:		Where to fill in the attributes
  *
  * Fill in the basic attributes in the kstat structure from data that's to be
  * found on the VFS inode structure.  This is the default if no getattr inode
@@ -42,8 +43,8 @@
  * uid and gid filds. On non-idmapped mounts or if permission checking is to be
  * performed on the raw inode simply passs @nop_mnt_idmap.
  */
-void generic_fillattr(struct mnt_idmap *idmap, struct inode *inode,
-		      struct kstat *stat)
+void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask,
+		      struct inode *inode, struct kstat *stat)
 {
 	vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
 	vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
@@ -61,6 +62,12 @@ void generic_fillattr(struct mnt_idmap *idmap, struct inode *inode,
 	stat->ctime = inode_get_ctime(inode);
 	stat->blksize = i_blocksize(inode);
 	stat->blocks = inode->i_blocks;
+
+	if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
+		stat->result_mask |= STATX_CHANGE_COOKIE;
+		stat->change_cookie = inode_query_iversion(inode);
+	}
+
 }
 EXPORT_SYMBOL(generic_fillattr);
 
@@ -123,17 +130,12 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
 	stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
 				  STATX_ATTR_DAX);
 
-	if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
-		stat->result_mask |= STATX_CHANGE_COOKIE;
-		stat->change_cookie = inode_query_iversion(inode);
-	}
-
 	idmap = mnt_idmap(path->mnt);
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(idmap, path, stat,
 					    request_mask, query_flags);
 
-	generic_fillattr(idmap, inode, stat);
+	generic_fillattr(idmap, request_mask, inode, stat);
 	return 0;
 }
 EXPORT_SYMBOL(vfs_getattr_nosec);
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index dba6a2ef26f1..edb94e55de8e 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -449,7 +449,8 @@ int sysv_getattr(struct mnt_idmap *idmap, const struct path *path,
 		 struct kstat *stat, u32 request_mask, unsigned int flags)
 {
 	struct super_block *s = path->dentry->d_sb;
-	generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry),
+			 stat);
 	stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size);
 	stat->blksize = s->s_blocksize;
 	return 0;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 3a1ba8ba308a..2f48c58d47cd 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1654,7 +1654,7 @@ int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path,
 				STATX_ATTR_ENCRYPTED |
 				STATX_ATTR_IMMUTABLE);
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	stat->blksize = UBIFS_BLOCK_SIZE;
 	stat->size = ui->ui_size;
 
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 779b5c2c75f6..f7eaf7b14594 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -149,7 +149,7 @@ static int udf_symlink_getattr(struct mnt_idmap *idmap,
 	struct inode *inode = d_backing_inode(dentry);
 	struct page *page;
 
-	generic_fillattr(&nop_mnt_idmap, inode, stat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 	page = read_mapping_page(inode->i_mapping, 0, NULL);
 	if (IS_ERR(page))
 		return PTR_ERR(page);
diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
index 576b91d571c5..83f20dd15522 100644
--- a/fs/vboxsf/utils.c
+++ b/fs/vboxsf/utils.c
@@ -252,7 +252,7 @@ int vboxsf_getattr(struct mnt_idmap *idmap, const struct path *path,
 	if (err)
 		return err;
 
-	generic_fillattr(&nop_mnt_idmap, d_inode(dentry), kstat);
+	generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), kstat);
 	return 0;
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 61f27011fd04..85977cdeda94 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2917,7 +2917,7 @@ extern void page_put_link(void *);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern void kfree_link(void *);
-void generic_fillattr(struct mnt_idmap *, struct inode *, struct kstat *);
+void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
 void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
 extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
 extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
diff --git a/mm/shmem.c b/mm/shmem.c
index 72129c101800..142ead70e8c1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1073,7 +1073,7 @@ static int shmem_getattr(struct mnt_idmap *idmap,
 	stat->attributes_mask |= (STATX_ATTR_APPEND |
 			STATX_ATTR_IMMUTABLE |
 			STATX_ATTR_NODUMP);
-	generic_fillattr(idmap, inode, stat);
+	generic_fillattr(idmap, request_mask, inode, stat);
 
 	if (shmem_is_huge(inode, 0, false, NULL, 0))
 		stat->blksize = HPAGE_PMD_SIZE;

From 541d4c798a598854fcce7326d947cbcbd35701d6 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 7 Aug 2023 15:38:34 -0400
Subject: [PATCH 0869/2047] fs: drop the timespec64 arg from
 generic_update_time

In future patches we're going to change how the ctime is updated
to keep track of when it has been queried. The way that the update_time
operation works (and a lot of its callers) make this difficult, since
they grab a timestamp early and then pass it down to eventually be
copied into the inode.

All of the existing update_time callers pass in the result of
current_time() in some fashion. Drop the "time" parameter from
generic_update_time, and rework it to fetch its own timestamp.

This change means that an update_time could fetch a different timestamp
than was seen in inode_needs_update_time. update_time is only ever
called with one of two flag combinations: Either S_ATIME is set, or
S_MTIME|S_CTIME|S_VERSION are set.

With this change we now treat the flags argument as an indicator that
some value needed to be updated when last checked, rather than an
indication to update specific timestamps.

Rework the logic for updating the timestamps and put it in a new
inode_update_timestamps helper that other update_time routines can use.
S_ATIME is as treated as we always have, but if any of the other three
are set, then we attempt to update all three.

Also, some callers of generic_update_time need to know what timestamps
were actually updated. Change it to return an S_* flag mask to indicate
that and rework the callers to expect it.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230807-mgctime-v7-3-d1dec143a704@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/gfs2/inode.c     |  3 +-
 fs/inode.c          | 82 +++++++++++++++++++++++++++++++++++----------
 fs/orangefs/inode.c |  3 +-
 fs/ubifs/file.c     |  6 ++--
 fs/xfs/xfs_iops.c   |  6 ++--
 include/linux/fs.h  |  3 +-
 6 files changed, 79 insertions(+), 24 deletions(-)

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 200cabf3b393..f1f04557aa21 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -2155,7 +2155,8 @@ static int gfs2_update_time(struct inode *inode, struct timespec64 *time,
 		if (error)
 			return error;
 	}
-	return generic_update_time(inode, time, flags);
+	generic_update_time(inode, flags);
+	return 0;
 }
 
 static const struct inode_operations gfs2_file_iops = {
diff --git a/fs/inode.c b/fs/inode.c
index 3fc251bfaf73..e07e45f6cd01 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1881,29 +1881,76 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
 	return 0;
 }
 
-int generic_update_time(struct inode *inode, struct timespec64 *time, int flags)
+/**
+ * inode_update_timestamps - update the timestamps on the inode
+ * @inode: inode to be updated
+ * @flags: S_* flags that needed to be updated
+ *
+ * The update_time function is called when an inode's timestamps need to be
+ * updated for a read or write operation. This function handles updating the
+ * actual timestamps. It's up to the caller to ensure that the inode is marked
+ * dirty appropriately.
+ *
+ * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated,
+ * attempt to update all three of them. S_ATIME updates can be handled
+ * independently of the rest.
+ *
+ * Returns a set of S_* flags indicating which values changed.
+ */
+int inode_update_timestamps(struct inode *inode, int flags)
 {
-	int dirty_flags = 0;
+	int updated = 0;
+	struct timespec64 now;
 
-	if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
-		if (flags & S_ATIME)
-			inode->i_atime = *time;
-		if (flags & S_CTIME)
-			inode_set_ctime_to_ts(inode, *time);
-		if (flags & S_MTIME)
-			inode->i_mtime = *time;
+	if (flags & (S_MTIME|S_CTIME|S_VERSION)) {
+		struct timespec64 ctime = inode_get_ctime(inode);
 
-		if (inode->i_sb->s_flags & SB_LAZYTIME)
-			dirty_flags |= I_DIRTY_TIME;
-		else
-			dirty_flags |= I_DIRTY_SYNC;
+		now = inode_set_ctime_current(inode);
+		if (!timespec64_equal(&now, &ctime))
+			updated |= S_CTIME;
+		if (!timespec64_equal(&now, &inode->i_mtime)) {
+			inode->i_mtime = now;
+			updated |= S_MTIME;
+		}
+		if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated))
+			updated |= S_VERSION;
+	} else {
+		now = current_time(inode);
 	}
 
-	if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
-		dirty_flags |= I_DIRTY_SYNC;
+	if (flags & S_ATIME) {
+		if (!timespec64_equal(&now, &inode->i_atime)) {
+			inode->i_atime = now;
+			updated |= S_ATIME;
+		}
+	}
+	return updated;
+}
+EXPORT_SYMBOL(inode_update_timestamps);
 
+/**
+ * generic_update_time - update the timestamps on the inode
+ * @inode: inode to be updated
+ * @flags: S_* flags that needed to be updated
+ *
+ * The update_time function is called when an inode's timestamps need to be
+ * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME,
+ * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME
+ * updates can be handled done independently of the rest.
+ *
+ * Returns a S_* mask indicating which fields were updated.
+ */
+int generic_update_time(struct inode *inode, int flags)
+{
+	int updated = inode_update_timestamps(inode, flags);
+	int dirty_flags = 0;
+
+	if (updated & (S_ATIME|S_MTIME|S_CTIME))
+		dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC;
+	if (updated & S_VERSION)
+		dirty_flags |= I_DIRTY_SYNC;
 	__mark_inode_dirty(inode, dirty_flags);
-	return 0;
+	return updated;
 }
 EXPORT_SYMBOL(generic_update_time);
 
@@ -1915,7 +1962,8 @@ int inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
 {
 	if (inode->i_op->update_time)
 		return inode->i_op->update_time(inode, time, flags);
-	return generic_update_time(inode, time, flags);
+	generic_update_time(inode, flags);
+	return 0;
 }
 EXPORT_SYMBOL(inode_update_time);
 
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index a52c30e80f45..3afa2a69bc63 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -903,9 +903,10 @@ int orangefs_permission(struct mnt_idmap *idmap,
 int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags)
 {
 	struct iattr iattr;
+
 	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_update_time: %pU\n",
 	    get_khandle_from_ino(inode));
-	generic_update_time(inode, time, flags);
+	flags = generic_update_time(inode, flags);
 	memset(&iattr, 0, sizeof iattr);
         if (flags & S_ATIME)
 		iattr.ia_valid |= ATTR_ATIME;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 436b27d7c58f..df9086b19cd0 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1387,8 +1387,10 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time,
 			.dirtied_ino_d = ALIGN(ui->data_len, 8) };
 	int err, release;
 
-	if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT))
-		return generic_update_time(inode, time, flags);
+	if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT)) {
+		generic_update_time(inode, flags);
+		return 0;
+	}
 
 	err = ubifs_budget_space(c, &req);
 	if (err)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 3a9363953ef2..731f45391baa 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1042,8 +1042,10 @@ xfs_vn_update_time(
 
 	if (inode->i_sb->s_flags & SB_LAZYTIME) {
 		if (!((flags & S_VERSION) &&
-		      inode_maybe_inc_iversion(inode, false)))
-			return generic_update_time(inode, now, flags);
+		      inode_maybe_inc_iversion(inode, false))) {
+			generic_update_time(inode, flags);
+			return 0;
+		}
 
 		/* Capture the iversion update that just occurred */
 		log_flags |= XFS_ILOG_CORE;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 85977cdeda94..bb3c2c4f871f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2343,7 +2343,8 @@ extern int current_umask(void);
 
 extern void ihold(struct inode * inode);
 extern void iput(struct inode *);
-extern int generic_update_time(struct inode *, struct timespec64 *, int);
+int inode_update_timestamps(struct inode *inode, int flags);
+int generic_update_time(struct inode *, int);
 
 /* /sys/fs */
 extern struct kobject *fs_kobj;

From bb7cc0a62e47b5810ed2721fc7eaaf709e42b966 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 7 Aug 2023 15:38:35 -0400
Subject: [PATCH 0870/2047] btrfs: have it use inode_update_timestamps

In later patches, we're going to drop the "now" argument from the
update_time operation. Have btrfs_update_time use the new
inode_update_timestamps helper to fetch a new timestamp and update it
properly.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230807-mgctime-v7-4-d1dec143a704@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/btrfs/inode.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 29a20f828dda..d52e7d64570a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6068,14 +6068,7 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
 	if (btrfs_root_readonly(root))
 		return -EROFS;
 
-	if (flags & S_VERSION)
-		dirty |= inode_maybe_inc_iversion(inode, dirty);
-	if (flags & S_CTIME)
-		inode_set_ctime_to_ts(inode, *now);
-	if (flags & S_MTIME)
-		inode->i_mtime = *now;
-	if (flags & S_ATIME)
-		inode->i_atime = *now;
+	dirty = inode_update_timestamps(inode, flags);
 	return dirty ? btrfs_dirty_inode(BTRFS_I(inode)) : 0;
 }
 

From c7e263ab45b95170fe7d4b818bab484268b7bba8 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Tue, 25 Jul 2023 16:45:04 +0200
Subject: [PATCH 0871/2047] shmem: make shmem_inode_acct_block() return error

Make shmem_inode_acct_block() return proper error code instead of bool.
This will be useful later when we introduce quota support.

There should be no functional change.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230725144510.253763-2-cem@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/shmem.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index f5af4b943e42..be71cdcaeb14 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -199,13 +199,14 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 		vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
 }
 
-static inline bool shmem_inode_acct_block(struct inode *inode, long pages)
+static inline int shmem_inode_acct_block(struct inode *inode, long pages)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	int err = -ENOSPC;
 
 	if (shmem_acct_block(info->flags, pages))
-		return false;
+		return err;
 
 	if (sbinfo->max_blocks) {
 		if (percpu_counter_compare(&sbinfo->used_blocks,
@@ -214,11 +215,11 @@ static inline bool shmem_inode_acct_block(struct inode *inode, long pages)
 		percpu_counter_add(&sbinfo->used_blocks, pages);
 	}
 
-	return true;
+	return 0;
 
 unacct:
 	shmem_unacct_blocks(info->flags, pages);
-	return false;
+	return err;
 }
 
 static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
@@ -370,7 +371,7 @@ bool shmem_charge(struct inode *inode, long pages)
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	unsigned long flags;
 
-	if (!shmem_inode_acct_block(inode, pages))
+	if (shmem_inode_acct_block(inode, pages))
 		return false;
 
 	/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
@@ -1588,13 +1589,14 @@ static struct folio *shmem_alloc_and_acct_folio(gfp_t gfp, struct inode *inode,
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct folio *folio;
 	int nr;
-	int err = -ENOSPC;
+	int err;
 
 	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 		huge = false;
 	nr = huge ? HPAGE_PMD_NR : 1;
 
-	if (!shmem_inode_acct_block(inode, nr))
+	err = shmem_inode_acct_block(inode, nr);
+	if (err)
 		goto failed;
 
 	if (huge)
@@ -2445,7 +2447,7 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
 	int ret;
 	pgoff_t max_off;
 
-	if (!shmem_inode_acct_block(inode, 1)) {
+	if (shmem_inode_acct_block(inode, 1)) {
 		/*
 		 * We may have got a page, returned -ENOENT triggering a retry,
 		 * and now we find ourselves with -ENOMEM. Release the page, to

From 71480663b751de1bdfafaa2f668ceabaea78b68b Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cem@kernel.org>
Date: Tue, 25 Jul 2023 16:45:05 +0200
Subject: [PATCH 0872/2047] shmem: make shmem_get_inode() return ERR_PTR
 instead of NULL

Make shmem_get_inode() return ERR_PTR instead of NULL on error. This will be
useful later when we introduce quota support.

There should be no functional change.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230725144510.253763-3-cem@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/shmem.c | 211 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 119 insertions(+), 92 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index be71cdcaeb14..e89c35c65586 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2365,67 +2365,74 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
 	struct shmem_inode_info *info;
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 	ino_t ino;
+	int err;
+
+	err = shmem_reserve_inode(sb, &ino);
+	if (err)
+		return ERR_PTR(err);
 
-	if (shmem_reserve_inode(sb, &ino))
-		return NULL;
 
 	inode = new_inode(sb);
-	if (inode) {
-		inode->i_ino = ino;
-		inode_init_owner(idmap, inode, dir, mode);
-		inode->i_blocks = 0;
-		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
-		inode->i_generation = get_random_u32();
-		info = SHMEM_I(inode);
-		memset(info, 0, (char *)inode - (char *)info);
-		spin_lock_init(&info->lock);
-		atomic_set(&info->stop_eviction, 0);
-		info->seals = F_SEAL_SEAL;
-		info->flags = flags & VM_NORESERVE;
-		info->i_crtime = inode->i_mtime;
-		info->fsflags = (dir == NULL) ? 0 :
-			SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
-		if (info->fsflags)
-			shmem_set_inode_flags(inode, info->fsflags);
-		INIT_LIST_HEAD(&info->shrinklist);
-		INIT_LIST_HEAD(&info->swaplist);
-		if (sbinfo->noswap)
-			mapping_set_unevictable(inode->i_mapping);
-		simple_xattrs_init(&info->xattrs);
-		cache_no_acl(inode);
-		mapping_set_large_folios(inode->i_mapping);
 
-		switch (mode & S_IFMT) {
-		default:
-			inode->i_op = &shmem_special_inode_operations;
-			init_special_inode(inode, mode, dev);
-			break;
-		case S_IFREG:
-			inode->i_mapping->a_ops = &shmem_aops;
-			inode->i_op = &shmem_inode_operations;
-			inode->i_fop = &shmem_file_operations;
-			mpol_shared_policy_init(&info->policy,
-						 shmem_get_sbmpol(sbinfo));
-			break;
-		case S_IFDIR:
-			inc_nlink(inode);
-			/* Some things misbehave if size == 0 on a directory */
-			inode->i_size = 2 * BOGO_DIRENT_SIZE;
-			inode->i_op = &shmem_dir_inode_operations;
-			inode->i_fop = &simple_dir_operations;
-			break;
-		case S_IFLNK:
-			/*
-			 * Must not load anything in the rbtree,
-			 * mpol_free_shared_policy will not be called.
-			 */
-			mpol_shared_policy_init(&info->policy, NULL);
-			break;
-		}
-
-		lockdep_annotate_inode_mutex_key(inode);
-	} else
+	if (!inode) {
 		shmem_free_inode(sb);
+		return ERR_PTR(-ENOSPC);
+	}
+
+	inode->i_ino = ino;
+	inode_init_owner(idmap, inode, dir, mode);
+	inode->i_blocks = 0;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_generation = get_random_u32();
+	info = SHMEM_I(inode);
+	memset(info, 0, (char *)inode - (char *)info);
+	spin_lock_init(&info->lock);
+	atomic_set(&info->stop_eviction, 0);
+	info->seals = F_SEAL_SEAL;
+	info->flags = flags & VM_NORESERVE;
+	info->i_crtime = inode->i_mtime;
+	info->fsflags = (dir == NULL) ? 0 :
+		SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
+	if (info->fsflags)
+		shmem_set_inode_flags(inode, info->fsflags);
+	INIT_LIST_HEAD(&info->shrinklist);
+	INIT_LIST_HEAD(&info->swaplist);
+	INIT_LIST_HEAD(&info->swaplist);
+	if (sbinfo->noswap)
+		mapping_set_unevictable(inode->i_mapping);
+	simple_xattrs_init(&info->xattrs);
+	cache_no_acl(inode);
+	mapping_set_large_folios(inode->i_mapping);
+
+	switch (mode & S_IFMT) {
+	default:
+		inode->i_op = &shmem_special_inode_operations;
+		init_special_inode(inode, mode, dev);
+		break;
+	case S_IFREG:
+		inode->i_mapping->a_ops = &shmem_aops;
+		inode->i_op = &shmem_inode_operations;
+		inode->i_fop = &shmem_file_operations;
+		mpol_shared_policy_init(&info->policy,
+					 shmem_get_sbmpol(sbinfo));
+		break;
+	case S_IFDIR:
+		inc_nlink(inode);
+		/* Some things misbehave if size == 0 on a directory */
+		inode->i_size = 2 * BOGO_DIRENT_SIZE;
+		inode->i_op = &shmem_dir_inode_operations;
+		inode->i_fop = &simple_dir_operations;
+		break;
+	case S_IFLNK:
+		/*
+		 * Must not load anything in the rbtree,
+		 * mpol_free_shared_policy will not be called.
+		 */
+		mpol_shared_policy_init(&info->policy, NULL);
+		break;
+	}
+
+	lockdep_annotate_inode_mutex_key(inode);
 	return inode;
 }
 
@@ -3074,27 +3081,30 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	    struct dentry *dentry, umode_t mode, dev_t dev)
 {
 	struct inode *inode;
-	int error = -ENOSPC;
+	int error;
 
 	inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE);
-	if (inode) {
-		error = simple_acl_create(dir, inode);
-		if (error)
-			goto out_iput;
-		error = security_inode_init_security(inode, dir,
-						     &dentry->d_name,
-						     shmem_initxattrs, NULL);
-		if (error && error != -EOPNOTSUPP)
-			goto out_iput;
 
-		error = 0;
-		dir->i_size += BOGO_DIRENT_SIZE;
-		dir->i_ctime = dir->i_mtime = current_time(dir);
-		inode_inc_iversion(dir);
-		d_instantiate(dentry, inode);
-		dget(dentry); /* Extra count - pin the dentry in core */
-	}
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	error = simple_acl_create(dir, inode);
+	if (error)
+		goto out_iput;
+	error = security_inode_init_security(inode, dir,
+					     &dentry->d_name,
+					     shmem_initxattrs, NULL);
+	if (error && error != -EOPNOTSUPP)
+		goto out_iput;
+
+	error = 0;
+	dir->i_size += BOGO_DIRENT_SIZE;
+	dir->i_ctime = dir->i_mtime = current_time(dir);
+	inode_inc_iversion(dir);
+	d_instantiate(dentry, inode);
+	dget(dentry); /* Extra count - pin the dentry in core */
 	return error;
+
 out_iput:
 	iput(inode);
 	return error;
@@ -3105,20 +3115,26 @@ shmem_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
 	      struct file *file, umode_t mode)
 {
 	struct inode *inode;
-	int error = -ENOSPC;
+	int error;
 
 	inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE);
-	if (inode) {
-		error = security_inode_init_security(inode, dir,
-						     NULL,
-						     shmem_initxattrs, NULL);
-		if (error && error != -EOPNOTSUPP)
-			goto out_iput;
-		error = simple_acl_create(dir, inode);
-		if (error)
-			goto out_iput;
-		d_tmpfile(file, inode);
+
+	if (IS_ERR(inode)) {
+		error = PTR_ERR(inode);
+		goto err_out;
 	}
+
+	error = security_inode_init_security(inode, dir,
+					     NULL,
+					     shmem_initxattrs, NULL);
+	if (error && error != -EOPNOTSUPP)
+		goto out_iput;
+	error = simple_acl_create(dir, inode);
+	if (error)
+		goto out_iput;
+	d_tmpfile(file, inode);
+
+err_out:
 	return finish_open_simple(file, error);
 out_iput:
 	iput(inode);
@@ -3293,8 +3309,9 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 
 	inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0,
 				VM_NORESERVE);
-	if (!inode)
-		return -ENOSPC;
+
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
 
 	error = security_inode_init_security(inode, dir, &dentry->d_name,
 					     shmem_initxattrs, NULL);
@@ -3932,12 +3949,13 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	struct shmem_options *ctx = fc->fs_private;
 	struct inode *inode;
 	struct shmem_sb_info *sbinfo;
+	int error = -ENOMEM;
 
 	/* Round up to L1_CACHE_BYTES to resist false sharing */
 	sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
 				L1_CACHE_BYTES), GFP_KERNEL);
 	if (!sbinfo)
-		return -ENOMEM;
+		return error;
 
 	sb->s_fs_info = sbinfo;
 
@@ -4000,8 +4018,10 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 
 	inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mode, 0,
 				VM_NORESERVE);
-	if (!inode)
+	if (IS_ERR(inode)) {
+		error = PTR_ERR(inode);
 		goto failed;
+	}
 	inode->i_uid = sbinfo->uid;
 	inode->i_gid = sbinfo->gid;
 	sb->s_root = d_make_root(inode);
@@ -4011,7 +4031,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 
 failed:
 	shmem_put_super(sb);
-	return -ENOMEM;
+	return error;
 }
 
 static int shmem_get_tree(struct fs_context *fc)
@@ -4380,10 +4400,16 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
 #define shmem_vm_ops				generic_file_vm_ops
 #define shmem_anon_vm_ops			generic_file_vm_ops
 #define shmem_file_operations			ramfs_file_operations
-#define shmem_get_inode(idmap, sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
 #define shmem_acct_size(flags, size)		0
 #define shmem_unacct_size(flags, size)		do {} while (0)
 
+static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb, struct inode *dir,
+					    umode_t mode, dev_t dev, unsigned long flags)
+{
+	struct inode *inode = ramfs_get_inode(sb, dir, mode, dev);
+	return inode ? inode : ERR_PTR(-ENOSPC);
+}
+
 #endif /* CONFIG_SHMEM */
 
 /* common code */
@@ -4408,9 +4434,10 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l
 
 	inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
 				S_IFREG | S_IRWXUGO, 0, flags);
-	if (unlikely(!inode)) {
+
+	if (IS_ERR(inode)) {
 		shmem_unacct_size(flags, size);
-		return ERR_PTR(-ENOSPC);
+		return ERR_CAST(inode);
 	}
 	inode->i_flags |= i_flags;
 	inode->i_size = size;

From 86be6b8bd834bece87e6adc5313cb41e1ba7fe81 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Jul 2023 16:45:06 +0200
Subject: [PATCH 0873/2047] quota: Check presence of quota operation structures
 instead of ->quota_read and ->quota_write callbacks

Currently we check whether superblock has ->quota_read and ->quota_write
operations to check whether filesystem supports quotas. However for
example for shmfs we will not read or write dquots so check whether
quota operations are set in the superblock instead.

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Message-Id: <20230725144510.253763-4-cem@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/quota/dquot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index e3e4f4047657..4d826c369da2 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2367,7 +2367,7 @@ int dquot_load_quota_sb(struct super_block *sb, int type, int format_id,
 
 	if (!fmt)
 		return -ESRCH;
-	if (!sb->s_op->quota_write || !sb->s_op->quota_read ||
+	if (!sb->dq_op || !sb->s_qcop ||
 	    (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) {
 		error = -EINVAL;
 		goto out_fmt;

From eafc474e202978ac735c551d5ee1eb8c02e2be54 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cem@kernel.org>
Date: Tue, 25 Jul 2023 16:45:07 +0200
Subject: [PATCH 0874/2047] shmem: prepare shmem quota infrastructure

Add new shmem quota format, its quota_format_ops together with
dquot_operations

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230725144510.253763-5-cem@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/Kconfig                 |  12 ++
 include/linux/shmem_fs.h   |  12 ++
 include/uapi/linux/quota.h |   1 +
 mm/Makefile                |   2 +-
 mm/shmem_quota.c           | 318 +++++++++++++++++++++++++++++++++++++
 5 files changed, 344 insertions(+), 1 deletion(-)
 create mode 100644 mm/shmem_quota.c

diff --git a/fs/Kconfig b/fs/Kconfig
index 18d034ec7953..8218a71933f9 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -233,6 +233,18 @@ config TMPFS_INODE64
 
 	  If unsure, say N.
 
+config TMPFS_QUOTA
+	bool "Tmpfs quota support"
+	depends on TMPFS
+	select QUOTA
+	help
+	  Quota support allows to set per user and group limits for tmpfs
+	  usage.  Say Y to enable quota support. Once enabled you can control
+	  user and group quota enforcement with quota, usrquota and grpquota
+	  mount options.
+
+	  If unsure, say N.
+
 config ARCH_SUPPORTS_HUGETLBFS
 	def_bool n
 
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 9029abd29b1c..7abfaf70b58a 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -13,6 +13,10 @@
 
 /* inode in-kernel data */
 
+#ifdef CONFIG_TMPFS_QUOTA
+#define SHMEM_MAXQUOTAS 2
+#endif
+
 struct shmem_inode_info {
 	spinlock_t		lock;
 	unsigned int		seals;		/* shmem seals */
@@ -172,4 +176,12 @@ extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
 #endif /* CONFIG_SHMEM */
 #endif /* CONFIG_USERFAULTFD */
 
+/*
+ * Used space is stored as unsigned 64-bit value in bytes but
+ * quota core supports only signed 64-bit values so use that
+ * as a limit
+ */
+#define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */
+#define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL
+
 #endif
diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h
index f17c9636a859..52090105b828 100644
--- a/include/uapi/linux/quota.h
+++ b/include/uapi/linux/quota.h
@@ -77,6 +77,7 @@
 #define	QFMT_VFS_V0 2
 #define QFMT_OCFS2 3
 #define	QFMT_VFS_V1 4
+#define	QFMT_SHMEM 5
 
 /* Size of block in which space limits are passed through the quota
  * interface */
diff --git a/mm/Makefile b/mm/Makefile
index 678530a07326..d4ee20988dd1 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -51,7 +51,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   util.o mmzone.o vmstat.o backing-dev.o \
 			   mm_init.o percpu.o slab_common.o \
-			   compaction.o show_mem.o\
+			   compaction.o show_mem.o shmem_quota.o\
 			   interval_tree.o list_lru.o workingset.o \
 			   debug.o gup.o mmap_lock.o $(mmu-y)
 
diff --git a/mm/shmem_quota.c b/mm/shmem_quota.c
new file mode 100644
index 000000000000..e92b8ece9880
--- /dev/null
+++ b/mm/shmem_quota.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * In memory quota format relies on quota infrastructure to store dquot
+ * information for us. While conventional quota formats for file systems
+ * with persistent storage can load quota information into dquot from the
+ * storage on-demand and hence quota dquot shrinker can free any dquot
+ * that is not currently being used, it must be avoided here. Otherwise we
+ * can lose valuable information, user provided limits, because there is
+ * no persistent storage to load the information from afterwards.
+ *
+ * One information that in-memory quota format needs to keep track of is
+ * a sorted list of ids for each quota type. This is done by utilizing
+ * an rb tree which root is stored in mem_dqinfo->dqi_priv for each quota
+ * type.
+ *
+ * This format can be used to support quota on file system without persistent
+ * storage such as tmpfs.
+ *
+ * Author:	Lukas Czerner <lczerner@redhat.com>
+ *		Carlos Maiolino <cmaiolino@redhat.com>
+ *
+ * Copyright (C) 2023 Red Hat, Inc.
+ */
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/rbtree.h>
+#include <linux/shmem_fs.h>
+
+#include <linux/quotaops.h>
+#include <linux/quota.h>
+
+#ifdef CONFIG_TMPFS_QUOTA
+
+/*
+ * The following constants define the amount of time given a user
+ * before the soft limits are treated as hard limits (usually resulting
+ * in an allocation failure). The timer is started when the user crosses
+ * their soft limit, it is reset when they go below their soft limit.
+ */
+#define SHMEM_MAX_IQ_TIME 604800	/* (7*24*60*60) 1 week */
+#define SHMEM_MAX_DQ_TIME 604800	/* (7*24*60*60) 1 week */
+
+struct quota_id {
+	struct rb_node	node;
+	qid_t		id;
+	qsize_t		bhardlimit;
+	qsize_t		bsoftlimit;
+	qsize_t		ihardlimit;
+	qsize_t		isoftlimit;
+};
+
+static int shmem_check_quota_file(struct super_block *sb, int type)
+{
+	/* There is no real quota file, nothing to do */
+	return 1;
+}
+
+/*
+ * There is no real quota file. Just allocate rb_root for quota ids and
+ * set limits
+ */
+static int shmem_read_file_info(struct super_block *sb, int type)
+{
+	struct quota_info *dqopt = sb_dqopt(sb);
+	struct mem_dqinfo *info = &dqopt->info[type];
+
+	info->dqi_priv = kzalloc(sizeof(struct rb_root), GFP_NOFS);
+	if (!info->dqi_priv)
+		return -ENOMEM;
+
+	info->dqi_max_spc_limit = SHMEM_QUOTA_MAX_SPC_LIMIT;
+	info->dqi_max_ino_limit = SHMEM_QUOTA_MAX_INO_LIMIT;
+
+	info->dqi_bgrace = SHMEM_MAX_DQ_TIME;
+	info->dqi_igrace = SHMEM_MAX_IQ_TIME;
+	info->dqi_flags = 0;
+
+	return 0;
+}
+
+static int shmem_write_file_info(struct super_block *sb, int type)
+{
+	/* There is no real quota file, nothing to do */
+	return 0;
+}
+
+/*
+ * Free all the quota_id entries in the rb tree and rb_root.
+ */
+static int shmem_free_file_info(struct super_block *sb, int type)
+{
+	struct mem_dqinfo *info = &sb_dqopt(sb)->info[type];
+	struct rb_root *root = info->dqi_priv;
+	struct quota_id *entry;
+	struct rb_node *node;
+
+	info->dqi_priv = NULL;
+	node = rb_first(root);
+	while (node) {
+		entry = rb_entry(node, struct quota_id, node);
+		node = rb_next(&entry->node);
+
+		rb_erase(&entry->node, root);
+		kfree(entry);
+	}
+
+	kfree(root);
+	return 0;
+}
+
+static int shmem_get_next_id(struct super_block *sb, struct kqid *qid)
+{
+	struct mem_dqinfo *info = sb_dqinfo(sb, qid->type);
+	struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node;
+	qid_t id = from_kqid(&init_user_ns, *qid);
+	struct quota_info *dqopt = sb_dqopt(sb);
+	struct quota_id *entry = NULL;
+	int ret = 0;
+
+	if (!sb_has_quota_active(sb, qid->type))
+		return -ESRCH;
+
+	down_read(&dqopt->dqio_sem);
+	while (node) {
+		entry = rb_entry(node, struct quota_id, node);
+
+		if (id < entry->id)
+			node = node->rb_left;
+		else if (id > entry->id)
+			node = node->rb_right;
+		else
+			goto got_next_id;
+	}
+
+	if (!entry) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	if (id > entry->id) {
+		node = rb_next(&entry->node);
+		if (!node) {
+			ret = -ENOENT;
+			goto out_unlock;
+		}
+		entry = rb_entry(node, struct quota_id, node);
+	}
+
+got_next_id:
+	*qid = make_kqid(&init_user_ns, qid->type, entry->id);
+out_unlock:
+	up_read(&dqopt->dqio_sem);
+	return ret;
+}
+
+/*
+ * Load dquot with limits from existing entry, or create the new entry if
+ * it does not exist.
+ */
+static int shmem_acquire_dquot(struct dquot *dquot)
+{
+	struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
+	struct rb_node **n = &((struct rb_root *)info->dqi_priv)->rb_node;
+	struct rb_node *parent = NULL, *new_node = NULL;
+	struct quota_id *new_entry, *entry;
+	qid_t id = from_kqid(&init_user_ns, dquot->dq_id);
+	struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+	int ret = 0;
+
+	mutex_lock(&dquot->dq_lock);
+
+	down_write(&dqopt->dqio_sem);
+	while (*n) {
+		parent = *n;
+		entry = rb_entry(parent, struct quota_id, node);
+
+		if (id < entry->id)
+			n = &(*n)->rb_left;
+		else if (id > entry->id)
+			n = &(*n)->rb_right;
+		else
+			goto found;
+	}
+
+	/* We don't have entry for this id yet, create it */
+	new_entry = kzalloc(sizeof(struct quota_id), GFP_NOFS);
+	if (!new_entry) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	new_entry->id = id;
+	new_node = &new_entry->node;
+	rb_link_node(new_node, parent, n);
+	rb_insert_color(new_node, (struct rb_root *)info->dqi_priv);
+	entry = new_entry;
+
+found:
+	/* Load the stored limits from the tree */
+	spin_lock(&dquot->dq_dqb_lock);
+	dquot->dq_dqb.dqb_bhardlimit = entry->bhardlimit;
+	dquot->dq_dqb.dqb_bsoftlimit = entry->bsoftlimit;
+	dquot->dq_dqb.dqb_ihardlimit = entry->ihardlimit;
+	dquot->dq_dqb.dqb_isoftlimit = entry->isoftlimit;
+
+	if (!dquot->dq_dqb.dqb_bhardlimit &&
+	    !dquot->dq_dqb.dqb_bsoftlimit &&
+	    !dquot->dq_dqb.dqb_ihardlimit &&
+	    !dquot->dq_dqb.dqb_isoftlimit)
+		set_bit(DQ_FAKE_B, &dquot->dq_flags);
+	spin_unlock(&dquot->dq_dqb_lock);
+
+	/* Make sure flags update is visible after dquot has been filled */
+	smp_mb__before_atomic();
+	set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
+out_unlock:
+	up_write(&dqopt->dqio_sem);
+	mutex_unlock(&dquot->dq_lock);
+	return ret;
+}
+
+/*
+ * Store limits from dquot in the tree unless it's fake. If it is fake
+ * remove the id from the tree since there is no useful information in
+ * there.
+ */
+static int shmem_release_dquot(struct dquot *dquot)
+{
+	struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
+	struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node;
+	qid_t id = from_kqid(&init_user_ns, dquot->dq_id);
+	struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+	struct quota_id *entry = NULL;
+
+	mutex_lock(&dquot->dq_lock);
+	/* Check whether we are not racing with some other dqget() */
+	if (dquot_is_busy(dquot))
+		goto out_dqlock;
+
+	down_write(&dqopt->dqio_sem);
+	while (node) {
+		entry = rb_entry(node, struct quota_id, node);
+
+		if (id < entry->id)
+			node = node->rb_left;
+		else if (id > entry->id)
+			node = node->rb_right;
+		else
+			goto found;
+	}
+
+	/* We should always find the entry in the rb tree */
+	WARN_ONCE(1, "quota id %u from dquot %p, not in rb tree!\n", id, dquot);
+	up_write(&dqopt->dqio_sem);
+	mutex_unlock(&dquot->dq_lock);
+	return -ENOENT;
+
+found:
+	if (test_bit(DQ_FAKE_B, &dquot->dq_flags)) {
+		/* Remove entry from the tree */
+		rb_erase(&entry->node, info->dqi_priv);
+		kfree(entry);
+	} else {
+		/* Store the limits in the tree */
+		spin_lock(&dquot->dq_dqb_lock);
+		entry->bhardlimit = dquot->dq_dqb.dqb_bhardlimit;
+		entry->bsoftlimit = dquot->dq_dqb.dqb_bsoftlimit;
+		entry->ihardlimit = dquot->dq_dqb.dqb_ihardlimit;
+		entry->isoftlimit = dquot->dq_dqb.dqb_isoftlimit;
+		spin_unlock(&dquot->dq_dqb_lock);
+	}
+
+	clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
+	up_write(&dqopt->dqio_sem);
+
+out_dqlock:
+	mutex_unlock(&dquot->dq_lock);
+	return 0;
+}
+
+static int shmem_mark_dquot_dirty(struct dquot *dquot)
+{
+	return 0;
+}
+
+static int shmem_dquot_write_info(struct super_block *sb, int type)
+{
+	return 0;
+}
+
+static const struct quota_format_ops shmem_format_ops = {
+	.check_quota_file	= shmem_check_quota_file,
+	.read_file_info		= shmem_read_file_info,
+	.write_file_info	= shmem_write_file_info,
+	.free_file_info		= shmem_free_file_info,
+};
+
+struct quota_format_type shmem_quota_format = {
+	.qf_fmt_id = QFMT_SHMEM,
+	.qf_ops = &shmem_format_ops,
+	.qf_owner = THIS_MODULE
+};
+
+const struct dquot_operations shmem_quota_operations = {
+	.acquire_dquot		= shmem_acquire_dquot,
+	.release_dquot		= shmem_release_dquot,
+	.alloc_dquot		= dquot_alloc,
+	.destroy_dquot		= dquot_destroy,
+	.write_info		= shmem_dquot_write_info,
+	.mark_dirty		= shmem_mark_dquot_dirty,
+	.get_next_id		= shmem_get_next_id,
+};
+#endif /* CONFIG_TMPFS_QUOTA */

From e09764cff44b5d31c2ca5477444565e3080637d2 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cem@kernel.org>
Date: Tue, 25 Jul 2023 16:45:08 +0200
Subject: [PATCH 0875/2047] shmem: quota support

Now the basic infra-structure is in place, enable quota support for tmpfs.

This offers user and group quotas to tmpfs (project quotas will be added
later). Also, as other filesystems, the tmpfs quota is not supported
within user namespaces yet, so idmapping is not translated.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230725144510.253763-6-cem@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/tmpfs.rst |  15 +++
 include/linux/shmem_fs.h            |   8 ++
 mm/shmem.c                          | 186 ++++++++++++++++++++++++++--
 3 files changed, 201 insertions(+), 8 deletions(-)

diff --git a/Documentation/filesystems/tmpfs.rst b/Documentation/filesystems/tmpfs.rst
index 2cd8fa332feb..06d6195eaa2c 100644
--- a/Documentation/filesystems/tmpfs.rst
+++ b/Documentation/filesystems/tmpfs.rst
@@ -123,6 +123,21 @@ sysfs file /sys/kernel/mm/transparent_hugepage/shmem_enabled: which can
 be used to deny huge pages on all tmpfs mounts in an emergency, or to
 force huge pages on all tmpfs mounts for testing.
 
+tmpfs also supports quota with the following mount options
+
+========  =============================================================
+quota     User and group quota accounting and enforcement is enabled on
+          the mount. Tmpfs is using hidden system quota files that are
+          initialized on mount.
+usrquota  User quota accounting and enforcement is enabled on the
+          mount.
+grpquota  Group quota accounting and enforcement is enabled on the
+          mount.
+========  =============================================================
+
+Note that tmpfs quotas do not support user namespaces so no uid/gid
+translation is done if quotas are enabled inside user namespaces.
+
 tmpfs has a mount option to set the NUMA memory allocation policy for
 all files in that instance (if CONFIG_NUMA is enabled) - which can be
 adjusted on the fly via 'mount -o remount ...'
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 7abfaf70b58a..1a568a0f542f 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -31,6 +31,9 @@ struct shmem_inode_info {
 	atomic_t		stop_eviction;	/* hold when working on inode */
 	struct timespec64	i_crtime;	/* file creation time */
 	unsigned int		fsflags;	/* flags for FS_IOC_[SG]ETFLAGS */
+#ifdef CONFIG_TMPFS_QUOTA
+	struct dquot		*i_dquot[MAXQUOTAS];
+#endif
 	struct inode		vfs_inode;
 };
 
@@ -184,4 +187,9 @@ extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
 #define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */
 #define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL
 
+#ifdef CONFIG_TMPFS_QUOTA
+extern const struct dquot_operations shmem_quota_operations;
+extern struct quota_format_type shmem_quota_format;
+#endif /* CONFIG_TMPFS_QUOTA */
+
 #endif
diff --git a/mm/shmem.c b/mm/shmem.c
index e89c35c65586..ec76ce9aea5f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -78,6 +78,7 @@ static struct vfsmount *shm_mnt;
 #include <uapi/linux/memfd.h>
 #include <linux/rmap.h>
 #include <linux/uuid.h>
+#include <linux/quotaops.h>
 
 #include <linux/uaccess.h>
 
@@ -116,11 +117,13 @@ struct shmem_options {
 	int huge;
 	int seen;
 	bool noswap;
+	unsigned short quota_types;
 #define SHMEM_SEEN_BLOCKS 1
 #define SHMEM_SEEN_INODES 2
 #define SHMEM_SEEN_HUGE 4
 #define SHMEM_SEEN_INUMS 8
 #define SHMEM_SEEN_NOSWAP 16
+#define SHMEM_SEEN_QUOTA 32
 };
 
 #ifdef CONFIG_TMPFS
@@ -212,7 +215,16 @@ static inline int shmem_inode_acct_block(struct inode *inode, long pages)
 		if (percpu_counter_compare(&sbinfo->used_blocks,
 					   sbinfo->max_blocks - pages) > 0)
 			goto unacct;
+
+		err = dquot_alloc_block_nodirty(inode, pages);
+		if (err)
+			goto unacct;
+
 		percpu_counter_add(&sbinfo->used_blocks, pages);
+	} else {
+		err = dquot_alloc_block_nodirty(inode, pages);
+		if (err)
+			goto unacct;
 	}
 
 	return 0;
@@ -227,6 +239,8 @@ static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 
+	dquot_free_block_nodirty(inode, pages);
+
 	if (sbinfo->max_blocks)
 		percpu_counter_sub(&sbinfo->used_blocks, pages);
 	shmem_unacct_blocks(info->flags, pages);
@@ -255,6 +269,47 @@ bool vma_is_shmem(struct vm_area_struct *vma)
 static LIST_HEAD(shmem_swaplist);
 static DEFINE_MUTEX(shmem_swaplist_mutex);
 
+#ifdef CONFIG_TMPFS_QUOTA
+
+static int shmem_enable_quotas(struct super_block *sb,
+			       unsigned short quota_types)
+{
+	int type, err = 0;
+
+	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
+	for (type = 0; type < SHMEM_MAXQUOTAS; type++) {
+		if (!(quota_types & (1 << type)))
+			continue;
+		err = dquot_load_quota_sb(sb, type, QFMT_SHMEM,
+					  DQUOT_USAGE_ENABLED |
+					  DQUOT_LIMITS_ENABLED);
+		if (err)
+			goto out_err;
+	}
+	return 0;
+
+out_err:
+	pr_warn("tmpfs: failed to enable quota tracking (type=%d, err=%d)\n",
+		type, err);
+	for (type--; type >= 0; type--)
+		dquot_quota_off(sb, type);
+	return err;
+}
+
+static void shmem_disable_quotas(struct super_block *sb)
+{
+	int type;
+
+	for (type = 0; type < SHMEM_MAXQUOTAS; type++)
+		dquot_quota_off(sb, type);
+}
+
+static struct dquot **shmem_get_dquots(struct inode *inode)
+{
+	return SHMEM_I(inode)->i_dquot;
+}
+#endif /* CONFIG_TMPFS_QUOTA */
+
 /*
  * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and
  * produces a novel ino for the newly allocated inode.
@@ -361,7 +416,6 @@ static void shmem_recalc_inode(struct inode *inode)
 	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
 	if (freed > 0) {
 		info->alloced -= freed;
-		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
 		shmem_inode_unacct_blocks(inode, freed);
 	}
 }
@@ -379,7 +433,6 @@ bool shmem_charge(struct inode *inode, long pages)
 
 	spin_lock_irqsave(&info->lock, flags);
 	info->alloced += pages;
-	inode->i_blocks += pages * BLOCKS_PER_PAGE;
 	shmem_recalc_inode(inode);
 	spin_unlock_irqrestore(&info->lock, flags);
 
@@ -395,7 +448,6 @@ void shmem_uncharge(struct inode *inode, long pages)
 
 	spin_lock_irqsave(&info->lock, flags);
 	info->alloced -= pages;
-	inode->i_blocks -= pages * BLOCKS_PER_PAGE;
 	shmem_recalc_inode(inode);
 	spin_unlock_irqrestore(&info->lock, flags);
 
@@ -1141,6 +1193,21 @@ static int shmem_setattr(struct mnt_idmap *idmap,
 		}
 	}
 
+	if (is_quota_modification(idmap, inode, attr)) {
+		error = dquot_initialize(inode);
+		if (error)
+			return error;
+	}
+
+	/* Transfer quota accounting */
+	if (i_uid_needs_update(idmap, attr, inode) ||
+	    i_gid_needs_update(idmap, attr, inode)) {
+		error = dquot_transfer(idmap, inode, attr);
+
+		if (error)
+			return error;
+	}
+
 	setattr_copy(idmap, inode, attr);
 	if (attr->ia_valid & ATTR_MODE)
 		error = posix_acl_chmod(idmap, dentry, inode->i_mode);
@@ -1187,6 +1254,10 @@ static void shmem_evict_inode(struct inode *inode)
 	WARN_ON(inode->i_blocks);
 	shmem_free_inode(inode->i_sb);
 	clear_inode(inode);
+#ifdef CONFIG_TMPFS_QUOTA
+	dquot_free_inode(inode);
+	dquot_drop(inode);
+#endif
 }
 
 static int shmem_find_swap_entries(struct address_space *mapping,
@@ -1986,7 +2057,6 @@ alloc_nohuge:
 
 	spin_lock_irq(&info->lock);
 	info->alloced += folio_nr_pages(folio);
-	inode->i_blocks += (blkcnt_t)BLOCKS_PER_PAGE << folio_order(folio);
 	shmem_recalc_inode(inode);
 	spin_unlock_irq(&info->lock);
 	alloced = true;
@@ -2357,9 +2427,10 @@ static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
 #define shmem_initxattrs NULL
 #endif
 
-static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb,
-				     struct inode *dir, umode_t mode, dev_t dev,
-				     unsigned long flags)
+static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
+					     struct super_block *sb,
+					     struct inode *dir, umode_t mode,
+					     dev_t dev, unsigned long flags)
 {
 	struct inode *inode;
 	struct shmem_inode_info *info;
@@ -2436,6 +2507,43 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block
 	return inode;
 }
 
+#ifdef CONFIG_TMPFS_QUOTA
+static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
+				     struct super_block *sb, struct inode *dir,
+				     umode_t mode, dev_t dev, unsigned long flags)
+{
+	int err;
+	struct inode *inode;
+
+	inode = __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
+	if (IS_ERR(inode))
+		return inode;
+
+	err = dquot_initialize(inode);
+	if (err)
+		goto errout;
+
+	err = dquot_alloc_inode(inode);
+	if (err) {
+		dquot_drop(inode);
+		goto errout;
+	}
+	return inode;
+
+errout:
+	inode->i_flags |= S_NOQUOTA;
+	iput(inode);
+	return ERR_PTR(err);
+}
+#else
+static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
+				     struct super_block *sb, struct inode *dir,
+				     umode_t mode, dev_t dev, unsigned long flags)
+{
+	return __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
+}
+#endif /* CONFIG_TMPFS_QUOTA */
+
 #ifdef CONFIG_USERFAULTFD
 int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
 			   struct vm_area_struct *dst_vma,
@@ -2538,7 +2646,6 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
 
 	spin_lock_irq(&info->lock);
 	info->alloced++;
-	inode->i_blocks += BLOCKS_PER_PAGE;
 	shmem_recalc_inode(inode);
 	spin_unlock_irq(&info->lock);
 
@@ -3519,6 +3626,7 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
 static const struct inode_operations shmem_short_symlink_operations = {
 	.getattr	= shmem_getattr,
+	.setattr	= shmem_setattr,
 	.get_link	= simple_get_link,
 #ifdef CONFIG_TMPFS_XATTR
 	.listxattr	= shmem_listxattr,
@@ -3527,6 +3635,7 @@ static const struct inode_operations shmem_short_symlink_operations = {
 
 static const struct inode_operations shmem_symlink_inode_operations = {
 	.getattr	= shmem_getattr,
+	.setattr	= shmem_setattr,
 	.get_link	= shmem_get_link,
 #ifdef CONFIG_TMPFS_XATTR
 	.listxattr	= shmem_listxattr,
@@ -3626,6 +3735,9 @@ enum shmem_param {
 	Opt_inode32,
 	Opt_inode64,
 	Opt_noswap,
+	Opt_quota,
+	Opt_usrquota,
+	Opt_grpquota,
 };
 
 static const struct constant_table shmem_param_enums_huge[] = {
@@ -3648,6 +3760,11 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
 	fsparam_flag  ("inode32",	Opt_inode32),
 	fsparam_flag  ("inode64",	Opt_inode64),
 	fsparam_flag  ("noswap",	Opt_noswap),
+#ifdef CONFIG_TMPFS_QUOTA
+	fsparam_flag  ("quota",		Opt_quota),
+	fsparam_flag  ("usrquota",	Opt_usrquota),
+	fsparam_flag  ("grpquota",	Opt_grpquota),
+#endif
 	{}
 };
 
@@ -3739,6 +3856,24 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 		ctx->noswap = true;
 		ctx->seen |= SHMEM_SEEN_NOSWAP;
 		break;
+	case Opt_quota:
+		if (fc->user_ns != &init_user_ns)
+			return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
+		ctx->seen |= SHMEM_SEEN_QUOTA;
+		ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
+		break;
+	case Opt_usrquota:
+		if (fc->user_ns != &init_user_ns)
+			return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
+		ctx->seen |= SHMEM_SEEN_QUOTA;
+		ctx->quota_types |= QTYPE_MASK_USR;
+		break;
+	case Opt_grpquota:
+		if (fc->user_ns != &init_user_ns)
+			return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
+		ctx->seen |= SHMEM_SEEN_QUOTA;
+		ctx->quota_types |= QTYPE_MASK_GRP;
+		break;
 	}
 	return 0;
 
@@ -3846,6 +3981,12 @@ static int shmem_reconfigure(struct fs_context *fc)
 		goto out;
 	}
 
+	if (ctx->seen & SHMEM_SEEN_QUOTA &&
+	    !sb_any_quota_loaded(fc->root->d_sb)) {
+		err = "Cannot enable quota on remount";
+		goto out;
+	}
+
 	if (ctx->seen & SHMEM_SEEN_HUGE)
 		sbinfo->huge = ctx->huge;
 	if (ctx->seen & SHMEM_SEEN_INUMS)
@@ -3937,6 +4078,9 @@ static void shmem_put_super(struct super_block *sb)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
+#ifdef CONFIG_TMPFS_QUOTA
+	shmem_disable_quotas(sb);
+#endif
 	free_percpu(sbinfo->ino_batch);
 	percpu_counter_destroy(&sbinfo->used_blocks);
 	mpol_put(sbinfo->mpol);
@@ -4016,6 +4160,17 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 #endif
 	uuid_gen(&sb->s_uuid);
 
+#ifdef CONFIG_TMPFS_QUOTA
+	if (ctx->seen & SHMEM_SEEN_QUOTA) {
+		sb->dq_op = &shmem_quota_operations;
+		sb->s_qcop = &dquot_quotactl_sysfile_ops;
+		sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+
+		if (shmem_enable_quotas(sb, ctx->quota_types))
+			goto failed;
+	}
+#endif /* CONFIG_TMPFS_QUOTA */
+
 	inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mode, 0,
 				VM_NORESERVE);
 	if (IS_ERR(inode)) {
@@ -4191,6 +4346,9 @@ static const struct super_operations shmem_ops = {
 #ifdef CONFIG_TMPFS
 	.statfs		= shmem_statfs,
 	.show_options	= shmem_show_options,
+#endif
+#ifdef CONFIG_TMPFS_QUOTA
+	.get_dquots	= shmem_get_dquots,
 #endif
 	.evict_inode	= shmem_evict_inode,
 	.drop_inode	= generic_delete_inode,
@@ -4257,6 +4415,14 @@ void __init shmem_init(void)
 
 	shmem_init_inodecache();
 
+#ifdef CONFIG_TMPFS_QUOTA
+	error = register_quota_format(&shmem_quota_format);
+	if (error < 0) {
+		pr_err("Could not register quota format\n");
+		goto out3;
+	}
+#endif
+
 	error = register_filesystem(&shmem_fs_type);
 	if (error) {
 		pr_err("Could not register tmpfs\n");
@@ -4281,6 +4447,10 @@ void __init shmem_init(void)
 out1:
 	unregister_filesystem(&shmem_fs_type);
 out2:
+#ifdef CONFIG_TMPFS_QUOTA
+	unregister_quota_format(&shmem_quota_format);
+out3:
+#endif
 	shmem_destroy_inodecache();
 	shm_mnt = ERR_PTR(error);
 }

From de4c0e7ca8b526a82ff7e5ee5533787bb6d01724 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Tue, 25 Jul 2023 16:45:09 +0200
Subject: [PATCH 0876/2047] shmem: Add default quota limit mount options

Allow system administrator to set default global quota limits at tmpfs
mount time.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230725144510.253763-7-cem@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/tmpfs.rst | 34 +++++++++++-----
 include/linux/shmem_fs.h            |  8 ++++
 mm/shmem.c                          | 61 +++++++++++++++++++++++++++++
 mm/shmem_quota.c                    | 34 +++++++++++++++-
 4 files changed, 127 insertions(+), 10 deletions(-)

diff --git a/Documentation/filesystems/tmpfs.rst b/Documentation/filesystems/tmpfs.rst
index 06d6195eaa2c..67422ee10e03 100644
--- a/Documentation/filesystems/tmpfs.rst
+++ b/Documentation/filesystems/tmpfs.rst
@@ -125,15 +125,31 @@ force huge pages on all tmpfs mounts for testing.
 
 tmpfs also supports quota with the following mount options
 
-========  =============================================================
-quota     User and group quota accounting and enforcement is enabled on
-          the mount. Tmpfs is using hidden system quota files that are
-          initialized on mount.
-usrquota  User quota accounting and enforcement is enabled on the
-          mount.
-grpquota  Group quota accounting and enforcement is enabled on the
-          mount.
-========  =============================================================
+======================== =================================================
+quota                    User and group quota accounting and enforcement
+                         is enabled on the mount. Tmpfs is using hidden
+                         system quota files that are initialized on mount.
+usrquota                 User quota accounting and enforcement is enabled
+                         on the mount.
+grpquota                 Group quota accounting and enforcement is enabled
+                         on the mount.
+usrquota_block_hardlimit Set global user quota block hard limit.
+usrquota_inode_hardlimit Set global user quota inode hard limit.
+grpquota_block_hardlimit Set global group quota block hard limit.
+grpquota_inode_hardlimit Set global group quota inode hard limit.
+======================== =================================================
+
+None of the quota related mount options can be set or changed on remount.
+
+Quota limit parameters accept a suffix k, m or g for kilo, mega and giga
+and can't be changed on remount. Default global quota limits are taking
+effect for any and all user/group/project except root the first time the
+quota entry for user/group/project id is being accessed - typically the
+first time an inode with a particular id ownership is being created after
+the mount. In other words, instead of the limits being initialized to zero,
+they are initialized with the particular value provided with these mount
+options. The limits can be changed for any user/group id at any time as they
+normally can be.
 
 Note that tmpfs quotas do not support user namespaces so no uid/gid
 translation is done if quotas are enabled inside user namespaces.
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 1a568a0f542f..c0058f3bba70 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -42,6 +42,13 @@ struct shmem_inode_info {
 	(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL)
 #define SHMEM_FL_INHERITED		(FS_NODUMP_FL | FS_NOATIME_FL)
 
+struct shmem_quota_limits {
+	qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */
+	qsize_t usrquota_ihardlimit; /* Default user quota inode hard limit */
+	qsize_t grpquota_bhardlimit; /* Default group quota block hard limit */
+	qsize_t grpquota_ihardlimit; /* Default group quota inode hard limit */
+};
+
 struct shmem_sb_info {
 	unsigned long max_blocks;   /* How many blocks are allowed */
 	struct percpu_counter used_blocks;  /* How many are allocated */
@@ -60,6 +67,7 @@ struct shmem_sb_info {
 	spinlock_t shrinklist_lock;   /* Protects shrinklist */
 	struct list_head shrinklist;  /* List of shinkable inodes */
 	unsigned long shrinklist_len; /* Length of shrinklist */
+	struct shmem_quota_limits qlimits; /* Default quota limits */
 };
 
 static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
diff --git a/mm/shmem.c b/mm/shmem.c
index ec76ce9aea5f..5f2c9e2961c2 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -118,6 +118,7 @@ struct shmem_options {
 	int seen;
 	bool noswap;
 	unsigned short quota_types;
+	struct shmem_quota_limits qlimits;
 #define SHMEM_SEEN_BLOCKS 1
 #define SHMEM_SEEN_INODES 2
 #define SHMEM_SEEN_HUGE 4
@@ -3738,6 +3739,10 @@ enum shmem_param {
 	Opt_quota,
 	Opt_usrquota,
 	Opt_grpquota,
+	Opt_usrquota_block_hardlimit,
+	Opt_usrquota_inode_hardlimit,
+	Opt_grpquota_block_hardlimit,
+	Opt_grpquota_inode_hardlimit,
 };
 
 static const struct constant_table shmem_param_enums_huge[] = {
@@ -3764,6 +3769,10 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
 	fsparam_flag  ("quota",		Opt_quota),
 	fsparam_flag  ("usrquota",	Opt_usrquota),
 	fsparam_flag  ("grpquota",	Opt_grpquota),
+	fsparam_string("usrquota_block_hardlimit", Opt_usrquota_block_hardlimit),
+	fsparam_string("usrquota_inode_hardlimit", Opt_usrquota_inode_hardlimit),
+	fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
+	fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
 #endif
 	{}
 };
@@ -3874,6 +3883,42 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 		ctx->seen |= SHMEM_SEEN_QUOTA;
 		ctx->quota_types |= QTYPE_MASK_GRP;
 		break;
+	case Opt_usrquota_block_hardlimit:
+		size = memparse(param->string, &rest);
+		if (*rest || !size)
+			goto bad_value;
+		if (size > SHMEM_QUOTA_MAX_SPC_LIMIT)
+			return invalfc(fc,
+				       "User quota block hardlimit too large.");
+		ctx->qlimits.usrquota_bhardlimit = size;
+		break;
+	case Opt_grpquota_block_hardlimit:
+		size = memparse(param->string, &rest);
+		if (*rest || !size)
+			goto bad_value;
+		if (size > SHMEM_QUOTA_MAX_SPC_LIMIT)
+			return invalfc(fc,
+				       "Group quota block hardlimit too large.");
+		ctx->qlimits.grpquota_bhardlimit = size;
+		break;
+	case Opt_usrquota_inode_hardlimit:
+		size = memparse(param->string, &rest);
+		if (*rest || !size)
+			goto bad_value;
+		if (size > SHMEM_QUOTA_MAX_INO_LIMIT)
+			return invalfc(fc,
+				       "User quota inode hardlimit too large.");
+		ctx->qlimits.usrquota_ihardlimit = size;
+		break;
+	case Opt_grpquota_inode_hardlimit:
+		size = memparse(param->string, &rest);
+		if (*rest || !size)
+			goto bad_value;
+		if (size > SHMEM_QUOTA_MAX_INO_LIMIT)
+			return invalfc(fc,
+				       "Group quota inode hardlimit too large.");
+		ctx->qlimits.grpquota_ihardlimit = size;
+		break;
 	}
 	return 0;
 
@@ -3987,6 +4032,18 @@ static int shmem_reconfigure(struct fs_context *fc)
 		goto out;
 	}
 
+#ifdef CONFIG_TMPFS_QUOTA
+#define CHANGED_LIMIT(name)						\
+	(ctx->qlimits.name## hardlimit &&				\
+	(ctx->qlimits.name## hardlimit != sbinfo->qlimits.name## hardlimit))
+
+	if (CHANGED_LIMIT(usrquota_b) || CHANGED_LIMIT(usrquota_i) ||
+	    CHANGED_LIMIT(grpquota_b) || CHANGED_LIMIT(grpquota_i)) {
+		err = "Cannot change global quota limit on remount";
+		goto out;
+	}
+#endif /* CONFIG_TMPFS_QUOTA */
+
 	if (ctx->seen & SHMEM_SEEN_HUGE)
 		sbinfo->huge = ctx->huge;
 	if (ctx->seen & SHMEM_SEEN_INUMS)
@@ -4166,6 +4223,10 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 		sb->s_qcop = &dquot_quotactl_sysfile_ops;
 		sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
 
+		/* Copy the default limits from ctx into sbinfo */
+		memcpy(&sbinfo->qlimits, &ctx->qlimits,
+		       sizeof(struct shmem_quota_limits));
+
 		if (shmem_enable_quotas(sb, ctx->quota_types))
 			goto failed;
 	}
diff --git a/mm/shmem_quota.c b/mm/shmem_quota.c
index e92b8ece9880..062d1c1097ae 100644
--- a/mm/shmem_quota.c
+++ b/mm/shmem_quota.c
@@ -166,6 +166,7 @@ static int shmem_acquire_dquot(struct dquot *dquot)
 {
 	struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
 	struct rb_node **n = &((struct rb_root *)info->dqi_priv)->rb_node;
+	struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info;
 	struct rb_node *parent = NULL, *new_node = NULL;
 	struct quota_id *new_entry, *entry;
 	qid_t id = from_kqid(&init_user_ns, dquot->dq_id);
@@ -195,6 +196,14 @@ static int shmem_acquire_dquot(struct dquot *dquot)
 	}
 
 	new_entry->id = id;
+	if (dquot->dq_id.type == USRQUOTA) {
+		new_entry->bhardlimit = sbinfo->qlimits.usrquota_bhardlimit;
+		new_entry->ihardlimit = sbinfo->qlimits.usrquota_ihardlimit;
+	} else if (dquot->dq_id.type == GRPQUOTA) {
+		new_entry->bhardlimit = sbinfo->qlimits.grpquota_bhardlimit;
+		new_entry->ihardlimit = sbinfo->qlimits.grpquota_ihardlimit;
+	}
+
 	new_node = &new_entry->node;
 	rb_link_node(new_node, parent, n);
 	rb_insert_color(new_node, (struct rb_root *)info->dqi_priv);
@@ -224,6 +233,29 @@ out_unlock:
 	return ret;
 }
 
+static bool shmem_is_empty_dquot(struct dquot *dquot)
+{
+	struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info;
+	qsize_t bhardlimit;
+	qsize_t ihardlimit;
+
+	if (dquot->dq_id.type == USRQUOTA) {
+		bhardlimit = sbinfo->qlimits.usrquota_bhardlimit;
+		ihardlimit = sbinfo->qlimits.usrquota_ihardlimit;
+	} else if (dquot->dq_id.type == GRPQUOTA) {
+		bhardlimit = sbinfo->qlimits.grpquota_bhardlimit;
+		ihardlimit = sbinfo->qlimits.grpquota_ihardlimit;
+	}
+
+	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+		(dquot->dq_dqb.dqb_curspace == 0 &&
+		 dquot->dq_dqb.dqb_curinodes == 0 &&
+		 dquot->dq_dqb.dqb_bhardlimit == bhardlimit &&
+		 dquot->dq_dqb.dqb_ihardlimit == ihardlimit))
+		return true;
+
+	return false;
+}
 /*
  * Store limits from dquot in the tree unless it's fake. If it is fake
  * remove the id from the tree since there is no useful information in
@@ -261,7 +293,7 @@ static int shmem_release_dquot(struct dquot *dquot)
 	return -ENOENT;
 
 found:
-	if (test_bit(DQ_FAKE_B, &dquot->dq_flags)) {
+	if (shmem_is_empty_dquot(dquot)) {
 		/* Remove entry from the tree */
 		rb_erase(&entry->node, info->dqi_priv);
 		kfree(entry);

From 509f006932de7556d48eaa7afcd02dcf1ca9a3e9 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 25 Jul 2023 16:45:10 +0200
Subject: [PATCH 0877/2047] shmem: fix quota lock nesting in huge hole handling

i_pages lock nests inside i_lock, but shmem_charge() and shmem_uncharge()
were being called from THP splitting or collapsing while i_pages lock was
held, and now go on to call dquot_alloc_block_nodirty() which takes
i_lock to update i_blocks.

We may well want to take i_lock out of this path later, in the non-quota
case even if it's left in the quota case (or perhaps use i_lock instead
of shmem's info->lock throughout); but don't get into that at this time.

Move the shmem_charge() and shmem_uncharge() calls out from under i_pages
lock, accounting the full batch of holes in a single call.

Still pass the pages argument to shmem_uncharge(), but it happens now to
be unused: shmem_recalc_inode() is designed to account for clean pages
freed behind shmem's back, so it gets the accounting right by itself;
then the later call to shmem_inode_unacct_blocks() led to imbalance
(that WARN_ON(inode->i_blocks) in shmem_evict_inode()).

Reported-by: syzbot+38ca19393fb3344f57e6@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/lkml/0000000000008e62f40600bfe080@google.com/
Reported-by: syzbot+440ff8cca06ee7a1d4db@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/lkml/00000000000076a7840600bfb6e8@google.com/
Signed-off-by: Hugh Dickins <hughd@google.com>
Tested-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Message-Id: <20230725144510.253763-8-cem@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/huge_memory.c |  6 ++++--
 mm/khugepaged.c  | 13 +++++++------
 mm/shmem.c       | 19 +++++++++----------
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index eb3678360b97..d301c323c69a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2521,7 +2521,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	struct address_space *swap_cache = NULL;
 	unsigned long offset = 0;
 	unsigned int nr = thp_nr_pages(head);
-	int i;
+	int i, nr_dropped = 0;
 
 	/* complete memcg works before add pages to LRU */
 	split_page_memcg(head, nr);
@@ -2546,7 +2546,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 			struct folio *tail = page_folio(head + i);
 
 			if (shmem_mapping(head->mapping))
-				shmem_uncharge(head->mapping->host, 1);
+				nr_dropped++;
 			else if (folio_test_clear_dirty(tail))
 				folio_account_cleaned(tail,
 					inode_to_wb(folio->mapping->host));
@@ -2583,6 +2583,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	}
 	local_irq_enable();
 
+	if (nr_dropped)
+		shmem_uncharge(head->mapping->host, nr_dropped);
 	remap_page(folio, nr);
 
 	if (PageSwapCache(head)) {
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 78c8d5d8b628..47d1d32c734f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1955,10 +1955,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 						goto xa_locked;
 					}
 				}
-				if (!shmem_charge(mapping->host, 1)) {
-					result = SCAN_FAIL;
-					goto xa_locked;
-				}
 				nr_none++;
 				continue;
 			}
@@ -2145,8 +2141,13 @@ xa_unlocked:
 	 */
 	try_to_unmap_flush();
 
-	if (result != SCAN_SUCCEED)
+	if (result == SCAN_SUCCEED && nr_none &&
+	    !shmem_charge(mapping->host, nr_none))
+		result = SCAN_FAIL;
+	if (result != SCAN_SUCCEED) {
+		nr_none = 0;
 		goto rollback;
+	}
 
 	/*
 	 * The old pages are locked, so they won't change anymore.
@@ -2283,8 +2284,8 @@ rollback:
 	if (nr_none) {
 		xas_lock_irq(&xas);
 		mapping->nrpages -= nr_none;
-		shmem_uncharge(mapping->host, nr_none);
 		xas_unlock_irq(&xas);
+		shmem_uncharge(mapping->host, nr_none);
 	}
 
 	list_for_each_entry_safe(page, tmp, &pagelist, lru) {
diff --git a/mm/shmem.c b/mm/shmem.c
index 5f2c9e2961c2..72326ea74954 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -424,18 +424,20 @@ static void shmem_recalc_inode(struct inode *inode)
 bool shmem_charge(struct inode *inode, long pages)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	unsigned long flags;
+	struct address_space *mapping = inode->i_mapping;
 
 	if (shmem_inode_acct_block(inode, pages))
 		return false;
 
 	/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
-	inode->i_mapping->nrpages += pages;
+	xa_lock_irq(&mapping->i_pages);
+	mapping->nrpages += pages;
+	xa_unlock_irq(&mapping->i_pages);
 
-	spin_lock_irqsave(&info->lock, flags);
+	spin_lock_irq(&info->lock);
 	info->alloced += pages;
 	shmem_recalc_inode(inode);
-	spin_unlock_irqrestore(&info->lock, flags);
+	spin_unlock_irq(&info->lock);
 
 	return true;
 }
@@ -443,16 +445,13 @@ bool shmem_charge(struct inode *inode, long pages)
 void shmem_uncharge(struct inode *inode, long pages)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	unsigned long flags;
 
 	/* nrpages adjustment done by __filemap_remove_folio() or caller */
 
-	spin_lock_irqsave(&info->lock, flags);
-	info->alloced -= pages;
+	spin_lock_irq(&info->lock);
 	shmem_recalc_inode(inode);
-	spin_unlock_irqrestore(&info->lock, flags);
-
-	shmem_inode_unacct_blocks(inode, pages);
+	/* which has called shmem_inode_unacct_blocks() if necessary */
+	spin_unlock_irq(&info->lock);
 }
 
 /*

From 6faddda69f623d38bb097640689901a4b5ff881a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 30 Jun 2023 13:48:49 -0400
Subject: [PATCH 0878/2047] libfs: Add directory operations for stable offsets

Create a vector of directory operations in fs/libfs.c that handles
directory seeks and readdir via stable offsets instead of the
current cursor-based mechanism.

For the moment these are unused.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Message-Id: <168814732984.530310.11190772066786107220.stgit@manet.1015granger.net>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/locking.rst |   8 +-
 Documentation/filesystems/vfs.rst     |   6 +-
 fs/libfs.c                            | 248 ++++++++++++++++++++++++++
 include/linux/fs.h                    |  18 ++
 4 files changed, 276 insertions(+), 4 deletions(-)

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index ed148919e11a..443091406e35 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -85,13 +85,14 @@ prototypes::
 			    struct dentry *dentry, struct fileattr *fa);
 	int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
 	struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int);
+	struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
 
 locking rules:
 	all may block
 
-==============	=============================================
+==============	==================================================
 ops		i_rwsem(inode)
-==============	=============================================
+==============	==================================================
 lookup:		shared
 create:		exclusive
 link:		exclusive (both)
@@ -115,7 +116,8 @@ atomic_open:	shared (exclusive if O_CREAT is set in open flags)
 tmpfile:	no
 fileattr_get:	no or exclusive
 fileattr_set:	exclusive
-==============	=============================================
+get_offset_ctx  no
+==============	==================================================
 
 
 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index cb2a97e49872..898d0b43109e 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -515,6 +515,7 @@ As of kernel 2.6.22, the following members are defined:
 		int (*fileattr_set)(struct mnt_idmap *idmap,
 				    struct dentry *dentry, struct fileattr *fa);
 		int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
+	        struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
 	};
 
 Again, all methods are called without any locks being held, unless
@@ -675,7 +676,10 @@ otherwise noted.
 	called on ioctl(FS_IOC_SETFLAGS) and ioctl(FS_IOC_FSSETXATTR) to
 	change miscellaneous file flags and attributes.  Callers hold
 	i_rwsem exclusive.  If unset, then fall back to f_op->ioctl().
-
+``get_offset_ctx``
+	called to get the offset context for a directory inode. A
+        filesystem must define this operation to use
+        simple_offset_dir_operations.
 
 The Address Space Object
 ========================
diff --git a/fs/libfs.c b/fs/libfs.c
index 5b851315eeed..a7e56baf8bbd 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -239,6 +239,254 @@ const struct inode_operations simple_dir_inode_operations = {
 };
 EXPORT_SYMBOL(simple_dir_inode_operations);
 
+static void offset_set(struct dentry *dentry, u32 offset)
+{
+	dentry->d_fsdata = (void *)((uintptr_t)(offset));
+}
+
+static u32 dentry2offset(struct dentry *dentry)
+{
+	return (u32)((uintptr_t)(dentry->d_fsdata));
+}
+
+/**
+ * simple_offset_init - initialize an offset_ctx
+ * @octx: directory offset map to be initialized
+ *
+ */
+void simple_offset_init(struct offset_ctx *octx)
+{
+	xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
+
+	/* 0 is '.', 1 is '..', so always start with offset 2 */
+	octx->next_offset = 2;
+}
+
+/**
+ * simple_offset_add - Add an entry to a directory's offset map
+ * @octx: directory offset ctx to be updated
+ * @dentry: new dentry being added
+ *
+ * Returns zero on success. @so_ctx and the dentry offset are updated.
+ * Otherwise, a negative errno value is returned.
+ */
+int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
+{
+	static const struct xa_limit limit = XA_LIMIT(2, U32_MAX);
+	u32 offset;
+	int ret;
+
+	if (dentry2offset(dentry) != 0)
+		return -EBUSY;
+
+	ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit,
+			      &octx->next_offset, GFP_KERNEL);
+	if (ret < 0)
+		return ret;
+
+	offset_set(dentry, offset);
+	return 0;
+}
+
+/**
+ * simple_offset_remove - Remove an entry to a directory's offset map
+ * @octx: directory offset ctx to be updated
+ * @dentry: dentry being removed
+ *
+ */
+void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
+{
+	u32 offset;
+
+	offset = dentry2offset(dentry);
+	if (offset == 0)
+		return;
+
+	xa_erase(&octx->xa, offset);
+	offset_set(dentry, 0);
+}
+
+/**
+ * simple_offset_rename_exchange - exchange rename with directory offsets
+ * @old_dir: parent of dentry being moved
+ * @old_dentry: dentry being moved
+ * @new_dir: destination parent
+ * @new_dentry: destination dentry
+ *
+ * Returns zero on success. Otherwise a negative errno is returned and the
+ * rename is rolled back.
+ */
+int simple_offset_rename_exchange(struct inode *old_dir,
+				  struct dentry *old_dentry,
+				  struct inode *new_dir,
+				  struct dentry *new_dentry)
+{
+	struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
+	struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
+	u32 old_index = dentry2offset(old_dentry);
+	u32 new_index = dentry2offset(new_dentry);
+	int ret;
+
+	simple_offset_remove(old_ctx, old_dentry);
+	simple_offset_remove(new_ctx, new_dentry);
+
+	ret = simple_offset_add(new_ctx, old_dentry);
+	if (ret)
+		goto out_restore;
+
+	ret = simple_offset_add(old_ctx, new_dentry);
+	if (ret) {
+		simple_offset_remove(new_ctx, old_dentry);
+		goto out_restore;
+	}
+
+	ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
+	if (ret) {
+		simple_offset_remove(new_ctx, old_dentry);
+		simple_offset_remove(old_ctx, new_dentry);
+		goto out_restore;
+	}
+	return 0;
+
+out_restore:
+	offset_set(old_dentry, old_index);
+	xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL);
+	offset_set(new_dentry, new_index);
+	xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL);
+	return ret;
+}
+
+/**
+ * simple_offset_destroy - Release offset map
+ * @octx: directory offset ctx that is about to be destroyed
+ *
+ * During fs teardown (eg. umount), a directory's offset map might still
+ * contain entries. xa_destroy() cleans out anything that remains.
+ */
+void simple_offset_destroy(struct offset_ctx *octx)
+{
+	xa_destroy(&octx->xa);
+}
+
+/**
+ * offset_dir_llseek - Advance the read position of a directory descriptor
+ * @file: an open directory whose position is to be updated
+ * @offset: a byte offset
+ * @whence: enumerator describing the starting position for this update
+ *
+ * SEEK_END, SEEK_DATA, and SEEK_HOLE are not supported for directories.
+ *
+ * Returns the updated read position if successful; otherwise a
+ * negative errno is returned and the read position remains unchanged.
+ */
+static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+	switch (whence) {
+	case SEEK_CUR:
+		offset += file->f_pos;
+		fallthrough;
+	case SEEK_SET:
+		if (offset >= 0)
+			break;
+		fallthrough;
+	default:
+		return -EINVAL;
+	}
+
+	return vfs_setpos(file, offset, U32_MAX);
+}
+
+static struct dentry *offset_find_next(struct xa_state *xas)
+{
+	struct dentry *child, *found = NULL;
+
+	rcu_read_lock();
+	child = xas_next_entry(xas, U32_MAX);
+	if (!child)
+		goto out;
+	spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+	if (simple_positive(child))
+		found = dget_dlock(child);
+	spin_unlock(&child->d_lock);
+out:
+	rcu_read_unlock();
+	return found;
+}
+
+static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
+{
+	u32 offset = dentry2offset(dentry);
+	struct inode *inode = d_inode(dentry);
+
+	return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
+			  inode->i_ino, fs_umode_to_dtype(inode->i_mode));
+}
+
+static void offset_iterate_dir(struct dentry *dir, struct dir_context *ctx)
+{
+	struct inode *inode = d_inode(dir);
+	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
+	XA_STATE(xas, &so_ctx->xa, ctx->pos);
+	struct dentry *dentry;
+
+	while (true) {
+		spin_lock(&dir->d_lock);
+		dentry = offset_find_next(&xas);
+		spin_unlock(&dir->d_lock);
+		if (!dentry)
+			break;
+
+		if (!offset_dir_emit(ctx, dentry)) {
+			dput(dentry);
+			break;
+		}
+
+		dput(dentry);
+		ctx->pos = xas.xa_index + 1;
+	}
+}
+
+/**
+ * offset_readdir - Emit entries starting at offset @ctx->pos
+ * @file: an open directory to iterate over
+ * @ctx: directory iteration context
+ *
+ * Caller must hold @file's i_rwsem to prevent insertion or removal of
+ * entries during this call.
+ *
+ * On entry, @ctx->pos contains an offset that represents the first entry
+ * to be read from the directory.
+ *
+ * The operation continues until there are no more entries to read, or
+ * until the ctx->actor indicates there is no more space in the caller's
+ * output buffer.
+ *
+ * On return, @ctx->pos contains an offset that will read the next entry
+ * in this directory when shmem_readdir() is called again with @ctx.
+ *
+ * Return values:
+ *   %0 - Complete
+ */
+static int offset_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct dentry *dir = file->f_path.dentry;
+
+	lockdep_assert_held(&d_inode(dir)->i_rwsem);
+
+	if (!dir_emit_dots(file, ctx))
+		return 0;
+
+	offset_iterate_dir(dir, ctx);
+	return 0;
+}
+
+const struct file_operations simple_offset_dir_operations = {
+	.llseek		= offset_dir_llseek,
+	.iterate_shared	= offset_readdir,
+	.read		= generic_read_dir,
+	.fsync		= noop_fsync,
+};
+
 static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
 {
 	struct dentry *child = NULL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6867512907d6..59a4129ce14c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1770,6 +1770,7 @@ struct dir_context {
 
 struct iov_iter;
 struct io_uring_cmd;
+struct offset_ctx;
 
 struct file_operations {
 	struct module *owner;
@@ -1857,6 +1858,7 @@ struct inode_operations {
 	int (*fileattr_set)(struct mnt_idmap *idmap,
 			    struct dentry *dentry, struct fileattr *fa);
 	int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
+	struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
 } ____cacheline_aligned;
 
 static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
@@ -2971,6 +2973,22 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
 extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
 		const void __user *from, size_t count);
 
+struct offset_ctx {
+	struct xarray		xa;
+	u32			next_offset;
+};
+
+void simple_offset_init(struct offset_ctx *octx);
+int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
+void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
+int simple_offset_rename_exchange(struct inode *old_dir,
+				  struct dentry *old_dentry,
+				  struct inode *new_dir,
+				  struct dentry *new_dentry);
+void simple_offset_destroy(struct offset_ctx *octx);
+
+extern const struct file_operations simple_offset_dir_operations;
+
 extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
 extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
 

From 23a31d87645c652734f89f477f69ddac9aa402cb Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 30 Jun 2023 13:48:56 -0400
Subject: [PATCH 0879/2047] shmem: Refactor shmem_symlink()

De-duplicate the error handling paths. No change in behavior is
expected.

Suggested-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Message-Id: <168814733654.530310.9958360833543413152.stgit@manet.1015granger.net>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/shmem.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 72326ea74954..f5006ea3b8e1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3422,26 +3422,22 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 
 	error = security_inode_init_security(inode, dir, &dentry->d_name,
 					     shmem_initxattrs, NULL);
-	if (error && error != -EOPNOTSUPP) {
-		iput(inode);
-		return error;
-	}
+	if (error && error != -EOPNOTSUPP)
+		goto out_iput;
 
 	inode->i_size = len-1;
 	if (len <= SHORT_SYMLINK_LEN) {
 		inode->i_link = kmemdup(symname, len, GFP_KERNEL);
 		if (!inode->i_link) {
-			iput(inode);
-			return -ENOMEM;
+			error = -ENOMEM;
+			goto out_iput;
 		}
 		inode->i_op = &shmem_short_symlink_operations;
 	} else {
 		inode_nohighmem(inode);
 		error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
-		if (error) {
-			iput(inode);
-			return error;
-		}
+		if (error)
+			goto out_iput;
 		inode->i_mapping->a_ops = &shmem_aops;
 		inode->i_op = &shmem_symlink_inode_operations;
 		memcpy(folio_address(folio), symname, len);
@@ -3456,6 +3452,9 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	d_instantiate(dentry, inode);
 	dget(dentry);
 	return 0;
+out_iput:
+	iput(inode);
+	return error;
 }
 
 static void shmem_put_link(void *arg)

From a2e459555c5f9da3e619b7e47a63f98574dc75f1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 30 Jun 2023 13:49:03 -0400
Subject: [PATCH 0880/2047] shmem: stable directory offsets

The current cursor-based directory offset mechanism doesn't work
when a tmpfs filesystem is exported via NFS. This is because NFS
clients do not open directories. Each server-side READDIR operation
has to open the directory, read it, then close it. The cursor state
for that directory, being associated strictly with the opened
struct file, is thus discarded after each NFS READDIR operation.

Directory offsets are cached not only by NFS clients, but also by
user space libraries on those clients. Essentially there is no way
to invalidate those caches when directory offsets have changed on
an NFS server after the offset-to-dentry mapping changes. Thus the
whole application stack depends on unchanging directory offsets.

The solution we've come up with is to make the directory offset for
each file in a tmpfs filesystem stable for the life of the directory
entry it represents.

shmem_readdir() and shmem_dir_llseek() now use an xarray to map each
directory offset (an loff_t integer) to the memory address of a
struct dentry.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Message-Id: <168814734331.530310.3911190551060453102.stgit@manet.1015granger.net>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/shmem_fs.h |  1 +
 mm/shmem.c               | 47 ++++++++++++++++++++++++++++++++++------
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index c0058f3bba70..9b2d2faff1d0 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -34,6 +34,7 @@ struct shmem_inode_info {
 #ifdef CONFIG_TMPFS_QUOTA
 	struct dquot		*i_dquot[MAXQUOTAS];
 #endif
+	struct offset_ctx	dir_offsets;	/* stable entry offsets */
 	struct inode		vfs_inode;
 };
 
diff --git a/mm/shmem.c b/mm/shmem.c
index f5006ea3b8e1..81973914b294 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2427,6 +2427,11 @@ static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
 #define shmem_initxattrs NULL
 #endif
 
+static struct offset_ctx *shmem_get_offset_ctx(struct inode *inode)
+{
+	return &SHMEM_I(inode)->dir_offsets;
+}
+
 static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
 					     struct super_block *sb,
 					     struct inode *dir, umode_t mode,
@@ -2492,7 +2497,8 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
 		/* Some things misbehave if size == 0 on a directory */
 		inode->i_size = 2 * BOGO_DIRENT_SIZE;
 		inode->i_op = &shmem_dir_inode_operations;
-		inode->i_fop = &simple_dir_operations;
+		inode->i_fop = &simple_offset_dir_operations;
+		simple_offset_init(shmem_get_offset_ctx(inode));
 		break;
 	case S_IFLNK:
 		/*
@@ -3204,7 +3210,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	if (error && error != -EOPNOTSUPP)
 		goto out_iput;
 
-	error = 0;
+	error = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
+	if (error)
+		goto out_iput;
+
 	dir->i_size += BOGO_DIRENT_SIZE;
 	dir->i_ctime = dir->i_mtime = current_time(dir);
 	inode_inc_iversion(dir);
@@ -3287,6 +3296,13 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
 			goto out;
 	}
 
+	ret = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
+	if (ret) {
+		if (inode->i_nlink)
+			shmem_free_inode(inode->i_sb);
+		goto out;
+	}
+
 	dir->i_size += BOGO_DIRENT_SIZE;
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
 	inode_inc_iversion(dir);
@@ -3305,6 +3321,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
 		shmem_free_inode(inode->i_sb);
 
+	simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
+
 	dir->i_size -= BOGO_DIRENT_SIZE;
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
 	inode_inc_iversion(dir);
@@ -3363,24 +3381,29 @@ static int shmem_rename2(struct mnt_idmap *idmap,
 {
 	struct inode *inode = d_inode(old_dentry);
 	int they_are_dirs = S_ISDIR(inode->i_mode);
+	int error;
 
 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
 		return -EINVAL;
 
 	if (flags & RENAME_EXCHANGE)
-		return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
+		return simple_offset_rename_exchange(old_dir, old_dentry,
+						     new_dir, new_dentry);
 
 	if (!simple_empty(new_dentry))
 		return -ENOTEMPTY;
 
 	if (flags & RENAME_WHITEOUT) {
-		int error;
-
 		error = shmem_whiteout(idmap, old_dir, old_dentry);
 		if (error)
 			return error;
 	}
 
+	simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry);
+	error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry);
+	if (error)
+		return error;
+
 	if (d_really_is_positive(new_dentry)) {
 		(void) shmem_unlink(new_dir, new_dentry);
 		if (they_are_dirs) {
@@ -3425,19 +3448,23 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	if (error && error != -EOPNOTSUPP)
 		goto out_iput;
 
+	error = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
+	if (error)
+		goto out_iput;
+
 	inode->i_size = len-1;
 	if (len <= SHORT_SYMLINK_LEN) {
 		inode->i_link = kmemdup(symname, len, GFP_KERNEL);
 		if (!inode->i_link) {
 			error = -ENOMEM;
-			goto out_iput;
+			goto out_remove_offset;
 		}
 		inode->i_op = &shmem_short_symlink_operations;
 	} else {
 		inode_nohighmem(inode);
 		error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
 		if (error)
-			goto out_iput;
+			goto out_remove_offset;
 		inode->i_mapping->a_ops = &shmem_aops;
 		inode->i_op = &shmem_symlink_inode_operations;
 		memcpy(folio_address(folio), symname, len);
@@ -3452,6 +3479,9 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	d_instantiate(dentry, inode);
 	dget(dentry);
 	return 0;
+
+out_remove_offset:
+	simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
 out_iput:
 	iput(inode);
 	return error;
@@ -4295,6 +4325,8 @@ static void shmem_destroy_inode(struct inode *inode)
 {
 	if (S_ISREG(inode->i_mode))
 		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
+	if (S_ISDIR(inode->i_mode))
+		simple_offset_destroy(shmem_get_offset_ctx(inode));
 }
 
 static void shmem_init_inode(void *foo)
@@ -4375,6 +4407,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
 	.mknod		= shmem_mknod,
 	.rename		= shmem_rename2,
 	.tmpfile	= shmem_tmpfile,
+	.get_offset_ctx	= shmem_get_offset_ctx,
 #endif
 #ifdef CONFIG_TMPFS_XATTR
 	.listxattr	= shmem_listxattr,

From bbaef7973dd017954419718ab2df526ae1f66e93 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 24 Jul 2023 10:43:57 -0400
Subject: [PATCH 0881/2047] libfs: Add a lock class for the offset map's
 xa_lock

Tie the dynamically-allocated xarray locks into a single class so
contention on the directory offset xarrays can be observed.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Message-Id: <169020933088.160441.9405180953116076087.stgit@manet.1015granger.net>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/libfs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/libfs.c b/fs/libfs.c
index a7e56baf8bbd..3495f071093e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -249,6 +249,8 @@ static u32 dentry2offset(struct dentry *dentry)
 	return (u32)((uintptr_t)(dentry->d_fsdata));
 }
 
+static struct lock_class_key simple_offset_xa_lock;
+
 /**
  * simple_offset_init - initialize an offset_ctx
  * @octx: directory offset map to be initialized
@@ -257,6 +259,7 @@ static u32 dentry2offset(struct dentry *dentry)
 void simple_offset_init(struct offset_ctx *octx)
 {
 	xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
+	lockdep_set_class(&octx->xa.xa_lock, &simple_offset_xa_lock);
 
 	/* 0 is '.', 1 is '..', so always start with offset 2 */
 	octx->next_offset = 2;

From 2be4f05af71bb2a9958c5680c19e5a489636ff42 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 25 Jul 2023 14:31:04 -0400
Subject: [PATCH 0882/2047] libfs: Remove parent dentry locking in
 offset_iterate_dir()

Since offset_iterate_dir() does not walk the parent's d_subdir list
nor does it manipulate the parent's d_child, there doesn't seem to
be a reason to hold the parent's d_lock. The offset_ctx's xarray can
be sufficiently protected with just the RCU read lock.

Flame graph data captured during the git regression run shows a
20% reduction in CPU cycles consumed in offset_find_next().

Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202307171640.e299f8d5-oliver.sang@intel.com
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Message-Id: <169030957098.157536.9938425508695693348.stgit@manet.1015granger.net>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/libfs.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index 3495f071093e..c78a48654a94 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -407,7 +407,7 @@ static struct dentry *offset_find_next(struct xa_state *xas)
 	child = xas_next_entry(xas, U32_MAX);
 	if (!child)
 		goto out;
-	spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+	spin_lock(&child->d_lock);
 	if (simple_positive(child))
 		found = dget_dlock(child);
 	spin_unlock(&child->d_lock);
@@ -425,17 +425,14 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
 			  inode->i_ino, fs_umode_to_dtype(inode->i_mode));
 }
 
-static void offset_iterate_dir(struct dentry *dir, struct dir_context *ctx)
+static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 {
-	struct inode *inode = d_inode(dir);
 	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
 	XA_STATE(xas, &so_ctx->xa, ctx->pos);
 	struct dentry *dentry;
 
 	while (true) {
-		spin_lock(&dir->d_lock);
 		dentry = offset_find_next(&xas);
-		spin_unlock(&dir->d_lock);
 		if (!dentry)
 			break;
 
@@ -465,7 +462,7 @@ static void offset_iterate_dir(struct dentry *dir, struct dir_context *ctx)
  * output buffer.
  *
  * On return, @ctx->pos contains an offset that will read the next entry
- * in this directory when shmem_readdir() is called again with @ctx.
+ * in this directory when offset_readdir() is called again with @ctx.
  *
  * Return values:
  *   %0 - Complete
@@ -479,7 +476,7 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
 	if (!dir_emit_dots(file, ctx))
 		return 0;
 
-	offset_iterate_dir(dir, ctx);
+	offset_iterate_dir(d_inode(dir), ctx);
 	return 0;
 }
 

From 3c1b7528d8969a8e89c77cd5eb867503152547b1 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 3 Aug 2023 22:46:11 -0700
Subject: [PATCH 0883/2047] shmem: move spinlock into shmem_recalc_inode() to
 fix quota support

Commit "shmem: fix quota lock nesting in huge hole handling" was not so
good: Smatch caught shmem_recalc_inode()'s shmem_inode_unacct_blocks()
descending into quota_send_warning(): where blocking GFP_NOFS is used,
yet shmem_recalc_inode() is called holding the shmem inode's info->lock.

Yes, both __dquot_alloc_space() and __dquot_free_space() are commented
"This operation can block, but only after everything is updated" - when
calling flush_warnings() at the end - both its print_warning() and its
quota_send_warning() may block.

Rework shmem_recalc_inode() to take the shmem inode's info->lock inside,
and drop it before calling shmem_inode_unacct_blocks().

And why were the spin_locks disabling interrupts?  That was just a relic
from when shmem_charge() and shmem_uncharge() were called while holding
i_pages xa_lock: stop disabling interrupts for info->lock now.

To help stop me from making the same mistake again, add a might_sleep()
into shmem_inode_acct_block() and shmem_inode_unacct_blocks(); and those
functions have grown, so let the compiler decide whether to inline them.

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/linux-fsdevel/ffd7ca34-7f2a-44ee-b05d-b54d920ce076@moroto.mountain/
Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <29f48045-2cb5-7db-ecf1-72462f1bef5@google.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/shmem.c | 107 ++++++++++++++++++++++-------------------------------
 1 file changed, 44 insertions(+), 63 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 81973914b294..b88dc7916e94 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -203,7 +203,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 		vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
 }
 
-static inline int shmem_inode_acct_block(struct inode *inode, long pages)
+static int shmem_inode_acct_block(struct inode *inode, long pages)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
@@ -212,6 +212,7 @@ static inline int shmem_inode_acct_block(struct inode *inode, long pages)
 	if (shmem_acct_block(info->flags, pages))
 		return err;
 
+	might_sleep();	/* when quotas */
 	if (sbinfo->max_blocks) {
 		if (percpu_counter_compare(&sbinfo->used_blocks,
 					   sbinfo->max_blocks - pages) > 0)
@@ -235,11 +236,12 @@ unacct:
 	return err;
 }
 
-static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
+static void shmem_inode_unacct_blocks(struct inode *inode, long pages)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 
+	might_sleep();	/* when quotas */
 	dquot_free_block_nodirty(inode, pages);
 
 	if (sbinfo->max_blocks)
@@ -400,30 +402,45 @@ static void shmem_free_inode(struct super_block *sb)
 /**
  * shmem_recalc_inode - recalculate the block usage of an inode
  * @inode: inode to recalc
+ * @alloced: the change in number of pages allocated to inode
+ * @swapped: the change in number of pages swapped from inode
  *
  * We have to calculate the free blocks since the mm can drop
  * undirtied hole pages behind our back.
  *
  * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
  * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
- *
- * It has to be called with the spinlock held.
  */
-static void shmem_recalc_inode(struct inode *inode)
+static void shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	long freed;
 
-	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
-	if (freed > 0) {
+	spin_lock(&info->lock);
+	info->alloced += alloced;
+	info->swapped += swapped;
+	freed = info->alloced - info->swapped -
+		READ_ONCE(inode->i_mapping->nrpages);
+	/*
+	 * Special case: whereas normally shmem_recalc_inode() is called
+	 * after i_mapping->nrpages has already been adjusted (up or down),
+	 * shmem_writepage() has to raise swapped before nrpages is lowered -
+	 * to stop a racing shmem_recalc_inode() from thinking that a page has
+	 * been freed.  Compensate here, to avoid the need for a followup call.
+	 */
+	if (swapped > 0)
+		freed += swapped;
+	if (freed > 0)
 		info->alloced -= freed;
+	spin_unlock(&info->lock);
+
+	/* The quota case may block */
+	if (freed > 0)
 		shmem_inode_unacct_blocks(inode, freed);
-	}
 }
 
 bool shmem_charge(struct inode *inode, long pages)
 {
-	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct address_space *mapping = inode->i_mapping;
 
 	if (shmem_inode_acct_block(inode, pages))
@@ -434,24 +451,16 @@ bool shmem_charge(struct inode *inode, long pages)
 	mapping->nrpages += pages;
 	xa_unlock_irq(&mapping->i_pages);
 
-	spin_lock_irq(&info->lock);
-	info->alloced += pages;
-	shmem_recalc_inode(inode);
-	spin_unlock_irq(&info->lock);
-
+	shmem_recalc_inode(inode, pages, 0);
 	return true;
 }
 
 void shmem_uncharge(struct inode *inode, long pages)
 {
-	struct shmem_inode_info *info = SHMEM_I(inode);
-
+	/* pages argument is currently unused: keep it to help debugging */
 	/* nrpages adjustment done by __filemap_remove_folio() or caller */
 
-	spin_lock_irq(&info->lock);
-	shmem_recalc_inode(inode);
-	/* which has called shmem_inode_unacct_blocks() if necessary */
-	spin_unlock_irq(&info->lock);
+	shmem_recalc_inode(inode, 0, 0);
 }
 
 /*
@@ -1091,10 +1100,7 @@ whole_folios:
 		folio_batch_release(&fbatch);
 	}
 
-	spin_lock_irq(&info->lock);
-	info->swapped -= nr_swaps_freed;
-	shmem_recalc_inode(inode);
-	spin_unlock_irq(&info->lock);
+	shmem_recalc_inode(inode, 0, -nr_swaps_freed);
 }
 
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
@@ -1112,11 +1118,9 @@ static int shmem_getattr(struct mnt_idmap *idmap,
 	struct inode *inode = path->dentry->d_inode;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 
-	if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
-		spin_lock_irq(&info->lock);
-		shmem_recalc_inode(inode);
-		spin_unlock_irq(&info->lock);
-	}
+	if (info->alloced - info->swapped != inode->i_mapping->nrpages)
+		shmem_recalc_inode(inode, 0, 0);
+
 	if (info->fsflags & FS_APPEND_FL)
 		stat->attributes |= STATX_ATTR_APPEND;
 	if (info->fsflags & FS_IMMUTABLE_FL)
@@ -1501,11 +1505,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	if (add_to_swap_cache(folio, swap,
 			__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
 			NULL) == 0) {
-		spin_lock_irq(&info->lock);
-		shmem_recalc_inode(inode);
-		info->swapped++;
-		spin_unlock_irq(&info->lock);
-
+		shmem_recalc_inode(inode, 0, 1);
 		swap_shmem_alloc(swap);
 		shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap));
 
@@ -1776,7 +1776,6 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
 					 struct folio *folio, swp_entry_t swap)
 {
 	struct address_space *mapping = inode->i_mapping;
-	struct shmem_inode_info *info = SHMEM_I(inode);
 	swp_entry_t swapin_error;
 	void *old;
 
@@ -1789,16 +1788,12 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
 
 	folio_wait_writeback(folio);
 	delete_from_swap_cache(folio);
-	spin_lock_irq(&info->lock);
 	/*
-	 * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks won't
-	 * be 0 when inode is released and thus trigger WARN_ON(inode->i_blocks) in
-	 * shmem_evict_inode.
+	 * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
+	 * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
+	 * in shmem_evict_inode().
 	 */
-	info->alloced--;
-	info->swapped--;
-	shmem_recalc_inode(inode);
-	spin_unlock_irq(&info->lock);
+	shmem_recalc_inode(inode, -1, -1);
 	swap_free(swap);
 }
 
@@ -1885,10 +1880,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	if (error)
 		goto failed;
 
-	spin_lock_irq(&info->lock);
-	info->swapped--;
-	shmem_recalc_inode(inode);
-	spin_unlock_irq(&info->lock);
+	shmem_recalc_inode(inode, 0, -1);
 
 	if (sgp == SGP_WRITE)
 		folio_mark_accessed(folio);
@@ -2053,12 +2045,9 @@ alloc_nohuge:
 					charge_mm);
 	if (error)
 		goto unacct;
-	folio_add_lru(folio);
 
-	spin_lock_irq(&info->lock);
-	info->alloced += folio_nr_pages(folio);
-	shmem_recalc_inode(inode);
-	spin_unlock_irq(&info->lock);
+	folio_add_lru(folio);
+	shmem_recalc_inode(inode, folio_nr_pages(folio), 0);
 	alloced = true;
 
 	if (folio_test_pmd_mappable(folio) &&
@@ -2107,9 +2096,7 @@ clear:
 		if (alloced) {
 			folio_clear_dirty(folio);
 			filemap_remove_folio(folio);
-			spin_lock_irq(&info->lock);
-			shmem_recalc_inode(inode);
-			spin_unlock_irq(&info->lock);
+			shmem_recalc_inode(inode, 0, 0);
 		}
 		error = -EINVAL;
 		goto unlock;
@@ -2135,9 +2122,7 @@ unlock:
 		folio_put(folio);
 	}
 	if (error == -ENOSPC && !once++) {
-		spin_lock_irq(&info->lock);
-		shmem_recalc_inode(inode);
-		spin_unlock_irq(&info->lock);
+		shmem_recalc_inode(inode, 0, 0);
 		goto repeat;
 	}
 	if (error == -EEXIST)
@@ -2650,11 +2635,7 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
 	if (ret)
 		goto out_delete_from_cache;
 
-	spin_lock_irq(&info->lock);
-	info->alloced++;
-	shmem_recalc_inode(inode);
-	spin_unlock_irq(&info->lock);
-
+	shmem_recalc_inode(inode, 1, 0);
 	folio_unlock(folio);
 	return 0;
 out_delete_from_cache:

From 0200679fc7953177941e41c2a4241d0b6c2c5de8 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 1 Aug 2023 18:17:04 +0200
Subject: [PATCH 0884/2047] tmpfs: verify {g,u}id mount options correctly

A while ago we received the following report:

"The other outstanding issue I noticed comes from the fact that
fsconfig syscalls may occur in a different userns than that which
called fsopen. That means that resolving the uid/gid via
current_user_ns() can save a kuid that isn't mapped in the associated
namespace when the filesystem is finally mounted. This means that it
is possible for an unprivileged user to create files owned by any
group in a tmpfs mount (since we can set the SUID bit on the tmpfs
directory), or a tmpfs that is owned by any user, including the root
group/user."

The contract for {g,u}id mount options and {g,u}id values in general set
from userspace has always been that they are translated according to the
caller's idmapping. In so far, tmpfs has been doing the correct thing.
But since tmpfs is mountable in unprivileged contexts it is also
necessary to verify that the resulting {k,g}uid is representable in the
namespace of the superblock to avoid such bugs as above.

The new mount api's cross-namespace delegation abilities are already
widely used. After having talked to a bunch of userspace this is the
most faithful solution with minimal regression risks. I know of one
users - systemd - that makes use of the new mount api in this way and
they don't set unresolable {g,u}ids. So the regression risk is minimal.

Link: https://lore.kernel.org/lkml/CALxfFW4BXhEwxR0Q5LSkg-8Vb4r2MONKCcUCVioehXQKr35eHg@mail.gmail.com
Fixes: f32356261d44 ("vfs: Convert ramfs, shmem, tmpfs, devtmpfs, rootfs to use the new mount API")
Reviewed-by: "Seth Forshee (DigitalOcean)" <sforshee@kernel.org>
Reported-by: Seth Jenkins <sethjenkins@google.com>
Message-Id: <20230801-vfs-fs_context-uidgid-v1-1-daf46a050bbf@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/shmem.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index b88dc7916e94..678a7be46b89 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3793,6 +3793,8 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 	unsigned long long size;
 	char *rest;
 	int opt;
+	kuid_t kuid;
+	kgid_t kgid;
 
 	opt = fs_parse(fc, shmem_fs_parameters, param, &result);
 	if (opt < 0)
@@ -3828,14 +3830,32 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 		ctx->mode = result.uint_32 & 07777;
 		break;
 	case Opt_uid:
-		ctx->uid = make_kuid(current_user_ns(), result.uint_32);
-		if (!uid_valid(ctx->uid))
+		kuid = make_kuid(current_user_ns(), result.uint_32);
+		if (!uid_valid(kuid))
 			goto bad_value;
+
+		/*
+		 * The requested uid must be representable in the
+		 * filesystem's idmapping.
+		 */
+		if (!kuid_has_mapping(fc->user_ns, kuid))
+			goto bad_value;
+
+		ctx->uid = kuid;
 		break;
 	case Opt_gid:
-		ctx->gid = make_kgid(current_user_ns(), result.uint_32);
-		if (!gid_valid(ctx->gid))
+		kgid = make_kgid(current_user_ns(), result.uint_32);
+		if (!gid_valid(kgid))
 			goto bad_value;
+
+		/*
+		 * The requested gid must be representable in the
+		 * filesystem's idmapping.
+		 */
+		if (!kgid_has_mapping(fc->user_ns, kgid))
+			goto bad_value;
+
+		ctx->gid = kgid;
 		break;
 	case Opt_huge:
 		ctx->huge = result.uint_32;

From 5de75970c9fd7220e394b76e6d20fbafa1369b5a Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 8 Aug 2023 21:30:59 -0700
Subject: [PATCH 0885/2047] xattr: simple_xattr_set() return old_xattr to be
 freed

tmpfs wants to support limited user extended attributes, but kernfs
(or cgroupfs, the only kernfs with KERNFS_ROOT_SUPPORT_USER_XATTR)
already supports user extended attributes through simple xattrs: but
limited by a policy (128KiB per inode) too liberal to be used on tmpfs.

To allow a different limiting policy for tmpfs, without affecting the
policy for kernfs, change simple_xattr_set() to return the replaced or
removed xattr (if any), leaving the caller to update their accounting
then free the xattr (by simple_xattr_free(), renamed from the static
free_simple_xattr()).

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Message-Id: <158c6585-2aa7-d4aa-90ff-f7c3f8fe407c@google.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/kernfs/inode.c     | 46 +++++++++++++++++++++++---------------
 fs/xattr.c            | 51 +++++++++++++++++++------------------------
 include/linux/xattr.h |  7 +++---
 mm/shmem.c            | 10 +++++----
 4 files changed, 61 insertions(+), 53 deletions(-)

diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index b22b74d1a115..fec5d5f78f07 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -306,11 +306,17 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
 int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
 		     const void *value, size_t size, int flags)
 {
+	struct simple_xattr *old_xattr;
 	struct kernfs_iattrs *attrs = kernfs_iattrs(kn);
 	if (!attrs)
 		return -ENOMEM;
 
-	return simple_xattr_set(&attrs->xattrs, name, value, size, flags, NULL);
+	old_xattr = simple_xattr_set(&attrs->xattrs, name, value, size, flags);
+	if (IS_ERR(old_xattr))
+		return PTR_ERR(old_xattr);
+
+	simple_xattr_free(old_xattr);
+	return 0;
 }
 
 static int kernfs_vfs_xattr_get(const struct xattr_handler *handler,
@@ -342,7 +348,7 @@ static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn,
 {
 	atomic_t *sz = &kn->iattr->user_xattr_size;
 	atomic_t *nr = &kn->iattr->nr_user_xattrs;
-	ssize_t removed_size;
+	struct simple_xattr *old_xattr;
 	int ret;
 
 	if (atomic_inc_return(nr) > KERNFS_MAX_USER_XATTRS) {
@@ -355,13 +361,18 @@ static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn,
 		goto dec_size_out;
 	}
 
-	ret = simple_xattr_set(xattrs, full_name, value, size, flags,
-			       &removed_size);
-
-	if (!ret && removed_size >= 0)
-		size = removed_size;
-	else if (!ret)
+	old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags);
+	if (!old_xattr)
 		return 0;
+
+	if (IS_ERR(old_xattr)) {
+		ret = PTR_ERR(old_xattr);
+		goto dec_size_out;
+	}
+
+	ret = 0;
+	size = old_xattr->size;
+	simple_xattr_free(old_xattr);
 dec_size_out:
 	atomic_sub(size, sz);
 dec_count_out:
@@ -376,18 +387,19 @@ static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn,
 {
 	atomic_t *sz = &kn->iattr->user_xattr_size;
 	atomic_t *nr = &kn->iattr->nr_user_xattrs;
-	ssize_t removed_size;
-	int ret;
+	struct simple_xattr *old_xattr;
 
-	ret = simple_xattr_set(xattrs, full_name, value, size, flags,
-			       &removed_size);
+	old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags);
+	if (!old_xattr)
+		return 0;
 
-	if (removed_size >= 0) {
-		atomic_sub(removed_size, sz);
-		atomic_dec(nr);
-	}
+	if (IS_ERR(old_xattr))
+		return PTR_ERR(old_xattr);
 
-	return ret;
+	atomic_sub(old_xattr->size, sz);
+	atomic_dec(nr);
+	simple_xattr_free(old_xattr);
+	return 0;
 }
 
 static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
diff --git a/fs/xattr.c b/fs/xattr.c
index e7bbb7f57557..ba37a8f5cfd1 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1040,12 +1040,12 @@ const char *xattr_full_name(const struct xattr_handler *handler,
 EXPORT_SYMBOL(xattr_full_name);
 
 /**
- * free_simple_xattr - free an xattr object
+ * simple_xattr_free - free an xattr object
  * @xattr: the xattr object
  *
  * Free the xattr object. Can handle @xattr being NULL.
  */
-static inline void free_simple_xattr(struct simple_xattr *xattr)
+void simple_xattr_free(struct simple_xattr *xattr)
 {
 	if (xattr)
 		kfree(xattr->name);
@@ -1164,7 +1164,6 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
  * @value: the value to store along the xattr
  * @size: the size of @value
  * @flags: the flags determining how to set the xattr
- * @removed_size: the size of the removed xattr
  *
  * Set a new xattr object.
  * If @value is passed a new xattr object will be allocated. If XATTR_REPLACE
@@ -1181,29 +1180,27 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
  * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
  * XATTR_REPLACE we fail as mentioned above.
  *
- * Return: On success zero and on error a negative error code is returned.
+ * Return: On success, the removed or replaced xattr is returned, to be freed
+ * by the caller; or NULL if none. On failure a negative error code is returned.
  */
-int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
-		     const void *value, size_t size, int flags,
-		     ssize_t *removed_size)
+struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
+				      const char *name, const void *value,
+				      size_t size, int flags)
 {
-	struct simple_xattr *xattr = NULL, *new_xattr = NULL;
+	struct simple_xattr *old_xattr = NULL, *new_xattr = NULL;
 	struct rb_node *parent = NULL, **rbp;
 	int err = 0, ret;
 
-	if (removed_size)
-		*removed_size = -1;
-
 	/* value == NULL means remove */
 	if (value) {
 		new_xattr = simple_xattr_alloc(value, size);
 		if (!new_xattr)
-			return -ENOMEM;
+			return ERR_PTR(-ENOMEM);
 
 		new_xattr->name = kstrdup(name, GFP_KERNEL);
 		if (!new_xattr->name) {
-			free_simple_xattr(new_xattr);
-			return -ENOMEM;
+			simple_xattr_free(new_xattr);
+			return ERR_PTR(-ENOMEM);
 		}
 	}
 
@@ -1217,12 +1214,12 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
 		else if (ret > 0)
 			rbp = &(*rbp)->rb_right;
 		else
-			xattr = rb_entry(*rbp, struct simple_xattr, rb_node);
-		if (xattr)
+			old_xattr = rb_entry(*rbp, struct simple_xattr, rb_node);
+		if (old_xattr)
 			break;
 	}
 
-	if (xattr) {
+	if (old_xattr) {
 		/* Fail if XATTR_CREATE is requested and the xattr exists. */
 		if (flags & XATTR_CREATE) {
 			err = -EEXIST;
@@ -1230,12 +1227,10 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
 		}
 
 		if (new_xattr)
-			rb_replace_node(&xattr->rb_node, &new_xattr->rb_node,
-					&xattrs->rb_root);
+			rb_replace_node(&old_xattr->rb_node,
+					&new_xattr->rb_node, &xattrs->rb_root);
 		else
-			rb_erase(&xattr->rb_node, &xattrs->rb_root);
-		if (!err && removed_size)
-			*removed_size = xattr->size;
+			rb_erase(&old_xattr->rb_node, &xattrs->rb_root);
 	} else {
 		/* Fail if XATTR_REPLACE is requested but no xattr is found. */
 		if (flags & XATTR_REPLACE) {
@@ -1260,12 +1255,10 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
 
 out_unlock:
 	write_unlock(&xattrs->lock);
-	if (err)
-		free_simple_xattr(new_xattr);
-	else
-		free_simple_xattr(xattr);
-	return err;
-
+	if (!err)
+		return old_xattr;
+	simple_xattr_free(new_xattr);
+	return ERR_PTR(err);
 }
 
 static bool xattr_is_trusted(const char *name)
@@ -1386,7 +1379,7 @@ void simple_xattrs_free(struct simple_xattrs *xattrs)
 		rbp_next = rb_next(rbp);
 		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
 		rb_erase(&xattr->rb_node, &xattrs->rb_root);
-		free_simple_xattr(xattr);
+		simple_xattr_free(xattr);
 		rbp = rbp_next;
 	}
 }
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index d591ef59aa98..e37fe667ae04 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -116,11 +116,12 @@ struct simple_xattr {
 void simple_xattrs_init(struct simple_xattrs *xattrs);
 void simple_xattrs_free(struct simple_xattrs *xattrs);
 struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
+void simple_xattr_free(struct simple_xattr *xattr);
 int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
 		     void *buffer, size_t size);
-int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
-		     const void *value, size_t size, int flags,
-		     ssize_t *removed_size);
+struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
+				      const char *name, const void *value,
+				      size_t size, int flags);
 ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
 			  char *buffer, size_t size);
 void simple_xattr_add(struct simple_xattrs *xattrs,
diff --git a/mm/shmem.c b/mm/shmem.c
index 678a7be46b89..b22d0bf1f5e3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3598,15 +3598,17 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
 				   size_t size, int flags)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	int err;
+	struct simple_xattr *old_xattr;
 
 	name = xattr_full_name(handler, name);
-	err = simple_xattr_set(&info->xattrs, name, value, size, flags, NULL);
-	if (!err) {
+	old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
+	if (!IS_ERR(old_xattr)) {
+		simple_xattr_free(old_xattr);
+		old_xattr = NULL;
 		inode->i_ctime = current_time(inode);
 		inode_inc_iversion(inode);
 	}
-	return err;
+	return PTR_ERR(old_xattr);
 }
 
 static const struct xattr_handler shmem_security_xattr_handler = {

From e07c469e979c104464300aaa3b7923f929055cd0 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 8 Aug 2023 21:32:21 -0700
Subject: [PATCH 0886/2047] tmpfs: track free_ispace instead of free_inodes

In preparation for assigning some inode space to extended attributes,
keep track of free_ispace instead of number of free_inodes: as if one
tmpfs inode (and accompanying dentry) occupies very approximately 1KiB.

Unsigned long is large enough for free_ispace, on 64-bit and on 32-bit:
but take care to enforce the maximum.  And fix the nr_blocks maximum on
32-bit: S64_MAX would be too big for it there, so say LONG_MAX instead.

Delete the incorrect limited<->unlimited blocks/inodes comment above
shmem_reconfigure(): leave it to the error messages below to describe.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Message-Id: <4fe1739-d9e7-8dfd-5bce-12e7339711da@google.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/shmem_fs.h |  2 +-
 mm/shmem.c               | 33 +++++++++++++++++----------------
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 9b2d2faff1d0..6b0c626620f5 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -54,7 +54,7 @@ struct shmem_sb_info {
 	unsigned long max_blocks;   /* How many blocks are allowed */
 	struct percpu_counter used_blocks;  /* How many are allocated */
 	unsigned long max_inodes;   /* How many inodes are allowed */
-	unsigned long free_inodes;  /* How many are left for allocation */
+	unsigned long free_ispace;  /* How much ispace left for allocation */
 	raw_spinlock_t stat_lock;   /* Serialize shmem_sb_info changes */
 	umode_t mode;		    /* Mount mode for root directory */
 	unsigned char huge;	    /* Whether to try for hugepages */
diff --git a/mm/shmem.c b/mm/shmem.c
index b22d0bf1f5e3..8c8f0b1b700c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -90,6 +90,9 @@ static struct vfsmount *shm_mnt;
 /* Pretend that each entry is of this size in directory's i_size */
 #define BOGO_DIRENT_SIZE 20
 
+/* Pretend that one inode + its dentry occupy this much memory */
+#define BOGO_INODE_SIZE 1024
+
 /* Symlink up to this size is kmalloc'ed instead of using a swappable page */
 #define SHORT_SYMLINK_LEN 128
 
@@ -137,7 +140,8 @@ static unsigned long shmem_default_max_inodes(void)
 {
 	unsigned long nr_pages = totalram_pages();
 
-	return min(nr_pages - totalhigh_pages(), nr_pages / 2);
+	return min3(nr_pages - totalhigh_pages(), nr_pages / 2,
+			ULONG_MAX / BOGO_INODE_SIZE);
 }
 #endif
 
@@ -331,11 +335,11 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 	if (!(sb->s_flags & SB_KERNMOUNT)) {
 		raw_spin_lock(&sbinfo->stat_lock);
 		if (sbinfo->max_inodes) {
-			if (!sbinfo->free_inodes) {
+			if (sbinfo->free_ispace < BOGO_INODE_SIZE) {
 				raw_spin_unlock(&sbinfo->stat_lock);
 				return -ENOSPC;
 			}
-			sbinfo->free_inodes--;
+			sbinfo->free_ispace -= BOGO_INODE_SIZE;
 		}
 		if (inop) {
 			ino = sbinfo->next_ino++;
@@ -394,7 +398,7 @@ static void shmem_free_inode(struct super_block *sb)
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 	if (sbinfo->max_inodes) {
 		raw_spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_inodes++;
+		sbinfo->free_ispace += BOGO_INODE_SIZE;
 		raw_spin_unlock(&sbinfo->stat_lock);
 	}
 }
@@ -3158,7 +3162,7 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 	}
 	if (sbinfo->max_inodes) {
 		buf->f_files = sbinfo->max_inodes;
-		buf->f_ffree = sbinfo->free_inodes;
+		buf->f_ffree = sbinfo->free_ispace / BOGO_INODE_SIZE;
 	}
 	/* else leave those fields 0 like simple_statfs */
 
@@ -3818,13 +3822,13 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 		break;
 	case Opt_nr_blocks:
 		ctx->blocks = memparse(param->string, &rest);
-		if (*rest || ctx->blocks > S64_MAX)
+		if (*rest || ctx->blocks > LONG_MAX)
 			goto bad_value;
 		ctx->seen |= SHMEM_SEEN_BLOCKS;
 		break;
 	case Opt_nr_inodes:
 		ctx->inodes = memparse(param->string, &rest);
-		if (*rest)
+		if (*rest || ctx->inodes > ULONG_MAX / BOGO_INODE_SIZE)
 			goto bad_value;
 		ctx->seen |= SHMEM_SEEN_INODES;
 		break;
@@ -4005,21 +4009,17 @@ static int shmem_parse_options(struct fs_context *fc, void *data)
 
 /*
  * Reconfigure a shmem filesystem.
- *
- * Note that we disallow change from limited->unlimited blocks/inodes while any
- * are in use; but we must separately disallow unlimited->limited, because in
- * that case we have no record of how much is already in use.
  */
 static int shmem_reconfigure(struct fs_context *fc)
 {
 	struct shmem_options *ctx = fc->fs_private;
 	struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
-	unsigned long inodes;
+	unsigned long used_isp;
 	struct mempolicy *mpol = NULL;
 	const char *err;
 
 	raw_spin_lock(&sbinfo->stat_lock);
-	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
+	used_isp = sbinfo->max_inodes * BOGO_INODE_SIZE - sbinfo->free_ispace;
 
 	if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
 		if (!sbinfo->max_blocks) {
@@ -4037,7 +4037,7 @@ static int shmem_reconfigure(struct fs_context *fc)
 			err = "Cannot retroactively limit inodes";
 			goto out;
 		}
-		if (ctx->inodes < inodes) {
+		if (ctx->inodes * BOGO_INODE_SIZE < used_isp) {
 			err = "Too few inodes for current use";
 			goto out;
 		}
@@ -4083,7 +4083,7 @@ static int shmem_reconfigure(struct fs_context *fc)
 		sbinfo->max_blocks  = ctx->blocks;
 	if (ctx->seen & SHMEM_SEEN_INODES) {
 		sbinfo->max_inodes  = ctx->inodes;
-		sbinfo->free_inodes = ctx->inodes - inodes;
+		sbinfo->free_ispace = ctx->inodes * BOGO_INODE_SIZE - used_isp;
 	}
 
 	/*
@@ -4214,7 +4214,8 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	sb->s_flags |= SB_NOUSER;
 #endif
 	sbinfo->max_blocks = ctx->blocks;
-	sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes;
+	sbinfo->max_inodes = ctx->inodes;
+	sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE;
 	if (sb->s_flags & SB_KERNMOUNT) {
 		sbinfo->ino_batch = alloc_percpu(ino_t);
 		if (!sbinfo->ino_batch)

From 868a4715a88114e914144fe07a630cd7230ab848 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 8 Aug 2023 22:15:15 +0200
Subject: [PATCH 0887/2047] ACPI: thermal: Do not attach private data to ACPI
 handles

The ACPI thermal driver uses acpi_bus_attach_private_data() to attach
the thermal zone object to the ACPI handle of the thermal zone and
acpi_bus_detach_private_data() to clean that up, but it never uses
acpi_bus_get_private_data() to retrieve that object.

Drop the unneded acpi_bus_attach_private_data() and
acpi_bus_detach_private_data() calls from the ACPI thermal driver and
clean up the related code.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/acpi/thermal.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 3aab121cd410..a329b0dce78f 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -742,7 +742,6 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz)
 {
 	int trips = 0;
 	int result;
-	acpi_status status;
 	int i;
 
 	if (tz->trips.critical.valid)
@@ -775,24 +774,15 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz)
 	if (result)
 		goto unregister_tzd;
 
-	status =  acpi_bus_attach_private_data(tz->device->handle,
-					       tz->thermal_zone);
-	if (ACPI_FAILURE(status)) {
-		result = -ENODEV;
-		goto remove_links;
-	}
-
 	result = thermal_zone_device_enable(tz->thermal_zone);
 	if (result)
-		goto acpi_bus_detach;
+		goto remove_links;
 
 	dev_info(&tz->device->dev, "registered as thermal_zone%d\n",
 		 thermal_zone_device_id(tz->thermal_zone));
 
 	return 0;
 
-acpi_bus_detach:
-	acpi_bus_detach_private_data(tz->device->handle);
 remove_links:
 	acpi_thermal_zone_sysfs_remove(tz);
 unregister_tzd:
@@ -806,7 +796,6 @@ static void acpi_thermal_unregister_thermal_zone(struct acpi_thermal *tz)
 	acpi_thermal_zone_sysfs_remove(tz);
 	thermal_zone_device_unregister(tz->thermal_zone);
 	tz->thermal_zone = NULL;
-	acpi_bus_detach_private_data(tz->device->handle);
 }
 
 
From e9ffb042c57705ea1b40276ab180d63fb6b52881 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 22 Jun 2023 12:53:27 +0200
Subject: [PATCH 0888/2047] mmc: meson-gx: Drop redundant WARN_ON() in the irq
 handler

The host pointer is already being dereferenced earlier, so let's just drop
the redundant WARN_ON.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20230622105327.77296-1-ulf.hansson@linaro.org
---
 drivers/mmc/host/meson-gx-mmc.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index ee9a25b900ae..d652374f37b2 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -948,9 +948,6 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
 		return IRQ_NONE;
 	}
 
-	if (WARN_ON(!host))
-		return IRQ_NONE;
-
 	/* ack all raised interrupts */
 	writel(status, host->regs + SD_EMMC_STATUS);
 

From 2eb277c2f6b7cbbbeeffc89ae8b546a8b21f1b1f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 28 Jun 2023 21:12:43 +0200
Subject: [PATCH 0889/2047] mmc: mmci: Improve ux500 debug prints

To conclude the ux500 busy timeout fixes, this improves the debug and
error prints so we can see a bit what is going on. Here is a typical
dmesg with these new debug messages enabled:

[    2.648864] mmci-pl18x 80005000.mmc: mmc2: PL180 manf 80 rev4
     	       at 0x80005000 irq 81,0 (pio)
[    2.662750] mmci-pl18x 80005000.mmc: DMA channels RX dma0chan4, TX dma0chan5
[    3.480407] mmci-pl18x 80005000.mmc: no busy signalling in time CMD06
[    3.487457] mmci-pl18x 80005000.mmc: no busy signalling in time CMD06
[    3.998321] mmci-pl18x 80005000.mmc: timeout in state waiting for end IRQ
     	       		  		waiting for busy CMD06
[    3.998535] mmc2: new DDR MMC card at address 0001
[    4.000030] mmcblk2: mmc2:0001 M4G1YC 3.69 GiB
[    4.008361]  mmcblk2: p1 p2 p3 p4 p5 p6 p7 p8 p9 p10 p11 p12 p13 p14 p15
     			 p16 p17 p18 p19 p20 p21 p22 p23 p24 p25
[    4.017700] mmcblk2boot0: mmc2:0001 M4G1YC 2.00 MiB
[    4.020477] mmcblk2boot1: mmc2:0001 M4G1YC 2.00 MiB
[    4.022125] mmcblk2rpmb: mmc2:0001 M4G1YC 128 KiB, chardev (246:0)
[    5.791381] mmci-pl18x 80005000.mmc: no busy signalling in time CMD06
[   10.938568] mmci-pl18x 80005000.mmc: timeout in state waiting for end IRQ
    	       		  		waiting for busy CMD06
[   17.982849] mmci-pl18x 80005000.mmc: lost busy status when waiting for
    	       		  		busy start IRQ CMD06
[   18.683563] mmci-pl18x 80005000.mmc: no busy signalling in time CMD06
[   19.385437] mmci-pl18x 80005000.mmc: no busy signalling in time CMD06
[   20.493652] mmci-pl18x 80005000.mmc: no busy signalling in time CMD06

We see a lot of lost IRQs and the timeout always occur while waiting for
the end IRQ, and then the busy status is *low* meaning the busy indication
is already de-asserted.

So busy signalling is missed in various ways for various reasons,
sometimes it appears that IRQs are simply lost.

One hypothesis is that this happens because the events happen so fast
that they are transient, and since the MMCI state machine in effect is
handling an edge trigger (rising or falling signal on DAT0) the
internal logic will miss the event, because the state machine in the
hardware is sampling the line, and will at times detect only the first
event but miss the second, fireing only one IRQ.

We print the second timeout error with dev_err() since it is pretty
serious, the other events are so common and simple to handle that we
can keep them at dev_dbg() level.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230628191243.3632401-1-linus.walleij@linaro.org
[Ulf: Fixup conflict in ux500_busy_timeout_work()]
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mmci.c | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 769b34afa835..dda756a56379 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -764,7 +764,8 @@ static bool ux500_busy_complete(struct mmci_host *host, struct mmc_command *cmd,
 			}
 			retries--;
 		}
-		dev_dbg(mmc_dev(host->mmc), "no busy signalling in time\n");
+		dev_dbg(mmc_dev(host->mmc),
+			"no busy signalling in time CMD%02x\n", cmd->opcode);
 		ux500_busy_clear_mask_done(host);
 		break;
 
@@ -786,7 +787,8 @@ static bool ux500_busy_complete(struct mmci_host *host, struct mmc_command *cmd,
 			host->busy_state = MMCI_BUSY_WAITING_FOR_END_IRQ;
 		} else {
 			dev_dbg(mmc_dev(host->mmc),
-				"lost busy status when waiting for busy start IRQ\n");
+				"lost busy status when waiting for busy start IRQ CMD%02x\n",
+				cmd->opcode);
 			cancel_delayed_work(&host->ux500_busy_timeout_work);
 			ux500_busy_clear_mask_done(host);
 		}
@@ -800,13 +802,14 @@ static bool ux500_busy_complete(struct mmci_host *host, struct mmc_command *cmd,
 			ux500_busy_clear_mask_done(host);
 		} else {
 			dev_dbg(mmc_dev(host->mmc),
-				"busy status still asserted when handling busy end IRQ - will keep waiting\n");
+				"busy status still asserted when handling busy end IRQ - will keep waiting CMD%02x\n",
+				cmd->opcode);
 		}
 		break;
 
 	default:
-		dev_dbg(mmc_dev(host->mmc), "fell through on state %d\n",
-			host->busy_state);
+		dev_dbg(mmc_dev(host->mmc), "fell through on state %d, CMD%02x\n",
+			host->busy_state, cmd->opcode);
 		break;
 	}
 
@@ -1533,6 +1536,20 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
 	}
 }
 
+static char *ux500_state_str(struct mmci_host *host)
+{
+	switch (host->busy_state) {
+	case MMCI_BUSY_WAITING_FOR_START_IRQ:
+		return "waiting for start IRQ";
+	case MMCI_BUSY_WAITING_FOR_END_IRQ:
+		return "waiting for end IRQ";
+	case MMCI_BUSY_DONE:
+		return "not waiting for IRQs";
+	default:
+		return "unknown";
+	}
+}
+
 /*
  * This busy timeout worker is used to "kick" the command IRQ if a
  * busy detect IRQ fails to appear in reasonable time. Only used on
@@ -1548,12 +1565,18 @@ static void ux500_busy_timeout_work(struct work_struct *work)
 	spin_lock_irqsave(&host->lock, flags);
 
 	if (host->cmd) {
-		dev_dbg(mmc_dev(host->mmc), "timeout waiting for busy IRQ\n");
-
 		/* If we are still busy let's tag on a cmd-timeout error. */
 		status = readl(host->base + MMCISTATUS);
-		if (status & host->variant->busy_detect_flag)
+		if (status & host->variant->busy_detect_flag) {
 			status |= MCI_CMDTIMEOUT;
+			dev_err(mmc_dev(host->mmc),
+				"timeout in state %s still busy with CMD%02x\n",
+				ux500_state_str(host), host->cmd->opcode);
+		} else {
+			dev_err(mmc_dev(host->mmc),
+				"timeout in state %s waiting for busy CMD%02x\n",
+				ux500_state_str(host), host->cmd->opcode);
+		}
 
 		mmci_cmd_irq(host, host->cmd, status);
 	}

From c4a226e617e78f1d62644379303e764b23d2faf2 Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Thu, 29 Jun 2023 20:43:17 +0200
Subject: [PATCH 0890/2047] dt-bindings: mmc: mtk-sd: drop
 assigned-clocks/clock-parents

MT7986 has 2 clock-parents and these properties are not needed in driver
binding. So drop them completely.

Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20230629184318.551317-2-linux@fw-web.de
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/mtk-sd.yaml | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.yaml b/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
index 46eefdd19a2c..3fffa467e4e1 100644
--- a/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
+++ b/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
@@ -91,16 +91,6 @@ properties:
       should switch dat1 pin to GPIO mode.
     maxItems: 1
 
-  assigned-clocks:
-    description:
-      PLL of the source clock.
-    maxItems: 1
-
-  assigned-clock-parents:
-    description:
-      parent of source clock, used for HS400 mode to get 400Mhz source clock.
-    maxItems: 1
-
   hs400-ds-delay:
     $ref: /schemas/types.yaml#/definitions/uint32
     description:

From 74f45de394d979cc7770271f92fafa53e1ed3119 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 12 Jul 2023 16:00:11 +0200
Subject: [PATCH 0891/2047] mmc: renesas_sdhi: register irqs before registering
 controller

IRQs should be ready to serve when we call mmc_add_host() via
tmio_mmc_host_probe(). To achieve that, ensure that all irqs are masked
before registering the handlers.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Tested-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20230712140011.18602-1-wsa+renesas@sang-engineering.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/renesas_sdhi_core.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index 345934e4f59e..2d5ef9c37d76 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -1006,6 +1006,8 @@ int renesas_sdhi_probe(struct platform_device *pdev,
 		host->sdcard_irq_setbit_mask = TMIO_STAT_ALWAYS_SET_27;
 		host->sdcard_irq_mask_all = TMIO_MASK_ALL_RCAR2;
 		host->reset = renesas_sdhi_reset;
+	} else {
+		host->sdcard_irq_mask_all = TMIO_MASK_ALL;
 	}
 
 	/* Orginally registers were 16 bit apart, could be 32 or 64 nowadays */
@@ -1100,9 +1102,7 @@ int renesas_sdhi_probe(struct platform_device *pdev,
 		host->ops.hs400_complete = renesas_sdhi_hs400_complete;
 	}
 
-	ret = tmio_mmc_host_probe(host);
-	if (ret < 0)
-		goto edisclk;
+	sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK, host->sdcard_irq_mask_all);
 
 	num_irqs = platform_irq_count(pdev);
 	if (num_irqs < 0) {
@@ -1129,6 +1129,10 @@ int renesas_sdhi_probe(struct platform_device *pdev,
 			goto eirq;
 	}
 
+	ret = tmio_mmc_host_probe(host);
+	if (ret < 0)
+		goto edisclk;
+
 	dev_info(&pdev->dev, "%s base at %pa, max clock rate %u MHz\n",
 		 mmc_hostname(host->mmc), &res->start, host->mmc->f_max / 1000000);
 

From aed8530500f7b4bc2696482b0e9000902aa5b07a Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 12 Jul 2023 16:01:15 +0200
Subject: [PATCH 0892/2047] mmc: renesas_sdhi: remove outdated indentation

Using tabs to make a structure initialization more readable is not
considered helpful. Remove the final appearance from this driver.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230712140116.18718-1-wsa+renesas@sang-engineering.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/renesas_sdhi_core.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index 2d5ef9c37d76..10dbdb4abc93 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -983,12 +983,12 @@ int renesas_sdhi_probe(struct platform_device *pdev,
 
 	}
 
-	host->write16_hook	= renesas_sdhi_write16_hook;
-	host->clk_enable	= renesas_sdhi_clk_enable;
-	host->clk_disable	= renesas_sdhi_clk_disable;
-	host->set_clock		= renesas_sdhi_set_clock;
-	host->multi_io_quirk	= renesas_sdhi_multi_io_quirk;
-	host->dma_ops		= dma_ops;
+	host->write16_hook = renesas_sdhi_write16_hook;
+	host->clk_enable = renesas_sdhi_clk_enable;
+	host->clk_disable = renesas_sdhi_clk_disable;
+	host->set_clock = renesas_sdhi_set_clock;
+	host->multi_io_quirk = renesas_sdhi_multi_io_quirk;
+	host->dma_ops = dma_ops;
 
 	if (sdhi_has_quirk(priv, hs400_disabled))
 		host->mmc->caps2 &= ~(MMC_CAP2_HS400 | MMC_CAP2_HS400_ES);

From a1a28ac1c095d3c084e24fcae3e0920b85dc47a0 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 21:19:36 +0800
Subject: [PATCH 0893/2047] mmc: mxcmmc: Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230704131939.22562-1-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mxcmmc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index 668f865f3efb..f3a72569dd1a 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -989,7 +989,6 @@ static int mxcmci_probe(struct platform_device *pdev)
 
 	pr_info("i.MX/MPC512x SDHC driver\n");
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
 		return irq;
@@ -1000,7 +999,7 @@ static int mxcmci_probe(struct platform_device *pdev)
 
 	host = mmc_priv(mmc);
 
-	host->base = devm_ioremap_resource(&pdev->dev, res);
+	host->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(host->base)) {
 		ret = PTR_ERR(host->base);
 		goto out_free;

From 9a2fdd2e7d756e7c9ed7c4dca02221fed3e194f8 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 21:19:37 +0800
Subject: [PATCH 0894/2047] mmc: omap_hsmmc: Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230704131939.22562-2-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/omap_hsmmc.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 1e0f2d7774bd..93de00a06aba 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1790,14 +1790,11 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENXIO;
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
 		return irq;
 
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 

From c89a869b67dc6c22e1d967df7bd6826fc81413a5 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 21:19:38 +0800
Subject: [PATCH 0895/2047] mmc: pxamci: Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230704131939.22562-3-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/pxamci.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 2a988f942b6c..1142cd2368f6 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -612,7 +612,6 @@ static int pxamci_probe(struct platform_device *pdev)
 	struct resource *r;
 	int ret, irq;
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
 		return irq;
@@ -685,14 +684,14 @@ static int pxamci_probe(struct platform_device *pdev)
 	}
 
 	spin_lock_init(&host->lock);
-	host->res = r;
 	host->imask = MMC_I_MASK_ALL;
 
-	host->base = devm_ioremap_resource(dev, r);
+	host->base = devm_platform_get_and_ioremap_resource(pdev, 0, &r);
 	if (IS_ERR(host->base)) {
 		ret = PTR_ERR(host->base);
 		goto out;
 	}
+	host->res = r;
 
 	/*
 	 * Ensure that the host controller is shut down, and setup

From de2067448d0ece6b774026cc05dcb6d4ebfd1e1a Mon Sep 17 00:00:00 2001
From: Julien Delbergue <j.delbergue.foss@gmail.com>
Date: Thu, 13 Jul 2023 11:31:08 +0200
Subject: [PATCH 0896/2047] mmc: davinci: Make SDIO irq truly optional

The following error is printed on Logic PD's DA850 EVM:
  davinci_mmc 1c40000.mmc: error -ENXIO: IRQ index 1 not found

Depending on the board, the SDIO interrupt may not be present, so use
the correct function to reflect that and prevent logging an error.

Signed-off-by: Julien Delbergue <j.delbergue.foss@gmail.com>
Acked-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Link: https://lore.kernel.org/r/8b57db8d-1d3a-883e-eb8f-ddf15f19d823@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/davinci_mmc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index 7138dfa065bf..fb686c0d171c 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -1257,7 +1257,7 @@ static int davinci_mmcsd_probe(struct platform_device *pdev)
 
 	host->use_dma = use_dma;
 	host->mmc_irq = irq;
-	host->sdio_irq = platform_get_irq(pdev, 1);
+	host->sdio_irq = platform_get_irq_optional(pdev, 1);
 
 	if (host->use_dma) {
 		ret = davinci_acquire_dma_channels(host);

From a5d8de1cb7e1d1404b3009937186a82834cedd1f Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 12 Jul 2023 16:13:27 +0200
Subject: [PATCH 0897/2047] mmc: uniphier-sd: register irqs before registering
 controller

IRQs should be ready to serve when we call mmc_add_host() via
tmio_mmc_host_probe(). To achieve that, ensure that all irqs are masked
before registering the handlers.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Link: https://lore.kernel.org/r/20230712141327.20827-1-wsa+renesas@sang-engineering.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/uniphier-sd.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/host/uniphier-sd.c b/drivers/mmc/host/uniphier-sd.c
index 61acd69fac0e..4dc079f74c1b 100644
--- a/drivers/mmc/host/uniphier-sd.c
+++ b/drivers/mmc/host/uniphier-sd.c
@@ -706,19 +706,19 @@ static int uniphier_sd_probe(struct platform_device *pdev)
 	tmio_data->max_segs = 1;
 	tmio_data->max_blk_count = U16_MAX;
 
-	ret = tmio_mmc_host_probe(host);
-	if (ret)
-		goto disable_clk;
+	sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK, TMIO_MASK_ALL);
 
 	ret = devm_request_irq(dev, irq, tmio_mmc_irq, IRQF_SHARED,
 			       dev_name(dev), host);
 	if (ret)
-		goto remove_host;
+		goto disable_clk;
+
+	ret = tmio_mmc_host_probe(host);
+	if (ret)
+		goto disable_clk;
 
 	return 0;
 
-remove_host:
-	tmio_mmc_host_remove(host);
 disable_clk:
 	uniphier_sd_clk_disable(host);
 free_host:

From c62da8a8a9872039ff3751d65a7f7b101a81cdcd Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jul 2023 08:30:52 -0600
Subject: [PATCH 0898/2047] mmc: Explicitly include correct DT includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230718143054.1065288-1-robh@kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/atmel-mci.c                  | 1 -
 drivers/mmc/host/cavium-octeon.c              | 2 ++
 drivers/mmc/host/cavium-thunderx.c            | 1 +
 drivers/mmc/host/davinci_mmc.c                | 1 -
 drivers/mmc/host/meson-gx-mmc.c               | 2 +-
 drivers/mmc/host/mtk-sd.c                     | 4 +---
 drivers/mmc/host/mxcmmc.c                     | 1 -
 drivers/mmc/host/mxs-mmc.c                    | 1 -
 drivers/mmc/host/owl-mmc.c                    | 3 ++-
 drivers/mmc/host/pxamci.c                     | 1 -
 drivers/mmc/host/renesas_sdhi_internal_dmac.c | 3 ++-
 drivers/mmc/host/renesas_sdhi_sys_dmac.c      | 3 ++-
 drivers/mmc/host/sdhci-bcm-kona.c             | 1 -
 drivers/mmc/host/sdhci-cadence.c              | 2 +-
 drivers/mmc/host/sdhci-esdhc-imx.c            | 2 +-
 drivers/mmc/host/sdhci-iproc.c                | 2 +-
 drivers/mmc/host/sdhci-msm.c                  | 2 +-
 drivers/mmc/host/sdhci-of-arasan.c            | 4 ++--
 drivers/mmc/host/sdhci-of-at91.c              | 2 +-
 drivers/mmc/host/sdhci-of-dwcmshc.c           | 2 +-
 drivers/mmc/host/sdhci-of-sparx5.c            | 2 +-
 drivers/mmc/host/sdhci-omap.c                 | 1 -
 drivers/mmc/host/sdhci-pxav2.c                | 1 -
 drivers/mmc/host/sdhci-s3c.c                  | 1 -
 drivers/mmc/host/sdhci-sprd.c                 | 1 -
 drivers/mmc/host/sdhci-tegra.c                | 1 -
 drivers/mmc/host/sh_mmcif.c                   | 1 -
 drivers/mmc/host/uniphier-sd.c                | 1 -
 drivers/mmc/host/wmt-sdmmc.c                  | 1 -
 29 files changed, 20 insertions(+), 30 deletions(-)

diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index dd18440a90c5..2fdf352b3d86 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -18,7 +18,6 @@
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_gpio.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c
index 12dca91a8ef6..02429f7afb42 100644
--- a/drivers/mmc/host/cavium-octeon.c
+++ b/drivers/mmc/host/cavium-octeon.c
@@ -13,7 +13,9 @@
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/slot-gpio.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <asm/octeon/octeon.h>
 #include "cavium.h"
 
diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c
index 202b1d6da678..2e2ff984f0b3 100644
--- a/drivers/mmc/host/cavium-thunderx.c
+++ b/drivers/mmc/host/cavium-thunderx.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pci.h>
 #include "cavium.h"
 
diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index fb686c0d171c..9e99de1206ef 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -21,7 +21,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/mmc/mmc.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/mmc/slot-gpio.h>
 #include <linux/interrupt.h>
 
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index d652374f37b2..49e3d1418902 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -11,7 +11,7 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/iopoll.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/ioport.h>
 #include <linux/dma-mapping.h>
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 02403ff99e0d..6c68c642ef61 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -12,9 +12,7 @@
 #include <linux/iopoll.h>
 #include <linux/ioport.h>
 #include <linux/irq.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index f3a72569dd1a..9b6bdc1b0758 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -31,7 +31,6 @@
 #include <linux/dmaengine.h>
 #include <linux/types.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_dma.h>
 #include <linux/mmc/slot-gpio.h>
 
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index 8c3655d3be96..951c6c48b4f7 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -11,7 +11,6 @@
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
diff --git a/drivers/mmc/host/owl-mmc.c b/drivers/mmc/host/owl-mmc.c
index 1bf22b08b373..62d2993573d0 100644
--- a/drivers/mmc/host/owl-mmc.c
+++ b/drivers/mmc/host/owl-mmc.c
@@ -16,8 +16,9 @@
 #include <linux/interrupt.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/slot-gpio.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/spinlock.h>
 
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 1142cd2368f6..bd53b17efe12 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -30,7 +30,6 @@
 #include <linux/gpio/consumer.h>
 #include <linux/gfp.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/soc/pxa/cpu.h>
 
 #include <linux/sizes.h>
diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
index 9ab813903b2c..4cc5bcb3213a 100644
--- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
@@ -15,7 +15,8 @@
 #include <linux/mmc/host.h>
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/pagemap.h>
 #include <linux/scatterlist.h>
 #include <linux/sys_soc.h>
diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c
index b559ad38b667..452982e670d2 100644
--- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c
@@ -15,7 +15,8 @@
 #include <linux/mmc/host.h>
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/pagemap.h>
 #include <linux/scatterlist.h>
 #include <linux/sys_soc.h>
diff --git a/drivers/mmc/host/sdhci-bcm-kona.c b/drivers/mmc/host/sdhci-bcm-kona.c
index 6a93a54fe067..ef1fb383e1e7 100644
--- a/drivers/mmc/host/sdhci-bcm-kona.c
+++ b/drivers/mmc/host/sdhci-bcm-kona.c
@@ -11,7 +11,6 @@
 #include <linux/clk.h>
 #include <linux/regulator/consumer.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/mmc/slot-gpio.h>
 
 #include "sdhci-pltfm.h"
diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c
index d2f625054689..25d9849ef0d1 100644
--- a/drivers/mmc/host/sdhci-cadence.c
+++ b/drivers/mmc/host/sdhci-cadence.c
@@ -11,7 +11,7 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/reset.h>
 
 #include "sdhci-pltfm.h"
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index eebf94604a7f..e767fe058023 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -22,7 +22,7 @@
 #include <linux/mmc/sdio.h>
 #include <linux/mmc/slot-gpio.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/pm_runtime.h>
 #include "sdhci-cqhci.h"
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index 86eb0045515e..6a93b1ee0b30 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -10,7 +10,7 @@
 #include <linux/module.h>
 #include <linux/mmc/host.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include "sdhci-pltfm.h"
 
 struct sdhci_iproc_data {
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 1c935b5bafe1..c0ce187e740a 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -6,7 +6,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/delay.h>
 #include <linux/mmc/mmc.h>
 #include <linux/pm_runtime.h>
@@ -15,6 +14,7 @@
 #include <linux/iopoll.h>
 #include <linux/regulator/consumer.h>
 #include <linux/interconnect.h>
+#include <linux/of.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/reset.h>
 
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index 294dd605fd2b..2f9de2518942 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -18,11 +18,11 @@
 #include <linux/clk-provider.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/phy/phy.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
-#include <linux/of.h>
 #include <linux/firmware/xlnx-zynqmp.h>
 
 #include "cqhci.h"
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index cd0134580a90..808307a211f2 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -17,7 +17,7 @@
 #include <linux/mmc/slot-gpio.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
 
diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index e68cd87998c8..55e1e2d0494d 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -14,7 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/sizes.h>
 
diff --git a/drivers/mmc/host/sdhci-of-sparx5.c b/drivers/mmc/host/sdhci-of-sparx5.c
index 28e4ee69e100..2e983cf49bc6 100644
--- a/drivers/mmc/host/sdhci-of-sparx5.c
+++ b/drivers/mmc/host/sdhci-of-sparx5.c
@@ -13,9 +13,9 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/regmap.h>
-#include <linux/of_device.h>
 #include <linux/mfd/syscon.h>
 #include <linux/dma-mapping.h>
+#include <linux/of.h>
 
 #include "sdhci-pltfm.h"
 
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index 8ed9256b83da..2593e3fbd0d9 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -11,7 +11,6 @@
 #include <linux/mmc/slot-gpio.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
index 91aca8f8d6ef..86b60b0447b3 100644
--- a/drivers/mmc/host/sdhci-pxav2.c
+++ b/drivers/mmc/host/sdhci-pxav2.c
@@ -19,7 +19,6 @@
 #include <linux/platform_data/pxa_sdhci.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/mmc/sdio.h>
 #include <linux/mmc/mmc.h>
 #include <linux/pinctrl/consumer.h>
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index 504015e84308..6245fb4584fe 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -20,7 +20,6 @@
 #include <linux/gpio.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_gpio.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c
index 7f4ee2e12735..a21d6524d7ca 100644
--- a/drivers/mmc/host/sdhci-sprd.c
+++ b/drivers/mmc/host/sdhci-sprd.c
@@ -11,7 +11,6 @@
 #include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_gpio.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index bff084f178c9..1e1eac953691 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -19,7 +19,6 @@
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/slot-gpio.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 5cf53348372a..838dab179724 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -46,7 +46,6 @@
 #include <linux/mmc/slot-gpio.h>
 #include <linux/mod_devicetable.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
 #include <linux/pagemap.h>
 #include <linux/platform_data/sh_mmcif.h>
 #include <linux/platform_device.h>
diff --git a/drivers/mmc/host/uniphier-sd.c b/drivers/mmc/host/uniphier-sd.c
index 4dc079f74c1b..b50cda4a9635 100644
--- a/drivers/mmc/host/uniphier-sd.c
+++ b/drivers/mmc/host/uniphier-sd.c
@@ -13,7 +13,6 @@
 #include <linux/mmc/host.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
index 68525d900046..f860f363be56 100644
--- a/drivers/mmc/host/wmt-sdmmc.c
+++ b/drivers/mmc/host/wmt-sdmmc.c
@@ -21,7 +21,6 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_device.h>
 
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>

From 9e4f4fe5ec1a7d6df5eceae0da33c24b753e34d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Tue, 25 Jul 2023 16:17:25 +0200
Subject: [PATCH 0899/2047] mmc: core: propagate removable attribute to driver
 core
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Userspace can use this to distinguish hotpluggable mmc devices such as
sdcards from non-hotpluggable ones such as eMMC.
One example is the lsblk tool from util-linux.

Note that dev_set_removable() is not related to GENHD_FL_REMOVABLE which
is not applicable as per the comment in drivers/mmc/core/block.c

Link: https://github.com/util-linux/util-linux/issues/2379
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reviewed-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20230725-mmc-removable-v1-1-b2e0c4f18e6d@weissschuh.net
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/bus.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 2c3074a605fc..0af96548e7da 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -310,6 +310,9 @@ int mmc_add_card(struct mmc_card *card)
 
 
 	dev_set_name(&card->dev, "%s:%04x", mmc_hostname(card->host), card->rca);
+	dev_set_removable(&card->dev,
+			  mmc_card_is_removable(card->host) ?
+			  DEVICE_REMOVABLE : DEVICE_FIXED);
 
 	switch (card->type) {
 	case MMC_TYPE_MMC:

From 90ecc29a09d5113a54d09053a019d4236ca685ca Mon Sep 17 00:00:00 2001
From: Varshini Rajendran <varshini.rajendran@microchip.com>
Date: Fri, 28 Jul 2023 15:56:16 +0530
Subject: [PATCH 0900/2047] dt-bindings: sdhci-of-at91: add
 microchip,sam9x7-sdhci

Add microchip,sam9x7-sdhci to DT bindings documentation.

Signed-off-by: Varshini Rajendran <varshini.rajendran@microchip.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230728102616.266235-1-varshini.rajendran@microchip.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/sdhci-atmel.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mmc/sdhci-atmel.txt b/Documentation/devicetree/bindings/mmc/sdhci-atmel.txt
index 69edfd4d3922..a9fb0a91245f 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-atmel.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-atmel.txt
@@ -5,11 +5,13 @@ Documentation/devicetree/bindings/mmc/mmc.txt and the properties used by the
 sdhci-of-at91 driver.
 
 Required properties:
-- compatible:		Must be "atmel,sama5d2-sdhci" or "microchip,sam9x60-sdhci".
+- compatible:		Must be "atmel,sama5d2-sdhci" or "microchip,sam9x60-sdhci"
+			or "microchip,sam9x7-sdhci", "microchip,sam9x60-sdhci".
 - clocks:		Phandlers to the clocks.
 - clock-names:		Must be "hclock", "multclk", "baseclk" for
 			"atmel,sama5d2-sdhci".
 			Must be "hclock", "multclk" for "microchip,sam9x60-sdhci".
+			Must be "hclock", "multclk" for "microchip,sam9x7-sdhci".
 
 Optional properties:
 - assigned-clocks:	The same with "multclk".

From aa857326a454814478b8e7c52caeee1743d92e0d Mon Sep 17 00:00:00 2001
From: Li Zetao <lizetao1@huawei.com>
Date: Wed, 2 Aug 2023 17:40:28 +0800
Subject: [PATCH 0901/2047] mmc: sdhci-st: Use
 devm_platform_ioremap_resource_byname()

Convert platform_get_resource_byname() + devm_ioremap_resource() to a
single call to devm_platform_ioremap_resource_byname(), as this is
exactly what this function does.

Signed-off-by: Li Zetao <lizetao1@huawei.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230802094028.976612-1-lizetao1@huawei.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-st.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
index 6415916fbd91..97f350953c4b 100644
--- a/drivers/mmc/host/sdhci-st.c
+++ b/drivers/mmc/host/sdhci-st.c
@@ -348,7 +348,6 @@ static int sdhci_st_probe(struct platform_device *pdev)
 	struct clk *clk, *icnclk;
 	int ret = 0;
 	u16 host_version;
-	struct resource *res;
 	struct reset_control *rstc;
 
 	clk =  devm_clk_get(&pdev->dev, "mmc");
@@ -397,9 +396,7 @@ static int sdhci_st_probe(struct platform_device *pdev)
 	}
 
 	/* Configure the FlashSS Top registers for setting eMMC TX/RX delay */
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-					   "top-mmc-delay");
-	pdata->top_ioaddr = devm_ioremap_resource(&pdev->dev, res);
+	pdata->top_ioaddr = devm_platform_ioremap_resource_byname(pdev, "top-mmc-delay");
 	if (IS_ERR(pdata->top_ioaddr))
 		pdata->top_ioaddr = NULL;
 

From 92b5b53380955e6ca3c5786c72dbc99dcdfa8e4f Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@amd.com>
Date: Thu, 3 Aug 2023 09:22:56 +0200
Subject: [PATCH 0902/2047] dt-bindings: mmc: arasan,sdci: Add power-domains
 and iommus properties

ZynqMP SDHCI Arasan IP core has own power domain and also iommu ID that's
why describe optional power-domains and iommus properties.

Signed-off-by: Michal Simek <michal.simek@amd.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/bf912d5f5e74b43903a84262565f564bfe0fed7e.1691047370.git.michal.simek@amd.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml b/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml
index a6c19a6cc99e..3e99801f77d2 100644
--- a/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml
+++ b/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml
@@ -160,6 +160,12 @@ properties:
     description:
       The MIO bank number in which the command and data lines are configured.
 
+  iommus:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
 dependencies:
   '#clock-cells': [ clock-output-names ]
 

From 921700b5ed5587c354a6bf2d23da325a49490253 Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Tue, 8 Aug 2023 21:37:13 +0800
Subject: [PATCH 0903/2047] mmc: sdhci-spear: remove redundant of_match_ptr()

The driver depends on CONFIG_OF, so it is not necessary to use
of_match_ptr() here. We remove both CONFIG_OF and of_match_ptr() here.

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Link: https://lore.kernel.org/r/20230808133714.214914-2-wangzhu9@huawei.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-spear.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
index c79035727b20..73ca07cdfaa9 100644
--- a/drivers/mmc/host/sdhci-spear.c
+++ b/drivers/mmc/host/sdhci-spear.c
@@ -170,20 +170,18 @@ static int sdhci_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(sdhci_pm_ops, sdhci_suspend, sdhci_resume);
 
-#ifdef CONFIG_OF
 static const struct of_device_id sdhci_spear_id_table[] = {
 	{ .compatible = "st,spear300-sdhci" },
 	{}
 };
 MODULE_DEVICE_TABLE(of, sdhci_spear_id_table);
-#endif
 
 static struct platform_driver sdhci_driver = {
 	.driver = {
 		.name	= "sdhci",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 		.pm	= &sdhci_pm_ops,
-		.of_match_table = of_match_ptr(sdhci_spear_id_table),
+		.of_match_table = sdhci_spear_id_table,
 	},
 	.probe		= sdhci_probe,
 	.remove		= sdhci_remove,

From 8069a60a0575bf287b9326541bbdbd59d41e49ec Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Tue, 8 Aug 2023 21:37:14 +0800
Subject: [PATCH 0904/2047] mmc: sdhci: milbeaut: remove redundant
 of_match_ptr()

The driver depends on CONFIG_OF, so it is not necessary to use
of_match_ptr() here. We remove of_match_ptr() here.

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Link: https://lore.kernel.org/r/20230808133714.214914-3-wangzhu9@huawei.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-milbeaut.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-milbeaut.c b/drivers/mmc/host/sdhci-milbeaut.c
index 148b37ac6564..701b11a14c6f 100644
--- a/drivers/mmc/host/sdhci-milbeaut.c
+++ b/drivers/mmc/host/sdhci-milbeaut.c
@@ -334,7 +334,7 @@ static struct platform_driver sdhci_milbeaut_driver = {
 	.driver = {
 		.name = "sdhci-milbeaut",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
-		.of_match_table = of_match_ptr(mlb_dt_ids),
+		.of_match_table = mlb_dt_ids,
 	},
 	.probe	= sdhci_milbeaut_probe,
 	.remove	= sdhci_milbeaut_remove,

From 3a6d93eb5f27f313ce0f3fa3fb171cd0732c4878 Mon Sep 17 00:00:00 2001
From: Chen Jiahao <chenjiahao16@huawei.com>
Date: Wed, 9 Aug 2023 10:04:22 +0000
Subject: [PATCH 0905/2047] regulator: da9121-regulator: Remove redundant
 of_match_ptr() macros

Since the driver da9121-regulator depends on CONFIG_OF,
it makes no difference to wrap of_match_ptr() here.

Remove of_match_ptr() macros to clean it up.

Signed-off-by: Chen Jiahao <chenjiahao16@huawei.com>
Link: https://lore.kernel.org/r/20230809100428.2669817-2-chenjiahao16@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/da9121-regulator.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/regulator/da9121-regulator.c b/drivers/regulator/da9121-regulator.c
index 122124944749..80098035bb13 100644
--- a/drivers/regulator/da9121-regulator.c
+++ b/drivers/regulator/da9121-regulator.c
@@ -440,7 +440,7 @@ static const struct regulator_desc da9121_reg = {
 	.of_match = "buck1",
 	.of_parse_cb = da9121_of_parse_cb,
 	.owner = THIS_MODULE,
-	.regulators_node = of_match_ptr("regulators"),
+	.regulators_node = "regulators",
 	.of_map_mode = da9121_map_mode,
 	.ops = &da9121_buck_ops,
 	.type = REGULATOR_VOLTAGE,
@@ -465,7 +465,7 @@ static const struct regulator_desc da9220_reg[2] = {
 		.of_match = "buck1",
 		.of_parse_cb = da9121_of_parse_cb,
 		.owner = THIS_MODULE,
-		.regulators_node = of_match_ptr("regulators"),
+		.regulators_node = "regulators",
 		.of_map_mode = da9121_map_mode,
 		.ops = &da9121_buck_ops,
 		.type = REGULATOR_VOLTAGE,
@@ -484,7 +484,7 @@ static const struct regulator_desc da9220_reg[2] = {
 		.of_match = "buck2",
 		.of_parse_cb = da9121_of_parse_cb,
 		.owner = THIS_MODULE,
-		.regulators_node = of_match_ptr("regulators"),
+		.regulators_node = "regulators",
 		.of_map_mode = da9121_map_mode,
 		.ops = &da9121_buck_ops,
 		.type = REGULATOR_VOLTAGE,
@@ -506,7 +506,7 @@ static const struct regulator_desc da9122_reg[2] = {
 		.of_match = "buck1",
 		.of_parse_cb = da9121_of_parse_cb,
 		.owner = THIS_MODULE,
-		.regulators_node = of_match_ptr("regulators"),
+		.regulators_node = "regulators",
 		.of_map_mode = da9121_map_mode,
 		.ops = &da9121_buck_ops,
 		.type = REGULATOR_VOLTAGE,
@@ -525,7 +525,7 @@ static const struct regulator_desc da9122_reg[2] = {
 		.of_match = "buck2",
 		.of_parse_cb = da9121_of_parse_cb,
 		.owner = THIS_MODULE,
-		.regulators_node = of_match_ptr("regulators"),
+		.regulators_node = "regulators",
 		.of_map_mode = da9121_map_mode,
 		.ops = &da9121_buck_ops,
 		.type = REGULATOR_VOLTAGE,
@@ -546,7 +546,7 @@ static const struct regulator_desc da9217_reg = {
 	.of_match = "buck1",
 	.of_parse_cb = da9121_of_parse_cb,
 	.owner = THIS_MODULE,
-	.regulators_node = of_match_ptr("regulators"),
+	.regulators_node = "regulators",
 	.of_map_mode = da9121_map_mode,
 	.ops = &da9121_buck_ops,
 	.type = REGULATOR_VOLTAGE,
@@ -573,7 +573,7 @@ static const struct regulator_desc da9141_reg = {
 	.of_match = "buck1",
 	.of_parse_cb = da9121_of_parse_cb,
 	.owner = THIS_MODULE,
-	.regulators_node = of_match_ptr("regulators"),
+	.regulators_node = "regulators",
 	.of_map_mode = da9121_map_mode,
 	.ops = &da9121_buck_ops,
 	.type = REGULATOR_VOLTAGE,
@@ -593,7 +593,7 @@ static const struct regulator_desc da9142_reg = {
 	.of_match = "buck1",
 	.of_parse_cb = da9121_of_parse_cb,
 	.owner = THIS_MODULE,
-	.regulators_node = of_match_ptr("regulators"),
+	.regulators_node = "regulators",
 	.of_map_mode = da9121_map_mode,
 	.ops = &da9121_buck_ops,
 	.type = REGULATOR_VOLTAGE,
@@ -1195,7 +1195,7 @@ static struct i2c_driver da9121_regulator_driver = {
 	.driver = {
 		.name = "da9121",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
-		.of_match_table = of_match_ptr(da9121_dt_ids),
+		.of_match_table = da9121_dt_ids,
 	},
 	.probe = da9121_i2c_probe,
 	.remove = da9121_i2c_remove,

From f410cfe8be5788cf09a3f32ce39946c339297821 Mon Sep 17 00:00:00 2001
From: Chen Jiahao <chenjiahao16@huawei.com>
Date: Wed, 9 Aug 2023 10:04:23 +0000
Subject: [PATCH 0906/2047] regulator: lp87565: Remove redundant of_match_ptr()
 macros

Since the driver lp87565-regulator depends on CONFIG_OF,
it makes no difference to wrap of_match_ptr() here.

Remove of_match_ptr() macros to clean it up.

Signed-off-by: Chen Jiahao <chenjiahao16@huawei.com>
Link: https://lore.kernel.org/r/20230809100428.2669817-3-chenjiahao16@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/lp87565-regulator.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/lp87565-regulator.c b/drivers/regulator/lp87565-regulator.c
index bdb60d8a7f3d..61ee5cf3f241 100644
--- a/drivers/regulator/lp87565-regulator.c
+++ b/drivers/regulator/lp87565-regulator.c
@@ -29,8 +29,8 @@ enum LP87565_regulator_id {
 			.name			= _name,		\
 			.supply_name		= _of "-in",		\
 			.id			= _id,			\
-			.of_match		= of_match_ptr(_of),	\
-			.regulators_node	= of_match_ptr("regulators"),\
+			.of_match		= _of,			\
+			.regulators_node	= "regulators",		\
 			.ops			= &_ops,		\
 			.n_voltages		= _n,			\
 			.type			= REGULATOR_VOLTAGE,	\

From 656ed7467623674e6cc794a5b2ef79e8b8b81c3f Mon Sep 17 00:00:00 2001
From: Chen Jiahao <chenjiahao16@huawei.com>
Date: Wed, 9 Aug 2023 10:04:24 +0000
Subject: [PATCH 0907/2047] regulator: hi6421: Remove redundant of_match_ptr()
 macros

Since the driver hi6421-regulator depends on CONFIG_OF,
it makes no difference to wrap of_match_ptr() here.

Remove of_match_ptr() macros to clean it up.

Signed-off-by: Chen Jiahao <chenjiahao16@huawei.com>
Link: https://lore.kernel.org/r/20230809100428.2669817-4-chenjiahao16@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/hi6421-regulator.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/regulator/hi6421-regulator.c b/drivers/regulator/hi6421-regulator.c
index 1b52423598d3..82e9e364d4d4 100644
--- a/drivers/regulator/hi6421-regulator.c
+++ b/drivers/regulator/hi6421-regulator.c
@@ -131,8 +131,8 @@ static const struct regulator_ops hi6421_buck345_ops;
 	[HI6421_##_id] = {						\
 		.desc = {						\
 			.name		= #_id,				\
-			.of_match        = of_match_ptr(#_match),	\
-			.regulators_node = of_match_ptr("regulators"),	\
+			.of_match        = #_match,			\
+			.regulators_node = "regulators",		\
 			.ops		= &hi6421_ldo_ops,		\
 			.type		= REGULATOR_VOLTAGE,		\
 			.id		= HI6421_##_id,			\
@@ -170,8 +170,8 @@ static const struct regulator_ops hi6421_buck345_ops;
 	[HI6421_##_id] = {						\
 		.desc = {						\
 			.name		= #_id,				\
-			.of_match        = of_match_ptr(#_match),	\
-			.regulators_node = of_match_ptr("regulators"),	\
+			.of_match        = #_match,			\
+			.regulators_node = "regulators",		\
 			.ops		= &hi6421_ldo_linear_ops,	\
 			.type		= REGULATOR_VOLTAGE,		\
 			.id		= HI6421_##_id,			\
@@ -210,8 +210,8 @@ static const struct regulator_ops hi6421_buck345_ops;
 	[HI6421_##_id] = {						\
 		.desc = {						\
 			.name		= #_id,				\
-			.of_match        = of_match_ptr(#_match),	\
-			.regulators_node = of_match_ptr("regulators"),	\
+			.of_match        = #_match,			\
+			.regulators_node = "regulators",		\
 			.ops		= &hi6421_ldo_linear_range_ops,	\
 			.type		= REGULATOR_VOLTAGE,		\
 			.id		= HI6421_##_id,			\
@@ -247,8 +247,8 @@ static const struct regulator_ops hi6421_buck345_ops;
 	[HI6421_##_id] = {						\
 		.desc = {						\
 			.name		= #_id,				\
-			.of_match        = of_match_ptr(#_match),	\
-			.regulators_node = of_match_ptr("regulators"),	\
+			.of_match        = #_match,			\
+			.regulators_node = "regulators",		\
 			.ops		= &hi6421_buck012_ops,		\
 			.type		= REGULATOR_VOLTAGE,		\
 			.id		= HI6421_##_id,			\
@@ -284,8 +284,8 @@ static const struct regulator_ops hi6421_buck345_ops;
 	[HI6421_##_id] = {						\
 		.desc = {						\
 			.name		= #_id,				\
-			.of_match        = of_match_ptr(#_match),	\
-			.regulators_node = of_match_ptr("regulators"),	\
+			.of_match        = #_match,			\
+			.regulators_node = "regulators",		\
 			.ops		= &hi6421_buck345_ops,		\
 			.type		= REGULATOR_VOLTAGE,		\
 			.id		= HI6421_##_id,			\

From 3988795eb08c4239e865c4c21a3d5fd6fb2bd055 Mon Sep 17 00:00:00 2001
From: Chen Jiahao <chenjiahao16@huawei.com>
Date: Wed, 9 Aug 2023 10:04:25 +0000
Subject: [PATCH 0908/2047] regulator: mcp16502: Remove redundant
 of_match_ptr() macros

Since the driver mcp16502 depends on CONFIG_OF,
it makes no difference to wrap of_match_ptr() here.

Remove of_match_ptr() macros to clean it up.

Signed-off-by: Chen Jiahao <chenjiahao16@huawei.com>
Link: https://lore.kernel.org/r/20230809100428.2669817-5-chenjiahao16@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mcp16502.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/regulator/mcp16502.c b/drivers/regulator/mcp16502.c
index 7a472a2ef48a..0c15a19fe83a 100644
--- a/drivers/regulator/mcp16502.c
+++ b/drivers/regulator/mcp16502.c
@@ -110,7 +110,7 @@ static unsigned int mcp16502_of_map_mode(unsigned int mode)
 #define MCP16502_REGULATOR(_name, _id, _ranges, _ops, _ramp_table)	\
 	[_id] = {							\
 		.name			= _name,			\
-		.regulators_node	= of_match_ptr("regulators"),	\
+		.regulators_node	= "regulators",			\
 		.id			= _id,				\
 		.ops			= &(_ops),			\
 		.type			= REGULATOR_VOLTAGE,		\
@@ -119,7 +119,7 @@ static unsigned int mcp16502_of_map_mode(unsigned int mode)
 		.linear_ranges		= _ranges,			\
 		.linear_min_sel		= VDD_LOW_SEL,			\
 		.n_linear_ranges	= ARRAY_SIZE(_ranges),		\
-		.of_match		= of_match_ptr(_name),		\
+		.of_match		= _name,			\
 		.of_map_mode		= mcp16502_of_map_mode,		\
 		.vsel_reg		= (((_id) + 1) << 4),		\
 		.vsel_mask		= MCP16502_VSEL,		\
@@ -587,7 +587,7 @@ static struct i2c_driver mcp16502_drv = {
 	.driver		= {
 		.name	= "mcp16502-regulator",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
-		.of_match_table	= of_match_ptr(mcp16502_ids),
+		.of_match_table	= mcp16502_ids,
 #ifdef CONFIG_PM
 		.pm = &mcp16502_pm_ops,
 #endif

From c329adf3026034c84ddd61c0b19db53d40510ae7 Mon Sep 17 00:00:00 2001
From: Chen Jiahao <chenjiahao16@huawei.com>
Date: Wed, 9 Aug 2023 10:04:26 +0000
Subject: [PATCH 0909/2047] regulator: mpq7920: Remove redundant of_match_ptr()
 macro

Since the driver mpq7920 depends on CONFIG_OF,
it makes no difference to wrap of_match_ptr() here.

Remove the of_match_ptr() macro to clean it up.

Signed-off-by: Chen Jiahao <chenjiahao16@huawei.com>
Link: https://lore.kernel.org/r/20230809100428.2669817-6-chenjiahao16@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mpq7920.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/mpq7920.c b/drivers/regulator/mpq7920.c
index 4926c229109b..a670e09891e7 100644
--- a/drivers/regulator/mpq7920.c
+++ b/drivers/regulator/mpq7920.c
@@ -318,7 +318,7 @@ static struct i2c_driver mpq7920_regulator_driver = {
 	.driver = {
 		.name = "mpq7920",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
-		.of_match_table = of_match_ptr(mpq7920_of_match),
+		.of_match_table = mpq7920_of_match,
 	},
 	.probe = mpq7920_i2c_probe,
 	.id_table = mpq7920_id,

From 9e8925eb7fd6e5d93d9dfa9ca4628e897f0db8f1 Mon Sep 17 00:00:00 2001
From: Chen Jiahao <chenjiahao16@huawei.com>
Date: Wed, 9 Aug 2023 10:04:27 +0000
Subject: [PATCH 0910/2047] regulator: pfuze100-regulator: Remove redundant
 of_match_ptr() macro

Since the driver pfuze100-regulator depends on CONFIG_OF,
it makes no difference to wrap of_match_ptr() here.

Remove the of_match_ptr() macro to clean it up.

Signed-off-by: Chen Jiahao <chenjiahao16@huawei.com>
Link: https://lore.kernel.org/r/20230809100428.2669817-7-chenjiahao16@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/pfuze100-regulator.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c
index 8d7e6c323324..46854602b3ea 100644
--- a/drivers/regulator/pfuze100-regulator.c
+++ b/drivers/regulator/pfuze100-regulator.c
@@ -699,8 +699,7 @@ static int pfuze100_regulator_probe(struct i2c_client *client)
 		return -ENOMEM;
 
 	if (client->dev.of_node) {
-		match = of_match_device(of_match_ptr(pfuze_dt_ids),
-				&client->dev);
+		match = of_match_device(pfuze_dt_ids, &client->dev);
 		if (!match) {
 			dev_err(&client->dev, "Error: No device match found\n");
 			return -ENODEV;

From 252b9116aff4d04176adc931cd11f02169df54f1 Mon Sep 17 00:00:00 2001
From: Chen Jiahao <chenjiahao16@huawei.com>
Date: Wed, 9 Aug 2023 10:04:28 +0000
Subject: [PATCH 0911/2047] regulator: tps6286x-regulator: Remove redundant
 of_match_ptr() macros

Since the driver tps6286x-regulator depends on CONFIG_OF,
it makes no difference to wrap of_match_ptr() here.

Remove of_match_ptr() macros to clean it up.

Signed-off-by: Chen Jiahao <chenjiahao16@huawei.com>
Link: https://lore.kernel.org/r/20230809100428.2669817-8-chenjiahao16@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/tps6286x-regulator.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/regulator/tps6286x-regulator.c b/drivers/regulator/tps6286x-regulator.c
index c06f6d1dc737..758c70269653 100644
--- a/drivers/regulator/tps6286x-regulator.c
+++ b/drivers/regulator/tps6286x-regulator.c
@@ -84,11 +84,11 @@ static unsigned int tps6286x_of_map_mode(unsigned int mode)
 
 static const struct regulator_desc tps6286x_reg = {
 	.name = "tps6286x",
-	.of_match = of_match_ptr("SW"),
+	.of_match = "SW",
 	.owner = THIS_MODULE,
 	.ops = &tps6286x_regulator_ops,
 	.of_map_mode = tps6286x_of_map_mode,
-	.regulators_node = of_match_ptr("regulators"),
+	.regulators_node = "regulators",
 	.type = REGULATOR_VOLTAGE,
 	.n_voltages = ((TPS6286X_MAX_MV - TPS6286X_MIN_MV) / TPS6286X_STEP_MV) + 1,
 	.min_uV = TPS6286X_MIN_MV * 1000,
@@ -148,7 +148,7 @@ static struct i2c_driver tps6286x_regulator_driver = {
 	.driver = {
 		.name = "tps6286x",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
-		.of_match_table = of_match_ptr(tps6286x_dt_ids),
+		.of_match_table = tps6286x_dt_ids,
 	},
 	.probe = tps6286x_i2c_probe,
 	.id_table = tps6286x_i2c_id,

From ddaec4e44d4e2a556b51159d48788a85ff67179d Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Wed, 9 Aug 2023 10:54:27 +0200
Subject: [PATCH 0912/2047] spi: fsl-cpm: Properly define and use IO pointers

Sparse reports several issues with IO pointers.

Fix it by adding missing __iomem flags and using iowriteXXbe()
generic helpers instead of the powerpc specific out_beXX() ones.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202307252052.7RqHxFZj-lkp@intel.com/
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Link: https://lore.kernel.org/r/18a65dca9134f6fc35932408066d4a8284cbfa65.1691571190.git.christophe.leroy@csgroup.eu
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-cpm.c | 74 +++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c
index 38452089e8f3..47c7a5c6257f 100644
--- a/drivers/spi/spi-fsl-cpm.c
+++ b/drivers/spi/spi-fsl-cpm.c
@@ -56,12 +56,12 @@ void fsl_spi_cpm_reinit_txrx(struct mpc8xxx_spi *mspi)
 			     QE_CR_PROTOCOL_UNSPECIFIED, 0);
 	} else {
 		if (mspi->flags & SPI_CPM1) {
-			out_be32(&mspi->pram->rstate, 0);
-			out_be16(&mspi->pram->rbptr,
-				 in_be16(&mspi->pram->rbase));
-			out_be32(&mspi->pram->tstate, 0);
-			out_be16(&mspi->pram->tbptr,
-				 in_be16(&mspi->pram->tbase));
+			iowrite32be(0, &mspi->pram->rstate);
+			iowrite16be(ioread16be(&mspi->pram->rbase),
+				    &mspi->pram->rbptr);
+			iowrite32be(0, &mspi->pram->tstate);
+			iowrite16be(ioread16be(&mspi->pram->tbase),
+				    &mspi->pram->tbptr);
 		} else {
 			cpm_command(CPM_SPI_CMD, CPM_CR_INIT_TRX);
 		}
@@ -75,24 +75,24 @@ static void fsl_spi_cpm_bufs_start(struct mpc8xxx_spi *mspi)
 	struct cpm_buf_desc __iomem *rx_bd = mspi->rx_bd;
 	unsigned int xfer_len = min(mspi->count, SPI_MRBLR);
 	unsigned int xfer_ofs;
-	struct fsl_spi_reg *reg_base = mspi->reg_base;
+	struct fsl_spi_reg __iomem *reg_base = mspi->reg_base;
 
 	xfer_ofs = mspi->xfer_in_progress->len - mspi->count;
 
 	if (mspi->rx_dma == mspi->dma_dummy_rx)
-		out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma);
+		iowrite32be(mspi->rx_dma, &rx_bd->cbd_bufaddr);
 	else
-		out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma + xfer_ofs);
-	out_be16(&rx_bd->cbd_datlen, 0);
-	out_be16(&rx_bd->cbd_sc, BD_SC_EMPTY | BD_SC_INTRPT | BD_SC_WRAP);
+		iowrite32be(mspi->rx_dma + xfer_ofs, &rx_bd->cbd_bufaddr);
+	iowrite16be(0, &rx_bd->cbd_datlen);
+	iowrite16be(BD_SC_EMPTY | BD_SC_INTRPT | BD_SC_WRAP, &rx_bd->cbd_sc);
 
 	if (mspi->tx_dma == mspi->dma_dummy_tx)
-		out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma);
+		iowrite32be(mspi->tx_dma, &tx_bd->cbd_bufaddr);
 	else
-		out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma + xfer_ofs);
-	out_be16(&tx_bd->cbd_datlen, xfer_len);
-	out_be16(&tx_bd->cbd_sc, BD_SC_READY | BD_SC_INTRPT | BD_SC_WRAP |
-				 BD_SC_LAST);
+		iowrite32be(mspi->tx_dma + xfer_ofs, &tx_bd->cbd_bufaddr);
+	iowrite16be(xfer_len, &tx_bd->cbd_datlen);
+	iowrite16be(BD_SC_READY | BD_SC_INTRPT | BD_SC_WRAP | BD_SC_LAST,
+		    &tx_bd->cbd_sc);
 
 	/* start transfer */
 	mpc8xxx_spi_write_reg(&reg_base->command, SPCOM_STR);
@@ -102,7 +102,7 @@ int fsl_spi_cpm_bufs(struct mpc8xxx_spi *mspi,
 		     struct spi_transfer *t, bool is_dma_mapped)
 {
 	struct device *dev = mspi->dev;
-	struct fsl_spi_reg *reg_base = mspi->reg_base;
+	struct fsl_spi_reg __iomem *reg_base = mspi->reg_base;
 
 	if (is_dma_mapped) {
 		mspi->map_tx_dma = 0;
@@ -123,7 +123,7 @@ int fsl_spi_cpm_bufs(struct mpc8xxx_spi *mspi,
 	}
 	if (t->bits_per_word == 16 && t->tx_buf) {
 		const u16 *src = t->tx_buf;
-		u16 *dst;
+		__le16 *dst;
 		int i;
 
 		dst = kmalloc(t->len, GFP_KERNEL);
@@ -202,12 +202,12 @@ EXPORT_SYMBOL_GPL(fsl_spi_cpm_bufs_complete);
 void fsl_spi_cpm_irq(struct mpc8xxx_spi *mspi, u32 events)
 {
 	u16 len;
-	struct fsl_spi_reg *reg_base = mspi->reg_base;
+	struct fsl_spi_reg __iomem *reg_base = mspi->reg_base;
 
 	dev_dbg(mspi->dev, "%s: bd datlen %d, count %d\n", __func__,
-		in_be16(&mspi->rx_bd->cbd_datlen), mspi->count);
+		ioread16be(&mspi->rx_bd->cbd_datlen), mspi->count);
 
-	len = in_be16(&mspi->rx_bd->cbd_datlen);
+	len = ioread16be(&mspi->rx_bd->cbd_datlen);
 	if (len > mspi->count) {
 		WARN_ON(1);
 		len = mspi->count;
@@ -328,7 +328,7 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
 	}
 
 	if (mspi->flags & SPI_CPM1) {
-		void *pram;
+		void __iomem *pram;
 
 		pram = devm_platform_ioremap_resource(to_platform_device(dev),
 						      1);
@@ -374,21 +374,21 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
 	mspi->rx_bd = cpm_muram_addr(bds_ofs + sizeof(*mspi->tx_bd));
 
 	/* Initialize parameter ram. */
-	out_be16(&mspi->pram->tbase, cpm_muram_offset(mspi->tx_bd));
-	out_be16(&mspi->pram->rbase, cpm_muram_offset(mspi->rx_bd));
-	out_8(&mspi->pram->tfcr, CPMFCR_EB | CPMFCR_GBL);
-	out_8(&mspi->pram->rfcr, CPMFCR_EB | CPMFCR_GBL);
-	out_be16(&mspi->pram->mrblr, SPI_MRBLR);
-	out_be32(&mspi->pram->rstate, 0);
-	out_be32(&mspi->pram->rdp, 0);
-	out_be16(&mspi->pram->rbptr, 0);
-	out_be16(&mspi->pram->rbc, 0);
-	out_be32(&mspi->pram->rxtmp, 0);
-	out_be32(&mspi->pram->tstate, 0);
-	out_be32(&mspi->pram->tdp, 0);
-	out_be16(&mspi->pram->tbptr, 0);
-	out_be16(&mspi->pram->tbc, 0);
-	out_be32(&mspi->pram->txtmp, 0);
+	iowrite16be(cpm_muram_offset(mspi->tx_bd), &mspi->pram->tbase);
+	iowrite16be(cpm_muram_offset(mspi->rx_bd), &mspi->pram->rbase);
+	iowrite8(CPMFCR_EB | CPMFCR_GBL, &mspi->pram->tfcr);
+	iowrite8(CPMFCR_EB | CPMFCR_GBL, &mspi->pram->rfcr);
+	iowrite16be(SPI_MRBLR, &mspi->pram->mrblr);
+	iowrite32be(0, &mspi->pram->rstate);
+	iowrite32be(0, &mspi->pram->rdp);
+	iowrite16be(0, &mspi->pram->rbptr);
+	iowrite16be(0, &mspi->pram->rbc);
+	iowrite32be(0, &mspi->pram->rxtmp);
+	iowrite32be(0, &mspi->pram->tstate);
+	iowrite32be(0, &mspi->pram->tdp);
+	iowrite16be(0, &mspi->pram->tbptr);
+	iowrite16be(0, &mspi->pram->tbc);
+	iowrite32be(0, &mspi->pram->txtmp);
 
 	return 0;
 

From e1b9c2749af020e6c915eb07fcd53fa3a1a074e6 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Tue, 1 Aug 2023 15:05:30 +0200
Subject: [PATCH 0913/2047] s390/smp: ensure global control register contents
 are in sync

Globally setting a bit in control registers is done with
smp_ctl_set_clear_bit(). This is using on_each_cpu() to execute a function
which actually sets the control register bit on each online CPU. This can
be problematic since on_each_cpu() does not prevent that new CPUs come
online while it is executed, which in turn means that control register
updates could be missing on new CPUs.

In order to prevent this problem make sure that global control register
contents cannot change until new CPUs have initialized their control
registers, and marked themselves online, so they are included in subsequent
on_each_cpu() calls.

Reviewed-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/smp.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 9244130721d6..a4edb7ea66ea 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -253,8 +253,9 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
 
 static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
 {
-	struct lowcore *lc = lowcore_ptr[cpu];
+	struct lowcore *lc, *abs_lc;
 
+	lc = lowcore_ptr[cpu];
 	cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
 	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
 	lc->cpu_nr = cpu;
@@ -267,7 +268,9 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
 	lc->machine_flags = S390_lowcore.machine_flags;
 	lc->user_timer = lc->system_timer =
 		lc->steal_timer = lc->avg_steal_timer = 0;
-	__ctl_store(lc->cregs_save_area, 0, 15);
+	abs_lc = get_abs_lowcore();
+	memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
+	put_abs_lowcore(abs_lc);
 	lc->cregs_save_area[1] = lc->kernel_asce;
 	lc->cregs_save_area[7] = lc->user_asce;
 	save_access_regs((unsigned int *) lc->access_regs_save_area);
@@ -607,8 +610,8 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set)
 	ctlreg = (ctlreg & parms.andval) | parms.orval;
 	abs_lc->cregs_save_area[cr] = ctlreg;
 	put_abs_lowcore(abs_lc);
-	spin_unlock(&ctl_lock);
 	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+	spin_unlock(&ctl_lock);
 }
 EXPORT_SYMBOL(smp_ctl_set_clear_bit);
 
@@ -928,12 +931,18 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	rc = pcpu_alloc_lowcore(pcpu, cpu);
 	if (rc)
 		return rc;
+	/*
+	 * Make sure global control register contents do not change
+	 * until new CPU has initialized control registers.
+	 */
+	spin_lock(&ctl_lock);
 	pcpu_prepare_secondary(pcpu, cpu);
 	pcpu_attach_task(pcpu, tidle);
 	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
 	/* Wait until cpu puts itself in the online & active maps */
 	while (!cpu_online(cpu))
 		cpu_relax();
+	spin_unlock(&ctl_lock);
 	return 0;
 }
 

From 1e66317a7f575b07a837b7dcbf4bf636282ea8a2 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 7 Aug 2023 00:16:38 +0900
Subject: [PATCH 0914/2047] s390: remove unneeded #include <asm/export.h>

There is no EXPORT_SYMBOL line there, hence #include <asm/export.h>
is unneeded.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20230806151641.394720-1-masahiroy@kernel.org
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/mcount.S | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 71c5fa05e7f1..ae4d4fd9afcd 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -9,8 +9,6 @@
 #include <asm/ftrace.h>
 #include <asm/nospec-insn.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
-
 
 #define STACK_FRAME_SIZE_PTREGS		(STACK_FRAME_OVERHEAD + __PT_SIZE)
 #define STACK_PTREGS			(STACK_FRAME_OVERHEAD)

From b8c723f1e62b40af4db91c04989272fb7056b30b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 7 Aug 2023 00:16:39 +0900
Subject: [PATCH 0915/2047] s390: replace #include <asm/export.h> with #include
 <linux/export.h>

Commit ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost")
deprecated <asm/export.h>, which is now a wrapper of <linux/export.h>.

Replace #include <asm/export.h> with #include <linux/export.h>.

After all the <asm/export.h> lines are converted, <asm/export.h> and
<asm-generic/export.h> will be removed.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20230806151641.394720-2-masahiroy@kernel.org
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/entry.S | 2 +-
 arch/s390/lib/mem.S      | 2 +-
 arch/s390/lib/tishift.S  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index a660f4b6d654..49a11f6dd7ae 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -8,6 +8,7 @@
  *		 Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
  */
 
+#include <linux/export.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/asm-extable.h>
@@ -26,7 +27,6 @@
 #include <asm/vx-insn.h>
 #include <asm/setup.h>
 #include <asm/nmi.h>
-#include <asm/export.h>
 #include <asm/nospec-insn.h>
 
 _LPP_OFFSET	= __LC_LPP
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index 5a9a55de2e10..08f60a42b9a6 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -5,8 +5,8 @@
  * Copyright IBM Corp. 2012
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 #include <asm/nospec-insn.h>
 
 	GEN_BR_THUNK %r14
diff --git a/arch/s390/lib/tishift.S b/arch/s390/lib/tishift.S
index de33cf02cfd2..96214f51f49b 100644
--- a/arch/s390/lib/tishift.S
+++ b/arch/s390/lib/tishift.S
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/nospec-insn.h>
-#include <asm/export.h>
 
 	.section .noinstr.text, "ax"
 

From ee4ac5275fd82a877e2fe6aeefcc3c4268f43388 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 7 Aug 2023 00:16:40 +0900
Subject: [PATCH 0916/2047] s390: remove <asm/export.h>

All *.S files under arch/s390/ have been converted to include
<linux/export.h> instead of <asm/export.h>.

Remove <asm/export.h>.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20230806151641.394720-3-masahiroy@kernel.org
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/Kbuild | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 1a18d7b82f86..4b904110d27c 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -5,6 +5,5 @@ generated-y += syscall_table.h
 generated-y += unistd_nr.h
 
 generic-y += asm-offsets.h
-generic-y += export.h
 generic-y += kvm_types.h
 generic-y += mcs_spinlock.h

From 117d73b18441638b85b3963256c846509c6138f1 Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Date: Wed, 9 Aug 2023 00:18:12 -0700
Subject: [PATCH 0917/2047] mmc: sunplus: Fix platform_get_irq() error checking

The platform_get_irq() function returns negative error codes on failure.

Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Reviewed-by: Dan Carpenter <dan.carpenter@linaro.org>
Link: https://lore.kernel.org/r/20230809071812.547229-2-harshit.m.mogalapalli@oracle.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sunplus-mmc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sunplus-mmc.c b/drivers/mmc/host/sunplus-mmc.c
index 2bdebeb1f8e4..e9cb1a57cb75 100644
--- a/drivers/mmc/host/sunplus-mmc.c
+++ b/drivers/mmc/host/sunplus-mmc.c
@@ -885,7 +885,7 @@ static int spmmc_drv_probe(struct platform_device *pdev)
 		return dev_err_probe(&pdev->dev, PTR_ERR(host->rstc), "rst get fail\n");
 
 	host->irq = platform_get_irq(pdev, 0);
-	if (host->irq <= 0)
+	if (host->irq < 0)
 		return host->irq;
 
 	ret = devm_request_threaded_irq(&pdev->dev, host->irq,

From 283cf357b02848a48eb544e3cde993254fc2c19e Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 8 Aug 2023 20:15:13 +0800
Subject: [PATCH 0918/2047] mmc: remove unnecessary set_drvdata() function

The driver data will be cleared in device_unbind_cleanup() in driver
core code. So the set_drvdata(..., NULL) called in remove and error
path in probe can be removed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230808121513.553143-1-yangyingliang@huawei.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c       | 1 -
 drivers/mmc/host/moxart-mmc.c  | 2 --
 drivers/mmc/host/sunplus-mmc.c | 1 -
 drivers/mmc/host/via-sdmmc.c   | 2 --
 drivers/mmc/host/wbsd.c        | 2 --
 5 files changed, 8 deletions(-)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index b6f4be25b31b..266298a04f78 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -3026,7 +3026,6 @@ static void mmc_blk_remove(struct mmc_card *card)
 		pm_runtime_disable(&card->dev);
 	pm_runtime_put_noidle(&card->dev);
 	mmc_blk_remove_req(md);
-	dev_set_drvdata(&card->dev, NULL);
 	destroy_workqueue(card->complete_wq);
 }
 
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index d0d6ffcf78d4..43c2a95bc29a 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -692,8 +692,6 @@ static int moxart_remove(struct platform_device *pdev)
 	struct mmc_host *mmc = dev_get_drvdata(&pdev->dev);
 	struct moxart_host *host = mmc_priv(mmc);
 
-	dev_set_drvdata(&pdev->dev, NULL);
-
 	if (!IS_ERR_OR_NULL(host->dma_chan_tx))
 		dma_release_channel(host->dma_chan_tx);
 	if (!IS_ERR_OR_NULL(host->dma_chan_rx))
diff --git a/drivers/mmc/host/sunplus-mmc.c b/drivers/mmc/host/sunplus-mmc.c
index e9cb1a57cb75..37ebed040807 100644
--- a/drivers/mmc/host/sunplus-mmc.c
+++ b/drivers/mmc/host/sunplus-mmc.c
@@ -948,7 +948,6 @@ static int spmmc_drv_remove(struct platform_device *dev)
 	clk_disable_unprepare(host->clk);
 	pm_runtime_put_noidle(&dev->dev);
 	pm_runtime_disable(&dev->dev);
-	platform_set_drvdata(dev, NULL);
 
 	return 0;
 }
diff --git a/drivers/mmc/host/via-sdmmc.c b/drivers/mmc/host/via-sdmmc.c
index a2b0d9461665..ba6044b16e07 100644
--- a/drivers/mmc/host/via-sdmmc.c
+++ b/drivers/mmc/host/via-sdmmc.c
@@ -1160,7 +1160,6 @@ static int via_sd_probe(struct pci_dev *pcidev,
 unmap:
 	iounmap(sdhost->mmiobase);
 free_mmc_host:
-	dev_set_drvdata(&pcidev->dev, NULL);
 	mmc_free_host(mmc);
 release:
 	pci_release_regions(pcidev);
@@ -1212,7 +1211,6 @@ static void via_sd_remove(struct pci_dev *pcidev)
 	writeb(gatt, sdhost->pcictrl_mmiobase + VIA_CRDR_PCICLKGATT);
 
 	iounmap(sdhost->mmiobase);
-	dev_set_drvdata(&pcidev->dev, NULL);
 	mmc_free_host(sdhost->mmc);
 	pci_release_regions(pcidev);
 	pci_disable_device(pcidev);
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index bf2a92fba0ed..0589d169e8fb 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -1264,8 +1264,6 @@ static void wbsd_free_mmc(struct device *dev)
 	del_timer_sync(&host->ignore_timer);
 
 	mmc_free_host(mmc);
-
-	dev_set_drvdata(dev, NULL);
 }
 
 /*

From 2a6f5df3cd9493a2d61a2ee1017b9583ae929c22 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Tue, 18 Jul 2023 07:44:25 +0200
Subject: [PATCH 0919/2047] MAINTAINERS: add Andreas Hindborg as Rust reviewer

Andreas has been involved with the Rust for Linux project for more than
a year now. He has been primarily working on the Rust NVMe driver [1],
presenting it in several places (such as LPC [2][3] and Kangrejos [4]).

In addition, he recently submitted the Rust null block driver [5] and
has been reviewing patches in the mailing list for some months.

Thus add him to the `RUST` entry as reviewer.

Link: https://rust-for-linux.com/nvme-driver [1]
Link: https://lpc.events/event/16/contributions/1180/attachments/1017/1961/deck.pdf [2]
Link: https://www.youtube.com/watch?v=BwywU1MqW38 [3]
Link: https://kangrejos.com/A%20Linux%20(PCI)%20NVMe%20Driver%20in%20Rust.pdf [4]
Link: https://lore.kernel.org/rust-for-linux/20230503090708.2524310-1-nmi@metaspace.dk/ [5]
Acked-by: Andreas Hindborg <a.hindborg@samsung.com>
Acked-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lore.kernel.org/r/20230718054426.1048583-1-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0f966f05fb0d..c469df613a29 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18554,6 +18554,7 @@ R:	Boqun Feng <boqun.feng@gmail.com>
 R:	Gary Guo <gary@garyguo.net>
 R:	Björn Roy Baron <bjorn3_gh@protonmail.com>
 R:	Benno Lossin <benno.lossin@proton.me>
+R:	Andreas Hindborg <a.hindborg@samsung.com>
 L:	rust-for-linux@vger.kernel.org
 S:	Supported
 W:	https://github.com/Rust-for-Linux/linux

From d4d84eaa3f394feb7be366d9e9c8c0699afeec2c Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Tue, 18 Jul 2023 07:45:20 +0200
Subject: [PATCH 0920/2047] MAINTAINERS: add Alice Ryhl as Rust reviewer

Alice has been involved with the Rust for Linux project for
almost a year now. She has been primarily working on the
Android Binder Driver [1].

In addition, she has been reviewing patches in the mailing
list for some months and has submitted improvements to the
core Rust support.

She is also part of the core maintainer team for the widely
used library Tokio [2], an asynchronous Rust runtime.

Her expertise with the language will be very useful to have
around in the future if Rust grows within the kernel, thus
add her to the `RUST` entry as reviewer.

Link: https://rust-for-linux.com/android-binder-driver [1]
Link: https://tokio.rs [2]
Acked-by: Alice Ryhl <aliceryhl@google.com>
Acked-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lore.kernel.org/r/20230718054521.1048785-1-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c469df613a29..12601a47c839 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18555,6 +18555,7 @@ R:	Gary Guo <gary@garyguo.net>
 R:	Björn Roy Baron <bjorn3_gh@protonmail.com>
 R:	Benno Lossin <benno.lossin@proton.me>
 R:	Andreas Hindborg <a.hindborg@samsung.com>
+R:	Alice Ryhl <aliceryhl@google.com>
 L:	rust-for-linux@vger.kernel.org
 S:	Supported
 W:	https://github.com/Rust-for-Linux/linux

From d824d2f98565e7c4cb1b862c230198fbe1a968be Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Fri, 16 Jun 2023 02:16:21 +0200
Subject: [PATCH 0921/2047] kbuild: rust_is_available: remove -v option

The -v option is passed when this script is invoked from Makefile,
but not when invoked from Kconfig.

As you can see in scripts/Kconfig.include, the 'success' macro suppresses
stdout and stderr anyway, so this script does not need to be quiet.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Tested-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20230109061436.3146442-1-masahiroy@kernel.org
[ Reworded prefix to match the others in the patch series. ]
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Link: https://lore.kernel.org/r/20230616001631.463536-2-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 Makefile                     |  4 +-
 scripts/rust_is_available.sh | 96 +++++++++++++++---------------------
 2 files changed, 42 insertions(+), 58 deletions(-)

diff --git a/Makefile b/Makefile
index 64ec209b26f0..a5880570a526 100644
--- a/Makefile
+++ b/Makefile
@@ -1289,7 +1289,7 @@ prepare0: archprepare
 # All the preparing..
 prepare: prepare0
 ifdef CONFIG_RUST
-	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh -v
+	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh
 	$(Q)$(MAKE) $(build)=rust
 endif
 
@@ -1825,7 +1825,7 @@ $(DOC_TARGETS):
 # "Is Rust available?" target
 PHONY += rustavailable
 rustavailable:
-	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh -v && echo "Rust is available!"
+	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh && echo "Rust is available!"
 
 # Documentation target
 #
diff --git a/scripts/rust_is_available.sh b/scripts/rust_is_available.sh
index aebbf1913970..f43a010eaf30 100755
--- a/scripts/rust_is_available.sh
+++ b/scripts/rust_is_available.sh
@@ -2,8 +2,6 @@
 # SPDX-License-Identifier: GPL-2.0
 #
 # Tests whether a suitable Rust toolchain is available.
-#
-# Pass `-v` for human output and more checks (as warnings).
 
 set -e
 
@@ -23,21 +21,17 @@ get_canonical_version()
 
 # Check that the Rust compiler exists.
 if ! command -v "$RUSTC" >/dev/null; then
-	if [ "$1" = -v ]; then
-		echo >&2 "***"
-		echo >&2 "*** Rust compiler '$RUSTC' could not be found."
-		echo >&2 "***"
-	fi
+	echo >&2 "***"
+	echo >&2 "*** Rust compiler '$RUSTC' could not be found."
+	echo >&2 "***"
 	exit 1
 fi
 
 # Check that the Rust bindings generator exists.
 if ! command -v "$BINDGEN" >/dev/null; then
-	if [ "$1" = -v ]; then
-		echo >&2 "***"
-		echo >&2 "*** Rust bindings generator '$BINDGEN' could not be found."
-		echo >&2 "***"
-	fi
+	echo >&2 "***"
+	echo >&2 "*** Rust bindings generator '$BINDGEN' could not be found."
+	echo >&2 "***"
 	exit 1
 fi
 
@@ -53,16 +47,14 @@ rust_compiler_min_version=$($min_tool_version rustc)
 rust_compiler_cversion=$(get_canonical_version $rust_compiler_version)
 rust_compiler_min_cversion=$(get_canonical_version $rust_compiler_min_version)
 if [ "$rust_compiler_cversion" -lt "$rust_compiler_min_cversion" ]; then
-	if [ "$1" = -v ]; then
-		echo >&2 "***"
-		echo >&2 "*** Rust compiler '$RUSTC' is too old."
-		echo >&2 "***   Your version:    $rust_compiler_version"
-		echo >&2 "***   Minimum version: $rust_compiler_min_version"
-		echo >&2 "***"
-	fi
+	echo >&2 "***"
+	echo >&2 "*** Rust compiler '$RUSTC' is too old."
+	echo >&2 "***   Your version:    $rust_compiler_version"
+	echo >&2 "***   Minimum version: $rust_compiler_min_version"
+	echo >&2 "***"
 	exit 1
 fi
-if [ "$1" = -v ] && [ "$rust_compiler_cversion" -gt "$rust_compiler_min_cversion" ]; then
+if [ "$rust_compiler_cversion" -gt "$rust_compiler_min_cversion" ]; then
 	echo >&2 "***"
 	echo >&2 "*** Rust compiler '$RUSTC' is too new. This may or may not work."
 	echo >&2 "***   Your version:     $rust_compiler_version"
@@ -82,16 +74,14 @@ rust_bindings_generator_min_version=$($min_tool_version bindgen)
 rust_bindings_generator_cversion=$(get_canonical_version $rust_bindings_generator_version)
 rust_bindings_generator_min_cversion=$(get_canonical_version $rust_bindings_generator_min_version)
 if [ "$rust_bindings_generator_cversion" -lt "$rust_bindings_generator_min_cversion" ]; then
-	if [ "$1" = -v ]; then
-		echo >&2 "***"
-		echo >&2 "*** Rust bindings generator '$BINDGEN' is too old."
-		echo >&2 "***   Your version:    $rust_bindings_generator_version"
-		echo >&2 "***   Minimum version: $rust_bindings_generator_min_version"
-		echo >&2 "***"
-	fi
+	echo >&2 "***"
+	echo >&2 "*** Rust bindings generator '$BINDGEN' is too old."
+	echo >&2 "***   Your version:    $rust_bindings_generator_version"
+	echo >&2 "***   Minimum version: $rust_bindings_generator_min_version"
+	echo >&2 "***"
 	exit 1
 fi
-if [ "$1" = -v ] && [ "$rust_bindings_generator_cversion" -gt "$rust_bindings_generator_min_cversion" ]; then
+if [ "$rust_bindings_generator_cversion" -gt "$rust_bindings_generator_min_cversion" ]; then
 	echo >&2 "***"
 	echo >&2 "*** Rust bindings generator '$BINDGEN' is too new. This may or may not work."
 	echo >&2 "***   Your version:     $rust_bindings_generator_version"
@@ -110,13 +100,11 @@ bindgen_libclang_min_version=$($min_tool_version llvm)
 bindgen_libclang_cversion=$(get_canonical_version $bindgen_libclang_version)
 bindgen_libclang_min_cversion=$(get_canonical_version $bindgen_libclang_min_version)
 if [ "$bindgen_libclang_cversion" -lt "$bindgen_libclang_min_cversion" ]; then
-	if [ "$1" = -v ]; then
-		echo >&2 "***"
-		echo >&2 "*** libclang (used by the Rust bindings generator '$BINDGEN') is too old."
-		echo >&2 "***   Your version:    $bindgen_libclang_version"
-		echo >&2 "***   Minimum version: $bindgen_libclang_min_version"
-		echo >&2 "***"
-	fi
+	echo >&2 "***"
+	echo >&2 "*** libclang (used by the Rust bindings generator '$BINDGEN') is too old."
+	echo >&2 "***   Your version:    $bindgen_libclang_version"
+	echo >&2 "***   Minimum version: $bindgen_libclang_min_version"
+	echo >&2 "***"
 	exit 1
 fi
 
@@ -125,21 +113,19 @@ fi
 #
 # In the future, we might be able to perform a full version check, see
 # https://github.com/rust-lang/rust-bindgen/issues/2138.
-if [ "$1" = -v ]; then
-	cc_name=$($(dirname $0)/cc-version.sh "$CC" | cut -f1 -d' ')
-	if [ "$cc_name" = Clang ]; then
-		clang_version=$( \
-			LC_ALL=C "$CC" --version 2>/dev/null \
-				| sed -nE '1s:.*version ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p'
-		)
-		if [ "$clang_version" != "$bindgen_libclang_version" ]; then
-			echo >&2 "***"
-			echo >&2 "*** libclang (used by the Rust bindings generator '$BINDGEN')"
-			echo >&2 "*** version does not match Clang's. This may be a problem."
-			echo >&2 "***   libclang version: $bindgen_libclang_version"
-			echo >&2 "***   Clang version:    $clang_version"
-			echo >&2 "***"
-		fi
+cc_name=$($(dirname $0)/cc-version.sh "$CC" | cut -f1 -d' ')
+if [ "$cc_name" = Clang ]; then
+	clang_version=$( \
+		LC_ALL=C "$CC" --version 2>/dev/null \
+			| sed -nE '1s:.*version ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p'
+	)
+	if [ "$clang_version" != "$bindgen_libclang_version" ]; then
+		echo >&2 "***"
+		echo >&2 "*** libclang (used by the Rust bindings generator '$BINDGEN')"
+		echo >&2 "*** version does not match Clang's. This may be a problem."
+		echo >&2 "***   libclang version: $bindgen_libclang_version"
+		echo >&2 "***   Clang version:    $clang_version"
+		echo >&2 "***"
 	fi
 fi
 
@@ -150,11 +136,9 @@ rustc_sysroot=$("$RUSTC" $KRUSTFLAGS --print sysroot)
 rustc_src=${RUST_LIB_SRC:-"$rustc_sysroot/lib/rustlib/src/rust/library"}
 rustc_src_core="$rustc_src/core/src/lib.rs"
 if [ ! -e "$rustc_src_core" ]; then
-	if [ "$1" = -v ]; then
-		echo >&2 "***"
-		echo >&2 "*** Source code for the 'core' standard library could not be found"
-		echo >&2 "*** at '$rustc_src_core'."
-		echo >&2 "***"
-	fi
+	echo >&2 "***"
+	echo >&2 "*** Source code for the 'core' standard library could not be found"
+	echo >&2 "*** at '$rustc_src_core'."
+	echo >&2 "***"
 	exit 1
 fi

From dee3a6b819c96fc8b1907577f585fd66f5c0fefe Mon Sep 17 00:00:00 2001
From: Russell Currey <ruscur@russell.cc>
Date: Fri, 16 Jun 2023 02:16:22 +0200
Subject: [PATCH 0922/2047] kbuild: rust_is_available: fix version check when
 CC has multiple arguments

rust_is_available.sh uses cc-version.sh to identify which C compiler is
in use, as scripts/Kconfig.include does.  cc-version.sh isn't designed to
be able to handle multiple arguments in one variable, i.e. "ccache clang".
Its invocation in rust_is_available.sh quotes "$CC", which makes
$1 == "ccache clang" instead of the intended $1 == ccache & $2 == clang.

cc-version.sh could also be changed to handle having "ccache clang" as one
argument, but it only has the one consumer upstream, making it simpler to
fix the caller here.

Signed-off-by: Russell Currey <ruscur@russell.cc>
Fixes: 78521f3399ab ("scripts: add `rust_is_available.sh`")
Link: https://github.com/Rust-for-Linux/linux/pull/873
[ Reworded title prefix and reflow line to 75 columns. ]
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20230616001631.463536-3-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 scripts/rust_is_available.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/rust_is_available.sh b/scripts/rust_is_available.sh
index f43a010eaf30..0c9be438e4cd 100755
--- a/scripts/rust_is_available.sh
+++ b/scripts/rust_is_available.sh
@@ -113,10 +113,10 @@ fi
 #
 # In the future, we might be able to perform a full version check, see
 # https://github.com/rust-lang/rust-bindgen/issues/2138.
-cc_name=$($(dirname $0)/cc-version.sh "$CC" | cut -f1 -d' ')
+cc_name=$($(dirname $0)/cc-version.sh $CC | cut -f1 -d' ')
 if [ "$cc_name" = Clang ]; then
 	clang_version=$( \
-		LC_ALL=C "$CC" --version 2>/dev/null \
+		LC_ALL=C $CC --version 2>/dev/null \
 			| sed -nE '1s:.*version ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p'
 	)
 	if [ "$clang_version" != "$bindgen_libclang_version" ]; then

From eae90172c5b89f08f054b959197397c5cadd658d Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 16 Jun 2023 02:16:23 +0200
Subject: [PATCH 0923/2047] docs: rust: add paragraph about finding a suitable
 `libclang`

Sometimes users need to tweak the finding process of `libclang`
for `bindgen` via the `clang-sys`-provided environment variables.

Thus add a paragraph to the setting up guide, including a reference
to `clang-sys`'s relevant documentation.

Link: https://lore.kernel.org/rust-for-linux/CAKwvOdm5JT4wbdQQYuW+RT07rCi6whGBM2iUAyg8A1CmLXG6Nw@mail.gmail.com/
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Link: https://lore.kernel.org/r/20230616001631.463536-4-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 Documentation/rust/quick-start.rst | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst
index a8931512ed98..58a183bb90b1 100644
--- a/Documentation/rust/quick-start.rst
+++ b/Documentation/rust/quick-start.rst
@@ -100,6 +100,23 @@ Install it via (note that this will download and build the tool from source)::
 
 	cargo install --locked --version $(scripts/min-tool-version.sh bindgen) bindgen
 
+``bindgen`` needs to find a suitable ``libclang`` in order to work. If it is
+not found (or a different ``libclang`` than the one found should be used),
+the process can be tweaked using the environment variables understood by
+``clang-sys`` (the Rust bindings crate that ``bindgen`` uses to access
+``libclang``):
+
+* ``LLVM_CONFIG_PATH`` can be pointed to an ``llvm-config`` executable.
+
+* Or ``LIBCLANG_PATH`` can be pointed to a ``libclang`` shared library
+  or to the directory containing it.
+
+* Or ``CLANG_PATH`` can be pointed to a ``clang`` executable.
+
+For details, please see ``clang-sys``'s documentation at:
+
+	https://github.com/KyleMayes/clang-sys#environment-variables
+
 
 Requirements: Developing
 ------------------------

From aac284b1eb420c9317475cacdd21dd0ae3c3eda8 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 16 Jun 2023 02:16:24 +0200
Subject: [PATCH 0924/2047] kbuild: rust_is_available: print docs reference

People trying out the Rust support in the kernel may get
warnings and errors from `scripts/rust_is_available.sh`
from the `rustavailable` target or the build step.

Some of those users may be following the Quick Start guide,
but others may not (likely those getting warnings from
the build step instead of the target).

While the messages are fairly clear on what the problem is,
it may not be clear how to solve the particular issue,
especially for those not aware of the documentation.

We could add all sorts of details on the script for each one,
but it is better to point users to the documentation instead,
where it is easily readable in different formats. It also
avoids duplication.

Thus add a reference to the documentation whenever the script
fails or there is at least a warning.

Reviewed-by: Finn Behrens <fin@nyantec.com>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Link: https://lore.kernel.org/r/20230616001631.463536-5-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 scripts/rust_is_available.sh | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/scripts/rust_is_available.sh b/scripts/rust_is_available.sh
index 0c9be438e4cd..6b8131d5b547 100755
--- a/scripts/rust_is_available.sh
+++ b/scripts/rust_is_available.sh
@@ -19,6 +19,20 @@ get_canonical_version()
 	echo $((100000 * $1 + 100 * $2 + $3))
 }
 
+# Print a reference to the Quick Start guide in the documentation.
+print_docs_reference()
+{
+	echo >&2 "***"
+	echo >&2 "*** Please see Documentation/rust/quick-start.rst for details"
+	echo >&2 "*** on how to set up the Rust support."
+	echo >&2 "***"
+}
+
+# If the script fails for any reason, or if there was any warning, then
+# print a reference to the documentation on exit.
+warning=0
+trap 'if [ $? -ne 0 ] || [ $warning -ne 0 ]; then print_docs_reference; fi' EXIT
+
 # Check that the Rust compiler exists.
 if ! command -v "$RUSTC" >/dev/null; then
 	echo >&2 "***"
@@ -60,6 +74,7 @@ if [ "$rust_compiler_cversion" -gt "$rust_compiler_min_cversion" ]; then
 	echo >&2 "***   Your version:     $rust_compiler_version"
 	echo >&2 "***   Expected version: $rust_compiler_min_version"
 	echo >&2 "***"
+	warning=1
 fi
 
 # Check that the Rust bindings generator is suitable.
@@ -87,6 +102,7 @@ if [ "$rust_bindings_generator_cversion" -gt "$rust_bindings_generator_min_cvers
 	echo >&2 "***   Your version:     $rust_bindings_generator_version"
 	echo >&2 "***   Expected version: $rust_bindings_generator_min_version"
 	echo >&2 "***"
+	warning=1
 fi
 
 # Check that the `libclang` used by the Rust bindings generator is suitable.
@@ -126,6 +142,7 @@ if [ "$cc_name" = Clang ]; then
 		echo >&2 "***   libclang version: $bindgen_libclang_version"
 		echo >&2 "***   Clang version:    $clang_version"
 		echo >&2 "***"
+		warning=1
 	fi
 fi
 

From 52cae7f28ed6c3992489f16bb355f5b623f0912e Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 16 Jun 2023 02:16:25 +0200
Subject: [PATCH 0925/2047] kbuild: rust_is_available: add check for `bindgen`
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`scripts/rust_is_available.sh` calls `bindgen` with a special
header in order to check whether the `libclang` version in use
is suitable.

However, the invocation itself may fail if, for instance, `bindgen`
cannot locate `libclang`. This is fine for Kconfig (since the
script will still fail and therefore disable Rust as it should),
but it is pretty confusing for users of the `rustavailable` target
given the error will be unrelated:

    ./scripts/rust_is_available.sh: 21: arithmetic expression: expecting primary: "100000 *  + 100 *  + "
    make: *** [Makefile:1816: rustavailable] Error 2

Instead, run the `bindgen` invocation independently in a previous
step, saving its output and return code. If it fails, then show
the user a proper error message. Otherwise, continue as usual
with the saved output.

Since the previous patch we show a reference to the docs, and
the docs now explain how `bindgen` looks for `libclang`,
thus the error message can leverage the documentation, avoiding
duplication here (and making users aware of the setup guide in
the documentation).

Reported-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lore.kernel.org/rust-for-linux/CAKwvOdm5JT4wbdQQYuW+RT07rCi6whGBM2iUAyg8A1CmLXG6Nw@mail.gmail.com/
Reported-by: François Valenduc <francoisvalenduc@gmail.com>
Closes: https://github.com/Rust-for-Linux/linux/issues/934
Reported-by: Alexandru Radovici <msg4alex@gmail.com>
Closes: https://github.com/Rust-for-Linux/linux/pull/921
Reported-by: Matthew Leach <dev@mattleach.net>
Closes: https://lore.kernel.org/rust-for-linux/20230507084116.1099067-1-dev@mattleach.net/
Fixes: 78521f3399ab ("scripts: add `rust_is_available.sh`")
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20230616001631.463536-6-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 scripts/rust_is_available.sh | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/scripts/rust_is_available.sh b/scripts/rust_is_available.sh
index 6b8131d5b547..1bdff4472cbe 100755
--- a/scripts/rust_is_available.sh
+++ b/scripts/rust_is_available.sh
@@ -106,8 +106,28 @@ if [ "$rust_bindings_generator_cversion" -gt "$rust_bindings_generator_min_cvers
 fi
 
 # Check that the `libclang` used by the Rust bindings generator is suitable.
+#
+# In order to do that, first invoke `bindgen` to get the `libclang` version
+# found by `bindgen`. This step may already fail if, for instance, `libclang`
+# is not found, thus inform the user in such a case.
+bindgen_libclang_output=$( \
+	LC_ALL=C "$BINDGEN" $(dirname $0)/rust_is_available_bindgen_libclang.h 2>&1 >/dev/null
+) || bindgen_libclang_code=$?
+if [ -n "$bindgen_libclang_code" ]; then
+	echo >&2 "***"
+	echo >&2 "*** Running '$BINDGEN' to check the libclang version (used by the Rust"
+	echo >&2 "*** bindings generator) failed with code $bindgen_libclang_code. This may be caused by"
+	echo >&2 "*** a failure to locate libclang. See output and docs below for details:"
+	echo >&2 "***"
+	echo >&2 "$bindgen_libclang_output"
+	echo >&2 "***"
+	exit 1
+fi
+
+# `bindgen` returned successfully, thus use the output to check that the version
+# of the `libclang` found by the Rust bindings generator is suitable.
 bindgen_libclang_version=$( \
-	LC_ALL=C "$BINDGEN" $(dirname $0)/rust_is_available_bindgen_libclang.h 2>&1 >/dev/null \
+	echo "$bindgen_libclang_output" \
 		| grep -F 'clang version ' \
 		| grep -oE '[0-9]+\.[0-9]+\.[0-9]+' \
 		| head -n 1 \

From e90db5521de2e00b63ba425b3b215f02563efe0a Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 16 Jun 2023 02:16:26 +0200
Subject: [PATCH 0926/2047] kbuild: rust_is_available: check that environment
 variables are set

Sometimes [1] users may attempt to setup the Rust support by
checking what Kbuild does and they end up finding out about
`scripts/rust_is_available.sh`. Inevitably, they run the script
directly, but unless they setup the required variables,
the result of the script is not meaningful.

We could add some defaults to the variables, but that could be
confusing for those that may override the defaults (compared
to their kernel builds), and `$CC` would not be a simple default
in any case.

Therefore, instead, explicitly check whether the expected variables
are set (`$RUSTC`, `$BINDGEN` and `$CC`). If not, print an explanation
about the fact that the script is meant to be called from Kbuild,
since that is the most likely cause for the variables not being set.

Link: https://lore.kernel.org/oe-kbuild-all/Y6r4mXz5NS0+HVXo@zn.tnic/ [1]
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20230616001631.463536-7-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 scripts/rust_is_available.sh | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/scripts/rust_is_available.sh b/scripts/rust_is_available.sh
index 1bdff4472cbe..7e0368babe64 100755
--- a/scripts/rust_is_available.sh
+++ b/scripts/rust_is_available.sh
@@ -28,11 +28,40 @@ print_docs_reference()
 	echo >&2 "***"
 }
 
+# Print an explanation about the fact that the script is meant to be called from Kbuild.
+print_kbuild_explanation()
+{
+	echo >&2 "***"
+	echo >&2 "*** This script is intended to be called from Kbuild."
+	echo >&2 "*** Please use the 'rustavailable' target to call it instead."
+	echo >&2 "*** Otherwise, the results may not be meaningful."
+	exit 1
+}
+
 # If the script fails for any reason, or if there was any warning, then
 # print a reference to the documentation on exit.
 warning=0
 trap 'if [ $? -ne 0 ] || [ $warning -ne 0 ]; then print_docs_reference; fi' EXIT
 
+# Check that the expected environment variables are set.
+if [ -z "${RUSTC+x}" ]; then
+	echo >&2 "***"
+	echo >&2 "*** Environment variable 'RUSTC' is not set."
+	print_kbuild_explanation
+fi
+
+if [ -z "${BINDGEN+x}" ]; then
+	echo >&2 "***"
+	echo >&2 "*** Environment variable 'BINDGEN' is not set."
+	print_kbuild_explanation
+fi
+
+if [ -z "${CC+x}" ]; then
+	echo >&2 "***"
+	echo >&2 "*** Environment variable 'CC' is not set."
+	print_kbuild_explanation
+fi
+
 # Check that the Rust compiler exists.
 if ! command -v "$RUSTC" >/dev/null; then
 	echo >&2 "***"

From 21d28cd2fa5fc01ee83e64df838ecd72112c09b7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 3 Aug 2023 23:07:41 +0200
Subject: [PATCH 0927/2047] cpuidle: teo: Do not call
 tick_nohz_get_sleep_length() upfront

Because the cost of calling tick_nohz_get_sleep_length() may increase
in the future, reorder the code in teo_select() so it first uses the
statistics to pick up a candidate idle state and applies the utilization
heuristic to it and only then calls tick_nohz_get_sleep_length() to
obtain the sleep length value and refine the selection if necessary.

This change by itself does not cause tick_nohz_get_sleep_length() to
be called less often, but it prepares the code for subsequent changes
that will do so.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Kajetan Puchalski <kajetan.puchalski@arm.com>
Tested-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
---
 drivers/cpuidle/governors/teo.c | 107 ++++++++++++++------------------
 1 file changed, 45 insertions(+), 62 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 3915e5639790..4ef76cdfc5c8 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -306,15 +306,10 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	cpu_data->total += PULSE;
 }
 
-static bool teo_time_ok(u64 interval_ns)
+static bool teo_state_ok(int i, struct cpuidle_driver *drv)
 {
-	return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
-}
-
-static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
-{
-	return (drv->states[idx].target_residency_ns +
-		drv->states[idx+1].target_residency_ns) / 2;
+	return !tick_nohz_tick_stopped() ||
+		drv->states[i].target_residency_ns >= TICK_NSEC;
 }
 
 /**
@@ -354,6 +349,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+	ktime_t delta_tick = TICK_NSEC / 2;
 	unsigned int idx_intercept_sum = 0;
 	unsigned int intercept_sum = 0;
 	unsigned int idx_recent_sum = 0;
@@ -363,7 +359,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	int constraint_idx = 0;
 	int idx0 = 0, idx = -1;
 	bool alt_intercepts, alt_recent;
-	ktime_t delta_tick;
 	bool cpu_utilized;
 	s64 duration_ns;
 	int i;
@@ -374,9 +369,11 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	}
 
 	cpu_data->time_span_ns = local_clock();
-
-	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
-	cpu_data->sleep_length_ns = duration_ns;
+	/*
+	 * Set the expected sleep length to infinity in case of an early
+	 * return.
+	 */
+	cpu_data->sleep_length_ns = KTIME_MAX;
 
 	/* Check if there is any choice in the first place. */
 	if (drv->state_count < 2) {
@@ -384,11 +381,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		goto out_tick;
 	}
 
-	if (!dev->states_usage[0].disable) {
+	if (!dev->states_usage[0].disable)
 		idx = 0;
-		if (drv->states[1].target_residency_ns > duration_ns)
-			goto out_tick;
-	}
 
 	cpu_utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
 	/*
@@ -397,8 +391,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * the shallowest non-polling state and exit.
 	 */
 	if (drv->state_count < 3 && cpu_utilized) {
-		/* The CPU is utilized, so assume a short idle duration. */
-		duration_ns = teo_middle_of_bin(0, drv);
 		/*
 		 * If state 0 is enabled and it is not a polling one, select it
 		 * right away unless the scheduler tick has been stopped, in
@@ -408,22 +400,17 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 * anyway.
 		 */
 		if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
-		    teo_time_ok(duration_ns)) || dev->states_usage[1].disable) {
+		    teo_state_ok(0, drv)) || dev->states_usage[1].disable) {
 			idx = 0;
 			goto out_tick;
 		}
 		/* Assume that state 1 is not a polling one and use it. */
 		idx = 1;
+		duration_ns = drv->states[1].target_residency_ns;
 		goto end;
 	}
 
-	/*
-	 * Find the deepest idle state whose target residency does not exceed
-	 * the current sleep length and the deepest idle state not deeper than
-	 * the former whose exit latency does not exceed the current latency
-	 * constraint.  Compute the sums of metrics for early wakeup pattern
-	 * detection.
-	 */
+	/* Compute the sums of metrics for early wakeup pattern detection. */
 	for (i = 1; i < drv->state_count; i++) {
 		struct teo_bin *prev_bin = &cpu_data->state_bins[i-1];
 		struct cpuidle_state *s = &drv->states[i];
@@ -439,19 +426,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		if (dev->states_usage[i].disable)
 			continue;
 
-		if (idx < 0) {
-			idx = i; /* first enabled state */
-			idx0 = i;
-		}
-
-		if (s->target_residency_ns > duration_ns)
-			break;
+		if (idx < 0)
+			idx0 = i; /* first enabled state */
 
 		idx = i;
 
 		if (s->exit_latency_ns <= latency_req)
 			constraint_idx = i;
 
+		/* Save the sums for the current state. */
 		idx_intercept_sum = intercept_sum;
 		idx_hit_sum = hit_sum;
 		idx_recent_sum = recent_sum;
@@ -465,7 +448,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
 	if (idx == idx0) {
 		/*
-		 * This is the first enabled idle state, so use it, but do not
+		 * Only one idle state is enabled, so use it, but do not
 		 * allow the tick to be stopped it is shallow enough.
 		 */
 		duration_ns = drv->states[idx].target_residency_ns;
@@ -479,13 +462,11 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * all of the deeper states, or the sum of the numbers of recent
 	 * intercepts over all of the states shallower than the candidate one
 	 * is greater than a half of the number of recent events taken into
-	 * account, the CPU is likely to wake up early, so find an alternative
-	 * idle state to select.
+	 * account, a shallower idle state is likely to be a better choice.
 	 */
 	alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum;
 	alt_recent = idx_recent_sum > NR_RECENT / 2;
 	if (alt_recent || alt_intercepts) {
-		s64 first_suitable_span_ns = duration_ns;
 		int first_suitable_idx = idx;
 
 		/*
@@ -494,44 +475,39 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 * cases (both with respect to intercepts overall and with
 		 * respect to the recent intercepts only) in the past.
 		 *
-		 * Take the possible latency constraint and duration limitation
-		 * present if the tick has been stopped already into account.
+		 * Take the possible duration limitation present if the tick
+		 * has been stopped already into account.
 		 */
 		intercept_sum = 0;
 		recent_sum = 0;
 
 		for (i = idx - 1; i >= 0; i--) {
 			struct teo_bin *bin = &cpu_data->state_bins[i];
-			s64 span_ns;
 
 			intercept_sum += bin->intercepts;
 			recent_sum += bin->recent;
 
-			span_ns = teo_middle_of_bin(i, drv);
-
 			if ((!alt_recent || 2 * recent_sum > idx_recent_sum) &&
 			    (!alt_intercepts ||
 			     2 * intercept_sum > idx_intercept_sum)) {
-				if (teo_time_ok(span_ns) &&
-				    !dev->states_usage[i].disable) {
+				/*
+				 * Use the current state unless it is too
+				 * shallow or disabled, in which case take the
+				 * first enabled state that is deep enough.
+				 */
+				if (teo_state_ok(i, drv) &&
+				    !dev->states_usage[i].disable)
 					idx = i;
-					duration_ns = span_ns;
-				} else {
-					/*
-					 * The current state is too shallow or
-					 * disabled, so take the first enabled
-					 * deeper state with suitable time span.
-					 */
+				else
 					idx = first_suitable_idx;
-					duration_ns = first_suitable_span_ns;
-				}
+
 				break;
 			}
 
 			if (dev->states_usage[i].disable)
 				continue;
 
-			if (!teo_time_ok(span_ns)) {
+			if (!teo_state_ok(i, drv)) {
 				/*
 				 * The current state is too shallow, but if an
 				 * alternative candidate state has been found,
@@ -543,7 +519,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 				break;
 			}
 
-			first_suitable_span_ns = span_ns;
 			first_suitable_idx = i;
 		}
 	}
@@ -562,14 +537,22 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * not sufficiently large.
 	 */
 	if (cpu_utilized) {
-		s64 span_ns;
-
-		i = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
-		span_ns = teo_middle_of_bin(i, drv);
-		if (teo_time_ok(span_ns)) {
+		i = teo_find_shallower_state(drv, dev, idx, KTIME_MAX, true);
+		if (teo_state_ok(i, drv))
+			idx = i;
+	}
+
+	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
+	cpu_data->sleep_length_ns = duration_ns;
+
+	/*
+	 * If the closest expected timer is before the terget residency of the
+	 * candidate state, a shallower one needs to be found.
+	 */
+	if (drv->states[idx].target_residency_ns > duration_ns) {
+		i = teo_find_shallower_state(drv, dev, idx, duration_ns, false);
+		if (teo_state_ok(i, drv))
 			idx = i;
-			duration_ns = span_ns;
-		}
 	}
 
 end:

From 6da8f9ba5a87f8aecf2cd4441cc6024a77e5b645 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 3 Aug 2023 23:09:18 +0200
Subject: [PATCH 0928/2047] cpuidle: teo: Skip tick_nohz_get_sleep_length()
 call in some cases

Make teo_select() avoid calling tick_nohz_get_sleep_length() if the
candidate idle state to return is state 0 or if state 0 is a polling
one and the target residency of the current candidate one is below
a certain threshold, in which cases it may be assumed that the CPU will
be woken up immediately by a non-timer wakeup source and the timers
are not likely to matter.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Kajetan Puchalski <kajetan.puchalski@arm.com>
Tested-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
---
 drivers/cpuidle/governors/teo.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 4ef76cdfc5c8..8248492cad3a 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -166,6 +166,12 @@
  */
 #define NR_RECENT	9
 
+/*
+ * Idle state target residency threshold used for deciding whether or not to
+ * check the time till the closest expected timer event.
+ */
+#define RESIDENCY_THRESHOLD_NS	(15 * NSEC_PER_USEC)
+
 /**
  * struct teo_bin - Metrics used by the TEO cpuidle governor.
  * @intercepts: The "intercepts" metric.
@@ -542,6 +548,22 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			idx = i;
 	}
 
+	/*
+	 * Skip the timers check if state 0 is the current candidate one,
+	 * because an immediate non-timer wakeup is expected in that case.
+	 */
+	if (!idx)
+		goto out_tick;
+
+	/*
+	 * If state 0 is a polling one, check if the target residency of
+	 * the current candidate state is low enough and skip the timers
+	 * check in that case too.
+	 */
+	if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
+	    drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS)
+		goto out_tick;
+
 	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
 	cpu_data->sleep_length_ns = duration_ns;
 

From 2662342079f54b8a940f7094c197c99458caeb0d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 3 Aug 2023 23:11:40 +0200
Subject: [PATCH 0929/2047] cpuidle: teo: Gather statistics regarding whether
 or not to stop the tick

Currently, if the target residency of the deepest idle state is less than
the tick period length, which is quite likely for HZ=100, and the deepest
idle state is about to be selected by the TEO idle governor, the decision
on whether or not to stop the scheduler tick is based entirely on the
time till the closest timer.  This is often insufficient, because timers
may not be in heavy use and there may be a plenty of other CPU wakeup
events between the deepest idle state's target residency and the closest
tick.

Allow the governor to count those events by making the deepest idle
state's bin effectively end at TICK_NSEC and introducing an additional
"bin" for collecting "hit" events (ie. the ones in which the measured
idle duration falls into the same bin as the time till the closest
timer) with idle duration values past TICK_NSEC.

This way the "intercepts" metric for the deepest idle state's bin
becomes nonzero in general, and so it can influence the decision on
whether or not to stop the tick possibly increasing the governor's
accuracy in that respect.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Kajetan Puchalski <kajetan.puchalski@arm.com>
Tested-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
---
 drivers/cpuidle/governors/teo.c | 41 ++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 8248492cad3a..70846b669255 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -192,6 +192,7 @@ struct teo_bin {
  * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
  * @next_recent_idx: Index of the next @recent_idx entry to update.
  * @recent_idx: Indices of bins corresponding to recent "intercepts".
+ * @tick_hits: Number of "hits" after TICK_NSEC.
  * @util_threshold: Threshold above which the CPU is considered utilized
  */
 struct teo_cpu {
@@ -201,6 +202,7 @@ struct teo_cpu {
 	unsigned int total;
 	int next_recent_idx;
 	int recent_idx[NR_RECENT];
+	unsigned int tick_hits;
 	unsigned long util_threshold;
 };
 
@@ -232,6 +234,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	int i, idx_timer = 0, idx_duration = 0;
+	s64 target_residency_ns;
 	u64 measured_ns;
 
 	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
@@ -272,7 +275,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * fall into.
 	 */
 	for (i = 0; i < drv->state_count; i++) {
-		s64 target_residency_ns = drv->states[i].target_residency_ns;
 		struct teo_bin *bin = &cpu_data->state_bins[i];
 
 		bin->hits -= bin->hits >> DECAY_SHIFT;
@@ -280,6 +282,8 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
 		cpu_data->total += bin->hits + bin->intercepts;
 
+		target_residency_ns = drv->states[i].target_residency_ns;
+
 		if (target_residency_ns <= cpu_data->sleep_length_ns) {
 			idx_timer = i;
 			if (target_residency_ns <= measured_ns)
@@ -294,6 +298,26 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	if (cpu_data->recent_idx[i] >= 0)
 		cpu_data->state_bins[cpu_data->recent_idx[i]].recent--;
 
+	/*
+	 * If the deepest state's target residency is below the tick length,
+	 * make a record of it to help teo_select() decide whether or not
+	 * to stop the tick.  This effectively adds an extra hits-only bin
+	 * beyond the last state-related one.
+	 */
+	if (target_residency_ns < TICK_NSEC) {
+		cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT;
+
+		cpu_data->total += cpu_data->tick_hits;
+
+		if (TICK_NSEC <= cpu_data->sleep_length_ns) {
+			idx_timer = drv->state_count;
+			if (TICK_NSEC <= measured_ns) {
+				cpu_data->tick_hits += PULSE;
+				goto end;
+			}
+		}
+	}
+
 	/*
 	 * If the measured idle duration falls into the same bin as the sleep
 	 * length, this is a "hit", so update the "hits" metric for that bin.
@@ -309,6 +333,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		cpu_data->recent_idx[i] = idx_duration;
 	}
 
+end:
 	cpu_data->total += PULSE;
 }
 
@@ -356,6 +381,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 	ktime_t delta_tick = TICK_NSEC / 2;
+	unsigned int tick_intercept_sum = 0;
 	unsigned int idx_intercept_sum = 0;
 	unsigned int intercept_sum = 0;
 	unsigned int idx_recent_sum = 0;
@@ -429,6 +455,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		hit_sum += prev_bin->hits;
 		recent_sum += prev_bin->recent;
 
+		tick_intercept_sum = intercept_sum;
+
 		if (dev->states_usage[i].disable)
 			continue;
 
@@ -461,6 +489,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		goto end;
 	}
 
+	tick_intercept_sum += cpu_data->state_bins[drv->state_count-1].intercepts;
+
 	/*
 	 * If the sum of the intercepts metric for all of the idle states
 	 * shallower than the current candidate one (idx) is greater than the
@@ -577,6 +607,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			idx = i;
 	}
 
+	/*
+	 * If the selected state's target residency is below the tick length
+	 * and intercepts occurring before the tick length are the majority of
+	 * total wakeup events, do not stop the tick.
+	 */
+	if (drv->states[idx].target_residency_ns < TICK_NSEC &&
+	    tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8)
+		duration_ns = TICK_NSEC / 2;
+
 end:
 	/*
 	 * Allow the tick to be stopped unless the selected state is a polling

From 882cdb06b668488a42ef717a260c05ba7dc43a49 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Aug 2023 14:38:07 +0200
Subject: [PATCH 0930/2047] x86/cpu: Fix Gracemont uarch

Alderlake N is an E-core only product using Gracemont
micro-architecture. It fits the pre-existing naming scheme perfectly
fine, adhere to it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20230807150405.686834933@infradead.org
---
 arch/x86/events/intel/core.c              |  2 +-
 arch/x86/events/intel/cstate.c            |  2 +-
 arch/x86/events/intel/uncore.c            |  2 +-
 arch/x86/events/msr.c                     |  2 +-
 arch/x86/events/rapl.c                    |  2 +-
 arch/x86/include/asm/intel-family.h       |  3 ++-
 arch/x86/kernel/cpu/intel_epb.c           |  2 +-
 arch/x86/mm/init.c                        |  2 +-
 drivers/idle/intel_idle.c                 | 10 +++++-----
 drivers/platform/x86/intel/pmc/core.c     |  2 +-
 drivers/powercap/intel_rapl_common.c      |  2 +-
 drivers/powercap/intel_rapl_msr.c         |  2 +-
 drivers/thermal/intel/intel_tcc_cooling.c |  2 +-
 tools/power/x86/turbostat/turbostat.c     |  2 +-
 14 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a149fafad813..c7e7ed6814ef 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6167,7 +6167,7 @@ __init int intel_pmu_init(void)
 		name = "Tremont";
 		break;
 
-	case INTEL_FAM6_ALDERLAKE_N:
+	case INTEL_FAM6_ATOM_GRACEMONT:
 		x86_pmu.mid_ack = true;
 		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 97035db68df2..96fffb2d521d 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -669,6 +669,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&glm_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT,	&glm_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,	&glm_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,	&adl_cstates),
 
 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		&icl_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		&icl_cstates),
@@ -684,7 +685,6 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		&icl_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&adl_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&adl_cstates),
-	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,		&adl_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		&adl_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,	&adl_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S,	&adl_cstates),
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index bc226603ef3e..69043e02e8a7 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1858,7 +1858,6 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		&rkl_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&adl_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&adl_uncore_init),
-	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,		&adl_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		&adl_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,	&adl_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S,	&adl_uncore_init),
@@ -1867,6 +1866,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&spr_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,	&spr_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&snr_uncore_init),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,	&adl_uncore_init),
 	{},
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 61c59e0627d4..9e237b30f017 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -106,7 +106,7 @@ static bool test_intel(int idx, void *data)
 	case INTEL_FAM6_ROCKETLAKE:
 	case INTEL_FAM6_ALDERLAKE:
 	case INTEL_FAM6_ALDERLAKE_L:
-	case INTEL_FAM6_ALDERLAKE_N:
+	case INTEL_FAM6_ATOM_GRACEMONT:
 	case INTEL_FAM6_RAPTORLAKE:
 	case INTEL_FAM6_RAPTORLAKE_P:
 	case INTEL_FAM6_RAPTORLAKE_S:
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 52e6e7ed4f78..1579429846cc 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -804,7 +804,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		&model_skl),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&model_skl),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&model_skl),
-	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,		&model_skl),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,	&model_skl),
 	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&model_spr),
 	X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,	&model_spr),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		&model_skl),
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index b3af2d45bbbb..40413262b4cc 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -114,7 +114,6 @@
 
 #define INTEL_FAM6_ALDERLAKE		0x97	/* Golden Cove / Gracemont */
 #define INTEL_FAM6_ALDERLAKE_L		0x9A	/* Golden Cove / Gracemont */
-#define INTEL_FAM6_ALDERLAKE_N		0xBE
 
 #define INTEL_FAM6_RAPTORLAKE		0xB7
 #define INTEL_FAM6_RAPTORLAKE_P		0xBA
@@ -154,6 +153,8 @@
 #define INTEL_FAM6_ATOM_TREMONT		0x96 /* Elkhart Lake */
 #define INTEL_FAM6_ATOM_TREMONT_L	0x9C /* Jasper Lake */
 
+#define INTEL_FAM6_ATOM_GRACEMONT	0xBE /* Alderlake N */
+
 #define INTEL_FAM6_SIERRAFOREST_X	0xAF
 
 #define INTEL_FAM6_GRANDRIDGE		0xB6
diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c
index 3b8476158236..e4c3ba91321c 100644
--- a/arch/x86/kernel/cpu/intel_epb.c
+++ b/arch/x86/kernel/cpu/intel_epb.c
@@ -206,7 +206,7 @@ static int intel_epb_offline(unsigned int cpu)
 static const struct x86_cpu_id intel_epb_normal[] = {
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,
 				   ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
-	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,
 				   ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,
 				   ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 8192452d1d2d..ad207f3cf4d1 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -273,7 +273,7 @@ static void __init probe_page_size_mask(void)
 static const struct x86_cpu_id invlpg_miss_ids[] = {
 	INTEL_MATCH(INTEL_FAM6_ALDERLAKE   ),
 	INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ),
-	INTEL_MATCH(INTEL_FAM6_ALDERLAKE_N ),
+	INTEL_MATCH(INTEL_FAM6_ATOM_GRACEMONT ),
 	INTEL_MATCH(INTEL_FAM6_RAPTORLAKE  ),
 	INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P),
 	INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S),
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index b930036edbbe..46c6de321c50 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -960,7 +960,7 @@ static struct cpuidle_state adl_l_cstates[] __initdata = {
 		.enter = NULL }
 };
 
-static struct cpuidle_state adl_n_cstates[] __initdata = {
+static struct cpuidle_state gmt_cstates[] __initdata = {
 	{
 		.name = "C1",
 		.desc = "MWAIT 0x00",
@@ -1405,8 +1405,8 @@ static const struct idle_cpu idle_cpu_adl_l __initconst = {
 	.state_table = adl_l_cstates,
 };
 
-static const struct idle_cpu idle_cpu_adl_n __initconst = {
-	.state_table = adl_n_cstates,
+static const struct idle_cpu idle_cpu_gmt __initconst = {
+	.state_table = gmt_cstates,
 };
 
 static const struct idle_cpu idle_cpu_spr __initconst = {
@@ -1479,7 +1479,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&idle_cpu_icx),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&idle_cpu_adl),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&idle_cpu_adl_l),
-	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,		&idle_cpu_adl_n),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,	&idle_cpu_gmt),
 	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&idle_cpu_spr),
 	X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,	&idle_cpu_spr),
 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
@@ -1979,7 +1979,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 		break;
 	case INTEL_FAM6_ALDERLAKE:
 	case INTEL_FAM6_ALDERLAKE_L:
-	case INTEL_FAM6_ALDERLAKE_N:
+	case INTEL_FAM6_ATOM_GRACEMONT:
 		adl_idle_state_table_update();
 		break;
 	}
diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
index 5a36b3f77bc5..84c175b9721a 100644
--- a/drivers/platform/x86/intel/pmc/core.c
+++ b/drivers/platform/x86/intel/pmc/core.c
@@ -1123,7 +1123,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = {
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,	icl_core_init),
 	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		tgl_core_init),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		tgl_core_init),
-	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,		tgl_core_init),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,	tgl_core_init),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		adl_core_init),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,        tgl_core_init),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		adl_core_init),
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 4e646e5e48f6..a942a2527a9a 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -1250,7 +1250,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		&rapl_defaults_core),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&rapl_defaults_core),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&rapl_defaults_core),
-	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,		&rapl_defaults_core),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,	&rapl_defaults_core),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		&rapl_defaults_core),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,        &rapl_defaults_core),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S,	&rapl_defaults_core),
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index 569e25eab1e1..1e775b0db211 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -141,7 +141,7 @@ static const struct x86_cpu_id pl4_support_ids[] = {
 	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
-	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, NULL),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, NULL),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL),
 	X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, NULL),
diff --git a/drivers/thermal/intel/intel_tcc_cooling.c b/drivers/thermal/intel/intel_tcc_cooling.c
index e95f799454fe..6c392147e6d1 100644
--- a/drivers/thermal/intel/intel_tcc_cooling.c
+++ b/drivers/thermal/intel/intel_tcc_cooling.c
@@ -60,7 +60,7 @@ static const struct x86_cpu_id tcc_ids[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, NULL),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
-	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, NULL),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, NULL),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, NULL),
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 8a36ba5df9f9..9a10512e3407 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -5447,7 +5447,7 @@ unsigned int intel_model_duplicates(unsigned int model)
 	case INTEL_FAM6_LAKEFIELD:
 	case INTEL_FAM6_ALDERLAKE:
 	case INTEL_FAM6_ALDERLAKE_L:
-	case INTEL_FAM6_ALDERLAKE_N:
+	case INTEL_FAM6_ATOM_GRACEMONT:
 	case INTEL_FAM6_RAPTORLAKE:
 	case INTEL_FAM6_RAPTORLAKE_P:
 	case INTEL_FAM6_RAPTORLAKE_S:

From 0cfd8fbadd6833d243c9a9d8649ba4a9f4361c93 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Aug 2023 14:38:08 +0200
Subject: [PATCH 0931/2047] x86/cpu: Fix Crestmont uarch

Sierra Forest and Grand Ridge are both E-core only using Crestmont
micro-architecture, They fit the pre-existing naming scheme prefectly
fine, adhere to it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20230807150405.757666627@infradead.org
---
 arch/x86/include/asm/intel-family.h                         | 5 ++---
 drivers/edac/i10nm_base.c                                   | 2 +-
 drivers/platform/x86/intel/speed_select_if/isst_if_common.c | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 40413262b4cc..49d40eebe36a 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -155,9 +155,8 @@
 
 #define INTEL_FAM6_ATOM_GRACEMONT	0xBE /* Alderlake N */
 
-#define INTEL_FAM6_SIERRAFOREST_X	0xAF
-
-#define INTEL_FAM6_GRANDRIDGE		0xB6
+#define INTEL_FAM6_ATOM_CRESTMONT_X	0xAF /* Sierra Forest */
+#define INTEL_FAM6_ATOM_CRESTMONT	0xB6 /* Grand Ridge */
 
 /* Xeon Phi */
 
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index a897b6aff368..5abf997ca7c1 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -906,7 +906,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = {
 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &spr_cfg),
 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &spr_cfg),
 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
-	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SIERRAFOREST_X,	X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
+	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X,	X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
index 1f59ac55c5f7..47e5a947b0b4 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
@@ -720,7 +720,7 @@ static struct miscdevice isst_if_char_driver = {
 
 static const struct x86_cpu_id hpm_cpu_ids[] = {
 	X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X,	NULL),
-	X86_MATCH_INTEL_FAM6_MODEL(SIERRAFOREST_X,	NULL),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X,	NULL),
 	{}
 };
 

From 535445621a66faa7050c21d9d668595116285648 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Aug 2023 14:38:09 +0200
Subject: [PATCH 0932/2047] x86/cpu: Update Hybrids

Give the hybrid thingies their own section, appropriately between Core
and Atom.

Add the Raptor Lake uarch names.

Put Lunar Lake after Arrow Lake per interweb guidance.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20230807150405.828551866@infradead.org
---
 arch/x86/include/asm/intel-family.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 49d40eebe36a..5fcd85fd64fd 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -98,8 +98,6 @@
 #define INTEL_FAM6_ICELAKE_L		0x7E	/* Sunny Cove */
 #define INTEL_FAM6_ICELAKE_NNPI		0x9D	/* Sunny Cove */
 
-#define INTEL_FAM6_LAKEFIELD		0x8A	/* Sunny Cove / Tremont */
-
 #define INTEL_FAM6_ROCKETLAKE		0xA7	/* Cypress Cove */
 
 #define INTEL_FAM6_TIGERLAKE_L		0x8C	/* Willow Cove */
@@ -112,20 +110,24 @@
 #define INTEL_FAM6_GRANITERAPIDS_X	0xAD
 #define INTEL_FAM6_GRANITERAPIDS_D	0xAE
 
+/* "Hybrid" Processors (P-Core/E-Core) */
+
+#define INTEL_FAM6_LAKEFIELD		0x8A	/* Sunny Cove / Tremont */
+
 #define INTEL_FAM6_ALDERLAKE		0x97	/* Golden Cove / Gracemont */
 #define INTEL_FAM6_ALDERLAKE_L		0x9A	/* Golden Cove / Gracemont */
 
-#define INTEL_FAM6_RAPTORLAKE		0xB7
+#define INTEL_FAM6_RAPTORLAKE		0xB7	/* Raptor Cove / Enhanced Gracemont */
 #define INTEL_FAM6_RAPTORLAKE_P		0xBA
 #define INTEL_FAM6_RAPTORLAKE_S		0xBF
 
 #define INTEL_FAM6_METEORLAKE		0xAC
 #define INTEL_FAM6_METEORLAKE_L		0xAA
 
-#define INTEL_FAM6_LUNARLAKE_M		0xBD
-
 #define INTEL_FAM6_ARROWLAKE		0xC6
 
+#define INTEL_FAM6_LUNARLAKE_M		0xBD
+
 /* "Small Core" Processors (Atom/E-Core) */
 
 #define INTEL_FAM6_ATOM_BONNELL		0x1C /* Diamondville, Pineview */

From a430021faad6b4fa86c820fc3e7f8dbfc2f14fb4 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Mon, 22 May 2023 04:30:35 -0700
Subject: [PATCH 0933/2047] perf/x86/intel: Add Crestmont PMU

The Grand Ridge and Sierra Forest are successors to Snow Ridge. They
both have Crestmont core. From the core PMU's perspective, they are
similar to the e-core of MTL. The only difference is the LBR event
logging feature, which will be implemented in the following patches.

Create a non-hybrid PMU setup for Grand Ridge and Sierra Forest.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Link: https://lore.kernel.org/r/20230522113040.2329924-1-kan.liang@linux.intel.com
---
 arch/x86/events/intel/core.c | 52 +++++++++++++++++++++++++++++++++++-
 arch/x86/events/intel/ds.c   |  9 +++++--
 arch/x86/events/perf_event.h |  2 ++
 3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index c7e7ed6814ef..64a3533997e1 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2129,6 +2129,17 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(topdown-retiring,       td_retiring_cmt,        "event=0x72,umask=0x0");
+EVENT_ATTR_STR(topdown-bad-spec,       td_bad_spec_cmt,        "event=0x73,umask=0x0");
+
+static struct attribute *cmt_events_attrs[] = {
+	EVENT_PTR(td_fe_bound_tnt),
+	EVENT_PTR(td_retiring_cmt),
+	EVENT_PTR(td_bad_spec_cmt),
+	EVENT_PTR(td_be_bound_tnt),
+	NULL
+};
+
 static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
 	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
@@ -4840,6 +4851,8 @@ PMU_FORMAT_ATTR(ldlat, "config1:0-15");
 
 PMU_FORMAT_ATTR(frontend, "config1:0-23");
 
+PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63");
+
 static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -4870,6 +4883,13 @@ static struct attribute *slm_format_attr[] = {
 	NULL
 };
 
+static struct attribute *cmt_format_attr[] = {
+	&format_attr_offcore_rsp.attr,
+	&format_attr_ldlat.attr,
+	&format_attr_snoop_rsp.attr,
+	NULL
+};
+
 static struct attribute *skl_format_attr[] = {
 	&format_attr_frontend.attr,
 	NULL,
@@ -5649,7 +5669,6 @@ static struct attribute *adl_hybrid_extra_attr[] = {
 	NULL
 };
 
-PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
 FORMAT_ATTR_HYBRID(snoop_rsp,	hybrid_small);
 
 static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
@@ -6197,6 +6216,37 @@ __init int intel_pmu_init(void)
 		name = "gracemont";
 		break;
 
+	case INTEL_FAM6_ATOM_CRESTMONT:
+	case INTEL_FAM6_ATOM_CRESTMONT_X:
+		x86_pmu.mid_ack = true;
+		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+		hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_cmt_extra_regs;
+
+		x86_pmu.pebs_aliases = NULL;
+		x86_pmu.pebs_prec_dist = true;
+		x86_pmu.lbr_pt_coexist = true;
+		x86_pmu.pebs_block = true;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
+
+		intel_pmu_pebs_data_source_cmt();
+		x86_pmu.pebs_latency_data = mtl_latency_data_small;
+		x86_pmu.get_event_constraints = cmt_get_event_constraints;
+		x86_pmu.limit_period = spr_limit_period;
+		td_attr = cmt_events_attrs;
+		mem_attr = grt_mem_attrs;
+		extra_attr = cmt_format_attr;
+		pr_cont("Crestmont events, ");
+		name = "crestmont";
+		break;
+
 	case INTEL_FAM6_WESTMERE:
 	case INTEL_FAM6_WESTMERE_EP:
 	case INTEL_FAM6_WESTMERE_EX:
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index df88576d6b2a..eb8dd8b8a1e8 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -144,7 +144,7 @@ void __init intel_pmu_pebs_data_source_adl(void)
 	__intel_pmu_pebs_data_source_grt(data_source);
 }
 
-static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source)
+static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
 {
 	data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
 	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
@@ -164,7 +164,12 @@ void __init intel_pmu_pebs_data_source_mtl(void)
 
 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
-	intel_pmu_pebs_data_source_cmt(data_source);
+	__intel_pmu_pebs_data_source_cmt(data_source);
+}
+
+void __init intel_pmu_pebs_data_source_cmt(void)
+{
+	__intel_pmu_pebs_data_source_cmt(pebs_data_source);
 }
 
 static u64 precise_store_data(u64 status)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index d6de4487348c..c8ba2be7585d 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1606,6 +1606,8 @@ void intel_pmu_pebs_data_source_grt(void);
 
 void intel_pmu_pebs_data_source_mtl(void);
 
+void intel_pmu_pebs_data_source_cmt(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);

From 9eb7e20e0c5cd069457845f965b3e8a7d736ecb7 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 16 Jun 2023 02:16:27 +0200
Subject: [PATCH 0934/2047] kbuild: rust_is_available: fix confusion when a
 version appears in the path

`bindgen`'s output for `libclang`'s version check contains paths, which
in turn may contain strings that look like version numbers [1][2]:

    .../6.1.0-dev/.../rust_is_available_bindgen_libclang.h:2:9: warning: clang version 11.1.0  [-W#pragma-messages], err: false

which the script will pick up as the version instead of the latter.

It is also the case that versions may appear after the actual version
(e.g. distribution's version text), which was the reason behind `head` [3]:

    .../rust-is-available-bindgen-libclang.h:2:9: warning: clang version 13.0.0 (Fedora 13.0.0-3.fc35) [-W#pragma-messages], err: false

Thus instead ask for a match after the `clang version` string.

Reported-by: Jordan Isaacs <mail@jdisaacs.com>
Closes: https://github.com/Rust-for-Linux/linux/issues/942 [1]
Reported-by: "Ethan D. Twardy" <ethan.twardy@gmail.com>
Closes: https://lore.kernel.org/rust-for-linux/20230528131802.6390-2-ethan.twardy@gmail.com/ [2]
Reported-by: Tiago Lam <tiagolam@gmail.com>
Closes: https://github.com/Rust-for-Linux/linux/pull/789 [3]
Fixes: 78521f3399ab ("scripts: add `rust_is_available.sh`")
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Ethan Twardy <ethan.twardy@gmail.com>
Tested-by: Ethan Twardy <ethan.twardy@gmail.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20230616001631.463536-8-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 scripts/rust_is_available.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/scripts/rust_is_available.sh b/scripts/rust_is_available.sh
index 7e0368babe64..810691af66eb 100755
--- a/scripts/rust_is_available.sh
+++ b/scripts/rust_is_available.sh
@@ -157,9 +157,7 @@ fi
 # of the `libclang` found by the Rust bindings generator is suitable.
 bindgen_libclang_version=$( \
 	echo "$bindgen_libclang_output" \
-		| grep -F 'clang version ' \
-		| grep -oE '[0-9]+\.[0-9]+\.[0-9]+' \
-		| head -n 1 \
+		| sed -nE 's:.*clang version ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p'
 )
 bindgen_libclang_min_version=$($min_tool_version llvm)
 bindgen_libclang_cversion=$(get_canonical_version $bindgen_libclang_version)

From 7cd6a3e1f94bab4f2a3425e06f70ab13eb8190d4 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 16 Jun 2023 02:16:28 +0200
Subject: [PATCH 0935/2047] kbuild: rust_is_available: normalize version
 matching

In order to match the version string, `sed` is used in a couple
cases, and `grep` and `head` in a couple others.

Make the script more consistent and easier to understand by
using the same method, `sed`, for all of them.

This makes the version matching also a bit more strict for
the changed cases, since the strings `rustc ` and `bindgen `
will now be required, which should be fine since `rustc`
complains if one attempts to call it with another program
name, and `bindgen` uses a hardcoded string.

In addition, clarify why one of the existing `sed` commands
does not provide an address like the others.

Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Link: https://lore.kernel.org/r/20230616001631.463536-9-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 scripts/rust_is_available.sh | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/scripts/rust_is_available.sh b/scripts/rust_is_available.sh
index 810691af66eb..b7e0781fdea9 100755
--- a/scripts/rust_is_available.sh
+++ b/scripts/rust_is_available.sh
@@ -83,8 +83,7 @@ fi
 # Non-stable and distributions' versions may have a version suffix, e.g. `-dev`.
 rust_compiler_version=$( \
 	LC_ALL=C "$RUSTC" --version 2>/dev/null \
-		| head -n 1 \
-		| grep -oE '[0-9]+\.[0-9]+\.[0-9]+' \
+		| sed -nE '1s:.*rustc ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p'
 )
 rust_compiler_min_version=$($min_tool_version rustc)
 rust_compiler_cversion=$(get_canonical_version $rust_compiler_version)
@@ -111,8 +110,7 @@ fi
 # Non-stable and distributions' versions may have a version suffix, e.g. `-dev`.
 rust_bindings_generator_version=$( \
 	LC_ALL=C "$BINDGEN" --version 2>/dev/null \
-		| head -n 1 \
-		| grep -oE '[0-9]+\.[0-9]+\.[0-9]+' \
+		| sed -nE '1s:.*bindgen ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p'
 )
 rust_bindings_generator_min_version=$($min_tool_version bindgen)
 rust_bindings_generator_cversion=$(get_canonical_version $rust_bindings_generator_version)
@@ -155,6 +153,9 @@ fi
 
 # `bindgen` returned successfully, thus use the output to check that the version
 # of the `libclang` found by the Rust bindings generator is suitable.
+#
+# Unlike other version checks, note that this one does not necessarily appear
+# in the first line of the output, thus no `sed` address is provided.
 bindgen_libclang_version=$( \
 	echo "$bindgen_libclang_output" \
 		| sed -nE 's:.*clang version ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p'

From f295522886a4ebb628cadb2cd74d0661d6292978 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 16 Jun 2023 02:16:29 +0200
Subject: [PATCH 0936/2047] kbuild: rust_is_available: handle failures calling
 `$RUSTC`/`$BINDGEN`

The script already checks if `$RUSTC` and `$BINDGEN` exists via
`command`, but the environment variables may point to a
non-executable file, or the programs may fail for some other reason.
While the script successfully exits with a failure as it should,
the error given can be quite confusing depending on the shell and
the behavior of its `command`. For instance, with `dash`:

    $ RUSTC=./mm BINDGEN=bindgen CC=clang scripts/rust_is_available.sh
    scripts/rust_is_available.sh: 19: arithmetic expression: expecting primary: "100000 *  + 100 *  + "

Thus detect failure exit codes when calling `$RUSTC` and `$BINDGEN` and
print a better message, in a similar way to what we do when extracting
the `libclang` version found by `bindgen`.

Link: https://lore.kernel.org/rust-for-linux/CAK7LNAQYk6s11MASRHW6oxtkqF00EJVqhHOP=5rynWt-QDUsXw@mail.gmail.com/
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Link: https://lore.kernel.org/r/20230616001631.463536-10-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 scripts/rust_is_available.sh | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/scripts/rust_is_available.sh b/scripts/rust_is_available.sh
index b7e0781fdea9..da8296cd9b8d 100755
--- a/scripts/rust_is_available.sh
+++ b/scripts/rust_is_available.sh
@@ -81,8 +81,20 @@ fi
 # Check that the Rust compiler version is suitable.
 #
 # Non-stable and distributions' versions may have a version suffix, e.g. `-dev`.
+rust_compiler_output=$( \
+	LC_ALL=C "$RUSTC" --version 2>/dev/null
+) || rust_compiler_code=$?
+if [ -n "$rust_compiler_code" ]; then
+	echo >&2 "***"
+	echo >&2 "*** Running '$RUSTC' to check the Rust compiler version failed with"
+	echo >&2 "*** code $rust_compiler_code. See output and docs below for details:"
+	echo >&2 "***"
+	echo >&2 "$rust_compiler_output"
+	echo >&2 "***"
+	exit 1
+fi
 rust_compiler_version=$( \
-	LC_ALL=C "$RUSTC" --version 2>/dev/null \
+	echo "$rust_compiler_output" \
 		| sed -nE '1s:.*rustc ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p'
 )
 rust_compiler_min_version=$($min_tool_version rustc)
@@ -108,8 +120,20 @@ fi
 # Check that the Rust bindings generator is suitable.
 #
 # Non-stable and distributions' versions may have a version suffix, e.g. `-dev`.
+rust_bindings_generator_output=$( \
+	LC_ALL=C "$BINDGEN" --version 2>/dev/null
+) || rust_bindings_generator_code=$?
+if [ -n "$rust_bindings_generator_code" ]; then
+	echo >&2 "***"
+	echo >&2 "*** Running '$BINDGEN' to check the Rust bindings generator version failed with"
+	echo >&2 "*** code $rust_bindings_generator_code. See output and docs below for details:"
+	echo >&2 "***"
+	echo >&2 "$rust_bindings_generator_output"
+	echo >&2 "***"
+	exit 1
+fi
 rust_bindings_generator_version=$( \
-	LC_ALL=C "$BINDGEN" --version 2>/dev/null \
+	echo "$rust_bindings_generator_output" \
 		| sed -nE '1s:.*bindgen ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p'
 )
 rust_bindings_generator_min_version=$($min_tool_version bindgen)

From bc60c930a43c7c984c80e99282f0d4f7193f3986 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 16 Jun 2023 02:16:30 +0200
Subject: [PATCH 0937/2047] kbuild: rust_is_available: check that output looks
 as expected

The script already checks for `$RUSTC` and `$BINDGEN` existing
and exiting without failure. However, one may still pass an
unexpected binary that does not output what the later parsing
expects. The script still successfully reports a failure as
expected, but the error is confusing. For instance:

    $ RUSTC=true BINDGEN=bindgen CC=clang scripts/rust_is_available.sh
    scripts/rust_is_available.sh: 19: arithmetic expression: expecting primary: "100000 *  + 100 *  + "
    ***
    *** Please see Documentation/rust/quick-start.rst for details
    *** on how to set up the Rust support.
    ***

Thus add an explicit check and a proper message for unexpected
output from the called command.

Similarly, do so for the `libclang` version parsing, too.

Link: https://lore.kernel.org/rust-for-linux/CAK7LNAQYk6s11MASRHW6oxtkqF00EJVqhHOP=5rynWt-QDUsXw@mail.gmail.com/
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Link: https://lore.kernel.org/r/20230616001631.463536-11-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 scripts/rust_is_available.sh | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/scripts/rust_is_available.sh b/scripts/rust_is_available.sh
index da8296cd9b8d..117018946b57 100755
--- a/scripts/rust_is_available.sh
+++ b/scripts/rust_is_available.sh
@@ -97,6 +97,15 @@ rust_compiler_version=$( \
 	echo "$rust_compiler_output" \
 		| sed -nE '1s:.*rustc ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p'
 )
+if [ -z "$rust_compiler_version" ]; then
+	echo >&2 "***"
+	echo >&2 "*** Running '$RUSTC' to check the Rust compiler version did not return"
+	echo >&2 "*** an expected output. See output and docs below for details:"
+	echo >&2 "***"
+	echo >&2 "$rust_compiler_output"
+	echo >&2 "***"
+	exit 1
+fi
 rust_compiler_min_version=$($min_tool_version rustc)
 rust_compiler_cversion=$(get_canonical_version $rust_compiler_version)
 rust_compiler_min_cversion=$(get_canonical_version $rust_compiler_min_version)
@@ -136,6 +145,15 @@ rust_bindings_generator_version=$( \
 	echo "$rust_bindings_generator_output" \
 		| sed -nE '1s:.*bindgen ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p'
 )
+if [ -z "$rust_bindings_generator_version" ]; then
+	echo >&2 "***"
+	echo >&2 "*** Running '$BINDGEN' to check the bindings generator version did not return"
+	echo >&2 "*** an expected output. See output and docs below for details:"
+	echo >&2 "***"
+	echo >&2 "$rust_bindings_generator_output"
+	echo >&2 "***"
+	exit 1
+fi
 rust_bindings_generator_min_version=$($min_tool_version bindgen)
 rust_bindings_generator_cversion=$(get_canonical_version $rust_bindings_generator_version)
 rust_bindings_generator_min_cversion=$(get_canonical_version $rust_bindings_generator_min_version)
@@ -184,6 +202,16 @@ bindgen_libclang_version=$( \
 	echo "$bindgen_libclang_output" \
 		| sed -nE 's:.*clang version ([0-9]+\.[0-9]+\.[0-9]+).*:\1:p'
 )
+if [ -z "$bindgen_libclang_version" ]; then
+	echo >&2 "***"
+	echo >&2 "*** Running '$BINDGEN' to check the libclang version (used by the Rust"
+	echo >&2 "*** bindings generator) did not return an expected output. See output"
+	echo >&2 "*** and docs below for details:"
+	echo >&2 "***"
+	echo >&2 "$bindgen_libclang_output"
+	echo >&2 "***"
+	exit 1
+fi
 bindgen_libclang_min_version=$($min_tool_version llvm)
 bindgen_libclang_cversion=$(get_canonical_version $bindgen_libclang_version)
 bindgen_libclang_min_cversion=$(get_canonical_version $bindgen_libclang_min_version)

From 0bb1c9282e2cb38d199347d1d96b77f208b64810 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 16 Jun 2023 02:16:31 +0200
Subject: [PATCH 0938/2047] kbuild: rust_is_available: add test suite

The `rust_is_available.sh` script runs for everybody compiling the
kernel, even if not using Rust. Therefore, it is important to ensure
that the script is correct to avoid breaking people's compilation.

In addition, the script needs to be able to handle a set of subtle
cases, including parsing version strings of different tools.

Therefore, maintenance of this script can be greatly eased with
a set of tests.

Thus add a test suite to cover hopefully most of the setups that
the script may encounter in the wild. Extra setups can be easily
added later on if missing.

The script currently covers all the branches of the shell script,
including several ways in which they may be entered.

Python is used for this script, since the script under test
does not depend on Rust, thus hopefully making it easier for others
to use if the need arises.

Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Link: https://lore.kernel.org/r/20230616001631.463536-12-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 scripts/rust_is_available_test.py | 346 ++++++++++++++++++++++++++++++
 1 file changed, 346 insertions(+)
 create mode 100755 scripts/rust_is_available_test.py

diff --git a/scripts/rust_is_available_test.py b/scripts/rust_is_available_test.py
new file mode 100755
index 000000000000..57613fe5ed75
--- /dev/null
+++ b/scripts/rust_is_available_test.py
@@ -0,0 +1,346 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Tests the `rust_is_available.sh` script.
+
+Some of the tests require the real programs to be available in `$PATH`
+under their canonical name (and with the expected versions).
+"""
+
+import enum
+import os
+import pathlib
+import stat
+import subprocess
+import tempfile
+import unittest
+
+class TestRustIsAvailable(unittest.TestCase):
+    @enum.unique
+    class Expected(enum.Enum):
+        SUCCESS = enum.auto()
+        SUCCESS_WITH_WARNINGS = enum.auto()
+        SUCCESS_WITH_EXTRA_OUTPUT = enum.auto()
+        FAILURE = enum.auto()
+
+    @classmethod
+    def generate_executable(cls, content):
+        path = pathlib.Path(cls.tempdir.name)
+        name = str(len(tuple(path.iterdir())))
+        path = path / name
+        with open(path, "w") as file_:
+            file_.write(content)
+        os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR)
+        return path
+
+    @classmethod
+    def generate_clang(cls, stdout):
+        return cls.generate_executable(f"""#!/usr/bin/env python3
+import sys
+if "-E" in " ".join(sys.argv):
+    print({repr("Clang " + " ".join(cls.llvm_default_version.split(" ")))})
+else:
+    print({repr(stdout)})
+""")
+
+    @classmethod
+    def generate_rustc(cls, stdout):
+        return cls.generate_executable(f"""#!/usr/bin/env python3
+import sys
+if "--print sysroot" in " ".join(sys.argv):
+    print({repr(cls.rust_default_sysroot)})
+else:
+    print({repr(stdout)})
+""")
+
+    @classmethod
+    def generate_bindgen(cls, version_stdout, libclang_stderr):
+        return cls.generate_executable(f"""#!/usr/bin/env python3
+import sys
+if "rust_is_available_bindgen_libclang.h" in " ".join(sys.argv):
+    print({repr(libclang_stderr)}, file=sys.stderr)
+else:
+    print({repr(version_stdout)})
+""")
+
+    @classmethod
+    def generate_bindgen_version(cls, stdout):
+        return cls.generate_bindgen(stdout, cls.bindgen_default_bindgen_libclang_stderr)
+
+    @classmethod
+    def generate_bindgen_libclang(cls, stderr):
+        return cls.generate_bindgen(cls.bindgen_default_bindgen_version_stdout, stderr)
+
+    @classmethod
+    def setUpClass(cls):
+        cls.tempdir = tempfile.TemporaryDirectory()
+
+        cls.missing = pathlib.Path(cls.tempdir.name) / "missing"
+
+        cls.nonexecutable = pathlib.Path(cls.tempdir.name) / "nonexecutable"
+        with open(cls.nonexecutable, "w") as file_:
+            file_.write("nonexecutable")
+
+        cls.unexpected_binary = "true"
+
+        cls.rustc_default_version = subprocess.check_output(("scripts/min-tool-version.sh", "rustc")).decode().strip()
+        cls.bindgen_default_version = subprocess.check_output(("scripts/min-tool-version.sh", "bindgen")).decode().strip()
+        cls.llvm_default_version = subprocess.check_output(("scripts/min-tool-version.sh", "llvm")).decode().strip()
+        cls.rust_default_sysroot = subprocess.check_output(("rustc", "--print", "sysroot")).decode().strip()
+
+        cls.bindgen_default_bindgen_version_stdout = f"bindgen {cls.bindgen_default_version}"
+        cls.bindgen_default_bindgen_libclang_stderr = f"scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version {cls.llvm_default_version} [-W#pragma-messages], err: false"
+
+        cls.default_rustc = cls.generate_rustc(f"rustc {cls.rustc_default_version}")
+        cls.default_bindgen =  cls.generate_bindgen(cls.bindgen_default_bindgen_version_stdout, cls.bindgen_default_bindgen_libclang_stderr)
+        cls.default_cc = cls.generate_clang(f"clang version {cls.llvm_default_version}")
+
+    def run_script(self, expected, override_env):
+        env = {
+            "RUSTC": self.default_rustc,
+            "BINDGEN": self.default_bindgen,
+            "CC": self.default_cc,
+        }
+
+        for key, value in override_env.items():
+            if value is None:
+                del env[key]
+                continue
+            env[key] = value
+
+        result = subprocess.run("scripts/rust_is_available.sh", env=env, capture_output=True)
+
+        # The script should never output anything to `stdout`.
+        self.assertEqual(result.stdout, b"")
+
+        if expected == self.Expected.SUCCESS:
+            # When expecting a success, the script should return 0
+            # and it should not output anything to `stderr`.
+            self.assertEqual(result.returncode, 0)
+            self.assertEqual(result.stderr, b"")
+        elif expected == self.Expected.SUCCESS_WITH_EXTRA_OUTPUT:
+            # When expecting a success with extra output (that is not warnings,
+            # which is the common case), the script should return 0 and it
+            # should output at least something to `stderr` (the output should
+            # be checked further by the test).
+            self.assertEqual(result.returncode, 0)
+            self.assertNotEqual(result.stderr, b"")
+        elif expected == self.Expected.SUCCESS_WITH_WARNINGS:
+            # When expecting a success with warnings, the script should return 0
+            # and it should output at least the instructions to `stderr`.
+            self.assertEqual(result.returncode, 0)
+            self.assertIn(b"Please see Documentation/rust/quick-start.rst for details", result.stderr)
+        else:
+            # When expecting a failure, the script should return non-0
+            # and it should output at least the instructions to `stderr`.
+            self.assertNotEqual(result.returncode, 0)
+            self.assertIn(b"Please see Documentation/rust/quick-start.rst for details", result.stderr)
+
+        # The output will generally be UTF-8 (i.e. unless the user has
+        # put strange values in the environment).
+        result.stderr = result.stderr.decode()
+
+        return result
+
+    def test_rustc_unset(self):
+        result = self.run_script(self.Expected.FAILURE, { "RUSTC": None })
+        self.assertIn("Environment variable 'RUSTC' is not set.", result.stderr)
+        self.assertIn("This script is intended to be called from Kbuild.", result.stderr)
+
+    def test_bindgen_unset(self):
+        result = self.run_script(self.Expected.FAILURE, { "BINDGEN": None })
+        self.assertIn("Environment variable 'BINDGEN' is not set.", result.stderr)
+        self.assertIn("This script is intended to be called from Kbuild.", result.stderr)
+
+    def test_cc_unset(self):
+        result = self.run_script(self.Expected.FAILURE, { "CC": None })
+        self.assertIn("Environment variable 'CC' is not set.", result.stderr)
+        self.assertIn("This script is intended to be called from Kbuild.", result.stderr)
+
+    def test_rustc_missing(self):
+        result = self.run_script(self.Expected.FAILURE, { "RUSTC": self.missing })
+        self.assertIn(f"Rust compiler '{self.missing}' could not be found.", result.stderr)
+
+    def test_bindgen_missing(self):
+        result = self.run_script(self.Expected.FAILURE, { "BINDGEN": self.missing })
+        self.assertIn(f"Rust bindings generator '{self.missing}' could not be found.", result.stderr)
+
+    def test_rustc_nonexecutable(self):
+        result = self.run_script(self.Expected.FAILURE, { "RUSTC": self.nonexecutable })
+        self.assertIn(f"Running '{self.nonexecutable}' to check the Rust compiler version failed with", result.stderr)
+
+    def test_rustc_unexpected_binary(self):
+        result = self.run_script(self.Expected.FAILURE, { "RUSTC": self.unexpected_binary })
+        self.assertIn(f"Running '{self.unexpected_binary}' to check the Rust compiler version did not return", result.stderr)
+
+    def test_rustc_unexpected_name(self):
+        rustc = self.generate_rustc(f"unexpected {self.rustc_default_version} (a8314ef7d 2022-06-27)")
+        result = self.run_script(self.Expected.FAILURE, { "RUSTC": rustc })
+        self.assertIn(f"Running '{rustc}' to check the Rust compiler version did not return", result.stderr)
+
+    def test_rustc_unexpected_version(self):
+        rustc = self.generate_rustc("rustc unexpected (a8314ef7d 2022-06-27)")
+        result = self.run_script(self.Expected.FAILURE, { "RUSTC": rustc })
+        self.assertIn(f"Running '{rustc}' to check the Rust compiler version did not return", result.stderr)
+
+    def test_rustc_no_minor(self):
+        rustc = self.generate_rustc(f"rustc {'.'.join(self.rustc_default_version.split('.')[:2])} (a8314ef7d 2022-06-27)")
+        result = self.run_script(self.Expected.FAILURE, { "RUSTC": rustc })
+        self.assertIn(f"Running '{rustc}' to check the Rust compiler version did not return", result.stderr)
+
+    def test_rustc_old_version(self):
+        rustc = self.generate_rustc("rustc 1.60.0 (a8314ef7d 2022-06-27)")
+        result = self.run_script(self.Expected.FAILURE, { "RUSTC": rustc })
+        self.assertIn(f"Rust compiler '{rustc}' is too old.", result.stderr)
+
+    def test_rustc_new_version(self):
+        rustc = self.generate_rustc("rustc 1.999.0 (a8314ef7d 2099-06-27)")
+        result = self.run_script(self.Expected.SUCCESS_WITH_WARNINGS, { "RUSTC": rustc })
+        self.assertIn(f"Rust compiler '{rustc}' is too new. This may or may not work.", result.stderr)
+
+    def test_bindgen_nonexecutable(self):
+        result = self.run_script(self.Expected.FAILURE, { "BINDGEN": self.nonexecutable })
+        self.assertIn(f"Running '{self.nonexecutable}' to check the Rust bindings generator version failed with", result.stderr)
+
+    def test_bindgen_unexpected_binary(self):
+        result = self.run_script(self.Expected.FAILURE, { "BINDGEN": self.unexpected_binary })
+        self.assertIn(f"Running '{self.unexpected_binary}' to check the bindings generator version did not return", result.stderr)
+
+    def test_bindgen_unexpected_name(self):
+        bindgen = self.generate_bindgen_version(f"unexpected {self.bindgen_default_version}")
+        result = self.run_script(self.Expected.FAILURE, { "BINDGEN": bindgen })
+        self.assertIn(f"Running '{bindgen}' to check the bindings generator version did not return", result.stderr)
+
+    def test_bindgen_unexpected_version(self):
+        bindgen = self.generate_bindgen_version("bindgen unexpected")
+        result = self.run_script(self.Expected.FAILURE, { "BINDGEN": bindgen })
+        self.assertIn(f"Running '{bindgen}' to check the bindings generator version did not return", result.stderr)
+
+    def test_bindgen_no_minor(self):
+        bindgen = self.generate_bindgen_version(f"bindgen {'.'.join(self.bindgen_default_version.split('.')[:2])}")
+        result = self.run_script(self.Expected.FAILURE, { "BINDGEN": bindgen })
+        self.assertIn(f"Running '{bindgen}' to check the bindings generator version did not return", result.stderr)
+
+    def test_bindgen_old_version(self):
+        bindgen = self.generate_bindgen_version("bindgen 0.50.0")
+        result = self.run_script(self.Expected.FAILURE, { "BINDGEN": bindgen })
+        self.assertIn(f"Rust bindings generator '{bindgen}' is too old.", result.stderr)
+
+    def test_bindgen_new_version(self):
+        bindgen = self.generate_bindgen_version("bindgen 0.999.0")
+        result = self.run_script(self.Expected.SUCCESS_WITH_WARNINGS, { "BINDGEN": bindgen })
+        self.assertIn(f"Rust bindings generator '{bindgen}' is too new. This may or may not work.", result.stderr)
+
+    def test_bindgen_libclang_failure(self):
+        for env in (
+            { "LLVM_CONFIG_PATH": self.missing },
+            { "LIBCLANG_PATH": self.missing },
+            { "CLANG_PATH": self.missing },
+        ):
+            with self.subTest(env=env):
+                result = self.run_script(self.Expected.FAILURE, env | { "PATH": os.environ["PATH"], "BINDGEN": "bindgen" })
+                self.assertIn("Running 'bindgen' to check the libclang version (used by the Rust", result.stderr)
+                self.assertIn("bindings generator) failed with code ", result.stderr)
+
+    def test_bindgen_libclang_unexpected_version(self):
+        bindgen = self.generate_bindgen_libclang("scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version unexpected [-W#pragma-messages], err: false")
+        result = self.run_script(self.Expected.FAILURE, { "BINDGEN": bindgen })
+        self.assertIn(f"Running '{bindgen}' to check the libclang version (used by the Rust", result.stderr)
+        self.assertIn("bindings generator) did not return an expected output. See output", result.stderr)
+
+    def test_bindgen_libclang_old_version(self):
+        bindgen = self.generate_bindgen_libclang("scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version 10.0.0 [-W#pragma-messages], err: false")
+        result = self.run_script(self.Expected.FAILURE, { "BINDGEN": bindgen })
+        self.assertIn(f"libclang (used by the Rust bindings generator '{bindgen}') is too old.", result.stderr)
+
+    def test_clang_matches_bindgen_libclang_different_bindgen(self):
+        bindgen = self.generate_bindgen_libclang("scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version 999.0.0 [-W#pragma-messages], err: false")
+        result = self.run_script(self.Expected.SUCCESS_WITH_WARNINGS, { "BINDGEN": bindgen })
+        self.assertIn("version does not match Clang's. This may be a problem.", result.stderr)
+
+    def test_clang_matches_bindgen_libclang_different_clang(self):
+        cc = self.generate_clang("clang version 999.0.0")
+        result = self.run_script(self.Expected.SUCCESS_WITH_WARNINGS, { "CC": cc })
+        self.assertIn("version does not match Clang's. This may be a problem.", result.stderr)
+
+    def test_rustc_src_core_krustflags(self):
+        result = self.run_script(self.Expected.FAILURE, { "PATH": os.environ["PATH"], "RUSTC": "rustc", "KRUSTFLAGS": f"--sysroot={self.missing}" })
+        self.assertIn("Source code for the 'core' standard library could not be found", result.stderr)
+
+    def test_rustc_src_core_rustlibsrc(self):
+        result = self.run_script(self.Expected.FAILURE, { "RUST_LIB_SRC": self.missing })
+        self.assertIn("Source code for the 'core' standard library could not be found", result.stderr)
+
+    def test_success_cc_unknown(self):
+        result = self.run_script(self.Expected.SUCCESS_WITH_EXTRA_OUTPUT, { "CC": self.missing })
+        self.assertIn("unknown C compiler", result.stderr)
+
+    def test_success_cc_multiple_arguments_ccache(self):
+        clang = self.generate_clang(f"""Ubuntu clang version {self.llvm_default_version}-1ubuntu1
+Target: x86_64-pc-linux-gnu
+Thread model: posix
+InstalledDir: /usr/bin
+""")
+        result = self.run_script(self.Expected.SUCCESS, { "CC": f"{clang} clang" })
+
+    def test_success_rustc_version(self):
+        for rustc_stdout in (
+            f"rustc {self.rustc_default_version} (a8314ef7d 2022-06-27)",
+            f"rustc {self.rustc_default_version}-dev (a8314ef7d 2022-06-27)",
+            f"rustc {self.rustc_default_version}-1.60.0 (a8314ef7d 2022-06-27)",
+        ):
+            with self.subTest(rustc_stdout=rustc_stdout):
+                rustc = self.generate_rustc(rustc_stdout)
+                result = self.run_script(self.Expected.SUCCESS, { "RUSTC": rustc })
+
+    def test_success_bindgen_version(self):
+        for bindgen_stdout in (
+            f"bindgen {self.bindgen_default_version}",
+            f"bindgen {self.bindgen_default_version}-dev",
+            f"bindgen {self.bindgen_default_version}-0.999.0",
+        ):
+            with self.subTest(bindgen_stdout=bindgen_stdout):
+                bindgen = self.generate_bindgen_version(bindgen_stdout)
+                result = self.run_script(self.Expected.SUCCESS, { "BINDGEN": bindgen })
+
+    def test_success_bindgen_libclang(self):
+        for stderr in (
+            f"scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version {self.llvm_default_version} (https://github.com/llvm/llvm-project.git 4a2c05b05ed07f1f620e94f6524a8b4b2760a0b1) [-W#pragma-messages], err: false",
+            f"/home/jd/Documents/dev/kernel-module-flake/linux-6.1/outputs/dev/lib/modules/6.1.0-development/source/scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version {self.llvm_default_version}  [-W#pragma-messages], err: false",
+            f"scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version {self.llvm_default_version} (Fedora 13.0.0-3.fc35) [-W#pragma-messages], err: false",
+            f"""
+/nix/store/dsd5gz46hdbdk2rfdimqddhq6m8m8fqs-bash-5.1-p16/bin/bash: warning: setlocale: LC_ALL: cannot change locale (c)
+scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version {self.llvm_default_version}  [-W#pragma-messages], err: false
+""",
+            f"""
+/nix/store/dsd5gz46hdbdk2rfdimqddhq6m8m8fqs-bash-5.1.0-p16/bin/bash: warning: setlocale: LC_ALL: cannot change locale (c)
+/home/jd/Documents/dev/kernel-module-flake/linux-6.1/outputs/dev/lib/modules/6.1.0-development/source/scripts/rust_is_available_bindgen_libclang.h:2:9: warning: clang version {self.llvm_default_version} (Fedora 13.0.0-3.fc35) [-W#pragma-messages], err: false
+"""
+        ):
+            with self.subTest(stderr=stderr):
+                bindgen = self.generate_bindgen_libclang(stderr)
+                result = self.run_script(self.Expected.SUCCESS, { "BINDGEN": bindgen })
+
+    def test_success_clang_version(self):
+        for clang_stdout in (
+            f"clang version {self.llvm_default_version} (https://github.com/llvm/llvm-project.git 4a2c05b05ed07f1f620e94f6524a8b4b2760a0b1)",
+            f"clang version {self.llvm_default_version}-dev",
+            f"clang version {self.llvm_default_version}-2~ubuntu20.04.1",
+            f"Ubuntu clang version {self.llvm_default_version}-2~ubuntu20.04.1",
+        ):
+            with self.subTest(clang_stdout=clang_stdout):
+                clang = self.generate_clang(clang_stdout)
+                result = self.run_script(self.Expected.SUCCESS, { "CC": clang })
+
+    def test_success_real_programs(self):
+        for cc in ["gcc", "clang"]:
+            with self.subTest(cc=cc):
+                result = self.run_script(self.Expected.SUCCESS, {
+                    "PATH": os.environ["PATH"],
+                    "RUSTC": "rustc",
+                    "BINDGEN": "bindgen",
+                    "CC": cc,
+                })
+
+if __name__ == "__main__":
+    unittest.main()

From 35cad617df2eeef8440a38e82bb2d81ae32ca50d Mon Sep 17 00:00:00 2001
From: Alice Ryhl <aliceryhl@google.com>
Date: Wed, 14 Jun 2023 11:53:28 +0000
Subject: [PATCH 0939/2047] rust: make `UnsafeCell` the outer type in `Opaque`

When combining `UnsafeCell` with `MaybeUninit`, it is idiomatic to use
`UnsafeCell` as the outer type. Intuitively, this is because a
`MaybeUninit<T>` might not contain a `T`, but we always want the effect
of the `UnsafeCell`, even if the inner value is uninitialized.

Now, strictly speaking, this doesn't really make a difference. The
compiler will always apply the `UnsafeCell` effect even if the inner
value is uninitialized. But I think we should follow the convention
here.

Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Benno Lossin <benno.lossin@proton.me>
Reviewed-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Link: https://lore.kernel.org/r/20230614115328.2825961-1-aliceryhl@google.com
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/types.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index d479f8da8f38..c0b8bb1a7539 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -206,17 +206,17 @@ impl<T, F: FnOnce(T)> Drop for ScopeGuard<T, F> {
 ///
 /// This is meant to be used with FFI objects that are never interpreted by Rust code.
 #[repr(transparent)]
-pub struct Opaque<T>(MaybeUninit<UnsafeCell<T>>);
+pub struct Opaque<T>(UnsafeCell<MaybeUninit<T>>);
 
 impl<T> Opaque<T> {
     /// Creates a new opaque value.
     pub const fn new(value: T) -> Self {
-        Self(MaybeUninit::new(UnsafeCell::new(value)))
+        Self(UnsafeCell::new(MaybeUninit::new(value)))
     }
 
     /// Creates an uninitialised value.
     pub const fn uninit() -> Self {
-        Self(MaybeUninit::uninit())
+        Self(UnsafeCell::new(MaybeUninit::uninit()))
     }
 
     /// Creates a pin-initializer from the given initializer closure.
@@ -240,7 +240,7 @@ impl<T> Opaque<T> {
 
     /// Returns a raw pointer to the opaque data.
     pub fn get(&self) -> *mut T {
-        UnsafeCell::raw_get(self.0.as_ptr())
+        UnsafeCell::get(&self.0).cast::<T>()
     }
 
     /// Gets the value behind `this`.
@@ -248,7 +248,7 @@ impl<T> Opaque<T> {
     /// This function is useful to get access to the value without creating intermediate
     /// references.
     pub const fn raw_get(this: *const Self) -> *mut T {
-        UnsafeCell::raw_get(this.cast::<UnsafeCell<T>>())
+        UnsafeCell::raw_get(this.cast::<UnsafeCell<MaybeUninit<T>>>()).cast::<T>()
     }
 }
 

From 0b4e3b6f6b79b1add04008a6ceaaf661107e8902 Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Fri, 30 Jun 2023 15:03:23 +0000
Subject: [PATCH 0940/2047] rust: types: make `Opaque` be `!Unpin`

Adds a `PhantomPinned` field to `Opaque<T>`. This removes the last Rust
guarantee: the assumption that the type `T` can be freely moved. This is
not the case for many types from the C side (e.g. if they contain a
`struct list_head`). This change removes the need to add a
`PhantomPinned` field manually to Rust structs that contain C structs
which must not be moved.

Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Reviewed-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Andreas Hindborg <a.hindborg@samsung.com>
Link: https://lore.kernel.org/r/20230630150216.109789-1-benno.lossin@proton.me
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/types.rs | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index c0b8bb1a7539..50cbd767ea9d 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -6,7 +6,7 @@ use crate::init::{self, PinInit};
 use alloc::boxed::Box;
 use core::{
     cell::UnsafeCell,
-    marker::PhantomData,
+    marker::{PhantomData, PhantomPinned},
     mem::MaybeUninit,
     ops::{Deref, DerefMut},
     ptr::NonNull,
@@ -206,17 +206,26 @@ impl<T, F: FnOnce(T)> Drop for ScopeGuard<T, F> {
 ///
 /// This is meant to be used with FFI objects that are never interpreted by Rust code.
 #[repr(transparent)]
-pub struct Opaque<T>(UnsafeCell<MaybeUninit<T>>);
+pub struct Opaque<T> {
+    value: UnsafeCell<MaybeUninit<T>>,
+    _pin: PhantomPinned,
+}
 
 impl<T> Opaque<T> {
     /// Creates a new opaque value.
     pub const fn new(value: T) -> Self {
-        Self(UnsafeCell::new(MaybeUninit::new(value)))
+        Self {
+            value: UnsafeCell::new(MaybeUninit::new(value)),
+            _pin: PhantomPinned,
+        }
     }
 
     /// Creates an uninitialised value.
     pub const fn uninit() -> Self {
-        Self(UnsafeCell::new(MaybeUninit::uninit()))
+        Self {
+            value: UnsafeCell::new(MaybeUninit::uninit()),
+            _pin: PhantomPinned,
+        }
     }
 
     /// Creates a pin-initializer from the given initializer closure.
@@ -240,7 +249,7 @@ impl<T> Opaque<T> {
 
     /// Returns a raw pointer to the opaque data.
     pub fn get(&self) -> *mut T {
-        UnsafeCell::get(&self.0).cast::<T>()
+        UnsafeCell::get(&self.value).cast::<T>()
     }
 
     /// Gets the value behind `this`.

From 0820debb7d489e9eb1f68b7bb69e6ae210699b3f Mon Sep 17 00:00:00 2001
From: Tzung-Bi Shih <tzungbi@kernel.org>
Date: Thu, 3 Aug 2023 09:12:45 +0800
Subject: [PATCH 0941/2047] platform/chrome: chromeos_acpi: print hex string
 for ACPI_TYPE_BUFFER

`element->buffer.pointer` should be binary blob.  `%s` doesn't work
perfect for them.

Print hex string for ACPI_TYPE_BUFFER.  Also update the documentation
to reflect this.

Fixes: 0a4cad9c11ad ("platform/chrome: Add ChromeOS ACPI device driver")
Cc: stable@vger.kernel.org
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20230803011245.3773756-1-tzungbi@kernel.org
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 .../ABI/testing/sysfs-driver-chromeos-acpi    |  2 +-
 drivers/platform/chrome/chromeos_acpi.c       | 31 ++++++++++++++++++-
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-chromeos-acpi b/Documentation/ABI/testing/sysfs-driver-chromeos-acpi
index 22082f9a7922..d46b1c85840d 100644
--- a/Documentation/ABI/testing/sysfs-driver-chromeos-acpi
+++ b/Documentation/ABI/testing/sysfs-driver-chromeos-acpi
@@ -149,4 +149,4 @@ KernelVersion:	5.19
 Description:
 		Returns the verified boot data block shared between the
 		firmware verification step and the kernel verification step
-		(binary).
+		(hex dump).
diff --git a/drivers/platform/chrome/chromeos_acpi.c b/drivers/platform/chrome/chromeos_acpi.c
index 1cc01d893ada..e6e6dcfc74d1 100644
--- a/drivers/platform/chrome/chromeos_acpi.c
+++ b/drivers/platform/chrome/chromeos_acpi.c
@@ -90,7 +90,36 @@ static int chromeos_acpi_handle_package(struct device *dev, union acpi_object *o
 	case ACPI_TYPE_STRING:
 		return sysfs_emit(buf, "%s\n", element->string.pointer);
 	case ACPI_TYPE_BUFFER:
-		return sysfs_emit(buf, "%s\n", element->buffer.pointer);
+		{
+			int i, r, at, room_left;
+			const int byte_per_line = 16;
+
+			at = 0;
+			room_left = PAGE_SIZE - 1;
+			for (i = 0; i < element->buffer.length && room_left; i += byte_per_line) {
+				r = hex_dump_to_buffer(element->buffer.pointer + i,
+						       element->buffer.length - i,
+						       byte_per_line, 1, buf + at, room_left,
+						       false);
+				if (r > room_left)
+					goto truncating;
+				at += r;
+				room_left -= r;
+
+				r = sysfs_emit_at(buf, at, "\n");
+				if (!r)
+					goto truncating;
+				at += r;
+				room_left -= r;
+			}
+
+			buf[at] = 0;
+			return at;
+truncating:
+			dev_info_once(dev, "truncating sysfs content for %s\n", name);
+			sysfs_emit_at(buf, PAGE_SIZE - 4, "..\n");
+			return PAGE_SIZE - 1;
+		}
 	default:
 		dev_err(dev, "element type %d not supported\n", element->type);
 		return -EINVAL;

From dbbff489064d89391c4f0c7a73e77e61ce29fe96 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:33 -0700
Subject: [PATCH 0942/2047] xfs: reformat the xfs_fs_free prototype

The xfs_fs_free prototype formatting is a weird mix of the classic XFS
style and the Linux style.  Fix it up to be consistent.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Message-Id: <20230809220545.1308228-2-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/xfs_super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 818510243130..79c1dd9435c2 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1934,7 +1934,8 @@ xfs_fs_reconfigure(
 	return 0;
 }
 
-static void xfs_fs_free(
+static void
+xfs_fs_free(
 	struct fs_context	*fc)
 {
 	struct xfs_mount	*mp = fc->s_fs_info;

From 1aa2d074d4c777e2150382878d0a5611d829b380 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:34 -0700
Subject: [PATCH 0943/2047] xfs: remove a superfluous s_fs_info NULL check in
 xfs_fs_put_super

->put_super is only called when sb->s_root is set, and thus when
fill_super succeeds.  Thus drop the NULL check that can't happen in
xfs_fs_put_super.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Message-Id: <20230809220545.1308228-3-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/xfs_super.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 79c1dd9435c2..e621e0da87f3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1133,10 +1133,6 @@ xfs_fs_put_super(
 {
 	struct xfs_mount	*mp = XFS_M(sb);
 
-	/* if ->fill_super failed, we have no mount to tear down */
-	if (!sb->s_fs_info)
-		return;
-
 	xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
 	xfs_filestream_unmount(mp);
 	xfs_unmountfs(mp);

From 2a9311adb87c98599989b80405fe2c60cd4075dd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:35 -0700
Subject: [PATCH 0944/2047] xfs: free the xfs_mount in ->kill_sb

As a rule of thumb everything allocated to the fs_context and moved into
the super_block should be freed by ->kill_sb so that the teardown
handling doesn't need to be duplicated between the fill_super error
path and put_super.  Implement a XFS-specific kill_sb method to do that.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Message-Id: <20230809220545.1308228-4-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/xfs_super.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index e621e0da87f3..31260fc09719 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1144,9 +1144,6 @@ xfs_fs_put_super(
 	xfs_destroy_percpu_counters(mp);
 	xfs_destroy_mount_workqueues(mp);
 	xfs_close_devices(mp);
-
-	sb->s_fs_info = NULL;
-	xfs_mount_free(mp);
 }
 
 static long
@@ -1488,7 +1485,7 @@ xfs_fs_fill_super(
 
 	error = xfs_fs_validate_params(mp);
 	if (error)
-		goto out_free_names;
+		return error;
 
 	sb_min_blocksize(sb, BBSIZE);
 	sb->s_xattr = xfs_xattr_handlers;
@@ -1515,7 +1512,7 @@ xfs_fs_fill_super(
 
 	error = xfs_open_devices(mp);
 	if (error)
-		goto out_free_names;
+		return error;
 
 	error = xfs_init_mount_workqueues(mp);
 	if (error)
@@ -1735,9 +1732,6 @@ xfs_fs_fill_super(
 	xfs_destroy_mount_workqueues(mp);
  out_close_devices:
 	xfs_close_devices(mp);
- out_free_names:
-	sb->s_fs_info = NULL;
-	xfs_mount_free(mp);
 	return error;
 
  out_unmount:
@@ -2000,12 +1994,20 @@ static int xfs_init_fs_context(
 	return 0;
 }
 
+static void
+xfs_kill_sb(
+	struct super_block		*sb)
+{
+	kill_block_super(sb);
+	xfs_mount_free(XFS_M(sb));
+}
+
 static struct file_system_type xfs_fs_type = {
 	.owner			= THIS_MODULE,
 	.name			= "xfs",
 	.init_fs_context	= xfs_init_fs_context,
 	.parameters		= xfs_fs_parameters,
-	.kill_sb		= kill_block_super,
+	.kill_sb		= xfs_kill_sb,
 	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
 };
 MODULE_ALIAS_FS("xfs");

From d3ef7e94ee36adc8f0006d253a9ad45793b874cd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:36 -0700
Subject: [PATCH 0945/2047] xfs: remove xfs_blkdev_put

There isn't much use for this trivial wrapper, especially as the NULL
check is only needed in a single call site.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Message-Id: <20230809220545.1308228-5-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/xfs_super.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 31260fc09719..332e32d8ca5a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -406,15 +406,6 @@ xfs_blkdev_get(
 	return error;
 }
 
-STATIC void
-xfs_blkdev_put(
-	struct xfs_mount	*mp,
-	struct block_device	*bdev)
-{
-	if (bdev)
-		blkdev_put(bdev, mp);
-}
-
 STATIC void
 xfs_close_devices(
 	struct xfs_mount	*mp)
@@ -423,13 +414,13 @@ xfs_close_devices(
 		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
 
 		xfs_free_buftarg(mp->m_logdev_targp);
-		xfs_blkdev_put(mp, logdev);
+		blkdev_put(logdev, mp);
 	}
 	if (mp->m_rtdev_targp) {
 		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
 
 		xfs_free_buftarg(mp->m_rtdev_targp);
-		xfs_blkdev_put(mp, rtdev);
+		blkdev_put(rtdev, mp);
 	}
 	xfs_free_buftarg(mp->m_ddev_targp);
 }
@@ -504,10 +495,11 @@ xfs_open_devices(
  out_free_ddev_targ:
 	xfs_free_buftarg(mp->m_ddev_targp);
  out_close_rtdev:
-	xfs_blkdev_put(mp, rtdev);
+	 if (rtdev)
+		 blkdev_put(rtdev, mp);
  out_close_logdev:
 	if (logdev && logdev != ddev)
-		xfs_blkdev_put(mp, logdev);
+		blkdev_put(logdev, mp);
 	return error;
 }
 

From 41233576e9a4515dc9b0bd1cbbb896b520a1f486 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:37 -0700
Subject: [PATCH 0946/2047] xfs: close the RT and log block devices in
 xfs_free_buftarg

Closing the block devices logically belongs into xfs_free_buftarg,  So
instead of open coding it in the caller move it there and add a check
for the s_bdev so that the main device isn't close as that's done by the
VFS helper.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Message-Id: <20230809220545.1308228-6-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/xfs_buf.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 15d1e5a7c2d3..65110df78e13 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1938,6 +1938,8 @@ void
 xfs_free_buftarg(
 	struct xfs_buftarg	*btp)
 {
+	struct block_device	*bdev = btp->bt_bdev;
+
 	unregister_shrinker(&btp->bt_shrinker);
 	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
 	percpu_counter_destroy(&btp->bt_io_count);
@@ -1946,6 +1948,9 @@ xfs_free_buftarg(
 	blkdev_issue_flush(btp->bt_bdev);
 	invalidate_bdev(btp->bt_bdev);
 	fs_put_dax(btp->bt_daxdev, btp->bt_mount);
+	/* the main block device is closed by kill_block_super */
+	if (bdev != btp->bt_mount->m_super->s_bdev)
+		blkdev_put(bdev, btp->bt_mount);
 
 	kmem_free(btp);
 }

From 35a93b148b0363dca23c3db1cc9d48100eb8b276 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:38 -0700
Subject: [PATCH 0947/2047] xfs: close the external block devices in
 xfs_mount_free

blkdev_put must not be called under sb->s_umount to avoid a lock order
reversal with disk->open_mutex.  Move closing the buftargs into ->kill_sb
to archive that.  Note that the flushing of the disk caches and
block device mapping invalidated needs to stay in ->put_super as the main
block device is closed in kill_block_super already.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Message-Id: <20230809220545.1308228-7-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/xfs_buf.c   |  2 --
 fs/xfs/xfs_super.c | 36 ++++++++++++++++++++++--------------
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 65110df78e13..f2e5a2c78dc4 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1945,8 +1945,6 @@ xfs_free_buftarg(
 	percpu_counter_destroy(&btp->bt_io_count);
 	list_lru_destroy(&btp->bt_lru);
 
-	blkdev_issue_flush(btp->bt_bdev);
-	invalidate_bdev(btp->bt_bdev);
 	fs_put_dax(btp->bt_daxdev, btp->bt_mount);
 	/* the main block device is closed by kill_block_super */
 	if (bdev != btp->bt_mount->m_super->s_bdev)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 332e32d8ca5a..18d34e802aac 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -407,22 +407,19 @@ xfs_blkdev_get(
 }
 
 STATIC void
-xfs_close_devices(
+xfs_shutdown_devices(
 	struct xfs_mount	*mp)
 {
 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
-		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
-
-		xfs_free_buftarg(mp->m_logdev_targp);
-		blkdev_put(logdev, mp);
+		blkdev_issue_flush(mp->m_logdev_targp->bt_bdev);
+		invalidate_bdev(mp->m_logdev_targp->bt_bdev);
 	}
 	if (mp->m_rtdev_targp) {
-		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
-
-		xfs_free_buftarg(mp->m_rtdev_targp);
-		blkdev_put(rtdev, mp);
+		blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
+		invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
 	}
-	xfs_free_buftarg(mp->m_ddev_targp);
+	blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
+	invalidate_bdev(mp->m_ddev_targp->bt_bdev);
 }
 
 /*
@@ -750,6 +747,17 @@ static void
 xfs_mount_free(
 	struct xfs_mount	*mp)
 {
+	/*
+	 * Free the buftargs here because blkdev_put needs to be called outside
+	 * of sb->s_umount, which is held around the call to ->put_super.
+	 */
+	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
+		xfs_free_buftarg(mp->m_logdev_targp);
+	if (mp->m_rtdev_targp)
+		xfs_free_buftarg(mp->m_rtdev_targp);
+	if (mp->m_ddev_targp)
+		xfs_free_buftarg(mp->m_ddev_targp);
+
 	kfree(mp->m_rtname);
 	kfree(mp->m_logname);
 	kmem_free(mp);
@@ -1135,7 +1143,7 @@ xfs_fs_put_super(
 	xfs_inodegc_free_percpu(mp);
 	xfs_destroy_percpu_counters(mp);
 	xfs_destroy_mount_workqueues(mp);
-	xfs_close_devices(mp);
+	xfs_shutdown_devices(mp);
 }
 
 static long
@@ -1508,7 +1516,7 @@ xfs_fs_fill_super(
 
 	error = xfs_init_mount_workqueues(mp);
 	if (error)
-		goto out_close_devices;
+		goto out_shutdown_devices;
 
 	error = xfs_init_percpu_counters(mp);
 	if (error)
@@ -1722,8 +1730,8 @@ xfs_fs_fill_super(
 	xfs_destroy_percpu_counters(mp);
  out_destroy_workqueues:
 	xfs_destroy_mount_workqueues(mp);
- out_close_devices:
-	xfs_close_devices(mp);
+ out_shutdown_devices:
+	xfs_shutdown_devices(mp);
 	return error;
 
  out_unmount:

From 1a0a5dad67b60250dce151c9533ccbecdfd822d4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:39 -0700
Subject: [PATCH 0948/2047] xfs: document the invalidate_bdev call in
 invalidate_bdev

Copy and paste the commit message from Darrick into a comment to explain
the seemingly odd invalidate_bdev in xfs_shutdown_devices.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Message-Id: <20230809220545.1308228-8-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/xfs_super.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 18d34e802aac..46d0cbe2b5f8 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -410,6 +410,32 @@ STATIC void
 xfs_shutdown_devices(
 	struct xfs_mount	*mp)
 {
+	/*
+	 * Udev is triggered whenever anyone closes a block device or unmounts
+	 * a file systemm on a block device.
+	 * The default udev rules invoke blkid to read the fs super and create
+	 * symlinks to the bdev under /dev/disk.  For this, it uses buffered
+	 * reads through the page cache.
+	 *
+	 * xfs_db also uses buffered reads to examine metadata.  There is no
+	 * coordination between xfs_db and udev, which means that they can run
+	 * concurrently.  Note there is no coordination between the kernel and
+	 * blkid either.
+	 *
+	 * On a system with 64k pages, the page cache can cache the superblock
+	 * and the root inode (and hence the root directory) with the same 64k
+	 * page.  If udev spawns blkid after the mkfs and the system is busy
+	 * enough that it is still running when xfs_db starts up, they'll both
+	 * read from the same page in the pagecache.
+	 *
+	 * The unmount writes updated inode metadata to disk directly.  The XFS
+	 * buffer cache does not use the bdev pagecache, so it needs to
+	 * invalidate that pagecache on unmount.  If the above scenario occurs,
+	 * the pagecache no longer reflects what's on disk, xfs_db reads the
+	 * stale metadata, and fails to find /a.  Most of the time this succeeds
+	 * because closing a bdev invalidates the page cache, but when processes
+	 * race, everyone loses.
+	 */
 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
 		blkdev_issue_flush(mp->m_logdev_targp->bt_bdev);
 		invalidate_bdev(mp->m_logdev_targp->bt_bdev);

From 1489dffd51d7e4519855723b00516719d5aa3a6a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:40 -0700
Subject: [PATCH 0949/2047] ext4: close the external journal device in
 ->kill_sb

blkdev_put must not be called under sb->s_umount to avoid a lock order
reversal with disk->open_mutex.  Move closing the external journal device
into ->kill_sb to archive that.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Message-Id: <20230809220545.1308228-9-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ext4/super.c | 50 ++++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b24f5b9f63e0..17c3e792d07e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -93,6 +93,7 @@ static int ext4_get_tree(struct fs_context *fc);
 static int ext4_reconfigure(struct fs_context *fc);
 static void ext4_fc_free(struct fs_context *fc);
 static int ext4_init_fs_context(struct fs_context *fc);
+static void ext4_kill_sb(struct super_block *sb);
 static const struct fs_parameter_spec ext4_param_specs[];
 
 /*
@@ -135,7 +136,7 @@ static struct file_system_type ext2_fs_type = {
 	.name			= "ext2",
 	.init_fs_context	= ext4_init_fs_context,
 	.parameters		= ext4_param_specs,
-	.kill_sb		= kill_block_super,
+	.kill_sb		= ext4_kill_sb,
 	.fs_flags		= FS_REQUIRES_DEV,
 };
 MODULE_ALIAS_FS("ext2");
@@ -151,7 +152,7 @@ static struct file_system_type ext3_fs_type = {
 	.name			= "ext3",
 	.init_fs_context	= ext4_init_fs_context,
 	.parameters		= ext4_param_specs,
-	.kill_sb		= kill_block_super,
+	.kill_sb		= ext4_kill_sb,
 	.fs_flags		= FS_REQUIRES_DEV,
 };
 MODULE_ALIAS_FS("ext3");
@@ -1125,25 +1126,6 @@ fail:
 	return NULL;
 }
 
-/*
- * Release the journal device
- */
-static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
-{
-	struct block_device *bdev;
-	bdev = sbi->s_journal_bdev;
-	if (bdev) {
-		/*
-		 * Invalidate the journal device's buffers.  We don't want them
-		 * floating about in memory - the physical journal device may
-		 * hotswapped, and it breaks the `ro-after' testing code.
-		 */
-		invalidate_bdev(bdev);
-		blkdev_put(bdev, sbi->s_sb);
-		sbi->s_journal_bdev = NULL;
-	}
-}
-
 static inline struct inode *orphan_list_entry(struct list_head *l)
 {
 	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
@@ -1339,8 +1321,13 @@ static void ext4_put_super(struct super_block *sb)
 	sync_blockdev(sb->s_bdev);
 	invalidate_bdev(sb->s_bdev);
 	if (sbi->s_journal_bdev) {
+		/*
+		 * Invalidate the journal device's buffers.  We don't want them
+		 * floating about in memory - the physical journal device may
+		 * hotswapped, and it breaks the `ro-after' testing code.
+		 */
 		sync_blockdev(sbi->s_journal_bdev);
-		ext4_blkdev_remove(sbi);
+		invalidate_bdev(sbi->s_journal_bdev);
 	}
 
 	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
@@ -5663,9 +5650,11 @@ failed_mount:
 		kfree(get_qf_name(sb, sbi, i));
 #endif
 	fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
-	/* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
 	brelse(sbi->s_sbh);
-	ext4_blkdev_remove(sbi);
+	if (sbi->s_journal_bdev) {
+		invalidate_bdev(sbi->s_journal_bdev);
+		blkdev_put(sbi->s_journal_bdev, sb);
+	}
 out_fail:
 	invalidate_bdev(sb->s_bdev);
 	sb->s_fs_info = NULL;
@@ -7272,12 +7261,23 @@ static inline int ext3_feature_set_ok(struct super_block *sb)
 	return 1;
 }
 
+static void ext4_kill_sb(struct super_block *sb)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct block_device *journal_bdev = sbi ? sbi->s_journal_bdev : NULL;
+
+	kill_block_super(sb);
+
+	if (journal_bdev)
+		blkdev_put(journal_bdev, sb);
+}
+
 static struct file_system_type ext4_fs_type = {
 	.owner			= THIS_MODULE,
 	.name			= "ext4",
 	.init_fs_context	= ext4_init_fs_context,
 	.parameters		= ext4_param_specs,
-	.kill_sb		= kill_block_super,
+	.kill_sb		= ext4_kill_sb,
 	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
 };
 MODULE_ALIAS_FS("ext4");

From c934dc927e8efa9e0dc3cba8787912d6c23d272f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:41 -0700
Subject: [PATCH 0950/2047] exfat: don't RCU-free the sbi

There are no RCU critical sections for accessing any information in the
sbi, so drop the call_rcu indirection for freeing the sbi.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Message-Id: <20230809220545.1308228-10-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/exfat/exfat_fs.h |  2 --
 fs/exfat/super.c    | 15 ++++-----------
 2 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 729ada9e26e8..f55498e5c23d 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -273,8 +273,6 @@ struct exfat_sb_info {
 
 	spinlock_t inode_hash_lock;
 	struct hlist_head inode_hashtable[EXFAT_HASH_SIZE];
-
-	struct rcu_head rcu;
 };
 
 #define EXFAT_CACHE_VALID	0
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 8c32460e031e..3c6aec96d0dc 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -31,16 +31,6 @@ static void exfat_free_iocharset(struct exfat_sb_info *sbi)
 		kfree(sbi->options.iocharset);
 }
 
-static void exfat_delayed_free(struct rcu_head *p)
-{
-	struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu);
-
-	unload_nls(sbi->nls_io);
-	exfat_free_iocharset(sbi);
-	exfat_free_upcase_table(sbi);
-	kfree(sbi);
-}
-
 static void exfat_put_super(struct super_block *sb)
 {
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
@@ -50,7 +40,10 @@ static void exfat_put_super(struct super_block *sb)
 	brelse(sbi->boot_bh);
 	mutex_unlock(&sbi->s_lock);
 
-	call_rcu(&sbi->rcu, exfat_delayed_free);
+	unload_nls(sbi->nls_io);
+	exfat_free_iocharset(sbi);
+	exfat_free_upcase_table(sbi);
+	kfree(sbi);
 }
 
 static int exfat_sync_fs(struct super_block *sb, int wait)

From 4abc9a43d99ccab7bd71742b86d2f48d8be798c3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:42 -0700
Subject: [PATCH 0951/2047] exfat: free the sbi and iocharset in ->kill_sb

As a rule of thumb everything allocated to the fs_context and moved into
the super_block should be freed by ->kill_sb so that the teardown
handling doesn't need to be duplicated between the fill_super error
path and put_super.  Implement an exfat-specific kill_sb method to do
that and share the code with the mount contex free helper for the
mount error handling case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Message-Id: <20230809220545.1308228-11-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/exfat/super.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 3c6aec96d0dc..85b04a4064af 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -41,9 +41,7 @@ static void exfat_put_super(struct super_block *sb)
 	mutex_unlock(&sbi->s_lock);
 
 	unload_nls(sbi->nls_io);
-	exfat_free_iocharset(sbi);
 	exfat_free_upcase_table(sbi);
-	kfree(sbi);
 }
 
 static int exfat_sync_fs(struct super_block *sb, int wait)
@@ -703,9 +701,6 @@ free_table:
 
 check_nls_io:
 	unload_nls(sbi->nls_io);
-	exfat_free_iocharset(sbi);
-	sb->s_fs_info = NULL;
-	kfree(sbi);
 	return err;
 }
 
@@ -714,14 +709,18 @@ static int exfat_get_tree(struct fs_context *fc)
 	return get_tree_bdev(fc, exfat_fill_super);
 }
 
+static void exfat_free_sbi(struct exfat_sb_info *sbi)
+{
+	exfat_free_iocharset(sbi);
+	kfree(sbi);
+}
+
 static void exfat_free(struct fs_context *fc)
 {
 	struct exfat_sb_info *sbi = fc->s_fs_info;
 
-	if (sbi) {
-		exfat_free_iocharset(sbi);
-		kfree(sbi);
-	}
+	if (sbi)
+		exfat_free_sbi(sbi);
 }
 
 static int exfat_reconfigure(struct fs_context *fc)
@@ -766,12 +765,21 @@ static int exfat_init_fs_context(struct fs_context *fc)
 	return 0;
 }
 
+static void exfat_kill_sb(struct super_block *sb)
+{
+	struct exfat_sb_info *sbi = sb->s_fs_info;
+
+	kill_block_super(sb);
+	if (sbi)
+		exfat_free_sbi(sbi);
+}
+
 static struct file_system_type exfat_fs_type = {
 	.owner			= THIS_MODULE,
 	.name			= "exfat",
 	.init_fs_context	= exfat_init_fs_context,
 	.parameters		= exfat_parameters,
-	.kill_sb		= kill_block_super,
+	.kill_sb		= exfat_kill_sb,
 	.fs_flags		= FS_REQUIRES_DEV,
 };
 

From 126dbf8a1b9cb2d764a314716a8ea98398a6fcb2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:43 -0700
Subject: [PATCH 0952/2047] ntfs3: rename put_ntfs ntfs3_free_sbi

put_ntfs is a rather unconventional name for a function that frees the
sbi and associated resources.  Give it a more descriptive name and drop
the duplicate name in the top of the function comment.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Message-Id: <20230809220545.1308228-12-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ntfs3/super.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 1a02072b6b0e..bb985d3756d9 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -569,9 +569,9 @@ static void init_once(void *foo)
 }
 
 /*
- * put_ntfs - Noinline to reduce binary size.
+ * Noinline to reduce binary size.
  */
-static noinline void put_ntfs(struct ntfs_sb_info *sbi)
+static noinline void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
 {
 	kfree(sbi->new_rec);
 	kvfree(ntfs_put_shared(sbi->upcase));
@@ -627,7 +627,7 @@ static void ntfs_put_super(struct super_block *sb)
 	ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
 
 	put_mount_options(sbi->options);
-	put_ntfs(sbi);
+	ntfs3_free_sbi(sbi);
 	sb->s_fs_info = NULL;
 
 	sync_blockdev(sb->s_bdev);
@@ -1569,7 +1569,7 @@ out:
 	 * ntfs_fs_free will be called with fc->s_fs_info = NULL
 	 */
 	put_mount_options(sbi->options);
-	put_ntfs(sbi);
+	ntfs3_free_sbi(sbi);
 	sb->s_fs_info = NULL;
 	kfree(boot2);
 
@@ -1659,7 +1659,7 @@ static void ntfs_fs_free(struct fs_context *fc)
 	struct ntfs_sb_info *sbi = fc->s_fs_info;
 
 	if (sbi)
-		put_ntfs(sbi);
+		ntfs3_free_sbi(sbi);
 
 	if (opts)
 		put_mount_options(opts);

From 5f0fb2210bb34ecd3f7bfde0d8f0068b79b2e094 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:44 -0700
Subject: [PATCH 0953/2047] ntfs3: don't call sync_blockdev in ntfs_put_super

kill_block_super will call sync_blockdev just a tad later already.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Message-Id: <20230809220545.1308228-13-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ntfs3/super.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index bb985d3756d9..727138933a93 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -629,8 +629,6 @@ static void ntfs_put_super(struct super_block *sb)
 	put_mount_options(sbi->options);
 	ntfs3_free_sbi(sbi);
 	sb->s_fs_info = NULL;
-
-	sync_blockdev(sb->s_bdev);
 }
 
 static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf)

From a4f64a300a299f884a1da55d99c97a87061201cd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 9 Aug 2023 15:05:45 -0700
Subject: [PATCH 0954/2047] ntfs3: free the sbi in ->kill_sb

As a rule of thumb everything allocated to the fs_context and moved into
the super_block should be freed by ->kill_sb so that the teardown
handling doesn't need to be duplicated between the fill_super error
path and put_super.  Implement an ntfs3-specific kill_sb method to do
that.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Message-Id: <20230809220545.1308228-14-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ntfs3/super.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 727138933a93..5fffddea554f 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -625,10 +625,6 @@ static void ntfs_put_super(struct super_block *sb)
 
 	/* Mark rw ntfs as clear, if possible. */
 	ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
-
-	put_mount_options(sbi->options);
-	ntfs3_free_sbi(sbi);
-	sb->s_fs_info = NULL;
 }
 
 static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1562,15 +1558,7 @@ load_root:
 put_inode_out:
 	iput(inode);
 out:
-	/*
-	 * Free resources here.
-	 * ntfs_fs_free will be called with fc->s_fs_info = NULL
-	 */
-	put_mount_options(sbi->options);
-	ntfs3_free_sbi(sbi);
-	sb->s_fs_info = NULL;
 	kfree(boot2);
-
 	return err;
 }
 
@@ -1726,13 +1714,24 @@ free_opts:
 	return -ENOMEM;
 }
 
+static void ntfs3_kill_sb(struct super_block *sb)
+{
+	struct ntfs_sb_info *sbi = sb->s_fs_info;
+
+	kill_block_super(sb);
+
+	if (sbi->options)
+		put_mount_options(sbi->options);
+	ntfs3_free_sbi(sbi);
+}
+
 // clang-format off
 static struct file_system_type ntfs_fs_type = {
 	.owner			= THIS_MODULE,
 	.name			= "ntfs3",
 	.init_fs_context	= ntfs_init_fs_context,
 	.parameters		= ntfs_fs_parameters,
-	.kill_sb		= kill_block_super,
+	.kill_sb		= ntfs3_kill_sb,
 	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
 };
 // clang-format on

From aca740cecbe57b12bd9c1fc632092af5ebacda0c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 24 Jul 2023 10:51:45 -0700
Subject: [PATCH 0955/2047] fs: open block device after superblock creation

Currently get_tree_bdev and mount_bdev open the block device before
committing to allocating a super block. That creates problems for
restricting the number of writers to a device, and also leads to a
unusual and not very helpful holder (the fs_type).

Reorganize the super block code to first look whether the superblock for
a particular device does already exist and open the block device only if
it doesn't.

[hch: port to before the bdev_handle changes,
      duplicate the bdev read-only check from blkdev_get_by_path,
      extend the fsfree_mutex coverage to protect against freezes,
      fix an open bdev leak when the bdev is frozen,
      use the bdev local variable more,
      rename the s variable to sb to be more descriptive]
[brauner: remove references to mounts as they're mostly irrelevant]
[brauner & hch: fold fixes for romfs and cramfs for
                syzbot+2faac0423fdc9692822b@syzkaller.appspotmail.com]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Message-Id: <20230724175145.201318-1-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/cramfs/inode.c |   8 +-
 fs/romfs/super.c  |  10 ++-
 fs/super.c        | 188 +++++++++++++++++++++++-----------------------
 3 files changed, 107 insertions(+), 99 deletions(-)

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 27c6597aa1be..bc99d0fb8ad4 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -485,12 +485,16 @@ static void cramfs_kill_sb(struct super_block *sb)
 {
 	struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
 
+	generic_shutdown_super(sb);
+
 	if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) {
 		if (sbi && sbi->mtd_point_size)
 			mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size);
-		kill_mtd_super(sb);
+		put_mtd_device(sb->s_mtd);
+		sb->s_mtd = NULL;
 	} else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
-		kill_block_super(sb);
+		sync_blockdev(sb->s_bdev);
+		blkdev_put(sb->s_bdev, sb->s_type);
 	}
 	kfree(sbi);
 }
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index c59b230d55b4..42d7a344472f 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -583,16 +583,18 @@ static int romfs_init_fs_context(struct fs_context *fc)
  */
 static void romfs_kill_sb(struct super_block *sb)
 {
+	generic_shutdown_super(sb);
+
 #ifdef CONFIG_ROMFS_ON_MTD
 	if (sb->s_mtd) {
-		kill_mtd_super(sb);
-		return;
+		put_mtd_device(sb->s_mtd);
+		sb->s_mtd = NULL;
 	}
 #endif
 #ifdef CONFIG_ROMFS_ON_BLOCK
 	if (sb->s_bdev) {
-		kill_block_super(sb);
-		return;
+		sync_blockdev(sb->s_bdev);
+		blkdev_put(sb->s_bdev, sb->s_type);
 	}
 #endif
 }
diff --git a/fs/super.c b/fs/super.c
index 7755cc2a3607..249558ecfd77 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1228,12 +1228,7 @@ static const struct blk_holder_ops fs_holder_ops = {
 
 static int set_bdev_super(struct super_block *s, void *data)
 {
-	s->s_bdev = data;
-	s->s_dev = s->s_bdev->bd_dev;
-	s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
-
-	if (bdev_stable_writes(s->s_bdev))
-		s->s_iflags |= SB_I_STABLE_WRITES;
+	s->s_dev = *(dev_t *)data;
 	return 0;
 }
 
@@ -1244,7 +1239,61 @@ static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
 
 static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
 {
-	return !(s->s_iflags & SB_I_RETIRED) && s->s_bdev == fc->sget_key;
+	return !(s->s_iflags & SB_I_RETIRED) &&
+		s->s_dev == *(dev_t *)fc->sget_key;
+}
+
+static int setup_bdev_super(struct super_block *sb, int sb_flags,
+		struct fs_context *fc)
+{
+	blk_mode_t mode = sb_open_mode(sb_flags);
+	struct block_device *bdev;
+
+	bdev = blkdev_get_by_dev(sb->s_dev, mode, sb->s_type, &fs_holder_ops);
+	if (IS_ERR(bdev)) {
+		if (fc)
+			errorf(fc, "%s: Can't open blockdev", fc->source);
+		return PTR_ERR(bdev);
+	}
+
+	/*
+	 * This really should be in blkdev_get_by_dev, but right now can't due
+	 * to legacy issues that require us to allow opening a block device node
+	 * writable from userspace even for a read-only block device.
+	 */
+	if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
+		blkdev_put(bdev, sb->s_type);
+		return -EACCES;
+	}
+
+	/*
+	 * Until SB_BORN flag is set, there can be no active superblock
+	 * references and thus no filesystem freezing. get_active_super() will
+	 * just loop waiting for SB_BORN so even freeze_bdev() cannot proceed.
+	 *
+	 * It is enough to check bdev was not frozen before we set s_bdev.
+	 */
+	mutex_lock(&bdev->bd_fsfreeze_mutex);
+	if (bdev->bd_fsfreeze_count > 0) {
+		mutex_unlock(&bdev->bd_fsfreeze_mutex);
+		if (fc)
+			warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
+		blkdev_put(bdev, sb->s_type);
+		return -EBUSY;
+	}
+	spin_lock(&sb_lock);
+	sb->s_bdev = bdev;
+	sb->s_bdi = bdi_get(bdev->bd_disk->bdi);
+	if (bdev_stable_writes(bdev))
+		sb->s_iflags |= SB_I_STABLE_WRITES;
+	spin_unlock(&sb_lock);
+	mutex_unlock(&bdev->bd_fsfreeze_mutex);
+
+	snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
+	shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name,
+				sb->s_id);
+	sb_set_blocksize(sb, block_size(bdev));
+	return 0;
 }
 
 /**
@@ -1256,71 +1305,48 @@ int get_tree_bdev(struct fs_context *fc,
 		int (*fill_super)(struct super_block *,
 				  struct fs_context *))
 {
-	struct block_device *bdev;
 	struct super_block *s;
 	int error = 0;
+	dev_t dev;
 
 	if (!fc->source)
 		return invalf(fc, "No source specified");
 
-	bdev = blkdev_get_by_path(fc->source, sb_open_mode(fc->sb_flags),
-				  fc->fs_type, &fs_holder_ops);
-	if (IS_ERR(bdev)) {
-		errorf(fc, "%s: Can't open blockdev", fc->source);
-		return PTR_ERR(bdev);
-	}
-
-	/* Once the superblock is inserted into the list by sget_fc(), s_umount
-	 * will protect the lockfs code from trying to start a snapshot while
-	 * we are mounting
-	 */
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (bdev->bd_fsfreeze_count > 0) {
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
-		blkdev_put(bdev, fc->fs_type);
-		return -EBUSY;
+	error = lookup_bdev(fc->source, &dev);
+	if (error) {
+		errorf(fc, "%s: Can't lookup blockdev", fc->source);
+		return error;
 	}
 
 	fc->sb_flags |= SB_NOSEC;
-	fc->sget_key = bdev;
+	fc->sget_key = &dev;
 	s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
-	if (IS_ERR(s)) {
-		blkdev_put(bdev, fc->fs_type);
+	if (IS_ERR(s))
 		return PTR_ERR(s);
-	}
 
 	if (s->s_root) {
 		/* Don't summarily change the RO/RW state. */
 		if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
-			warnf(fc, "%pg: Can't mount, would change RO state", bdev);
+			warnf(fc, "%pg: Can't mount, would change RO state", s->s_bdev);
 			deactivate_locked_super(s);
-			blkdev_put(bdev, fc->fs_type);
 			return -EBUSY;
 		}
-
+	} else {
 		/*
-		 * s_umount nests inside open_mutex during
-		 * __invalidate_device().  blkdev_put() acquires
-		 * open_mutex and can't be called under s_umount.  Drop
-		 * s_umount temporarily.  This is safe as we're
-		 * holding an active reference.
+		 * We drop s_umount here because we need to open the bdev and
+		 * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+		 * __invalidate_device()). It is safe because we have active sb
+		 * reference and SB_BORN is not set yet.
 		 */
 		up_write(&s->s_umount);
-		blkdev_put(bdev, fc->fs_type);
+		error = setup_bdev_super(s, fc->sb_flags, fc);
 		down_write(&s->s_umount);
-	} else {
-		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
-		shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
-					fc->fs_type->name, s->s_id);
-		sb_set_blocksize(s, block_size(bdev));
-		error = fill_super(s, fc);
+		if (!error)
+			error = fill_super(s, fc);
 		if (error) {
 			deactivate_locked_super(s);
 			return error;
 		}
-
 		s->s_flags |= SB_ACTIVE;
 	}
 
@@ -1332,78 +1358,52 @@ EXPORT_SYMBOL(get_tree_bdev);
 
 static int test_bdev_super(struct super_block *s, void *data)
 {
-	return !(s->s_iflags & SB_I_RETIRED) && (void *)s->s_bdev == data;
+	return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
 }
 
 struct dentry *mount_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
 	int (*fill_super)(struct super_block *, void *, int))
 {
-	struct block_device *bdev;
 	struct super_block *s;
-	int error = 0;
+	int error;
+	dev_t dev;
 
-	bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type,
-				  &fs_holder_ops);
-	if (IS_ERR(bdev))
-		return ERR_CAST(bdev);
+	error = lookup_bdev(dev_name, &dev);
+	if (error)
+		return ERR_PTR(error);
 
-	/*
-	 * once the super is inserted into the list by sget, s_umount
-	 * will protect the lockfs code from trying to start a snapshot
-	 * while we are mounting
-	 */
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (bdev->bd_fsfreeze_count > 0) {
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		error = -EBUSY;
-		goto error_bdev;
-	}
-	s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC,
-		 bdev);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
+	flags |= SB_NOSEC;
+	s = sget(fs_type, test_bdev_super, set_bdev_super, flags, &dev);
 	if (IS_ERR(s))
-		goto error_s;
+		return ERR_CAST(s);
 
 	if (s->s_root) {
 		if ((flags ^ s->s_flags) & SB_RDONLY) {
 			deactivate_locked_super(s);
-			error = -EBUSY;
-			goto error_bdev;
+			return ERR_PTR(-EBUSY);
 		}
-
+	} else {
 		/*
-		 * s_umount nests inside open_mutex during
-		 * __invalidate_device().  blkdev_put() acquires
-		 * open_mutex and can't be called under s_umount.  Drop
-		 * s_umount temporarily.  This is safe as we're
-		 * holding an active reference.
+		 * We drop s_umount here because we need to open the bdev and
+		 * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+		 * __invalidate_device()). It is safe because we have active sb
+		 * reference and SB_BORN is not set yet.
 		 */
 		up_write(&s->s_umount);
-		blkdev_put(bdev, fs_type);
+		error = setup_bdev_super(s, flags, NULL);
 		down_write(&s->s_umount);
-	} else {
-		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
-		shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
-					fs_type->name, s->s_id);
-		sb_set_blocksize(s, block_size(bdev));
-		error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
+		if (!error)
+			error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
 		if (error) {
 			deactivate_locked_super(s);
-			goto error;
+			return ERR_PTR(error);
 		}
 
 		s->s_flags |= SB_ACTIVE;
 	}
 
 	return dget(s->s_root);
-
-error_s:
-	error = PTR_ERR(s);
-error_bdev:
-	blkdev_put(bdev, fs_type);
-error:
-	return ERR_PTR(error);
 }
 EXPORT_SYMBOL(mount_bdev);
 
@@ -1412,8 +1412,10 @@ void kill_block_super(struct super_block *sb)
 	struct block_device *bdev = sb->s_bdev;
 
 	generic_shutdown_super(sb);
-	sync_blockdev(bdev);
-	blkdev_put(bdev, sb->s_type);
+	if (bdev) {
+		sync_blockdev(bdev);
+		blkdev_put(bdev, sb->s_type);
+	}
 }
 
 EXPORT_SYMBOL(kill_block_super);

From cf6da236c27a73ab91b657232cd3841aab27c37a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Aug 2023 17:41:20 +0200
Subject: [PATCH 0956/2047] fs: export setup_bdev_super

We'll want to use setup_bdev_super instead of duplicating it in nilfs2.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Message-Id: <20230802154131.2221419-2-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/super.c                 | 3 ++-
 include/linux/fs_context.h | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/super.c b/fs/super.c
index 249558ecfd77..a366bc65886e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1243,7 +1243,7 @@ static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
 		s->s_dev == *(dev_t *)fc->sget_key;
 }
 
-static int setup_bdev_super(struct super_block *sb, int sb_flags,
+int setup_bdev_super(struct super_block *sb, int sb_flags,
 		struct fs_context *fc)
 {
 	blk_mode_t mode = sb_open_mode(sb_flags);
@@ -1295,6 +1295,7 @@ static int setup_bdev_super(struct super_block *sb, int sb_flags,
 	sb_set_blocksize(sb, block_size(bdev));
 	return 0;
 }
+EXPORT_SYMBOL_GPL(setup_bdev_super);
 
 /**
  * get_tree_bdev - Get a superblock based on a single block device
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index ff6341e09925..58ef8433a94b 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -158,6 +158,8 @@ extern int get_tree_keyed(struct fs_context *fc,
 					   struct fs_context *fc),
 			 void *key);
 
+int setup_bdev_super(struct super_block *sb, int sb_flags,
+		struct fs_context *fc);
 extern int get_tree_bdev(struct fs_context *fc,
 			       int (*fill_super)(struct super_block *sb,
 						 struct fs_context *fc));

From c1e012ea9e835f81ac6062e8aa9e72940a671b95 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Aug 2023 17:41:21 +0200
Subject: [PATCH 0957/2047] nilfs2: use setup_bdev_super to de-duplicate the
 mount code

Use the generic setup_bdev_super helper to open the main block device
and do various bits of superblock setup instead of duplicating the
logic.  This includes moving to the new scheme implemented in common
code that only opens the block device after the superblock has allocated.

It does not yet convert nilfs2 to the new mount API, but doing so will
become a bit simpler after this first step.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Message-Id: <20230802154131.2221419-3-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/nilfs2/super.c | 81 ++++++++++++++++++-----------------------------
 1 file changed, 30 insertions(+), 51 deletions(-)

diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0ef8c71bde8e..a5d1fa4e7552 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -35,6 +35,7 @@
 #include <linux/writeback.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
+#include <linux/fs_context.h>
 #include "nilfs.h"
 #include "export.h"
 #include "mdt.h"
@@ -1216,7 +1217,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 }
 
 struct nilfs_super_data {
-	struct block_device *bdev;
 	__u64 cno;
 	int flags;
 };
@@ -1283,64 +1283,49 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd)
 
 static int nilfs_set_bdev_super(struct super_block *s, void *data)
 {
-	s->s_bdev = data;
-	s->s_dev = s->s_bdev->bd_dev;
+	s->s_dev = *(dev_t *)data;
 	return 0;
 }
 
 static int nilfs_test_bdev_super(struct super_block *s, void *data)
 {
-	return (void *)s->s_bdev == data;
+	return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
 }
 
 static struct dentry *
 nilfs_mount(struct file_system_type *fs_type, int flags,
 	     const char *dev_name, void *data)
 {
-	struct nilfs_super_data sd;
+	struct nilfs_super_data sd = { .flags = flags };
 	struct super_block *s;
-	struct dentry *root_dentry;
-	int err, s_new = false;
+	dev_t dev;
+	int err;
 
-	sd.bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type,
-				     NULL);
-	if (IS_ERR(sd.bdev))
-		return ERR_CAST(sd.bdev);
+	if (nilfs_identify(data, &sd))
+		return ERR_PTR(-EINVAL);
 
-	sd.cno = 0;
-	sd.flags = flags;
-	if (nilfs_identify((char *)data, &sd)) {
-		err = -EINVAL;
-		goto failed;
-	}
+	err = lookup_bdev(dev_name, &dev);
+	if (err)
+		return ERR_PTR(err);
 
-	/*
-	 * once the super is inserted into the list by sget, s_umount
-	 * will protect the lockfs code from trying to start a snapshot
-	 * while we are mounting
-	 */
-	mutex_lock(&sd.bdev->bd_fsfreeze_mutex);
-	if (sd.bdev->bd_fsfreeze_count > 0) {
-		mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
-		err = -EBUSY;
-		goto failed;
-	}
 	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags,
-		 sd.bdev);
-	mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
-	if (IS_ERR(s)) {
-		err = PTR_ERR(s);
-		goto failed;
-	}
+		 &dev);
+	if (IS_ERR(s))
+		return ERR_CAST(s);
 
 	if (!s->s_root) {
-		s_new = true;
-
-		/* New superblock instance created */
-		snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev);
-		sb_set_blocksize(s, block_size(sd.bdev));
-
-		err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0);
+		/*
+		 * We drop s_umount here because we need to open the bdev and
+		 * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+		 * __invalidate_device()). It is safe because we have active sb
+		 * reference and SB_BORN is not set yet.
+		 */
+		up_write(&s->s_umount);
+		err = setup_bdev_super(s, flags, NULL);
+		down_write(&s->s_umount);
+		if (!err)
+			err = nilfs_fill_super(s, data,
+					       flags & SB_SILENT ? 1 : 0);
 		if (err)
 			goto failed_super;
 
@@ -1366,24 +1351,18 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
 	}
 
 	if (sd.cno) {
+		struct dentry *root_dentry;
+
 		err = nilfs_attach_snapshot(s, sd.cno, &root_dentry);
 		if (err)
 			goto failed_super;
-	} else {
-		root_dentry = dget(s->s_root);
+		return root_dentry;
 	}
 
-	if (!s_new)
-		blkdev_put(sd.bdev, fs_type);
-
-	return root_dentry;
+	return dget(s->s_root);
 
  failed_super:
 	deactivate_locked_super(s);
-
- failed:
-	if (!s_new)
-		blkdev_put(sd.bdev, fs_type);
 	return ERR_PTR(err);
 }
 

From 4b41828be268544286794c18200e83861de3554e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Aug 2023 17:41:24 +0200
Subject: [PATCH 0958/2047] ext4: make the IS_EXT2_SB/IS_EXT3_SB checks more
 robust

Check for sb->s_type which is the right place to look at the file system
type, not the holder, which is just an implementation detail in the VFS
helpers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Message-Id: <20230802154131.2221419-6-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ext4/super.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 17c3e792d07e..fa8888f0fed9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -141,7 +141,7 @@ static struct file_system_type ext2_fs_type = {
 };
 MODULE_ALIAS_FS("ext2");
 MODULE_ALIAS("ext2");
-#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
+#define IS_EXT2_SB(sb) ((sb)->s_type == &ext2_fs_type)
 #else
 #define IS_EXT2_SB(sb) (0)
 #endif
@@ -157,7 +157,7 @@ static struct file_system_type ext3_fs_type = {
 };
 MODULE_ALIAS_FS("ext3");
 MODULE_ALIAS("ext3");
-#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
+#define IS_EXT3_SB(sb) ((sb)->s_type == &ext3_fs_type)
 
 
 static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,

From 01948b09edc3fecf8486c57c2d2fb8b80886f3d0 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 31 Jul 2023 14:58:48 +0100
Subject: [PATCH 0959/2047] arm64/fpsimd: Only provide the length to cpufeature
 for xCR registers

For both SVE and SME we abuse the generic register field comparison
support in the cpufeature code as part of our detection of unsupported
variations in the vector lengths available to PEs, reporting the maximum
vector lengths via ZCR_EL1.LEN and SMCR_EL1.LEN.  Since these are
configuration registers rather than identification registers the
assumptions the cpufeature code makes about how unknown bitfields behave
are invalid, leading to warnings when SME features like FA64 are enabled
and we hotplug a CPU:

  CPU features: SANITY CHECK: Unexpected variation in SYS_SMCR_EL1. Boot CPU: 0x0000000000000f, CPU3: 0x0000008000000f
  CPU features: Unsupported CPU feature variation detected.

SVE has no controls other than the vector length so is not yet impacted
but the same issue will apply there if any are defined.

Since the only field we are interested in having the cpufeature code
handle is the length field and we use a custom read function to obtain
the value we can avoid these warnings by filtering out all other bits
when we return the register value, if we're doing that we don't need to
bother reading the register at all and can simply use the RDVL/RDSVL
value we were filling in instead.

Fixes: 2e0f2478ea37 ("arm64/sve: Probe SVE capabilities and usable vector lengths")
FixeS: b42990d3bf77 ("arm64/sme: Identify supported SME vector lengths at boot")
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20230731-arm64-sme-fa64-hotplug-v2-1-7714c00dd902@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/fpsimd.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 89d54a5242d1..8e9a3451af4c 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1178,9 +1178,6 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
  */
 u64 read_zcr_features(void)
 {
-	u64 zcr;
-	unsigned int vq_max;
-
 	/*
 	 * Set the maximum possible VL, and write zeroes to all other
 	 * bits to see if they stick.
@@ -1188,12 +1185,8 @@ u64 read_zcr_features(void)
 	sve_kernel_enable(NULL);
 	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
 
-	zcr = read_sysreg_s(SYS_ZCR_EL1);
-	zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
-	vq_max = sve_vq_from_vl(sve_get_vl());
-	zcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
-	return zcr;
+	/* Return LEN value that would be written to get the maximum VL */
+	return sve_vq_from_vl(sve_get_vl()) - 1;
 }
 
 void __init sve_setup(void)
@@ -1348,9 +1341,6 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
  */
 u64 read_smcr_features(void)
 {
-	u64 smcr;
-	unsigned int vq_max;
-
 	sme_kernel_enable(NULL);
 
 	/*
@@ -1359,12 +1349,8 @@ u64 read_smcr_features(void)
 	write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
 		       SYS_SMCR_EL1);
 
-	smcr = read_sysreg_s(SYS_SMCR_EL1);
-	smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */
-	vq_max = sve_vq_from_vl(sme_get_vl());
-	smcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
-	return smcr;
+	/* Return LEN value that would be written to get the maximum VL */
+	return sve_vq_from_vl(sme_get_vl()) - 1;
 }
 
 void __init sme_setup(void)

From 2daf18a7884dc03d5164ab9c7dc3f2ea70638469 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 8 Aug 2023 21:33:56 -0700
Subject: [PATCH 0960/2047] tmpfs,xattr: enable limited user extended
 attributes

Enable "user." extended attributes on tmpfs, limiting them by tracking
the space they occupy, and deducting that space from the limited ispace
(unless tmpfs mounted with nr_inodes=0 to leave that ispace unlimited).

tmpfs inodes and simple xattrs are both unswappable, and have to be in
lowmem on a 32-bit highmem kernel: so the ispace limit is appropriate
for xattrs, without any need for a further mount option.

Add simple_xattr_space() to give approximate but deterministic estimate
of the space taken up by each xattr: with simple_xattrs_free() outputting
the space freed if required (but kernfs and even some tmpfs usages do not
require that, so don't waste time on strlen'ing if not needed).

Security and trusted xattrs were already supported: for consistency and
simplicity, account them from the same pool; though there's a small risk
that a tmpfs with enough space before would now be considered too small.

When extended attributes are used, "df -i" does show more IUsed and less
IFree than can be explained by the inodes: document that (manpage later).

xfstests tests/generic which were not run on tmpfs before but now pass:
020 037 062 070 077 097 103 117 337 377 454 486 523 533 611 618 728
with no new failures.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Message-Id: <2e63b26e-df46-5baa-c7d6-f9a8dd3282c5@google.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/tmpfs.rst |  7 ++-
 fs/Kconfig                          |  4 +-
 fs/kernfs/dir.c                     |  2 +-
 fs/xattr.c                          | 28 ++++++++++-
 include/linux/xattr.h               |  3 +-
 mm/shmem.c                          | 78 +++++++++++++++++++++++++----
 6 files changed, 106 insertions(+), 16 deletions(-)

diff --git a/Documentation/filesystems/tmpfs.rst b/Documentation/filesystems/tmpfs.rst
index 67422ee10e03..56a26c843dbe 100644
--- a/Documentation/filesystems/tmpfs.rst
+++ b/Documentation/filesystems/tmpfs.rst
@@ -21,8 +21,8 @@ explained further below, some of which can be reconfigured dynamically on the
 fly using a remount ('mount -o remount ...') of the filesystem. A tmpfs
 filesystem can be resized but it cannot be resized to a size below its current
 usage. tmpfs also supports POSIX ACLs, and extended attributes for the
-trusted.* and security.* namespaces. ramfs does not use swap and you cannot
-modify any parameter for a ramfs filesystem. The size limit of a ramfs
+trusted.*, security.* and user.* namespaces. ramfs does not use swap and you
+cannot modify any parameter for a ramfs filesystem. The size limit of a ramfs
 filesystem is how much memory you have available, and so care must be taken if
 used so to not run out of memory.
 
@@ -97,6 +97,9 @@ mount with such options, since it allows any user with write access to
 use up all the memory on the machine; but enhances the scalability of
 that instance in a system with many CPUs making intensive use of it.
 
+If nr_inodes is not 0, that limited space for inodes is also used up by
+extended attributes: "df -i"'s IUsed and IUse% increase, IFree decreases.
+
 tmpfs blocks may be swapped out, when there is a shortage of memory.
 tmpfs has a mount option to disable its use of swap:
 
diff --git a/fs/Kconfig b/fs/Kconfig
index 8218a71933f9..7da21f563192 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -205,8 +205,8 @@ config TMPFS_XATTR
 	  Extended attributes are name:value pairs associated with inodes by
 	  the kernel or by users (see the attr(5) manual page for details).
 
-	  Currently this enables support for the trusted.* and
-	  security.* namespaces.
+	  This enables support for the trusted.*, security.* and user.*
+	  namespaces.
 
 	  You need this for POSIX ACL support on tmpfs.
 
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 5a1a4af9d3d2..660995856a04 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -556,7 +556,7 @@ void kernfs_put(struct kernfs_node *kn)
 	kfree_const(kn->name);
 
 	if (kn->iattr) {
-		simple_xattrs_free(&kn->iattr->xattrs);
+		simple_xattrs_free(&kn->iattr->xattrs, NULL);
 		kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
 	}
 	spin_lock(&kernfs_idr_lock);
diff --git a/fs/xattr.c b/fs/xattr.c
index ba37a8f5cfd1..2d607542281b 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1039,6 +1039,26 @@ const char *xattr_full_name(const struct xattr_handler *handler,
 }
 EXPORT_SYMBOL(xattr_full_name);
 
+/**
+ * simple_xattr_space - estimate the memory used by a simple xattr
+ * @name: the full name of the xattr
+ * @size: the size of its value
+ *
+ * This takes no account of how much larger the two slab objects actually are:
+ * that would depend on the slab implementation, when what is required is a
+ * deterministic number, which grows with name length and size and quantity.
+ *
+ * Return: The approximate number of bytes of memory used by such an xattr.
+ */
+size_t simple_xattr_space(const char *name, size_t size)
+{
+	/*
+	 * Use "40" instead of sizeof(struct simple_xattr), to return the
+	 * same result on 32-bit and 64-bit, and even if simple_xattr grows.
+	 */
+	return 40 + size + strlen(name);
+}
+
 /**
  * simple_xattr_free - free an xattr object
  * @xattr: the xattr object
@@ -1363,14 +1383,17 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
 /**
  * simple_xattrs_free - free xattrs
  * @xattrs: xattr header whose xattrs to destroy
+ * @freed_space: approximate number of bytes of memory freed from @xattrs
  *
  * Destroy all xattrs in @xattr. When this is called no one can hold a
  * reference to any of the xattrs anymore.
  */
-void simple_xattrs_free(struct simple_xattrs *xattrs)
+void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
 {
 	struct rb_node *rbp;
 
+	if (freed_space)
+		*freed_space = 0;
 	rbp = rb_first(&xattrs->rb_root);
 	while (rbp) {
 		struct simple_xattr *xattr;
@@ -1379,6 +1402,9 @@ void simple_xattrs_free(struct simple_xattrs *xattrs)
 		rbp_next = rb_next(rbp);
 		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
 		rb_erase(&xattr->rb_node, &xattrs->rb_root);
+		if (freed_space)
+			*freed_space += simple_xattr_space(xattr->name,
+							   xattr->size);
 		simple_xattr_free(xattr);
 		rbp = rbp_next;
 	}
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index e37fe667ae04..d20051865800 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -114,7 +114,8 @@ struct simple_xattr {
 };
 
 void simple_xattrs_init(struct simple_xattrs *xattrs);
-void simple_xattrs_free(struct simple_xattrs *xattrs);
+void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space);
+size_t simple_xattr_space(const char *name, size_t size);
 struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
 void simple_xattr_free(struct simple_xattr *xattr);
 int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
diff --git a/mm/shmem.c b/mm/shmem.c
index 8c8f0b1b700c..ca43fb256b8e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -393,12 +393,12 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
 	return 0;
 }
 
-static void shmem_free_inode(struct super_block *sb)
+static void shmem_free_inode(struct super_block *sb, size_t freed_ispace)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 	if (sbinfo->max_inodes) {
 		raw_spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_ispace += BOGO_INODE_SIZE;
+		sbinfo->free_ispace += BOGO_INODE_SIZE + freed_ispace;
 		raw_spin_unlock(&sbinfo->stat_lock);
 	}
 }
@@ -1232,6 +1232,7 @@ static void shmem_evict_inode(struct inode *inode)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	size_t freed = 0;
 
 	if (shmem_mapping(inode->i_mapping)) {
 		shmem_unacct_size(info->flags, inode->i_size);
@@ -1258,9 +1259,9 @@ static void shmem_evict_inode(struct inode *inode)
 		}
 	}
 
-	simple_xattrs_free(&info->xattrs);
+	simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+	shmem_free_inode(inode->i_sb, freed);
 	WARN_ON(inode->i_blocks);
-	shmem_free_inode(inode->i_sb);
 	clear_inode(inode);
 #ifdef CONFIG_TMPFS_QUOTA
 	dquot_free_inode(inode);
@@ -2440,7 +2441,7 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
 	inode = new_inode(sb);
 
 	if (!inode) {
-		shmem_free_inode(sb);
+		shmem_free_inode(sb, 0);
 		return ERR_PTR(-ENOSPC);
 	}
 
@@ -3284,7 +3285,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
 	ret = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
 	if (ret) {
 		if (inode->i_nlink)
-			shmem_free_inode(inode->i_sb);
+			shmem_free_inode(inode->i_sb, 0);
 		goto out;
 	}
 
@@ -3304,7 +3305,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 	struct inode *inode = d_inode(dentry);
 
 	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
-		shmem_free_inode(inode->i_sb);
+		shmem_free_inode(inode->i_sb, 0);
 
 	simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
 
@@ -3557,21 +3558,40 @@ static int shmem_initxattrs(struct inode *inode,
 			    void *fs_info)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	const struct xattr *xattr;
 	struct simple_xattr *new_xattr;
+	size_t ispace = 0;
 	size_t len;
 
+	if (sbinfo->max_inodes) {
+		for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+			ispace += simple_xattr_space(xattr->name,
+				xattr->value_len + XATTR_SECURITY_PREFIX_LEN);
+		}
+		if (ispace) {
+			raw_spin_lock(&sbinfo->stat_lock);
+			if (sbinfo->free_ispace < ispace)
+				ispace = 0;
+			else
+				sbinfo->free_ispace -= ispace;
+			raw_spin_unlock(&sbinfo->stat_lock);
+			if (!ispace)
+				return -ENOSPC;
+		}
+	}
+
 	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
 		new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
 		if (!new_xattr)
-			return -ENOMEM;
+			break;
 
 		len = strlen(xattr->name) + 1;
 		new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
 					  GFP_KERNEL);
 		if (!new_xattr->name) {
 			kvfree(new_xattr);
-			return -ENOMEM;
+			break;
 		}
 
 		memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
@@ -3582,6 +3602,16 @@ static int shmem_initxattrs(struct inode *inode,
 		simple_xattr_add(&info->xattrs, new_xattr);
 	}
 
+	if (xattr->name != NULL) {
+		if (ispace) {
+			raw_spin_lock(&sbinfo->stat_lock);
+			sbinfo->free_ispace += ispace;
+			raw_spin_unlock(&sbinfo->stat_lock);
+		}
+		simple_xattrs_free(&info->xattrs, NULL);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -3602,16 +3632,39 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
 				   size_t size, int flags)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	struct simple_xattr *old_xattr;
+	size_t ispace = 0;
 
 	name = xattr_full_name(handler, name);
+	if (value && sbinfo->max_inodes) {
+		ispace = simple_xattr_space(name, size);
+		raw_spin_lock(&sbinfo->stat_lock);
+		if (sbinfo->free_ispace < ispace)
+			ispace = 0;
+		else
+			sbinfo->free_ispace -= ispace;
+		raw_spin_unlock(&sbinfo->stat_lock);
+		if (!ispace)
+			return -ENOSPC;
+	}
+
 	old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
 	if (!IS_ERR(old_xattr)) {
+		ispace = 0;
+		if (old_xattr && sbinfo->max_inodes)
+			ispace = simple_xattr_space(old_xattr->name,
+						    old_xattr->size);
 		simple_xattr_free(old_xattr);
 		old_xattr = NULL;
 		inode->i_ctime = current_time(inode);
 		inode_inc_iversion(inode);
 	}
+	if (ispace) {
+		raw_spin_lock(&sbinfo->stat_lock);
+		sbinfo->free_ispace += ispace;
+		raw_spin_unlock(&sbinfo->stat_lock);
+	}
 	return PTR_ERR(old_xattr);
 }
 
@@ -3627,9 +3680,16 @@ static const struct xattr_handler shmem_trusted_xattr_handler = {
 	.set = shmem_xattr_handler_set,
 };
 
+static const struct xattr_handler shmem_user_xattr_handler = {
+	.prefix = XATTR_USER_PREFIX,
+	.get = shmem_xattr_handler_get,
+	.set = shmem_xattr_handler_set,
+};
+
 static const struct xattr_handler *shmem_xattr_handlers[] = {
 	&shmem_security_xattr_handler,
 	&shmem_trusted_xattr_handler,
+	&shmem_user_xattr_handler,
 	NULL
 };
 

From b4120b69244470f6d572281993b2e39657ac31cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guido=20G=C3=BCnther?= <agx@sigxcpu.org>
Date: Wed, 9 Aug 2023 20:50:11 +0200
Subject: [PATCH 0961/2047] dt-bindings: mmc: Fix reference to pwr-seq-simple
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's a YAML file nowadays.

Signed-off-by: Guido Günther <agx@sigxcpu.org>
Link: https://lore.kernel.org/r/d7a0be6a2688a9829077cc21ca4a5bf9528f9eb1.1691606520.git.agx@sigxcpu.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/mmc-controller.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
index 86c73fd825fd..58ae298cd2fc 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
+++ b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
@@ -269,7 +269,7 @@ properties:
   post-power-on-delay-ms:
     description:
       It was invented for MMC pwrseq-simple which could be referred to
-      mmc-pwrseq-simple.txt. But now it\'s reused as a tunable delay
+      mmc-pwrseq-simple.yaml. But now it\'s reused as a tunable delay
       waiting for I/O signalling and card power supply to be stable,
       regardless of whether pwrseq-simple is used. Default to 10ms if
       no available.

From c64016609b6f66b753b5f37929a191477fa584c0 Mon Sep 17 00:00:00 2001
From: Avadhut Naik <Avadhut.Naik@amd.com>
Date: Tue, 8 Aug 2023 22:52:42 -0500
Subject: [PATCH 0962/2047] x86/amd_nb: Add PCI IDs for AMD Family 1Ah-based
 models

Add new PCI Device IDs required to support AMD's new Family 1Ah-based
models 00h-1Fh, 20h and 40h-4Fh.

  [ bp: Zap a useless sentence. ]

Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Avadhut Naik <Avadhut.Naik@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230809035244.2722455-2-avadhut.naik@amd.com
---
 arch/x86/kernel/amd_nb.c | 8 ++++++++
 include/linux/pci_ids.h  | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 035a3db5330b..356de955e78d 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -24,6 +24,8 @@
 #define PCI_DEVICE_ID_AMD_19H_M40H_ROOT		0x14b5
 #define PCI_DEVICE_ID_AMD_19H_M60H_ROOT		0x14d8
 #define PCI_DEVICE_ID_AMD_19H_M70H_ROOT		0x14e8
+#define PCI_DEVICE_ID_AMD_1AH_M00H_ROOT		0x153a
+#define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT		0x1507
 #define PCI_DEVICE_ID_AMD_MI200_ROOT		0x14bb
 
 #define PCI_DEVICE_ID_AMD_17H_DF_F4		0x1464
@@ -39,6 +41,7 @@
 #define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4	0x14e4
 #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4	0x14f4
 #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4	0x12fc
+#define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4	0x12c4
 #define PCI_DEVICE_ID_AMD_MI200_DF_F4		0x14d4
 
 /* Protect the PCI config register pairs used for SMN. */
@@ -56,6 +59,8 @@ static const struct pci_device_id amd_root_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_ROOT) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_ROOT) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_ROOT) },
 	{}
 };
@@ -85,6 +90,8 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F3) },
 	{}
 };
@@ -106,6 +113,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) },
 	{}
 };
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2dc75df1437f..8f9a459e1671 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -576,6 +576,8 @@
 #define PCI_DEVICE_ID_AMD_19H_M60H_DF_F3 0x14e3
 #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F3 0x14f3
 #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F3 0x12fb
+#define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3
+#define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb
 #define PCI_DEVICE_ID_AMD_MI200_DF_F3	0x14d3
 #define PCI_DEVICE_ID_AMD_CNB17H_F3	0x1703
 #define PCI_DEVICE_ID_AMD_LANCE		0x2000

From 3cd9da416d5b625d52053c816ee91daf4e97007c Mon Sep 17 00:00:00 2001
From: Avadhut Naik <Avadhut.Naik@amd.com>
Date: Tue, 8 Aug 2023 22:52:43 -0500
Subject: [PATCH 0963/2047] hwmon: (k10temp) Add thermal support for AMD Family
 1Ah-based models

Add thermal info support for AMD Family 1Ah-based models. Support is
provided on a per-socket granularity.

Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Avadhut Naik <Avadhut.Naik@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20230809035244.2722455-3-avadhut.naik@amd.com
---
 drivers/hwmon/k10temp.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index a267b11731a8..bae0becfa24b 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -65,7 +65,7 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
 #define F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET	0xd8200c64
 #define F15H_M60H_REPORTED_TEMP_CTRL_OFFSET	0xd8200ca4
 
-/* Common for Zen CPU families (Family 17h and 18h and 19h) */
+/* Common for Zen CPU families (Family 17h and 18h and 19h and 1Ah) */
 #define ZEN_REPORTED_TEMP_CTRL_BASE		0x00059800
 
 #define ZEN_CCD_TEMP(offset, x)			(ZEN_REPORTED_TEMP_CTRL_BASE + \
@@ -475,6 +475,10 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 			k10temp_get_ccd_support(pdev, data, 12);
 			break;
 		}
+	} else if (boot_cpu_data.x86 == 0x1a) {
+		data->temp_adjust_mask = ZEN_CUR_TEMP_RANGE_SEL_MASK;
+		data->read_tempreg = read_tempreg_nb_zen;
+		data->is_zen = true;
 	} else {
 		data->read_htcreg = read_htcreg_pci;
 		data->read_tempreg = read_tempreg_pci;
@@ -521,6 +525,8 @@ static const struct pci_device_id k10temp_id_table[] = {
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) },
+	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3) },
+	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3) },
 	{ PCI_VDEVICE(HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
 	{}
 };

From c4d07c371283cb0453c8ce187551e4d064cc407e Mon Sep 17 00:00:00 2001
From: Avadhut Naik <Avadhut.Naik@amd.com>
Date: Tue, 8 Aug 2023 22:52:44 -0500
Subject: [PATCH 0964/2047] EDAC/amd64: Add support for AMD family 1Ah models
 00h-1Fh and 40h-4Fh

Add support for family 1Ah-based models 00h-1Fh and 40h-4Fh.

  [ bp: Simplify. ]

Signed-off-by: Avadhut Naik <Avadhut.Naik@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230809035244.2722455-4-avadhut.naik@amd.com
---
 drivers/edac/amd64_edac.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 597dae7692b1..9b6642d00871 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -4150,6 +4150,20 @@ static int per_family_init(struct amd64_pvt *pvt)
 		}
 		break;
 
+	case 0x1A:
+		switch (pvt->model) {
+		case 0x00 ... 0x1f:
+			pvt->ctl_name           = "F1Ah";
+			pvt->max_mcs            = 12;
+			pvt->flags.zn_regs_v2   = 1;
+			break;
+		case 0x40 ... 0x4f:
+			pvt->ctl_name           = "F1Ah_M40h";
+			pvt->flags.zn_regs_v2   = 1;
+			break;
+		}
+		break;
+
 	default:
 		amd64_err("Unsupported family!\n");
 		return -ENODEV;
@@ -4344,6 +4358,7 @@ static const struct x86_cpu_id amd64_cpuids[] = {
 	X86_MATCH_VENDOR_FAM(AMD,	0x17, NULL),
 	X86_MATCH_VENDOR_FAM(HYGON,	0x18, NULL),
 	X86_MATCH_VENDOR_FAM(AMD,	0x19, NULL),
+	X86_MATCH_VENDOR_FAM(AMD,	0x1A, NULL),
 	{ }
 };
 MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);

From 5693d077595de721f9ddbf9d37f40e5409707dfe Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Wed, 9 Aug 2023 13:31:08 +0200
Subject: [PATCH 0965/2047] PM / devfreq: Fix leak in devfreq_dev_release()

srcu_init_notifier_head() allocates resources that need to be released
with a srcu_cleanup_notifier_head() call.

Reported by kmemleak.

Fixes: 0fe3a66410a3 ("PM / devfreq: Add new DEVFREQ_TRANSITION_NOTIFIER notifier")
Cc: <stable@vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Dhruva Gole <d-gole@ti.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/devfreq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index db50c93c0ac3..474d81831ad3 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -764,6 +764,7 @@ static void devfreq_dev_release(struct device *dev)
 		dev_pm_opp_put_opp_table(devfreq->opp_table);
 
 	mutex_destroy(&devfreq->lock);
+	srcu_cleanup_notifier_head(&devfreq->transition_notifier_list);
 	kfree(devfreq);
 }
 

From 55975401fdf86ffe4736a557ae9d6f3d81ee5ba6 Mon Sep 17 00:00:00 2001
From: Li Zetao <lizetao1@huawei.com>
Date: Thu, 10 Aug 2023 19:48:58 +0800
Subject: [PATCH 0966/2047] regulator: qcom_rpm-regulator: Use devm_kmemdup to
 replace devm_kmalloc + memcpy

Use the helper function devm_kmemdup() rather than duplicating its
implementation, which helps to enhance code readability.

Signed-off-by: Li Zetao <lizetao1@huawei.com>
Link: https://lore.kernel.org/r/20230810114858.2103928-1-lizetao1@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/qcom_rpm-regulator.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/regulator/qcom_rpm-regulator.c b/drivers/regulator/qcom_rpm-regulator.c
index f95bc9208c13..9366488f0383 100644
--- a/drivers/regulator/qcom_rpm-regulator.c
+++ b/drivers/regulator/qcom_rpm-regulator.c
@@ -956,11 +956,10 @@ static int rpm_reg_probe(struct platform_device *pdev)
 	}
 
 	for (reg = match->data; reg->name; reg++) {
-		vreg = devm_kmalloc(&pdev->dev, sizeof(*vreg), GFP_KERNEL);
+		vreg = devm_kmemdup(&pdev->dev, reg->template, sizeof(*vreg), GFP_KERNEL);
 		if (!vreg)
 			return -ENOMEM;
 
-		memcpy(vreg, reg->template, sizeof(*vreg));
 		mutex_init(&vreg->lock);
 
 		vreg->dev = &pdev->dev;

From fddc9bb6e277b99b0410410794d156e963aa8f0b Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 13:19:12 +0200
Subject: [PATCH 0967/2047] regulator: ltc3589: Fix Wvoid-pointer-to-enum-cast
 warning

'variant' is an enum, thus cast of pointer on 64-bit compile test with
W=1 causes:

  ltc3589.c:394:22: error: cast to smaller integer type 'enum ltc3589_variant' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810111914.204847-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/ltc3589.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/regulator/ltc3589.c b/drivers/regulator/ltc3589.c
index cf931b8c36dc..d892c2a5df7b 100644
--- a/drivers/regulator/ltc3589.c
+++ b/drivers/regulator/ltc3589.c
@@ -391,8 +391,7 @@ static int ltc3589_probe(struct i2c_client *client)
 
 	i2c_set_clientdata(client, ltc3589);
 	if (client->dev.of_node)
-		ltc3589->variant = (enum ltc3589_variant)
-			of_device_get_match_data(&client->dev);
+		ltc3589->variant = (uintptr_t)of_device_get_match_data(&client->dev);
 	else
 		ltc3589->variant = id->driver_data;
 	ltc3589->dev = dev;

From b29f42c6629bb3bd3d479592d40a7e4c73461a01 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 13:19:13 +0200
Subject: [PATCH 0968/2047] regulator: max77857: Fix Wvoid-pointer-to-enum-cast
 warning

'id' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  max77857-regulator.c:56:24: error: cast to smaller integer type 'enum max77857_id' from 'void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810111914.204847-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max77857-regulator.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/regulator/max77857-regulator.c b/drivers/regulator/max77857-regulator.c
index 44a510547318..145ad0281857 100644
--- a/drivers/regulator/max77857-regulator.c
+++ b/drivers/regulator/max77857-regulator.c
@@ -53,7 +53,7 @@ enum max77857_id {
 
 static bool max77857_volatile_reg(struct device *dev, unsigned int reg)
 {
-	enum max77857_id id = (enum max77857_id)dev_get_drvdata(dev);
+	enum max77857_id id = (uintptr_t)dev_get_drvdata(dev);
 
 	switch (id) {
 	case ID_MAX77831:
@@ -91,7 +91,7 @@ static int max77857_get_status(struct regulator_dev *rdev)
 
 static unsigned int max77857_get_mode(struct regulator_dev *rdev)
 {
-	enum max77857_id id = (enum max77857_id)rdev_get_drvdata(rdev);
+	enum max77857_id id = (uintptr_t)rdev_get_drvdata(rdev);
 	unsigned int regval;
 	int ret;
 
@@ -125,7 +125,7 @@ static unsigned int max77857_get_mode(struct regulator_dev *rdev)
 
 static int max77857_set_mode(struct regulator_dev *rdev, unsigned int mode)
 {
-	enum max77857_id id = (enum max77857_id)rdev_get_drvdata(rdev);
+	enum max77857_id id = (uintptr_t)rdev_get_drvdata(rdev);
 	unsigned int reg, val;
 
 	switch (id) {

From c418920567ae6101826cc05cda042f71435830d0 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 13:19:14 +0200
Subject: [PATCH 0969/2047] regulator: lp872x: Fix Wvoid-pointer-to-enum-cast
 warning

'id' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  lp872x.c:867:5: error: cast to smaller integer type 'enum lp872x_regulator_id' from 'void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810111914.204847-3-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/lp872x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/lp872x.c b/drivers/regulator/lp872x.c
index 63aa227b1813..942f37082cb1 100644
--- a/drivers/regulator/lp872x.c
+++ b/drivers/regulator/lp872x.c
@@ -864,7 +864,7 @@ static struct lp872x_platform_data
 
 	for (i = 0; i < num_matches; i++) {
 		pdata->regulator_data[i].id =
-				(enum lp872x_regulator_id)match[i].driver_data;
+				(uintptr_t)match[i].driver_data;
 		pdata->regulator_data[i].init_data = match[i].init_data;
 	}
 out:

From eca3f0a8ff86b8f21061033043d243e4a466da05 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 10 Aug 2023 20:49:08 +0200
Subject: [PATCH 0970/2047] ACPI: thermal: Drop redundant local variable from
 acpi_thermal_resume()

Commit dabc621a3110 ("ACPI: thermal: Drop enabled flag from struct
acpi_thermal_active") left behind a variable that is only assigned to
and never read, so drop it now.

Fixes: dabc621a3110 ("ACPI: thermal: Drop enabled flag from struct acpi_thermal_active")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/thermal.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index a329b0dce78f..2fc02e90c400 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -1014,7 +1014,7 @@ static int acpi_thermal_suspend(struct device *dev)
 static int acpi_thermal_resume(struct device *dev)
 {
 	struct acpi_thermal *tz;
-	int i, j, power_state, result;
+	int i, j, power_state;
 
 	if (!dev)
 		return -EINVAL;
@@ -1028,9 +1028,8 @@ static int acpi_thermal_resume(struct device *dev)
 			break;
 
 		for (j = 0; j < tz->trips.active[i].devices.count; j++) {
-			result = acpi_bus_update_power(
-					tz->trips.active[i].devices.handles[j],
-					&power_state);
+			acpi_bus_update_power(tz->trips.active[i].devices.handles[j],
+					      &power_state);
 		}
 	}
 

From bc840ea5f9a98cfab61f6f8f70a73dac43db27d0 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 7 Aug 2023 20:01:24 +0200
Subject: [PATCH 0971/2047] thermal: core: Do not handle trip points with
 invalid temperature

Trip points with temperature set to THERMAL_TEMP_INVALID are as good as
disabled, so make handle_thermal_trip() ignore them.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/thermal_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 842f678c1c3e..b38abe8a080a 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -348,7 +348,8 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip_id)
 	struct thermal_trip trip;
 
 	/* Ignore disabled trip points */
-	if (test_bit(trip_id, &tz->trips_disabled))
+	if (test_bit(trip_id, &tz->trips_disabled) ||
+	    trip.temperature == THERMAL_TEMP_INVALID)
 		return;
 
 	__thermal_zone_get_trip(tz, trip_id, &trip);

From b429b6ffbdc976d22bdf0b5ba9183c8466209cba Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 9 Aug 2023 22:27:18 +0200
Subject: [PATCH 0972/2047] thermal: intel: intel_soc_dts_iosf: Always use 2
 trips

Both the existing callers of intel_soc_dts_iosf_init() pass 2 as the trip
count argument, so it can be replaced with SOC_MAX_DTS_TRIPS everywhere in
the code and the trip_count argument of that function can be dropped.

This also allows the trip_count field to be dropped from struct
intel_soc_dts_sensor_entry, as it is always equal to 2, and some
related code can be simplified.

Make changes accordingly.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 .../processor_thermal_device_pci_legacy.c     |  2 +-
 drivers/thermal/intel/intel_soc_dts_iosf.c    | 26 ++++++-------------
 drivers/thermal/intel/intel_soc_dts_iosf.h    |  9 ++++---
 drivers/thermal/intel/intel_soc_dts_thermal.c |  2 +-
 4 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
index 09e032f822f3..46ec0e243692 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
@@ -59,7 +59,7 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev,
 		 * ACPI/MSR. So we don't want to fail for auxiliary DTSs.
 		 */
 		proc_priv->soc_dts = intel_soc_dts_iosf_init(
-					INTEL_SOC_DTS_INTERRUPT_MSI, 2, 0);
+					INTEL_SOC_DTS_INTERRUPT_MSI, 0);
 
 		if (!IS_ERR(proc_priv->soc_dts) && pdev->irq) {
 			ret = pci_enable_msi(pdev);
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index db97499f4f0a..426d5bb5a373 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -37,9 +37,6 @@
 /* DTS encoding for TJ MAX temperature */
 #define SOC_DTS_TJMAX_ENCODING		0x7F
 
-/* Only 2 out of 4 is allowed for OSPM */
-#define SOC_MAX_DTS_TRIPS		2
-
 /* Mask for two trips in status bits */
 #define SOC_DTS_TRIP_MASK		0x03
 
@@ -253,12 +250,10 @@ static void remove_dts_thermal_zone(struct intel_soc_dts_sensor_entry *dts)
 }
 
 static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
-				bool notification_support, int trip_cnt,
-				int read_only_trip_cnt)
+				bool notification_support, int read_only_trip_cnt)
 {
 	char name[10];
 	unsigned long trip;
-	int trip_count = 0;
 	int trip_mask = 0;
 	int writable_trip_cnt = 0;
 	unsigned long ptps;
@@ -274,8 +269,7 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
 
 	dts->id = id;
 	if (notification_support) {
-		trip_count = min(SOC_MAX_DTS_TRIPS, trip_cnt);
-		writable_trip_cnt = trip_count - read_only_trip_cnt;
+		writable_trip_cnt = SOC_MAX_DTS_TRIPS - read_only_trip_cnt;
 		trip_mask = GENMASK(writable_trip_cnt - 1, 0);
 	}
 
@@ -290,10 +284,9 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
 			trip_mask &= ~BIT(i / 8);
 	}
 	dts->trip_mask = trip_mask;
-	dts->trip_count = trip_count;
 	snprintf(name, sizeof(name), "soc_dts%d", id);
 	dts->tzone = thermal_zone_device_register(name,
-						  trip_count,
+						  SOC_MAX_DTS_TRIPS,
 						  trip_mask,
 						  dts, &tzone_ops,
 						  NULL, 0, 0);
@@ -324,11 +317,10 @@ int intel_soc_dts_iosf_add_read_only_critical_trip(
 	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
 		struct intel_soc_dts_sensor_entry *entry = &sensors->soc_dts[i];
 		int temp = sensors->tj_max - critical_offset;
-		unsigned long count = entry->trip_count;
 		unsigned long mask = entry->trip_mask;
 
-		j = find_first_zero_bit(&mask, count);
-		if (j < count)
+		j = find_first_zero_bit(&mask, SOC_MAX_DTS_TRIPS);
+		if (j < SOC_MAX_DTS_TRIPS)
 			return update_trip_temp(entry, j, temp, THERMAL_TRIP_CRITICAL);
 	}
 
@@ -372,8 +364,7 @@ void intel_soc_dts_iosf_interrupt_handler(struct intel_soc_dts_sensors *sensors)
 EXPORT_SYMBOL_GPL(intel_soc_dts_iosf_interrupt_handler);
 
 struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
-	enum intel_soc_dts_interrupt_type intr_type, int trip_count,
-	int read_only_trip_count)
+	enum intel_soc_dts_interrupt_type intr_type, int read_only_trip_count)
 {
 	struct intel_soc_dts_sensors *sensors;
 	bool notification;
@@ -384,7 +375,7 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
 	if (!iosf_mbi_available())
 		return ERR_PTR(-ENODEV);
 
-	if (!trip_count || read_only_trip_count > trip_count)
+	if (read_only_trip_count > SOC_MAX_DTS_TRIPS)
 		return ERR_PTR(-EINVAL);
 
 	tj_max = intel_tcc_get_tjmax(-1);
@@ -406,8 +397,7 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
 	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
 		sensors->soc_dts[i].sensors = sensors;
 		ret = add_dts_thermal_zone(i, &sensors->soc_dts[i],
-					   notification, trip_count,
-					   read_only_trip_count);
+					   notification, read_only_trip_count);
 		if (ret)
 			goto err_free;
 	}
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.h b/drivers/thermal/intel/intel_soc_dts_iosf.h
index c54945748200..d10902e12592 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.h
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.h
@@ -12,6 +12,9 @@
 /* DTS0 and DTS 1 */
 #define SOC_MAX_DTS_SENSORS	2
 
+/* Only 2 out of 4 is allowed for OSPM */
+#define SOC_MAX_DTS_TRIPS	2
+
 enum intel_soc_dts_interrupt_type {
 	INTEL_SOC_DTS_INTERRUPT_NONE,
 	INTEL_SOC_DTS_INTERRUPT_APIC,
@@ -26,8 +29,7 @@ struct intel_soc_dts_sensor_entry {
 	int id;
 	u32 store_status;
 	u32 trip_mask;
-	u32 trip_count;
-	enum thermal_trip_type trip_types[2];
+	enum thermal_trip_type trip_types[SOC_MAX_DTS_TRIPS];
 	struct thermal_zone_device *tzone;
 	struct intel_soc_dts_sensors *sensors;
 };
@@ -41,8 +43,7 @@ struct intel_soc_dts_sensors {
 };
 
 struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
-	enum intel_soc_dts_interrupt_type intr_type, int trip_count,
-	int read_only_trip_count);
+	enum intel_soc_dts_interrupt_type intr_type, int read_only_trip_count);
 void intel_soc_dts_iosf_exit(struct intel_soc_dts_sensors *sensors);
 void intel_soc_dts_iosf_interrupt_handler(
 				struct intel_soc_dts_sensors *sensors);
diff --git a/drivers/thermal/intel/intel_soc_dts_thermal.c b/drivers/thermal/intel/intel_soc_dts_thermal.c
index 92e5c19d03f6..8e21e5213dff 100644
--- a/drivers/thermal/intel/intel_soc_dts_thermal.c
+++ b/drivers/thermal/intel/intel_soc_dts_thermal.c
@@ -51,7 +51,7 @@ static int __init intel_soc_thermal_init(void)
 		return -ENODEV;
 
 	/* Create a zone with 2 trips with marked as read only */
-	soc_dts = intel_soc_dts_iosf_init(INTEL_SOC_DTS_INTERRUPT_APIC, 2, 1);
+	soc_dts = intel_soc_dts_iosf_init(INTEL_SOC_DTS_INTERRUPT_APIC, 1);
 	if (IS_ERR(soc_dts)) {
 		err = PTR_ERR(soc_dts);
 		return err;

From a39524aca314a93100cafa49f0a0cf0508a00d1e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 9 Aug 2023 22:28:12 +0200
Subject: [PATCH 0973/2047] thermal: intel: intel_soc_dts_iosf: Drop redundant
 symbol definition

SOC_MAX_DTS_SENSORS is already defined in intel_soc_dts_iosf.h which is
included in intel_soc_dts_iosf.c, so it does not need to be defined in
the latter again.

Drop the redundant definition of that symbol from intel_soc_dts_iosf.c.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 drivers/thermal/intel/intel_soc_dts_iosf.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index 426d5bb5a373..60bdca997c21 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -40,9 +40,6 @@
 /* Mask for two trips in status bits */
 #define SOC_DTS_TRIP_MASK		0x03
 
-/* DTS0 and DTS 1 */
-#define SOC_MAX_DTS_SENSORS		2
-
 static int sys_get_trip_temp(struct thermal_zone_device *tzd, int trip,
 			     int *temp)
 {

From fbc5d382407eb5d6a2eeef245cc2ca278d590645 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 2 Aug 2023 21:04:32 -0700
Subject: [PATCH 0974/2047] selftests/seccomp: Handle arm32 corner cases better

It turns out arm32 doesn't handle syscall -1 gracefully, so skip testing
for that. Additionally skip tests that depend on clone3 when it is not
available (for example when building the seccomp selftests on an old arm
image without clone3 headers). And improve error reporting for when
nanosleep fails, as seen on arm32 since v5.15.

Cc: Lecopzer Chen <lecopzer.chen@mediatek.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index f6a04d88e02f..38f651469968 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -2184,6 +2184,9 @@ FIXTURE_TEARDOWN(TRACE_syscall)
 
 TEST(negative_ENOSYS)
 {
+#if defined(__arm__)
+	SKIP(return, "arm32 does not support calling syscall -1");
+#endif
 	/*
 	 * There should be no difference between an "internal" skip
 	 * and userspace asking for syscall "-1".
@@ -3072,7 +3075,8 @@ TEST(syscall_restart)
 		timeout.tv_sec = 1;
 		errno = 0;
 		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
-			TH_LOG("Call to nanosleep() failed (errno %d)", errno);
+			TH_LOG("Call to nanosleep() failed (errno %d: %s)",
+				errno, strerror(errno));
 		}
 
 		/* Read final sync from parent. */
@@ -3908,6 +3912,9 @@ TEST(user_notification_filter_empty)
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
 
+	if (__NR_clone3 < 0)
+		SKIP(return, "Test not built with clone3 support");
+
 	pid = sys_clone3(&args, sizeof(args));
 	ASSERT_GE(pid, 0);
 
@@ -3962,6 +3969,9 @@ TEST(user_notification_filter_empty_threaded)
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
 
+	if (__NR_clone3 < 0)
+		SKIP(return, "Test not built with clone3 support");
+
 	pid = sys_clone3(&args, sizeof(args));
 	ASSERT_GE(pid, 0);
 

From 823d4737d4c226699f4378b71675822f5ebe78ba Mon Sep 17 00:00:00 2001
From: Gary Guo <gary@garyguo.net>
Date: Wed, 28 Jun 2023 18:11:01 +0100
Subject: [PATCH 0975/2047] rust: macros: add `paste!` proc macro
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This macro provides a flexible way to concatenated identifiers together
and it allows the resulting identifier to be used to declare new items,
which `concat_idents!` does not allow. It also allows identifiers to be
transformed before concatenated.

The `concat_idents!` example

    let x_1 = 42;
    let x_2 = concat_idents!(x, _1);
    assert!(x_1 == x_2);

can be written with `paste!` macro like this:

    let x_1 = 42;
    let x_2 = paste!([<x _1>]);
    assert!(x_1 == x_2);

However `paste!` macro is more flexible because it can be used to create
a new variable:

    let x_1 = 42;
    paste!(let [<x _2>] = [<x _1>];);
    assert!(x_1 == x_2);

While this is not possible with `concat_idents!`.

This macro is similar to the `paste!` crate [1], but this is a fresh
implementation to avoid vendoring large amount of code directly. Also, I
have augmented it to provide a way to specify span of the resulting
token, allowing precise control.

For example, this code is broken because the variable is declared inside
the macro, so Rust macro hygiene rules prevents access from the outside:

    macro_rules! m {
        ($id: ident) => {
            // The resulting token has hygiene of the macro.
            paste!(let [<$id>] = 1;)
        }
    }

    m!(a);
    let _ = a;

In this version of `paste!` macro I added a `span` modifier to allow
this:

    macro_rules! m {
        ($id: ident) => {
            // The resulting token has hygiene of `$id`.
            paste!(let [<$id:span>] = 1;)
        }
    }

    m!(a);
    let _ = a;

Link: http://docs.rs/paste/ [1]
Signed-off-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Björn Roy Baron <bjorn3_gh@protonmail.com>
Reviewed-by: Benno Lossin <benno.lossin@proton.me>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Link: https://lore.kernel.org/r/20230628171108.1150742-1-gary@garyguo.net
[ Added SPDX license identifier as discussed in the list and fixed typo. ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/macros/lib.rs   | 97 ++++++++++++++++++++++++++++++++++++++++++++
 rust/macros/paste.rs | 96 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 193 insertions(+)
 create mode 100644 rust/macros/paste.rs

diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs
index 3fc74cb4ea19..b4bc44c27bd4 100644
--- a/rust/macros/lib.rs
+++ b/rust/macros/lib.rs
@@ -7,6 +7,7 @@ mod quote;
 mod concat_idents;
 mod helpers;
 mod module;
+mod paste;
 mod pin_data;
 mod pinned_drop;
 mod vtable;
@@ -246,3 +247,99 @@ pub fn pin_data(inner: TokenStream, item: TokenStream) -> TokenStream {
 pub fn pinned_drop(args: TokenStream, input: TokenStream) -> TokenStream {
     pinned_drop::pinned_drop(args, input)
 }
+
+/// Paste identifiers together.
+///
+/// Within the `paste!` macro, identifiers inside `[<` and `>]` are concatenated together to form a
+/// single identifier.
+///
+/// This is similar to the [`paste`] crate, but with pasting feature limited to identifiers
+/// (literals, lifetimes and documentation strings are not supported). There is a difference in
+/// supported modifiers as well.
+///
+/// # Example
+///
+/// ```ignore
+/// use kernel::macro::paste;
+///
+/// macro_rules! pub_no_prefix {
+///     ($prefix:ident, $($newname:ident),+) => {
+///         paste! {
+///             $(pub(crate) const $newname: u32 = [<$prefix $newname>];)+
+///         }
+///     };
+/// }
+///
+/// pub_no_prefix!(
+///     binder_driver_return_protocol_,
+///     BR_OK,
+///     BR_ERROR,
+///     BR_TRANSACTION,
+///     BR_REPLY,
+///     BR_DEAD_REPLY,
+///     BR_TRANSACTION_COMPLETE,
+///     BR_INCREFS,
+///     BR_ACQUIRE,
+///     BR_RELEASE,
+///     BR_DECREFS,
+///     BR_NOOP,
+///     BR_SPAWN_LOOPER,
+///     BR_DEAD_BINDER,
+///     BR_CLEAR_DEATH_NOTIFICATION_DONE,
+///     BR_FAILED_REPLY
+/// );
+///
+/// assert_eq!(BR_OK, binder_driver_return_protocol_BR_OK);
+/// ```
+///
+/// # Modifiers
+///
+/// For each identifier, it is possible to attach one or multiple modifiers to
+/// it.
+///
+/// Currently supported modifiers are:
+/// * `span`: change the span of concatenated identifier to the span of the specified token. By
+/// default the span of the `[< >]` group is used.
+/// * `lower`: change the identifier to lower case.
+/// * `upper`: change the identifier to upper case.
+///
+/// ```ignore
+/// use kernel::macro::paste;
+///
+/// macro_rules! pub_no_prefix {
+///     ($prefix:ident, $($newname:ident),+) => {
+///         kernel::macros::paste! {
+///             $(pub(crate) const fn [<$newname:lower:span>]: u32 = [<$prefix $newname:span>];)+
+///         }
+///     };
+/// }
+///
+/// pub_no_prefix!(
+///     binder_driver_return_protocol_,
+///     BR_OK,
+///     BR_ERROR,
+///     BR_TRANSACTION,
+///     BR_REPLY,
+///     BR_DEAD_REPLY,
+///     BR_TRANSACTION_COMPLETE,
+///     BR_INCREFS,
+///     BR_ACQUIRE,
+///     BR_RELEASE,
+///     BR_DECREFS,
+///     BR_NOOP,
+///     BR_SPAWN_LOOPER,
+///     BR_DEAD_BINDER,
+///     BR_CLEAR_DEATH_NOTIFICATION_DONE,
+///     BR_FAILED_REPLY
+/// );
+///
+/// assert_eq!(br_ok(), binder_driver_return_protocol_BR_OK);
+/// ```
+///
+/// [`paste`]: https://docs.rs/paste/
+#[proc_macro]
+pub fn paste(input: TokenStream) -> TokenStream {
+    let mut tokens = input.into_iter().collect();
+    paste::expand(&mut tokens);
+    tokens.into_iter().collect()
+}
diff --git a/rust/macros/paste.rs b/rust/macros/paste.rs
new file mode 100644
index 000000000000..385a78434224
--- /dev/null
+++ b/rust/macros/paste.rs
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use proc_macro::{Delimiter, Group, Ident, Spacing, Span, TokenTree};
+
+fn concat(tokens: &[TokenTree], group_span: Span) -> TokenTree {
+    let mut tokens = tokens.iter();
+    let mut segments = Vec::new();
+    let mut span = None;
+    loop {
+        match tokens.next() {
+            None => break,
+            Some(TokenTree::Literal(lit)) => segments.push((lit.to_string(), lit.span())),
+            Some(TokenTree::Ident(ident)) => {
+                let mut value = ident.to_string();
+                if value.starts_with("r#") {
+                    value.replace_range(0..2, "");
+                }
+                segments.push((value, ident.span()));
+            }
+            Some(TokenTree::Punct(p)) if p.as_char() == ':' => {
+                let Some(TokenTree::Ident(ident)) = tokens.next() else {
+                    panic!("expected identifier as modifier");
+                };
+
+                let (mut value, sp) = segments.pop().expect("expected identifier before modifier");
+                match ident.to_string().as_str() {
+                    // Set the overall span of concatenated token as current span
+                    "span" => {
+                        assert!(
+                            span.is_none(),
+                            "span modifier should only appear at most once"
+                        );
+                        span = Some(sp);
+                    }
+                    "lower" => value = value.to_lowercase(),
+                    "upper" => value = value.to_uppercase(),
+                    v => panic!("unknown modifier `{v}`"),
+                };
+                segments.push((value, sp));
+            }
+            _ => panic!("unexpected token in paste segments"),
+        };
+    }
+
+    let pasted: String = segments.into_iter().map(|x| x.0).collect();
+    TokenTree::Ident(Ident::new(&pasted, span.unwrap_or(group_span)))
+}
+
+pub(crate) fn expand(tokens: &mut Vec<TokenTree>) {
+    for token in tokens.iter_mut() {
+        if let TokenTree::Group(group) = token {
+            let delimiter = group.delimiter();
+            let span = group.span();
+            let mut stream: Vec<_> = group.stream().into_iter().collect();
+            // Find groups that looks like `[< A B C D >]`
+            if delimiter == Delimiter::Bracket
+                && stream.len() >= 3
+                && matches!(&stream[0], TokenTree::Punct(p) if p.as_char() == '<')
+                && matches!(&stream[stream.len() - 1], TokenTree::Punct(p) if p.as_char() == '>')
+            {
+                // Replace the group with concatenated token
+                *token = concat(&stream[1..stream.len() - 1], span);
+            } else {
+                // Recursively expand tokens inside the group
+                expand(&mut stream);
+                let mut group = Group::new(delimiter, stream.into_iter().collect());
+                group.set_span(span);
+                *token = TokenTree::Group(group);
+            }
+        }
+    }
+
+    // Path segments cannot contain invisible delimiter group, so remove them if any.
+    for i in (0..tokens.len().saturating_sub(3)).rev() {
+        // Looking for a double colon
+        if matches!(
+            (&tokens[i + 1], &tokens[i + 2]),
+            (TokenTree::Punct(a), TokenTree::Punct(b))
+                if a.as_char() == ':' && a.spacing() == Spacing::Joint && b.as_char() == ':'
+        ) {
+            match &tokens[i + 3] {
+                TokenTree::Group(group) if group.delimiter() == Delimiter::None => {
+                    tokens.splice(i + 3..i + 4, group.stream());
+                }
+                _ => (),
+            }
+
+            match &tokens[i] {
+                TokenTree::Group(group) if group.delimiter() == Delimiter::None => {
+                    tokens.splice(i..i + 1, group.stream());
+                }
+                _ => (),
+            }
+        }
+    }
+}

From 41bdc6decda074afc4d8f8ba44c69b08d0e9aff6 Mon Sep 17 00:00:00 2001
From: Andrea Righi <andrea.righi@canonical.com>
Date: Tue, 4 Jul 2023 07:21:36 +0200
Subject: [PATCH 0976/2047] btf, scripts: rust: drop is_rust_module.sh

With commit c1177979af9c ("btf, scripts: Exclude Rust CUs with pahole")
we are now able to use pahole directly to identify Rust compilation
units (CUs) and exclude them from generating BTF debugging information
(when DEBUG_INFO_BTF is enabled).

And if pahole doesn't support the --lang-exclude flag, we can't enable
both RUST and DEBUG_INFO_BTF at the same time.

So, in any case, the script is_rust_module.sh is just redundant and we
can drop it.

NOTE: we may also be able to drop the "Rust loadable module" mark
inside Rust modules, but it seems safer to keep it for now to make sure
we are not breaking any external tool that may potentially rely on it.

Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Tested-by: Eric Curtin <ecurtin@redhat.com>
Reviewed-by: Eric Curtin <ecurtin@redhat.com>
Reviewed-by: Neal Gompa <neal@gompa.dev>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Acked-by: Daniel Xu <dxu@dxuuu.xyz>
Link: https://lore.kernel.org/r/20230704052136.155445-1-andrea.righi@canonical.com
[ Picked the `Reviewed-by`s from the old patch too. ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/macros/module.rs     |  2 +-
 scripts/Makefile.modfinal |  2 --
 scripts/is_rust_module.sh | 16 ----------------
 3 files changed, 1 insertion(+), 19 deletions(-)
 delete mode 100755 scripts/is_rust_module.sh

diff --git a/rust/macros/module.rs b/rust/macros/module.rs
index fb1244f8c2e6..d62d8710d77a 100644
--- a/rust/macros/module.rs
+++ b/rust/macros/module.rs
@@ -199,7 +199,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
             /// Used by the printing macros, e.g. [`info!`].
             const __LOG_PREFIX: &[u8] = b\"{name}\\0\";
 
-            /// The \"Rust loadable module\" mark, for `scripts/is_rust_module.sh`.
+            /// The \"Rust loadable module\" mark.
             //
             // This may be best done another way later on, e.g. as a new modinfo
             // key or a new section. For the moment, keep it simple.
diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index fc19f67039bd..b3a6aa8fbe8c 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -41,8 +41,6 @@ quiet_cmd_btf_ko = BTF [M] $@
       cmd_btf_ko = 							\
 	if [ ! -f vmlinux ]; then					\
 		printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \
-	elif [ -n "$(CONFIG_RUST)" ] && $(srctree)/scripts/is_rust_module.sh $@; then 		\
-		printf "Skipping BTF generation for %s because it's a Rust module\n" $@ 1>&2; \
 	else								\
 		LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J $(PAHOLE_FLAGS) --btf_base vmlinux $@; \
 		$(RESOLVE_BTFIDS) -b vmlinux $@; 			\
diff --git a/scripts/is_rust_module.sh b/scripts/is_rust_module.sh
deleted file mode 100755
index 464761a7cf7f..000000000000
--- a/scripts/is_rust_module.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# is_rust_module.sh module.ko
-#
-# Returns `0` if `module.ko` is a Rust module, `1` otherwise.
-
-set -e
-
-# Using the `16_` prefix ensures other symbols with the same substring
-# are not picked up (even if it would be unlikely). The last part is
-# used just in case LLVM decides to use the `.` suffix.
-#
-# In the future, checking for the `.comment` section may be another
-# option, see https://github.com/rust-lang/rust/pull/97550.
-${NM} "$*" | grep -qE '^[0-9a-fA-F]+ [Rr] _R[^[:space:]]+16___IS_RUST_MODULE[^[:space:]]*$'

From 216710a492dde71fc7674e9962727bccb08dfaeb Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Thu, 10 Aug 2023 20:12:03 +0200
Subject: [PATCH 0977/2047] cpufreq: mediatek-hw: Remove unused define

DYNAMIC_POWER does not seem to be used anywhere in the tree, remove it.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/mediatek-cpufreq.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c
index fef68cb2b38f..a0a61919bc4c 100644
--- a/drivers/cpufreq/mediatek-cpufreq.c
+++ b/drivers/cpufreq/mediatek-cpufreq.c
@@ -313,8 +313,6 @@ out:
 	return ret;
 }
 
-#define DYNAMIC_POWER "dynamic-power-coefficient"
-
 static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
 				    unsigned long event, void *data)
 {

From ffa09b3bd02427ab631f0c1b64714ce6fc885f61 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 10 Aug 2023 23:48:59 +0800
Subject: [PATCH 0978/2047] erofs: DEFLATE compression support

Add DEFLATE compression as the 3rd supported algorithm.

DEFLATE is a popular generic-purpose compression algorithm for quite
long time (many advanced formats like gzip, zlib, zip, png are all
based on that) as Apple documentation written "If you require
interoperability with non-Apple devices, use COMPRESSION_ZLIB. [1]".

Due to its popularity, there are several hardware on-market DEFLATE
accelerators, such as (s390) DFLTCC, (Intel) IAA/QAT, (HiSilicon) ZIP
accelerator, etc.  In addition, there are also several high-performence
IP cores and even open-source FPGA approches available for DEFLATE.
Therefore, it's useful to support DEFLATE compression in order to find
a way to utilize these accelerators for asynchronous I/Os and get
benefits from these later.

Besides, it's a good choice to trade off between compression ratios
and performance compared to LZ4 and LZMA.  The DEFLATE core format is
simple as well as easy to understand, therefore the code size of its
decompressor is small even for the bootloader use cases.  The runtime
memory consumption is quite limited too (e.g. 32K + ~7K for each zlib
stream).  As usual, EROFS ourperforms similar approaches too.

Alternatively, DEFLATE could still be used for some specific files
since EROFS supports multiple compression algorithms in one image.

[1] https://developer.apple.com/documentation/compression/compression_algorithm
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230810154859.118330-1-hsiangkao@linux.alibaba.com
---
 fs/erofs/Kconfig                |  15 ++
 fs/erofs/Makefile               |   1 +
 fs/erofs/compress.h             |   2 +
 fs/erofs/decompressor.c         |   6 +
 fs/erofs/decompressor_deflate.c | 247 ++++++++++++++++++++++++++++++++
 fs/erofs/erofs_fs.h             |   7 +
 fs/erofs/internal.h             |  20 +++
 fs/erofs/super.c                |  10 ++
 fs/erofs/zmap.c                 |   5 +-
 9 files changed, 311 insertions(+), 2 deletions(-)
 create mode 100644 fs/erofs/decompressor_deflate.c

diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index f259d92c9720..56a99ba8ce22 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -99,6 +99,21 @@ config EROFS_FS_ZIP_LZMA
 
 	  If unsure, say N.
 
+config EROFS_FS_ZIP_DEFLATE
+	bool "EROFS DEFLATE compressed data support"
+	depends on EROFS_FS_ZIP
+	select ZLIB_INFLATE
+	help
+	  Saying Y here includes support for reading EROFS file systems
+	  containing DEFLATE compressed data.  It gives better compression
+	  ratios than the default LZ4 format, while it costs more CPU
+	  overhead.
+
+	  DEFLATE support is an experimental feature for now and so most
+	  file systems will be readable without selecting this option.
+
+	  If unsure, say N.
+
 config EROFS_FS_ONDEMAND
 	bool "EROFS fscache-based on-demand read support"
 	depends on CACHEFILES_ONDEMAND && (EROFS_FS=m && FSCACHE || EROFS_FS=y && FSCACHE=y)
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
index a3a98fc3e481..994d0b9deddf 100644
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@@ -5,4 +5,5 @@ erofs-objs := super.o inode.o data.o namei.o dir.o utils.o sysfs.o
 erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
 erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o
 erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
+erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
 erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index b1b846504027..349c3316ae6b 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -94,4 +94,6 @@ extern const struct z_erofs_decompressor erofs_decompressors[];
 /* prototypes for specific algorithms */
 int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
 			    struct page **pagepool);
+int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
+			       struct page **pagepool);
 #endif
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index cfad1eac7fd9..332ec5f74002 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -379,4 +379,10 @@ const struct z_erofs_decompressor erofs_decompressors[] = {
 		.name = "lzma"
 	},
 #endif
+#ifdef CONFIG_EROFS_FS_ZIP_DEFLATE
+	[Z_EROFS_COMPRESSION_DEFLATE] = {
+		.decompress = z_erofs_deflate_decompress,
+		.name = "deflate"
+	},
+#endif
 };
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
new file mode 100644
index 000000000000..19e5bdeb30b6
--- /dev/null
+++ b/fs/erofs/decompressor_deflate.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/module.h>
+#include <linux/zlib.h>
+#include "compress.h"
+
+struct z_erofs_deflate {
+	struct z_erofs_deflate *next;
+	struct z_stream_s z;
+	u8 bounce[PAGE_SIZE];
+};
+
+static DEFINE_SPINLOCK(z_erofs_deflate_lock);
+static unsigned int z_erofs_deflate_nstrms, z_erofs_deflate_avail_strms;
+static struct z_erofs_deflate *z_erofs_deflate_head;
+static DECLARE_WAIT_QUEUE_HEAD(z_erofs_deflate_wq);
+
+module_param_named(deflate_streams, z_erofs_deflate_nstrms, uint, 0444);
+
+void z_erofs_deflate_exit(void)
+{
+	/* there should be no running fs instance */
+	while (z_erofs_deflate_avail_strms) {
+		struct z_erofs_deflate *strm;
+
+		spin_lock(&z_erofs_deflate_lock);
+		strm = z_erofs_deflate_head;
+		if (!strm) {
+			spin_unlock(&z_erofs_deflate_lock);
+			continue;
+		}
+		z_erofs_deflate_head = NULL;
+		spin_unlock(&z_erofs_deflate_lock);
+
+		while (strm) {
+			struct z_erofs_deflate *n = strm->next;
+
+			vfree(strm->z.workspace);
+			kfree(strm);
+			--z_erofs_deflate_avail_strms;
+			strm = n;
+		}
+	}
+}
+
+int __init z_erofs_deflate_init(void)
+{
+	/* by default, use # of possible CPUs instead */
+	if (!z_erofs_deflate_nstrms)
+		z_erofs_deflate_nstrms = num_possible_cpus();
+
+	for (; z_erofs_deflate_avail_strms < z_erofs_deflate_nstrms;
+	     ++z_erofs_deflate_avail_strms) {
+		struct z_erofs_deflate *strm;
+
+		strm = kzalloc(sizeof(*strm), GFP_KERNEL);
+		if (!strm)
+			goto out_failed;
+
+		/* XXX: in-kernel zlib cannot shrink windowbits currently */
+		strm->z.workspace = vmalloc(zlib_inflate_workspacesize());
+		if (!strm->z.workspace) {
+			kfree(strm);
+			goto out_failed;
+		}
+
+		spin_lock(&z_erofs_deflate_lock);
+		strm->next = z_erofs_deflate_head;
+		z_erofs_deflate_head = strm;
+		spin_unlock(&z_erofs_deflate_lock);
+	}
+	return 0;
+
+out_failed:
+	pr_err("failed to allocate zlib workspace\n");
+	z_erofs_deflate_exit();
+	return -ENOMEM;
+}
+
+int z_erofs_load_deflate_config(struct super_block *sb,
+				struct erofs_super_block *dsb,
+				struct z_erofs_deflate_cfgs *dfl, int size)
+{
+	if (!dfl || size < sizeof(struct z_erofs_deflate_cfgs)) {
+		erofs_err(sb, "invalid deflate cfgs, size=%u", size);
+		return -EINVAL;
+	}
+
+	if (dfl->windowbits > MAX_WBITS) {
+		erofs_err(sb, "unsupported windowbits %u", dfl->windowbits);
+		return -EOPNOTSUPP;
+	}
+
+	erofs_info(sb, "EXPERIMENTAL DEFLATE feature in use. Use at your own risk!");
+	return 0;
+}
+
+int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
+			       struct page **pagepool)
+{
+	const unsigned int nrpages_out =
+		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+	const unsigned int nrpages_in =
+		PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
+	struct super_block *sb = rq->sb;
+	unsigned int insz, outsz, pofs;
+	struct z_erofs_deflate *strm;
+	u8 *kin, *kout = NULL;
+	bool bounced = false;
+	int no = -1, ni = 0, j = 0, zerr, err;
+
+	/* 1. get the exact DEFLATE compressed size */
+	kin = kmap_local_page(*rq->in);
+	err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
+			min_t(unsigned int, rq->inputsize,
+			      sb->s_blocksize - rq->pageofs_in));
+	if (err) {
+		kunmap_local(kin);
+		return err;
+	}
+
+	/* 2. get an available DEFLATE context */
+again:
+	spin_lock(&z_erofs_deflate_lock);
+	strm = z_erofs_deflate_head;
+	if (!strm) {
+		spin_unlock(&z_erofs_deflate_lock);
+		wait_event(z_erofs_deflate_wq, READ_ONCE(z_erofs_deflate_head));
+		goto again;
+	}
+	z_erofs_deflate_head = strm->next;
+	spin_unlock(&z_erofs_deflate_lock);
+
+	/* 3. multi-call decompress */
+	insz = rq->inputsize;
+	outsz = rq->outputsize;
+	zerr = zlib_inflateInit2(&strm->z, -MAX_WBITS);
+	if (zerr != Z_OK) {
+		err = -EIO;
+		goto failed_zinit;
+	}
+
+	pofs = rq->pageofs_out;
+	strm->z.avail_in = min_t(u32, insz, PAGE_SIZE - rq->pageofs_in);
+	insz -= strm->z.avail_in;
+	strm->z.next_in = kin + rq->pageofs_in;
+	strm->z.avail_out = 0;
+
+	while (1) {
+		if (!strm->z.avail_out) {
+			if (++no >= nrpages_out || !outsz) {
+				erofs_err(sb, "insufficient space for decompressed data");
+				err = -EFSCORRUPTED;
+				break;
+			}
+
+			if (kout)
+				kunmap_local(kout);
+			strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs);
+			outsz -= strm->z.avail_out;
+			if (!rq->out[no]) {
+				rq->out[no] = erofs_allocpage(pagepool,
+						GFP_KERNEL | __GFP_NOFAIL);
+				set_page_private(rq->out[no],
+						 Z_EROFS_SHORTLIVED_PAGE);
+			}
+			kout = kmap_local_page(rq->out[no]);
+			strm->z.next_out = kout + pofs;
+			pofs = 0;
+		}
+
+		if (!strm->z.avail_in && insz) {
+			if (++ni >= nrpages_in) {
+				erofs_err(sb, "invalid compressed data");
+				err = -EFSCORRUPTED;
+				break;
+			}
+
+			if (kout) { /* unlike kmap(), take care of the orders */
+				j = strm->z.next_out - kout;
+				kunmap_local(kout);
+			}
+			kunmap_local(kin);
+			strm->z.avail_in = min_t(u32, insz, PAGE_SIZE);
+			insz -= strm->z.avail_in;
+			kin = kmap_local_page(rq->in[ni]);
+			strm->z.next_in = kin;
+			bounced = false;
+			if (kout) {
+				kout = kmap_local_page(rq->out[no]);
+				strm->z.next_out = kout + j;
+			}
+		}
+
+		/*
+		 * Handle overlapping: Use bounced buffer if the compressed
+		 * data is under processing; Or use short-lived pages from the
+		 * on-stack pagepool where pages share among the same request
+		 * and not _all_ inplace I/O pages are needed to be doubled.
+		 */
+		if (!bounced && rq->out[no] == rq->in[ni]) {
+			memcpy(strm->bounce, strm->z.next_in, strm->z.avail_in);
+			strm->z.next_in = strm->bounce;
+			bounced = true;
+		}
+
+		for (j = ni + 1; j < nrpages_in; ++j) {
+			struct page *tmppage;
+
+			if (rq->out[no] != rq->in[j])
+				continue;
+
+			DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb),
+							rq->in[j]));
+			tmppage = erofs_allocpage(pagepool,
+						  GFP_KERNEL | __GFP_NOFAIL);
+			set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
+			copy_highpage(tmppage, rq->in[j]);
+			rq->in[j] = tmppage;
+		}
+
+		zerr = zlib_inflate(&strm->z, Z_SYNC_FLUSH);
+		if (zerr != Z_OK || !(outsz + strm->z.avail_out)) {
+			if (zerr == Z_OK && rq->partial_decoding)
+				break;
+			if (zerr == Z_STREAM_END && !outsz)
+				break;
+			erofs_err(sb, "failed to decompress %d in[%u] out[%u]",
+				  zerr, rq->inputsize, rq->outputsize);
+			err = -EFSCORRUPTED;
+			break;
+		}
+	}
+
+	if (zlib_inflateEnd(&strm->z) != Z_OK && !err)
+		err = -EIO;
+	if (kout)
+		kunmap_local(kout);
+failed_zinit:
+	kunmap_local(kin);
+	/* 4. push back DEFLATE stream context to the global list */
+	spin_lock(&z_erofs_deflate_lock);
+	strm->next = z_erofs_deflate_head;
+	z_erofs_deflate_head = strm;
+	spin_unlock(&z_erofs_deflate_lock);
+	wake_up(&z_erofs_deflate_wq);
+	return err;
+}
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 2c7b16e340fe..364f51171c13 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -289,6 +289,7 @@ struct erofs_dirent {
 enum {
 	Z_EROFS_COMPRESSION_LZ4		= 0,
 	Z_EROFS_COMPRESSION_LZMA	= 1,
+	Z_EROFS_COMPRESSION_DEFLATE	= 2,
 	Z_EROFS_COMPRESSION_MAX
 };
 #define Z_EROFS_ALL_COMPR_ALGS		((1 << Z_EROFS_COMPRESSION_MAX) - 1)
@@ -309,6 +310,12 @@ struct z_erofs_lzma_cfgs {
 
 #define Z_EROFS_LZMA_MAX_DICT_SIZE	(8 * Z_EROFS_PCLUSTER_MAX_SIZE)
 
+/* 6 bytes (+ length field = 8 bytes) */
+struct z_erofs_deflate_cfgs {
+	u8 windowbits;			/* 8..15 for DEFLATE */
+	u8 reserved[5];
+} __packed;
+
 /*
  * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
  *  e.g. for 4k logical cluster size,      4B        if compacted 2B is off;
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 36e32fa542f0..fb45855cfd5d 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -519,6 +519,26 @@ static inline int z_erofs_load_lzma_config(struct super_block *sb,
 }
 #endif	/* !CONFIG_EROFS_FS_ZIP_LZMA */
 
+#ifdef CONFIG_EROFS_FS_ZIP_DEFLATE
+int __init z_erofs_deflate_init(void);
+void z_erofs_deflate_exit(void);
+int z_erofs_load_deflate_config(struct super_block *sb,
+				struct erofs_super_block *dsb,
+				struct z_erofs_deflate_cfgs *dfl, int size);
+#else
+static inline int z_erofs_deflate_init(void) { return 0; }
+static inline int z_erofs_deflate_exit(void) { return 0; }
+static inline int z_erofs_load_deflate_config(struct super_block *sb,
+			struct erofs_super_block *dsb,
+			struct z_erofs_deflate_cfgs *dfl, int size) {
+	if (dfl) {
+		erofs_err(sb, "deflate algorithm isn't enabled");
+		return -EINVAL;
+	}
+	return 0;
+}
+#endif	/* !CONFIG_EROFS_FS_ZIP_DEFLATE */
+
 #ifdef CONFIG_EROFS_FS_ONDEMAND
 int erofs_fscache_register_fs(struct super_block *sb);
 void erofs_fscache_unregister_fs(struct super_block *sb);
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 566f68ddfa36..3275505059cc 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -201,6 +201,9 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
 		case Z_EROFS_COMPRESSION_LZMA:
 			ret = z_erofs_load_lzma_config(sb, dsb, data, size);
 			break;
+		case Z_EROFS_COMPRESSION_DEFLATE:
+			ret = z_erofs_load_deflate_config(sb, dsb, data, size);
+			break;
 		default:
 			DBG_BUGON(1);
 			ret = -EFAULT;
@@ -964,6 +967,10 @@ static int __init erofs_module_init(void)
 	if (err)
 		goto lzma_err;
 
+	err = z_erofs_deflate_init();
+	if (err)
+		goto deflate_err;
+
 	erofs_pcpubuf_init();
 	err = z_erofs_init_zip_subsystem();
 	if (err)
@@ -984,6 +991,8 @@ fs_err:
 sysfs_err:
 	z_erofs_exit_zip_subsystem();
 zip_err:
+	z_erofs_deflate_exit();
+deflate_err:
 	z_erofs_lzma_exit();
 lzma_err:
 	erofs_exit_shrinker();
@@ -1001,6 +1010,7 @@ static void __exit erofs_module_exit(void)
 
 	erofs_exit_sysfs();
 	z_erofs_exit_zip_subsystem();
+	z_erofs_deflate_exit();
 	z_erofs_lzma_exit();
 	erofs_exit_shrinker();
 	kmem_cache_destroy(erofs_inode_cachep);
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 1909ddafd9c7..7b55111fd533 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -561,8 +561,9 @@ static int z_erofs_do_map_blocks(struct inode *inode,
 
 	if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
 	    ((flags & EROFS_GET_BLOCKS_READMORE) &&
-	     map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA &&
-	     map->m_llen >= i_blocksize(inode))) {
+	     (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA ||
+	      map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE) &&
+	      map->m_llen >= i_blocksize(inode))) {
 		err = z_erofs_get_extent_decompressedlen(&m);
 		if (!err)
 			map->m_flags |= EROFS_MAP_FULL_MAPPED;

From 3f339920175c871ac63b4ea179117da7518618fd Mon Sep 17 00:00:00 2001
From: Jingbo Xu <jefflexu@linux.alibaba.com>
Date: Sat, 22 Jul 2023 17:45:37 +0800
Subject: [PATCH 0979/2047] erofs: update on-disk format for xattr name filter

The xattr name bloom filter feature is going to be introduced to speed
up the negative xattr lookup, e.g. system.posix_acl_[access|default]
lookup when running "ls -lR" workload.

There are some commonly used extended attributes (n) and the total
number of these is approximately 30.

	trusted.overlay.opaque
	trusted.overlay.redirect
	trusted.overlay.origin
	trusted.overlay.impure
	trusted.overlay.nlink
	trusted.overlay.upper
	trusted.overlay.metacopy
	trusted.overlay.protattr
	user.overlay.opaque
	user.overlay.redirect
	user.overlay.origin
	user.overlay.impure
	user.overlay.nlink
	user.overlay.upper
	user.overlay.metacopy
	user.overlay.protattr
	security.evm
	security.ima
	security.selinux
	security.SMACK64
	security.SMACK64IPIN
	security.SMACK64IPOUT
	security.SMACK64EXEC
	security.SMACK64TRANSMUTE
	security.SMACK64MMAP
	security.apparmor
	security.capability
	system.posix_acl_access
	system.posix_acl_default
	user.mime_type

Given the number of bits of the bloom filter (m) is 32, the optimal
value for the number of the hash functions (k) is 1 (ln2 * m/n = 0.74).

The single hash function is implemented as:

	xxh32(name, strlen(name), EROFS_XATTR_FILTER_SEED + index)

where `index` represents the index of corresponding predefined short name
prefix, while `name` represents the name string after stripping the above
predefined name prefix.

The constant magic number EROFS_XATTR_FILTER_SEED, i.e. 0x25BBE08F, is
used to give a better spread when mapping these 30 extended attributes
into 32-bit bloom filter as:

	bit  0: security.ima
	bit  1:
	bit  2: trusted.overlay.nlink
	bit  3:
	bit  4: user.overlay.nlink
	bit  5: trusted.overlay.upper
	bit  6: user.overlay.origin
	bit  7: trusted.overlay.protattr
	bit  8: security.apparmor
	bit  9: user.overlay.protattr
	bit 10: user.overlay.opaque
	bit 11: security.selinux
	bit 12: security.SMACK64TRANSMUTE
	bit 13: security.SMACK64
	bit 14: security.SMACK64MMAP
	bit 15: user.overlay.impure
	bit 16: security.SMACK64IPIN
	bit 17: trusted.overlay.redirect
	bit 18: trusted.overlay.origin
	bit 19: security.SMACK64IPOUT
	bit 20: trusted.overlay.opaque
	bit 21: system.posix_acl_default
	bit 22:
	bit 23: user.mime_type
	bit 24: trusted.overlay.impure
	bit 25: security.SMACK64EXEC
	bit 26: user.overlay.redirect
	bit 27: user.overlay.upper
	bit 28: security.evm
	bit 29: security.capability
	bit 30: system.posix_acl_access
	bit 31: trusted.overlay.metacopy, user.overlay.metacopy

h_name_filter is introduced to the on-disk per-inode xattr header to
place the corresponding xattr name filter, where bit value 1 indicates
non-existence for compatibility.

This feature is indicated by EROFS_FEATURE_COMPAT_XATTR_FILTER
compatible feature bit.

Reserve one byte in on-disk superblock as the on-disk format for xattr
name filter may change in the future.  With this flag we don't need
bothering these compatible bits again at that time.

Suggested-by: Alexander Larsson <alexl@redhat.com>
Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Link: https://lore.kernel.org/r/20230722094538.11754-2-jefflexu@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/erofs_fs.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 364f51171c13..a03ec70ba6f2 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -13,6 +13,7 @@
 
 #define EROFS_FEATURE_COMPAT_SB_CHKSUM          0x00000001
 #define EROFS_FEATURE_COMPAT_MTIME              0x00000002
+#define EROFS_FEATURE_COMPAT_XATTR_FILTER	0x00000004
 
 /*
  * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
@@ -81,7 +82,8 @@ struct erofs_super_block {
 	__u8 xattr_prefix_count;	/* # of long xattr name prefixes */
 	__le32 xattr_prefix_start;	/* start of long xattr prefixes */
 	__le64 packed_nid;	/* nid of the special packed inode */
-	__u8 reserved2[24];
+	__u8 xattr_filter_reserved; /* reserved for xattr name filter */
+	__u8 reserved2[23];
 };
 
 /*
@@ -200,7 +202,7 @@ struct erofs_inode_extended {
  * for read-only fs, no need to introduce h_refcount
  */
 struct erofs_xattr_ibody_header {
-	__le32 h_reserved;
+	__le32 h_name_filter;		/* bit value 1 indicates not-present */
 	__u8   h_shared_count;
 	__u8   h_reserved2[7];
 	__le32 h_shared_xattrs[];       /* shared xattr id array */
@@ -221,6 +223,10 @@ struct erofs_xattr_ibody_header {
 #define EROFS_XATTR_LONG_PREFIX		0x80
 #define EROFS_XATTR_LONG_PREFIX_MASK	0x7f
 
+#define EROFS_XATTR_FILTER_BITS		32
+#define EROFS_XATTR_FILTER_DEFAULT	UINT32_MAX
+#define EROFS_XATTR_FILTER_SEED		0x25BBE08F
+
 /* xattr entry (for both inline & shared xattrs) */
 struct erofs_xattr_entry {
 	__u8   e_name_len;      /* length of name */

From fd73a4395d477ae134f319f7368a9f8a6264fd8b Mon Sep 17 00:00:00 2001
From: Jingbo Xu <jefflexu@linux.alibaba.com>
Date: Sat, 22 Jul 2023 17:45:38 +0800
Subject: [PATCH 0980/2047] erofs: boost negative xattr lookup with bloom
 filter

Optimise the negative xattr lookup with bloom filter.

The bit value for the bloom filter map has a reverse semantics for
compatibility.  That is, the bit value of 0 indicates existence, while
the bit value of 1 indicates the absence of corresponding xattr.

The initial version is _only_ enabled when xattr_filter_reserved is
zero.  The filter map internals may change in the future, in which case
the reserved flag will be set non-zero and we don't need bothering the
compatible bits again at that time.  For now disable the optimization if
this reserved flag is non-zero.

Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Link: https://lore.kernel.org/r/20230722094538.11754-3-jefflexu@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/Kconfig    |  1 +
 fs/erofs/internal.h |  3 +++
 fs/erofs/super.c    |  1 +
 fs/erofs/xattr.c    | 14 ++++++++++++++
 4 files changed, 19 insertions(+)

diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 56a99ba8ce22..f6dc961e6c2b 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -38,6 +38,7 @@ config EROFS_FS_DEBUG
 config EROFS_FS_XATTR
 	bool "EROFS extended attributes"
 	depends on EROFS_FS
+	select XXHASH
 	default y
 	help
 	  Extended attributes are name:value pairs associated with inodes by
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index fb45855cfd5d..4ff88d0dd980 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -151,6 +151,7 @@ struct erofs_sb_info {
 	u32 xattr_prefix_start;
 	u8 xattr_prefix_count;
 	struct erofs_xattr_prefix_item *xattr_prefixes;
+	unsigned int xattr_filter_reserved;
 #endif
 	u16 device_id_mask;	/* valid bits of device id to be used */
 
@@ -251,6 +252,7 @@ EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
 EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE)
 EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES)
 EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
+EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
 
 /* atomic flag definitions */
 #define EROFS_I_EA_INITED_BIT	0
@@ -270,6 +272,7 @@ struct erofs_inode {
 	unsigned char inode_isize;
 	unsigned int xattr_isize;
 
+	unsigned int xattr_name_filter;
 	unsigned int xattr_shared_count;
 	unsigned int *xattr_shared_xattrs;
 
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 3275505059cc..3d04cc33a6b8 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -391,6 +391,7 @@ static int erofs_read_superblock(struct super_block *sb)
 	sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
 	sbi->xattr_prefix_start = le32_to_cpu(dsb->xattr_prefix_start);
 	sbi->xattr_prefix_count = dsb->xattr_prefix_count;
+	sbi->xattr_filter_reserved = dsb->xattr_filter_reserved;
 #endif
 	sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
 	sbi->root_nid = le16_to_cpu(dsb->root_nid);
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index 40178b6e0688..09d341675e89 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2021-2022, Alibaba Cloud
  */
 #include <linux/security.h>
+#include <linux/xxhash.h>
 #include "xattr.h"
 
 struct erofs_xattr_iter {
@@ -87,6 +88,7 @@ static int erofs_init_inode_xattrs(struct inode *inode)
 	}
 
 	ih = it.kaddr + erofs_blkoff(sb, it.pos);
+	vi->xattr_name_filter = le32_to_cpu(ih->h_name_filter);
 	vi->xattr_shared_count = ih->h_shared_count;
 	vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count,
 						sizeof(uint), GFP_KERNEL);
@@ -392,7 +394,10 @@ int erofs_getxattr(struct inode *inode, int index, const char *name,
 		   void *buffer, size_t buffer_size)
 {
 	int ret;
+	unsigned int hashbit;
 	struct erofs_xattr_iter it;
+	struct erofs_inode *vi = EROFS_I(inode);
+	struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
 
 	if (!name)
 		return -EINVAL;
@@ -401,6 +406,15 @@ int erofs_getxattr(struct inode *inode, int index, const char *name,
 	if (ret)
 		return ret;
 
+	/* reserved flag is non-zero if there's any change of on-disk format */
+	if (erofs_sb_has_xattr_filter(sbi) && !sbi->xattr_filter_reserved) {
+		hashbit = xxh32(name, strlen(name),
+				EROFS_XATTR_FILTER_SEED + index);
+		hashbit &= EROFS_XATTR_FILTER_BITS - 1;
+		if (vi->xattr_name_filter & (1U << hashbit))
+			return -ENOATTR;
+	}
+
 	it.index = index;
 	it.name = (struct qstr)QSTR_INIT(name, strlen(name));
 	if (it.name.len > EROFS_NAME_LEN)

From b3d46e11fec0c5a8972e5061bb1462119ae5736d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 7 Aug 2023 10:43:58 -0700
Subject: [PATCH 0981/2047] selftests/harness: Actually report SKIP for signal
 tests

Tests that were expecting a signal were not correctly checking for a
SKIP condition. Move the check before the signal checking when
processing test result.

Cc: Shuah Khan <shuah@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Will Drewry <wad@chromium.org>
Cc: linux-kselftest@vger.kernel.org
Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP")
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/kselftest_harness.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 5fd49ad0c696..e05ac8261046 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -938,7 +938,11 @@ void __wait_for_test(struct __test_metadata *t)
 		fprintf(TH_LOG_STREAM,
 			"# %s: Test terminated by timeout\n", t->name);
 	} else if (WIFEXITED(status)) {
-		if (t->termsig != -1) {
+		if (WEXITSTATUS(status) == 255) {
+			/* SKIP */
+			t->passed = 1;
+			t->skip = 1;
+		} else if (t->termsig != -1) {
 			t->passed = 0;
 			fprintf(TH_LOG_STREAM,
 				"# %s: Test exited normally instead of by signal (code: %d)\n",
@@ -950,11 +954,6 @@ void __wait_for_test(struct __test_metadata *t)
 			case 0:
 				t->passed = 1;
 				break;
-			/* SKIP */
-			case 255:
-				t->passed = 1;
-				t->skip = 1;
-				break;
 			/* Other failure, assume step report. */
 			default:
 				t->passed = 0;

From 2e3f65ccfe6b0778b261ad69c9603ae85f210334 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 7 Aug 2023 09:41:19 -0700
Subject: [PATCH 0982/2047] gcc-plugins: Rename last_stmt() for GCC 14+

In GCC 14, last_stmt() was renamed to last_nondebug_stmt(). Add a helper
macro to handle the renaming.

Cc: linux-hardening@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 scripts/gcc-plugins/gcc-common.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h
index 84c730da36dd..1ae39b9f4a95 100644
--- a/scripts/gcc-plugins/gcc-common.h
+++ b/scripts/gcc-plugins/gcc-common.h
@@ -440,4 +440,8 @@ static inline void debug_gimple_stmt(const_gimple s)
 #define SET_DECL_MODE(decl, mode)	DECL_MODE(decl) = (mode)
 #endif
 
+#if BUILDING_GCC_VERSION >= 14000
+#define last_stmt(x)			last_nondebug_stmt(x)
+#endif
+
 #endif

From d3dec5bb61ce113cc84146bf4a72f752256d3767 Mon Sep 17 00:00:00 2001
From: Meng Li <li.meng@amd.com>
Date: Thu, 3 Aug 2023 09:43:54 +0800
Subject: [PATCH 0983/2047] cpufreq: amd-pstate-ut: Modify the function to get
 the highest_perf value

The previous function amd_get_highest_perf() will be deprecated.
It can only return 166 or 255 by cpuinfo. For platforms that
support preferred core, the value of highest perf can be between
166 and 255. Therefore, it will cause amd-pstate-ut to fail when
run amd_pstate_ut_check_perf().

Signed-off-by: Meng Li <li.meng@amd.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/amd-pstate-ut.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
index 7f3fe2048981..9c889a4a0177 100644
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -145,8 +145,6 @@ static void amd_pstate_ut_check_perf(u32 index)
 	struct cpufreq_policy *policy = NULL;
 	struct amd_cpudata *cpudata = NULL;
 
-	highest_perf = amd_get_highest_perf();
-
 	for_each_possible_cpu(cpu) {
 		policy = cpufreq_cpu_get(cpu);
 		if (!policy)
@@ -161,6 +159,7 @@ static void amd_pstate_ut_check_perf(u32 index)
 				return;
 			}
 
+			highest_perf = cppc_perf.highest_perf;
 			nominal_perf = cppc_perf.nominal_perf;
 			lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
 			lowest_perf = cppc_perf.lowest_perf;
@@ -172,6 +171,7 @@ static void amd_pstate_ut_check_perf(u32 index)
 				return;
 			}
 
+			highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
 			nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
 			lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1);
 			lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);

From 97ebfdb7ad089d5213f6c75fdc48173fe0f14439 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 7 Aug 2023 15:38:37 -0400
Subject: [PATCH 0984/2047] ubifs: have ubifs_update_time use
 inode_update_timestamps

In later patches, we're going to drop the "now" parameter from the
update_time operation. Prepare ubifs for this, by having it use the new
inode_update_timestamps helper.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230807-mgctime-v7-6-d1dec143a704@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ubifs/file.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index df9086b19cd0..81d44a1b2414 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1397,13 +1397,7 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time,
 		return err;
 
 	mutex_lock(&ui->ui_mutex);
-	if (flags & S_ATIME)
-		inode->i_atime = *time;
-	if (flags & S_CTIME)
-		inode_set_ctime_to_ts(inode, *time);
-	if (flags & S_MTIME)
-		inode->i_mtime = *time;
-
+	inode_update_timestamps(inode, flags);
 	release = ui->dirty;
 	__mark_inode_dirty(inode, I_DIRTY_SYNC);
 	mutex_unlock(&ui->ui_mutex);

From 93e6c30435443bad38414b745845fb1d3fa6963e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Thu, 10 Aug 2023 09:12:04 -0400
Subject: [PATCH 0985/2047] fat: remove i_version handling from fat_update_time

commit 6bb885ecd746 (fat: add functions to update and truncate
timestamps appropriately") added an update_time routine for fat. That
patch added a section for handling the S_VERSION bit, even though FAT
doesn't enable SB_I_VERSION and the S_VERSION bit will never be set when
calling it.

Remove the section for handling S_VERSION since it's effectively dead
code, and will be problematic vs. future changes.

Cc: Frank Sorenson <sorenson@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Frank Sorenson <sorenson@redhat.com>
Message-Id: <20230810-ctime-fat-v1-1-327598fd1de8@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fat/misc.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 67006ea08db6..620436777f98 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -354,9 +354,6 @@ int fat_update_time(struct inode *inode, struct timespec64 *now, int flags)
 			dirty_flags |= I_DIRTY_SYNC;
 	}
 
-	if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
-		dirty_flags |= I_DIRTY_SYNC;
-
 	__mark_inode_dirty(inode, dirty_flags);
 	return 0;
 }

From 6f4aaee3faa84f00d38bfaba88a9f75d8c78e7e0 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Thu, 10 Aug 2023 09:12:05 -0400
Subject: [PATCH 0986/2047] fat: make fat_update_time get its own timestamp

In later patches, we're going to drop the "now" parameter from the
update_time operation. Fix fat_update_time to fetch its own timestamp.
It turns out that this is easily done by just passing a NULL timestamp
pointer to fat_truncate_time.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Frank Sorenson <sorenson@redhat.com>
Message-Id: <20230810-ctime-fat-v1-2-327598fd1de8@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fat/misc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 620436777f98..e783792c2fc3 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -347,7 +347,7 @@ int fat_update_time(struct inode *inode, struct timespec64 *now, int flags)
 		return 0;
 
 	if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
-		fat_truncate_time(inode, now, flags);
+		fat_truncate_time(inode, NULL, flags);
 		if (inode->i_sb->s_flags & SB_LAZYTIME)
 			dirty_flags |= I_DIRTY_TIME;
 		else

From 51b0f3ebdbbf440b2d04f24cff2ad8b39d11842d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 7 Aug 2023 15:38:38 -0400
Subject: [PATCH 0987/2047] xfs: have xfs_vn_update_time gets its own timestamp

In later patches we're going to drop the "now" parameter from the
update_time operation. Prepare XFS for this by reworking how it fetches
timestamps and sets them in the inode. Ensure that we update the ctime
even if only S_MTIME is set.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Acked-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230807-mgctime-v7-7-d1dec143a704@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/xfs_iops.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 731f45391baa..549ad1fb4d65 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1029,7 +1029,7 @@ xfs_vn_setattr(
 STATIC int
 xfs_vn_update_time(
 	struct inode		*inode,
-	struct timespec64	*now,
+	struct timespec64	*time,
 	int			flags)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
@@ -1037,6 +1037,7 @@ xfs_vn_update_time(
 	int			log_flags = XFS_ILOG_TIMESTAMP;
 	struct xfs_trans	*tp;
 	int			error;
+	struct timespec64	now;
 
 	trace_xfs_update_time(ip);
 
@@ -1056,12 +1057,15 @@ xfs_vn_update_time(
 		return error;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	if (flags & S_CTIME)
-		inode_set_ctime_to_ts(inode, *now);
+	if (flags & (S_CTIME|S_MTIME))
+		now = inode_set_ctime_current(inode);
+	else
+		now = current_time(inode);
+
 	if (flags & S_MTIME)
-		inode->i_mtime = *now;
+		inode->i_mtime = now;
 	if (flags & S_ATIME)
-		inode->i_atime = *now;
+		inode->i_atime = now;
 
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 	xfs_trans_log_inode(tp, ip, log_flags);

From 913e99287b98fd051ac1976140a2764a8ef9dfbf Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 7 Aug 2023 15:38:39 -0400
Subject: [PATCH 0988/2047] fs: drop the timespec64 argument from update_time

Now that all of the update_time operations are prepared for it, we can
drop the timespec64 argument from the update_time operation. Do that and
remove it from some associated functions like inode_update_time and
inode_needs_update_time.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230807-mgctime-v7-8-d1dec143a704@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/bad_inode.c                |  3 +--
 fs/btrfs/inode.c              |  3 +--
 fs/btrfs/volumes.c            |  4 +---
 fs/fat/fat.h                  |  3 +--
 fs/fat/misc.c                 |  2 +-
 fs/gfs2/inode.c               |  3 +--
 fs/inode.c                    | 30 +++++++++++++-----------------
 fs/orangefs/inode.c           |  2 +-
 fs/orangefs/orangefs-kernel.h |  2 +-
 fs/overlayfs/inode.c          |  2 +-
 fs/overlayfs/overlayfs.h      |  2 +-
 fs/ubifs/file.c               |  3 +--
 fs/ubifs/ubifs.h              |  2 +-
 fs/xfs/xfs_iops.c             |  1 -
 include/linux/fs.h            |  4 ++--
 15 files changed, 27 insertions(+), 39 deletions(-)

diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 6e21f7412a85..83f9566c973b 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -133,8 +133,7 @@ static int bad_inode_fiemap(struct inode *inode,
 	return -EIO;
 }
 
-static int bad_inode_update_time(struct inode *inode, struct timespec64 *time,
-				 int flags)
+static int bad_inode_update_time(struct inode *inode, int flags)
 {
 	return -EIO;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d52e7d64570a..0964c66411a1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6059,8 +6059,7 @@ static int btrfs_dirty_inode(struct btrfs_inode *inode)
  * This is a copy of file_update_time.  We need this so we can return error on
  * ENOSPC for updating the inode in the case of file write and mmap writes.
  */
-static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
-			     int flags)
+static int btrfs_update_time(struct inode *inode, int flags)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	bool dirty = flags & ~S_VERSION;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 73f9ea7672db..264c71590370 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1917,15 +1917,13 @@ out:
 static void update_dev_time(const char *device_path)
 {
 	struct path path;
-	struct timespec64 now;
 	int ret;
 
 	ret = kern_path(device_path, LOOKUP_FOLLOW, &path);
 	if (ret)
 		return;
 
-	now = current_time(d_inode(path.dentry));
-	inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME | S_VERSION);
+	inode_update_time(d_inode(path.dentry), S_MTIME | S_CTIME | S_VERSION);
 	path_put(&path);
 }
 
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index e3b690b48e3e..66cf4778cf3b 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -460,8 +460,7 @@ extern struct timespec64 fat_truncate_mtime(const struct msdos_sb_info *sbi,
 					    const struct timespec64 *ts);
 extern int fat_truncate_time(struct inode *inode, struct timespec64 *now,
 			     int flags);
-extern int fat_update_time(struct inode *inode, struct timespec64 *now,
-			   int flags);
+extern int fat_update_time(struct inode *inode, int flags);
 extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
 
 int fat_cache_init(void);
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index e783792c2fc3..f2304a1054aa 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -339,7 +339,7 @@ int fat_truncate_time(struct inode *inode, struct timespec64 *now, int flags)
 }
 EXPORT_SYMBOL_GPL(fat_truncate_time);
 
-int fat_update_time(struct inode *inode, struct timespec64 *now, int flags)
+int fat_update_time(struct inode *inode, int flags)
 {
 	int dirty_flags = 0;
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index f1f04557aa21..a21ac41d6669 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -2139,8 +2139,7 @@ loff_t gfs2_seek_hole(struct file *file, loff_t offset)
 	return vfs_setpos(file, ret, inode->i_sb->s_maxbytes);
 }
 
-static int gfs2_update_time(struct inode *inode, struct timespec64 *time,
-			    int flags)
+static int gfs2_update_time(struct inode *inode, int flags)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_glock *gl = ip->i_gl;
diff --git a/fs/inode.c b/fs/inode.c
index e07e45f6cd01..e50d94a136fe 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1958,10 +1958,10 @@ EXPORT_SYMBOL(generic_update_time);
  * This does the actual work of updating an inodes time or version.  Must have
  * had called mnt_want_write() before calling this.
  */
-int inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
+int inode_update_time(struct inode *inode, int flags)
 {
 	if (inode->i_op->update_time)
-		return inode->i_op->update_time(inode, time, flags);
+		return inode->i_op->update_time(inode, flags);
 	generic_update_time(inode, flags);
 	return 0;
 }
@@ -2015,7 +2015,6 @@ void touch_atime(const struct path *path)
 {
 	struct vfsmount *mnt = path->mnt;
 	struct inode *inode = d_inode(path->dentry);
-	struct timespec64 now;
 
 	if (!atime_needs_update(path, inode))
 		return;
@@ -2034,8 +2033,7 @@ void touch_atime(const struct path *path)
 	 * We may also fail on filesystems that have the ability to make parts
 	 * of the fs read only, e.g. subvolumes in Btrfs.
 	 */
-	now = current_time(inode);
-	inode_update_time(inode, &now, S_ATIME);
+	inode_update_time(inode, S_ATIME);
 	__mnt_drop_write(mnt);
 skip_update:
 	sb_end_write(inode->i_sb);
@@ -2120,20 +2118,21 @@ int file_remove_privs(struct file *file)
 }
 EXPORT_SYMBOL(file_remove_privs);
 
-static int inode_needs_update_time(struct inode *inode, struct timespec64 *now)
+static int inode_needs_update_time(struct inode *inode)
 {
 	int sync_it = 0;
+	struct timespec64 now = current_time(inode);
 	struct timespec64 ctime;
 
 	/* First try to exhaust all avenues to not sync */
 	if (IS_NOCMTIME(inode))
 		return 0;
 
-	if (!timespec64_equal(&inode->i_mtime, now))
+	if (!timespec64_equal(&inode->i_mtime, &now))
 		sync_it = S_MTIME;
 
 	ctime = inode_get_ctime(inode);
-	if (!timespec64_equal(&ctime, now))
+	if (!timespec64_equal(&ctime, &now))
 		sync_it |= S_CTIME;
 
 	if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
@@ -2142,15 +2141,14 @@ static int inode_needs_update_time(struct inode *inode, struct timespec64 *now)
 	return sync_it;
 }
 
-static int __file_update_time(struct file *file, struct timespec64 *now,
-			int sync_mode)
+static int __file_update_time(struct file *file, int sync_mode)
 {
 	int ret = 0;
 	struct inode *inode = file_inode(file);
 
 	/* try to update time settings */
 	if (!__mnt_want_write_file(file)) {
-		ret = inode_update_time(inode, now, sync_mode);
+		ret = inode_update_time(inode, sync_mode);
 		__mnt_drop_write_file(file);
 	}
 
@@ -2175,13 +2173,12 @@ int file_update_time(struct file *file)
 {
 	int ret;
 	struct inode *inode = file_inode(file);
-	struct timespec64 now = current_time(inode);
 
-	ret = inode_needs_update_time(inode, &now);
+	ret = inode_needs_update_time(inode);
 	if (ret <= 0)
 		return ret;
 
-	return __file_update_time(file, &now, ret);
+	return __file_update_time(file, ret);
 }
 EXPORT_SYMBOL(file_update_time);
 
@@ -2204,7 +2201,6 @@ static int file_modified_flags(struct file *file, int flags)
 {
 	int ret;
 	struct inode *inode = file_inode(file);
-	struct timespec64 now = current_time(inode);
 
 	/*
 	 * Clear the security bits if the process is not being run by root.
@@ -2217,13 +2213,13 @@ static int file_modified_flags(struct file *file, int flags)
 	if (unlikely(file->f_mode & FMODE_NOCMTIME))
 		return 0;
 
-	ret = inode_needs_update_time(inode, &now);
+	ret = inode_needs_update_time(inode);
 	if (ret <= 0)
 		return ret;
 	if (flags & IOCB_NOWAIT)
 		return -EAGAIN;
 
-	return __file_update_time(file, &now, ret);
+	return __file_update_time(file, ret);
 }
 
 /**
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 3afa2a69bc63..085912268442 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -900,7 +900,7 @@ int orangefs_permission(struct mnt_idmap *idmap,
 	return generic_permission(&nop_mnt_idmap, inode, mask);
 }
 
-int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags)
+int orangefs_update_time(struct inode *inode, int flags)
 {
 	struct iattr iattr;
 
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index ce20d3443869..b711654ca18a 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -370,7 +370,7 @@ int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path,
 int orangefs_permission(struct mnt_idmap *idmap,
 			struct inode *inode, int mask);
 
-int orangefs_update_time(struct inode *, struct timespec64 *, int);
+int orangefs_update_time(struct inode *, int);
 
 /*
  * defined in xattr.c
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index a63e57447be9..f22e27b78025 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -693,7 +693,7 @@ int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
 }
 #endif
 
-int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
+int ovl_update_time(struct inode *inode, int flags)
 {
 	if (flags & S_ATIME) {
 		struct ovl_fs *ofs = inode->i_sb->s_fs_info;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 9402591f12aa..8bbe6173bef4 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -665,7 +665,7 @@ static inline struct posix_acl *ovl_get_acl_path(const struct path *path,
 }
 #endif
 
-int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
+int ovl_update_time(struct inode *inode, int flags);
 bool ovl_is_private_xattr(struct super_block *sb, const char *name);
 
 struct ovl_inode_params {
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 81d44a1b2414..e5382f0b2587 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1378,8 +1378,7 @@ static inline int mctime_update_needed(const struct inode *inode,
  *
  * This function updates time of the inode.
  */
-int ubifs_update_time(struct inode *inode, struct timespec64 *time,
-			     int flags)
+int ubifs_update_time(struct inode *inode, int flags)
 {
 	struct ubifs_inode *ui = ubifs_inode(inode);
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 4c36044140e7..ebb3ad6b5e7e 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -2027,7 +2027,7 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc);
 int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
 int ubifs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 		  struct iattr *attr);
-int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags);
+int ubifs_update_time(struct inode *inode, int flags);
 
 /* dir.c */
 struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 549ad1fb4d65..1c1e6171209d 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1029,7 +1029,6 @@ xfs_vn_setattr(
 STATIC int
 xfs_vn_update_time(
 	struct inode		*inode,
-	struct timespec64	*time,
 	int			flags)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bb3c2c4f871f..a83313f90fe3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1887,7 +1887,7 @@ struct inode_operations {
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
-	int (*update_time)(struct inode *, struct timespec64 *, int);
+	int (*update_time)(struct inode *, int);
 	int (*atomic_open)(struct inode *, struct dentry *,
 			   struct file *, unsigned open_flag,
 			   umode_t create_mode);
@@ -2237,7 +2237,7 @@ enum file_time_flags {
 
 extern bool atime_needs_update(const struct path *, struct inode *);
 extern void touch_atime(const struct path *);
-int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
+int inode_update_time(struct inode *inode, int flags);
 
 static inline void file_accessed(struct file *file)
 {

From ffb6cf19e06334062744b7e3493f71e500964f8e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 7 Aug 2023 15:38:40 -0400
Subject: [PATCH 0989/2047] fs: add infrastructure for multigrain timestamps

The VFS always uses coarse-grained timestamps when updating the ctime
and mtime after a change. This has the benefit of allowing filesystems
to optimize away a lot metadata updates, down to around 1 per jiffy,
even when a file is under heavy writes.

Unfortunately, this has always been an issue when we're exporting via
NFSv3, which relies on timestamps to validate caches. A lot of changes
can happen in a jiffy, so timestamps aren't sufficient to help the
client decide to invalidate the cache. Even with NFSv4, a lot of
exported filesystems don't properly support a change attribute and are
subject to the same problems with timestamp granularity. Other
applications have similar issues with timestamps (e.g backup
applications).

If we were to always use fine-grained timestamps, that would improve the
situation, but that becomes rather expensive, as the underlying
filesystem would have to log a lot more metadata updates.

What we need is a way to only use fine-grained timestamps when they are
being actively queried.

POSIX generally mandates that when the the mtime changes, the ctime must
also change. The kernel always stores normalized ctime values, so only
the first 30 bits of the tv_nsec field are ever used.

Use the 31st bit of the ctime tv_nsec field to indicate that something
has queried the inode for the mtime or ctime. When this flag is set,
on the next mtime or ctime update, the kernel will fetch a fine-grained
timestamp instead of the usual coarse-grained one.

Filesytems can opt into this behavior by setting the FS_MGTIME flag in
the fstype. Filesystems that don't set this flag will continue to use
coarse-grained timestamps.

Later patches will convert individual filesystems to use the new
infrastructure.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230807-mgctime-v7-9-d1dec143a704@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/inode.c         | 82 ++++++++++++++++++++++++++++++++++++++++++++--
 fs/stat.c          | 41 +++++++++++++++++++++--
 include/linux/fs.h | 46 ++++++++++++++++++++++++--
 3 files changed, 162 insertions(+), 7 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index e50d94a136fe..f55957ac80e6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2118,10 +2118,52 @@ int file_remove_privs(struct file *file)
 }
 EXPORT_SYMBOL(file_remove_privs);
 
+/**
+ * current_mgtime - Return FS time (possibly fine-grained)
+ * @inode: inode.
+ *
+ * Return the current time truncated to the time granularity supported by
+ * the fs, as suitable for a ctime/mtime change. If the ctime is flagged
+ * as having been QUERIED, get a fine-grained timestamp.
+ */
+struct timespec64 current_mgtime(struct inode *inode)
+{
+	struct timespec64 now, ctime;
+	atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec;
+	long nsec = atomic_long_read(pnsec);
+
+	if (nsec & I_CTIME_QUERIED) {
+		ktime_get_real_ts64(&now);
+		return timestamp_truncate(now, inode);
+	}
+
+	ktime_get_coarse_real_ts64(&now);
+	now = timestamp_truncate(now, inode);
+
+	/*
+	 * If we've recently fetched a fine-grained timestamp
+	 * then the coarse-grained one may still be earlier than the
+	 * existing ctime. Just keep the existing value if so.
+	 */
+	ctime = inode_get_ctime(inode);
+	if (timespec64_compare(&ctime, &now) > 0)
+		now = ctime;
+
+	return now;
+}
+EXPORT_SYMBOL(current_mgtime);
+
+static struct timespec64 current_ctime(struct inode *inode)
+{
+	if (is_mgtime(inode))
+		return current_mgtime(inode);
+	return current_time(inode);
+}
+
 static int inode_needs_update_time(struct inode *inode)
 {
 	int sync_it = 0;
-	struct timespec64 now = current_time(inode);
+	struct timespec64 now = current_ctime(inode);
 	struct timespec64 ctime;
 
 	/* First try to exhaust all avenues to not sync */
@@ -2552,9 +2594,43 @@ EXPORT_SYMBOL(current_time);
  */
 struct timespec64 inode_set_ctime_current(struct inode *inode)
 {
-	struct timespec64 now = current_time(inode);
+	struct timespec64 now;
+	struct timespec64 ctime;
 
-	inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
+	ctime.tv_nsec = READ_ONCE(inode->__i_ctime.tv_nsec);
+	if (!(ctime.tv_nsec & I_CTIME_QUERIED)) {
+		now = current_time(inode);
+
+		/* Just copy it into place if it's not multigrain */
+		if (!is_mgtime(inode)) {
+			inode_set_ctime_to_ts(inode, now);
+			return now;
+		}
+
+		/*
+		 * If we've recently updated with a fine-grained timestamp,
+		 * then the coarse-grained one may still be earlier than the
+		 * existing ctime. Just keep the existing value if so.
+		 */
+		ctime.tv_sec = inode->__i_ctime.tv_sec;
+		if (timespec64_compare(&ctime, &now) > 0)
+			return ctime;
+
+		/*
+		 * Ctime updates are usually protected by the inode_lock, but
+		 * we can still race with someone setting the QUERIED flag.
+		 * Try to swap the new nsec value into place. If it's changed
+		 * in the interim, then just go with a fine-grained timestamp.
+		 */
+		if (cmpxchg(&inode->__i_ctime.tv_nsec, ctime.tv_nsec,
+			    now.tv_nsec) != ctime.tv_nsec)
+			goto fine_grained;
+		inode->__i_ctime.tv_sec = now.tv_sec;
+		return now;
+	}
+fine_grained:
+	ktime_get_real_ts64(&now);
+	inode_set_ctime_to_ts(inode, timestamp_truncate(now, inode));
 	return now;
 }
 EXPORT_SYMBOL(inode_set_ctime_current);
diff --git a/fs/stat.c b/fs/stat.c
index 7644e5997035..136711ae72fb 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -26,6 +26,37 @@
 #include "internal.h"
 #include "mount.h"
 
+/**
+ * fill_mg_cmtime - Fill in the mtime and ctime and flag ctime as QUERIED
+ * @stat: where to store the resulting values
+ * @request_mask: STATX_* values requested
+ * @inode: inode from which to grab the c/mtime
+ *
+ * Given @inode, grab the ctime and mtime out if it and store the result
+ * in @stat. When fetching the value, flag it as queried so the next write
+ * will use a fine-grained timestamp.
+ */
+void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode)
+{
+	atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec;
+
+	/* If neither time was requested, then don't report them */
+	if (!(request_mask & (STATX_CTIME|STATX_MTIME))) {
+		stat->result_mask &= ~(STATX_CTIME|STATX_MTIME);
+		return;
+	}
+
+	stat->mtime = inode->i_mtime;
+	stat->ctime.tv_sec = inode->__i_ctime.tv_sec;
+	/*
+	 * Atomically set the QUERIED flag and fetch the new value with
+	 * the flag masked off.
+	 */
+	stat->ctime.tv_nsec = atomic_long_fetch_or(I_CTIME_QUERIED, pnsec) &
+					~I_CTIME_QUERIED;
+}
+EXPORT_SYMBOL(fill_mg_cmtime);
+
 /**
  * generic_fillattr - Fill in the basic attributes from the inode struct
  * @idmap:		idmap of the mount the inode was found from
@@ -58,8 +89,14 @@ void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask,
 	stat->rdev = inode->i_rdev;
 	stat->size = i_size_read(inode);
 	stat->atime = inode->i_atime;
-	stat->mtime = inode->i_mtime;
-	stat->ctime = inode_get_ctime(inode);
+
+	if (is_mgtime(inode)) {
+		fill_mg_cmtime(stat, request_mask, inode);
+	} else {
+		stat->mtime = inode->i_mtime;
+		stat->ctime = inode_get_ctime(inode);
+	}
+
 	stat->blksize = i_blocksize(inode);
 	stat->blocks = inode->i_blocks;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a83313f90fe3..455835d0e963 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1474,18 +1474,47 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
 	       kgid_has_mapping(fs_userns, kgid);
 }
 
+struct timespec64 current_mgtime(struct inode *inode);
 struct timespec64 current_time(struct inode *inode);
 struct timespec64 inode_set_ctime_current(struct inode *inode);
 
+/*
+ * Multigrain timestamps
+ *
+ * Conditionally use fine-grained ctime and mtime timestamps when there
+ * are users actively observing them via getattr. The primary use-case
+ * for this is NFS clients that use the ctime to distinguish between
+ * different states of the file, and that are often fooled by multiple
+ * operations that occur in the same coarse-grained timer tick.
+ *
+ * The kernel always keeps normalized struct timespec64 values in the ctime,
+ * which means that only the first 30 bits of the value are used. Use the
+ * 31st bit of the ctime's tv_nsec field as a flag to indicate that the value
+ * has been queried since it was last updated.
+ */
+#define I_CTIME_QUERIED		(1L<<30)
+
 /**
  * inode_get_ctime - fetch the current ctime from the inode
  * @inode: inode from which to fetch ctime
  *
- * Grab the current ctime from the inode and return it.
+ * Grab the current ctime tv_nsec field from the inode, mask off the
+ * I_CTIME_QUERIED flag and return it. This is mostly intended for use by
+ * internal consumers of the ctime that aren't concerned with ensuring a
+ * fine-grained update on the next change (e.g. when preparing to store
+ * the value in the backing store for later retrieval).
+ *
+ * This is safe to call regardless of whether the underlying filesystem
+ * is using multigrain timestamps.
  */
 static inline struct timespec64 inode_get_ctime(const struct inode *inode)
 {
-	return inode->__i_ctime;
+	struct timespec64 ctime;
+
+	ctime.tv_sec = inode->__i_ctime.tv_sec;
+	ctime.tv_nsec = inode->__i_ctime.tv_nsec & ~I_CTIME_QUERIED;
+
+	return ctime;
 }
 
 /**
@@ -2259,6 +2288,7 @@ struct file_system_type {
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_DISALLOW_NOTIFY_PERM	16	/* Disable fanotify permission events */
 #define FS_ALLOW_IDMAP         32      /* FS has been updated to handle vfs idmappings. */
+#define FS_MGTIME		64	/* FS uses multigrain timestamps */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	int (*init_fs_context)(struct fs_context *);
 	const struct fs_parameter_spec *parameters;
@@ -2282,6 +2312,17 @@ struct file_system_type {
 
 #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
 
+/**
+ * is_mgtime: is this inode using multigrain timestamps
+ * @inode: inode to test for multigrain timestamps
+ *
+ * Return true if the inode uses multigrain timestamps, false otherwise.
+ */
+static inline bool is_mgtime(const struct inode *inode)
+{
+	return inode->i_sb->s_type->fs_flags & FS_MGTIME;
+}
+
 extern struct dentry *mount_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
 	int (*fill_super)(struct super_block *, void *, int));
@@ -2918,6 +2959,7 @@ extern void page_put_link(void *);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern void kfree_link(void *);
+void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode);
 void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
 void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
 extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);

From d48c3397291690c3576d6c983b0a86ecbc203cac Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 7 Aug 2023 15:38:41 -0400
Subject: [PATCH 0990/2047] tmpfs: add support for multigrain timestamps

Enable multigrain timestamps, which should ensure that there is an
apparent change to the timestamp whenever it has been written after
being actively observed via getattr.

tmpfs only requires the FS_MGTIME flag.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230807-mgctime-v7-10-d1dec143a704@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/shmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 142ead70e8c1..98cc4be7a8a8 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -4220,7 +4220,7 @@ static struct file_system_type shmem_fs_type = {
 #endif
 	.kill_sb	= kill_litter_super,
 #ifdef CONFIG_SHMEM
-	.fs_flags	= FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
+	.fs_flags	= FS_USERNS_MOUNT | FS_ALLOW_IDMAP | FS_MGTIME,
 #else
 	.fs_flags	= FS_USERNS_MOUNT,
 #endif

From e44df2664746aed8b6dd5245eb711a0ce33c5cf5 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 7 Aug 2023 15:38:42 -0400
Subject: [PATCH 0991/2047] xfs: switch to multigrain timestamps

Enable multigrain timestamps, which should ensure that there is an
apparent change to the timestamp whenever it has been written after
being actively observed via getattr.

Also, anytime the mtime changes, the ctime must also change, and those
are now the only two options for xfs_trans_ichgtime. Have that function
unconditionally bump the ctime, and ASSERT that XFS_ICHGTIME_CHG is
always set.

Acked-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230807-mgctime-v7-11-d1dec143a704@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/libxfs/xfs_trans_inode.c | 6 +++---
 fs/xfs/xfs_iops.c               | 6 +++---
 fs/xfs/xfs_super.c              | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 6b2296ff248a..ad22656376d3 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -62,12 +62,12 @@ xfs_trans_ichgtime(
 	ASSERT(tp);
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
-	tv = current_time(inode);
+	/* If the mtime changes, then ctime must also change */
+	ASSERT(flags & XFS_ICHGTIME_CHG);
 
+	tv = inode_set_ctime_current(inode);
 	if (flags & XFS_ICHGTIME_MOD)
 		inode->i_mtime = tv;
-	if (flags & XFS_ICHGTIME_CHG)
-		inode_set_ctime_to_ts(inode, tv);
 	if (flags & XFS_ICHGTIME_CREATE)
 		ip->i_crtime = tv;
 }
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1c1e6171209d..2ededd3f6b8c 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -573,10 +573,10 @@ xfs_vn_getattr(
 	stat->gid = vfsgid_into_kgid(vfsgid);
 	stat->ino = ip->i_ino;
 	stat->atime = inode->i_atime;
-	stat->mtime = inode->i_mtime;
-	stat->ctime = inode_get_ctime(inode);
 	stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
 
+	fill_mg_cmtime(stat, request_mask, inode);
+
 	if (xfs_has_v3inodes(mp)) {
 		if (request_mask & STATX_BTIME) {
 			stat->result_mask |= STATX_BTIME;
@@ -917,7 +917,7 @@ xfs_setattr_size(
 	if (newsize != oldsize &&
 	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
 		iattr->ia_ctime = iattr->ia_mtime =
-			current_time(inode);
+			current_mgtime(inode);
 		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
 	}
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 818510243130..4b10edb2c972 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -2009,7 +2009,7 @@ static struct file_system_type xfs_fs_type = {
 	.init_fs_context	= xfs_init_fs_context,
 	.parameters		= xfs_fs_parameters,
 	.kill_sb		= kill_block_super,
-	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
 };
 MODULE_ALIAS_FS("xfs");
 

From 0269b585868e59b6a2ecc6ea685d39310e4fc18b Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 7 Aug 2023 15:38:43 -0400
Subject: [PATCH 0992/2047] ext4: switch to multigrain timestamps

Enable multigrain timestamps, which should ensure that there is an
apparent change to the timestamp whenever it has been written after
being actively observed via getattr.

For ext4, we only need to enable the FS_MGTIME flag.

Acked-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230807-mgctime-v7-12-d1dec143a704@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ext4/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b54c70e1a74e..cb1ff47af156 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -7279,7 +7279,7 @@ static struct file_system_type ext4_fs_type = {
 	.init_fs_context	= ext4_init_fs_context,
 	.parameters		= ext4_param_specs,
 	.kill_sb		= kill_block_super,
-	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
 };
 MODULE_ALIAS_FS("ext4");
 

From 50e9ceef1d4f644ee0049e82e360058a64ec284c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 7 Aug 2023 15:38:44 -0400
Subject: [PATCH 0993/2047] btrfs: convert to multigrain timestamps

Enable multigrain timestamps, which should ensure that there is an
apparent change to the timestamp whenever it has been written after
being actively observed via getattr.

Beyond enabling the FS_MGTIME flag, this patch eliminates
update_time_for_write, which goes to great pains to avoid in-memory
stores. Just have it overwrite the timestamps unconditionally.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Acked-by: David Sterba <dsterba@suse.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230807-mgctime-v7-13-d1dec143a704@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/btrfs/file.c  | 24 ++++--------------------
 fs/btrfs/super.c |  5 +++--
 2 files changed, 7 insertions(+), 22 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index d7a9ece7a40b..b9e75c9f95ac 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1106,25 +1106,6 @@ void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
 	btrfs_drew_write_unlock(&inode->root->snapshot_lock);
 }
 
-static void update_time_for_write(struct inode *inode)
-{
-	struct timespec64 now, ctime;
-
-	if (IS_NOCMTIME(inode))
-		return;
-
-	now = current_time(inode);
-	if (!timespec64_equal(&inode->i_mtime, &now))
-		inode->i_mtime = now;
-
-	ctime = inode_get_ctime(inode);
-	if (!timespec64_equal(&ctime, &now))
-		inode_set_ctime_to_ts(inode, now);
-
-	if (IS_I_VERSION(inode))
-		inode_inc_iversion(inode);
-}
-
 static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
 			     size_t count)
 {
@@ -1156,7 +1137,10 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
 	 * need to start yet another transaction to update the inode as we will
 	 * update the inode when we finish writing whatever data we write.
 	 */
-	update_time_for_write(inode);
+	if (!IS_NOCMTIME(inode)) {
+		inode->i_mtime = inode_set_ctime_current(inode);
+		inode_inc_iversion(inode);
+	}
 
 	start_pos = round_down(pos, fs_info->sectorsize);
 	oldsize = i_size_read(inode);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f1dd172d8d5b..8eda51b095c9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2144,7 +2144,7 @@ static struct file_system_type btrfs_fs_type = {
 	.name		= "btrfs",
 	.mount		= btrfs_mount,
 	.kill_sb	= btrfs_kill_super,
-	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_MGTIME,
 };
 
 static struct file_system_type btrfs_root_fs_type = {
@@ -2152,7 +2152,8 @@ static struct file_system_type btrfs_root_fs_type = {
 	.name		= "btrfs",
 	.mount		= btrfs_mount_root,
 	.kill_sb	= btrfs_kill_super,
-	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP,
+	.fs_flags	= FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA |
+			  FS_ALLOW_IDMAP | FS_MGTIME,
 };
 
 MODULE_ALIAS_FS("btrfs");

From 3d757ddbd68ce8d464cc7a45521abc33e6249af1 Mon Sep 17 00:00:00 2001
From: Chevron Li <chevron.li@bayhubtech.com>
Date: Fri, 11 Aug 2023 11:35:16 +0800
Subject: [PATCH 0994/2047] mmc: sdhci-pci-o2micro: add Bayhub new chip GG8
 support for UHS-I

Add Bayhub new chip GG8 support for UHS-I function

Signed-off-by: Chevron Li <chevron.li@bayhubtech.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230811033517.11532-1-chevron_li@126.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pci-core.c    |   4 +
 drivers/mmc/host/sdhci-pci-o2micro.c | 144 ++++++++++++++++++++-------
 drivers/mmc/host/sdhci-pci.h         |   4 +
 3 files changed, 118 insertions(+), 34 deletions(-)

diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 1c2572c0f012..7c14feb5db77 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -1898,6 +1898,10 @@ static const struct pci_device_id pci_ids[] = {
 	SDHCI_PCI_DEVICE(O2, SDS1,     o2),
 	SDHCI_PCI_DEVICE(O2, SEABIRD0, o2),
 	SDHCI_PCI_DEVICE(O2, SEABIRD1, o2),
+	SDHCI_PCI_DEVICE(O2, GG8_9860, o2),
+	SDHCI_PCI_DEVICE(O2, GG8_9861, o2),
+	SDHCI_PCI_DEVICE(O2, GG8_9862, o2),
+	SDHCI_PCI_DEVICE(O2, GG8_9863, o2),
 	SDHCI_PCI_DEVICE(ARASAN, PHY_EMMC, arasan),
 	SDHCI_PCI_DEVICE(SYNOPSYS, DWC_MSHC, snps),
 	SDHCI_PCI_DEVICE(GLI, 9750, gl9750),
diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c
index 620f52ad9667..ae2707fbd119 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.c
+++ b/drivers/mmc/host/sdhci-pci-o2micro.c
@@ -36,6 +36,7 @@
 #define O2_SD_INF_MOD		0xF1
 #define O2_SD_MISC_CTRL4	0xFC
 #define O2_SD_MISC_CTRL		0x1C0
+#define O2_SD_EXP_INT_REG	0x1E0
 #define O2_SD_PWR_FORCE_L0	0x0002
 #define O2_SD_TUNING_CTRL	0x300
 #define O2_SD_PLL_SETTING	0x304
@@ -49,6 +50,9 @@
 #define O2_SD_UHS2_L1_CTRL	0x35C
 #define O2_SD_FUNC_REG3		0x3E0
 #define O2_SD_FUNC_REG4		0x3E4
+#define O2_SD_PARA_SET_REG1	0x444
+#define O2_SD_VDDX_CTRL_REG	0x508
+#define O2_SD_GPIO_CTRL_REG1	0x510
 #define O2_SD_LED_ENABLE	BIT(6)
 #define O2_SD_FREG0_LEDOFF	BIT(13)
 #define O2_SD_SEL_DLL		BIT(16)
@@ -334,33 +338,45 @@ static int sdhci_o2_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	scratch |= O2_SD_PWR_FORCE_L0;
 	sdhci_writew(host, scratch, O2_SD_MISC_CTRL);
 
-	/* Stop clk */
-	reg_val = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
-	reg_val &= ~SDHCI_CLOCK_CARD_EN;
-	sdhci_writew(host, reg_val, SDHCI_CLOCK_CONTROL);
+	/* Update output phase */
+	switch (chip->pdev->device) {
+	case PCI_DEVICE_ID_O2_SDS0:
+	case PCI_DEVICE_ID_O2_SEABIRD0:
+	case PCI_DEVICE_ID_O2_SEABIRD1:
+	case PCI_DEVICE_ID_O2_SDS1:
+	case PCI_DEVICE_ID_O2_FUJIN2:
+		/* Stop clk */
+		reg_val = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+		reg_val &= ~SDHCI_CLOCK_CARD_EN;
+		sdhci_writew(host, reg_val, SDHCI_CLOCK_CONTROL);
 
-	if ((host->timing == MMC_TIMING_MMC_HS200) ||
-		(host->timing == MMC_TIMING_UHS_SDR104)) {
-		/* UnLock WP */
-		pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8);
-		scratch_8 &= 0x7f;
-		pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8);
+		if (host->timing == MMC_TIMING_MMC_HS200 ||
+		    host->timing == MMC_TIMING_UHS_SDR104) {
+			/* UnLock WP */
+			pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8);
+			scratch_8 &= 0x7f;
+			pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8);
 
-		/* Set pcr 0x354[16] to choose dll clock, and set the default phase */
-		pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &reg_val);
-		reg_val &= ~(O2_SD_SEL_DLL | O2_SD_PHASE_MASK);
-		reg_val |= (O2_SD_SEL_DLL | O2_SD_FIX_PHASE);
-		pci_write_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, reg_val);
+			/* Set pcr 0x354[16] to choose dll clock, and set the default phase */
+			pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &reg_val);
+			reg_val &= ~(O2_SD_SEL_DLL | O2_SD_PHASE_MASK);
+			reg_val |= (O2_SD_SEL_DLL | O2_SD_FIX_PHASE);
+			pci_write_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, reg_val);
 
-		/* Lock WP */
-		pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8);
-		scratch_8 |= 0x80;
-		pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8);
+			/* Lock WP */
+			pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8);
+			scratch_8 |= 0x80;
+			pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8);
+		}
+
+		/* Start clk */
+		reg_val = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+		reg_val |= SDHCI_CLOCK_CARD_EN;
+		sdhci_writew(host, reg_val, SDHCI_CLOCK_CONTROL);
+		break;
+	default:
+		break;
 	}
-	/* Start clk */
-	reg_val = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
-	reg_val |= SDHCI_CLOCK_CARD_EN;
-	sdhci_writew(host, reg_val, SDHCI_CLOCK_CONTROL);
 
 	/* wait DLL lock, timeout value 5ms */
 	if (readx_poll_timeout(sdhci_o2_pll_dll_wdt_control, host,
@@ -563,6 +579,7 @@ static void sdhci_pci_o2_set_clock(struct sdhci_host *host, unsigned int clock)
 	u16 clk;
 	u8 scratch;
 	u32 scratch_32;
+	u32 dmdn_208m, dmdn_200m;
 	struct sdhci_pci_slot *slot = sdhci_priv(host);
 	struct sdhci_pci_chip *chip = slot->chip;
 
@@ -578,16 +595,27 @@ static void sdhci_pci_o2_set_clock(struct sdhci_host *host, unsigned int clock)
 	scratch &= 0x7f;
 	pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
 
+	if (chip->pdev->device == PCI_DEVICE_ID_O2_GG8_9860 ||
+	    chip->pdev->device == PCI_DEVICE_ID_O2_GG8_9861 ||
+	    chip->pdev->device == PCI_DEVICE_ID_O2_GG8_9862 ||
+	    chip->pdev->device == PCI_DEVICE_ID_O2_GG8_9863) {
+		dmdn_208m = 0x2c500000;
+		dmdn_200m = 0x25200000;
+	} else {
+		dmdn_208m = 0x2c280000;
+		dmdn_200m = 0x25100000;
+	}
+
 	if ((host->timing == MMC_TIMING_UHS_SDR104) && (clock == 200000000)) {
 		pci_read_config_dword(chip->pdev, O2_SD_PLL_SETTING, &scratch_32);
 
-		if ((scratch_32 & 0xFFFF0000) != 0x2c280000)
-			o2_pci_set_baseclk(chip, 0x2c280000);
+		if ((scratch_32 & 0xFFFF0000) != dmdn_208m)
+			o2_pci_set_baseclk(chip, dmdn_208m);
 	} else {
 		pci_read_config_dword(chip->pdev, O2_SD_PLL_SETTING, &scratch_32);
 
-		if ((scratch_32 & 0xFFFF0000) != 0x25100000)
-			o2_pci_set_baseclk(chip, 0x25100000);
+		if ((scratch_32 & 0xFFFF0000) != dmdn_200m)
+			o2_pci_set_baseclk(chip, dmdn_200m);
 	}
 
 	pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &scratch_32);
@@ -624,6 +652,11 @@ static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot)
 	if (caps & SDHCI_CAN_DO_8BIT)
 		host->mmc->caps |= MMC_CAP_8_BIT_DATA;
 
+	host->quirks2 |= SDHCI_QUIRK2_BROKEN_DDR50;
+
+	sdhci_pci_o2_enable_msi(chip, host);
+
+	host->mmc_host_ops.execute_tuning = sdhci_o2_execute_tuning;
 	switch (chip->pdev->device) {
 	case PCI_DEVICE_ID_O2_SDS0:
 	case PCI_DEVICE_ID_O2_SEABIRD0:
@@ -634,10 +667,6 @@ static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot)
 		if (reg & 0x1)
 			host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12;
 
-		host->quirks2 |= SDHCI_QUIRK2_BROKEN_DDR50;
-
-		sdhci_pci_o2_enable_msi(chip, host);
-
 		if (chip->pdev->device == PCI_DEVICE_ID_O2_SEABIRD0) {
 			ret = pci_read_config_dword(chip->pdev,
 						    O2_SD_MISC_SETTING, &reg);
@@ -663,15 +692,21 @@ static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot)
 			host->quirks2 |= SDHCI_QUIRK2_PRESET_VALUE_BROKEN;
 		}
 
-		host->mmc_host_ops.execute_tuning = sdhci_o2_execute_tuning;
-
 		if (chip->pdev->device != PCI_DEVICE_ID_O2_FUJIN2)
 			break;
 		/* set dll watch dog timer */
 		reg = sdhci_readl(host, O2_SD_VENDOR_SETTING2);
 		reg |= (1 << 12);
 		sdhci_writel(host, reg, O2_SD_VENDOR_SETTING2);
-
+		break;
+	case PCI_DEVICE_ID_O2_GG8_9860:
+	case PCI_DEVICE_ID_O2_GG8_9861:
+	case PCI_DEVICE_ID_O2_GG8_9862:
+	case PCI_DEVICE_ID_O2_GG8_9863:
+		host->mmc->caps2 |= MMC_CAP2_NO_SDIO;
+		host->mmc->caps |= MMC_CAP_HW_RESET;
+		host->quirks2 |= SDHCI_QUIRK2_PRESET_VALUE_BROKEN;
+		slot->host->mmc_host_ops.get_cd = sdhci_o2_get_cd;
 		break;
 	default:
 		break;
@@ -684,6 +719,7 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
 {
 	int ret;
 	u8 scratch;
+	u16 scratch16;
 	u32 scratch_32;
 
 	switch (chip->pdev->device) {
@@ -893,6 +929,46 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
 		scratch |= 0x80;
 		pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
 		break;
+	case PCI_DEVICE_ID_O2_GG8_9860:
+	case PCI_DEVICE_ID_O2_GG8_9861:
+	case PCI_DEVICE_ID_O2_GG8_9862:
+	case PCI_DEVICE_ID_O2_GG8_9863:
+		/* UnLock WP */
+		ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch);
+		if (ret)
+			return ret;
+		scratch &= 0x7f;
+		pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
+
+		/* Select mode switch source as software control */
+		pci_read_config_word(chip->pdev, O2_SD_PARA_SET_REG1, &scratch16);
+		scratch16 &= 0xF8FF;
+		scratch16 |= BIT(9);
+		pci_write_config_word(chip->pdev, O2_SD_PARA_SET_REG1, scratch16);
+
+		/* set VDD1 supply source */
+		pci_read_config_word(chip->pdev, O2_SD_VDDX_CTRL_REG, &scratch16);
+		scratch16 &= 0xFFE3;
+		scratch16 |= BIT(3);
+		pci_write_config_word(chip->pdev, O2_SD_VDDX_CTRL_REG, scratch16);
+
+		/* Set host drive strength*/
+		scratch16 = 0x0025;
+		pci_write_config_word(chip->pdev, O2_SD_PLL_SETTING, scratch16);
+
+		/* Set output delay*/
+		pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &scratch_32);
+		scratch_32 &= 0xFF0FFF00;
+		scratch_32 |= 0x00B0003B;
+		pci_write_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, scratch_32);
+
+		/* Lock WP */
+		ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch);
+		if (ret)
+			return ret;
+		scratch |= 0x80;
+		pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
+		break;
 	}
 
 	return 0;
diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
index 9c8863956381..153704f812ed 100644
--- a/drivers/mmc/host/sdhci-pci.h
+++ b/drivers/mmc/host/sdhci-pci.h
@@ -11,6 +11,10 @@
 #define PCI_DEVICE_ID_O2_FUJIN2		0x8520
 #define PCI_DEVICE_ID_O2_SEABIRD0	0x8620
 #define PCI_DEVICE_ID_O2_SEABIRD1	0x8621
+#define PCI_DEVICE_ID_O2_GG8_9860	0x9860
+#define PCI_DEVICE_ID_O2_GG8_9861	0x9861
+#define PCI_DEVICE_ID_O2_GG8_9862	0x9862
+#define PCI_DEVICE_ID_O2_GG8_9863	0x9863
 
 #define PCI_DEVICE_ID_INTEL_PCH_SDIO0	0x8809
 #define PCI_DEVICE_ID_INTEL_PCH_SDIO1	0x880a

From da2a69cf278729dae1ad95811111457c8aac306b Mon Sep 17 00:00:00 2001
From: Chevron Li <chevron.li@bayhubtech.com>
Date: Fri, 11 Aug 2023 11:35:17 +0800
Subject: [PATCH 0995/2047] mmc: sdhci-pci-o2micro: add Bayhub new chip GG8
 support for express card

Add Bayhub new chip GG8 support for SD express card.
This patch depends on patch 1/2.

Signed-off-by: Chevron Li <chevron.li@bayhubtech.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230811033517.11532-2-chevron_li@126.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pci-o2micro.c | 65 +++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c
index ae2707fbd119..7bfee28116af 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.c
+++ b/drivers/mmc/host/sdhci-pci-o2micro.c
@@ -21,6 +21,7 @@
  * O2Micro device registers
  */
 
+#define O2_SD_PCIE_SWITCH	0x54
 #define O2_SD_MISC_REG5		0x64
 #define O2_SD_LD0_CTRL		0x68
 #define O2_SD_DEV_CTRL		0x88
@@ -631,6 +632,67 @@ static void sdhci_pci_o2_set_clock(struct sdhci_host *host, unsigned int clock)
 	sdhci_o2_enable_clk(host, clk);
 }
 
+static int sdhci_pci_o2_init_sd_express(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	struct sdhci_pci_slot *slot = sdhci_priv(host);
+	struct sdhci_pci_chip *chip = slot->chip;
+	u8 scratch8;
+	u16 scratch16;
+	int ret;
+
+	/* Disable clock */
+	sdhci_writeb(host, 0, SDHCI_CLOCK_CONTROL);
+
+	/* Set VDD2 voltage*/
+	scratch8 = sdhci_readb(host, SDHCI_POWER_CONTROL);
+	scratch8 &= 0x0F;
+	if (host->mmc->ios.timing == MMC_TIMING_SD_EXP_1_2V &&
+	    host->mmc->caps2 & MMC_CAP2_SD_EXP_1_2V) {
+		scratch8 |= SDHCI_VDD2_POWER_ON | SDHCI_VDD2_POWER_120;
+	} else {
+		scratch8 |= SDHCI_VDD2_POWER_ON | SDHCI_VDD2_POWER_180;
+	}
+
+	sdhci_writeb(host, scratch8, SDHCI_POWER_CONTROL);
+
+	/* UnLock WP */
+	pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch8);
+	scratch8 &= 0x7f;
+	pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch8);
+
+	/* Wait for express card clkreqn assert */
+	ret = read_poll_timeout(sdhci_readb, scratch8, !(scratch8 & BIT(0)),
+				1, 30000, false, host, O2_SD_EXP_INT_REG);
+
+	if (!ret) {
+		/* Switch to PCIe mode */
+		scratch16 = sdhci_readw(host, O2_SD_PCIE_SWITCH);
+		scratch16 |= BIT(8);
+		sdhci_writew(host, scratch16, O2_SD_PCIE_SWITCH);
+	} else {
+		/* Power off VDD2 voltage*/
+		scratch8 = sdhci_readb(host, SDHCI_POWER_CONTROL);
+		scratch8 &= 0x0F;
+		sdhci_writeb(host, scratch8, SDHCI_POWER_CONTROL);
+
+		/* Keep mode as UHSI */
+		pci_read_config_word(chip->pdev, O2_SD_PARA_SET_REG1, &scratch16);
+		scratch16 &= ~BIT(11);
+		pci_write_config_word(chip->pdev, O2_SD_PARA_SET_REG1, scratch16);
+
+		host->mmc->ios.timing = MMC_TIMING_LEGACY;
+		pr_info("%s: Express card initialization failed, falling back to Legacy\n",
+			mmc_hostname(host->mmc));
+	}
+	/* Lock WP */
+	pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch8);
+	scratch8 |= 0x80;
+	pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch8);
+
+	return 0;
+}
+
 static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot)
 {
 	struct sdhci_pci_chip *chip;
@@ -703,10 +765,11 @@ static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot)
 	case PCI_DEVICE_ID_O2_GG8_9861:
 	case PCI_DEVICE_ID_O2_GG8_9862:
 	case PCI_DEVICE_ID_O2_GG8_9863:
-		host->mmc->caps2 |= MMC_CAP2_NO_SDIO;
+		host->mmc->caps2 |= MMC_CAP2_NO_SDIO | MMC_CAP2_SD_EXP | MMC_CAP2_SD_EXP_1_2V;
 		host->mmc->caps |= MMC_CAP_HW_RESET;
 		host->quirks2 |= SDHCI_QUIRK2_PRESET_VALUE_BROKEN;
 		slot->host->mmc_host_ops.get_cd = sdhci_o2_get_cd;
+		host->mmc_host_ops.init_sd_express = sdhci_pci_o2_init_sd_express;
 		break;
 	default:
 		break;

From 13433b9e2723947d636fb09fda68948b77833077 Mon Sep 17 00:00:00 2001
From: Victor Shih <victor.shih@genesyslogic.com.tw>
Date: Fri, 11 Aug 2023 18:14:04 +0800
Subject: [PATCH 0996/2047] mmc: core: Cleanup mmc_sd_num_wr_blocks() function

Use mmc_app_cmd() to avoid open-coding in mmc_sd_num_wr_blocks().

Signed-off-by: Victor Shih <victor.shih@genesyslogic.com.tw>
Link: https://lore.kernel.org/r/20230811101404.4463-1-victorshihgli@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 266298a04f78..b5b414a71e0b 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -918,17 +918,9 @@ static int mmc_sd_num_wr_blocks(struct mmc_card *card, u32 *written_blocks)
 
 	struct scatterlist sg;
 
-	cmd.opcode = MMC_APP_CMD;
-	cmd.arg = card->rca << 16;
-	cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
-
-	err = mmc_wait_for_cmd(card->host, &cmd, 0);
+	err = mmc_app_cmd(card->host, card);
 	if (err)
 		return err;
-	if (!mmc_host_is_spi(card->host) && !(cmd.resp[0] & R1_APP_CMD))
-		return -EIO;
-
-	memset(&cmd, 0, sizeof(struct mmc_command));
 
 	cmd.opcode = SD_APP_SEND_NUM_WR_BLKS;
 	cmd.arg = 0;

From 2ea6f68932f73a6a9d82160d3ad0a49a5a6bb183 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Aug 2023 17:41:25 +0200
Subject: [PATCH 0997/2047] fs: use the super_block as holder when mounting
 file systems

The file system type is not a very useful holder as it doesn't allow us
to go back to the actual file system instance.  Pass the super_block instead
which is useful when passed back to the file system driver.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Message-Id: <20230802154131.2221419-7-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/cramfs/inode.c | 2 +-
 fs/f2fs/super.c   | 7 +++----
 fs/romfs/super.c  | 2 +-
 fs/super.c        | 8 ++++----
 fs/xfs/xfs_buf.c  | 2 +-
 5 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index bc99d0fb8ad4..569f88dcb2f1 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -494,7 +494,7 @@ static void cramfs_kill_sb(struct super_block *sb)
 		sb->s_mtd = NULL;
 	} else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
 		sync_blockdev(sb->s_bdev);
-		blkdev_put(sb->s_bdev, sb->s_type);
+		blkdev_put(sb->s_bdev, sb);
 	}
 	kfree(sbi);
 }
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ca31163da00a..05c90fdb7a6c 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1561,7 +1561,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
 	int i;
 
 	for (i = 0; i < sbi->s_ndevs; i++) {
-		blkdev_put(FDEV(i).bdev, sbi->sb->s_type);
+		blkdev_put(FDEV(i).bdev, sbi->sb);
 #ifdef CONFIG_BLK_DEV_ZONED
 		kvfree(FDEV(i).blkz_seq);
 #endif
@@ -4198,7 +4198,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
 			/* Single zoned block device mount */
 			FDEV(0).bdev =
 				blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, mode,
-						  sbi->sb->s_type, NULL);
+						  sbi->sb, NULL);
 		} else {
 			/* Multi-device mount */
 			memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
@@ -4217,8 +4217,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
 					sbi->log_blocks_per_seg) - 1;
 			}
 			FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, mode,
-							  sbi->sb->s_type,
-							  NULL);
+							  sbi->sb, NULL);
 		}
 		if (IS_ERR(FDEV(i).bdev))
 			return PTR_ERR(FDEV(i).bdev);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 42d7a344472f..22cdb9a86a57 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb)
 #ifdef CONFIG_ROMFS_ON_BLOCK
 	if (sb->s_bdev) {
 		sync_blockdev(sb->s_bdev);
-		blkdev_put(sb->s_bdev, sb->s_type);
+		blkdev_put(sb->s_bdev, sb);
 	}
 #endif
 }
diff --git a/fs/super.c b/fs/super.c
index a366bc65886e..3c7e4633efeb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1249,7 +1249,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
 	blk_mode_t mode = sb_open_mode(sb_flags);
 	struct block_device *bdev;
 
-	bdev = blkdev_get_by_dev(sb->s_dev, mode, sb->s_type, &fs_holder_ops);
+	bdev = blkdev_get_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
 	if (IS_ERR(bdev)) {
 		if (fc)
 			errorf(fc, "%s: Can't open blockdev", fc->source);
@@ -1262,7 +1262,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
 	 * writable from userspace even for a read-only block device.
 	 */
 	if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
-		blkdev_put(bdev, sb->s_type);
+		blkdev_put(bdev, sb);
 		return -EACCES;
 	}
 
@@ -1278,7 +1278,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
 		mutex_unlock(&bdev->bd_fsfreeze_mutex);
 		if (fc)
 			warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
-		blkdev_put(bdev, sb->s_type);
+		blkdev_put(bdev, sb);
 		return -EBUSY;
 	}
 	spin_lock(&sb_lock);
@@ -1415,7 +1415,7 @@ void kill_block_super(struct super_block *sb)
 	generic_shutdown_super(sb);
 	if (bdev) {
 		sync_blockdev(bdev);
-		blkdev_put(bdev, sb->s_type);
+		blkdev_put(bdev, sb);
 	}
 }
 
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index f2e5a2c78dc4..3b903f6bce98 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1948,7 +1948,7 @@ xfs_free_buftarg(
 	fs_put_dax(btp->bt_daxdev, btp->bt_mount);
 	/* the main block device is closed by kill_block_super */
 	if (bdev != btp->bt_mount->m_super->s_bdev)
-		blkdev_put(bdev, btp->bt_mount);
+		blkdev_put(bdev, btp->bt_mount->m_super);
 
 	kmem_free(btp);
 }

From 9c09a7cf6220a11dac0f4845b7e8925706cdc458 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Aug 2023 17:41:26 +0200
Subject: [PATCH 0998/2047] fs: stop using get_super in fs_mark_dead

fs_mark_dead currently uses get_super to find the superblock for the
block device that is going away.  This means it is limited to the
main device stored in sb->s_dev, leading to a lot of code duplication
for file systems that can use multiple block devices.

Now that the holder for all block devices used by file systems is set
to the super_block, we can instead look at that holder and then check
if the file system is born and active, so do that instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Message-Id: <20230802154131.2221419-8-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/super.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 3c7e4633efeb..9cf7fc67727b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1209,17 +1209,39 @@ int get_tree_keyed(struct fs_context *fc,
 EXPORT_SYMBOL(get_tree_keyed);
 
 #ifdef CONFIG_BLOCK
+/*
+ * Lock a super block that the callers holds a reference to.
+ *
+ * The caller needs to ensure that the super_block isn't being freed while
+ * calling this function, e.g. by holding a lock over the call to this function
+ * and the place that clears the pointer to the superblock used by this function
+ * before freeing the superblock.
+ */
+static bool lock_active_super(struct super_block *sb)
+{
+	down_read(&sb->s_umount);
+	if (!sb->s_root ||
+	    (sb->s_flags & (SB_ACTIVE | SB_BORN)) != (SB_ACTIVE | SB_BORN)) {
+		up_read(&sb->s_umount);
+		return false;
+	}
+	return true;
+}
+
 static void fs_mark_dead(struct block_device *bdev)
 {
-	struct super_block *sb;
+	struct super_block *sb = bdev->bd_holder;
 
-	sb = get_super(bdev);
-	if (!sb)
+	/* bd_holder_lock ensures that the sb isn't freed */
+	lockdep_assert_held(&bdev->bd_holder_lock);
+
+	if (!lock_active_super(sb))
 		return;
 
 	if (sb->s_op->shutdown)
 		sb->s_op->shutdown(sb);
-	drop_super(sb);
+
+	up_read(&sb->s_umount);
 }
 
 static const struct blk_holder_ops fs_holder_ops = {

From 7ecd0b6f510005e120f5bc198bacb5951814cf36 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Aug 2023 17:41:27 +0200
Subject: [PATCH 0999/2047] fs: export fs_holder_ops

Export fs_holder_ops so that file systems that open additional block
devices can use it as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Message-Id: <20230802154131.2221419-9-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/super.c             | 3 ++-
 include/linux/blkdev.h | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/super.c b/fs/super.c
index 9cf7fc67727b..f72a1112a31b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1244,9 +1244,10 @@ static void fs_mark_dead(struct block_device *bdev)
 	up_read(&sb->s_umount);
 }
 
-static const struct blk_holder_ops fs_holder_ops = {
+const struct blk_holder_ops fs_holder_ops = {
 	.mark_dead		= fs_mark_dead,
 };
+EXPORT_SYMBOL_GPL(fs_holder_ops);
 
 static int set_bdev_super(struct super_block *s, void *data)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ed44a997f629..83262702eea7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1464,6 +1464,8 @@ struct blk_holder_ops {
 	void (*mark_dead)(struct block_device *bdev);
 };
 
+extern const struct blk_holder_ops fs_holder_ops;
+
 /*
  * Return the correct open flags for blkdev_get_by_* for super block flags
  * as stored in sb->s_flags.

From 6f5fc7de98857c75dad05e9f3fe2e6f358d4583e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Aug 2023 17:41:28 +0200
Subject: [PATCH 1000/2047] ext4: drop s_umount over opening the log device

Just like get_tree_bdev needs to drop s_umount when opening the main
device, we need to do the same for the ext4 log device to avoid a
potential lock order reversal with s_unmount for the mark_dead path.

It might be preferable to just drop s_umount over ->fill_super entirely,
but that will require a fairly massive audit first, so we'll do the easy
version here first.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Message-Id: <20230802154131.2221419-10-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ext4/super.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fa8888f0fed9..47e3272e0c5b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5842,7 +5842,10 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
 		return NULL;
 
+	/* see get_tree_bdev why this is needed and safe */
+	up_write(&sb->s_umount);
 	bdev = ext4_blkdev_get(j_dev, sb);
+	down_write(&sb->s_umount);
 	if (bdev == NULL)
 		return NULL;
 

From 8bed1783751fc8d0a5a263dcf87838a2fa570953 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Aug 2023 17:41:29 +0200
Subject: [PATCH 1001/2047] ext4: use fs_holder_ops for the log device

Use the generic fs_holder_ops to shut down the file system when the
log device goes away instead of duplicating the logic.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230802154131.2221419-11-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/ext4/super.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 47e3272e0c5b..60d2815a0b7e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1097,15 +1097,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
 	 */
 }
 
-static void ext4_bdev_mark_dead(struct block_device *bdev)
-{
-	ext4_force_shutdown(bdev->bd_holder, EXT4_GOING_FLAGS_NOLOGFLUSH);
-}
-
-static const struct blk_holder_ops ext4_holder_ops = {
-	.mark_dead		= ext4_bdev_mark_dead,
-};
-
 /*
  * Open the external journal device
  */
@@ -1114,7 +1105,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 	struct block_device *bdev;
 
 	bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb,
-				 &ext4_holder_ops);
+				 &fs_holder_ops);
 	if (IS_ERR(bdev))
 		goto fail;
 	return bdev;

From 8d945b595ed07db13fef1f3311ad456c97941930 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Aug 2023 17:41:30 +0200
Subject: [PATCH 1002/2047] xfs: drop s_umount over opening the log and RT
 devices

Just like get_tree_bdev needs to drop s_umount when opening the main
device, we need to do the same for the xfs log and RT devices to avoid a
potential lock order reversal with s_unmount for the mark_dead path.

It might be preferable to just drop s_umount over ->fill_super entirely,
but that will require a fairly massive audit first, so we'll do the easy
version here first.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Message-Id: <20230802154131.2221419-12-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/xfs_super.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 46d0cbe2b5f8..97ef41acb03d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -462,17 +462,24 @@ STATIC int
 xfs_open_devices(
 	struct xfs_mount	*mp)
 {
-	struct block_device	*ddev = mp->m_super->s_bdev;
+	struct super_block	*sb = mp->m_super;
+	struct block_device	*ddev = sb->s_bdev;
 	struct block_device	*logdev = NULL, *rtdev = NULL;
 	int			error;
 
+	/*
+	 * blkdev_put() can't be called under s_umount, see the comment
+	 * in get_tree_bdev() for more details
+	 */
+	up_write(&sb->s_umount);
+
 	/*
 	 * Open real time and log devices - order is important.
 	 */
 	if (mp->m_logname) {
 		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
 		if (error)
-			return error;
+			goto out_relock;
 	}
 
 	if (mp->m_rtname) {
@@ -510,7 +517,10 @@ xfs_open_devices(
 		mp->m_logdev_targp = mp->m_ddev_targp;
 	}
 
-	return 0;
+	error = 0;
+out_relock:
+	down_write(&sb->s_umount);
+	return error;
 
  out_free_rtdev_targ:
 	if (mp->m_rtdev_targp)
@@ -523,7 +533,7 @@ xfs_open_devices(
  out_close_logdev:
 	if (logdev && logdev != ddev)
 		blkdev_put(logdev, mp);
-	return error;
+	goto out_relock;
 }
 
 /*

From 8ffa54e3370c5a8b9538dbe4077fc9c4b5a08f45 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Aug 2023 17:41:31 +0200
Subject: [PATCH 1003/2047] xfs use fs_holder_ops for the log and RT devices

Use the generic fs_holder_ops to shut down the file system when the
log or RT device goes away instead of duplicating the logic.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Message-Id: <20230802154131.2221419-13-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/xfs_super.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 97ef41acb03d..8fee15292499 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -377,17 +377,6 @@ disable_dax:
 	return 0;
 }
 
-static void
-xfs_bdev_mark_dead(
-	struct block_device	*bdev)
-{
-	xfs_force_shutdown(bdev->bd_holder, SHUTDOWN_DEVICE_REMOVED);
-}
-
-static const struct blk_holder_ops xfs_holder_ops = {
-	.mark_dead		= xfs_bdev_mark_dead,
-};
-
 STATIC int
 xfs_blkdev_get(
 	xfs_mount_t		*mp,
@@ -396,8 +385,8 @@ xfs_blkdev_get(
 {
 	int			error = 0;
 
-	*bdevp = blkdev_get_by_path(name, BLK_OPEN_READ | BLK_OPEN_WRITE, mp,
-				    &xfs_holder_ops);
+	*bdevp = blkdev_get_by_path(name, BLK_OPEN_READ | BLK_OPEN_WRITE,
+				    mp->m_super, &fs_holder_ops);
 	if (IS_ERR(*bdevp)) {
 		error = PTR_ERR(*bdevp);
 		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
@@ -529,10 +518,10 @@ out_relock:
 	xfs_free_buftarg(mp->m_ddev_targp);
  out_close_rtdev:
 	 if (rtdev)
-		 blkdev_put(rtdev, mp);
+		 blkdev_put(rtdev, sb);
  out_close_logdev:
 	if (logdev && logdev != ddev)
-		blkdev_put(logdev, mp);
+		blkdev_put(logdev, sb);
 	goto out_relock;
 }
 

From 33b53749aa1f9fb0f5aa0ac37269944a080b982f Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 31 Jul 2023 22:02:49 +0800
Subject: [PATCH 1004/2047] crypto: starfive - fix return value check in
 starfive_aes_prepare_req()

kzalloc() returns NULL pointer not PTR_ERR() when it fails,
so replace the IS_ERR() check with NULL pointer check.

Fixes: e22471c2331c ("crypto: starfive - Add AES skcipher and aead support")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/starfive/jh7110-aes.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/starfive/jh7110-aes.c b/drivers/crypto/starfive/jh7110-aes.c
index 04dd7958054f..278dfa4aa743 100644
--- a/drivers/crypto/starfive/jh7110-aes.c
+++ b/drivers/crypto/starfive/jh7110-aes.c
@@ -496,8 +496,8 @@ static int starfive_aes_prepare_req(struct skcipher_request *req,
 
 	if (cryp->assoclen) {
 		rctx->adata = kzalloc(ALIGN(cryp->assoclen, AES_BLOCK_SIZE), GFP_KERNEL);
-		if (IS_ERR(rctx->adata))
-			return dev_err_probe(cryp->dev, PTR_ERR(rctx->adata),
+		if (!rctx->adata)
+			return dev_err_probe(cryp->dev, -ENOMEM,
 					     "Failed to alloc memory for adata");
 
 		scatterwalk_copychunks(rctx->adata, &cryp->in_walk, cryp->assoclen, 0);

From aec48805163338f8413118796c1dd035661b9140 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Mon, 31 Jul 2023 18:54:54 +0200
Subject: [PATCH 1005/2047] crypto: stm32 - Properly handle pm_runtime_get
 failing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If pm_runtime_get() (disguised as pm_runtime_resume_and_get()) fails, this
means the clk wasn't prepared and enabled. Returning early in this case
however is wrong as then the following resource frees are skipped and this
is never catched up. So do all the cleanups but clk_disable_unprepare().

Also don't emit a warning, as stm32_hash_runtime_resume() already emitted
one.

Note that the return value of stm32_hash_remove() is mostly ignored by
the device core. The only effect of returning zero instead of an error
value is to suppress another warning in platform_remove(). So return 0
even if pm_runtime_resume_and_get() failed.

Fixes: 8b4d566de6a5 ("crypto: stm32/hash - Add power management support")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32-hash.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index 88a186c3dd78..75d281edae2a 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -2121,9 +2121,7 @@ static int stm32_hash_remove(struct platform_device *pdev)
 	if (!hdev)
 		return -ENODEV;
 
-	ret = pm_runtime_resume_and_get(hdev->dev);
-	if (ret < 0)
-		return ret;
+	ret = pm_runtime_get_sync(hdev->dev);
 
 	stm32_hash_unregister_algs(hdev);
 
@@ -2139,7 +2137,8 @@ static int stm32_hash_remove(struct platform_device *pdev)
 	pm_runtime_disable(hdev->dev);
 	pm_runtime_put_noidle(hdev->dev);
 
-	clk_disable_unprepare(hdev->clk);
+	if (ret >= 0)
+		clk_disable_unprepare(hdev->clk);
 
 	return 0;
 }

From 3feec4ef9f99e2ca3c4614bc0c46e9c0ad337357 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Mon, 31 Jul 2023 18:54:55 +0200
Subject: [PATCH 1006/2047] crypto: stm32 - Drop if block with always false
 condition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stm32_hash_remove() is only called after stm32_hash_probe() succeeded. In
this case platform_set_drvdata() was called with a non-NULL data patameter.

The check for hdev being non-NULL can be dropped because hdev is never NULL
(or something bad like memory corruption happened and then the check
doesn't help any more either).

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32-hash.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index 75d281edae2a..b10243035584 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -2114,13 +2114,9 @@ err_reset:
 
 static int stm32_hash_remove(struct platform_device *pdev)
 {
-	struct stm32_hash_dev *hdev;
+	struct stm32_hash_dev *hdev = platform_get_drvdata(pdev);
 	int ret;
 
-	hdev = platform_get_drvdata(pdev);
-	if (!hdev)
-		return -ENODEV;
-
 	ret = pm_runtime_get_sync(hdev->dev);
 
 	stm32_hash_unregister_algs(hdev);

From 7f1045c61876addde13b9aeff2d504aa9dabc3a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Mon, 31 Jul 2023 18:54:56 +0200
Subject: [PATCH 1007/2047] crypto: stm32 - Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is ignored (apart
from emitting a warning) and this typically results in resource leaks.
To improve here there is a quest to make the remove callback return
void. In the first step of this quest all drivers are converted to
.remove_new() which already returns void. Eventually after all drivers
are converted, .remove_new() is renamed to .remove().

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32-hash.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index b10243035584..68c52eeaa6b1 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -2112,7 +2112,7 @@ err_reset:
 	return ret;
 }
 
-static int stm32_hash_remove(struct platform_device *pdev)
+static void stm32_hash_remove(struct platform_device *pdev)
 {
 	struct stm32_hash_dev *hdev = platform_get_drvdata(pdev);
 	int ret;
@@ -2135,8 +2135,6 @@ static int stm32_hash_remove(struct platform_device *pdev)
 
 	if (ret >= 0)
 		clk_disable_unprepare(hdev->clk);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -2173,7 +2171,7 @@ static const struct dev_pm_ops stm32_hash_pm_ops = {
 
 static struct platform_driver stm32_hash_driver = {
 	.probe		= stm32_hash_probe,
-	.remove		= stm32_hash_remove,
+	.remove_new	= stm32_hash_remove,
 	.driver		= {
 		.name	= "stm32-hash",
 		.pm = &stm32_hash_pm_ops,

From ac2d838fb7c479434513c8d4565a111fb805edaa Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 1 Aug 2023 19:11:46 +0900
Subject: [PATCH 1008/2047] crypto: arm64/aes - remove Makefile hack

Do it more simiply. This also fixes single target builds.

[before]

  $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- arch/arm64/crypto/aes-glue-ce.i
    [snip]
  make[4]: *** No rule to make target 'arch/arm64/crypto/aes-glue-ce.i'.  Stop.

[after]

  $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- arch/arm64/crypto/aes-glue-ce.i
    [snip]
    CPP     arch/arm64/crypto/aes-glue-ce.i

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Makefile        | 5 -----
 arch/arm64/crypto/aes-glue-ce.c   | 2 ++
 arch/arm64/crypto/aes-glue-neon.c | 1 +
 3 files changed, 3 insertions(+), 5 deletions(-)
 create mode 100644 arch/arm64/crypto/aes-glue-ce.c
 create mode 100644 arch/arm64/crypto/aes-glue-neon.c

diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 4818e204c2ac..fbe64dce66e0 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -81,11 +81,6 @@ aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
 obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o
 aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
 
-CFLAGS_aes-glue-ce.o	:= -DUSE_V8_CRYPTO_EXTENSIONS
-
-$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
-	$(call if_changed_rule,cc_o_c)
-
 quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $(<) void $(@)
 
diff --git a/arch/arm64/crypto/aes-glue-ce.c b/arch/arm64/crypto/aes-glue-ce.c
new file mode 100644
index 000000000000..7d309ceeddf3
--- /dev/null
+++ b/arch/arm64/crypto/aes-glue-ce.c
@@ -0,0 +1,2 @@
+#define USE_V8_CRYPTO_EXTENSIONS
+#include "aes-glue.c"
diff --git a/arch/arm64/crypto/aes-glue-neon.c b/arch/arm64/crypto/aes-glue-neon.c
new file mode 100644
index 000000000000..8ba046321064
--- /dev/null
+++ b/arch/arm64/crypto/aes-glue-neon.c
@@ -0,0 +1 @@
+#include "aes-glue.c"

From 6b4b53ca0b7300ba2af98a49dbce22054bf034fe Mon Sep 17 00:00:00 2001
From: Frederick Lawler <fred@cloudflare.com>
Date: Tue, 1 Aug 2023 08:57:09 -0500
Subject: [PATCH 1009/2047] crypto: af_alg - Decrement struct key.usage in
 alg_set_by_key_serial()

Calls to lookup_user_key() require a corresponding key_put() to
decrement the usage counter. Once it reaches zero, we schedule key GC.
Therefore decrement struct key.usage in alg_set_by_key_serial().

Fixes: 7984ceb134bf ("crypto: af_alg - Support symmetric encryption via keyring keys")
Cc: <stable@vger.kernel.org>
Signed-off-by: Frederick Lawler <fred@cloudflare.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/af_alg.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 9ee8575d3b1a..e9969dc0e83c 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -320,18 +320,21 @@ static int alg_setkey_by_key_serial(struct alg_sock *ask, sockptr_t optval,
 
 	if (IS_ERR(ret)) {
 		up_read(&key->sem);
+		key_put(key);
 		return PTR_ERR(ret);
 	}
 
 	key_data = sock_kmalloc(&ask->sk, key_datalen, GFP_KERNEL);
 	if (!key_data) {
 		up_read(&key->sem);
+		key_put(key);
 		return -ENOMEM;
 	}
 
 	memcpy(key_data, ret, key_datalen);
 
 	up_read(&key->sem);
+	key_put(key);
 
 	err = type->setkey(ask->private, key_data, key_datalen);
 

From dd8e82f4fa9f0ef18e4409a817609f794ea2d468 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 2 Aug 2023 17:14:27 +0800
Subject: [PATCH 1010/2047] crypto: qat - use kfree_sensitive instead of
 memset/kfree()

Use kfree_sensitive() instead of memset() and kfree().

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/intel/qat/qat_common/qat_compression.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/crypto/intel/qat/qat_common/qat_compression.c b/drivers/crypto/intel/qat/qat_common/qat_compression.c
index 3f1f35283266..7842a9f22178 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_compression.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_compression.c
@@ -234,8 +234,7 @@ static void qat_free_dc_data(struct adf_accel_dev *accel_dev)
 
 	dma_unmap_single(dev, dc_data->ovf_buff_p, dc_data->ovf_buff_sz,
 			 DMA_FROM_DEVICE);
-	memset(dc_data->ovf_buff, 0, dc_data->ovf_buff_sz);
-	kfree(dc_data->ovf_buff);
+	kfree_sensitive(dc_data->ovf_buff);
 	devm_kfree(dev, dc_data);
 	accel_dev->dc_data = NULL;
 }

From 7999b615fd18e2c2d1aa540b930341c494c0c3e7 Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Thu, 3 Aug 2023 17:29:33 +0800
Subject: [PATCH 1011/2047] crypto: hisilicon/sec - Do not check for 0 return
 after calling platform_get_irq()

Since commit ce753ad1549c ("platform: finally disallow IRQ0 in
platform_get_irq() and its ilk"), there is no possible for
platform_get_irq() to return 0. Use the return value
from platform_get_irq().

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec/sec_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec/sec_drv.c b/drivers/crypto/hisilicon/sec/sec_drv.c
index e75851326c1e..e1e08993de12 100644
--- a/drivers/crypto/hisilicon/sec/sec_drv.c
+++ b/drivers/crypto/hisilicon/sec/sec_drv.c
@@ -1107,8 +1107,8 @@ static int sec_queue_res_cfg(struct sec_queue *queue)
 	}
 	queue->task_irq = platform_get_irq(to_platform_device(dev),
 					   queue->queue_id * 2 + 1);
-	if (queue->task_irq <= 0) {
-		ret = -EINVAL;
+	if (queue->task_irq < 0) {
+		ret = queue->task_irq;
 		goto err_free_ring_db;
 	}
 

From 9ae4577bc077a7e32c3c7d442c95bc76865c0f17 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 3 Aug 2023 17:59:28 +0800
Subject: [PATCH 1012/2047] crypto: api - Use work queue in
 crypto_destroy_instance

The function crypto_drop_spawn expects to be called in process
context.  However, when an instance is unregistered while it still
has active users, the last user may cause the instance to be freed
in atomic context.

Fix this by delaying the freeing to a work queue.

Fixes: 6bfd48096ff8 ("[CRYPTO] api: Added spawns")
Reported-by: Florent Revest <revest@chromium.org>
Reported-by: syzbot+d769eed29cc42d75e2a3@syzkaller.appspotmail.com
Reported-by: syzbot+610ec0671f51e838436e@syzkaller.appspotmail.com
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Florent Revest <revest@chromium.org>
Acked-by: Florent Revest <revest@chromium.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c         | 16 ++++++++++++++--
 include/crypto/algapi.h |  3 +++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 5e7cd603d489..4fe95c448047 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -17,6 +17,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/workqueue.h>
 
 #include "internal.h"
 
@@ -74,15 +75,26 @@ static void crypto_free_instance(struct crypto_instance *inst)
 	inst->alg.cra_type->free(inst);
 }
 
-static void crypto_destroy_instance(struct crypto_alg *alg)
+static void crypto_destroy_instance_workfn(struct work_struct *w)
 {
-	struct crypto_instance *inst = (void *)alg;
+	struct crypto_instance *inst = container_of(w, struct crypto_instance,
+						    free_work);
 	struct crypto_template *tmpl = inst->tmpl;
 
 	crypto_free_instance(inst);
 	crypto_tmpl_put(tmpl);
 }
 
+static void crypto_destroy_instance(struct crypto_alg *alg)
+{
+	struct crypto_instance *inst = container_of(alg,
+						    struct crypto_instance,
+						    alg);
+
+	INIT_WORK(&inst->free_work, crypto_destroy_instance_workfn);
+	schedule_work(&inst->free_work);
+}
+
 /*
  * This function adds a spawn to the list secondary_spawns which
  * will be used at the end of crypto_remove_spawns to unregister
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 6156161b181f..ca86f4c6ba43 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -12,6 +12,7 @@
 #include <linux/cache.h>
 #include <linux/crypto.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
 
 /*
  * Maximum values for blocksize and alignmask, used to allocate
@@ -82,6 +83,8 @@ struct crypto_instance {
 		struct crypto_spawn *spawns;
 	};
 
+	struct work_struct free_work;
+
 	void *__ctx[] CRYPTO_MINALIGN_ATTR;
 };
 

From 2a598d0b2800aa23ba51adcf060cec524aaa63b2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 4 Aug 2023 17:24:34 +0800
Subject: [PATCH 1013/2047] crypto: lib - Move mpi into lib/crypto

As lib/mpi is mostly used by crypto code, move it under lib/crypto
so that patches touching it get directed to the right mailing list.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 lib/Makefile                               | 1 -
 lib/crypto/Makefile                        | 2 ++
 lib/{ => crypto}/mpi/Makefile              | 0
 lib/{ => crypto}/mpi/ec.c                  | 0
 lib/{ => crypto}/mpi/generic_mpih-add1.c   | 0
 lib/{ => crypto}/mpi/generic_mpih-lshift.c | 0
 lib/{ => crypto}/mpi/generic_mpih-mul1.c   | 0
 lib/{ => crypto}/mpi/generic_mpih-mul2.c   | 0
 lib/{ => crypto}/mpi/generic_mpih-mul3.c   | 0
 lib/{ => crypto}/mpi/generic_mpih-rshift.c | 0
 lib/{ => crypto}/mpi/generic_mpih-sub1.c   | 0
 lib/{ => crypto}/mpi/longlong.h            | 0
 lib/{ => crypto}/mpi/mpi-add.c             | 0
 lib/{ => crypto}/mpi/mpi-bit.c             | 0
 lib/{ => crypto}/mpi/mpi-cmp.c             | 0
 lib/{ => crypto}/mpi/mpi-div.c             | 0
 lib/{ => crypto}/mpi/mpi-inline.h          | 0
 lib/{ => crypto}/mpi/mpi-internal.h        | 0
 lib/{ => crypto}/mpi/mpi-inv.c             | 0
 lib/{ => crypto}/mpi/mpi-mod.c             | 0
 lib/{ => crypto}/mpi/mpi-mul.c             | 0
 lib/{ => crypto}/mpi/mpi-pow.c             | 0
 lib/{ => crypto}/mpi/mpi-sub-ui.c          | 0
 lib/{ => crypto}/mpi/mpicoder.c            | 0
 lib/{ => crypto}/mpi/mpih-cmp.c            | 0
 lib/{ => crypto}/mpi/mpih-div.c            | 0
 lib/{ => crypto}/mpi/mpih-mul.c            | 0
 lib/{ => crypto}/mpi/mpiutil.c             | 0
 28 files changed, 2 insertions(+), 1 deletion(-)
 rename lib/{ => crypto}/mpi/Makefile (100%)
 rename lib/{ => crypto}/mpi/ec.c (100%)
 rename lib/{ => crypto}/mpi/generic_mpih-add1.c (100%)
 rename lib/{ => crypto}/mpi/generic_mpih-lshift.c (100%)
 rename lib/{ => crypto}/mpi/generic_mpih-mul1.c (100%)
 rename lib/{ => crypto}/mpi/generic_mpih-mul2.c (100%)
 rename lib/{ => crypto}/mpi/generic_mpih-mul3.c (100%)
 rename lib/{ => crypto}/mpi/generic_mpih-rshift.c (100%)
 rename lib/{ => crypto}/mpi/generic_mpih-sub1.c (100%)
 rename lib/{ => crypto}/mpi/longlong.h (100%)
 rename lib/{ => crypto}/mpi/mpi-add.c (100%)
 rename lib/{ => crypto}/mpi/mpi-bit.c (100%)
 rename lib/{ => crypto}/mpi/mpi-cmp.c (100%)
 rename lib/{ => crypto}/mpi/mpi-div.c (100%)
 rename lib/{ => crypto}/mpi/mpi-inline.h (100%)
 rename lib/{ => crypto}/mpi/mpi-internal.h (100%)
 rename lib/{ => crypto}/mpi/mpi-inv.c (100%)
 rename lib/{ => crypto}/mpi/mpi-mod.c (100%)
 rename lib/{ => crypto}/mpi/mpi-mul.c (100%)
 rename lib/{ => crypto}/mpi/mpi-pow.c (100%)
 rename lib/{ => crypto}/mpi/mpi-sub-ui.c (100%)
 rename lib/{ => crypto}/mpi/mpicoder.c (100%)
 rename lib/{ => crypto}/mpi/mpih-cmp.c (100%)
 rename lib/{ => crypto}/mpi/mpih-div.c (100%)
 rename lib/{ => crypto}/mpi/mpih-mul.c (100%)
 rename lib/{ => crypto}/mpi/mpiutil.c (100%)

diff --git a/lib/Makefile b/lib/Makefile
index 42d307ade225..1f5235478259 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -253,7 +253,6 @@ obj-$(CONFIG_DQL) += dynamic_queue_limits.o
 obj-$(CONFIG_GLOB) += glob.o
 obj-$(CONFIG_GLOB_SELFTEST) += globtest.o
 
-obj-$(CONFIG_MPILIB) += mpi/
 obj-$(CONFIG_DIMLIB) += dim/
 obj-$(CONFIG_SIGNATURE) += digsig.o
 
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 6ec2d4543d9c..8d1446c2be71 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -53,3 +53,5 @@ libblake2s-y					+= blake2s-selftest.o
 libchacha20poly1305-y				+= chacha20poly1305-selftest.o
 libcurve25519-y					+= curve25519-selftest.o
 endif
+
+obj-$(CONFIG_MPILIB) += mpi/
diff --git a/lib/mpi/Makefile b/lib/crypto/mpi/Makefile
similarity index 100%
rename from lib/mpi/Makefile
rename to lib/crypto/mpi/Makefile
diff --git a/lib/mpi/ec.c b/lib/crypto/mpi/ec.c
similarity index 100%
rename from lib/mpi/ec.c
rename to lib/crypto/mpi/ec.c
diff --git a/lib/mpi/generic_mpih-add1.c b/lib/crypto/mpi/generic_mpih-add1.c
similarity index 100%
rename from lib/mpi/generic_mpih-add1.c
rename to lib/crypto/mpi/generic_mpih-add1.c
diff --git a/lib/mpi/generic_mpih-lshift.c b/lib/crypto/mpi/generic_mpih-lshift.c
similarity index 100%
rename from lib/mpi/generic_mpih-lshift.c
rename to lib/crypto/mpi/generic_mpih-lshift.c
diff --git a/lib/mpi/generic_mpih-mul1.c b/lib/crypto/mpi/generic_mpih-mul1.c
similarity index 100%
rename from lib/mpi/generic_mpih-mul1.c
rename to lib/crypto/mpi/generic_mpih-mul1.c
diff --git a/lib/mpi/generic_mpih-mul2.c b/lib/crypto/mpi/generic_mpih-mul2.c
similarity index 100%
rename from lib/mpi/generic_mpih-mul2.c
rename to lib/crypto/mpi/generic_mpih-mul2.c
diff --git a/lib/mpi/generic_mpih-mul3.c b/lib/crypto/mpi/generic_mpih-mul3.c
similarity index 100%
rename from lib/mpi/generic_mpih-mul3.c
rename to lib/crypto/mpi/generic_mpih-mul3.c
diff --git a/lib/mpi/generic_mpih-rshift.c b/lib/crypto/mpi/generic_mpih-rshift.c
similarity index 100%
rename from lib/mpi/generic_mpih-rshift.c
rename to lib/crypto/mpi/generic_mpih-rshift.c
diff --git a/lib/mpi/generic_mpih-sub1.c b/lib/crypto/mpi/generic_mpih-sub1.c
similarity index 100%
rename from lib/mpi/generic_mpih-sub1.c
rename to lib/crypto/mpi/generic_mpih-sub1.c
diff --git a/lib/mpi/longlong.h b/lib/crypto/mpi/longlong.h
similarity index 100%
rename from lib/mpi/longlong.h
rename to lib/crypto/mpi/longlong.h
diff --git a/lib/mpi/mpi-add.c b/lib/crypto/mpi/mpi-add.c
similarity index 100%
rename from lib/mpi/mpi-add.c
rename to lib/crypto/mpi/mpi-add.c
diff --git a/lib/mpi/mpi-bit.c b/lib/crypto/mpi/mpi-bit.c
similarity index 100%
rename from lib/mpi/mpi-bit.c
rename to lib/crypto/mpi/mpi-bit.c
diff --git a/lib/mpi/mpi-cmp.c b/lib/crypto/mpi/mpi-cmp.c
similarity index 100%
rename from lib/mpi/mpi-cmp.c
rename to lib/crypto/mpi/mpi-cmp.c
diff --git a/lib/mpi/mpi-div.c b/lib/crypto/mpi/mpi-div.c
similarity index 100%
rename from lib/mpi/mpi-div.c
rename to lib/crypto/mpi/mpi-div.c
diff --git a/lib/mpi/mpi-inline.h b/lib/crypto/mpi/mpi-inline.h
similarity index 100%
rename from lib/mpi/mpi-inline.h
rename to lib/crypto/mpi/mpi-inline.h
diff --git a/lib/mpi/mpi-internal.h b/lib/crypto/mpi/mpi-internal.h
similarity index 100%
rename from lib/mpi/mpi-internal.h
rename to lib/crypto/mpi/mpi-internal.h
diff --git a/lib/mpi/mpi-inv.c b/lib/crypto/mpi/mpi-inv.c
similarity index 100%
rename from lib/mpi/mpi-inv.c
rename to lib/crypto/mpi/mpi-inv.c
diff --git a/lib/mpi/mpi-mod.c b/lib/crypto/mpi/mpi-mod.c
similarity index 100%
rename from lib/mpi/mpi-mod.c
rename to lib/crypto/mpi/mpi-mod.c
diff --git a/lib/mpi/mpi-mul.c b/lib/crypto/mpi/mpi-mul.c
similarity index 100%
rename from lib/mpi/mpi-mul.c
rename to lib/crypto/mpi/mpi-mul.c
diff --git a/lib/mpi/mpi-pow.c b/lib/crypto/mpi/mpi-pow.c
similarity index 100%
rename from lib/mpi/mpi-pow.c
rename to lib/crypto/mpi/mpi-pow.c
diff --git a/lib/mpi/mpi-sub-ui.c b/lib/crypto/mpi/mpi-sub-ui.c
similarity index 100%
rename from lib/mpi/mpi-sub-ui.c
rename to lib/crypto/mpi/mpi-sub-ui.c
diff --git a/lib/mpi/mpicoder.c b/lib/crypto/mpi/mpicoder.c
similarity index 100%
rename from lib/mpi/mpicoder.c
rename to lib/crypto/mpi/mpicoder.c
diff --git a/lib/mpi/mpih-cmp.c b/lib/crypto/mpi/mpih-cmp.c
similarity index 100%
rename from lib/mpi/mpih-cmp.c
rename to lib/crypto/mpi/mpih-cmp.c
diff --git a/lib/mpi/mpih-div.c b/lib/crypto/mpi/mpih-div.c
similarity index 100%
rename from lib/mpi/mpih-div.c
rename to lib/crypto/mpi/mpih-div.c
diff --git a/lib/mpi/mpih-mul.c b/lib/crypto/mpi/mpih-mul.c
similarity index 100%
rename from lib/mpi/mpih-mul.c
rename to lib/crypto/mpi/mpih-mul.c
diff --git a/lib/mpi/mpiutil.c b/lib/crypto/mpi/mpiutil.c
similarity index 100%
rename from lib/mpi/mpiutil.c
rename to lib/crypto/mpi/mpiutil.c

From 9e47a758b70167c9301d2b44d2569f86c7796f2d Mon Sep 17 00:00:00 2001
From: Mark O'Donovan <shiftee@posteo.net>
Date: Fri, 4 Aug 2023 09:32:18 +0000
Subject: [PATCH 1014/2047] crypto: lib/mpi - avoid null pointer deref in
 mpi_cmp_ui()

During NVMeTCP Authentication a controller can trigger a kernel
oops by specifying the 8192 bit Diffie Hellman group and passing
a correctly sized, but zeroed Diffie Hellamn value.
mpi_cmp_ui() was detecting this if the second parameter was 0,
but 1 is passed from dh_is_pubkey_valid(). This causes the null
pointer u->d to be dereferenced towards the end of mpi_cmp_ui()

Signed-off-by: Mark O'Donovan <shiftee@posteo.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 lib/crypto/mpi/mpi-cmp.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/crypto/mpi/mpi-cmp.c b/lib/crypto/mpi/mpi-cmp.c
index c4cfa3ff0581..0835b6213235 100644
--- a/lib/crypto/mpi/mpi-cmp.c
+++ b/lib/crypto/mpi/mpi-cmp.c
@@ -25,8 +25,12 @@ int mpi_cmp_ui(MPI u, unsigned long v)
 	mpi_limb_t limb = v;
 
 	mpi_normalize(u);
-	if (!u->nlimbs && !limb)
-		return 0;
+	if (u->nlimbs == 0) {
+		if (v == 0)
+			return 0;
+		else
+			return -1;
+	}
 	if (u->sign)
 		return -1;
 	if (u->nlimbs > 1)

From 91cb1e1432b3d873a0c8831d0dd8022db98ac8b8 Mon Sep 17 00:00:00 2001
From: Joachim Vandersmissen <git@jvdsn.com>
Date: Sun, 6 Aug 2023 14:19:03 -0500
Subject: [PATCH 1015/2047] crypto: jitter - Add clarifying comments to Jitter
 Entropy RCT cutoff values

The RCT cutoff values are correct, but they don't exactly match the ones
one would expect when computing them using the formula in SP800-90B. This
discrepancy is due to the fact that the Jitter Entropy RCT starts at 1. To
avoid any confusion by future reviewers, add some comments and explicitly
subtract 1 from the "correct" cutoff values in the definitions.

Signed-off-by: Joachim Vandersmissen <git@jvdsn.com>
Reviewed-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/jitterentropy.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c
index c7d7f2caa779..fe9c233ec769 100644
--- a/crypto/jitterentropy.c
+++ b/crypto/jitterentropy.c
@@ -89,10 +89,14 @@ struct rand_data {
 	unsigned int rct_count;			/* Number of stuck values */
 
 	/* Intermittent health test failure threshold of 2^-30 */
-#define JENT_RCT_CUTOFF		30	/* Taken from SP800-90B sec 4.4.1 */
-#define JENT_APT_CUTOFF		325	/* Taken from SP800-90B sec 4.4.2 */
+	/* From an SP800-90B perspective, this RCT cutoff value is equal to 31. */
+	/* However, our RCT implementation starts at 1, so we subtract 1 here. */
+#define JENT_RCT_CUTOFF		(31 - 1)	/* Taken from SP800-90B sec 4.4.1 */
+#define JENT_APT_CUTOFF		325			/* Taken from SP800-90B sec 4.4.2 */
 	/* Permanent health test failure threshold of 2^-60 */
-#define JENT_RCT_CUTOFF_PERMANENT	60
+	/* From an SP800-90B perspective, this RCT cutoff value is equal to 61. */
+	/* However, our RCT implementation starts at 1, so we subtract 1 here. */
+#define JENT_RCT_CUTOFF_PERMANENT	(61 - 1)
 #define JENT_APT_CUTOFF_PERMANENT	355
 #define JENT_APT_WINDOW_SIZE	512	/* Data window size */
 	/* LSB of time stamp to process */

From f130ac0ae4412dbe4cbe32d0449a6b694b459ce6 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 8 Aug 2023 11:11:48 +0100
Subject: [PATCH 1016/2047] arm64: syscall: unmask DAIF earlier for SVCs

For a number of historical reasons, when handling SVCs we don't unmask
DAIF in el0_svc() or el0_svc_compat(), and instead do so later in
el0_svc_common(). This is unfortunate and makes it harder to make
changes to the DAIF management in entry-common.c as we'd like to do as
cleanup and preparation for FEAT_NMI support. We can move the DAIF
unmasking to entry-common.c as long as we also hoist the
fp_user_discard() logic, as reasoned below.

We converted the syscall trace logic from assembly to C in commit:

  f37099b6992a0b81 ("arm64: convert syscall trace logic to C")

... which was intended to have no functional change, and mirrored the
existing assembly logic to avoid the risk of any functional regression.

With the logic in C, it's clear that there is currently no reason to
unmask DAIF so late within el0_svc_common():

* The thread flags are read prior to unmasking DAIF, but are not
  consumed until after DAIF is unmasked, and we don't perform a
  read-modify-write sequence of the thread flags for which we might need
  to serialize against an IPI modifying the flags. Similarly, for any
  thread flags set by other threads, whether DAIF is masked or not has
  no impact.

  The read_thread_flags() helpers performs a single-copy-atomic read of
  the flags, and so this can safely be moved after unmasking DAIF.

* The pt_regs::orig_x0 and pt_regs::syscallno fields are neither
  consumed nor modified by the handler for any DAIF exception (e.g.
  these do not exist in the `perf_event_arm_regs` enum and are not
  sampled by perf in its IRQ handler).

  Thus, the manipulation of pt_regs::orig_x0 and pt_regs::syscallno can
  safely be moved after unmasking DAIF.

Given the above, we can safely hoist unmasking of DAIF out of
el0_svc_common(), and into its immediate callers: do_el0_svc() and
do_el0_svc_compat(). Further:

* In do_el0_svc(), we sample the syscall number from
  pt_regs::regs[8]. This is not modified by the handler for any DAIF
  exception, and thus can safely be moved after unmasking DAIF.

  As fp_user_discard() operates on the live FP/SVE/SME register state,
  this needs to occur before we clear DAIF.IF, as interrupts could
  result in preemption which would cause this state to become foreign.
  As fp_user_discard() is the first function called within do_el0_svc(),
  it has no dependency on other parts of do_el0_svc() and can be moved
  earlier so long as it is called prior to unmasking DAIF.IF.

* In do_el0_svc_compat(), we sample the syscall number from
  pt_regs::regs[7]. This is not modified by the handler for any DAIF
  exception, and thus can safely be moved after unmasking DAIF.

  Compat threads cannot use SVE or SME, so there's no need for
  el0_svc_compat() to call fp_user_discard().

Given the above, we can safely hoist the unmasking of DAIF out of
do_el0_svc() and do_el0_svc_compat(), and into their immediate callers:
el0_svc() and el0_svc_compat(), so long a we also hoist
fp_user_discard() into el0_svc().

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230808101148.1064172-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry-common.c | 32 +++++++++++++++++++++++++++++++
 arch/arm64/kernel/syscall.c      | 33 --------------------------------
 2 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 6b2e0c367702..0fc94207e69a 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -355,6 +355,35 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
 }
 #endif /* CONFIG_ARM64_ERRATUM_1463225 */
 
+/*
+ * As per the ABI exit SME streaming mode and clear the SVE state not
+ * shared with FPSIMD on syscall entry.
+ */
+static inline void fp_user_discard(void)
+{
+	/*
+	 * If SME is active then exit streaming mode.  If ZA is active
+	 * then flush the SVE registers but leave userspace access to
+	 * both SVE and SME enabled, otherwise disable SME for the
+	 * task and fall through to disabling SVE too.  This means
+	 * that after a syscall we never have any streaming mode
+	 * register state to track, if this changes the KVM code will
+	 * need updating.
+	 */
+	if (system_supports_sme())
+		sme_smstop_sm();
+
+	if (!system_supports_sve())
+		return;
+
+	if (test_thread_flag(TIF_SVE)) {
+		unsigned int sve_vq_minus_one;
+
+		sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
+		sve_flush_live(true, sve_vq_minus_one);
+	}
+}
+
 UNHANDLED(el1t, 64, sync)
 UNHANDLED(el1t, 64, irq)
 UNHANDLED(el1t, 64, fiq)
@@ -644,6 +673,8 @@ static void noinstr el0_svc(struct pt_regs *regs)
 {
 	enter_from_user_mode(regs);
 	cortex_a76_erratum_1463225_svc_handler();
+	fp_user_discard();
+	local_daif_restore(DAIF_PROCCTX);
 	do_el0_svc(regs);
 	exit_to_user_mode(regs);
 }
@@ -783,6 +814,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
 {
 	enter_from_user_mode(regs);
 	cortex_a76_erratum_1463225_svc_handler();
+	local_daif_restore(DAIF_PROCCTX);
 	do_el0_svc_compat(regs);
 	exit_to_user_mode(regs);
 }
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index b1ae2f2eaf77..9a70d9746b66 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -8,7 +8,6 @@
 #include <linux/randomize_kstack.h>
 #include <linux/syscalls.h>
 
-#include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/exception.h>
 #include <asm/fpsimd.h>
@@ -101,8 +100,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 	 * (Similarly for HVC and SMC elsewhere.)
 	 */
 
-	local_daif_restore(DAIF_PROCCTX);
-
 	if (flags & _TIF_MTE_ASYNC_FAULT) {
 		/*
 		 * Process the asynchronous tag check fault before the actual
@@ -153,38 +150,8 @@ trace_exit:
 	syscall_trace_exit(regs);
 }
 
-/*
- * As per the ABI exit SME streaming mode and clear the SVE state not
- * shared with FPSIMD on syscall entry.
- */
-static inline void fp_user_discard(void)
-{
-	/*
-	 * If SME is active then exit streaming mode.  If ZA is active
-	 * then flush the SVE registers but leave userspace access to
-	 * both SVE and SME enabled, otherwise disable SME for the
-	 * task and fall through to disabling SVE too.  This means
-	 * that after a syscall we never have any streaming mode
-	 * register state to track, if this changes the KVM code will
-	 * need updating.
-	 */
-	if (system_supports_sme())
-		sme_smstop_sm();
-
-	if (!system_supports_sve())
-		return;
-
-	if (test_thread_flag(TIF_SVE)) {
-		unsigned int sve_vq_minus_one;
-
-		sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
-		sve_flush_live(true, sve_vq_minus_one);
-	}
-}
-
 void do_el0_svc(struct pt_regs *regs)
 {
-	fp_user_discard();
 	el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table);
 }
 

From eb27c76ac9e1cf5e278703db8f2fea1ed5d3f78a Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Tue, 8 Aug 2023 21:40:32 +0800
Subject: [PATCH 1017/2047] kselftest/arm64: add float-point feature to hwcap
 test

Add the FP feature check in the set of hwcap tests.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230808134036.668954-2-zengheng4@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/hwcap.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 6a0adf916028..ec247587f2bf 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -39,6 +39,11 @@ static void cssc_sigill(void)
 	asm volatile(".inst 0xdac01c00" : : : "x0");
 }
 
+static void fp_sigill(void)
+{
+	asm volatile("fmov s0, #1");
+}
+
 static void ilrcpc_sigill(void)
 {
 	/* LDAPUR W0, [SP, #8] */
@@ -235,6 +240,13 @@ static const struct hwcap_data {
 		.cpuinfo = "cssc",
 		.sigill_fn = cssc_sigill,
 	},
+	{
+		.name = "FP",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_FP,
+		.cpuinfo = "fp",
+		.sigill_fn = fp_sigill,
+	},
 	{
 		.name = "LRCPC",
 		.at_hwcap = AT_HWCAP,

From 09d2e95a04ad6fd258082ea3ba9a5bf52bbd5229 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Tue, 8 Aug 2023 21:40:33 +0800
Subject: [PATCH 1018/2047] kselftest/arm64: add crc32 feature to hwcap test

Add the CRC32 feature check in the set of hwcap tests.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230808134036.668954-3-zengheng4@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/hwcap.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index ec247587f2bf..70850bdb0e35 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -33,6 +33,11 @@
  */
 typedef void (*sigill_fn)(void);
 
+static void crc32_sigill(void)
+{
+	asm volatile("crc32w w0, w0, w1");
+}
+
 static void cssc_sigill(void)
 {
 	/* CNT x0, x0 */
@@ -233,6 +238,13 @@ static const struct hwcap_data {
 	sigill_fn sigill_fn;
 	bool sigill_reliable;
 } hwcaps[] = {
+	{
+		.name = "CRC32",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_CRC32,
+		.cpuinfo = "crc32",
+		.sigill_fn = crc32_sigill,
+	},
 	{
 		.name = "CSSC",
 		.at_hwcap = AT_HWCAP2,

From 71b634aba36201fa24e214db1be7ce50cf6e1863 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Tue, 8 Aug 2023 21:40:34 +0800
Subject: [PATCH 1019/2047] kselftest/arm64: add DEF_SIGHANDLER_FUNC() and
 DEF_INST_RAISE_SIG() helpers

Add macro definition functions DEF_SIGHANDLER_FUNC() and
DEF_INST_RAISE_SIG() helpers.

Furthermore, there is no need to modify the default SIGILL handling
function throughout the entire testing lifecycle in the main() function.
It is reasonable to narrow the scope to the context of the sig_fn
function only.

This is a pre-patch for the subsequent SIGBUS handler patch.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230808134036.668954-4-zengheng4@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/hwcap.c | 116 ++++++++++++++--------
 1 file changed, 74 insertions(+), 42 deletions(-)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 70850bdb0e35..141e50c16f21 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -438,18 +438,21 @@ static const struct hwcap_data {
 	},
 };
 
-static bool seen_sigill;
+typedef void (*sighandler_fn)(int, siginfo_t *, void *);
 
-static void handle_sigill(int sig, siginfo_t *info, void *context)
-{
-	ucontext_t *uc = context;
-
-	seen_sigill = true;
-
-	/* Skip over the offending instruction */
-	uc->uc_mcontext.pc += 4;
+#define DEF_SIGHANDLER_FUNC(SIG, NUM)					\
+static bool seen_##SIG;							\
+static void handle_##SIG(int sig, siginfo_t *info, void *context)	\
+{									\
+	ucontext_t *uc = context;					\
+									\
+	seen_##SIG = true;						\
+	/* Skip over the offending instruction */			\
+	uc->uc_mcontext.pc += 4;					\
 }
 
+DEF_SIGHANDLER_FUNC(sigill, SIGILL);
+
 bool cpuinfo_present(const char *name)
 {
 	FILE *f;
@@ -492,25 +495,77 @@ bool cpuinfo_present(const char *name)
 	return false;
 }
 
-int main(void)
+static int install_sigaction(int signum, sighandler_fn handler)
 {
-	const struct hwcap_data *hwcap;
-	int i, ret;
-	bool have_cpuinfo, have_hwcap;
+	int ret;
 	struct sigaction sa;
 
-	ksft_print_header();
-	ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP);
-
 	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handle_sigill;
+	sa.sa_sigaction = handler;
 	sa.sa_flags = SA_RESTART | SA_SIGINFO;
 	sigemptyset(&sa.sa_mask);
-	ret = sigaction(SIGILL, &sa, NULL);
+	ret = sigaction(signum, &sa, NULL);
 	if (ret < 0)
 		ksft_exit_fail_msg("Failed to install SIGILL handler: %s (%d)\n",
 				   strerror(errno), errno);
 
+	return ret;
+}
+
+static void uninstall_sigaction(int signum)
+{
+	if (sigaction(signum, NULL, NULL) < 0)
+		ksft_exit_fail_msg("Failed to uninstall SIGILL handler: %s (%d)\n",
+				   strerror(errno), errno);
+}
+
+#define DEF_INST_RAISE_SIG(SIG, NUM)					\
+static bool inst_raise_##SIG(const struct hwcap_data *hwcap,		\
+				bool have_hwcap)			\
+{									\
+	if (!hwcap->SIG##_fn) {						\
+		ksft_test_result_skip(#SIG"_%s\n", hwcap->name);	\
+		/* assume that it would raise exception in default */	\
+		return true;						\
+	}								\
+									\
+	install_sigaction(NUM, handle_##SIG);				\
+									\
+	seen_##SIG = false;						\
+	hwcap->SIG##_fn();						\
+									\
+	if (have_hwcap) {						\
+		/* Should be able to use the extension */		\
+		ksft_test_result(!seen_##SIG,				\
+				#SIG"_%s\n", hwcap->name);		\
+	} else if (hwcap->SIG##_reliable) {				\
+		/* Guaranteed a SIGNAL */				\
+		ksft_test_result(seen_##SIG,				\
+				#SIG"_%s\n", hwcap->name);		\
+	} else {							\
+		/* Missing SIGNAL might be fine */			\
+		ksft_print_msg(#SIG"_%sreported for %s\n",		\
+				seen_##SIG ? "" : "not ",		\
+				hwcap->name);				\
+		ksft_test_result_skip(#SIG"_%s\n",			\
+					hwcap->name);			\
+	}								\
+									\
+	uninstall_sigaction(NUM);					\
+	return seen_##SIG;						\
+}
+
+DEF_INST_RAISE_SIG(sigill, SIGILL);
+
+int main(void)
+{
+	int i;
+	const struct hwcap_data *hwcap;
+	bool have_cpuinfo, have_hwcap;
+
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP);
+
 	for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
 		hwcap = &hwcaps[i];
 
@@ -523,30 +578,7 @@ int main(void)
 		ksft_test_result(have_hwcap == have_cpuinfo,
 				 "cpuinfo_match_%s\n", hwcap->name);
 
-		if (hwcap->sigill_fn) {
-			seen_sigill = false;
-			hwcap->sigill_fn();
-
-			if (have_hwcap) {
-				/* Should be able to use the extension */
-				ksft_test_result(!seen_sigill, "sigill_%s\n",
-						 hwcap->name);
-			} else if (hwcap->sigill_reliable) {
-				/* Guaranteed a SIGILL */
-				ksft_test_result(seen_sigill, "sigill_%s\n",
-						 hwcap->name);
-			} else {
-				/* Missing SIGILL might be fine */
-				ksft_print_msg("SIGILL %sreported for %s\n",
-					       seen_sigill ? "" : "not ",
-					       hwcap->name);
-				ksft_test_result_skip("sigill_%s\n",
-						      hwcap->name);
-			}
-		} else {
-			ksft_test_result_skip("sigill_%s\n",
-					      hwcap->name);
-		}
+		inst_raise_sigill(hwcap, have_hwcap);
 	}
 
 	ksft_print_cnts();

From fd49cf088867f8fad7515b5424d301ca5d147070 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Tue, 8 Aug 2023 21:40:35 +0800
Subject: [PATCH 1020/2047] kselftest/arm64: add test item that support to
 capturing the SIGBUS signal

Some enhanced features, such as the LSE2 feature, do not result in
SILLILL if LSE2 is missing and LSE is present, but will generate a
SIGBUS exception when atomic access unaligned.

Therefore, we add test item to test this type of features.

Notice that testing for SIGBUS only makes sense after make sure that
the instruction does not cause a SIGILL signal.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230808134036.668954-5-zengheng4@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/hwcap.c | 33 ++++++++++++++++-------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 141e50c16f21..69234a8e8c59 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -19,19 +19,20 @@
 
 #include "../../kselftest.h"
 
-#define TESTS_PER_HWCAP 2
+#define TESTS_PER_HWCAP 3
 
 /*
- * Function expected to generate SIGILL when the feature is not
- * supported and return when it is supported. If SIGILL is generated
- * then the handler must be able to skip over the instruction safely.
+ * Function expected to generate exception when the feature is not
+ * supported and return when it is supported. If the specific exception
+ * is generated then the handler must be able to skip over the
+ * instruction safely.
  *
  * Note that it is expected that for many architecture extensions
  * there are no specific traps due to no architecture state being
  * added so we may not fault if running on a kernel which doesn't know
  * to add the hwcap.
  */
-typedef void (*sigill_fn)(void);
+typedef void (*sig_fn)(void);
 
 static void crc32_sigill(void)
 {
@@ -235,8 +236,10 @@ static const struct hwcap_data {
 	unsigned long at_hwcap;
 	unsigned long hwcap_bit;
 	const char *cpuinfo;
-	sigill_fn sigill_fn;
+	sig_fn sigill_fn;
 	bool sigill_reliable;
+	sig_fn sigbus_fn;
+	bool sigbus_reliable;
 } hwcaps[] = {
 	{
 		.name = "CRC32",
@@ -452,6 +455,7 @@ static void handle_##SIG(int sig, siginfo_t *info, void *context)	\
 }
 
 DEF_SIGHANDLER_FUNC(sigill, SIGILL);
+DEF_SIGHANDLER_FUNC(sigbus, SIGBUS);
 
 bool cpuinfo_present(const char *name)
 {
@@ -506,7 +510,7 @@ static int install_sigaction(int signum, sighandler_fn handler)
 	sigemptyset(&sa.sa_mask);
 	ret = sigaction(signum, &sa, NULL);
 	if (ret < 0)
-		ksft_exit_fail_msg("Failed to install SIGILL handler: %s (%d)\n",
+		ksft_exit_fail_msg("Failed to install SIGNAL handler: %s (%d)\n",
 				   strerror(errno), errno);
 
 	return ret;
@@ -515,7 +519,7 @@ static int install_sigaction(int signum, sighandler_fn handler)
 static void uninstall_sigaction(int signum)
 {
 	if (sigaction(signum, NULL, NULL) < 0)
-		ksft_exit_fail_msg("Failed to uninstall SIGILL handler: %s (%d)\n",
+		ksft_exit_fail_msg("Failed to uninstall SIGNAL handler: %s (%d)\n",
 				   strerror(errno), errno);
 }
 
@@ -556,12 +560,13 @@ static bool inst_raise_##SIG(const struct hwcap_data *hwcap,		\
 }
 
 DEF_INST_RAISE_SIG(sigill, SIGILL);
+DEF_INST_RAISE_SIG(sigbus, SIGBUS);
 
 int main(void)
 {
 	int i;
 	const struct hwcap_data *hwcap;
-	bool have_cpuinfo, have_hwcap;
+	bool have_cpuinfo, have_hwcap, raise_sigill;
 
 	ksft_print_header();
 	ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP);
@@ -578,7 +583,15 @@ int main(void)
 		ksft_test_result(have_hwcap == have_cpuinfo,
 				 "cpuinfo_match_%s\n", hwcap->name);
 
-		inst_raise_sigill(hwcap, have_hwcap);
+		/*
+		 * Testing for SIGBUS only makes sense after make sure
+		 * that the instruction does not cause a SIGILL signal.
+		 */
+		raise_sigill = inst_raise_sigill(hwcap, have_hwcap);
+		if (!raise_sigill)
+			inst_raise_sigbus(hwcap, have_hwcap);
+		else
+			ksft_test_result_skip("sigbus_%s\n", hwcap->name);
 	}
 
 	ksft_print_cnts();

From 82e7882b141c177b1b09ae66e1b2ae791a9a3918 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Tue, 8 Aug 2023 21:40:36 +0800
Subject: [PATCH 1021/2047] kselftest/arm64: add lse and lse2 features to hwcap
 test

Add the LSE and various features check in the set of hwcap tests.

As stated in the ARM manual, the LSE2 feature allows for atomic access
to unaligned memory. Therefore, for processors that only have the LSE
feature, we register .sigbus_fn to test their ability to perform
unaligned access.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230808134036.668954-6-zengheng4@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/hwcap.c | 30 +++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 69234a8e8c59..112b6a219382 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -34,6 +34,12 @@
  */
 typedef void (*sig_fn)(void);
 
+static void atomics_sigill(void)
+{
+	/* STADD W0, [SP] */
+	asm volatile(".inst 0xb82003ff" : : : );
+}
+
 static void crc32_sigill(void)
 {
 	asm volatile("crc32w w0, w0, w1");
@@ -231,6 +237,14 @@ static void svebf16_sigill(void)
 	asm volatile(".inst 0x658aa000" : : : "z0");
 }
 
+static void uscat_sigbus(void)
+{
+	/* unaligned atomic access */
+	asm volatile("ADD x1, sp, #2" : : : );
+	/* STADD W0, [X1] */
+	asm volatile(".inst 0xb820003f" : : : );
+}
+
 static const struct hwcap_data {
 	const char *name;
 	unsigned long at_hwcap;
@@ -276,6 +290,22 @@ static const struct hwcap_data {
 		.cpuinfo = "ilrcpc",
 		.sigill_fn = ilrcpc_sigill,
 	},
+	{
+		.name = "LSE",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_ATOMICS,
+		.cpuinfo = "atomics",
+		.sigill_fn = atomics_sigill,
+	},
+	{
+		.name = "LSE2",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_USCAT,
+		.cpuinfo = "uscat",
+		.sigill_fn = atomics_sigill,
+		.sigbus_fn = uscat_sigbus,
+		.sigbus_reliable = true,
+	},
 	{
 		.name = "MOPS",
 		.at_hwcap = AT_HWCAP2,

From 358b763ee64b11f45287c286e1f6145729ed49ab Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 10 Aug 2023 12:39:19 +0100
Subject: [PATCH 1022/2047] kselftest/arm64: Size sycall-abi buffers for the
 actual maximum VL

Our ABI opts to provide future proofing by defining a much larger
SVE_VQ_MAX than the architecture actually supports.  Since we use
this define to control the size of our vector data buffers this results
in a lot of overhead when we initialise which can be a very noticable
problem in emulation, we fill buffers that are orders of magnitude
larger than we will ever actually use even with virtual platforms that
provide the full range of architecturally supported vector lengths.

Define and use the actual architecture maximum to mitigate this.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230810-arm64-syscall-abi-perf-v1-1-6a0d7656359c@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../testing/selftests/arm64/abi/syscall-abi.c | 38 +++++++++++--------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index 18cc123e2347..d704511a0955 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -20,12 +20,20 @@
 
 #include "syscall-abi.h"
 
+/*
+ * The kernel defines a much larger SVE_VQ_MAX than is expressable in
+ * the architecture, this creates a *lot* of overhead filling the
+ * buffers (especially ZA) on emulated platforms so use the actual
+ * architectural maximum instead.
+ */
+#define ARCH_SVE_VQ_MAX 16
+
 static int default_sme_vl;
 
 static int sve_vl_count;
-static unsigned int sve_vls[SVE_VQ_MAX];
+static unsigned int sve_vls[ARCH_SVE_VQ_MAX];
 static int sme_vl_count;
-static unsigned int sme_vls[SVE_VQ_MAX];
+static unsigned int sme_vls[ARCH_SVE_VQ_MAX];
 
 extern void do_syscall(int sve_vl, int sme_vl);
 
@@ -130,9 +138,9 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 
 #define SVE_Z_SHARED_BYTES (128 / 8)
 
-static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)];
-uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
-uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
+static uint8_t z_zero[__SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)];
 
 static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 		    uint64_t svcr)
@@ -190,8 +198,8 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 	return errors;
 }
 
-uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)];
-uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)];
+uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
 
 static void setup_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 		    uint64_t svcr)
@@ -222,8 +230,8 @@ static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 	return errors;
 }
 
-uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)];
-uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)];
+uint8_t ffr_in[__SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t ffr_out[__SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
 
 static void setup_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 		      uint64_t svcr)
@@ -300,8 +308,8 @@ static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 	return errors;
 }
 
-uint8_t za_in[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)];
-uint8_t za_out[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)];
+uint8_t za_in[ZA_SIG_REGS_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t za_out[ZA_SIG_REGS_SIZE(ARCH_SVE_VQ_MAX)];
 
 static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 		     uint64_t svcr)
@@ -470,9 +478,9 @@ void sve_count_vls(void)
 		return;
 
 	/*
-	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 * Enumerate up to ARCH_SVE_VQ_MAX vector lengths
 	 */
-	for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) {
+	for (vq = ARCH_SVE_VQ_MAX; vq > 0; vq /= 2) {
 		vl = prctl(PR_SVE_SET_VL, vq * 16);
 		if (vl == -1)
 			ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
@@ -496,9 +504,9 @@ void sme_count_vls(void)
 		return;
 
 	/*
-	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 * Enumerate up to ARCH_SVE_VQ_MAX vector lengths
 	 */
-	for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) {
+	for (vq = ARCH_SVE_VQ_MAX; vq > 0; vq /= 2) {
 		vl = prctl(PR_SME_SET_VL, vq * 16);
 		if (vl == -1)
 			ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",

From e88e0d366f9cfbb810b0c8509dc5d130d5a53e02 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 10 Aug 2023 23:27:07 -0700
Subject: [PATCH 1023/2047] tmpfs: trivial support for direct IO

Depending upon your philosophical viewpoint, either tmpfs always does
direct IO, or it cannot ever do direct IO; but whichever, if tmpfs is to
stand in for a more sophisticated filesystem, it can be helpful for tmpfs
to support O_DIRECT.  So, give tmpfs a shmem_file_open() method, to set
the FMODE_CAN_ODIRECT flag: then unchanged shmem_file_read_iter() and new
shmem_file_write_iter() do the work (without any shmem_direct_IO() stub).

Perhaps later, once the direct_IO method has been eliminated from all
filesystems, generic_file_write_iter() will be such that tmpfs can again
use it, even for O_DIRECT.

xfstests auto generic which were not run on tmpfs before but now pass:
036 091 113 125 130 133 135 198 207 208 209 210 211 212 214 226 239 263
323 355 391 406 412 422 427 446 451 465 551 586 591 609 615 647 708 729
with no new failures.

LTP dio tests which were not run on tmpfs before but now pass:
dio01 through dio30, except for dio04 and dio10, which fail because
tmpfs dio read and write allow odd count: tmpfs could be made stricter,
but would that be an improvement?

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <6f2742-6f1f-cae9-7c5b-ed20fc53215@google.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/shmem.c | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index ca43fb256b8e..b782edeb69aa 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2388,6 +2388,12 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
+static int shmem_file_open(struct inode *inode, struct file *file)
+{
+	file->f_mode |= FMODE_CAN_ODIRECT;
+	return generic_file_open(inode, file);
+}
+
 #ifdef CONFIG_TMPFS_XATTR
 static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
 
@@ -2839,6 +2845,28 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	return retval ? retval : error;
 }
 
+static ssize_t shmem_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
+
+	inode_lock(inode);
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto unlock;
+	ret = file_remove_privs(file);
+	if (ret)
+		goto unlock;
+	ret = file_update_time(file);
+	if (ret)
+		goto unlock;
+	ret = generic_perform_write(iocb, from);
+unlock:
+	inode_unlock(inode);
+	return ret;
+}
+
 static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
 			      struct pipe_buffer *buf)
 {
@@ -4434,12 +4462,12 @@ EXPORT_SYMBOL(shmem_aops);
 
 static const struct file_operations shmem_file_operations = {
 	.mmap		= shmem_mmap,
-	.open		= generic_file_open,
+	.open		= shmem_file_open,
 	.get_unmapped_area = shmem_get_unmapped_area,
 #ifdef CONFIG_TMPFS
 	.llseek		= shmem_file_llseek,
 	.read_iter	= shmem_file_read_iter,
-	.write_iter	= generic_file_write_iter,
+	.write_iter	= shmem_file_write_iter,
 	.fsync		= noop_fsync,
 	.splice_read	= shmem_file_splice_read,
 	.splice_write	= iter_file_splice_write,

From aa5b9178c01905d7691512b366cf2886dfe2680c Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 8 Aug 2023 21:36:12 -0700
Subject: [PATCH 1024/2047] mm: invalidation check mapping before
 folio_contains

Enabling tmpfs "direct IO" exposes it to invalidate_inode_pages2_range(),
which when swapping can hit the VM_BUG_ON_FOLIO(!folio_contains()): the
folio has been moved from page cache to swap cache (with folio->mapping
reset to NULL), but the folio_index() embedded in folio_contains() sees
swapcache, and so returns the swapcache_index() - whereas folio->index
would be the right one to check against the index from mapping's xarray.

There are different ways to fix this, but my preference is just to order
the checks in invalidate_inode_pages2_range() the same way that they are
in __filemap_get_folio() and find_lock_entries() and filemap_fault():
check folio->mapping before folio_contains().

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <f0b31772-78d7-f198-6482-9f25aab8c13f@google.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 mm/truncate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/truncate.c b/mm/truncate.c
index 95d1291d269b..c3320e66d6ea 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -657,11 +657,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 			}
 
 			folio_lock(folio);
-			VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
-			if (folio->mapping != mapping) {
+			if (unlikely(folio->mapping != mapping)) {
 				folio_unlock(folio);
 				continue;
 			}
+			VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
 			folio_wait_writeback(folio);
 
 			if (folio_mapped(folio))

From 39df52ddd932244f2cc08ce520ad064b7dc8c278 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Wed, 9 Aug 2023 15:14:41 +0200
Subject: [PATCH 1025/2047] gpio: sim: use sysfs_streq() and avoid an strdup()

When comparing strings passed to us from configfs, we can pass the page
argument directly to sysfs_streq() and avoid manual string trimming.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-sim.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index cfbdade841e8..1a3729eb44eb 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -1272,7 +1272,6 @@ gpio_sim_hog_config_direction_store(struct config_item *item,
 {
 	struct gpio_sim_hog *hog = to_gpio_sim_hog(item);
 	struct gpio_sim_device *dev = gpio_sim_hog_get_device(hog);
-	char *trimmed;
 	int dir;
 
 	mutex_lock(&dev->lock);
@@ -1282,23 +1281,15 @@ gpio_sim_hog_config_direction_store(struct config_item *item,
 		return -EBUSY;
 	}
 
-	trimmed = gpio_sim_strdup_trimmed(page, count);
-	if (!trimmed) {
-		mutex_unlock(&dev->lock);
-		return -ENOMEM;
-	}
-
-	if (strcmp(trimmed, "input") == 0)
+	if (sysfs_streq(page, "input"))
 		dir = GPIOD_IN;
-	else if (strcmp(trimmed, "output-high") == 0)
+	else if (sysfs_streq(page, "output-high"))
 		dir = GPIOD_OUT_HIGH;
-	else if (strcmp(trimmed, "output-low") == 0)
+	else if (sysfs_streq(page, "output-low"))
 		dir = GPIOD_OUT_LOW;
 	else
 		dir = -EINVAL;
 
-	kfree(trimmed);
-
 	if (dir < 0) {
 		mutex_unlock(&dev->lock);
 		return dir;

From db02247827ef2adc1617839b4bdcc5e1cef3e1ed Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <william.gray@linaro.org>
Date: Thu, 10 Aug 2023 18:00:38 -0400
Subject: [PATCH 1026/2047] gpio: idio-16: Migrate to the regmap API

The regmap API supports IO port accessors so we can take advantage of
regmap abstractions rather than handling access to the device registers
directly in the driver.

By leveraging the regmap API, the idio-16 library is reduced to simply a
devm_idio_16_regmap_register() function and a configuration structure
struct idio_16_regmap_config.

Legacy functions and code will be removed once all consumers have
migrated to the new idio-16 library interface.

For IDIO-16 devices we have the following IRQ registers:

    Base Address +1 (Write): Clear Interrupt
    Base Address +2 (Read): Enable Interrupt
    Base Address +2 (Write): Disable Interrupt

An interrupt is asserted whenever a change-of-state is detected on any
of the inputs. Any write to 0x2 will disable interrupts, while any read
will enable interrupts. Interrupts are cleared by a write to 0x1.

For 104-IDIO-16 devices, there is no IRQ status register, so software
has to assume that if an interrupt is raised then it was for the
104-IDIO-16 device.

For PCI-IDIO-16 devices, there is an additional IRQ register:

    Base Address +6 (Read): Interrupt Status

Interrupt status can be read from 0x6 where bit 2 set indicates that an
IRQ has been generated.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/b45081958ab53dfa697f4a8b15f1bfba46718068.1680618405.git.william.gray@linaro.org/
Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig        |   3 +
 drivers/gpio/gpio-idio-16.c | 155 ++++++++++++++++++++++++++++++++++++
 drivers/gpio/gpio-idio-16.h |  26 ++++++
 3 files changed, 184 insertions(+)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index c5324e44b74e..7f5cc69fff23 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -111,6 +111,9 @@ config GPIO_MAX730X
 
 config GPIO_IDIO_16
 	tristate
+	select REGMAP_IRQ
+	select GPIOLIB_IRQCHIP
+	select GPIO_REGMAP
 	help
 	  Enables support for the idio-16 library functions. The idio-16 library
 	  provides functions to facilitate communication with devices within the
diff --git a/drivers/gpio/gpio-idio-16.c b/drivers/gpio/gpio-idio-16.c
index 13315242d220..f9349e8d7fdc 100644
--- a/drivers/gpio/gpio-idio-16.c
+++ b/drivers/gpio/gpio-idio-16.c
@@ -4,9 +4,13 @@
  * Copyright (C) 2022 William Breathitt Gray
  */
 #include <linux/bitmap.h>
+#include <linux/device.h>
+#include <linux/err.h>
 #include <linux/export.h>
+#include <linux/gpio/regmap.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/regmap.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
@@ -14,6 +18,157 @@
 
 #define DEFAULT_SYMBOL_NAMESPACE GPIO_IDIO_16
 
+#define IDIO_16_DAT_BASE 0x0
+#define IDIO_16_OUT_BASE IDIO_16_DAT_BASE
+#define IDIO_16_IN_BASE (IDIO_16_DAT_BASE + 1)
+#define IDIO_16_CLEAR_INTERRUPT 0x1
+#define IDIO_16_ENABLE_IRQ 0x2
+#define IDIO_16_DEACTIVATE_INPUT_FILTERS 0x3
+#define IDIO_16_DISABLE_IRQ IDIO_16_ENABLE_IRQ
+#define IDIO_16_INTERRUPT_STATUS 0x6
+
+#define IDIO_16_NGPIO 32
+#define IDIO_16_NGPIO_PER_REG 8
+#define IDIO_16_REG_STRIDE 4
+
+struct idio_16_data {
+	struct regmap *map;
+	unsigned int irq_mask;
+};
+
+static int idio_16_handle_mask_sync(const int index, const unsigned int mask_buf_def,
+				    const unsigned int mask_buf, void *const irq_drv_data)
+{
+	struct idio_16_data *const data = irq_drv_data;
+	const unsigned int prev_mask = data->irq_mask;
+	int err;
+	unsigned int val;
+
+	/* exit early if no change since the previous mask */
+	if (mask_buf == prev_mask)
+		return 0;
+
+	/* remember the current mask for the next mask sync */
+	data->irq_mask = mask_buf;
+
+	/* if all previously masked, enable interrupts when unmasking */
+	if (prev_mask == mask_buf_def) {
+		err = regmap_write(data->map, IDIO_16_CLEAR_INTERRUPT, 0x00);
+		if (err)
+			return err;
+		return regmap_read(data->map, IDIO_16_ENABLE_IRQ, &val);
+	}
+
+	/* if all are currently masked, disable interrupts */
+	if (mask_buf == mask_buf_def)
+		return regmap_write(data->map, IDIO_16_DISABLE_IRQ, 0x00);
+
+	return 0;
+}
+
+static int idio_16_reg_mask_xlate(struct gpio_regmap *const gpio, const unsigned int base,
+				  const unsigned int offset, unsigned int *const reg,
+				  unsigned int *const mask)
+{
+	unsigned int stride;
+
+	/* Input lines start at GPIO 16 */
+	if (offset < 16) {
+		stride = offset / IDIO_16_NGPIO_PER_REG;
+		*reg = IDIO_16_OUT_BASE + stride * IDIO_16_REG_STRIDE;
+	} else {
+		stride = (offset - 16) / IDIO_16_NGPIO_PER_REG;
+		*reg = IDIO_16_IN_BASE + stride * IDIO_16_REG_STRIDE;
+	}
+
+	*mask = BIT(offset % IDIO_16_NGPIO_PER_REG);
+
+	return 0;
+}
+
+static const char *idio_16_names[IDIO_16_NGPIO] = {
+	"OUT0", "OUT1", "OUT2", "OUT3", "OUT4", "OUT5", "OUT6", "OUT7",
+	"OUT8", "OUT9", "OUT10", "OUT11", "OUT12", "OUT13", "OUT14", "OUT15",
+	"IIN0", "IIN1", "IIN2", "IIN3", "IIN4", "IIN5", "IIN6", "IIN7",
+	"IIN8", "IIN9", "IIN10", "IIN11", "IIN12", "IIN13", "IIN14", "IIN15",
+};
+
+/**
+ * devm_idio_16_regmap_register - Register an IDIO-16 GPIO device
+ * @dev:	device that is registering this IDIO-16 GPIO device
+ * @config:	configuration for idio_16_regmap_config
+ *
+ * Registers an IDIO-16 GPIO device. Returns 0 on success and negative error number on failure.
+ */
+int devm_idio_16_regmap_register(struct device *const dev,
+				 const struct idio_16_regmap_config *const config)
+{
+	struct gpio_regmap_config gpio_config = {};
+	int err;
+	struct idio_16_data *data;
+	struct regmap_irq_chip *chip;
+	struct regmap_irq_chip_data *chip_data;
+
+	if (!config->parent)
+		return -EINVAL;
+
+	if (!config->map)
+		return -EINVAL;
+
+	if (!config->regmap_irqs)
+		return -EINVAL;
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	data->map = config->map;
+
+	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	chip->name = dev_name(dev);
+	chip->status_base = IDIO_16_INTERRUPT_STATUS;
+	chip->mask_base = IDIO_16_ENABLE_IRQ;
+	chip->ack_base = IDIO_16_CLEAR_INTERRUPT;
+	chip->no_status = config->no_status;
+	chip->num_regs = 1;
+	chip->irqs = config->regmap_irqs;
+	chip->num_irqs = config->num_regmap_irqs;
+	chip->handle_mask_sync = idio_16_handle_mask_sync;
+	chip->irq_drv_data = data;
+
+	/* Disable IRQ to prevent spurious interrupts before we're ready */
+	err = regmap_write(data->map, IDIO_16_DISABLE_IRQ, 0x00);
+	if (err)
+		return err;
+
+	err = devm_regmap_add_irq_chip(dev, data->map, config->irq, 0, 0, chip, &chip_data);
+	if (err)
+		return dev_err_probe(dev, err, "IRQ registration failed\n");
+
+	if (config->filters) {
+		/* Deactivate input filters */
+		err = regmap_write(data->map, IDIO_16_DEACTIVATE_INPUT_FILTERS, 0x00);
+		if (err)
+			return err;
+	}
+
+	gpio_config.parent = config->parent;
+	gpio_config.regmap = data->map;
+	gpio_config.ngpio = IDIO_16_NGPIO;
+	gpio_config.names = idio_16_names;
+	gpio_config.reg_dat_base = GPIO_REGMAP_ADDR(IDIO_16_DAT_BASE);
+	gpio_config.reg_set_base = GPIO_REGMAP_ADDR(IDIO_16_DAT_BASE);
+	gpio_config.ngpio_per_reg = IDIO_16_NGPIO_PER_REG;
+	gpio_config.reg_stride = IDIO_16_REG_STRIDE;
+	gpio_config.irq_domain = regmap_irq_get_domain(chip_data);
+	gpio_config.reg_mask_xlate = idio_16_reg_mask_xlate;
+
+	return PTR_ERR_OR_ZERO(devm_gpio_regmap_register(dev, &gpio_config));
+}
+EXPORT_SYMBOL_GPL(devm_idio_16_regmap_register);
+
 /**
  * idio_16_get - get signal value at signal offset
  * @reg:	ACCES IDIO-16 device registers
diff --git a/drivers/gpio/gpio-idio-16.h b/drivers/gpio/gpio-idio-16.h
index 928f8251a2bd..255bd8504ed7 100644
--- a/drivers/gpio/gpio-idio-16.h
+++ b/drivers/gpio/gpio-idio-16.h
@@ -6,6 +6,30 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
+struct device;
+struct regmap;
+struct regmap_irq;
+
+/**
+ * struct idio_16_regmap_config - Configuration for the IDIO-16 register map
+ * @parent:		parent device
+ * @map:		regmap for the IDIO-16 device
+ * @regmap_irqs:	descriptors for individual IRQs
+ * @num_regmap_irqs:	number of IRQ descriptors
+ * @irq:		IRQ number for the IDIO-16 device
+ * @no_status:		device has no status register
+ * @filters:		device has input filters
+ */
+struct idio_16_regmap_config {
+	struct device *parent;
+	struct regmap *map;
+	const struct regmap_irq *regmap_irqs;
+	int num_regmap_irqs;
+	unsigned int irq;
+	bool no_status;
+	bool filters;
+};
+
 /**
  * struct idio_16 - IDIO-16 registers structure
  * @out0_7:	Read: FET Drive Outputs 0-7
@@ -68,4 +92,6 @@ void idio_16_set_multiple(struct idio_16 __iomem *reg,
 			  const unsigned long *mask, const unsigned long *bits);
 void idio_16_state_init(struct idio_16_state *state);
 
+int devm_idio_16_regmap_register(struct device *dev, const struct idio_16_regmap_config *config);
+
 #endif /* _IDIO_16_H_ */

From 2c210c9a34a31076e03afee3eae7a748e56a75e9 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <william.gray@linaro.org>
Date: Thu, 10 Aug 2023 18:00:39 -0400
Subject: [PATCH 1027/2047] gpio: 104-idio-16: Migrate to the regmap API

The regmap API supports IO port accessors so we can take advantage of
regmap abstractions rather than handling access to the device registers
directly in the driver. Migrate the 104-idio-16 module to the new
idio-16 library interface leveraging the gpio-regmap API.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/1f24a1f18c9a9daa4983713e0a5b53e838d624a8.1680618405.git.william.gray@linaro.org/
Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig            |   2 +-
 drivers/gpio/gpio-104-idio-16.c | 286 +++++++-------------------------
 2 files changed, 64 insertions(+), 224 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 7f5cc69fff23..8bc60a6ce3a9 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -872,7 +872,7 @@ config GPIO_104_IDIO_16
 	tristate "ACCES 104-IDIO-16 GPIO support"
 	depends on PC104
 	select ISA_BUS_API
-	select GPIOLIB_IRQCHIP
+	select REGMAP_MMIO
 	select GPIO_IDIO_16
 	help
 	  Enables GPIO support for the ACCES 104-IDIO-16 family (104-IDIO-16,
diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c
index 098fbefdbe22..f03ccd0f534c 100644
--- a/drivers/gpio/gpio-104-idio-16.c
+++ b/drivers/gpio/gpio-104-idio-16.c
@@ -6,19 +6,16 @@
  * This driver supports the following ACCES devices: 104-IDIO-16,
  * 104-IDIO-16E, 104-IDO-16, 104-IDIO-8, 104-IDIO-8E, and 104-IDO-8.
  */
-#include <linux/bitmap.h>
+#include <linux/bits.h>
 #include <linux/device.h>
-#include <linux/errno.h>
-#include <linux/gpio/driver.h>
-#include <linux/io.h>
+#include <linux/err.h>
 #include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/irqdesc.h>
+#include <linux/irq.h>
 #include <linux/isa.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/spinlock.h>
+#include <linux/regmap.h>
 #include <linux/types.h>
 
 #include "gpio-idio-16.h"
@@ -36,187 +33,62 @@ static unsigned int num_irq;
 module_param_hw_array(irq, uint, irq, &num_irq, 0);
 MODULE_PARM_DESC(irq, "ACCES 104-IDIO-16 interrupt line numbers");
 
-/**
- * struct idio_16_gpio - GPIO device private data structure
- * @chip:	instance of the gpio_chip
- * @lock:	synchronization lock to prevent I/O race conditions
- * @irq_mask:	I/O bits affected by interrupts
- * @reg:	I/O address offset for the device registers
- * @state:	ACCES IDIO-16 device state
- */
-struct idio_16_gpio {
-	struct gpio_chip chip;
-	raw_spinlock_t lock;
-	unsigned long irq_mask;
-	struct idio_16 __iomem *reg;
-	struct idio_16_state state;
+static const struct regmap_range idio_16_wr_ranges[] = {
+	regmap_reg_range(0x0, 0x2), regmap_reg_range(0x4, 0x4),
+};
+static const struct regmap_range idio_16_rd_ranges[] = {
+	regmap_reg_range(0x1, 0x2), regmap_reg_range(0x5, 0x5),
+};
+static const struct regmap_range idio_16_precious_ranges[] = {
+	regmap_reg_range(0x2, 0x2),
+};
+static const struct regmap_access_table idio_16_wr_table = {
+	.yes_ranges = idio_16_wr_ranges,
+	.n_yes_ranges = ARRAY_SIZE(idio_16_wr_ranges),
+};
+static const struct regmap_access_table idio_16_rd_table = {
+	.yes_ranges = idio_16_rd_ranges,
+	.n_yes_ranges = ARRAY_SIZE(idio_16_rd_ranges),
+};
+static const struct regmap_access_table idio_16_precious_table = {
+	.yes_ranges = idio_16_precious_ranges,
+	.n_yes_ranges = ARRAY_SIZE(idio_16_precious_ranges),
+};
+static const struct regmap_config idio_16_regmap_config = {
+	.reg_bits = 8,
+	.reg_stride = 1,
+	.val_bits = 8,
+	.io_port = true,
+	.wr_table = &idio_16_wr_table,
+	.rd_table = &idio_16_rd_table,
+	.volatile_table = &idio_16_rd_table,
+	.precious_table = &idio_16_precious_table,
+	.cache_type = REGCACHE_FLAT,
+	.use_raw_spinlock = true,
 };
 
-static int idio_16_gpio_get_direction(struct gpio_chip *chip,
-				      unsigned int offset)
-{
-	if (idio_16_get_direction(offset))
-		return GPIO_LINE_DIRECTION_IN;
-
-	return GPIO_LINE_DIRECTION_OUT;
-}
-
-static int idio_16_gpio_direction_input(struct gpio_chip *chip,
-					unsigned int offset)
-{
-	return 0;
-}
-
-static int idio_16_gpio_direction_output(struct gpio_chip *chip,
-	unsigned int offset, int value)
-{
-	chip->set(chip, offset, value);
-	return 0;
-}
-
-static int idio_16_gpio_get(struct gpio_chip *chip, unsigned int offset)
-{
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-
-	return idio_16_get(idio16gpio->reg, &idio16gpio->state, offset);
-}
-
-static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
-	unsigned long *mask, unsigned long *bits)
-{
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-
-	idio_16_get_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits);
-
-	return 0;
-}
-
-static void idio_16_gpio_set(struct gpio_chip *chip, unsigned int offset,
-			     int value)
-{
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-
-	idio_16_set(idio16gpio->reg, &idio16gpio->state, offset, value);
-}
-
-static void idio_16_gpio_set_multiple(struct gpio_chip *chip,
-	unsigned long *mask, unsigned long *bits)
-{
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-
-	idio_16_set_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits);
-}
-
-static void idio_16_irq_ack(struct irq_data *data)
-{
-}
-
-static void idio_16_irq_mask(struct irq_data *data)
-{
-	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	const unsigned long offset = irqd_to_hwirq(data);
-	unsigned long flags;
-
-	idio16gpio->irq_mask &= ~BIT(offset);
-	gpiochip_disable_irq(chip, offset);
-
-	if (!idio16gpio->irq_mask) {
-		raw_spin_lock_irqsave(&idio16gpio->lock, flags);
-
-		iowrite8(0, &idio16gpio->reg->irq_ctl);
-
-		raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
+/* Only input lines (GPIO 16-31) support interrupts */
+#define IDIO_16_REGMAP_IRQ(_id)						\
+	[16 + _id] = {							\
+		.mask = BIT(_id),					\
+		.type = { .types_supported = IRQ_TYPE_EDGE_BOTH },	\
 	}
-}
 
-static void idio_16_irq_unmask(struct irq_data *data)
-{
-	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	const unsigned long offset = irqd_to_hwirq(data);
-	const unsigned long prev_irq_mask = idio16gpio->irq_mask;
-	unsigned long flags;
-
-	gpiochip_enable_irq(chip, offset);
-	idio16gpio->irq_mask |= BIT(offset);
-
-	if (!prev_irq_mask) {
-		raw_spin_lock_irqsave(&idio16gpio->lock, flags);
-
-		ioread8(&idio16gpio->reg->irq_ctl);
-
-		raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
-	}
-}
-
-static int idio_16_irq_set_type(struct irq_data *data, unsigned int flow_type)
-{
-	/* The only valid irq types are none and both-edges */
-	if (flow_type != IRQ_TYPE_NONE &&
-		(flow_type & IRQ_TYPE_EDGE_BOTH) != IRQ_TYPE_EDGE_BOTH)
-		return -EINVAL;
-
-	return 0;
-}
-
-static const struct irq_chip idio_16_irqchip = {
-	.name = "104-idio-16",
-	.irq_ack = idio_16_irq_ack,
-	.irq_mask = idio_16_irq_mask,
-	.irq_unmask = idio_16_irq_unmask,
-	.irq_set_type = idio_16_irq_set_type,
-	.flags = IRQCHIP_IMMUTABLE,
-	GPIOCHIP_IRQ_RESOURCE_HELPERS,
+static const struct regmap_irq idio_16_regmap_irqs[] = {
+	IDIO_16_REGMAP_IRQ(0), IDIO_16_REGMAP_IRQ(1), IDIO_16_REGMAP_IRQ(2), /* 0-2 */
+	IDIO_16_REGMAP_IRQ(3), IDIO_16_REGMAP_IRQ(4), IDIO_16_REGMAP_IRQ(5), /* 3-5 */
+	IDIO_16_REGMAP_IRQ(6), IDIO_16_REGMAP_IRQ(7), IDIO_16_REGMAP_IRQ(8), /* 6-8 */
+	IDIO_16_REGMAP_IRQ(9), IDIO_16_REGMAP_IRQ(10), IDIO_16_REGMAP_IRQ(11), /* 9-11 */
+	IDIO_16_REGMAP_IRQ(12), IDIO_16_REGMAP_IRQ(13), IDIO_16_REGMAP_IRQ(14), /* 12-14 */
+	IDIO_16_REGMAP_IRQ(15), /* 15 */
 };
 
-static irqreturn_t idio_16_irq_handler(int irq, void *dev_id)
-{
-	struct idio_16_gpio *const idio16gpio = dev_id;
-	struct gpio_chip *const chip = &idio16gpio->chip;
-	int gpio;
-
-	for_each_set_bit(gpio, &idio16gpio->irq_mask, chip->ngpio)
-		generic_handle_domain_irq(chip->irq.domain, gpio);
-
-	raw_spin_lock(&idio16gpio->lock);
-
-	iowrite8(0, &idio16gpio->reg->in0_7);
-
-	raw_spin_unlock(&idio16gpio->lock);
-
-	return IRQ_HANDLED;
-}
-
-#define IDIO_16_NGPIO 32
-static const char *idio_16_names[IDIO_16_NGPIO] = {
-	"OUT0", "OUT1", "OUT2", "OUT3", "OUT4", "OUT5", "OUT6", "OUT7",
-	"OUT8", "OUT9", "OUT10", "OUT11", "OUT12", "OUT13", "OUT14", "OUT15",
-	"IIN0", "IIN1", "IIN2", "IIN3", "IIN4", "IIN5", "IIN6", "IIN7",
-	"IIN8", "IIN9", "IIN10", "IIN11", "IIN12", "IIN13", "IIN14", "IIN15"
-};
-
-static int idio_16_irq_init_hw(struct gpio_chip *gc)
-{
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(gc);
-
-	/* Disable IRQ by default */
-	iowrite8(0, &idio16gpio->reg->irq_ctl);
-	iowrite8(0, &idio16gpio->reg->in0_7);
-
-	return 0;
-}
-
 static int idio_16_probe(struct device *dev, unsigned int id)
 {
-	struct idio_16_gpio *idio16gpio;
 	const char *const name = dev_name(dev);
-	struct gpio_irq_chip *girq;
-	int err;
-
-	idio16gpio = devm_kzalloc(dev, sizeof(*idio16gpio), GFP_KERNEL);
-	if (!idio16gpio)
-		return -ENOMEM;
+	struct idio_16_regmap_config config = {};
+	void __iomem *regs;
+	struct regmap *map;
 
 	if (!devm_request_region(dev, base[id], IDIO_16_EXTENT, name)) {
 		dev_err(dev, "Unable to lock port addresses (0x%X-0x%X)\n",
@@ -224,54 +96,22 @@ static int idio_16_probe(struct device *dev, unsigned int id)
 		return -EBUSY;
 	}
 
-	idio16gpio->reg = devm_ioport_map(dev, base[id], IDIO_16_EXTENT);
-	if (!idio16gpio->reg)
+	regs = devm_ioport_map(dev, base[id], IDIO_16_EXTENT);
+	if (!regs)
 		return -ENOMEM;
 
-	idio16gpio->chip.label = name;
-	idio16gpio->chip.parent = dev;
-	idio16gpio->chip.owner = THIS_MODULE;
-	idio16gpio->chip.base = -1;
-	idio16gpio->chip.ngpio = IDIO_16_NGPIO;
-	idio16gpio->chip.names = idio_16_names;
-	idio16gpio->chip.get_direction = idio_16_gpio_get_direction;
-	idio16gpio->chip.direction_input = idio_16_gpio_direction_input;
-	idio16gpio->chip.direction_output = idio_16_gpio_direction_output;
-	idio16gpio->chip.get = idio_16_gpio_get;
-	idio16gpio->chip.get_multiple = idio_16_gpio_get_multiple;
-	idio16gpio->chip.set = idio_16_gpio_set;
-	idio16gpio->chip.set_multiple = idio_16_gpio_set_multiple;
+	map = devm_regmap_init_mmio(dev, regs, &idio_16_regmap_config);
+	if (IS_ERR(map))
+		return dev_err_probe(dev, PTR_ERR(map), "Unable to initialize register map\n");
 
-	idio_16_state_init(&idio16gpio->state);
-	/* FET off states are represented by bit values of "1" */
-	bitmap_fill(idio16gpio->state.out_state, IDIO_16_NOUT);
+	config.parent = dev;
+	config.map = map;
+	config.regmap_irqs = idio_16_regmap_irqs;
+	config.num_regmap_irqs = ARRAY_SIZE(idio_16_regmap_irqs);
+	config.irq = irq[id];
+	config.no_status = true;
 
-	girq = &idio16gpio->chip.irq;
-	gpio_irq_chip_set_chip(girq, &idio_16_irqchip);
-	/* This will let us handle the parent IRQ in the driver */
-	girq->parent_handler = NULL;
-	girq->num_parents = 0;
-	girq->parents = NULL;
-	girq->default_type = IRQ_TYPE_NONE;
-	girq->handler = handle_edge_irq;
-	girq->init_hw = idio_16_irq_init_hw;
-
-	raw_spin_lock_init(&idio16gpio->lock);
-
-	err = devm_gpiochip_add_data(dev, &idio16gpio->chip, idio16gpio);
-	if (err) {
-		dev_err(dev, "GPIO registering failed (%d)\n", err);
-		return err;
-	}
-
-	err = devm_request_irq(dev, irq[id], idio_16_irq_handler, 0, name,
-		idio16gpio);
-	if (err) {
-		dev_err(dev, "IRQ handler registering failed (%d)\n", err);
-		return err;
-	}
-
-	return 0;
+	return devm_idio_16_regmap_register(dev, &config);
 }
 
 static struct isa_driver idio_16_driver = {

From 73d8f3efc5c2b757ab06685741df01eaed8090c4 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <william.gray@linaro.org>
Date: Thu, 10 Aug 2023 18:00:40 -0400
Subject: [PATCH 1028/2047] gpio: pci-idio-16: Migrate to the regmap API

The regmap API supports IO port accessors so we can take advantage of
regmap abstractions rather than handling access to the device registers
directly in the driver. Migrate the pci-idio-16 module to the new
idio-16 library interface leveraging the gpio-regmap API.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/5ba5405c64aca984d5cf3bdbdffa04c325e5a147.1680618405.git.william.gray@linaro.org/
Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig            |   2 +-
 drivers/gpio/gpio-pci-idio-16.c | 294 +++++++-------------------------
 2 files changed, 62 insertions(+), 234 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 8bc60a6ce3a9..864b221252b2 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -1655,7 +1655,7 @@ config GPIO_PCH
 
 config GPIO_PCI_IDIO_16
 	tristate "ACCES PCI-IDIO-16 GPIO support"
-	select GPIOLIB_IRQCHIP
+	select REGMAP_MMIO
 	select GPIO_IDIO_16
 	help
 	  Enables GPIO support for the ACCES PCI-IDIO-16. An interrupt is
diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c
index 6726c32e31e6..44c0a21b1d1d 100644
--- a/drivers/gpio/gpio-pci-idio-16.c
+++ b/drivers/gpio/gpio-pci-idio-16.c
@@ -5,214 +5,75 @@
  */
 #include <linux/bits.h>
 #include <linux/device.h>
-#include <linux/errno.h>
-#include <linux/gpio/driver.h>
-#include <linux/interrupt.h>
-#include <linux/irqdesc.h>
+#include <linux/err.h>
+#include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/spinlock.h>
+#include <linux/regmap.h>
 #include <linux/types.h>
 
 #include "gpio-idio-16.h"
 
-/**
- * struct idio_16_gpio - GPIO device private data structure
- * @chip:	instance of the gpio_chip
- * @lock:	synchronization lock to prevent I/O race conditions
- * @reg:	I/O address offset for the GPIO device registers
- * @state:	ACCES IDIO-16 device state
- * @irq_mask:	I/O bits affected by interrupts
- */
-struct idio_16_gpio {
-	struct gpio_chip chip;
-	raw_spinlock_t lock;
-	struct idio_16 __iomem *reg;
-	struct idio_16_state state;
-	unsigned long irq_mask;
+static const struct regmap_range idio_16_wr_ranges[] = {
+	regmap_reg_range(0x0, 0x2), regmap_reg_range(0x3, 0x4),
+};
+static const struct regmap_range idio_16_rd_ranges[] = {
+	regmap_reg_range(0x1, 0x2), regmap_reg_range(0x5, 0x6),
+};
+static const struct regmap_range idio_16_precious_ranges[] = {
+	regmap_reg_range(0x2, 0x2),
+};
+static const struct regmap_access_table idio_16_wr_table = {
+	.yes_ranges = idio_16_wr_ranges,
+	.n_yes_ranges = ARRAY_SIZE(idio_16_wr_ranges),
+};
+static const struct regmap_access_table idio_16_rd_table = {
+	.yes_ranges = idio_16_rd_ranges,
+	.n_yes_ranges = ARRAY_SIZE(idio_16_rd_ranges),
+};
+static const struct regmap_access_table idio_16_precious_table = {
+	.yes_ranges = idio_16_precious_ranges,
+	.n_yes_ranges = ARRAY_SIZE(idio_16_precious_ranges),
+};
+static const struct regmap_config idio_16_regmap_config = {
+	.reg_bits = 8,
+	.reg_stride = 1,
+	.val_bits = 8,
+	.io_port = true,
+	.wr_table = &idio_16_wr_table,
+	.rd_table = &idio_16_rd_table,
+	.volatile_table = &idio_16_rd_table,
+	.precious_table = &idio_16_precious_table,
+	.cache_type = REGCACHE_FLAT,
+	.use_raw_spinlock = true,
 };
 
-static int idio_16_gpio_get_direction(struct gpio_chip *chip,
-	unsigned int offset)
-{
-	if (idio_16_get_direction(offset))
-		return GPIO_LINE_DIRECTION_IN;
-
-	return GPIO_LINE_DIRECTION_OUT;
-}
-
-static int idio_16_gpio_direction_input(struct gpio_chip *chip,
-	unsigned int offset)
-{
-	return 0;
-}
-
-static int idio_16_gpio_direction_output(struct gpio_chip *chip,
-	unsigned int offset, int value)
-{
-	chip->set(chip, offset, value);
-	return 0;
-}
-
-static int idio_16_gpio_get(struct gpio_chip *chip, unsigned int offset)
-{
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-
-	return idio_16_get(idio16gpio->reg, &idio16gpio->state, offset);
-}
-
-static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
-	unsigned long *mask, unsigned long *bits)
-{
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-
-	idio_16_get_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits);
-	return 0;
-}
-
-static void idio_16_gpio_set(struct gpio_chip *chip, unsigned int offset,
-	int value)
-{
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-
-	idio_16_set(idio16gpio->reg, &idio16gpio->state, offset, value);
-}
-
-static void idio_16_gpio_set_multiple(struct gpio_chip *chip,
-	unsigned long *mask, unsigned long *bits)
-{
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-
-	idio_16_set_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits);
-}
-
-static void idio_16_irq_ack(struct irq_data *data)
-{
-}
-
-static void idio_16_irq_mask(struct irq_data *data)
-{
-	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	const unsigned long mask = BIT(irqd_to_hwirq(data));
-	unsigned long flags;
-
-	idio16gpio->irq_mask &= ~mask;
-
-	if (!idio16gpio->irq_mask) {
-		raw_spin_lock_irqsave(&idio16gpio->lock, flags);
-
-		iowrite8(0, &idio16gpio->reg->irq_ctl);
-
-		raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
+/* Only input lines (GPIO 16-31) support interrupts */
+#define IDIO_16_REGMAP_IRQ(_id)						\
+	[16 + _id] = {							\
+		.mask = BIT(2),						\
+		.type = { .types_supported = IRQ_TYPE_EDGE_BOTH },	\
 	}
 
-	gpiochip_disable_irq(chip, irqd_to_hwirq(data));
-}
-
-static void idio_16_irq_unmask(struct irq_data *data)
-{
-	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	const unsigned long mask = BIT(irqd_to_hwirq(data));
-	const unsigned long prev_irq_mask = idio16gpio->irq_mask;
-	unsigned long flags;
-
-	gpiochip_enable_irq(chip, irqd_to_hwirq(data));
-
-	idio16gpio->irq_mask |= mask;
-
-	if (!prev_irq_mask) {
-		raw_spin_lock_irqsave(&idio16gpio->lock, flags);
-
-		ioread8(&idio16gpio->reg->irq_ctl);
-
-		raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
-	}
-}
-
-static int idio_16_irq_set_type(struct irq_data *data, unsigned int flow_type)
-{
-	/* The only valid irq types are none and both-edges */
-	if (flow_type != IRQ_TYPE_NONE &&
-		(flow_type & IRQ_TYPE_EDGE_BOTH) != IRQ_TYPE_EDGE_BOTH)
-		return -EINVAL;
-
-	return 0;
-}
-
-static const struct irq_chip idio_16_irqchip = {
-	.name = "pci-idio-16",
-	.irq_ack = idio_16_irq_ack,
-	.irq_mask = idio_16_irq_mask,
-	.irq_unmask = idio_16_irq_unmask,
-	.irq_set_type = idio_16_irq_set_type,
-	.flags = IRQCHIP_IMMUTABLE,
-	GPIOCHIP_IRQ_RESOURCE_HELPERS,
+static const struct regmap_irq idio_16_regmap_irqs[] = {
+	IDIO_16_REGMAP_IRQ(0), IDIO_16_REGMAP_IRQ(1), IDIO_16_REGMAP_IRQ(2), /* 0-2 */
+	IDIO_16_REGMAP_IRQ(3), IDIO_16_REGMAP_IRQ(4), IDIO_16_REGMAP_IRQ(5), /* 3-5 */
+	IDIO_16_REGMAP_IRQ(6), IDIO_16_REGMAP_IRQ(7), IDIO_16_REGMAP_IRQ(8), /* 6-8 */
+	IDIO_16_REGMAP_IRQ(9), IDIO_16_REGMAP_IRQ(10), IDIO_16_REGMAP_IRQ(11), /* 9-11 */
+	IDIO_16_REGMAP_IRQ(12), IDIO_16_REGMAP_IRQ(13), IDIO_16_REGMAP_IRQ(14), /* 12-14 */
+	IDIO_16_REGMAP_IRQ(15), /* 15 */
 };
 
-static irqreturn_t idio_16_irq_handler(int irq, void *dev_id)
-{
-	struct idio_16_gpio *const idio16gpio = dev_id;
-	unsigned int irq_status;
-	struct gpio_chip *const chip = &idio16gpio->chip;
-	int gpio;
-
-	raw_spin_lock(&idio16gpio->lock);
-
-	irq_status = ioread8(&idio16gpio->reg->irq_status);
-
-	raw_spin_unlock(&idio16gpio->lock);
-
-	/* Make sure our device generated IRQ */
-	if (!(irq_status & 0x3) || !(irq_status & 0x4))
-		return IRQ_NONE;
-
-	for_each_set_bit(gpio, &idio16gpio->irq_mask, chip->ngpio)
-		generic_handle_domain_irq(chip->irq.domain, gpio);
-
-	raw_spin_lock(&idio16gpio->lock);
-
-	/* Clear interrupt */
-	iowrite8(0, &idio16gpio->reg->in0_7);
-
-	raw_spin_unlock(&idio16gpio->lock);
-
-	return IRQ_HANDLED;
-}
-
-#define IDIO_16_NGPIO 32
-static const char *idio_16_names[IDIO_16_NGPIO] = {
-	"OUT0", "OUT1", "OUT2", "OUT3", "OUT4", "OUT5", "OUT6", "OUT7",
-	"OUT8", "OUT9", "OUT10", "OUT11", "OUT12", "OUT13", "OUT14", "OUT15",
-	"IIN0", "IIN1", "IIN2", "IIN3", "IIN4", "IIN5", "IIN6", "IIN7",
-	"IIN8", "IIN9", "IIN10", "IIN11", "IIN12", "IIN13", "IIN14", "IIN15"
-};
-
-static int idio_16_irq_init_hw(struct gpio_chip *gc)
-{
-	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(gc);
-
-	/* Disable IRQ by default and clear any pending interrupt */
-	iowrite8(0, &idio16gpio->reg->irq_ctl);
-	iowrite8(0, &idio16gpio->reg->in0_7);
-
-	return 0;
-}
-
 static int idio_16_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct device *const dev = &pdev->dev;
-	struct idio_16_gpio *idio16gpio;
 	int err;
 	const size_t pci_bar_index = 2;
 	const char *const name = pci_name(pdev);
-	struct gpio_irq_chip *girq;
-
-	idio16gpio = devm_kzalloc(dev, sizeof(*idio16gpio), GFP_KERNEL);
-	if (!idio16gpio)
-		return -ENOMEM;
+	struct idio_16_regmap_config config = {};
+	void __iomem *regs;
+	struct regmap *map;
 
 	err = pcim_enable_device(pdev);
 	if (err) {
@@ -226,53 +87,20 @@ static int idio_16_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return err;
 	}
 
-	idio16gpio->reg = pcim_iomap_table(pdev)[pci_bar_index];
+	regs = pcim_iomap_table(pdev)[pci_bar_index];
 
-	/* Deactivate input filters */
-	iowrite8(0, &idio16gpio->reg->filter_ctl);
+	map = devm_regmap_init_mmio(dev, regs, &idio_16_regmap_config);
+	if (IS_ERR(map))
+		return dev_err_probe(dev, PTR_ERR(map), "Unable to initialize register map\n");
 
-	idio16gpio->chip.label = name;
-	idio16gpio->chip.parent = dev;
-	idio16gpio->chip.owner = THIS_MODULE;
-	idio16gpio->chip.base = -1;
-	idio16gpio->chip.ngpio = IDIO_16_NGPIO;
-	idio16gpio->chip.names = idio_16_names;
-	idio16gpio->chip.get_direction = idio_16_gpio_get_direction;
-	idio16gpio->chip.direction_input = idio_16_gpio_direction_input;
-	idio16gpio->chip.direction_output = idio_16_gpio_direction_output;
-	idio16gpio->chip.get = idio_16_gpio_get;
-	idio16gpio->chip.get_multiple = idio_16_gpio_get_multiple;
-	idio16gpio->chip.set = idio_16_gpio_set;
-	idio16gpio->chip.set_multiple = idio_16_gpio_set_multiple;
+	config.parent = dev;
+	config.map = map;
+	config.regmap_irqs = idio_16_regmap_irqs;
+	config.num_regmap_irqs = ARRAY_SIZE(idio_16_regmap_irqs);
+	config.irq = pdev->irq;
+	config.filters = true;
 
-	idio_16_state_init(&idio16gpio->state);
-
-	girq = &idio16gpio->chip.irq;
-	gpio_irq_chip_set_chip(girq, &idio_16_irqchip);
-	/* This will let us handle the parent IRQ in the driver */
-	girq->parent_handler = NULL;
-	girq->num_parents = 0;
-	girq->parents = NULL;
-	girq->default_type = IRQ_TYPE_NONE;
-	girq->handler = handle_edge_irq;
-	girq->init_hw = idio_16_irq_init_hw;
-
-	raw_spin_lock_init(&idio16gpio->lock);
-
-	err = devm_gpiochip_add_data(dev, &idio16gpio->chip, idio16gpio);
-	if (err) {
-		dev_err(dev, "GPIO registering failed (%d)\n", err);
-		return err;
-	}
-
-	err = devm_request_irq(dev, pdev->irq, idio_16_irq_handler, IRQF_SHARED,
-		name, idio16gpio);
-	if (err) {
-		dev_err(dev, "IRQ handler registering failed (%d)\n", err);
-		return err;
-	}
-
-	return 0;
+	return devm_idio_16_regmap_register(dev, &config);
 }
 
 static const struct pci_device_id idio_16_pci_dev_id[] = {

From 98aaff7c4e65a448545cd46d93e06cb8042f4409 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <william.gray@linaro.org>
Date: Thu, 10 Aug 2023 18:00:41 -0400
Subject: [PATCH 1029/2047] gpio: idio-16: Remove unused legacy interface

All idio-16 library consumers have migrated to the new interface
leveraging the gpio-regmap API. Legacy interface functions and code are
removed as no longer needed.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/651cff1cc3eb57b455a8048121cf6a4d4367f018.1680618405.git.william.gray@linaro.org/
Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-idio-16.c | 131 +-----------------------------------
 drivers/gpio/gpio-idio-16.h |  65 ------------------
 2 files changed, 1 insertion(+), 195 deletions(-)

diff --git a/drivers/gpio/gpio-idio-16.c b/drivers/gpio/gpio-idio-16.c
index f9349e8d7fdc..53b1eb876a12 100644
--- a/drivers/gpio/gpio-idio-16.c
+++ b/drivers/gpio/gpio-idio-16.c
@@ -3,15 +3,13 @@
  * GPIO library for the ACCES IDIO-16 family
  * Copyright (C) 2022 William Breathitt Gray
  */
-#include <linux/bitmap.h>
+#include <linux/bits.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/gpio/regmap.h>
-#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/regmap.h>
-#include <linux/spinlock.h>
 #include <linux/types.h>
 
 #include "gpio-idio-16.h"
@@ -169,133 +167,6 @@ int devm_idio_16_regmap_register(struct device *const dev,
 }
 EXPORT_SYMBOL_GPL(devm_idio_16_regmap_register);
 
-/**
- * idio_16_get - get signal value at signal offset
- * @reg:	ACCES IDIO-16 device registers
- * @state:	ACCES IDIO-16 device state
- * @offset:	offset of signal to get
- *
- * Returns the signal value (0=low, 1=high) for the signal at @offset.
- */
-int idio_16_get(struct idio_16 __iomem *const reg,
-		struct idio_16_state *const state, const unsigned long offset)
-{
-	const unsigned long mask = BIT(offset);
-
-	if (offset < IDIO_16_NOUT)
-		return test_bit(offset, state->out_state);
-
-	if (offset < 24)
-		return !!(ioread8(&reg->in0_7) & (mask >> IDIO_16_NOUT));
-
-	if (offset < 32)
-		return !!(ioread8(&reg->in8_15) & (mask >> 24));
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(idio_16_get);
-
-/**
- * idio_16_get_multiple - get multiple signal values at multiple signal offsets
- * @reg:	ACCES IDIO-16 device registers
- * @state:	ACCES IDIO-16 device state
- * @mask:	mask of signals to get
- * @bits:	bitmap to store signal values
- *
- * Stores in @bits the values (0=low, 1=high) for the signals defined by @mask.
- */
-void idio_16_get_multiple(struct idio_16 __iomem *const reg,
-			  struct idio_16_state *const state,
-			  const unsigned long *const mask,
-			  unsigned long *const bits)
-{
-	unsigned long flags;
-	const unsigned long out_mask = GENMASK(IDIO_16_NOUT - 1, 0);
-
-	spin_lock_irqsave(&state->lock, flags);
-
-	bitmap_replace(bits, bits, state->out_state, &out_mask, IDIO_16_NOUT);
-	if (*mask & GENMASK(23, 16))
-		bitmap_set_value8(bits, ioread8(&reg->in0_7), 16);
-	if (*mask & GENMASK(31, 24))
-		bitmap_set_value8(bits, ioread8(&reg->in8_15), 24);
-
-	spin_unlock_irqrestore(&state->lock, flags);
-}
-EXPORT_SYMBOL_GPL(idio_16_get_multiple);
-
-/**
- * idio_16_set - set signal value at signal offset
- * @reg:	ACCES IDIO-16 device registers
- * @state:	ACCES IDIO-16 device state
- * @offset:	offset of signal to set
- * @value:	value of signal to set
- *
- * Assigns output @value for the signal at @offset.
- */
-void idio_16_set(struct idio_16 __iomem *const reg,
-		 struct idio_16_state *const state, const unsigned long offset,
-		 const unsigned long value)
-{
-	unsigned long flags;
-
-	if (offset >= IDIO_16_NOUT)
-		return;
-
-	spin_lock_irqsave(&state->lock, flags);
-
-	__assign_bit(offset, state->out_state, value);
-	if (offset < 8)
-		iowrite8(bitmap_get_value8(state->out_state, 0), &reg->out0_7);
-	else
-		iowrite8(bitmap_get_value8(state->out_state, 8), &reg->out8_15);
-
-	spin_unlock_irqrestore(&state->lock, flags);
-}
-EXPORT_SYMBOL_GPL(idio_16_set);
-
-/**
- * idio_16_set_multiple - set signal values at multiple signal offsets
- * @reg:	ACCES IDIO-16 device registers
- * @state:	ACCES IDIO-16 device state
- * @mask:	mask of signals to set
- * @bits:	bitmap of signal output values
- *
- * Assigns output values defined by @bits for the signals defined by @mask.
- */
-void idio_16_set_multiple(struct idio_16 __iomem *const reg,
-			  struct idio_16_state *const state,
-			  const unsigned long *const mask,
-			  const unsigned long *const bits)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&state->lock, flags);
-
-	bitmap_replace(state->out_state, state->out_state, bits, mask,
-		       IDIO_16_NOUT);
-	if (*mask & GENMASK(7, 0))
-		iowrite8(bitmap_get_value8(state->out_state, 0), &reg->out0_7);
-	if (*mask & GENMASK(15, 8))
-		iowrite8(bitmap_get_value8(state->out_state, 8), &reg->out8_15);
-
-	spin_unlock_irqrestore(&state->lock, flags);
-}
-EXPORT_SYMBOL_GPL(idio_16_set_multiple);
-
-/**
- * idio_16_state_init - initialize idio_16_state structure
- * @state:	ACCES IDIO-16 device state
- *
- * Initializes the ACCES IDIO-16 device @state for use in idio-16 library
- * functions.
- */
-void idio_16_state_init(struct idio_16_state *const state)
-{
-	spin_lock_init(&state->lock);
-}
-EXPORT_SYMBOL_GPL(idio_16_state_init);
-
 MODULE_AUTHOR("William Breathitt Gray");
 MODULE_DESCRIPTION("ACCES IDIO-16 GPIO Library");
 MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/gpio-idio-16.h b/drivers/gpio/gpio-idio-16.h
index 255bd8504ed7..93b08ad73065 100644
--- a/drivers/gpio/gpio-idio-16.h
+++ b/drivers/gpio/gpio-idio-16.h
@@ -3,9 +3,6 @@
 #ifndef _IDIO_16_H_
 #define _IDIO_16_H_
 
-#include <linux/spinlock.h>
-#include <linux/types.h>
-
 struct device;
 struct regmap;
 struct regmap_irq;
@@ -30,68 +27,6 @@ struct idio_16_regmap_config {
 	bool filters;
 };
 
-/**
- * struct idio_16 - IDIO-16 registers structure
- * @out0_7:	Read: FET Drive Outputs 0-7
- *		Write: FET Drive Outputs 0-7
- * @in0_7:	Read: Isolated Inputs 0-7
- *		Write: Clear Interrupt
- * @irq_ctl:	Read: Enable IRQ
- *		Write: Disable IRQ
- * @filter_ctl:	Read: Activate Input Filters 0-15
- *		Write: Deactivate Input Filters 0-15
- * @out8_15:	Read: FET Drive Outputs 8-15
- *		Write: FET Drive Outputs 8-15
- * @in8_15:	Read: Isolated Inputs 8-15
- *		Write: Unused
- * @irq_status:	Read: Interrupt status
- *		Write: Unused
- */
-struct idio_16 {
-	u8 out0_7;
-	u8 in0_7;
-	u8 irq_ctl;
-	u8 filter_ctl;
-	u8 out8_15;
-	u8 in8_15;
-	u8 irq_status;
-};
-
-#define IDIO_16_NOUT 16
-
-/**
- * struct idio_16_state - IDIO-16 state structure
- * @lock:	synchronization lock for accessing device state
- * @out_state:	output signals state
- */
-struct idio_16_state {
-	spinlock_t lock;
-	DECLARE_BITMAP(out_state, IDIO_16_NOUT);
-};
-
-/**
- * idio_16_get_direction - get the I/O direction for a signal offset
- * @offset:	offset of signal to get direction
- *
- * Returns the signal direction (0=output, 1=input) for the signal at @offset.
- */
-static inline int idio_16_get_direction(const unsigned long offset)
-{
-	return (offset >= IDIO_16_NOUT) ? 1 : 0;
-}
-
-int idio_16_get(struct idio_16 __iomem *reg, struct idio_16_state *state,
-		unsigned long offset);
-void idio_16_get_multiple(struct idio_16 __iomem *reg,
-			  struct idio_16_state *state,
-			  const unsigned long *mask, unsigned long *bits);
-void idio_16_set(struct idio_16 __iomem *reg, struct idio_16_state *state,
-		 unsigned long offset, unsigned long value);
-void idio_16_set_multiple(struct idio_16 __iomem *reg,
-			  struct idio_16_state *state,
-			  const unsigned long *mask, const unsigned long *bits);
-void idio_16_state_init(struct idio_16_state *state);
-
 int devm_idio_16_regmap_register(struct device *dev, const struct idio_16_regmap_config *config);
 
 #endif /* _IDIO_16_H_ */

From 1a200a3966176b89aa038713a00367c9f57f1dff Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <william.gray@linaro.org>
Date: Thu, 10 Aug 2023 18:00:42 -0400
Subject: [PATCH 1030/2047] gpio: pcie-idio-24: Migrate to the regmap API

The regmap API supports IO port accessors so we can take advantage of
regmap abstractions rather than handling access to the device registers
directly in the driver.

For the PCIe-IDIO-24 series of devices, the following BARs are
available:

    BAR[0]: memory mapped PEX8311
    BAR[1]: I/O mapped PEX8311
    BAR[2]: I/O mapped card registers

There are 24 FET Output lines, 24 Isolated Input lines, and 8 TTL/CMOS
lines (which may be configured for either output or input). The GPIO
lines are exposed by the following card registers:

    Base +0x0-0x2 (Read/Write): FET Outputs
    Base +0xB (Read/Write): TTL/CMOS
    Base +0x4-0x6 (Read): Isolated Inputs
    Base +0x7 (Read): TTL/CMOS

In order for the device to support interrupts, the PLX PEX8311 internal
PCI wire interrupt and local interrupt input must first be enabled.

The following card registers for Change-Of-State may be used:

    Base +0x8-0xA (Read): COS Status Inputs
    Base +0x8-0xA (Write): COS Clear Inputs
    Base +0xB (Read): COS Status TTL/CMOS
    Base +0xB (Write): COS Clear TTL/CMOS
    Base +0xE (Read/Write): COS Enable

The COS Enable register is used to enable/disable interrupts and
configure the interrupt levels; each bit maps to a group of eight inputs
as described below:

    Bit 0: IRQ EN Rising Edge IN0-7
    Bit 1: IRQ EN Rising Edge IN8-15
    Bit 2: IRQ EN Rising Edge IN16-23
    Bit 3: IRQ EN Rising Edge TTL0-7
    Bit 4: IRQ EN Falling Edge IN0-7
    Bit 5: IRQ EN Falling Edge IN8-15
    Bit 6: IRQ EN Falling Edge IN16-23
    Bit 7: IRQ EN Falling Edge TTL0-7

An interrupt is asserted when a change-of-state matching the interrupt
level configuration respective for a particular group of eight inputs
with enabled COS is detected.

The COS Status registers may be read to determine which inputs have
changed; if interrupts were enabled, an IRQ will be generated for the
set bits in these registers. Writing the value read from the COS Status
register back to the respective COS Clear register will clear just those
interrupts.

Reviewed-by: Michael Walle <michael@walle.cc>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/3091e387b1d2eac011a1d84e493663aa2acf982e.1680708357.git.william.gray@linaro.org/
Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig             |   3 +
 drivers/gpio/gpio-pcie-idio-24.c | 669 +++++++++++--------------------
 2 files changed, 246 insertions(+), 426 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 864b221252b2..3cccc6cc315f 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -1665,7 +1665,10 @@ config GPIO_PCI_IDIO_16
 
 config GPIO_PCIE_IDIO_24
 	tristate "ACCES PCIe-IDIO-24 GPIO support"
+	select REGMAP_IRQ
+	select REGMAP_MMIO
 	select GPIOLIB_IRQCHIP
+	select GPIO_REGMAP
 	help
 	  Enables GPIO support for the ACCES PCIe-IDIO-24 family (PCIe-IDIO-24,
 	  PCIe-IDI-24, PCIe-IDO-24, PCIe-IDIO-12). An interrupt is generated
diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c
index 463c0613abb9..2efd1b1a0805 100644
--- a/drivers/gpio/gpio-pcie-idio-24.c
+++ b/drivers/gpio/gpio-pcie-idio-24.c
@@ -6,16 +6,15 @@
  * This driver supports the following ACCES devices: PCIe-IDIO-24,
  * PCIe-IDI-24, PCIe-IDO-24, and PCIe-IDIO-12.
  */
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
+#include <linux/bits.h>
 #include <linux/device.h>
-#include <linux/errno.h>
-#include <linux/gpio/driver.h>
-#include <linux/interrupt.h>
-#include <linux/irqdesc.h>
+#include <linux/err.h>
+#include <linux/gpio/regmap.h>
+#include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/regmap.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
@@ -59,422 +58,224 @@
 #define PLX_PEX8311_PCI_LCS_INTCSR  0x68
 #define INTCSR_INTERNAL_PCI_WIRE    BIT(8)
 #define INTCSR_LOCAL_INPUT          BIT(11)
+#define IDIO_24_ENABLE_IRQ          (INTCSR_INTERNAL_PCI_WIRE | INTCSR_LOCAL_INPUT)
 
-/**
- * struct idio_24_gpio_reg - GPIO device registers structure
- * @out0_7:	Read: FET Outputs 0-7
- *		Write: FET Outputs 0-7
- * @out8_15:	Read: FET Outputs 8-15
- *		Write: FET Outputs 8-15
- * @out16_23:	Read: FET Outputs 16-23
- *		Write: FET Outputs 16-23
- * @ttl_out0_7:	Read: TTL/CMOS Outputs 0-7
- *		Write: TTL/CMOS Outputs 0-7
- * @in0_7:	Read: Isolated Inputs 0-7
- *		Write: Reserved
- * @in8_15:	Read: Isolated Inputs 8-15
- *		Write: Reserved
- * @in16_23:	Read: Isolated Inputs 16-23
- *		Write: Reserved
- * @ttl_in0_7:	Read: TTL/CMOS Inputs 0-7
- *		Write: Reserved
- * @cos0_7:	Read: COS Status Inputs 0-7
- *		Write: COS Clear Inputs 0-7
- * @cos8_15:	Read: COS Status Inputs 8-15
- *		Write: COS Clear Inputs 8-15
- * @cos16_23:	Read: COS Status Inputs 16-23
- *		Write: COS Clear Inputs 16-23
- * @cos_ttl0_7:	Read: COS Status TTL/CMOS 0-7
- *		Write: COS Clear TTL/CMOS 0-7
- * @ctl:	Read: Control Register
- *		Write: Control Register
- * @reserved:	Read: Reserved
- *		Write: Reserved
- * @cos_enable:	Read: COS Enable
- *		Write: COS Enable
- * @soft_reset:	Read: IRQ Output Pin Status
- *		Write: Software Board Reset
- */
-struct idio_24_gpio_reg {
-	u8 out0_7;
-	u8 out8_15;
-	u8 out16_23;
-	u8 ttl_out0_7;
-	u8 in0_7;
-	u8 in8_15;
-	u8 in16_23;
-	u8 ttl_in0_7;
-	u8 cos0_7;
-	u8 cos8_15;
-	u8 cos16_23;
-	u8 cos_ttl0_7;
-	u8 ctl;
-	u8 reserved;
-	u8 cos_enable;
-	u8 soft_reset;
+#define IDIO_24_OUT_BASE 0x0
+#define IDIO_24_TTLCMOS_OUT_REG 0x3
+#define IDIO_24_IN_BASE 0x4
+#define IDIO_24_TTLCMOS_IN_REG 0x7
+#define IDIO_24_COS_STATUS_BASE 0x8
+#define IDIO_24_CONTROL_REG 0xC
+#define IDIO_24_COS_ENABLE 0xE
+#define IDIO_24_SOFT_RESET 0xF
+
+#define CONTROL_REG_OUT_MODE BIT(1)
+
+#define COS_ENABLE_RISING BIT(1)
+#define COS_ENABLE_FALLING BIT(4)
+#define COS_ENABLE_BOTH (COS_ENABLE_RISING | COS_ENABLE_FALLING)
+
+static const struct regmap_config pex8311_intcsr_regmap_config = {
+	.name = "pex8311_intcsr",
+	.reg_bits = 32,
+	.reg_stride = 1,
+	.reg_base = PLX_PEX8311_PCI_LCS_INTCSR,
+	.val_bits = 32,
+	.io_port = true,
+};
+
+static const struct regmap_range idio_24_wr_ranges[] = {
+	regmap_reg_range(0x0, 0x3), regmap_reg_range(0x8, 0xC),
+	regmap_reg_range(0xE, 0xF),
+};
+static const struct regmap_range idio_24_rd_ranges[] = {
+	regmap_reg_range(0x0, 0xC), regmap_reg_range(0xE, 0xF),
+};
+static const struct regmap_range idio_24_volatile_ranges[] = {
+	regmap_reg_range(0x4, 0xB), regmap_reg_range(0xF, 0xF),
+};
+static const struct regmap_access_table idio_24_wr_table = {
+	.yes_ranges = idio_24_wr_ranges,
+	.n_yes_ranges = ARRAY_SIZE(idio_24_wr_ranges),
+};
+static const struct regmap_access_table idio_24_rd_table = {
+	.yes_ranges = idio_24_rd_ranges,
+	.n_yes_ranges = ARRAY_SIZE(idio_24_rd_ranges),
+};
+static const struct regmap_access_table idio_24_volatile_table = {
+	.yes_ranges = idio_24_volatile_ranges,
+	.n_yes_ranges = ARRAY_SIZE(idio_24_volatile_ranges),
+};
+
+static const struct regmap_config idio_24_regmap_config = {
+	.reg_bits = 8,
+	.reg_stride = 1,
+	.val_bits = 8,
+	.io_port = true,
+	.wr_table = &idio_24_wr_table,
+	.rd_table = &idio_24_rd_table,
+	.volatile_table = &idio_24_volatile_table,
+	.cache_type = REGCACHE_FLAT,
+	.use_raw_spinlock = true,
+};
+
+#define IDIO_24_NGPIO_PER_REG 8
+#define IDIO_24_REGMAP_IRQ(_id)						\
+	[24 + _id] = {							\
+		.reg_offset = (_id) / IDIO_24_NGPIO_PER_REG,		\
+		.mask = BIT((_id) % IDIO_24_NGPIO_PER_REG),		\
+		.type = { .types_supported = IRQ_TYPE_EDGE_BOTH },	\
+	}
+#define IDIO_24_IIN_IRQ(_id) IDIO_24_REGMAP_IRQ(_id)
+#define IDIO_24_TTL_IRQ(_id) IDIO_24_REGMAP_IRQ(24 + _id)
+
+static const struct regmap_irq idio_24_regmap_irqs[] = {
+	IDIO_24_IIN_IRQ(0), IDIO_24_IIN_IRQ(1), IDIO_24_IIN_IRQ(2), /* IIN 0-2 */
+	IDIO_24_IIN_IRQ(3), IDIO_24_IIN_IRQ(4), IDIO_24_IIN_IRQ(5), /* IIN 3-5 */
+	IDIO_24_IIN_IRQ(6), IDIO_24_IIN_IRQ(7), IDIO_24_IIN_IRQ(8), /* IIN 6-8 */
+	IDIO_24_IIN_IRQ(9), IDIO_24_IIN_IRQ(10), IDIO_24_IIN_IRQ(11), /* IIN 9-11 */
+	IDIO_24_IIN_IRQ(12), IDIO_24_IIN_IRQ(13), IDIO_24_IIN_IRQ(14), /* IIN 12-14 */
+	IDIO_24_IIN_IRQ(15), IDIO_24_IIN_IRQ(16), IDIO_24_IIN_IRQ(17), /* IIN 15-17 */
+	IDIO_24_IIN_IRQ(18), IDIO_24_IIN_IRQ(19), IDIO_24_IIN_IRQ(20), /* IIN 18-20 */
+	IDIO_24_IIN_IRQ(21), IDIO_24_IIN_IRQ(22), IDIO_24_IIN_IRQ(23), /* IIN 21-23 */
+	IDIO_24_TTL_IRQ(0), IDIO_24_TTL_IRQ(1), IDIO_24_TTL_IRQ(2), /* TTL 0-2 */
+	IDIO_24_TTL_IRQ(3), IDIO_24_TTL_IRQ(4), IDIO_24_TTL_IRQ(5), /* TTL 3-5 */
+	IDIO_24_TTL_IRQ(6), IDIO_24_TTL_IRQ(7), /* TTL 6-7 */
 };
 
 /**
  * struct idio_24_gpio - GPIO device private data structure
- * @chip:	instance of the gpio_chip
+ * @map:	regmap for the device
  * @lock:	synchronization lock to prevent I/O race conditions
- * @reg:	I/O address offset for the GPIO device registers
- * @irq_mask:	I/O bits affected by interrupts
+ * @irq_type:	type configuration for IRQs
  */
 struct idio_24_gpio {
-	struct gpio_chip chip;
+	struct regmap *map;
 	raw_spinlock_t lock;
-	__u8 __iomem *plx;
-	struct idio_24_gpio_reg __iomem *reg;
-	unsigned long irq_mask;
+	u8 irq_type;
 };
 
-static int idio_24_gpio_get_direction(struct gpio_chip *chip,
-	unsigned int offset)
+static int idio_24_handle_mask_sync(const int index, const unsigned int mask_buf_def,
+				    const unsigned int mask_buf, void *const irq_drv_data)
 {
-	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	const unsigned long out_mode_mask = BIT(1);
+	const unsigned int type_mask = COS_ENABLE_BOTH << index;
+	struct idio_24_gpio *const idio24gpio = irq_drv_data;
+	u8 type;
+	int ret;
 
-	/* FET Outputs */
-	if (offset < 24)
-		return GPIO_LINE_DIRECTION_OUT;
+	raw_spin_lock(&idio24gpio->lock);
 
-	/* Isolated Inputs */
-	if (offset < 48)
-		return GPIO_LINE_DIRECTION_IN;
+	/* if all are masked, then disable interrupts, else set to type */
+	type = (mask_buf == mask_buf_def) ? ~type_mask : idio24gpio->irq_type;
 
-	/* TTL/CMOS I/O */
-	/* OUT MODE = 1 when TTL/CMOS Output Mode is set */
-	if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask)
-		return GPIO_LINE_DIRECTION_OUT;
+	ret = regmap_update_bits(idio24gpio->map, IDIO_24_COS_ENABLE, type_mask, type);
 
-	return GPIO_LINE_DIRECTION_IN;
+	raw_spin_unlock(&idio24gpio->lock);
+
+	return ret;
 }
 
-static int idio_24_gpio_direction_input(struct gpio_chip *chip,
-	unsigned int offset)
+static int idio_24_set_type_config(unsigned int **const buf, const unsigned int type,
+				   const struct regmap_irq *const irq_data, const int idx,
+				   void *const irq_drv_data)
 {
-	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	unsigned long flags;
-	unsigned int ctl_state;
-	const unsigned long out_mode_mask = BIT(1);
+	const unsigned int offset = irq_data->reg_offset;
+	const unsigned int rising = COS_ENABLE_RISING << offset;
+	const unsigned int falling = COS_ENABLE_FALLING << offset;
+	const unsigned int mask = COS_ENABLE_BOTH << offset;
+	struct idio_24_gpio *const idio24gpio = irq_drv_data;
+	unsigned int new;
+	unsigned int cos_enable;
+	int ret;
 
-	/* TTL/CMOS I/O */
-	if (offset > 47) {
-		raw_spin_lock_irqsave(&idio24gpio->lock, flags);
-
-		/* Clear TTL/CMOS Output Mode */
-		ctl_state = ioread8(&idio24gpio->reg->ctl) & ~out_mode_mask;
-		iowrite8(ctl_state, &idio24gpio->reg->ctl);
-
-		raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+	switch (type) {
+	case IRQ_TYPE_EDGE_RISING:
+		new = rising;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		new = falling;
+		break;
+	case IRQ_TYPE_EDGE_BOTH:
+		new = mask;
+		break;
+	default:
+		return -EINVAL;
 	}
 
-	return 0;
-}
+	raw_spin_lock(&idio24gpio->lock);
 
-static int idio_24_gpio_direction_output(struct gpio_chip *chip,
-	unsigned int offset, int value)
-{
-	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	unsigned long flags;
-	unsigned int ctl_state;
-	const unsigned long out_mode_mask = BIT(1);
+	/* replace old bitmap with new bitmap */
+	idio24gpio->irq_type = (idio24gpio->irq_type & ~mask) | (new & mask);
 
-	/* TTL/CMOS I/O */
-	if (offset > 47) {
-		raw_spin_lock_irqsave(&idio24gpio->lock, flags);
+	ret = regmap_read(idio24gpio->map, IDIO_24_COS_ENABLE, &cos_enable);
+	if (ret)
+		goto exit_unlock;
 
-		/* Set TTL/CMOS Output Mode */
-		ctl_state = ioread8(&idio24gpio->reg->ctl) | out_mode_mask;
-		iowrite8(ctl_state, &idio24gpio->reg->ctl);
-
-		raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
+	/* if COS is currently enabled then update the edge type */
+	if (cos_enable & mask) {
+		ret = regmap_update_bits(idio24gpio->map, IDIO_24_COS_ENABLE, mask,
+					 idio24gpio->irq_type);
+		if (ret)
+			goto exit_unlock;
 	}
 
-	chip->set(chip, offset, value);
-	return 0;
+exit_unlock:
+	raw_spin_unlock(&idio24gpio->lock);
+
+	return ret;
 }
 
-static int idio_24_gpio_get(struct gpio_chip *chip, unsigned int offset)
+static int idio_24_reg_mask_xlate(struct gpio_regmap *const gpio, const unsigned int base,
+				  const unsigned int offset, unsigned int *const reg,
+				  unsigned int *const mask)
 {
-	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	const unsigned long offset_mask = BIT(offset % 8);
-	const unsigned long out_mode_mask = BIT(1);
+	const unsigned int out_stride = offset / IDIO_24_NGPIO_PER_REG;
+	const unsigned int in_stride = (offset - 24) / IDIO_24_NGPIO_PER_REG;
+	struct regmap *const map = gpio_regmap_get_drvdata(gpio);
+	int err;
+	unsigned int ctrl_reg;
 
-	/* FET Outputs */
-	if (offset < 8)
-		return !!(ioread8(&idio24gpio->reg->out0_7) & offset_mask);
+	switch (base) {
+	case IDIO_24_OUT_BASE:
+		*mask = BIT(offset % IDIO_24_NGPIO_PER_REG);
 
-	if (offset < 16)
-		return !!(ioread8(&idio24gpio->reg->out8_15) & offset_mask);
-
-	if (offset < 24)
-		return !!(ioread8(&idio24gpio->reg->out16_23) & offset_mask);
-
-	/* Isolated Inputs */
-	if (offset < 32)
-		return !!(ioread8(&idio24gpio->reg->in0_7) & offset_mask);
-
-	if (offset < 40)
-		return !!(ioread8(&idio24gpio->reg->in8_15) & offset_mask);
-
-	if (offset < 48)
-		return !!(ioread8(&idio24gpio->reg->in16_23) & offset_mask);
-
-	/* TTL/CMOS Outputs */
-	if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask)
-		return !!(ioread8(&idio24gpio->reg->ttl_out0_7) & offset_mask);
-
-	/* TTL/CMOS Inputs */
-	return !!(ioread8(&idio24gpio->reg->ttl_in0_7) & offset_mask);
-}
-
-static int idio_24_gpio_get_multiple(struct gpio_chip *chip,
-	unsigned long *mask, unsigned long *bits)
-{
-	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	unsigned long offset;
-	unsigned long gpio_mask;
-	void __iomem *ports[] = {
-		&idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
-		&idio24gpio->reg->out16_23, &idio24gpio->reg->in0_7,
-		&idio24gpio->reg->in8_15, &idio24gpio->reg->in16_23,
-	};
-	size_t index;
-	unsigned long port_state;
-	const unsigned long out_mode_mask = BIT(1);
-
-	/* clear bits array to a clean slate */
-	bitmap_zero(bits, chip->ngpio);
-
-	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
-		index = offset / 8;
-
-		/* read bits from current gpio port (port 6 is TTL GPIO) */
-		if (index < 6)
-			port_state = ioread8(ports[index]);
-		else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask)
-			port_state = ioread8(&idio24gpio->reg->ttl_out0_7);
-		else
-			port_state = ioread8(&idio24gpio->reg->ttl_in0_7);
-
-		port_state &= gpio_mask;
-
-		bitmap_set_value8(bits, port_state, offset);
-	}
-
-	return 0;
-}
-
-static void idio_24_gpio_set(struct gpio_chip *chip, unsigned int offset,
-	int value)
-{
-	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	const unsigned long out_mode_mask = BIT(1);
-	void __iomem *base;
-	const unsigned int mask = BIT(offset % 8);
-	unsigned long flags;
-	unsigned int out_state;
-
-	/* Isolated Inputs */
-	if (offset > 23 && offset < 48)
-		return;
-
-	/* TTL/CMOS Inputs */
-	if (offset > 47 && !(ioread8(&idio24gpio->reg->ctl) & out_mode_mask))
-		return;
-
-	/* TTL/CMOS Outputs */
-	if (offset > 47)
-		base = &idio24gpio->reg->ttl_out0_7;
-	/* FET Outputs */
-	else if (offset > 15)
-		base = &idio24gpio->reg->out16_23;
-	else if (offset > 7)
-		base = &idio24gpio->reg->out8_15;
-	else
-		base = &idio24gpio->reg->out0_7;
-
-	raw_spin_lock_irqsave(&idio24gpio->lock, flags);
-
-	if (value)
-		out_state = ioread8(base) | mask;
-	else
-		out_state = ioread8(base) & ~mask;
-
-	iowrite8(out_state, base);
-
-	raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
-}
-
-static void idio_24_gpio_set_multiple(struct gpio_chip *chip,
-	unsigned long *mask, unsigned long *bits)
-{
-	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	unsigned long offset;
-	unsigned long gpio_mask;
-	void __iomem *ports[] = {
-		&idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15,
-		&idio24gpio->reg->out16_23
-	};
-	size_t index;
-	unsigned long bitmask;
-	unsigned long flags;
-	unsigned long out_state;
-	const unsigned long out_mode_mask = BIT(1);
-
-	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
-		index = offset / 8;
-
-		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
-
-		raw_spin_lock_irqsave(&idio24gpio->lock, flags);
-
-		/* read bits from current gpio port (port 6 is TTL GPIO) */
-		if (index < 6) {
-			out_state = ioread8(ports[index]);
-		} else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask) {
-			out_state = ioread8(&idio24gpio->reg->ttl_out0_7);
-		} else {
-			/* skip TTL GPIO if set for input */
-			raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
-			continue;
+		/* FET Outputs */
+		if (offset < 24) {
+			*reg = IDIO_24_OUT_BASE + out_stride;
+			return 0;
 		}
 
-		/* set requested bit states */
-		out_state &= ~gpio_mask;
-		out_state |= bitmask;
+		/* Isolated Inputs */
+		if (offset < 48) {
+			*reg = IDIO_24_IN_BASE + in_stride;
+			return 0;
+		}
 
-		/* write bits for current gpio port (port 6 is TTL GPIO) */
-		if (index < 6)
-			iowrite8(out_state, ports[index]);
-		else
-			iowrite8(out_state, &idio24gpio->reg->ttl_out0_7);
+		err = regmap_read(map, IDIO_24_CONTROL_REG, &ctrl_reg);
+		if (err)
+			return err;
 
-		raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
-	}
-}
+		/* TTL/CMOS Outputs */
+		if (ctrl_reg & CONTROL_REG_OUT_MODE) {
+			*reg = IDIO_24_TTLCMOS_OUT_REG;
+			return 0;
+		}
 
-static void idio_24_irq_ack(struct irq_data *data)
-{
-}
+		/* TTL/CMOS Inputs */
+		*reg = IDIO_24_TTLCMOS_IN_REG;
+		return 0;
+	case IDIO_24_CONTROL_REG:
+		/* We can only set direction for TTL/CMOS lines */
+		if (offset < 48)
+			return -EOPNOTSUPP;
 
-static void idio_24_irq_mask(struct irq_data *data)
-{
-	struct gpio_chip *const chip = irq_data_get_irq_chip_data(data);
-	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	unsigned long flags;
-	const unsigned long bit_offset = irqd_to_hwirq(data) - 24;
-	unsigned char new_irq_mask;
-	const unsigned long bank_offset = bit_offset / 8;
-	unsigned char cos_enable_state;
-
-	raw_spin_lock_irqsave(&idio24gpio->lock, flags);
-
-	idio24gpio->irq_mask &= ~BIT(bit_offset);
-	new_irq_mask = idio24gpio->irq_mask >> bank_offset * 8;
-
-	if (!new_irq_mask) {
-		cos_enable_state = ioread8(&idio24gpio->reg->cos_enable);
-
-		/* Disable Rising Edge detection */
-		cos_enable_state &= ~BIT(bank_offset);
-		/* Disable Falling Edge detection */
-		cos_enable_state &= ~BIT(bank_offset + 4);
-
-		iowrite8(cos_enable_state, &idio24gpio->reg->cos_enable);
-	}
-
-	raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
-
-	gpiochip_disable_irq(chip, irqd_to_hwirq(data));
-}
-
-static void idio_24_irq_unmask(struct irq_data *data)
-{
-	struct gpio_chip *const chip = irq_data_get_irq_chip_data(data);
-	struct idio_24_gpio *const idio24gpio = gpiochip_get_data(chip);
-	unsigned long flags;
-	unsigned char prev_irq_mask;
-	const unsigned long bit_offset = irqd_to_hwirq(data) - 24;
-	const unsigned long bank_offset = bit_offset / 8;
-	unsigned char cos_enable_state;
-
-	gpiochip_enable_irq(chip, irqd_to_hwirq(data));
-
-	raw_spin_lock_irqsave(&idio24gpio->lock, flags);
-
-	prev_irq_mask = idio24gpio->irq_mask >> bank_offset * 8;
-	idio24gpio->irq_mask |= BIT(bit_offset);
-
-	if (!prev_irq_mask) {
-		cos_enable_state = ioread8(&idio24gpio->reg->cos_enable);
-
-		/* Enable Rising Edge detection */
-		cos_enable_state |= BIT(bank_offset);
-		/* Enable Falling Edge detection */
-		cos_enable_state |= BIT(bank_offset + 4);
-
-		iowrite8(cos_enable_state, &idio24gpio->reg->cos_enable);
-	}
-
-	raw_spin_unlock_irqrestore(&idio24gpio->lock, flags);
-}
-
-static int idio_24_irq_set_type(struct irq_data *data, unsigned int flow_type)
-{
-	/* The only valid irq types are none and both-edges */
-	if (flow_type != IRQ_TYPE_NONE &&
-		(flow_type & IRQ_TYPE_EDGE_BOTH) != IRQ_TYPE_EDGE_BOTH)
+		*reg = IDIO_24_CONTROL_REG;
+		*mask = CONTROL_REG_OUT_MODE;
+		return 0;
+	default:
+		/* Should never reach this path */
 		return -EINVAL;
-
-	return 0;
-}
-
-static const struct irq_chip idio_24_irqchip = {
-	.name = "pcie-idio-24",
-	.irq_ack = idio_24_irq_ack,
-	.irq_mask = idio_24_irq_mask,
-	.irq_unmask = idio_24_irq_unmask,
-	.irq_set_type = idio_24_irq_set_type,
-	.flags = IRQCHIP_IMMUTABLE,
-	GPIOCHIP_IRQ_RESOURCE_HELPERS,
-};
-
-static irqreturn_t idio_24_irq_handler(int irq, void *dev_id)
-{
-	struct idio_24_gpio *const idio24gpio = dev_id;
-	unsigned long irq_status;
-	struct gpio_chip *const chip = &idio24gpio->chip;
-	unsigned long irq_mask;
-	int gpio;
-
-	raw_spin_lock(&idio24gpio->lock);
-
-	/* Read Change-Of-State status */
-	irq_status = ioread32(&idio24gpio->reg->cos0_7);
-
-	raw_spin_unlock(&idio24gpio->lock);
-
-	/* Make sure our device generated IRQ */
-	if (!irq_status)
-		return IRQ_NONE;
-
-	/* Handle only unmasked IRQ */
-	irq_mask = idio24gpio->irq_mask & irq_status;
-
-	for_each_set_bit(gpio, &irq_mask, chip->ngpio - 24)
-		generic_handle_domain_irq(chip->irq.domain, gpio + 24);
-
-	raw_spin_lock(&idio24gpio->lock);
-
-	/* Clear Change-Of-State status */
-	iowrite32(irq_status, &idio24gpio->reg->cos0_7);
-
-	raw_spin_unlock(&idio24gpio->lock);
-
-	return IRQ_HANDLED;
+	}
 }
 
 #define IDIO_24_NGPIO 56
@@ -496,11 +297,12 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	const size_t pci_plx_bar_index = 1;
 	const size_t pci_bar_index = 2;
 	const char *const name = pci_name(pdev);
-	struct gpio_irq_chip *girq;
-
-	idio24gpio = devm_kzalloc(dev, sizeof(*idio24gpio), GFP_KERNEL);
-	if (!idio24gpio)
-		return -ENOMEM;
+	struct gpio_regmap_config gpio_config = {};
+	void __iomem *pex8311_regs;
+	void __iomem *idio_24_regs;
+	struct regmap *intcsr_map;
+	struct regmap_irq_chip *chip;
+	struct regmap_irq_chip_data *chip_data;
 
 	err = pcim_enable_device(pdev);
 	if (err) {
@@ -514,57 +316,72 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return err;
 	}
 
-	idio24gpio->plx = pcim_iomap_table(pdev)[pci_plx_bar_index];
-	idio24gpio->reg = pcim_iomap_table(pdev)[pci_bar_index];
+	pex8311_regs = pcim_iomap_table(pdev)[pci_plx_bar_index];
+	idio_24_regs = pcim_iomap_table(pdev)[pci_bar_index];
 
-	idio24gpio->chip.label = name;
-	idio24gpio->chip.parent = dev;
-	idio24gpio->chip.owner = THIS_MODULE;
-	idio24gpio->chip.base = -1;
-	idio24gpio->chip.ngpio = IDIO_24_NGPIO;
-	idio24gpio->chip.names = idio_24_names;
-	idio24gpio->chip.get_direction = idio_24_gpio_get_direction;
-	idio24gpio->chip.direction_input = idio_24_gpio_direction_input;
-	idio24gpio->chip.direction_output = idio_24_gpio_direction_output;
-	idio24gpio->chip.get = idio_24_gpio_get;
-	idio24gpio->chip.get_multiple = idio_24_gpio_get_multiple;
-	idio24gpio->chip.set = idio_24_gpio_set;
-	idio24gpio->chip.set_multiple = idio_24_gpio_set_multiple;
+	intcsr_map = devm_regmap_init_mmio(dev, pex8311_regs, &pex8311_intcsr_regmap_config);
+	if (IS_ERR(intcsr_map))
+		return dev_err_probe(dev, PTR_ERR(intcsr_map),
+				     "Unable to initialize PEX8311 register map\n");
 
-	girq = &idio24gpio->chip.irq;
-	gpio_irq_chip_set_chip(girq, &idio_24_irqchip);
-	/* This will let us handle the parent IRQ in the driver */
-	girq->parent_handler = NULL;
-	girq->num_parents = 0;
-	girq->parents = NULL;
-	girq->default_type = IRQ_TYPE_NONE;
-	girq->handler = handle_edge_irq;
+	idio24gpio = devm_kzalloc(dev, sizeof(*idio24gpio), GFP_KERNEL);
+	if (!idio24gpio)
+		return -ENOMEM;
+
+	idio24gpio->map = devm_regmap_init_mmio(dev, idio_24_regs, &idio_24_regmap_config);
+	if (IS_ERR(idio24gpio->map))
+		return dev_err_probe(dev, PTR_ERR(idio24gpio->map),
+				     "Unable to initialize register map\n");
 
 	raw_spin_lock_init(&idio24gpio->lock);
 
+	/* Initialize all IRQ type configuration to IRQ_TYPE_EDGE_BOTH */
+	idio24gpio->irq_type = GENMASK(7, 0);
+
+	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	chip->name = name;
+	chip->status_base = IDIO_24_COS_STATUS_BASE;
+	chip->mask_base = IDIO_24_COS_ENABLE;
+	chip->ack_base = IDIO_24_COS_STATUS_BASE;
+	chip->num_regs = 4;
+	chip->irqs = idio_24_regmap_irqs;
+	chip->num_irqs = ARRAY_SIZE(idio_24_regmap_irqs);
+	chip->handle_mask_sync = idio_24_handle_mask_sync;
+	chip->set_type_config = idio_24_set_type_config;
+	chip->irq_drv_data = idio24gpio;
+
 	/* Software board reset */
-	iowrite8(0, &idio24gpio->reg->soft_reset);
+	err = regmap_write(idio24gpio->map, IDIO_24_SOFT_RESET, 0);
+	if (err)
+		return err;
 	/*
 	 * enable PLX PEX8311 internal PCI wire interrupt and local interrupt
 	 * input
 	 */
-	iowrite8((INTCSR_INTERNAL_PCI_WIRE | INTCSR_LOCAL_INPUT) >> 8,
-		 idio24gpio->plx + PLX_PEX8311_PCI_LCS_INTCSR + 1);
-
-	err = devm_gpiochip_add_data(dev, &idio24gpio->chip, idio24gpio);
-	if (err) {
-		dev_err(dev, "GPIO registering failed (%d)\n", err);
+	err = regmap_update_bits(intcsr_map, 0x0, IDIO_24_ENABLE_IRQ, IDIO_24_ENABLE_IRQ);
+	if (err)
 		return err;
-	}
 
-	err = devm_request_irq(dev, pdev->irq, idio_24_irq_handler, IRQF_SHARED,
-		name, idio24gpio);
-	if (err) {
-		dev_err(dev, "IRQ handler registering failed (%d)\n", err);
-		return err;
-	}
+	err = devm_regmap_add_irq_chip(dev, idio24gpio->map, pdev->irq, 0, 0, chip, &chip_data);
+	if (err)
+		return dev_err_probe(dev, err, "IRQ registration failed\n");
 
-	return 0;
+	gpio_config.parent = dev;
+	gpio_config.regmap = idio24gpio->map;
+	gpio_config.ngpio = IDIO_24_NGPIO;
+	gpio_config.names = idio_24_names;
+	gpio_config.reg_dat_base = GPIO_REGMAP_ADDR(IDIO_24_OUT_BASE);
+	gpio_config.reg_set_base = GPIO_REGMAP_ADDR(IDIO_24_OUT_BASE);
+	gpio_config.reg_dir_out_base = GPIO_REGMAP_ADDR(IDIO_24_CONTROL_REG);
+	gpio_config.ngpio_per_reg = IDIO_24_NGPIO_PER_REG;
+	gpio_config.irq_domain = regmap_irq_get_domain(chip_data);
+	gpio_config.reg_mask_xlate = idio_24_reg_mask_xlate;
+	gpio_config.drvdata = idio24gpio->map;
+
+	return PTR_ERR_OR_ZERO(devm_gpio_regmap_register(dev, &gpio_config));
 }
 
 static const struct pci_device_id idio_24_pci_dev_id[] = {

From 0988ffa09630d4f2f66e345d1a44ffd3f1edb8e0 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <william.gray@linaro.org>
Date: Thu, 10 Aug 2023 18:00:43 -0400
Subject: [PATCH 1031/2047] gpio: ws16c48: Migrate to the regmap API

The regmap API supports IO port accessors so we can take advantage of
regmap abstractions rather than handling access to the device registers
directly in the driver.

The WinSystems WS16C48 provides the following registers:

    Offset 0x0-0x5: Port 0-5 I/O
    Offset 0x6: Int_Pending
    Offset 0x7: Page/Lock
    Offset 0x8-0xA (Page 1): Pol_0-Pol_2
    Offset 0x8-0xA (Page 2): Enab_0-Enab_2
    Offset 0x8-0xA (Page 3): Int_ID0-Int_ID2

Port 0-5 I/O provides access to 48 lines of digital I/O across six
registers, each bit position corresponding to the respective line.
Writing a 1 to a respective bit position causes that output pin to sink
current, while writing a 0 to the same bit position causes that output
pin to go to a high-impedance state and allows it to be used an input.
Reads on a port report the inverted state (0 = high, 1 = low) of an I/O
pin when used in input mode. Interrupts are supported on Port 0-2.

Int_Pending is a read-only register that reports the combined state of
the INT_ID0 through INT_ID2 registers; an interrupt pending is indicated
when any of the low three bits are set.

The Page/Lock register provides the following bits:

    Bit 0-5: Port 0-5 I/O Lock
    Bit 6-7: Page 0-3 Selection

For Bits 0-5, writing a 1 to a respective bit position locks the output
state of the corresponding I/O port. Writing the page number to Bits 6-7
selects that respective register page for use.

Pol_0-Pol_2 are accessible when Page 1 is selected. Writing a 1 to a
respective bit position selects the rising edge detection interrupts for
that input line, while writing a 0 to the same bit position selects the
falling edge detection interrupts.

Enab_0-Enab_2 are accessible when Page 2 is selected. Writing a 1 to a
respective bit position enables interrupts for that input line, while
writing a 0 to that same bit position clears and disables interrupts for
that input line.

Int_ID0-Int_ID2 are accessible when Page 3 is selected. A respective bit
when read as a 1 indicates that an edge of the polarity set in the
corresponding polarity register was detected for the corresponding input
line. Writing any value to this register clears all pending interrupts
for the register.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Michael Walle <michael@walle.cc>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/f59de81e80f7198bcfa9a15615c459c38b5d0e08.1680708357.git.william.gray@linaro.org/
Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig        |   3 +
 drivers/gpio/gpio-ws16c48.c | 552 ++++++++++++------------------------
 2 files changed, 188 insertions(+), 367 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 3cccc6cc315f..673bafb8be58 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -998,7 +998,10 @@ config GPIO_WINBOND
 config GPIO_WS16C48
 	tristate "WinSystems WS16C48 GPIO support"
 	select ISA_BUS_API
+	select REGMAP_IRQ
+	select REGMAP_MMIO
 	select GPIOLIB_IRQCHIP
+	select GPIO_REGMAP
 	help
 	  Enables GPIO support for the WinSystems WS16C48. The base port
 	  addresses for the devices may be configured via the base module
diff --git a/drivers/gpio/gpio-ws16c48.c b/drivers/gpio/gpio-ws16c48.c
index e73885a4dc32..701847508e94 100644
--- a/drivers/gpio/gpio-ws16c48.c
+++ b/drivers/gpio/gpio-ws16c48.c
@@ -3,19 +3,18 @@
  * GPIO driver for the WinSystems WS16C48
  * Copyright (C) 2016 William Breathitt Gray
  */
-#include <linux/bitmap.h>
+#include <linux/bitfield.h>
+#include <linux/bits.h>
 #include <linux/device.h>
-#include <linux/errno.h>
-#include <linux/gpio/driver.h>
-#include <linux/io.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/irqdesc.h>
+#include <linux/err.h>
+#include <linux/gpio/regmap.h>
+#include <linux/irq.h>
 #include <linux/isa.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/spinlock.h>
+#include <linux/regmap.h>
 #include <linux/types.h>
 
 #define WS16C48_EXTENT 10
@@ -31,371 +30,178 @@ static unsigned int num_irq;
 module_param_hw_array(irq, uint, irq, &num_irq, 0);
 MODULE_PARM_DESC(irq, "WinSystems WS16C48 interrupt line numbers");
 
-/**
- * struct ws16c48_reg - device register structure
- * @port:		Port 0 through 5 I/O
- * @int_pending:	Interrupt Pending
- * @page_lock:		Register page (Bits 7-6) and I/O port lock (Bits 5-0)
- * @pol_enab_int_id:	Interrupt polarity, enable, and ID
- */
-struct ws16c48_reg {
-	u8 port[6];
-	u8 int_pending;
-	u8 page_lock;
-	u8 pol_enab_int_id[3];
+#define WS16C48_DAT_BASE 0x0
+#define WS16C48_PAGE_LOCK 0x7
+#define WS16C48_PAGE_BASE 0x8
+#define WS16C48_POL WS16C48_PAGE_BASE
+#define WS16C48_ENAB WS16C48_PAGE_BASE
+#define WS16C48_INT_ID WS16C48_PAGE_BASE
+
+#define PAGE_LOCK_PAGE_FIELD GENMASK(7, 6)
+#define POL_PAGE u8_encode_bits(1, PAGE_LOCK_PAGE_FIELD)
+#define ENAB_PAGE u8_encode_bits(2, PAGE_LOCK_PAGE_FIELD)
+#define INT_ID_PAGE u8_encode_bits(3, PAGE_LOCK_PAGE_FIELD)
+
+static const struct regmap_range ws16c48_wr_ranges[] = {
+	regmap_reg_range(0x0, 0x5), regmap_reg_range(0x7, 0xA),
+};
+static const struct regmap_range ws16c48_rd_ranges[] = {
+	regmap_reg_range(0x0, 0xA),
+};
+static const struct regmap_range ws16c48_volatile_ranges[] = {
+	regmap_reg_range(0x0, 0x6), regmap_reg_range(0x8, 0xA),
+};
+static const struct regmap_access_table ws16c48_wr_table = {
+	.yes_ranges = ws16c48_wr_ranges,
+	.n_yes_ranges = ARRAY_SIZE(ws16c48_wr_ranges),
+};
+static const struct regmap_access_table ws16c48_rd_table = {
+	.yes_ranges = ws16c48_rd_ranges,
+	.n_yes_ranges = ARRAY_SIZE(ws16c48_rd_ranges),
+};
+static const struct regmap_access_table ws16c48_volatile_table = {
+	.yes_ranges = ws16c48_volatile_ranges,
+	.n_yes_ranges = ARRAY_SIZE(ws16c48_volatile_ranges),
+};
+static const struct regmap_config ws16c48_regmap_config = {
+	.reg_bits = 8,
+	.reg_stride = 1,
+	.val_bits = 8,
+	.io_port = true,
+	.wr_table = &ws16c48_wr_table,
+	.rd_table = &ws16c48_rd_table,
+	.volatile_table = &ws16c48_volatile_table,
+	.cache_type = REGCACHE_FLAT,
+	.use_raw_spinlock = true,
+};
+
+#define WS16C48_NGPIO_PER_REG 8
+#define WS16C48_REGMAP_IRQ(_id)							\
+	[_id] = {								\
+		.reg_offset = (_id) / WS16C48_NGPIO_PER_REG,			\
+		.mask = BIT((_id) % WS16C48_NGPIO_PER_REG),			\
+		.type = {							\
+			.type_reg_offset = (_id) / WS16C48_NGPIO_PER_REG,	\
+			.types_supported = IRQ_TYPE_EDGE_BOTH,			\
+		},								\
+	}
+
+/* Only the first 24 lines (Port 0-2) support interrupts */
+#define WS16C48_NUM_IRQS 24
+static const struct regmap_irq ws16c48_regmap_irqs[WS16C48_NUM_IRQS] = {
+	WS16C48_REGMAP_IRQ(0), WS16C48_REGMAP_IRQ(1), WS16C48_REGMAP_IRQ(2), /* 0-2 */
+	WS16C48_REGMAP_IRQ(3), WS16C48_REGMAP_IRQ(4), WS16C48_REGMAP_IRQ(5), /* 3-5 */
+	WS16C48_REGMAP_IRQ(6), WS16C48_REGMAP_IRQ(7), WS16C48_REGMAP_IRQ(8), /* 6-8 */
+	WS16C48_REGMAP_IRQ(9), WS16C48_REGMAP_IRQ(10), WS16C48_REGMAP_IRQ(11), /* 9-11 */
+	WS16C48_REGMAP_IRQ(12), WS16C48_REGMAP_IRQ(13), WS16C48_REGMAP_IRQ(14), /* 12-14 */
+	WS16C48_REGMAP_IRQ(15), WS16C48_REGMAP_IRQ(16), WS16C48_REGMAP_IRQ(17), /* 15-17 */
+	WS16C48_REGMAP_IRQ(18), WS16C48_REGMAP_IRQ(19), WS16C48_REGMAP_IRQ(20), /* 18-20 */
+	WS16C48_REGMAP_IRQ(21), WS16C48_REGMAP_IRQ(22), WS16C48_REGMAP_IRQ(23), /* 21-23 */
 };
 
 /**
  * struct ws16c48_gpio - GPIO device private data structure
- * @chip:	instance of the gpio_chip
- * @io_state:	bit I/O state (whether bit is set to input or output)
- * @out_state:	output bits state
+ * @map:	regmap for the device
  * @lock:	synchronization lock to prevent I/O race conditions
  * @irq_mask:	I/O bits affected by interrupts
- * @flow_mask:	IRQ flow type mask for the respective I/O bits
- * @reg:	I/O address offset for the device registers
  */
 struct ws16c48_gpio {
-	struct gpio_chip chip;
-	unsigned char io_state[6];
-	unsigned char out_state[6];
+	struct regmap *map;
 	raw_spinlock_t lock;
-	unsigned long irq_mask;
-	unsigned long flow_mask;
-	struct ws16c48_reg __iomem *reg;
+	u8 irq_mask[WS16C48_NUM_IRQS / WS16C48_NGPIO_PER_REG];
 };
 
-static int ws16c48_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
+static int ws16c48_handle_pre_irq(void *const irq_drv_data) __acquires(&ws16c48gpio->lock)
 {
-	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	const unsigned port = offset / 8;
-	const unsigned mask = BIT(offset % 8);
+	struct ws16c48_gpio *const ws16c48gpio = irq_drv_data;
 
-	if (ws16c48gpio->io_state[port] & mask)
-		return GPIO_LINE_DIRECTION_IN;
-
-	return GPIO_LINE_DIRECTION_OUT;
-}
-
-static int ws16c48_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
-{
-	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	const unsigned port = offset / 8;
-	const unsigned mask = BIT(offset % 8);
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
-
-	ws16c48gpio->io_state[port] |= mask;
-	ws16c48gpio->out_state[port] &= ~mask;
-	iowrite8(ws16c48gpio->out_state[port], ws16c48gpio->reg->port + port);
-
-	raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
+	/* Lock to prevent Page/Lock register change while we handle IRQ */
+	raw_spin_lock(&ws16c48gpio->lock);
 
 	return 0;
 }
 
-static int ws16c48_gpio_direction_output(struct gpio_chip *chip,
-	unsigned offset, int value)
+static int ws16c48_handle_post_irq(void *const irq_drv_data) __releases(&ws16c48gpio->lock)
 {
-	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	const unsigned port = offset / 8;
-	const unsigned mask = BIT(offset % 8);
-	unsigned long flags;
+	struct ws16c48_gpio *const ws16c48gpio = irq_drv_data;
 
-	raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
-
-	ws16c48gpio->io_state[port] &= ~mask;
-	if (value)
-		ws16c48gpio->out_state[port] |= mask;
-	else
-		ws16c48gpio->out_state[port] &= ~mask;
-	iowrite8(ws16c48gpio->out_state[port], ws16c48gpio->reg->port + port);
-
-	raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
+	raw_spin_unlock(&ws16c48gpio->lock);
 
 	return 0;
 }
 
-static int ws16c48_gpio_get(struct gpio_chip *chip, unsigned offset)
+static int ws16c48_handle_mask_sync(const int index, const unsigned int mask_buf_def,
+				    const unsigned int mask_buf, void *const irq_drv_data)
 {
-	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	const unsigned port = offset / 8;
-	const unsigned mask = BIT(offset % 8);
+	struct ws16c48_gpio *const ws16c48gpio = irq_drv_data;
 	unsigned long flags;
-	unsigned port_state;
+	int ret = 0;
 
 	raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
 
-	/* ensure that GPIO is set for input */
-	if (!(ws16c48gpio->io_state[port] & mask)) {
-		raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
-		return -EINVAL;
-	}
+	/* exit early if no change since the last mask sync */
+	if (mask_buf == ws16c48gpio->irq_mask[index])
+		goto exit_unlock;
+	ws16c48gpio->irq_mask[index] = mask_buf;
 
-	port_state = ioread8(ws16c48gpio->reg->port + port);
+	ret = regmap_write(ws16c48gpio->map, WS16C48_PAGE_LOCK, ENAB_PAGE);
+	if (ret)
+		goto exit_unlock;
 
+	/* Update ENAB register (inverted mask) */
+	ret = regmap_write(ws16c48gpio->map, WS16C48_ENAB + index, ~mask_buf);
+	if (ret)
+		goto exit_unlock;
+
+	ret = regmap_write(ws16c48gpio->map, WS16C48_PAGE_LOCK, INT_ID_PAGE);
+	if (ret)
+		goto exit_unlock;
+
+exit_unlock:
 	raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
 
-	return !!(port_state & mask);
+	return ret;
 }
 
-static int ws16c48_gpio_get_multiple(struct gpio_chip *chip,
-	unsigned long *mask, unsigned long *bits)
+static int ws16c48_set_type_config(unsigned int **const buf, const unsigned int type,
+				   const struct regmap_irq *const irq_data, const int idx,
+				   void *const irq_drv_data)
 {
-	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	unsigned long offset;
-	unsigned long gpio_mask;
-	size_t index;
-	u8 __iomem *port_addr;
-	unsigned long port_state;
-
-	/* clear bits array to a clean slate */
-	bitmap_zero(bits, chip->ngpio);
-
-	for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
-		index = offset / 8;
-		port_addr = ws16c48gpio->reg->port + index;
-		port_state = ioread8(port_addr) & gpio_mask;
-
-		bitmap_set_value8(bits, port_state, offset);
-	}
-
-	return 0;
-}
-
-static void ws16c48_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
-{
-	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	const unsigned port = offset / 8;
-	const unsigned mask = BIT(offset % 8);
+	struct ws16c48_gpio *const ws16c48gpio = irq_drv_data;
+	unsigned int polarity;
 	unsigned long flags;
+	int ret;
 
-	raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
-
-	/* ensure that GPIO is set for output */
-	if (ws16c48gpio->io_state[port] & mask) {
-		raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
-		return;
-	}
-
-	if (value)
-		ws16c48gpio->out_state[port] |= mask;
-	else
-		ws16c48gpio->out_state[port] &= ~mask;
-	iowrite8(ws16c48gpio->out_state[port], ws16c48gpio->reg->port + port);
-
-	raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
-}
-
-static void ws16c48_gpio_set_multiple(struct gpio_chip *chip,
-	unsigned long *mask, unsigned long *bits)
-{
-	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	unsigned long offset;
-	unsigned long gpio_mask;
-	size_t index;
-	u8 __iomem *port_addr;
-	unsigned long bitmask;
-	unsigned long flags;
-
-	for_each_set_clump8(offset, gpio_mask, mask, chip->ngpio) {
-		index = offset / 8;
-		port_addr = ws16c48gpio->reg->port + index;
-
-		/* mask out GPIO configured for input */
-		gpio_mask &= ~ws16c48gpio->io_state[index];
-		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
-
-		raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
-
-		/* update output state data and set device gpio register */
-		ws16c48gpio->out_state[index] &= ~gpio_mask;
-		ws16c48gpio->out_state[index] |= bitmask;
-		iowrite8(ws16c48gpio->out_state[index], port_addr);
-
-		raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
-	}
-}
-
-static void ws16c48_irq_ack(struct irq_data *data)
-{
-	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
-	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	const unsigned long offset = irqd_to_hwirq(data);
-	const unsigned port = offset / 8;
-	const unsigned mask = BIT(offset % 8);
-	unsigned long flags;
-	unsigned port_state;
-
-	/* only the first 3 ports support interrupts */
-	if (port > 2)
-		return;
-
-	raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
-
-	port_state = ws16c48gpio->irq_mask >> (8*port);
-
-	/* Select Register Page 2; Unlock all I/O ports */
-	iowrite8(0x80, &ws16c48gpio->reg->page_lock);
-
-	/* Clear pending interrupt */
-	iowrite8(port_state & ~mask, ws16c48gpio->reg->pol_enab_int_id + port);
-	iowrite8(port_state | mask, ws16c48gpio->reg->pol_enab_int_id + port);
-
-	/* Select Register Page 3; Unlock all I/O ports */
-	iowrite8(0xC0, &ws16c48gpio->reg->page_lock);
-
-	raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
-}
-
-static void ws16c48_irq_mask(struct irq_data *data)
-{
-	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
-	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	const unsigned long offset = irqd_to_hwirq(data);
-	const unsigned long mask = BIT(offset);
-	const unsigned port = offset / 8;
-	unsigned long flags;
-	unsigned long port_state;
-
-	/* only the first 3 ports support interrupts */
-	if (port > 2)
-		return;
-
-	raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
-
-	ws16c48gpio->irq_mask &= ~mask;
-	gpiochip_disable_irq(chip, offset);
-	port_state = ws16c48gpio->irq_mask >> (8 * port);
-
-	/* Select Register Page 2; Unlock all I/O ports */
-	iowrite8(0x80, &ws16c48gpio->reg->page_lock);
-
-	/* Disable interrupt */
-	iowrite8(port_state, ws16c48gpio->reg->pol_enab_int_id + port);
-
-	/* Select Register Page 3; Unlock all I/O ports */
-	iowrite8(0xC0, &ws16c48gpio->reg->page_lock);
-
-	raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
-}
-
-static void ws16c48_irq_unmask(struct irq_data *data)
-{
-	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
-	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	const unsigned long offset = irqd_to_hwirq(data);
-	const unsigned long mask = BIT(offset);
-	const unsigned port = offset / 8;
-	unsigned long flags;
-	unsigned long port_state;
-
-	/* only the first 3 ports support interrupts */
-	if (port > 2)
-		return;
-
-	raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
-
-	gpiochip_enable_irq(chip, offset);
-	ws16c48gpio->irq_mask |= mask;
-	port_state = ws16c48gpio->irq_mask >> (8 * port);
-
-	/* Select Register Page 2; Unlock all I/O ports */
-	iowrite8(0x80, &ws16c48gpio->reg->page_lock);
-
-	/* Enable interrupt */
-	iowrite8(port_state, ws16c48gpio->reg->pol_enab_int_id + port);
-
-	/* Select Register Page 3; Unlock all I/O ports */
-	iowrite8(0xC0, &ws16c48gpio->reg->page_lock);
-
-	raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
-}
-
-static int ws16c48_irq_set_type(struct irq_data *data, unsigned flow_type)
-{
-	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
-	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(chip);
-	const unsigned long offset = irqd_to_hwirq(data);
-	const unsigned long mask = BIT(offset);
-	const unsigned port = offset / 8;
-	unsigned long flags;
-	unsigned long port_state;
-
-	/* only the first 3 ports support interrupts */
-	if (port > 2)
-		return -EINVAL;
-
-	raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
-
-	switch (flow_type) {
-	case IRQ_TYPE_NONE:
-		break;
+	switch (type) {
 	case IRQ_TYPE_EDGE_RISING:
-		ws16c48gpio->flow_mask |= mask;
+		polarity = irq_data->mask;
 		break;
 	case IRQ_TYPE_EDGE_FALLING:
-		ws16c48gpio->flow_mask &= ~mask;
+		polarity = 0;
 		break;
 	default:
-		raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
 		return -EINVAL;
 	}
 
-	port_state = ws16c48gpio->flow_mask >> (8 * port);
+	raw_spin_lock_irqsave(&ws16c48gpio->lock, flags);
 
-	/* Select Register Page 1; Unlock all I/O ports */
-	iowrite8(0x40, &ws16c48gpio->reg->page_lock);
+	ret = regmap_write(ws16c48gpio->map, WS16C48_PAGE_LOCK, POL_PAGE);
+	if (ret)
+		goto exit_unlock;
 
 	/* Set interrupt polarity */
-	iowrite8(port_state, ws16c48gpio->reg->pol_enab_int_id + port);
+	ret = regmap_update_bits(ws16c48gpio->map, WS16C48_POL + idx, irq_data->mask, polarity);
+	if (ret)
+		goto exit_unlock;
 
-	/* Select Register Page 3; Unlock all I/O ports */
-	iowrite8(0xC0, &ws16c48gpio->reg->page_lock);
+	ret = regmap_write(ws16c48gpio->map, WS16C48_PAGE_LOCK, INT_ID_PAGE);
+	if (ret)
+		goto exit_unlock;
 
+exit_unlock:
 	raw_spin_unlock_irqrestore(&ws16c48gpio->lock, flags);
 
-	return 0;
-}
-
-static const struct irq_chip ws16c48_irqchip = {
-	.name = "ws16c48",
-	.irq_ack = ws16c48_irq_ack,
-	.irq_mask = ws16c48_irq_mask,
-	.irq_unmask = ws16c48_irq_unmask,
-	.irq_set_type = ws16c48_irq_set_type,
-	.flags = IRQCHIP_IMMUTABLE,
-	GPIOCHIP_IRQ_RESOURCE_HELPERS,
-};
-
-static irqreturn_t ws16c48_irq_handler(int irq, void *dev_id)
-{
-	struct ws16c48_gpio *const ws16c48gpio = dev_id;
-	struct gpio_chip *const chip = &ws16c48gpio->chip;
-	struct ws16c48_reg __iomem *const reg = ws16c48gpio->reg;
-	unsigned long int_pending;
-	unsigned long port;
-	unsigned long int_id;
-	unsigned long gpio;
-
-	int_pending = ioread8(&reg->int_pending) & 0x7;
-	if (!int_pending)
-		return IRQ_NONE;
-
-	/* loop until all pending interrupts are handled */
-	do {
-		for_each_set_bit(port, &int_pending, 3) {
-			int_id = ioread8(reg->pol_enab_int_id + port);
-			for_each_set_bit(gpio, &int_id, 8)
-				generic_handle_domain_irq(chip->irq.domain,
-							  gpio + 8*port);
-		}
-
-		int_pending = ioread8(&reg->int_pending) & 0x7;
-	} while (int_pending);
-
-	return IRQ_HANDLED;
+	return ret;
 }
 
 #define WS16C48_NGPIO 48
@@ -414,30 +220,37 @@ static const char *ws16c48_names[WS16C48_NGPIO] = {
 	"Port 5 Bit 4", "Port 5 Bit 5", "Port 5 Bit 6", "Port 5 Bit 7"
 };
 
-static int ws16c48_irq_init_hw(struct gpio_chip *gc)
+static int ws16c48_irq_init_hw(struct regmap *const map)
 {
-	struct ws16c48_gpio *const ws16c48gpio = gpiochip_get_data(gc);
+	int err;
 
-	/* Select Register Page 2; Unlock all I/O ports */
-	iowrite8(0x80, &ws16c48gpio->reg->page_lock);
+	err = regmap_write(map, WS16C48_PAGE_LOCK, ENAB_PAGE);
+	if (err)
+		return err;
 
 	/* Disable interrupts for all lines */
-	iowrite8(0, &ws16c48gpio->reg->pol_enab_int_id[0]);
-	iowrite8(0, &ws16c48gpio->reg->pol_enab_int_id[1]);
-	iowrite8(0, &ws16c48gpio->reg->pol_enab_int_id[2]);
+	err = regmap_write(map, WS16C48_ENAB + 0, 0x00);
+	if (err)
+		return err;
+	err = regmap_write(map, WS16C48_ENAB + 1, 0x00);
+	if (err)
+		return err;
+	err = regmap_write(map, WS16C48_ENAB + 2, 0x00);
+	if (err)
+		return err;
 
-	/* Select Register Page 3; Unlock all I/O ports */
-	iowrite8(0xC0, &ws16c48gpio->reg->page_lock);
-
-	return 0;
+	return regmap_write(map, WS16C48_PAGE_LOCK, INT_ID_PAGE);
 }
 
 static int ws16c48_probe(struct device *dev, unsigned int id)
 {
 	struct ws16c48_gpio *ws16c48gpio;
 	const char *const name = dev_name(dev);
-	struct gpio_irq_chip *girq;
 	int err;
+	struct gpio_regmap_config gpio_config = {};
+	void __iomem *regs;
+	struct regmap_irq_chip *chip;
+	struct regmap_irq_chip_data *chip_data;
 
 	ws16c48gpio = devm_kzalloc(dev, sizeof(*ws16c48gpio), GFP_KERNEL);
 	if (!ws16c48gpio)
@@ -449,50 +262,55 @@ static int ws16c48_probe(struct device *dev, unsigned int id)
 		return -EBUSY;
 	}
 
-	ws16c48gpio->reg = devm_ioport_map(dev, base[id], WS16C48_EXTENT);
-	if (!ws16c48gpio->reg)
+	regs = devm_ioport_map(dev, base[id], WS16C48_EXTENT);
+	if (!regs)
 		return -ENOMEM;
 
-	ws16c48gpio->chip.label = name;
-	ws16c48gpio->chip.parent = dev;
-	ws16c48gpio->chip.owner = THIS_MODULE;
-	ws16c48gpio->chip.base = -1;
-	ws16c48gpio->chip.ngpio = WS16C48_NGPIO;
-	ws16c48gpio->chip.names = ws16c48_names;
-	ws16c48gpio->chip.get_direction = ws16c48_gpio_get_direction;
-	ws16c48gpio->chip.direction_input = ws16c48_gpio_direction_input;
-	ws16c48gpio->chip.direction_output = ws16c48_gpio_direction_output;
-	ws16c48gpio->chip.get = ws16c48_gpio_get;
-	ws16c48gpio->chip.get_multiple = ws16c48_gpio_get_multiple;
-	ws16c48gpio->chip.set = ws16c48_gpio_set;
-	ws16c48gpio->chip.set_multiple = ws16c48_gpio_set_multiple;
+	ws16c48gpio->map = devm_regmap_init_mmio(dev, regs, &ws16c48_regmap_config);
+	if (IS_ERR(ws16c48gpio->map))
+		return dev_err_probe(dev, PTR_ERR(ws16c48gpio->map),
+				     "Unable to initialize register map\n");
 
-	girq = &ws16c48gpio->chip.irq;
-	gpio_irq_chip_set_chip(girq, &ws16c48_irqchip);
-	/* This will let us handle the parent IRQ in the driver */
-	girq->parent_handler = NULL;
-	girq->num_parents = 0;
-	girq->parents = NULL;
-	girq->default_type = IRQ_TYPE_NONE;
-	girq->handler = handle_edge_irq;
-	girq->init_hw = ws16c48_irq_init_hw;
+	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	chip->name = name;
+	chip->status_base = WS16C48_INT_ID;
+	chip->mask_base = WS16C48_ENAB;
+	chip->ack_base = WS16C48_INT_ID;
+	chip->num_regs = 3;
+	chip->irqs = ws16c48_regmap_irqs;
+	chip->num_irqs = ARRAY_SIZE(ws16c48_regmap_irqs);
+	chip->handle_pre_irq = ws16c48_handle_pre_irq;
+	chip->handle_post_irq = ws16c48_handle_post_irq;
+	chip->handle_mask_sync = ws16c48_handle_mask_sync;
+	chip->set_type_config = ws16c48_set_type_config;
+	chip->irq_drv_data = ws16c48gpio;
 
 	raw_spin_lock_init(&ws16c48gpio->lock);
 
-	err = devm_gpiochip_add_data(dev, &ws16c48gpio->chip, ws16c48gpio);
-	if (err) {
-		dev_err(dev, "GPIO registering failed (%d)\n", err);
+	/* Initialize to prevent spurious interrupts before we're ready */
+	err = ws16c48_irq_init_hw(ws16c48gpio->map);
+	if (err)
 		return err;
-	}
 
-	err = devm_request_irq(dev, irq[id], ws16c48_irq_handler, IRQF_SHARED,
-		name, ws16c48gpio);
-	if (err) {
-		dev_err(dev, "IRQ handler registering failed (%d)\n", err);
-		return err;
-	}
+	err = devm_regmap_add_irq_chip(dev, ws16c48gpio->map, irq[id], 0, 0, chip, &chip_data);
+	if (err)
+		return dev_err_probe(dev, err, "IRQ registration failed\n");
 
-	return 0;
+	gpio_config.parent = dev;
+	gpio_config.regmap = ws16c48gpio->map;
+	gpio_config.ngpio = WS16C48_NGPIO;
+	gpio_config.names = ws16c48_names;
+	gpio_config.reg_dat_base = GPIO_REGMAP_ADDR(WS16C48_DAT_BASE);
+	gpio_config.reg_set_base = GPIO_REGMAP_ADDR(WS16C48_DAT_BASE);
+	/* Setting a GPIO to 0 allows it to be used as an input */
+	gpio_config.reg_dir_out_base = GPIO_REGMAP_ADDR(WS16C48_DAT_BASE);
+	gpio_config.ngpio_per_reg = WS16C48_NGPIO_PER_REG;
+	gpio_config.irq_domain = regmap_irq_get_domain(chip_data);
+
+	return PTR_ERR_OR_ZERO(devm_gpio_regmap_register(dev, &gpio_config));
 }
 
 static struct isa_driver ws16c48_driver = {

From a0e3b8e2acd0f6e30fe68dd74116596bcf59aa63 Mon Sep 17 00:00:00 2001
From: Rajeshwar R Shinde <coolrrsh@gmail.com>
Date: Thu, 10 Aug 2023 18:36:00 +0530
Subject: [PATCH 1032/2047] gpio: imx-scu: Use ARRAY_SIZE for array length

Use of macro ARRAY_SIZE to calculate array size minimizes
the redundant code and improves code reusability.
This fixes warnings reported by Coccinelle:
drivers/gpio/gpio-imx-scu.c:106:32-33: WARNING: Use ARRAY_SIZE

Signed-off-by: Rajeshwar R Shinde <coolrrsh@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-imx-scu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-imx-scu.c b/drivers/gpio/gpio-imx-scu.c
index e190bde5397d..13baf465aedf 100644
--- a/drivers/gpio/gpio-imx-scu.c
+++ b/drivers/gpio/gpio-imx-scu.c
@@ -6,6 +6,7 @@
  * to control the PIN resources on SCU domain.
  */
 
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/gpio/driver.h>
 #include <linux/platform_device.h>
@@ -103,7 +104,7 @@ static int imx_scu_gpio_probe(struct platform_device *pdev)
 	gc = &priv->chip;
 	gc->base = -1;
 	gc->parent = dev;
-	gc->ngpio = sizeof(scu_rsrc_arr)/sizeof(unsigned int);
+	gc->ngpio = ARRAY_SIZE(scu_rsrc_arr);
 	gc->label = dev_name(dev);
 	gc->get = imx_scu_gpio_get;
 	gc->set = imx_scu_gpio_set;

From b7df0f340b64c543bb84c42698347f08b6e18fe2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:59:49 +0200
Subject: [PATCH 1033/2047] gpio: mxs: fix Wvoid-pointer-to-enum-cast warning

'devid' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  gpio-mxs.c:274:16: error: cast to smaller integer type 'enum mxs_gpio_id' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-mxs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c
index 8e04c9c4b5a2..024ad077e98d 100644
--- a/drivers/gpio/gpio-mxs.c
+++ b/drivers/gpio/gpio-mxs.c
@@ -271,7 +271,7 @@ static int mxs_gpio_probe(struct platform_device *pdev)
 	port->id = of_alias_get_id(np, "gpio");
 	if (port->id < 0)
 		return port->id;
-	port->devid = (enum mxs_gpio_id)of_device_get_match_data(&pdev->dev);
+	port->devid = (uintptr_t)of_device_get_match_data(&pdev->dev);
 	port->dev = &pdev->dev;
 	port->irq = platform_get_irq(pdev, 0);
 	if (port->irq < 0)

From 5f6d1998adeb5374ef248b5ba2e2a0c30ab0f60b Mon Sep 17 00:00:00 2001
From: Shenwei Wang <shenwei.wang@nxp.com>
Date: Wed, 9 Aug 2023 09:13:24 -0500
Subject: [PATCH 1034/2047] gpio: mxc: release the parent IRQ in runtime
 suspend

Release the parent interrupt request during runtime suspend, allowing
the parent interrupt controller to enter runtime suspend if there are
no active users.

This change may not have a visible impact if the parent controller is
the GIC, but it can enable significant power savings for parent IRQ
controllers like IRQSteer inside a subsystem on i.MX8 SoCs. Releasing
the parent IRQ provides an opportunity for the subsystem to enter suspend
states if there are no active users.

Signed-off-by: Shenwei Wang <shenwei.wang@nxp.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-mxc.c | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index 377d3ab8d626..004c6ad7ce52 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -62,6 +62,7 @@ struct mxc_gpio_port {
 	struct clk *clk;
 	int irq;
 	int irq_high;
+	void (*mx_irq_handler)(struct irq_desc *desc);
 	struct irq_domain *domain;
 	struct gpio_chip gc;
 	struct device *dev;
@@ -399,6 +400,24 @@ static void mxc_gpio_free(struct gpio_chip *chip, unsigned int offset)
 	pm_runtime_put(chip->parent);
 }
 
+static void mxc_update_irq_chained_handler(struct mxc_gpio_port *port, bool enable)
+{
+	if (enable)
+		irq_set_chained_handler_and_data(port->irq, port->mx_irq_handler, port);
+	else
+		irq_set_chained_handler_and_data(port->irq, NULL, NULL);
+
+	/* setup handler for GPIO 16 to 31 */
+	if (port->irq_high > 0) {
+		if (enable)
+			irq_set_chained_handler_and_data(port->irq_high,
+							 port->mx_irq_handler,
+							 port);
+		else
+			irq_set_chained_handler_and_data(port->irq_high, NULL, NULL);
+	}
+}
+
 static int mxc_gpio_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -460,18 +479,12 @@ static int mxc_gpio_probe(struct platform_device *pdev)
 		 * the handler is needed only once, but doing it for every port
 		 * is more robust and easier.
 		 */
-		irq_set_chained_handler(port->irq, mx2_gpio_irq_handler);
-	} else {
-		/* setup one handler for each entry */
-		irq_set_chained_handler_and_data(port->irq,
-						 mx3_gpio_irq_handler, port);
-		if (port->irq_high > 0)
-			/* setup handler for GPIO 16 to 31 */
-			irq_set_chained_handler_and_data(port->irq_high,
-							 mx3_gpio_irq_handler,
-							 port);
-	}
+		port->irq_high = -1;
+		port->mx_irq_handler = mx2_gpio_irq_handler;
+	} else
+		port->mx_irq_handler = mx3_gpio_irq_handler;
 
+	mxc_update_irq_chained_handler(port, true);
 	err = bgpio_init(&port->gc, &pdev->dev, 4,
 			 port->base + GPIO_PSR,
 			 port->base + GPIO_DR, NULL,
@@ -604,6 +617,7 @@ static int mxc_gpio_runtime_suspend(struct device *dev)
 
 	mxc_gpio_save_regs(port);
 	clk_disable_unprepare(port->clk);
+	mxc_update_irq_chained_handler(port, false);
 
 	return 0;
 }
@@ -613,9 +627,12 @@ static int mxc_gpio_runtime_resume(struct device *dev)
 	struct mxc_gpio_port *port = dev_get_drvdata(dev);
 	int ret;
 
+	mxc_update_irq_chained_handler(port, true);
 	ret = clk_prepare_enable(port->clk);
-	if (ret)
+	if (ret) {
+		mxc_update_irq_chained_handler(port, false);
 		return ret;
+	}
 
 	mxc_gpio_restore_regs(port);
 

From 1393bc8c6a3311bf6496e704c59870e0fc44a2c3 Mon Sep 17 00:00:00 2001
From: Justin Ernst <justin.ernst@hpe.com>
Date: Tue, 1 Aug 2023 10:57:56 -0500
Subject: [PATCH 1035/2047] x86/uv: Update HPE Superdome Flex Maintainers

Mike Travis has retired. His expertise will be sorely missed.
Remove Mike's entry under SGI XP/XPC/XPNET DRIVER.
Replace Mike's entry under UV HPE SUPERDOME FLEX.

Signed-off-by: Justin Ernst <justin.ernst@hpe.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Joel Granados <j.granados@samsung.com>
Link: https://lore.kernel.org/all/20230801155756.22308-1-justin.ernst%40hpe.com
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0f966f05fb0d..7998c1ec2d19 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19283,7 +19283,6 @@ F:	drivers/misc/sgi-gru/
 SGI XP/XPC/XPNET DRIVER
 M:	Robin Holt <robinmholt@gmail.com>
 M:	Steve Wahl <steve.wahl@hpe.com>
-R:	Mike Travis <mike.travis@hpe.com>
 S:	Maintained
 F:	drivers/misc/sgi-xp/
 
@@ -23142,7 +23141,8 @@ F:	arch/x86/platform
 
 X86 PLATFORM UV HPE SUPERDOME FLEX
 M:	Steve Wahl <steve.wahl@hpe.com>
-R:	Mike Travis <mike.travis@hpe.com>
+R:	Justin Ernst <justin.ernst@hpe.com>
+R:	Kyle Meyer <kyle.meyer@hpe.com>
 R:	Dimitri Sivanich <dimitri.sivanich@hpe.com>
 R:	Russ Anderson <russ.anderson@hpe.com>
 S:	Supported

From 4f16443596f4d3dcf0474f8c753219bd6b9470f2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 10 Aug 2023 21:09:54 +0200
Subject: [PATCH 1036/2047] thermal: intel: intel_soc_dts_iosf: Always assume
 notification support

None of the existing callers of intel_soc_dts_iosf_init() passes
INTEL_SOC_DTS_INTERRUPT_NONE as the first argument to it, so the
notification local variable in it is always true and the
notification_support argument of add_dts_thermal_zone() is always
true either.

For this reason, drop the notification local variable from
intel_soc_dts_iosf_init() and the notification_support argument from
add_dts_thermal_zone() and rearrange the latter to always set
writable_trip_cnt and trip_mask.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 drivers/thermal/intel/intel_soc_dts_iosf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index 60bdca997c21..5c5790d6e786 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -247,12 +247,12 @@ static void remove_dts_thermal_zone(struct intel_soc_dts_sensor_entry *dts)
 }
 
 static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
-				bool notification_support, int read_only_trip_cnt)
+				int read_only_trip_cnt)
 {
 	char name[10];
 	unsigned long trip;
-	int trip_mask = 0;
-	int writable_trip_cnt = 0;
+	int writable_trip_cnt;
+	int trip_mask;
 	unsigned long ptps;
 	u32 store_ptps;
 	unsigned long i;
@@ -265,10 +265,9 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
 		goto err_ret;
 
 	dts->id = id;
-	if (notification_support) {
-		writable_trip_cnt = SOC_MAX_DTS_TRIPS - read_only_trip_cnt;
-		trip_mask = GENMASK(writable_trip_cnt - 1, 0);
-	}
+
+	writable_trip_cnt = SOC_MAX_DTS_TRIPS - read_only_trip_cnt;
+	trip_mask = GENMASK(writable_trip_cnt - 1, 0);
 
 	/* Check if the writable trip we provide is not used by BIOS */
 	ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
@@ -364,7 +363,6 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
 	enum intel_soc_dts_interrupt_type intr_type, int read_only_trip_count)
 {
 	struct intel_soc_dts_sensors *sensors;
-	bool notification;
 	int tj_max;
 	int ret;
 	int i;
@@ -387,14 +385,11 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
 	mutex_init(&sensors->dts_update_lock);
 	sensors->intr_type = intr_type;
 	sensors->tj_max = tj_max * 1000;
-	if (intr_type == INTEL_SOC_DTS_INTERRUPT_NONE)
-		notification = false;
-	else
-		notification = true;
+
 	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
 		sensors->soc_dts[i].sensors = sensors;
 		ret = add_dts_thermal_zone(i, &sensors->soc_dts[i],
-					   notification, read_only_trip_count);
+					   read_only_trip_count);
 		if (ret)
 			goto err_free;
 	}

From 0b28ba273ef3bdf26c933cbe2a624deb35c82aac Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 10 Aug 2023 21:11:26 +0200
Subject: [PATCH 1037/2047] thermal: intel: intel_soc_dts_iosf: Untangle
 update_trip_temp()

Function update_trip_temp() is currently used for the initialization
of trip points as well as for changing trip point temperatures in
sys_set_trip_temp().

This is quite confusing and passing the value of dts->trip_types[trip]
to it so that it can store that value in the same memory location is
not particularly useful, because it only is necessary to set the
trip point type once, at the initialization time.

For this reason, drop the last argument from update_trip_temp() and
introduce configure_trip() calling the former internally for the
initial configuration of trip points.

Modify the majority of update_trip_temp() callers to use
configure_trip() instead of it.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 drivers/thermal/intel/intel_soc_dts_iosf.c | 37 ++++++++++++++--------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index 5c5790d6e786..1f4a0ae1fda0 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -67,8 +67,7 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd, int trip,
 }
 
 static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts,
-			    int thres_index, int temp,
-			    enum thermal_trip_type trip_type)
+			    int thres_index, int temp)
 {
 	int status;
 	u32 temp_out;
@@ -142,8 +141,6 @@ static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts,
 	if (status)
 		goto err_restore_te_out;
 
-	dts->trip_types[thres_index] = trip_type;
-
 	return 0;
 err_restore_te_out:
 	iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
@@ -159,6 +156,21 @@ err_restore_ptps:
 	return status;
 }
 
+static int configure_trip(struct intel_soc_dts_sensor_entry *dts,
+			  int thres_index, enum thermal_trip_type trip_type,
+			  int temp)
+{
+	int ret;
+
+	ret = update_trip_temp(dts, thres_index, temp);
+	if (ret)
+		return ret;
+
+	dts->trip_types[thres_index] = trip_type;
+
+	return 0;
+}
+
 static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
 			     int temp)
 {
@@ -170,8 +182,7 @@ static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
 		return -EINVAL;
 
 	mutex_lock(&sensors->dts_update_lock);
-	status = update_trip_temp(dts, trip, temp,
-				  dts->trip_types[trip]);
+	status = update_trip_temp(dts, trip, temp);
 	mutex_unlock(&sensors->dts_update_lock);
 
 	return status;
@@ -317,7 +328,7 @@ int intel_soc_dts_iosf_add_read_only_critical_trip(
 
 		j = find_first_zero_bit(&mask, SOC_MAX_DTS_TRIPS);
 		if (j < SOC_MAX_DTS_TRIPS)
-			return update_trip_temp(entry, j, temp, THERMAL_TRIP_CRITICAL);
+			return configure_trip(entry, j, THERMAL_TRIP_CRITICAL, temp);
 	}
 
 	return -EINVAL;
@@ -395,13 +406,13 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
 	}
 
 	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
-		ret = update_trip_temp(&sensors->soc_dts[i], 0, 0,
-				       THERMAL_TRIP_PASSIVE);
+		ret = configure_trip(&sensors->soc_dts[i], 0,
+				     THERMAL_TRIP_PASSIVE, 0);
 		if (ret)
 			goto err_remove_zone;
 
-		ret = update_trip_temp(&sensors->soc_dts[i], 1, 0,
-				       THERMAL_TRIP_PASSIVE);
+		ret = configure_trip(&sensors->soc_dts[i], 1,
+				     THERMAL_TRIP_PASSIVE, 0);
 		if (ret)
 			goto err_remove_zone;
 	}
@@ -422,8 +433,8 @@ void intel_soc_dts_iosf_exit(struct intel_soc_dts_sensors *sensors)
 	int i;
 
 	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
-		update_trip_temp(&sensors->soc_dts[i], 0, 0, 0);
-		update_trip_temp(&sensors->soc_dts[i], 1, 0, 0);
+		configure_trip(&sensors->soc_dts[i], 0, 0, 0);
+		configure_trip(&sensors->soc_dts[i], 1, 0, 0);
 		remove_dts_thermal_zone(&sensors->soc_dts[i]);
 	}
 	kfree(sensors);

From cbc280570438ce0e6bcb10288048eaa00762b3db Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 10 Aug 2023 21:12:32 +0200
Subject: [PATCH 1038/2047] thermal: intel: intel_soc_dts_iosf: Pass sensors to
 update_trip_temp()

After previous changes, update_trip_temp() only uses its dts argument to
get to the sensors field in the struct intel_soc_dts_sensor_entry object
pointed to by that argument, so pass the value of that field directly to
it instead.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 drivers/thermal/intel/intel_soc_dts_iosf.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index 1f4a0ae1fda0..afd1cbe280bf 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -66,7 +66,7 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd, int trip,
 	return 0;
 }
 
-static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts,
+static int update_trip_temp(struct intel_soc_dts_sensors *sensors,
 			    int thres_index, int temp)
 {
 	int status;
@@ -78,7 +78,6 @@ static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts,
 	u32 store_te_out;
 	u32 te_out;
 	u32 int_enable_bit = SOC_DTS_TE_APICA_ENABLE;
-	struct intel_soc_dts_sensors *sensors = dts->sensors;
 
 	if (sensors->intr_type == INTEL_SOC_DTS_INTERRUPT_MSI)
 		int_enable_bit |= SOC_DTS_TE_MSI_ENABLE;
@@ -162,7 +161,7 @@ static int configure_trip(struct intel_soc_dts_sensor_entry *dts,
 {
 	int ret;
 
-	ret = update_trip_temp(dts, thres_index, temp);
+	ret = update_trip_temp(dts->sensors, thres_index, temp);
 	if (ret)
 		return ret;
 
@@ -182,7 +181,7 @@ static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
 		return -EINVAL;
 
 	mutex_lock(&sensors->dts_update_lock);
-	status = update_trip_temp(dts, trip, temp);
+	status = update_trip_temp(sensors, trip, temp);
 	mutex_unlock(&sensors->dts_update_lock);
 
 	return status;

From 51f2aaf0dfb1a21281e2d47c94b1a5b245b033fa Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 10 Aug 2023 21:13:25 +0200
Subject: [PATCH 1039/2047] thermal: intel: intel_soc_dts_iosf: Change
 initialization ordering

The initial configuration of trip points in intel_soc_dts_iosf_init()
takes place after registering the sensor thermal zones which is
potentially problematic, because it may race with the setting of trip
point temperatures via sysfs, as there is no synchronization between it
and sys_set_trip_temp().

To address this, change the initialization ordering so that the trip
points are configured prior to the registration of thermal zones.

Accordingly, change the cleanup ordering in intel_soc_dts_iosf_exit()
to remove the thermal zones before resetting the trip points.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 drivers/thermal/intel/intel_soc_dts_iosf.c | 25 ++++++++++++++--------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index afd1cbe280bf..5ca2e565f9a3 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -398,30 +398,37 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
 
 	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
 		sensors->soc_dts[i].sensors = sensors;
-		ret = add_dts_thermal_zone(i, &sensors->soc_dts[i],
-					   read_only_trip_count);
-		if (ret)
-			goto err_free;
-	}
 
-	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
 		ret = configure_trip(&sensors->soc_dts[i], 0,
 				     THERMAL_TRIP_PASSIVE, 0);
 		if (ret)
-			goto err_remove_zone;
+			goto err_reset_trips;
 
 		ret = configure_trip(&sensors->soc_dts[i], 1,
 				     THERMAL_TRIP_PASSIVE, 0);
+		if (ret)
+			goto err_reset_trips;
+	}
+
+	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
+		ret = add_dts_thermal_zone(i, &sensors->soc_dts[i],
+					   read_only_trip_count);
 		if (ret)
 			goto err_remove_zone;
 	}
 
 	return sensors;
+
 err_remove_zone:
 	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i)
 		remove_dts_thermal_zone(&sensors->soc_dts[i]);
 
-err_free:
+err_reset_trips:
+	for (i = 0; i < SOC_MAX_DTS_SENSORS; i++) {
+		configure_trip(&sensors->soc_dts[i], 0, 0, 0);
+		configure_trip(&sensors->soc_dts[i], 1, 0, 0);
+	}
+
 	kfree(sensors);
 	return ERR_PTR(ret);
 }
@@ -432,9 +439,9 @@ void intel_soc_dts_iosf_exit(struct intel_soc_dts_sensors *sensors)
 	int i;
 
 	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
+		remove_dts_thermal_zone(&sensors->soc_dts[i]);
 		configure_trip(&sensors->soc_dts[i], 0, 0, 0);
 		configure_trip(&sensors->soc_dts[i], 1, 0, 0);
-		remove_dts_thermal_zone(&sensors->soc_dts[i]);
 	}
 	kfree(sensors);
 }

From 5bc3da35d7ad35e61d2b902f0fdc9b530bed3d89 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 10 Aug 2023 21:14:49 +0200
Subject: [PATCH 1040/2047] thermal: intel: intel_soc_dts_iosf: Add helper for
 resetting trip points

Because trip points are reset for each sensor in two places in the
same way, add a helper function for that to reduce code duplication
a bit.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 drivers/thermal/intel/intel_soc_dts_iosf.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index 5ca2e565f9a3..d1d2c77bbce7 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -369,6 +369,12 @@ void intel_soc_dts_iosf_interrupt_handler(struct intel_soc_dts_sensors *sensors)
 }
 EXPORT_SYMBOL_GPL(intel_soc_dts_iosf_interrupt_handler);
 
+static void dts_trips_reset(struct intel_soc_dts_sensors *sensors, int dts_index)
+{
+	configure_trip(&sensors->soc_dts[dts_index], 0, 0, 0);
+	configure_trip(&sensors->soc_dts[dts_index], 1, 0, 0);
+}
+
 struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
 	enum intel_soc_dts_interrupt_type intr_type, int read_only_trip_count)
 {
@@ -424,10 +430,8 @@ err_remove_zone:
 		remove_dts_thermal_zone(&sensors->soc_dts[i]);
 
 err_reset_trips:
-	for (i = 0; i < SOC_MAX_DTS_SENSORS; i++) {
-		configure_trip(&sensors->soc_dts[i], 0, 0, 0);
-		configure_trip(&sensors->soc_dts[i], 1, 0, 0);
-	}
+	for (i = 0; i < SOC_MAX_DTS_SENSORS; i++)
+		dts_trips_reset(sensors, i);
 
 	kfree(sensors);
 	return ERR_PTR(ret);
@@ -440,8 +444,7 @@ void intel_soc_dts_iosf_exit(struct intel_soc_dts_sensors *sensors)
 
 	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
 		remove_dts_thermal_zone(&sensors->soc_dts[i]);
-		configure_trip(&sensors->soc_dts[i], 0, 0, 0);
-		configure_trip(&sensors->soc_dts[i], 1, 0, 0);
+		dts_trips_reset(sensors, i);
 	}
 	kfree(sensors);
 }

From 02a49aaceff4a229d775b82ccaaafe59fe16b8f5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 10 Aug 2023 21:16:14 +0200
Subject: [PATCH 1041/2047] thermal: intel: intel_soc_dts_iosf: Rework critical
 trip setup

Critical trip points appear in the DTS thermal zones only after those
thermal zones have been registered via intel_soc_dts_iosf_init().
Moreover, they are "created" by changing the type of an existing trip
point from THERMAL_TRIP_PASSIVE to THERMAL_TRIP_CRITICAL via
intel_soc_dts_iosf_add_read_only_critical_trip(), the caller of which
has to be careful enough to pass at least 1 as the number of read-only
trip points to intel_soc_dts_iosf_init() beforehand.

This is questionable, because user space may have started to use the
trips at the time when intel_soc_dts_iosf_add_read_only_critical_trip()
runs and there is no synchronization between it and sys_set_trip_temp().

To address it, use the observation that nonzero number of read-only
trip points is only passed to intel_soc_dts_iosf_init() when critical
trip points are going to be used, so in fact that function may get all
of the information regarding the critical trip points upfront and it
can configure them before registering the corresponding thermal zones.

Accordingly, replace the read_only_trip_count argument of
intel_soc_dts_iosf_init() with a pair of new arguments related to
critical trip points: a bool one indicating whether or not critical
trip points are to be used at all and an int one representing the
critical trip point temperature offset relative to Tj_max.  Use these
arguments to configure the critical trip points before the registration
of the thermal zones and to compute the number of writeable trip points
in add_dts_thermal_zone().

Modify both callers of intel_soc_dts_iosf_init() to take these changes
into account and drop the intel_soc_dts_iosf_add_read_only_critical_trip()
call, that is not necessary any more, from intel_soc_thermal_init(),
which also allows it to return success right after requesting the IRQ.

Finally, drop intel_soc_dts_iosf_add_read_only_critical_trip()
altogether, because it does not have any more users.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 .../processor_thermal_device_pci_legacy.c     |  2 +-
 drivers/thermal/intel/intel_soc_dts_iosf.c    | 51 ++++++++-----------
 drivers/thermal/intel/intel_soc_dts_iosf.h    |  8 +--
 drivers/thermal/intel/intel_soc_dts_thermal.c | 17 +------
 4 files changed, 27 insertions(+), 51 deletions(-)

diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
index 46ec0e243692..16fd9df5f36d 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
@@ -59,7 +59,7 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev,
 		 * ACPI/MSR. So we don't want to fail for auxiliary DTSs.
 		 */
 		proc_priv->soc_dts = intel_soc_dts_iosf_init(
-					INTEL_SOC_DTS_INTERRUPT_MSI, 0);
+					INTEL_SOC_DTS_INTERRUPT_MSI, false, 0);
 
 		if (!IS_ERR(proc_priv->soc_dts) && pdev->irq) {
 			ret = pci_enable_msi(pdev);
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index d1d2c77bbce7..cb418a91100c 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -257,11 +257,11 @@ static void remove_dts_thermal_zone(struct intel_soc_dts_sensor_entry *dts)
 }
 
 static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
-				int read_only_trip_cnt)
+				bool critical_trip)
 {
+	int writable_trip_cnt = SOC_MAX_DTS_TRIPS;
 	char name[10];
 	unsigned long trip;
-	int writable_trip_cnt;
 	int trip_mask;
 	unsigned long ptps;
 	u32 store_ptps;
@@ -276,7 +276,9 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
 
 	dts->id = id;
 
-	writable_trip_cnt = SOC_MAX_DTS_TRIPS - read_only_trip_cnt;
+	if (critical_trip)
+		writable_trip_cnt--;
+
 	trip_mask = GENMASK(writable_trip_cnt - 1, 0);
 
 	/* Check if the writable trip we provide is not used by BIOS */
@@ -315,25 +317,6 @@ err_ret:
 	return ret;
 }
 
-int intel_soc_dts_iosf_add_read_only_critical_trip(
-	struct intel_soc_dts_sensors *sensors, int critical_offset)
-{
-	int i, j;
-
-	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
-		struct intel_soc_dts_sensor_entry *entry = &sensors->soc_dts[i];
-		int temp = sensors->tj_max - critical_offset;
-		unsigned long mask = entry->trip_mask;
-
-		j = find_first_zero_bit(&mask, SOC_MAX_DTS_TRIPS);
-		if (j < SOC_MAX_DTS_TRIPS)
-			return configure_trip(entry, j, THERMAL_TRIP_CRITICAL, temp);
-	}
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(intel_soc_dts_iosf_add_read_only_critical_trip);
-
 void intel_soc_dts_iosf_interrupt_handler(struct intel_soc_dts_sensors *sensors)
 {
 	u32 sticky_out;
@@ -375,8 +358,9 @@ static void dts_trips_reset(struct intel_soc_dts_sensors *sensors, int dts_index
 	configure_trip(&sensors->soc_dts[dts_index], 1, 0, 0);
 }
 
-struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
-	enum intel_soc_dts_interrupt_type intr_type, int read_only_trip_count)
+struct intel_soc_dts_sensors *
+intel_soc_dts_iosf_init(enum intel_soc_dts_interrupt_type intr_type,
+			bool critical_trip, int crit_offset)
 {
 	struct intel_soc_dts_sensors *sensors;
 	int tj_max;
@@ -386,9 +370,6 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
 	if (!iosf_mbi_available())
 		return ERR_PTR(-ENODEV);
 
-	if (read_only_trip_count > SOC_MAX_DTS_TRIPS)
-		return ERR_PTR(-EINVAL);
-
 	tj_max = intel_tcc_get_tjmax(-1);
 	if (tj_max < 0)
 		return ERR_PTR(tj_max);
@@ -403,6 +384,9 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
 	sensors->tj_max = tj_max * 1000;
 
 	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
+		enum thermal_trip_type trip_type;
+		int temp;
+
 		sensors->soc_dts[i].sensors = sensors;
 
 		ret = configure_trip(&sensors->soc_dts[i], 0,
@@ -410,15 +394,20 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
 		if (ret)
 			goto err_reset_trips;
 
-		ret = configure_trip(&sensors->soc_dts[i], 1,
-				     THERMAL_TRIP_PASSIVE, 0);
+		if (critical_trip) {
+			trip_type = THERMAL_TRIP_CRITICAL;
+			temp = sensors->tj_max - crit_offset;
+		} else {
+			trip_type = THERMAL_TRIP_PASSIVE;
+			temp = 0;
+		}
+		ret = configure_trip(&sensors->soc_dts[i], 1, trip_type, temp);
 		if (ret)
 			goto err_reset_trips;
 	}
 
 	for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
-		ret = add_dts_thermal_zone(i, &sensors->soc_dts[i],
-					   read_only_trip_count);
+		ret = add_dts_thermal_zone(i, &sensors->soc_dts[i], critical_trip);
 		if (ret)
 			goto err_remove_zone;
 	}
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.h b/drivers/thermal/intel/intel_soc_dts_iosf.h
index d10902e12592..739aa556dc43 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.h
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.h
@@ -42,11 +42,11 @@ struct intel_soc_dts_sensors {
 	struct intel_soc_dts_sensor_entry soc_dts[SOC_MAX_DTS_SENSORS];
 };
 
-struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
-	enum intel_soc_dts_interrupt_type intr_type, int read_only_trip_count);
+
+struct intel_soc_dts_sensors *
+intel_soc_dts_iosf_init(enum intel_soc_dts_interrupt_type intr_type,
+			bool critical_trip, int crit_offset);
 void intel_soc_dts_iosf_exit(struct intel_soc_dts_sensors *sensors);
 void intel_soc_dts_iosf_interrupt_handler(
 				struct intel_soc_dts_sensors *sensors);
-int intel_soc_dts_iosf_add_read_only_critical_trip(
-	struct intel_soc_dts_sensors *sensors, int critical_offset);
 #endif
diff --git a/drivers/thermal/intel/intel_soc_dts_thermal.c b/drivers/thermal/intel/intel_soc_dts_thermal.c
index 8e21e5213dff..9c825c6e1f38 100644
--- a/drivers/thermal/intel/intel_soc_dts_thermal.c
+++ b/drivers/thermal/intel/intel_soc_dts_thermal.c
@@ -51,7 +51,8 @@ static int __init intel_soc_thermal_init(void)
 		return -ENODEV;
 
 	/* Create a zone with 2 trips with marked as read only */
-	soc_dts = intel_soc_dts_iosf_init(INTEL_SOC_DTS_INTERRUPT_APIC, 1);
+	soc_dts = intel_soc_dts_iosf_init(INTEL_SOC_DTS_INTERRUPT_APIC, true,
+					  crit_offset);
 	if (IS_ERR(soc_dts)) {
 		err = PTR_ERR(soc_dts);
 		return err;
@@ -88,21 +89,7 @@ static int __init intel_soc_thermal_init(void)
 		}
 	}
 
-	err = intel_soc_dts_iosf_add_read_only_critical_trip(soc_dts,
-							     crit_offset);
-	if (err)
-		goto error_trips;
-
 	return 0;
-
-error_trips:
-	if (soc_dts_thres_irq) {
-		free_irq(soc_dts_thres_irq, soc_dts);
-		acpi_unregister_gsi(soc_dts_thres_gsi);
-	}
-	intel_soc_dts_iosf_exit(soc_dts);
-
-	return err;
 }
 
 static void __exit intel_soc_thermal_exit(void)

From 4effd28e61e768bcc2017c58cd23bd0846649ac4 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 10 Aug 2023 21:17:36 +0200
Subject: [PATCH 1042/2047] thermal: intel: intel_soc_dts_iosf: Use struct
 thermal_trip

Because the number of trip points in each thermal zone and their
types are known to intel_soc_dts_iosf_init() prior to the registration
of the thermal zones, make it create an array of struct thermal_trip
entries in each struct intel_soc_dts_sensor_entry object and make
add_dts_thermal_zone() use thermal_zone_device_register_with_trips()
for thermal zone registration and pass that array as its second
argument.

Drop the sys_get_trip_temp() and sys_get_trip_type() callback
functions along with the respective callback pointers in
tzone_ops, because they are not necessary any more.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 drivers/thermal/intel/intel_soc_dts_iosf.c | 51 +++-------------------
 drivers/thermal/intel/intel_soc_dts_iosf.h |  2 +-
 2 files changed, 8 insertions(+), 45 deletions(-)

diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index cb418a91100c..7a66d0f077b0 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -40,32 +40,6 @@
 /* Mask for two trips in status bits */
 #define SOC_DTS_TRIP_MASK		0x03
 
-static int sys_get_trip_temp(struct thermal_zone_device *tzd, int trip,
-			     int *temp)
-{
-	int status;
-	u32 out;
-	struct intel_soc_dts_sensor_entry *dts;
-	struct intel_soc_dts_sensors *sensors;
-
-	dts = thermal_zone_device_priv(tzd);
-	sensors = dts->sensors;
-	mutex_lock(&sensors->dts_update_lock);
-	status = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
-			       SOC_DTS_OFFSET_PTPS, &out);
-	mutex_unlock(&sensors->dts_update_lock);
-	if (status)
-		return status;
-
-	out = (out >> (trip * 8)) & SOC_DTS_TJMAX_ENCODING;
-	if (!out)
-		*temp = 0;
-	else
-		*temp = sensors->tj_max - out * 1000;
-
-	return 0;
-}
-
 static int update_trip_temp(struct intel_soc_dts_sensors *sensors,
 			    int thres_index, int temp)
 {
@@ -165,7 +139,8 @@ static int configure_trip(struct intel_soc_dts_sensor_entry *dts,
 	if (ret)
 		return ret;
 
-	dts->trip_types[thres_index] = trip_type;
+	dts->trips[thres_index].temperature = temp;
+	dts->trips[thres_index].type = trip_type;
 
 	return 0;
 }
@@ -187,16 +162,6 @@ static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
 	return status;
 }
 
-static int sys_get_trip_type(struct thermal_zone_device *tzd,
-			     int trip, enum thermal_trip_type *type)
-{
-	struct intel_soc_dts_sensor_entry *dts = thermal_zone_device_priv(tzd);
-
-	*type = dts->trip_types[trip];
-
-	return 0;
-}
-
 static int sys_get_curr_temp(struct thermal_zone_device *tzd,
 			     int *temp)
 {
@@ -221,8 +186,6 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd,
 
 static struct thermal_zone_device_ops tzone_ops = {
 	.get_temp = sys_get_curr_temp,
-	.get_trip_temp = sys_get_trip_temp,
-	.get_trip_type = sys_get_trip_type,
 	.set_trip_temp = sys_set_trip_temp,
 };
 
@@ -293,11 +256,11 @@ static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
 	}
 	dts->trip_mask = trip_mask;
 	snprintf(name, sizeof(name), "soc_dts%d", id);
-	dts->tzone = thermal_zone_device_register(name,
-						  SOC_MAX_DTS_TRIPS,
-						  trip_mask,
-						  dts, &tzone_ops,
-						  NULL, 0, 0);
+	dts->tzone = thermal_zone_device_register_with_trips(name, dts->trips,
+							     SOC_MAX_DTS_TRIPS,
+							     trip_mask,
+							     dts, &tzone_ops,
+							     NULL, 0, 0);
 	if (IS_ERR(dts->tzone)) {
 		ret = PTR_ERR(dts->tzone);
 		goto err_ret;
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.h b/drivers/thermal/intel/intel_soc_dts_iosf.h
index 739aa556dc43..162841df0ebe 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.h
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.h
@@ -29,7 +29,7 @@ struct intel_soc_dts_sensor_entry {
 	int id;
 	u32 store_status;
 	u32 trip_mask;
-	enum thermal_trip_type trip_types[SOC_MAX_DTS_TRIPS];
+	struct thermal_trip trips[SOC_MAX_DTS_TRIPS];
 	struct thermal_zone_device *tzone;
 	struct intel_soc_dts_sensors *sensors;
 };

From a40fe1ffb69b709835a0623959f95d6c81ff38c1 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Fri, 11 Aug 2023 15:14:26 +0200
Subject: [PATCH 1043/2047] gpio: sim: simplify
 gpio_sim_device_config_live_store()

Simplify the logic when checking the current live value against the user
input.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-sim.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index 1a3729eb44eb..1fbbb49985f6 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -986,8 +986,7 @@ gpio_sim_device_config_live_store(struct config_item *item,
 
 	mutex_lock(&dev->lock);
 
-	if ((!live && !gpio_sim_device_is_live_unlocked(dev)) ||
-	    (live && gpio_sim_device_is_live_unlocked(dev)))
+	if (live == gpio_sim_device_is_live_unlocked(dev))
 		ret = -EPERM;
 	else if (live)
 		ret = gpio_sim_device_activate_unlocked(dev);

From e5a8192942da0026f28a1713684bd46eded04c92 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 12 Aug 2023 21:58:38 +0200
Subject: [PATCH 1044/2047] x86/mm: Remove unused microcode.h include

No usage for anything in that header.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230812195727.600549655@linutronix.de
---
 arch/x86/mm/init.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 8192452d1d2d..f1697a522845 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -20,7 +20,6 @@
 #include <asm/tlb.h>
 #include <asm/proto.h>
 #include <asm/dma.h>		/* for MAX_DMA_PFN */
-#include <asm/microcode.h>
 #include <asm/kaslr.h>
 #include <asm/hypervisor.h>
 #include <asm/cpufeature.h>

From e6bcfdd75d53390a67f67237f4eafc77d9772056 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 10 Aug 2023 20:37:29 +0200
Subject: [PATCH 1045/2047] x86/microcode: Hide the config knob

In reality CONFIG_MICROCODE is enabled in any reasonable configuration when
Intel or AMD support is enabled. Accommodate to reality.

Suggested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230812195727.660453052@linutronix.de
---
 arch/x86/Kconfig                       | 38 +-------------------------
 arch/x86/include/asm/microcode.h       |  6 ++--
 arch/x86/include/asm/microcode_amd.h   |  2 +-
 arch/x86/include/asm/microcode_intel.h |  2 +-
 arch/x86/kernel/cpu/microcode/Makefile |  4 +--
 5 files changed, 8 insertions(+), 44 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7422db409770..ae6503cf9bc9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1308,44 +1308,8 @@ config X86_REBOOTFIXUPS
 	  Say N otherwise.
 
 config MICROCODE
-	bool "CPU microcode loading support"
-	default y
+	def_bool y
 	depends on CPU_SUP_AMD || CPU_SUP_INTEL
-	help
-	  If you say Y here, you will be able to update the microcode on
-	  Intel and AMD processors. The Intel support is for the IA32 family,
-	  e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The
-	  AMD support is for families 0x10 and later. You will obviously need
-	  the actual microcode binary data itself which is not shipped with
-	  the Linux kernel.
-
-	  The preferred method to load microcode from a detached initrd is described
-	  in Documentation/arch/x86/microcode.rst. For that you need to enable
-	  CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
-	  initrd for microcode blobs.
-
-	  In addition, you can build the microcode into the kernel. For that you
-	  need to add the vendor-supplied microcode to the CONFIG_EXTRA_FIRMWARE
-	  config option.
-
-config MICROCODE_INTEL
-	bool "Intel microcode loading support"
-	depends on CPU_SUP_INTEL && MICROCODE
-	default MICROCODE
-	help
-	  This options enables microcode patch loading support for Intel
-	  processors.
-
-	  For the current Intel microcode data package go to
-	  <https://downloadcenter.intel.com> and search for
-	  'Linux Processor Microcode Data File'.
-
-config MICROCODE_AMD
-	bool "AMD microcode loading support"
-	depends on CPU_SUP_AMD && MICROCODE
-	help
-	  If you select this option, microcode patch loading support for AMD
-	  processors will be enabled.
 
 config MICROCODE_LATE_LOADING
 	bool "Late microcode loading (DANGEROUS)"
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 320566a0443d..0deab6c96d02 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -54,16 +54,16 @@ struct ucode_cpu_info {
 extern struct ucode_cpu_info ucode_cpu_info[];
 struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa);
 
-#ifdef CONFIG_MICROCODE_INTEL
+#ifdef CONFIG_CPU_SUP_INTEL
 extern struct microcode_ops * __init init_intel_microcode(void);
 #else
 static inline struct microcode_ops * __init init_intel_microcode(void)
 {
 	return NULL;
 }
-#endif /* CONFIG_MICROCODE_INTEL */
+#endif /* CONFIG_CPU_SUP_INTEL */
 
-#ifdef CONFIG_MICROCODE_AMD
+#ifdef CONFIG_CPU_SUP_AMD
 extern struct microcode_ops * __init init_amd_microcode(void);
 extern void __exit exit_amd_microcode(void);
 #else
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index a995b7685223..58e9c83732c9 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -43,7 +43,7 @@ struct microcode_amd {
 
 #define PATCH_MAX_SIZE (3 * PAGE_SIZE)
 
-#ifdef CONFIG_MICROCODE_AMD
+#ifdef CONFIG_CPU_SUP_AMD
 extern void load_ucode_amd_early(unsigned int cpuid_1_eax);
 extern int __init save_microcode_in_initrd_amd(unsigned int family);
 void reload_ucode_amd(unsigned int cpu);
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index f1fa979e05bf..a279dee8f898 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -71,7 +71,7 @@ static inline u32 intel_get_microcode_revision(void)
 	return rev;
 }
 
-#ifdef CONFIG_MICROCODE_INTEL
+#ifdef CONFIG_CPU_SUP_INTEL
 extern void __init load_ucode_intel_bsp(void);
 extern void load_ucode_intel_ap(void);
 extern void show_ucode_info_early(void);
diff --git a/arch/x86/kernel/cpu/microcode/Makefile b/arch/x86/kernel/cpu/microcode/Makefile
index 34098d48c48f..193d98b33a0a 100644
--- a/arch/x86/kernel/cpu/microcode/Makefile
+++ b/arch/x86/kernel/cpu/microcode/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 microcode-y				:= core.o
 obj-$(CONFIG_MICROCODE)			+= microcode.o
-microcode-$(CONFIG_MICROCODE_INTEL)	+= intel.o
-microcode-$(CONFIG_MICROCODE_AMD)	+= amd.o
+microcode-$(CONFIG_CPU_SUP_INTEL)	+= intel.o
+microcode-$(CONFIG_CPU_SUP_AMD)		+= amd.o

From 4da2131fac10b6928da0b2e39b65189203810f44 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 12 Aug 2023 21:58:41 +0200
Subject: [PATCH 1046/2047] x86/microcode/intel: Move microcode functions out
 of cpu/intel.c

There is really no point to have that in the CPUID evaluation code. Move it
into the Intel-specific microcode handling along with the data
structures, defines and helpers required by it. The exports need to stay
for IFS.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230812195727.719202319@linutronix.de
---
 arch/x86/include/asm/microcode_intel.h |  28 +---
 arch/x86/kernel/cpu/intel.c            | 174 ---------------------
 arch/x86/kernel/cpu/microcode/intel.c  | 202 +++++++++++++++++++++++++
 3 files changed, 204 insertions(+), 200 deletions(-)

diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index a279dee8f898..4a32eadb95bf 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -23,39 +23,15 @@ struct microcode_intel {
 	unsigned int            bits[];
 };
 
-/* microcode format is extended from prescott processors */
-struct extended_signature {
-	unsigned int            sig;
-	unsigned int            pf;
-	unsigned int            cksum;
-};
-
-struct extended_sigtable {
-	unsigned int            count;
-	unsigned int            cksum;
-	unsigned int            reserved[3];
-	struct extended_signature sigs[];
-};
-
-#define DEFAULT_UCODE_DATASIZE	(2000)
-#define MC_HEADER_SIZE		(sizeof(struct microcode_header_intel))
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
-#define EXT_HEADER_SIZE		(sizeof(struct extended_sigtable))
-#define EXT_SIGNATURE_SIZE	(sizeof(struct extended_signature))
+#define MC_HEADER_SIZE			(sizeof(struct microcode_header_intel))
 #define MC_HEADER_TYPE_MICROCODE	1
 #define MC_HEADER_TYPE_IFS		2
-
-#define get_totalsize(mc) \
-	(((struct microcode_intel *)mc)->hdr.datasize ? \
-	 ((struct microcode_intel *)mc)->hdr.totalsize : \
-	 DEFAULT_UCODE_TOTALSIZE)
+#define DEFAULT_UCODE_DATASIZE		(2000)
 
 #define get_datasize(mc) \
 	(((struct microcode_intel *)mc)->hdr.datasize ? \
 	 ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
 
-#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
-
 static inline u32 intel_get_microcode_revision(void)
 {
 	u32 rev, dummy;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1c4639588ff9..5a8f56942a71 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -184,180 +184,6 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
 	return false;
 }
 
-int intel_cpu_collect_info(struct ucode_cpu_info *uci)
-{
-	unsigned int val[2];
-	unsigned int family, model;
-	struct cpu_signature csig = { 0 };
-	unsigned int eax, ebx, ecx, edx;
-
-	memset(uci, 0, sizeof(*uci));
-
-	eax = 0x00000001;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-	csig.sig = eax;
-
-	family = x86_family(eax);
-	model  = x86_model(eax);
-
-	if (model >= 5 || family > 6) {
-		/* get processor flags from MSR 0x17 */
-		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
-		csig.pf = 1 << ((val[1] >> 18) & 7);
-	}
-
-	csig.rev = intel_get_microcode_revision();
-
-	uci->cpu_sig = csig;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(intel_cpu_collect_info);
-
-/*
- * Returns 1 if update has been found, 0 otherwise.
- */
-int intel_find_matching_signature(void *mc, unsigned int csig, int cpf)
-{
-	struct microcode_header_intel *mc_hdr = mc;
-	struct extended_sigtable *ext_hdr;
-	struct extended_signature *ext_sig;
-	int i;
-
-	if (intel_cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf))
-		return 1;
-
-	/* Look for ext. headers: */
-	if (get_totalsize(mc_hdr) <= get_datasize(mc_hdr) + MC_HEADER_SIZE)
-		return 0;
-
-	ext_hdr = mc + get_datasize(mc_hdr) + MC_HEADER_SIZE;
-	ext_sig = (void *)ext_hdr + EXT_HEADER_SIZE;
-
-	for (i = 0; i < ext_hdr->count; i++) {
-		if (intel_cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf))
-			return 1;
-		ext_sig++;
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(intel_find_matching_signature);
-
-/**
- * intel_microcode_sanity_check() - Sanity check microcode file.
- * @mc: Pointer to the microcode file contents.
- * @print_err: Display failure reason if true, silent if false.
- * @hdr_type: Type of file, i.e. normal microcode file or In Field Scan file.
- *            Validate if the microcode header type matches with the type
- *            specified here.
- *
- * Validate certain header fields and verify if computed checksum matches
- * with the one specified in the header.
- *
- * Return: 0 if the file passes all the checks, -EINVAL if any of the checks
- * fail.
- */
-int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type)
-{
-	unsigned long total_size, data_size, ext_table_size;
-	struct microcode_header_intel *mc_header = mc;
-	struct extended_sigtable *ext_header = NULL;
-	u32 sum, orig_sum, ext_sigcount = 0, i;
-	struct extended_signature *ext_sig;
-
-	total_size = get_totalsize(mc_header);
-	data_size = get_datasize(mc_header);
-
-	if (data_size + MC_HEADER_SIZE > total_size) {
-		if (print_err)
-			pr_err("Error: bad microcode data file size.\n");
-		return -EINVAL;
-	}
-
-	if (mc_header->ldrver != 1 || mc_header->hdrver != hdr_type) {
-		if (print_err)
-			pr_err("Error: invalid/unknown microcode update format. Header type %d\n",
-			       mc_header->hdrver);
-		return -EINVAL;
-	}
-
-	ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
-	if (ext_table_size) {
-		u32 ext_table_sum = 0;
-		u32 *ext_tablep;
-
-		if (ext_table_size < EXT_HEADER_SIZE ||
-		    ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
-			if (print_err)
-				pr_err("Error: truncated extended signature table.\n");
-			return -EINVAL;
-		}
-
-		ext_header = mc + MC_HEADER_SIZE + data_size;
-		if (ext_table_size != exttable_size(ext_header)) {
-			if (print_err)
-				pr_err("Error: extended signature table size mismatch.\n");
-			return -EFAULT;
-		}
-
-		ext_sigcount = ext_header->count;
-
-		/*
-		 * Check extended table checksum: the sum of all dwords that
-		 * comprise a valid table must be 0.
-		 */
-		ext_tablep = (u32 *)ext_header;
-
-		i = ext_table_size / sizeof(u32);
-		while (i--)
-			ext_table_sum += ext_tablep[i];
-
-		if (ext_table_sum) {
-			if (print_err)
-				pr_warn("Bad extended signature table checksum, aborting.\n");
-			return -EINVAL;
-		}
-	}
-
-	/*
-	 * Calculate the checksum of update data and header. The checksum of
-	 * valid update data and header including the extended signature table
-	 * must be 0.
-	 */
-	orig_sum = 0;
-	i = (MC_HEADER_SIZE + data_size) / sizeof(u32);
-	while (i--)
-		orig_sum += ((u32 *)mc)[i];
-
-	if (orig_sum) {
-		if (print_err)
-			pr_err("Bad microcode data checksum, aborting.\n");
-		return -EINVAL;
-	}
-
-	if (!ext_table_size)
-		return 0;
-
-	/*
-	 * Check extended signature checksum: 0 => valid.
-	 */
-	for (i = 0; i < ext_sigcount; i++) {
-		ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
-			  EXT_SIGNATURE_SIZE * i;
-
-		sum = (mc_header->sig + mc_header->pf + mc_header->cksum) -
-		      (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
-		if (sum) {
-			if (print_err)
-				pr_err("Bad extended signature checksum, aborting.\n");
-			return -EINVAL;
-		}
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(intel_microcode_sanity_check);
-
 static void early_init_intel(struct cpuinfo_x86 *c)
 {
 	u64 misc_enable;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 467cf37ea90a..952f1733583e 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -45,6 +45,208 @@ static struct microcode_intel *intel_ucode_patch;
 /* last level cache size per core */
 static int llc_size_per_core;
 
+/* microcode format is extended from prescott processors */
+struct extended_signature {
+	unsigned int	sig;
+	unsigned int	pf;
+	unsigned int	cksum;
+};
+
+struct extended_sigtable {
+	unsigned int			count;
+	unsigned int			cksum;
+	unsigned int			reserved[3];
+	struct extended_signature	sigs[];
+};
+
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
+#define EXT_HEADER_SIZE		(sizeof(struct extended_sigtable))
+#define EXT_SIGNATURE_SIZE	(sizeof(struct extended_signature))
+
+static inline unsigned int get_totalsize(struct microcode_header_intel *hdr)
+{
+	return hdr->datasize ? hdr->totalsize : DEFAULT_UCODE_TOTALSIZE;
+}
+
+static inline unsigned int exttable_size(struct extended_sigtable *et)
+{
+	return et->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE;
+}
+
+int intel_cpu_collect_info(struct ucode_cpu_info *uci)
+{
+	unsigned int val[2];
+	unsigned int family, model;
+	struct cpu_signature csig = { 0 };
+	unsigned int eax, ebx, ecx, edx;
+
+	memset(uci, 0, sizeof(*uci));
+
+	eax = 0x00000001;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+	csig.sig = eax;
+
+	family = x86_family(eax);
+	model  = x86_model(eax);
+
+	if (model >= 5 || family > 6) {
+		/* get processor flags from MSR 0x17 */
+		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+		csig.pf = 1 << ((val[1] >> 18) & 7);
+	}
+
+	csig.rev = intel_get_microcode_revision();
+
+	uci->cpu_sig = csig;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(intel_cpu_collect_info);
+
+/*
+ * Returns 1 if update has been found, 0 otherwise.
+ */
+int intel_find_matching_signature(void *mc, unsigned int csig, int cpf)
+{
+	struct microcode_header_intel *mc_hdr = mc;
+	struct extended_sigtable *ext_hdr;
+	struct extended_signature *ext_sig;
+	int i;
+
+	if (intel_cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf))
+		return 1;
+
+	/* Look for ext. headers: */
+	if (get_totalsize(mc_hdr) <= get_datasize(mc_hdr) + MC_HEADER_SIZE)
+		return 0;
+
+	ext_hdr = mc + get_datasize(mc_hdr) + MC_HEADER_SIZE;
+	ext_sig = (void *)ext_hdr + EXT_HEADER_SIZE;
+
+	for (i = 0; i < ext_hdr->count; i++) {
+		if (intel_cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf))
+			return 1;
+		ext_sig++;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(intel_find_matching_signature);
+
+/**
+ * intel_microcode_sanity_check() - Sanity check microcode file.
+ * @mc: Pointer to the microcode file contents.
+ * @print_err: Display failure reason if true, silent if false.
+ * @hdr_type: Type of file, i.e. normal microcode file or In Field Scan file.
+ *            Validate if the microcode header type matches with the type
+ *            specified here.
+ *
+ * Validate certain header fields and verify if computed checksum matches
+ * with the one specified in the header.
+ *
+ * Return: 0 if the file passes all the checks, -EINVAL if any of the checks
+ * fail.
+ */
+int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type)
+{
+	unsigned long total_size, data_size, ext_table_size;
+	struct microcode_header_intel *mc_header = mc;
+	struct extended_sigtable *ext_header = NULL;
+	u32 sum, orig_sum, ext_sigcount = 0, i;
+	struct extended_signature *ext_sig;
+
+	total_size = get_totalsize(mc_header);
+	data_size = get_datasize(mc_header);
+
+	if (data_size + MC_HEADER_SIZE > total_size) {
+		if (print_err)
+			pr_err("Error: bad microcode data file size.\n");
+		return -EINVAL;
+	}
+
+	if (mc_header->ldrver != 1 || mc_header->hdrver != hdr_type) {
+		if (print_err)
+			pr_err("Error: invalid/unknown microcode update format. Header type %d\n",
+			       mc_header->hdrver);
+		return -EINVAL;
+	}
+
+	ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
+	if (ext_table_size) {
+		u32 ext_table_sum = 0;
+		u32 *ext_tablep;
+
+		if (ext_table_size < EXT_HEADER_SIZE ||
+		    ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
+			if (print_err)
+				pr_err("Error: truncated extended signature table.\n");
+			return -EINVAL;
+		}
+
+		ext_header = mc + MC_HEADER_SIZE + data_size;
+		if (ext_table_size != exttable_size(ext_header)) {
+			if (print_err)
+				pr_err("Error: extended signature table size mismatch.\n");
+			return -EFAULT;
+		}
+
+		ext_sigcount = ext_header->count;
+
+		/*
+		 * Check extended table checksum: the sum of all dwords that
+		 * comprise a valid table must be 0.
+		 */
+		ext_tablep = (u32 *)ext_header;
+
+		i = ext_table_size / sizeof(u32);
+		while (i--)
+			ext_table_sum += ext_tablep[i];
+
+		if (ext_table_sum) {
+			if (print_err)
+				pr_warn("Bad extended signature table checksum, aborting.\n");
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * Calculate the checksum of update data and header. The checksum of
+	 * valid update data and header including the extended signature table
+	 * must be 0.
+	 */
+	orig_sum = 0;
+	i = (MC_HEADER_SIZE + data_size) / sizeof(u32);
+	while (i--)
+		orig_sum += ((u32 *)mc)[i];
+
+	if (orig_sum) {
+		if (print_err)
+			pr_err("Bad microcode data checksum, aborting.\n");
+		return -EINVAL;
+	}
+
+	if (!ext_table_size)
+		return 0;
+
+	/*
+	 * Check extended signature checksum: 0 => valid.
+	 */
+	for (i = 0; i < ext_sigcount; i++) {
+		ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
+			  EXT_SIGNATURE_SIZE * i;
+
+		sum = (mc_header->sig + mc_header->pf + mc_header->cksum) -
+		      (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
+		if (sum) {
+			if (print_err)
+				pr_err("Bad extended signature checksum, aborting.\n");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(intel_microcode_sanity_check);
+
 /*
  * Returns 1 if update has been found, 0 otherwise.
  */

From 82ad097b02b6ab7985375b925efd06ac042caf1d Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Sat, 12 Aug 2023 21:58:42 +0200
Subject: [PATCH 1047/2047] x86/microcode: Include vendor headers into
 microcode.h

Currently vendor specific headers are included explicitly when used in
common code. Instead, include the vendor specific headers in
microcode.h, and include that in all usages.

No functional change.

Suggested-by: Boris Petkov <bp@alien8.de>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230812195727.776541545@linutronix.de
---
 arch/x86/include/asm/microcode.h       | 5 ++++-
 arch/x86/include/asm/microcode_amd.h   | 2 --
 arch/x86/include/asm/microcode_intel.h | 2 --
 arch/x86/kernel/cpu/common.c           | 1 -
 arch/x86/kernel/cpu/intel.c            | 2 +-
 arch/x86/kernel/cpu/microcode/amd.c    | 1 -
 arch/x86/kernel/cpu/microcode/core.c   | 2 --
 arch/x86/kernel/cpu/microcode/intel.c  | 2 +-
 drivers/platform/x86/intel/ifs/load.c  | 2 +-
 9 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 0deab6c96d02..9c713b74b1b4 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -2,10 +2,13 @@
 #ifndef _ASM_X86_MICROCODE_H
 #define _ASM_X86_MICROCODE_H
 
-#include <asm/cpu.h>
 #include <linux/earlycpio.h>
 #include <linux/initrd.h>
 
+#include <asm/cpu.h>
+#include <asm/microcode_amd.h>
+#include <asm/microcode_intel.h>
+
 struct ucode_patch {
 	struct list_head plist;
 	void *data;		/* Intel uses only this one */
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index 58e9c83732c9..febe9bf0c708 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -2,8 +2,6 @@
 #ifndef _ASM_X86_MICROCODE_AMD_H
 #define _ASM_X86_MICROCODE_AMD_H
 
-#include <asm/microcode.h>
-
 #define UCODE_MAGIC			0x00414d44
 #define UCODE_EQUIV_CPU_TABLE_TYPE	0x00000000
 #define UCODE_UCODE_TYPE		0x00000001
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 4a32eadb95bf..9951a4b7c481 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -2,8 +2,6 @@
 #ifndef _ASM_X86_MICROCODE_INTEL_H
 #define _ASM_X86_MICROCODE_INTEL_H
 
-#include <asm/microcode.h>
-
 struct microcode_header_intel {
 	unsigned int            hdrver;
 	unsigned int            rev;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 06015f304cb8..1ea5f822a7ca 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,7 +59,6 @@
 #include <asm/cacheinfo.h>
 #include <asm/memtype.h>
 #include <asm/microcode.h>
-#include <asm/microcode_intel.h>
 #include <asm/intel-family.h>
 #include <asm/cpu_device_id.h>
 #include <asm/uv/uv.h>
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 5a8f56942a71..be4045628fd3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -20,7 +20,7 @@
 #include <asm/bugs.h>
 #include <asm/cpu.h>
 #include <asm/intel-family.h>
-#include <asm/microcode_intel.h>
+#include <asm/microcode.h>
 #include <asm/hwcap2.h>
 #include <asm/elf.h>
 #include <asm/cpu_device_id.h>
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index a28b103256ff..dc0a3bed5f6e 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -29,7 +29,6 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 
-#include <asm/microcode_amd.h>
 #include <asm/microcode.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index c9a53e330df3..73a3c22773c2 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -31,9 +31,7 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 
-#include <asm/microcode_intel.h>
 #include <asm/cpu_device_id.h>
-#include <asm/microcode_amd.h>
 #include <asm/perf_event.h>
 #include <asm/microcode.h>
 #include <asm/processor.h>
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 952f1733583e..171d96b632d8 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -30,9 +30,9 @@
 #include <linux/uio.h>
 #include <linux/mm.h>
 
-#include <asm/microcode_intel.h>
 #include <asm/intel-family.h>
 #include <asm/processor.h>
+#include <asm/microcode.h>
 #include <asm/tlbflush.h>
 #include <asm/setup.h>
 #include <asm/msr.h>
diff --git a/drivers/platform/x86/intel/ifs/load.c b/drivers/platform/x86/intel/ifs/load.c
index e6ae8265f3a3..390862ab0357 100644
--- a/drivers/platform/x86/intel/ifs/load.c
+++ b/drivers/platform/x86/intel/ifs/load.c
@@ -3,7 +3,7 @@
 
 #include <linux/firmware.h>
 #include <asm/cpu.h>
-#include <asm/microcode_intel.h>
+#include <asm/microcode.h>
 
 #include "ifs.h"
 

From 18648dbd3338b75e7b2895d725ee3a7187d6e3a3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 12 Aug 2023 21:58:44 +0200
Subject: [PATCH 1048/2047] x86/microcode: Make reload_early_microcode() static

fe055896c040 ("x86/microcode: Merge the early microcode loader") left this
needlessly public. Git archaeology provided by Borislav.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230812195727.834943153@linutronix.de
---
 arch/x86/include/asm/microcode.h     | 2 --
 arch/x86/kernel/cpu/microcode/core.c | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 9c713b74b1b4..166a633e2796 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -128,13 +128,11 @@ static inline unsigned int x86_cpuid_family(void)
 #ifdef CONFIG_MICROCODE
 extern void __init load_ucode_bsp(void);
 extern void load_ucode_ap(void);
-void reload_early_microcode(unsigned int cpu);
 extern bool initrd_gone;
 void microcode_bsp_resume(void);
 #else
 static inline void __init load_ucode_bsp(void)			{ }
 static inline void load_ucode_ap(void)				{ }
-static inline void reload_early_microcode(unsigned int cpu)	{ }
 static inline void microcode_bsp_resume(void)			{ }
 #endif
 
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 73a3c22773c2..75c5bc705d76 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -293,7 +293,7 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
 #endif
 }
 
-void reload_early_microcode(unsigned int cpu)
+static void reload_early_microcode(unsigned int cpu)
 {
 	int vendor, family;
 

From b0e67db12d769cc308a50c1c0ac3721c4f6aead7 Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Sat, 12 Aug 2023 21:58:45 +0200
Subject: [PATCH 1049/2047] x86/microcode/intel: Rename get_datasize() since
 its used externally

Rename get_datasize() to intel_microcode_get_datasize() and make it an inline.

  [ tglx: Make the argument typed and fix up the IFS code ]

Suggested-by: Boris Petkov <bp@alien8.de>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230812195727.894165745@linutronix.de
---
 arch/x86/include/asm/microcode_intel.h | 7 ++++---
 arch/x86/kernel/cpu/microcode/intel.c  | 8 ++++----
 drivers/platform/x86/intel/ifs/load.c  | 5 +++--
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 9951a4b7c481..7bd4c2960c75 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -26,9 +26,10 @@ struct microcode_intel {
 #define MC_HEADER_TYPE_IFS		2
 #define DEFAULT_UCODE_DATASIZE		(2000)
 
-#define get_datasize(mc) \
-	(((struct microcode_intel *)mc)->hdr.datasize ? \
-	 ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
+static inline int intel_microcode_get_datasize(struct microcode_header_intel *hdr)
+{
+	return hdr->datasize ? : DEFAULT_UCODE_DATASIZE;
+}
 
 static inline u32 intel_get_microcode_revision(void)
 {
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 171d96b632d8..98666729b80e 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -118,10 +118,10 @@ int intel_find_matching_signature(void *mc, unsigned int csig, int cpf)
 		return 1;
 
 	/* Look for ext. headers: */
-	if (get_totalsize(mc_hdr) <= get_datasize(mc_hdr) + MC_HEADER_SIZE)
+	if (get_totalsize(mc_hdr) <= intel_microcode_get_datasize(mc_hdr) + MC_HEADER_SIZE)
 		return 0;
 
-	ext_hdr = mc + get_datasize(mc_hdr) + MC_HEADER_SIZE;
+	ext_hdr = mc + intel_microcode_get_datasize(mc_hdr) + MC_HEADER_SIZE;
 	ext_sig = (void *)ext_hdr + EXT_HEADER_SIZE;
 
 	for (i = 0; i < ext_hdr->count; i++) {
@@ -156,7 +156,7 @@ int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type)
 	struct extended_signature *ext_sig;
 
 	total_size = get_totalsize(mc_header);
-	data_size = get_datasize(mc_header);
+	data_size = intel_microcode_get_datasize(mc_header);
 
 	if (data_size + MC_HEADER_SIZE > total_size) {
 		if (print_err)
@@ -438,7 +438,7 @@ static void show_saved_mc(void)
 		date	= mc_saved_header->date;
 
 		total_size	= get_totalsize(mc_saved_header);
-		data_size	= get_datasize(mc_saved_header);
+		data_size	= intel_microcode_get_datasize(mc_saved_header);
 
 		pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, total size=0x%x, date = %04x-%02x-%02x\n",
 			 i++, sig, pf, rev, total_size,
diff --git a/drivers/platform/x86/intel/ifs/load.c b/drivers/platform/x86/intel/ifs/load.c
index 390862ab0357..cefd0d886cfd 100644
--- a/drivers/platform/x86/intel/ifs/load.c
+++ b/drivers/platform/x86/intel/ifs/load.c
@@ -56,12 +56,13 @@ struct metadata_header {
 
 static struct metadata_header *find_meta_data(void *ucode, unsigned int meta_type)
 {
+	struct microcode_header_intel *hdr = &((struct microcode_intel *)ucode)->hdr;
 	struct metadata_header *meta_header;
 	unsigned long data_size, total_meta;
 	unsigned long meta_size = 0;
 
-	data_size = get_datasize(ucode);
-	total_meta = ((struct microcode_intel *)ucode)->hdr.metasize;
+	data_size = intel_microcode_get_datasize(hdr);
+	total_meta = hdr->metasize;
 	if (!total_meta)
 		return NULL;
 

From d02a0efd0f471a775b9e28ce6593fc8b3cc46e8c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 12 Aug 2023 21:58:47 +0200
Subject: [PATCH 1050/2047] x86/microcode: Move core specific defines to local
 header

There is no reason to expose all of this globally. Move everything which is
not required outside of the microcode specific code to local header files
and into the respective source files.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230812195727.952876381@linutronix.de
---
 arch/x86/include/asm/microcode.h         | 175 ++++++++---------------
 arch/x86/include/asm/microcode_amd.h     |  54 -------
 arch/x86/include/asm/microcode_intel.h   |  63 --------
 arch/x86/kernel/cpu/microcode/amd.c      |  41 ++++++
 arch/x86/kernel/cpu/microcode/core.c     |   3 +-
 arch/x86/kernel/cpu/microcode/intel.c    |   3 +-
 arch/x86/kernel/cpu/microcode/internal.h | 131 +++++++++++++++++
 7 files changed, 233 insertions(+), 237 deletions(-)
 delete mode 100644 arch/x86/include/asm/microcode_amd.h
 delete mode 100644 arch/x86/include/asm/microcode_intel.h
 create mode 100644 arch/x86/kernel/cpu/microcode/internal.h

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 166a633e2796..bbbe9d744977 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -2,138 +2,77 @@
 #ifndef _ASM_X86_MICROCODE_H
 #define _ASM_X86_MICROCODE_H
 
-#include <linux/earlycpio.h>
-#include <linux/initrd.h>
-
-#include <asm/cpu.h>
-#include <asm/microcode_amd.h>
-#include <asm/microcode_intel.h>
-
-struct ucode_patch {
-	struct list_head plist;
-	void *data;		/* Intel uses only this one */
-	unsigned int size;
-	u32 patch_id;
-	u16 equiv_cpu;
-};
-
-extern struct list_head microcode_cache;
-
 struct cpu_signature {
 	unsigned int sig;
 	unsigned int pf;
 	unsigned int rev;
 };
 
-struct device;
-
-enum ucode_state {
-	UCODE_OK	= 0,
-	UCODE_NEW,
-	UCODE_UPDATED,
-	UCODE_NFOUND,
-	UCODE_ERROR,
-};
-
-struct microcode_ops {
-	enum ucode_state (*request_microcode_fw) (int cpu, struct device *);
-
-	void (*microcode_fini_cpu) (int cpu);
-
-	/*
-	 * The generic 'microcode_core' part guarantees that
-	 * the callbacks below run on a target cpu when they
-	 * are being called.
-	 * See also the "Synchronization" section in microcode_core.c.
-	 */
-	enum ucode_state (*apply_microcode) (int cpu);
-	int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
-};
-
 struct ucode_cpu_info {
 	struct cpu_signature	cpu_sig;
 	void			*mc;
 };
-extern struct ucode_cpu_info ucode_cpu_info[];
-struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa);
-
-#ifdef CONFIG_CPU_SUP_INTEL
-extern struct microcode_ops * __init init_intel_microcode(void);
-#else
-static inline struct microcode_ops * __init init_intel_microcode(void)
-{
-	return NULL;
-}
-#endif /* CONFIG_CPU_SUP_INTEL */
-
-#ifdef CONFIG_CPU_SUP_AMD
-extern struct microcode_ops * __init init_amd_microcode(void);
-extern void __exit exit_amd_microcode(void);
-#else
-static inline struct microcode_ops * __init init_amd_microcode(void)
-{
-	return NULL;
-}
-static inline void __exit exit_amd_microcode(void) {}
-#endif
-
-#define MAX_UCODE_COUNT 128
-
-#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
-#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
-#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
-#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
-#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
-#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
-#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
-
-#define CPUID_IS(a, b, c, ebx, ecx, edx)	\
-		(!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
-
-/*
- * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
- * x86_cpuid_vendor() gets vendor id for BSP.
- *
- * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
- * coding, we still use x86_cpuid_vendor() to get vendor id for AP.
- *
- * x86_cpuid_vendor() gets vendor information directly from CPUID.
- */
-static inline int x86_cpuid_vendor(void)
-{
-	u32 eax = 0x00000000;
-	u32 ebx, ecx = 0, edx;
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-
-	if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
-		return X86_VENDOR_INTEL;
-
-	if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
-		return X86_VENDOR_AMD;
-
-	return X86_VENDOR_UNKNOWN;
-}
-
-static inline unsigned int x86_cpuid_family(void)
-{
-	u32 eax = 0x00000001;
-	u32 ebx, ecx = 0, edx;
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-
-	return x86_family(eax);
-}
 
 #ifdef CONFIG_MICROCODE
-extern void __init load_ucode_bsp(void);
-extern void load_ucode_ap(void);
-extern bool initrd_gone;
+void load_ucode_bsp(void);
+void load_ucode_ap(void);
 void microcode_bsp_resume(void);
 #else
-static inline void __init load_ucode_bsp(void)			{ }
-static inline void load_ucode_ap(void)				{ }
-static inline void microcode_bsp_resume(void)			{ }
+static inline void load_ucode_bsp(void)	{ }
+static inline void load_ucode_ap(void) { }
+static inline void microcode_bsp_resume(void) { }
 #endif
 
+#ifdef CONFIG_CPU_SUP_INTEL
+/* Intel specific microcode defines. Public for IFS */
+struct microcode_header_intel {
+	unsigned int	hdrver;
+	unsigned int	rev;
+	unsigned int	date;
+	unsigned int	sig;
+	unsigned int	cksum;
+	unsigned int	ldrver;
+	unsigned int	pf;
+	unsigned int	datasize;
+	unsigned int	totalsize;
+	unsigned int	metasize;
+	unsigned int	reserved[2];
+};
+
+struct microcode_intel {
+	struct microcode_header_intel	hdr;
+	unsigned int			bits[];
+};
+
+#define DEFAULT_UCODE_DATASIZE		(2000)
+#define MC_HEADER_SIZE			(sizeof(struct microcode_header_intel))
+#define MC_HEADER_TYPE_MICROCODE	1
+#define MC_HEADER_TYPE_IFS		2
+
+static inline int intel_microcode_get_datasize(struct microcode_header_intel *hdr)
+{
+	return hdr->datasize ? : DEFAULT_UCODE_DATASIZE;
+}
+
+static inline u32 intel_get_microcode_revision(void)
+{
+	u32 rev, dummy;
+
+	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	native_cpuid_eax(1);
+
+	/* get the current revision from MSR 0x8B */
+	native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev);
+
+	return rev;
+}
+
+void show_ucode_info_early(void);
+
+#else /* CONFIG_CPU_SUP_INTEL */
+static inline void show_ucode_info_early(void) { }
+#endif /* !CONFIG_CPU_SUP_INTEL */
+
 #endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
deleted file mode 100644
index febe9bf0c708..000000000000
--- a/arch/x86/include/asm/microcode_amd.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MICROCODE_AMD_H
-#define _ASM_X86_MICROCODE_AMD_H
-
-#define UCODE_MAGIC			0x00414d44
-#define UCODE_EQUIV_CPU_TABLE_TYPE	0x00000000
-#define UCODE_UCODE_TYPE		0x00000001
-
-#define SECTION_HDR_SIZE		8
-#define CONTAINER_HDR_SZ		12
-
-struct equiv_cpu_entry {
-	u32	installed_cpu;
-	u32	fixed_errata_mask;
-	u32	fixed_errata_compare;
-	u16	equiv_cpu;
-	u16	res;
-} __attribute__((packed));
-
-struct microcode_header_amd {
-	u32	data_code;
-	u32	patch_id;
-	u16	mc_patch_data_id;
-	u8	mc_patch_data_len;
-	u8	init_flag;
-	u32	mc_patch_data_checksum;
-	u32	nb_dev_id;
-	u32	sb_dev_id;
-	u16	processor_rev_id;
-	u8	nb_rev_id;
-	u8	sb_rev_id;
-	u8	bios_api_rev;
-	u8	reserved1[3];
-	u32	match_reg[8];
-} __attribute__((packed));
-
-struct microcode_amd {
-	struct microcode_header_amd	hdr;
-	unsigned int			mpb[];
-};
-
-#define PATCH_MAX_SIZE (3 * PAGE_SIZE)
-
-#ifdef CONFIG_CPU_SUP_AMD
-extern void load_ucode_amd_early(unsigned int cpuid_1_eax);
-extern int __init save_microcode_in_initrd_amd(unsigned int family);
-void reload_ucode_amd(unsigned int cpu);
-#else
-static inline void load_ucode_amd_early(unsigned int cpuid_1_eax) {}
-static inline int __init
-save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
-static inline void reload_ucode_amd(unsigned int cpu) {}
-#endif
-#endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
deleted file mode 100644
index 7bd4c2960c75..000000000000
--- a/arch/x86/include/asm/microcode_intel.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MICROCODE_INTEL_H
-#define _ASM_X86_MICROCODE_INTEL_H
-
-struct microcode_header_intel {
-	unsigned int            hdrver;
-	unsigned int            rev;
-	unsigned int            date;
-	unsigned int            sig;
-	unsigned int            cksum;
-	unsigned int            ldrver;
-	unsigned int            pf;
-	unsigned int            datasize;
-	unsigned int            totalsize;
-	unsigned int            metasize;
-	unsigned int            reserved[2];
-};
-
-struct microcode_intel {
-	struct microcode_header_intel hdr;
-	unsigned int            bits[];
-};
-
-#define MC_HEADER_SIZE			(sizeof(struct microcode_header_intel))
-#define MC_HEADER_TYPE_MICROCODE	1
-#define MC_HEADER_TYPE_IFS		2
-#define DEFAULT_UCODE_DATASIZE		(2000)
-
-static inline int intel_microcode_get_datasize(struct microcode_header_intel *hdr)
-{
-	return hdr->datasize ? : DEFAULT_UCODE_DATASIZE;
-}
-
-static inline u32 intel_get_microcode_revision(void)
-{
-	u32 rev, dummy;
-
-	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
-
-	/* As documented in the SDM: Do a CPUID 1 here */
-	native_cpuid_eax(1);
-
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev);
-
-	return rev;
-}
-
-#ifdef CONFIG_CPU_SUP_INTEL
-extern void __init load_ucode_intel_bsp(void);
-extern void load_ucode_intel_ap(void);
-extern void show_ucode_info_early(void);
-extern int __init save_microcode_in_initrd_intel(void);
-void reload_ucode_intel(void);
-#else
-static inline __init void load_ucode_intel_bsp(void) {}
-static inline void load_ucode_intel_ap(void) {}
-static inline void show_ucode_info_early(void) {}
-static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; }
-static inline void reload_ucode_intel(void) {}
-#endif
-
-#endif /* _ASM_X86_MICROCODE_INTEL_H */
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index dc0a3bed5f6e..bbd1dc38ea03 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -35,6 +35,47 @@
 #include <asm/cpu.h>
 #include <asm/msr.h>
 
+#include "internal.h"
+
+#define UCODE_MAGIC			0x00414d44
+#define UCODE_EQUIV_CPU_TABLE_TYPE	0x00000000
+#define UCODE_UCODE_TYPE		0x00000001
+
+#define SECTION_HDR_SIZE		8
+#define CONTAINER_HDR_SZ		12
+
+struct equiv_cpu_entry {
+	u32	installed_cpu;
+	u32	fixed_errata_mask;
+	u32	fixed_errata_compare;
+	u16	equiv_cpu;
+	u16	res;
+} __packed;
+
+struct microcode_header_amd {
+	u32	data_code;
+	u32	patch_id;
+	u16	mc_patch_data_id;
+	u8	mc_patch_data_len;
+	u8	init_flag;
+	u32	mc_patch_data_checksum;
+	u32	nb_dev_id;
+	u32	sb_dev_id;
+	u16	processor_rev_id;
+	u8	nb_rev_id;
+	u8	sb_rev_id;
+	u8	bios_api_rev;
+	u8	reserved1[3];
+	u32	match_reg[8];
+} __packed;
+
+struct microcode_amd {
+	struct microcode_header_amd	hdr;
+	unsigned int			mpb[];
+};
+
+#define PATCH_MAX_SIZE (3 * PAGE_SIZE)
+
 static struct equiv_cpu_table {
 	unsigned int num_entries;
 	struct equiv_cpu_entry *entry;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 75c5bc705d76..6cc7a2c181da 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -33,11 +33,12 @@
 
 #include <asm/cpu_device_id.h>
 #include <asm/perf_event.h>
-#include <asm/microcode.h>
 #include <asm/processor.h>
 #include <asm/cmdline.h>
 #include <asm/setup.h>
 
+#include "internal.h"
+
 #define DRIVER_VERSION	"2.2"
 
 static struct microcode_ops	*microcode_ops;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 98666729b80e..d983a3e64422 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -32,11 +32,12 @@
 
 #include <asm/intel-family.h>
 #include <asm/processor.h>
-#include <asm/microcode.h>
 #include <asm/tlbflush.h>
 #include <asm/setup.h>
 #include <asm/msr.h>
 
+#include "internal.h"
+
 static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
 
 /* Current microcode patch used in early patching on the APs. */
diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h
new file mode 100644
index 000000000000..8ee9392e5c68
--- /dev/null
+++ b/arch/x86/kernel/cpu/microcode/internal.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_MICROCODE_INTERNAL_H
+#define _X86_MICROCODE_INTERNAL_H
+
+#include <linux/earlycpio.h>
+#include <linux/initrd.h>
+
+#include <asm/cpu.h>
+#include <asm/microcode.h>
+
+struct ucode_patch {
+	struct list_head plist;
+	void *data;		/* Intel uses only this one */
+	unsigned int size;
+	u32 patch_id;
+	u16 equiv_cpu;
+};
+
+extern struct list_head microcode_cache;
+
+struct device;
+
+enum ucode_state {
+	UCODE_OK	= 0,
+	UCODE_NEW,
+	UCODE_UPDATED,
+	UCODE_NFOUND,
+	UCODE_ERROR,
+};
+
+struct microcode_ops {
+	enum ucode_state (*request_microcode_fw)(int cpu, struct device *dev);
+
+	void (*microcode_fini_cpu)(int cpu);
+
+	/*
+	 * The generic 'microcode_core' part guarantees that
+	 * the callbacks below run on a target cpu when they
+	 * are being called.
+	 * See also the "Synchronization" section in microcode_core.c.
+	 */
+	enum ucode_state (*apply_microcode)(int cpu);
+	int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
+};
+
+extern struct ucode_cpu_info ucode_cpu_info[];
+struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa);
+
+#define MAX_UCODE_COUNT 128
+
+#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
+#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
+#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
+#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
+#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
+#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
+#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
+
+#define CPUID_IS(a, b, c, ebx, ecx, edx)	\
+		(!(((ebx) ^ (a)) | ((edx) ^ (b)) | ((ecx) ^ (c))))
+
+/*
+ * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
+ * x86_cpuid_vendor() gets vendor id for BSP.
+ *
+ * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
+ * coding, we still use x86_cpuid_vendor() to get vendor id for AP.
+ *
+ * x86_cpuid_vendor() gets vendor information directly from CPUID.
+ */
+static inline int x86_cpuid_vendor(void)
+{
+	u32 eax = 0x00000000;
+	u32 ebx, ecx = 0, edx;
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
+		return X86_VENDOR_INTEL;
+
+	if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
+		return X86_VENDOR_AMD;
+
+	return X86_VENDOR_UNKNOWN;
+}
+
+static inline unsigned int x86_cpuid_family(void)
+{
+	u32 eax = 0x00000001;
+	u32 ebx, ecx = 0, edx;
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	return x86_family(eax);
+}
+
+extern bool initrd_gone;
+
+#ifdef CONFIG_CPU_SUP_AMD
+void load_ucode_amd_bsp(unsigned int family);
+void load_ucode_amd_ap(unsigned int family);
+void load_ucode_amd_early(unsigned int cpuid_1_eax);
+int save_microcode_in_initrd_amd(unsigned int family);
+void reload_ucode_amd(unsigned int cpu);
+struct microcode_ops *init_amd_microcode(void);
+void exit_amd_microcode(void);
+#else /* CONFIG_MICROCODE_AMD */
+static inline void load_ucode_amd_bsp(unsigned int family) { }
+static inline void load_ucode_amd_ap(unsigned int family) { }
+static inline void load_ucode_amd_early(unsigned int family) { }
+static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
+static inline void reload_ucode_amd(unsigned int cpu) { }
+static inline struct microcode_ops *init_amd_microcode(void) { return NULL; }
+static inline void exit_amd_microcode(void) { }
+#endif /* !CONFIG_MICROCODE_AMD */
+
+#ifdef CONFIG_CPU_SUP_INTEL
+void load_ucode_intel_bsp(void);
+void load_ucode_intel_ap(void);
+int save_microcode_in_initrd_intel(void);
+void reload_ucode_intel(void);
+struct microcode_ops *init_intel_microcode(void);
+#else /* CONFIG_CPU_SUP_INTEL */
+static inline void load_ucode_intel_bsp(void) { }
+static inline void load_ucode_intel_ap(void) { }
+static inline int save_microcode_in_initrd_intel(void) { return -EINVAL; }
+static inline void reload_ucode_intel(void) { }
+static inline struct microcode_ops *init_intel_microcode(void) { return NULL; }
+#endif  /* !CONFIG_CPU_SUP_INTEL */
+
+#endif /* _X86_MICROCODE_INTERNAL_H */

From d44450c5939949fe37e4e94db69429de1ab1aad6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 12 Aug 2023 21:58:49 +0200
Subject: [PATCH 1051/2047] x86/microcode/intel: Remove debug code

This is really of dubious value.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230812195728.010895747@linutronix.de
---
 arch/x86/kernel/cpu/microcode/intel.c | 75 ---------------------------
 1 file changed, 75 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index d983a3e64422..8621ef46ab98 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -10,15 +10,7 @@
  * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
  *		      H Peter Anvin" <hpa@zytor.com>
  */
-
-/*
- * This needs to be before all headers so that pr_debug in printk.h doesn't turn
- * printk calls into no_printk().
- *
- *#define DEBUG
- */
 #define pr_fmt(fmt) "microcode: " fmt
-
 #include <linux/earlycpio.h>
 #include <linux/firmware.h>
 #include <linux/uaccess.h>
@@ -405,69 +397,6 @@ next:
 	return patch;
 }
 
-static void show_saved_mc(void)
-{
-#ifdef DEBUG
-	int i = 0, j;
-	unsigned int sig, pf, rev, total_size, data_size, date;
-	struct ucode_cpu_info uci;
-	struct ucode_patch *p;
-
-	if (list_empty(&microcode_cache)) {
-		pr_debug("no microcode data saved.\n");
-		return;
-	}
-
-	intel_cpu_collect_info(&uci);
-
-	sig	= uci.cpu_sig.sig;
-	pf	= uci.cpu_sig.pf;
-	rev	= uci.cpu_sig.rev;
-	pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
-
-	list_for_each_entry(p, &microcode_cache, plist) {
-		struct microcode_header_intel *mc_saved_header;
-		struct extended_sigtable *ext_header;
-		struct extended_signature *ext_sig;
-		int ext_sigcount;
-
-		mc_saved_header = (struct microcode_header_intel *)p->data;
-
-		sig	= mc_saved_header->sig;
-		pf	= mc_saved_header->pf;
-		rev	= mc_saved_header->rev;
-		date	= mc_saved_header->date;
-
-		total_size	= get_totalsize(mc_saved_header);
-		data_size	= intel_microcode_get_datasize(mc_saved_header);
-
-		pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, total size=0x%x, date = %04x-%02x-%02x\n",
-			 i++, sig, pf, rev, total_size,
-			 date & 0xffff,
-			 date >> 24,
-			 (date >> 16) & 0xff);
-
-		/* Look for ext. headers: */
-		if (total_size <= data_size + MC_HEADER_SIZE)
-			continue;
-
-		ext_header = (void *)mc_saved_header + data_size + MC_HEADER_SIZE;
-		ext_sigcount = ext_header->count;
-		ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
-
-		for (j = 0; j < ext_sigcount; j++) {
-			sig = ext_sig->sig;
-			pf = ext_sig->pf;
-
-			pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
-				 j, sig, pf);
-
-			ext_sig++;
-		}
-	}
-#endif
-}
-
 /*
  * Save this microcode patch. It will be loaded early when a CPU is
  * hot-added or resumes.
@@ -480,7 +409,6 @@ static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int s
 	mutex_lock(&x86_cpu_microcode_mutex);
 
 	save_microcode_patch(uci, mc, size);
-	show_saved_mc();
 
 	mutex_unlock(&x86_cpu_microcode_mutex);
 }
@@ -631,9 +559,6 @@ int __init save_microcode_in_initrd_intel(void)
 	intel_cpu_collect_info(&uci);
 
 	scan_microcode(cp.data, cp.size, &uci, true);
-
-	show_saved_mc();
-
 	return 0;
 }
 

From d2700f406713d04573803b9fe8a0fb02c015df39 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 12 Aug 2023 21:58:50 +0200
Subject: [PATCH 1052/2047] x86/microcode/intel: Remove pointless mutex

There is no concurrency.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230812195728.069849788@linutronix.de
---
 arch/x86/kernel/cpu/microcode/intel.c | 24 ++----------------------
 1 file changed, 2 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 8621ef46ab98..94dd6af9c963 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -397,22 +397,6 @@ next:
 	return patch;
 }
 
-/*
- * Save this microcode patch. It will be loaded early when a CPU is
- * hot-added or resumes.
- */
-static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size)
-{
-	/* Synchronization during CPU hotplug. */
-	static DEFINE_MUTEX(x86_cpu_microcode_mutex);
-
-	mutex_lock(&x86_cpu_microcode_mutex);
-
-	save_microcode_patch(uci, mc, size);
-
-	mutex_unlock(&x86_cpu_microcode_mutex);
-}
-
 static bool load_builtin_intel_microcode(struct cpio_data *cp)
 {
 	unsigned int eax = 1, ebx, ecx = 0, edx;
@@ -829,12 +813,8 @@ static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter)
 	vfree(uci->mc);
 	uci->mc = (struct microcode_intel *)new_mc;
 
-	/*
-	 * If early loading microcode is supported, save this mc into
-	 * permanent memory. So it will be loaded early when a CPU is hot added
-	 * or resumes.
-	 */
-	save_mc_for_early(uci, new_mc, new_mc_size);
+	/* Save for CPU hotplug */
+	save_microcode_patch(uci, new_mc, new_mc_size);
 
 	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
 		 cpu, new_rev, uci->cpu_sig.rev);

From 582eb3aeed2d06b122fba95518b84506d3d4ceb9 Mon Sep 17 00:00:00 2001
From: Kemeng Shi <shikemeng@huaweicloud.com>
Date: Wed, 26 Jul 2023 01:20:51 +0800
Subject: [PATCH 1053/2047] kunit: replace KUNIT_TRIGGER_STATIC_STUB maro with
 KUNIT_STATIC_STUB_REDIRECT

We mix up KUNIT_TRIGGER_STATIC_STUB and KUNIT_STATIC_STUB_REDIRECT in
static_stub header. Just correct KUNIT_TRIGGER_STATIC_STUB to
KUNIT_STATIC_STUB_REDIRECT which is documented.

Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/static_stub.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/kunit/static_stub.h b/include/kunit/static_stub.h
index 9b80150a5d62..85315c80b303 100644
--- a/include/kunit/static_stub.h
+++ b/include/kunit/static_stub.h
@@ -11,7 +11,7 @@
 #if !IS_ENABLED(CONFIG_KUNIT)
 
 /* If CONFIG_KUNIT is not enabled, these stubs quietly disappear. */
-#define KUNIT_TRIGGER_STATIC_STUB(real_fn_name, args...) do {} while (0)
+#define KUNIT_STATIC_STUB_REDIRECT(real_fn_name, args...) do {} while (0)
 
 #else
 
@@ -30,7 +30,7 @@
  * This is a function prologue which is used to allow calls to the current
  * function to be redirected by a KUnit test. KUnit tests can call
  * kunit_activate_static_stub() to pass a replacement function in. The
- * replacement function will be called by KUNIT_TRIGGER_STATIC_STUB(), which
+ * replacement function will be called by KUNIT_STATIC_STUB_REDIRECT(), which
  * will then return from the function. If the caller is not in a KUnit context,
  * the function will continue execution as normal.
  *
@@ -87,7 +87,7 @@ void __kunit_activate_static_stub(struct kunit *test,
  * When activated, calls to real_fn_addr from within this test (even if called
  * indirectly) will instead call replacement_addr. The function pointed to by
  * real_fn_addr must begin with the static stub prologue in
- * KUNIT_TRIGGER_STATIC_STUB() for this to work. real_fn_addr and
+ * KUNIT_STATIC_STUB_REDIRECT() for this to work. real_fn_addr and
  * replacement_addr must have the same type.
  *
  * The redirection can be disabled again with kunit_deactivate_static_stub().

From 40f78b74b6d425115d1df78516d14c35384172e7 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:46 +0800
Subject: [PATCH 1054/2047] spi: amlogic-spifc-a1: switch to use
 devm_spi_alloc_host()

Switch to use modern name function devm_spi_alloc_host().

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-2-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-amlogic-spifc-a1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-amlogic-spifc-a1.c b/drivers/spi/spi-amlogic-spifc-a1.c
index 605e9e40455c..fadf6667cd51 100644
--- a/drivers/spi/spi-amlogic-spifc-a1.c
+++ b/drivers/spi/spi-amlogic-spifc-a1.c
@@ -326,7 +326,7 @@ static int amlogic_spifc_a1_probe(struct platform_device *pdev)
 	struct amlogic_spifc_a1 *spifc;
 	int ret;
 
-	ctrl = devm_spi_alloc_master(&pdev->dev, sizeof(*spifc));
+	ctrl = devm_spi_alloc_host(&pdev->dev, sizeof(*spifc));
 	if (!ctrl)
 		return -ENOMEM;
 

From 0c35cc597b211eea2f26eae35904204cbd8e0e94 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:47 +0800
Subject: [PATCH 1055/2047] spi: au1550: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-3-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-au1550.c | 74 ++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/drivers/spi/spi-au1550.c b/drivers/spi/spi-au1550.c
index 0b57e6afce0f..1011b1a8f241 100644
--- a/drivers/spi/spi-au1550.c
+++ b/drivers/spi/spi-au1550.c
@@ -54,7 +54,7 @@ struct au1550_spi {
 	int (*txrx_bufs)(struct spi_device *spi, struct spi_transfer *t);
 	irqreturn_t (*irq_callback)(struct au1550_spi *hw);
 
-	struct completion master_done;
+	struct completion host_done;
 
 	unsigned int usedma;
 	u32 dma_tx_id;
@@ -66,7 +66,7 @@ struct au1550_spi {
 	unsigned int dma_rx_tmpbuf_size;
 	u32 dma_rx_tmpbuf_addr;
 
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct device *dev;
 	struct au1550_spi_info *pdata;
 	struct resource *ioarea;
@@ -159,7 +159,7 @@ static void au1550_spi_reset_fifos(struct au1550_spi *hw)
  */
 static void au1550_spi_chipsel(struct spi_device *spi, int value)
 {
-	struct au1550_spi *hw = spi_master_get_devdata(spi->master);
+	struct au1550_spi *hw = spi_controller_get_devdata(spi->controller);
 	unsigned int cspol = spi->mode & SPI_CS_HIGH ? 1 : 0;
 	u32 cfg, stat;
 
@@ -219,7 +219,7 @@ static void au1550_spi_chipsel(struct spi_device *spi, int value)
 
 static int au1550_spi_setupxfer(struct spi_device *spi, struct spi_transfer *t)
 {
-	struct au1550_spi *hw = spi_master_get_devdata(spi->master);
+	struct au1550_spi *hw = spi_controller_get_devdata(spi->controller);
 	unsigned int bpw, hz;
 	u32 cfg, stat;
 
@@ -272,7 +272,7 @@ static int au1550_spi_setupxfer(struct spi_device *spi, struct spi_transfer *t)
  * no reliable way how to recognize that spi transfer is done
  * dma complete callbacks are called before real spi transfer is finished
  * and if only tx dma channel is set up (and rx fifo overflow event masked)
- * spi master done event irq is not generated unless rx fifo is empty (emptied)
+ * spi host done event irq is not generated unless rx fifo is empty (emptied)
  * so we need rx tmp buffer to use for rx dma if user does not provide one
  */
 static int au1550_spi_dma_rxtmp_alloc(struct au1550_spi *hw, unsigned int size)
@@ -303,7 +303,7 @@ static void au1550_spi_dma_rxtmp_free(struct au1550_spi *hw)
 
 static int au1550_spi_dma_txrxb(struct spi_device *spi, struct spi_transfer *t)
 {
-	struct au1550_spi *hw = spi_master_get_devdata(spi->master);
+	struct au1550_spi *hw = spi_controller_get_devdata(spi->controller);
 	dma_addr_t dma_tx_addr;
 	dma_addr_t dma_rx_addr;
 	u32 res;
@@ -387,7 +387,7 @@ static int au1550_spi_dma_txrxb(struct spi_device *spi, struct spi_transfer *t)
 	hw->regs->psc_spipcr = PSC_SPIPCR_MS;
 	wmb(); /* drain writebuffer */
 
-	wait_for_completion(&hw->master_done);
+	wait_for_completion(&hw->host_done);
 
 	au1xxx_dbdma_stop(hw->dma_tx_ch);
 	au1xxx_dbdma_stop(hw->dma_rx_ch);
@@ -449,7 +449,7 @@ static irqreturn_t au1550_spi_dma_irq_callback(struct au1550_spi *hw)
 				"dma transfer: unexpected SPI error (event=0x%x stat=0x%x)!\n",
 				evnt, stat);
 
-		complete(&hw->master_done);
+		complete(&hw->host_done);
 		return IRQ_HANDLED;
 	}
 
@@ -458,7 +458,7 @@ static irqreturn_t au1550_spi_dma_irq_callback(struct au1550_spi *hw)
 		au1550_spi_mask_ack_all(hw);
 		hw->rx_count = hw->len;
 		hw->tx_count = hw->len;
-		complete(&hw->master_done);
+		complete(&hw->host_done);
 	}
 	return IRQ_HANDLED;
 }
@@ -502,7 +502,7 @@ AU1550_SPI_TX_WORD(32, 0xffffff)
 static int au1550_spi_pio_txrxb(struct spi_device *spi, struct spi_transfer *t)
 {
 	u32 stat, mask;
-	struct au1550_spi *hw = spi_master_get_devdata(spi->master);
+	struct au1550_spi *hw = spi_controller_get_devdata(spi->controller);
 
 	hw->tx = t->tx_buf;
 	hw->rx = t->rx_buf;
@@ -537,7 +537,7 @@ static int au1550_spi_pio_txrxb(struct spi_device *spi, struct spi_transfer *t)
 	hw->regs->psc_spipcr = PSC_SPIPCR_MS;
 	wmb(); /* drain writebuffer */
 
-	wait_for_completion(&hw->master_done);
+	wait_for_completion(&hw->host_done);
 
 	return min(hw->rx_count, hw->tx_count);
 }
@@ -568,7 +568,7 @@ static irqreturn_t au1550_spi_pio_irq_callback(struct au1550_spi *hw)
 		dev_err(hw->dev,
 			"pio transfer: unexpected SPI error (event=0x%x stat=0x%x)!\n",
 			evnt, stat);
-		complete(&hw->master_done);
+		complete(&hw->host_done);
 		return IRQ_HANDLED;
 	}
 
@@ -605,11 +605,11 @@ static irqreturn_t au1550_spi_pio_irq_callback(struct au1550_spi *hw)
 	/*
 	 * Restart the SPI transmission in case of a transmit underflow.
 	 * This seems to work despite the notes in the Au1550 data book
-	 * of Figure 8-4 with flowchart for SPI master operation:
+	 * of Figure 8-4 with flowchart for SPI host operation:
 	 *
 	 * """Note 1: An XFR Error Interrupt occurs, unless masked,
 	 * for any of the following events: Tx FIFO Underflow,
-	 * Rx FIFO Overflow, or Multiple-master Error
+	 * Rx FIFO Overflow, or Multiple-host Error
 	 *    Note 2: In case of a Tx Underflow Error, all zeroes are
 	 * transmitted."""
 	 *
@@ -627,14 +627,14 @@ static irqreturn_t au1550_spi_pio_irq_callback(struct au1550_spi *hw)
 	if (hw->rx_count >= hw->len) {
 		/* transfer completed successfully */
 		au1550_spi_mask_ack_all(hw);
-		complete(&hw->master_done);
+		complete(&hw->host_done);
 	}
 	return IRQ_HANDLED;
 }
 
 static int au1550_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
 {
-	struct au1550_spi *hw = spi_master_get_devdata(spi->master);
+	struct au1550_spi *hw = spi_controller_get_devdata(spi->controller);
 
 	return hw->txrx_bufs(spi, t);
 }
@@ -723,24 +723,24 @@ static void au1550_spi_setup_psc_as_spi(struct au1550_spi *hw)
 static int au1550_spi_probe(struct platform_device *pdev)
 {
 	struct au1550_spi *hw;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct resource *r;
 	int err = 0;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(struct au1550_spi));
-	if (master == NULL) {
-		dev_err(&pdev->dev, "No memory for spi_master\n");
+	host = spi_alloc_host(&pdev->dev, sizeof(struct au1550_spi));
+	if (host == NULL) {
+		dev_err(&pdev->dev, "No memory for spi_controller\n");
 		err = -ENOMEM;
 		goto err_nomem;
 	}
 
 	/* the spi->mode bits understood by this driver: */
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST;
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 24);
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST;
+	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 24);
 
-	hw = spi_master_get_devdata(master);
+	hw = spi_controller_get_devdata(host);
 
-	hw->master = master;
+	hw->host = host;
 	hw->pdata = dev_get_platdata(&pdev->dev);
 	hw->dev = &pdev->dev;
 
@@ -798,9 +798,9 @@ static int au1550_spi_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, hw);
 
-	init_completion(&hw->master_done);
+	init_completion(&hw->host_done);
 
-	hw->bitbang.master = hw->master;
+	hw->bitbang.master = hw->host;
 	hw->bitbang.setup_transfer = au1550_spi_setupxfer;
 	hw->bitbang.chipselect = au1550_spi_chipsel;
 	hw->bitbang.txrx_bufs = au1550_spi_txrx_bufs;
@@ -858,8 +858,8 @@ static int au1550_spi_probe(struct platform_device *pdev)
 		goto err_no_irq;
 	}
 
-	master->bus_num = pdev->id;
-	master->num_chipselect = hw->pdata->num_chipselect;
+	host->bus_num = pdev->id;
+	host->num_chipselect = hw->pdata->num_chipselect;
 
 	/*
 	 *  precompute valid range for spi freq - from au1550 datasheet:
@@ -874,8 +874,8 @@ static int au1550_spi_probe(struct platform_device *pdev)
 		int min_div = (2 << 0) * (2 * (4 + 1));
 		int max_div = (2 << 3) * (2 * (63 + 1));
 
-		master->max_speed_hz = hw->pdata->mainclk_hz / min_div;
-		master->min_speed_hz =
+		host->max_speed_hz = hw->pdata->mainclk_hz / min_div;
+		host->min_speed_hz =
 				hw->pdata->mainclk_hz / (max_div + 1) + 1;
 	}
 
@@ -883,13 +883,13 @@ static int au1550_spi_probe(struct platform_device *pdev)
 
 	err = spi_bitbang_start(&hw->bitbang);
 	if (err) {
-		dev_err(&pdev->dev, "Failed to register SPI master\n");
+		dev_err(&pdev->dev, "Failed to register SPI host\n");
 		goto err_register;
 	}
 
 	dev_info(&pdev->dev,
-		"spi master registered: bus_num=%d num_chipselect=%d\n",
-		master->bus_num, master->num_chipselect);
+		"spi host registered: bus_num=%d num_chipselect=%d\n",
+		host->bus_num, host->num_chipselect);
 
 	return 0;
 
@@ -917,7 +917,7 @@ err_ioremap:
 
 err_no_iores:
 err_no_pdata:
-	spi_master_put(hw->master);
+	spi_controller_put(hw->host);
 
 err_nomem:
 	return err;
@@ -927,8 +927,8 @@ static void au1550_spi_remove(struct platform_device *pdev)
 {
 	struct au1550_spi *hw = platform_get_drvdata(pdev);
 
-	dev_info(&pdev->dev, "spi master remove: bus_num=%d\n",
-		hw->master->bus_num);
+	dev_info(&pdev->dev, "spi host remove: bus_num=%d\n",
+		hw->host->bus_num);
 
 	spi_bitbang_stop(&hw->bitbang);
 	free_irq(hw->irq, hw);
@@ -941,7 +941,7 @@ static void au1550_spi_remove(struct platform_device *pdev)
 		au1xxx_dbdma_chan_free(hw->dma_tx_ch);
 	}
 
-	spi_master_put(hw->master);
+	spi_controller_put(hw->host);
 }
 
 /* work with hotplug and coldplug */

From 24e9b75c0cc3b465cb8f355b2e1b239a54625ae3 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:48 +0800
Subject: [PATCH 1056/2047] spi: ep93xx: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-4-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ep93xx.c | 174 +++++++++++++++++++--------------------
 1 file changed, 87 insertions(+), 87 deletions(-)

diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c
index 3693b3d425c7..a1d60e51c053 100644
--- a/drivers/spi/spi-ep93xx.c
+++ b/drivers/spi/spi-ep93xx.c
@@ -104,15 +104,15 @@ struct ep93xx_spi {
 
 /**
  * ep93xx_spi_calc_divisors() - calculates SPI clock divisors
- * @master: SPI master
+ * @host: SPI host
  * @rate: desired SPI output clock rate
  * @div_cpsr: pointer to return the cpsr (pre-scaler) divider
  * @div_scr: pointer to return the scr divider
  */
-static int ep93xx_spi_calc_divisors(struct spi_master *master,
+static int ep93xx_spi_calc_divisors(struct spi_controller *host,
 				    u32 rate, u8 *div_cpsr, u8 *div_scr)
 {
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
 	unsigned long spi_clk_rate = clk_get_rate(espi->clk);
 	int cpsr, scr;
 
@@ -120,7 +120,7 @@ static int ep93xx_spi_calc_divisors(struct spi_master *master,
 	 * Make sure that max value is between values supported by the
 	 * controller.
 	 */
-	rate = clamp(rate, master->min_speed_hz, master->max_speed_hz);
+	rate = clamp(rate, host->min_speed_hz, host->max_speed_hz);
 
 	/*
 	 * Calculate divisors so that we can get speed according the
@@ -143,18 +143,18 @@ static int ep93xx_spi_calc_divisors(struct spi_master *master,
 	return -EINVAL;
 }
 
-static int ep93xx_spi_chip_setup(struct spi_master *master,
+static int ep93xx_spi_chip_setup(struct spi_controller *host,
 				 struct spi_device *spi,
 				 struct spi_transfer *xfer)
 {
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
 	u8 dss = bits_per_word_to_dss(xfer->bits_per_word);
 	u8 div_cpsr = 0;
 	u8 div_scr = 0;
 	u16 cr0;
 	int err;
 
-	err = ep93xx_spi_calc_divisors(master, xfer->speed_hz,
+	err = ep93xx_spi_calc_divisors(host, xfer->speed_hz,
 				       &div_cpsr, &div_scr);
 	if (err)
 		return err;
@@ -166,9 +166,9 @@ static int ep93xx_spi_chip_setup(struct spi_master *master,
 		cr0 |= SSPCR0_SPH;
 	cr0 |= dss;
 
-	dev_dbg(&master->dev, "setup: mode %d, cpsr %d, scr %d, dss %d\n",
+	dev_dbg(&host->dev, "setup: mode %d, cpsr %d, scr %d, dss %d\n",
 		spi->mode, div_cpsr, div_scr, dss);
-	dev_dbg(&master->dev, "setup: cr0 %#x\n", cr0);
+	dev_dbg(&host->dev, "setup: cr0 %#x\n", cr0);
 
 	writel(div_cpsr, espi->mmio + SSPCPSR);
 	writel(cr0, espi->mmio + SSPCR0);
@@ -176,10 +176,10 @@ static int ep93xx_spi_chip_setup(struct spi_master *master,
 	return 0;
 }
 
-static void ep93xx_do_write(struct spi_master *master)
+static void ep93xx_do_write(struct spi_controller *host)
 {
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
-	struct spi_transfer *xfer = master->cur_msg->state;
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
+	struct spi_transfer *xfer = host->cur_msg->state;
 	u32 val = 0;
 
 	if (xfer->bits_per_word > 8) {
@@ -194,10 +194,10 @@ static void ep93xx_do_write(struct spi_master *master)
 	writel(val, espi->mmio + SSPDR);
 }
 
-static void ep93xx_do_read(struct spi_master *master)
+static void ep93xx_do_read(struct spi_controller *host)
 {
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
-	struct spi_transfer *xfer = master->cur_msg->state;
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
+	struct spi_transfer *xfer = host->cur_msg->state;
 	u32 val;
 
 	val = readl(espi->mmio + SSPDR);
@@ -214,7 +214,7 @@ static void ep93xx_do_read(struct spi_master *master)
 
 /**
  * ep93xx_spi_read_write() - perform next RX/TX transfer
- * @master: SPI master
+ * @host: SPI host
  *
  * This function transfers next bytes (or half-words) to/from RX/TX FIFOs. If
  * called several times, the whole transfer will be completed. Returns
@@ -223,20 +223,20 @@ static void ep93xx_do_read(struct spi_master *master)
  * When this function is finished, RX FIFO should be empty and TX FIFO should be
  * full.
  */
-static int ep93xx_spi_read_write(struct spi_master *master)
+static int ep93xx_spi_read_write(struct spi_controller *host)
 {
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
-	struct spi_transfer *xfer = master->cur_msg->state;
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
+	struct spi_transfer *xfer = host->cur_msg->state;
 
 	/* read as long as RX FIFO has frames in it */
 	while ((readl(espi->mmio + SSPSR) & SSPSR_RNE)) {
-		ep93xx_do_read(master);
+		ep93xx_do_read(host);
 		espi->fifo_level--;
 	}
 
 	/* write as long as TX FIFO has room */
 	while (espi->fifo_level < SPI_FIFO_SIZE && espi->tx < xfer->len) {
-		ep93xx_do_write(master);
+		ep93xx_do_write(host);
 		espi->fifo_level++;
 	}
 
@@ -261,7 +261,7 @@ ep93xx_dma_data_to_trans_dir(enum dma_data_direction dir)
 
 /**
  * ep93xx_spi_dma_prepare() - prepares a DMA transfer
- * @master: SPI master
+ * @host: SPI host
  * @dir: DMA transfer direction
  *
  * Function configures the DMA, maps the buffer and prepares the DMA
@@ -269,11 +269,11 @@ ep93xx_dma_data_to_trans_dir(enum dma_data_direction dir)
  * in case of failure.
  */
 static struct dma_async_tx_descriptor *
-ep93xx_spi_dma_prepare(struct spi_master *master,
+ep93xx_spi_dma_prepare(struct spi_controller *host,
 		       enum dma_data_direction dir)
 {
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
-	struct spi_transfer *xfer = master->cur_msg->state;
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
+	struct spi_transfer *xfer = host->cur_msg->state;
 	struct dma_async_tx_descriptor *txd;
 	enum dma_slave_buswidth buswidth;
 	struct dma_slave_config conf;
@@ -348,7 +348,7 @@ ep93xx_spi_dma_prepare(struct spi_master *master,
 	}
 
 	if (WARN_ON(len)) {
-		dev_warn(&master->dev, "len = %zu expected 0!\n", len);
+		dev_warn(&host->dev, "len = %zu expected 0!\n", len);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -367,16 +367,16 @@ ep93xx_spi_dma_prepare(struct spi_master *master,
 
 /**
  * ep93xx_spi_dma_finish() - finishes with a DMA transfer
- * @master: SPI master
+ * @host: SPI host
  * @dir: DMA transfer direction
  *
  * Function finishes with the DMA transfer. After this, the DMA buffer is
  * unmapped.
  */
-static void ep93xx_spi_dma_finish(struct spi_master *master,
+static void ep93xx_spi_dma_finish(struct spi_controller *host,
 				  enum dma_data_direction dir)
 {
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
 	struct dma_chan *chan;
 	struct sg_table *sgt;
 
@@ -393,35 +393,35 @@ static void ep93xx_spi_dma_finish(struct spi_master *master,
 
 static void ep93xx_spi_dma_callback(void *callback_param)
 {
-	struct spi_master *master = callback_param;
+	struct spi_controller *host = callback_param;
 
-	ep93xx_spi_dma_finish(master, DMA_TO_DEVICE);
-	ep93xx_spi_dma_finish(master, DMA_FROM_DEVICE);
+	ep93xx_spi_dma_finish(host, DMA_TO_DEVICE);
+	ep93xx_spi_dma_finish(host, DMA_FROM_DEVICE);
 
-	spi_finalize_current_transfer(master);
+	spi_finalize_current_transfer(host);
 }
 
-static int ep93xx_spi_dma_transfer(struct spi_master *master)
+static int ep93xx_spi_dma_transfer(struct spi_controller *host)
 {
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
 	struct dma_async_tx_descriptor *rxd, *txd;
 
-	rxd = ep93xx_spi_dma_prepare(master, DMA_FROM_DEVICE);
+	rxd = ep93xx_spi_dma_prepare(host, DMA_FROM_DEVICE);
 	if (IS_ERR(rxd)) {
-		dev_err(&master->dev, "DMA RX failed: %ld\n", PTR_ERR(rxd));
+		dev_err(&host->dev, "DMA RX failed: %ld\n", PTR_ERR(rxd));
 		return PTR_ERR(rxd);
 	}
 
-	txd = ep93xx_spi_dma_prepare(master, DMA_TO_DEVICE);
+	txd = ep93xx_spi_dma_prepare(host, DMA_TO_DEVICE);
 	if (IS_ERR(txd)) {
-		ep93xx_spi_dma_finish(master, DMA_FROM_DEVICE);
-		dev_err(&master->dev, "DMA TX failed: %ld\n", PTR_ERR(txd));
+		ep93xx_spi_dma_finish(host, DMA_FROM_DEVICE);
+		dev_err(&host->dev, "DMA TX failed: %ld\n", PTR_ERR(txd));
 		return PTR_ERR(txd);
 	}
 
 	/* We are ready when RX is done */
 	rxd->callback = ep93xx_spi_dma_callback;
-	rxd->callback_param = master;
+	rxd->callback_param = host;
 
 	/* Now submit both descriptors and start DMA */
 	dmaengine_submit(rxd);
@@ -436,8 +436,8 @@ static int ep93xx_spi_dma_transfer(struct spi_master *master)
 
 static irqreturn_t ep93xx_spi_interrupt(int irq, void *dev_id)
 {
-	struct spi_master *master = dev_id;
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_id;
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
 	u32 val;
 
 	/*
@@ -447,15 +447,15 @@ static irqreturn_t ep93xx_spi_interrupt(int irq, void *dev_id)
 	if (readl(espi->mmio + SSPIIR) & SSPIIR_RORIS) {
 		/* clear the overrun interrupt */
 		writel(0, espi->mmio + SSPICR);
-		dev_warn(&master->dev,
+		dev_warn(&host->dev,
 			 "receive overrun, aborting the message\n");
-		master->cur_msg->status = -EIO;
+		host->cur_msg->status = -EIO;
 	} else {
 		/*
 		 * Interrupt is either RX (RIS) or TX (TIS). For both cases we
 		 * simply execute next data transfer.
 		 */
-		if (ep93xx_spi_read_write(master)) {
+		if (ep93xx_spi_read_write(host)) {
 			/*
 			 * In normal case, there still is some processing left
 			 * for current transfer. Let's wait for the next
@@ -474,26 +474,26 @@ static irqreturn_t ep93xx_spi_interrupt(int irq, void *dev_id)
 	val &= ~(SSPCR1_RORIE | SSPCR1_TIE | SSPCR1_RIE);
 	writel(val, espi->mmio + SSPCR1);
 
-	spi_finalize_current_transfer(master);
+	spi_finalize_current_transfer(host);
 
 	return IRQ_HANDLED;
 }
 
-static int ep93xx_spi_transfer_one(struct spi_master *master,
+static int ep93xx_spi_transfer_one(struct spi_controller *host,
 				   struct spi_device *spi,
 				   struct spi_transfer *xfer)
 {
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
 	u32 val;
 	int ret;
 
-	ret = ep93xx_spi_chip_setup(master, spi, xfer);
+	ret = ep93xx_spi_chip_setup(host, spi, xfer);
 	if (ret) {
-		dev_err(&master->dev, "failed to setup chip for transfer\n");
+		dev_err(&host->dev, "failed to setup chip for transfer\n");
 		return ret;
 	}
 
-	master->cur_msg->state = xfer;
+	host->cur_msg->state = xfer;
 	espi->rx = 0;
 	espi->tx = 0;
 
@@ -503,10 +503,10 @@ static int ep93xx_spi_transfer_one(struct spi_master *master,
 	 * So in these cases we will be using PIO and don't bother for DMA.
 	 */
 	if (espi->dma_rx && xfer->len > SPI_FIFO_SIZE)
-		return ep93xx_spi_dma_transfer(master);
+		return ep93xx_spi_dma_transfer(host);
 
 	/* Using PIO so prime the TX FIFO and enable interrupts */
-	ep93xx_spi_read_write(master);
+	ep93xx_spi_read_write(host);
 
 	val = readl(espi->mmio + SSPCR1);
 	val |= (SSPCR1_RORIE | SSPCR1_TIE | SSPCR1_RIE);
@@ -516,10 +516,10 @@ static int ep93xx_spi_transfer_one(struct spi_master *master,
 	return 1;
 }
 
-static int ep93xx_spi_prepare_message(struct spi_master *master,
+static int ep93xx_spi_prepare_message(struct spi_controller *host,
 				      struct spi_message *msg)
 {
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
 	unsigned long timeout;
 
 	/*
@@ -528,7 +528,7 @@ static int ep93xx_spi_prepare_message(struct spi_master *master,
 	timeout = jiffies + msecs_to_jiffies(SPI_TIMEOUT);
 	while (readl(espi->mmio + SSPSR) & SSPSR_RNE) {
 		if (time_after(jiffies, timeout)) {
-			dev_warn(&master->dev,
+			dev_warn(&host->dev,
 				 "timeout while flushing RX FIFO\n");
 			return -ETIMEDOUT;
 		}
@@ -544,9 +544,9 @@ static int ep93xx_spi_prepare_message(struct spi_master *master,
 	return 0;
 }
 
-static int ep93xx_spi_prepare_hardware(struct spi_master *master)
+static int ep93xx_spi_prepare_hardware(struct spi_controller *host)
 {
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
 	u32 val;
 	int ret;
 
@@ -561,9 +561,9 @@ static int ep93xx_spi_prepare_hardware(struct spi_master *master)
 	return 0;
 }
 
-static int ep93xx_spi_unprepare_hardware(struct spi_master *master)
+static int ep93xx_spi_unprepare_hardware(struct spi_controller *host)
 {
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
 	u32 val;
 
 	val = readl(espi->mmio + SSPCR1);
@@ -646,7 +646,7 @@ static void ep93xx_spi_release_dma(struct ep93xx_spi *espi)
 
 static int ep93xx_spi_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct ep93xx_spi_info *info;
 	struct ep93xx_spi *espi;
 	struct resource *res;
@@ -663,54 +663,54 @@ static int ep93xx_spi_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*espi));
-	if (!master)
+	host = spi_alloc_host(&pdev->dev, sizeof(*espi));
+	if (!host)
 		return -ENOMEM;
 
-	master->use_gpio_descriptors = true;
-	master->prepare_transfer_hardware = ep93xx_spi_prepare_hardware;
-	master->unprepare_transfer_hardware = ep93xx_spi_unprepare_hardware;
-	master->prepare_message = ep93xx_spi_prepare_message;
-	master->transfer_one = ep93xx_spi_transfer_one;
-	master->bus_num = pdev->id;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
+	host->use_gpio_descriptors = true;
+	host->prepare_transfer_hardware = ep93xx_spi_prepare_hardware;
+	host->unprepare_transfer_hardware = ep93xx_spi_unprepare_hardware;
+	host->prepare_message = ep93xx_spi_prepare_message;
+	host->transfer_one = ep93xx_spi_transfer_one;
+	host->bus_num = pdev->id;
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
 	/*
 	 * The SPI core will count the number of GPIO descriptors to figure
 	 * out the number of chip selects available on the platform.
 	 */
-	master->num_chipselect = 0;
+	host->num_chipselect = 0;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
-	espi = spi_master_get_devdata(master);
+	espi = spi_controller_get_devdata(host);
 
 	espi->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(espi->clk)) {
 		dev_err(&pdev->dev, "unable to get spi clock\n");
 		error = PTR_ERR(espi->clk);
-		goto fail_release_master;
+		goto fail_release_host;
 	}
 
 	/*
 	 * Calculate maximum and minimum supported clock rates
 	 * for the controller.
 	 */
-	master->max_speed_hz = clk_get_rate(espi->clk) / 2;
-	master->min_speed_hz = clk_get_rate(espi->clk) / (254 * 256);
+	host->max_speed_hz = clk_get_rate(espi->clk) / 2;
+	host->min_speed_hz = clk_get_rate(espi->clk) / (254 * 256);
 
 	espi->mmio = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(espi->mmio)) {
 		error = PTR_ERR(espi->mmio);
-		goto fail_release_master;
+		goto fail_release_host;
 	}
 	espi->sspdr_phys = res->start + SSPDR;
 
 	error = devm_request_irq(&pdev->dev, irq, ep93xx_spi_interrupt,
-				0, "ep93xx-spi", master);
+				0, "ep93xx-spi", host);
 	if (error) {
 		dev_err(&pdev->dev, "failed to request irq\n");
-		goto fail_release_master;
+		goto fail_release_host;
 	}
 
 	if (info->use_dma && ep93xx_spi_setup_dma(espi))
@@ -719,9 +719,9 @@ static int ep93xx_spi_probe(struct platform_device *pdev)
 	/* make sure that the hardware is disabled */
 	writel(0, espi->mmio + SSPCR1);
 
-	error = devm_spi_register_master(&pdev->dev, master);
+	error = devm_spi_register_controller(&pdev->dev, host);
 	if (error) {
-		dev_err(&pdev->dev, "failed to register SPI master\n");
+		dev_err(&pdev->dev, "failed to register SPI host\n");
 		goto fail_free_dma;
 	}
 
@@ -732,16 +732,16 @@ static int ep93xx_spi_probe(struct platform_device *pdev)
 
 fail_free_dma:
 	ep93xx_spi_release_dma(espi);
-fail_release_master:
-	spi_master_put(master);
+fail_release_host:
+	spi_controller_put(host);
 
 	return error;
 }
 
 static void ep93xx_spi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct ep93xx_spi *espi = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct ep93xx_spi *espi = spi_controller_get_devdata(host);
 
 	ep93xx_spi_release_dma(espi);
 }

From 45d9591105a63fa80d69552ebbd5073c3b16d6bd Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:49 +0800
Subject: [PATCH 1057/2047] spi: falcon: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-5-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-falcon.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/spi/spi-falcon.c b/drivers/spi/spi-falcon.c
index 8a8414cbb400..84279058f0f1 100644
--- a/drivers/spi/spi-falcon.c
+++ b/drivers/spi/spi-falcon.c
@@ -91,14 +91,14 @@
 
 struct falcon_sflash {
 	u32 sfcmd; /* for caching of opcode, direction, ... */
-	struct spi_master *master;
+	struct spi_controller *host;
 };
 
 int falcon_sflash_xfer(struct spi_device *spi, struct spi_transfer *t,
 		unsigned long flags)
 {
 	struct device *dev = &spi->dev;
-	struct falcon_sflash *priv = spi_master_get_devdata(spi->master);
+	struct falcon_sflash *priv = spi_controller_get_devdata(spi->controller);
 	const u8 *txp = t->tx_buf;
 	u8 *rxp = t->rx_buf;
 	unsigned int bytelen = ((8 * t->len + 7) / 8);
@@ -351,10 +351,10 @@ static int falcon_sflash_setup(struct spi_device *spi)
 	return 0;
 }
 
-static int falcon_sflash_xfer_one(struct spi_master *master,
+static int falcon_sflash_xfer_one(struct spi_controller *host,
 					struct spi_message *m)
 {
-	struct falcon_sflash *priv = spi_master_get_devdata(master);
+	struct falcon_sflash *priv = spi_controller_get_devdata(host);
 	struct spi_transfer *t;
 	unsigned long spi_flags;
 	unsigned long flags;
@@ -382,7 +382,7 @@ static int falcon_sflash_xfer_one(struct spi_master *master,
 	}
 
 	m->status = ret;
-	spi_finalize_current_message(master);
+	spi_finalize_current_message(host);
 
 	return 0;
 }
@@ -390,25 +390,25 @@ static int falcon_sflash_xfer_one(struct spi_master *master,
 static int falcon_sflash_probe(struct platform_device *pdev)
 {
 	struct falcon_sflash *priv;
-	struct spi_master *master;
+	struct spi_controller *host;
 	int ret;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*priv));
-	if (!master)
+	host = spi_alloc_host(&pdev->dev, sizeof(*priv));
+	if (!host)
 		return -ENOMEM;
 
-	priv = spi_master_get_devdata(master);
-	priv->master = master;
+	priv = spi_controller_get_devdata(host);
+	priv->host = host;
 
-	master->mode_bits = SPI_MODE_3;
-	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
-	master->setup = falcon_sflash_setup;
-	master->transfer_one_message = falcon_sflash_xfer_one;
-	master->dev.of_node = pdev->dev.of_node;
+	host->mode_bits = SPI_MODE_3;
+	host->flags = SPI_CONTROLLER_HALF_DUPLEX;
+	host->setup = falcon_sflash_setup;
+	host->transfer_one_message = falcon_sflash_xfer_one;
+	host->dev.of_node = pdev->dev.of_node;
 
-	ret = devm_spi_register_master(&pdev->dev, master);
+	ret = devm_spi_register_controller(&pdev->dev, host);
 	if (ret)
-		spi_master_put(master);
+		spi_controller_put(host);
 	return ret;
 }
 

From d40f10d009d405a82b533633c126be7fcc88b4ce Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:50 +0800
Subject: [PATCH 1058/2047] spi: fsi: switch to use spi_alloc_host()

Switch to use modern name function spi_alloc_host().

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-6-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-fsi.c b/drivers/spi/spi-fsi.c
index ba3b17d7c9ec..fc9e33be1e0e 100644
--- a/drivers/spi/spi-fsi.c
+++ b/drivers/spi/spi-fsi.c
@@ -542,7 +542,7 @@ static int fsi_spi_probe(struct device *dev)
 		if (of_property_read_u32(np, "reg", &base))
 			continue;
 
-		ctlr = spi_alloc_master(dev, sizeof(*ctx));
+		ctlr = spi_alloc_host(dev, sizeof(*ctx));
 		if (!ctlr) {
 			of_node_put(np);
 			break;

From 6230d6cad1a70d78f3fe3a0cda2beb1e478c4a40 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:51 +0800
Subject: [PATCH 1059/2047] spi: fsl-dspi: switch to use modern name

Change legacy name master/target to modern name host/target or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-7-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-dspi.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 150d2ebf234b..8318249f8a1f 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -23,7 +23,7 @@
 #define DRIVER_NAME			"fsl-dspi"
 
 #define SPI_MCR				0x00
-#define SPI_MCR_MASTER			BIT(31)
+#define SPI_MCR_HOST			BIT(31)
 #define SPI_MCR_PCSIS(x)		((x) << 16)
 #define SPI_MCR_CLR_TXF			BIT(11)
 #define SPI_MCR_CLR_RXF			BIT(10)
@@ -340,7 +340,7 @@ static u32 dspi_pop_tx_pushr(struct fsl_dspi *dspi)
 {
 	u16 cmd = dspi->tx_cmd, data = dspi_pop_tx(dspi);
 
-	if (spi_controller_is_slave(dspi->ctlr))
+	if (spi_controller_is_target(dspi->ctlr))
 		return data;
 
 	if (dspi->len > 0)
@@ -430,7 +430,7 @@ static int dspi_next_xfer_dma_submit(struct fsl_dspi *dspi)
 	dma_async_issue_pending(dma->chan_rx);
 	dma_async_issue_pending(dma->chan_tx);
 
-	if (spi_controller_is_slave(dspi->ctlr)) {
+	if (spi_controller_is_target(dspi->ctlr)) {
 		wait_for_completion_interruptible(&dspi->dma->cmd_rx_complete);
 		return 0;
 	}
@@ -1061,7 +1061,7 @@ static int dspi_setup(struct spi_device *spi)
 	if (spi->mode & SPI_CPHA)
 		chip->ctar_val |= SPI_CTAR_CPHA;
 
-	if (!spi_controller_is_slave(dspi->ctlr)) {
+	if (!spi_controller_is_target(dspi->ctlr)) {
 		chip->ctar_val |= SPI_CTAR_PCSSCK(pcssck) |
 				  SPI_CTAR_CSSCK(cssck) |
 				  SPI_CTAR_PASC(pasc) |
@@ -1216,8 +1216,8 @@ static int dspi_init(struct fsl_dspi *dspi)
 
 	if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE)
 		mcr |= SPI_MCR_XSPI;
-	if (!spi_controller_is_slave(dspi->ctlr))
-		mcr |= SPI_MCR_MASTER;
+	if (!spi_controller_is_target(dspi->ctlr))
+		mcr |= SPI_MCR_HOST;
 
 	regmap_write(dspi->regmap, SPI_MCR, mcr);
 	regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR);
@@ -1240,13 +1240,13 @@ static int dspi_init(struct fsl_dspi *dspi)
 	return 0;
 }
 
-static int dspi_slave_abort(struct spi_master *master)
+static int dspi_target_abort(struct spi_controller *host)
 {
-	struct fsl_dspi *dspi = spi_master_get_devdata(master);
+	struct fsl_dspi *dspi = spi_controller_get_devdata(host);
 
 	/*
 	 * Terminate all pending DMA transactions for the SPI working
-	 * in SLAVE mode.
+	 * in TARGET mode.
 	 */
 	if (dspi->devtype_data->trans_mode == DSPI_DMA_MODE) {
 		dmaengine_terminate_sync(dspi->dma->chan_rx);
@@ -1277,7 +1277,7 @@ static int dspi_probe(struct platform_device *pdev)
 	if (!dspi)
 		return -ENOMEM;
 
-	ctlr = spi_alloc_master(&pdev->dev, 0);
+	ctlr = spi_alloc_host(&pdev->dev, 0);
 	if (!ctlr)
 		return -ENOMEM;
 
@@ -1292,7 +1292,7 @@ static int dspi_probe(struct platform_device *pdev)
 	ctlr->dev.of_node = pdev->dev.of_node;
 
 	ctlr->cleanup = dspi_cleanup;
-	ctlr->slave_abort = dspi_slave_abort;
+	ctlr->target_abort = dspi_target_abort;
 	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
 	ctlr->use_gpio_descriptors = true;
 
@@ -1317,7 +1317,7 @@ static int dspi_probe(struct platform_device *pdev)
 		ctlr->bus_num = bus_num;
 
 		if (of_property_read_bool(np, "spi-slave"))
-			ctlr->slave = true;
+			ctlr->target = true;
 
 		dspi->devtype_data = of_device_get_match_data(&pdev->dev);
 		if (!dspi->devtype_data) {

From a87935899f9c51fc6272c13339fbfd2ccec37d21 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:52 +0800
Subject: [PATCH 1060/2047] spi: fsl-espi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-8-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-espi.c | 76 +++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
index b3d2d3db5850..ea647ee94da8 100644
--- a/drivers/spi/spi-fsl-espi.c
+++ b/drivers/spi/spi-fsl-espi.c
@@ -148,7 +148,7 @@ static inline void fsl_espi_write_reg8(struct fsl_espi *espi, int offset,
 
 static int fsl_espi_check_message(struct spi_message *m)
 {
-	struct fsl_espi *espi = spi_master_get_devdata(m->spi->master);
+	struct fsl_espi *espi = spi_controller_get_devdata(m->spi->controller);
 	struct spi_transfer *t, *first;
 
 	if (m->frame_length > SPCOM_TRANLEN_MAX) {
@@ -323,7 +323,7 @@ start:
 static void fsl_espi_setup_transfer(struct spi_device *spi,
 					struct spi_transfer *t)
 {
-	struct fsl_espi *espi = spi_master_get_devdata(spi->master);
+	struct fsl_espi *espi = spi_controller_get_devdata(spi->controller);
 	int bits_per_word = t ? t->bits_per_word : spi->bits_per_word;
 	u32 pm, hz = t ? t->speed_hz : spi->max_speed_hz;
 	struct fsl_espi_cs *cs = spi_get_ctldata(spi);
@@ -351,7 +351,7 @@ static void fsl_espi_setup_transfer(struct spi_device *spi,
 
 static int fsl_espi_bufs(struct spi_device *spi, struct spi_transfer *t)
 {
-	struct fsl_espi *espi = spi_master_get_devdata(spi->master);
+	struct fsl_espi *espi = spi_controller_get_devdata(spi->controller);
 	unsigned int rx_len = t->len;
 	u32 mask, spcom;
 	int ret;
@@ -396,7 +396,7 @@ static int fsl_espi_bufs(struct spi_device *spi, struct spi_transfer *t)
 
 static int fsl_espi_trans(struct spi_message *m, struct spi_transfer *trans)
 {
-	struct fsl_espi *espi = spi_master_get_devdata(m->spi->master);
+	struct fsl_espi *espi = spi_controller_get_devdata(m->spi->controller);
 	struct spi_device *spi = m->spi;
 	int ret;
 
@@ -432,7 +432,7 @@ static int fsl_espi_trans(struct spi_message *m, struct spi_transfer *trans)
 	return ret;
 }
 
-static int fsl_espi_do_one_msg(struct spi_master *master,
+static int fsl_espi_do_one_msg(struct spi_controller *host,
 			       struct spi_message *m)
 {
 	unsigned int rx_nbits = 0, delay_nsecs = 0;
@@ -470,7 +470,7 @@ out:
 	if (m->status == -EINPROGRESS)
 		m->status = ret;
 
-	spi_finalize_current_message(master);
+	spi_finalize_current_message(host);
 
 	return ret;
 }
@@ -488,7 +488,7 @@ static int fsl_espi_setup(struct spi_device *spi)
 		spi_set_ctldata(spi, cs);
 	}
 
-	espi = spi_master_get_devdata(spi->master);
+	espi = spi_controller_get_devdata(spi->controller);
 
 	pm_runtime_get_sync(espi->dev);
 
@@ -584,8 +584,8 @@ static irqreturn_t fsl_espi_irq(s32 irq, void *context_data)
 #ifdef CONFIG_PM
 static int fsl_espi_runtime_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct fsl_espi *espi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct fsl_espi *espi = spi_controller_get_devdata(host);
 	u32 regval;
 
 	regval = fsl_espi_read_reg(espi, ESPI_SPMODE);
@@ -597,8 +597,8 @@ static int fsl_espi_runtime_suspend(struct device *dev)
 
 static int fsl_espi_runtime_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct fsl_espi *espi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct fsl_espi *espi = spi_controller_get_devdata(host);
 	u32 regval;
 
 	regval = fsl_espi_read_reg(espi, ESPI_SPMODE);
@@ -616,8 +616,8 @@ static size_t fsl_espi_max_message_size(struct spi_device *spi)
 
 static void fsl_espi_init_regs(struct device *dev, bool initial)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct fsl_espi *espi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct fsl_espi *espi = spi_controller_get_devdata(host);
 	struct device_node *nc;
 	u32 csmode, cs, prop;
 	int ret;
@@ -629,10 +629,10 @@ static void fsl_espi_init_regs(struct device *dev, bool initial)
 	fsl_espi_write_reg(espi, ESPI_SPIE, 0xffffffff);
 
 	/* Init eSPI CS mode register */
-	for_each_available_child_of_node(master->dev.of_node, nc) {
+	for_each_available_child_of_node(host->dev.of_node, nc) {
 		/* get chip select */
 		ret = of_property_read_u32(nc, "reg", &cs);
-		if (ret || cs >= master->num_chipselect)
+		if (ret || cs >= host->num_chipselect)
 			continue;
 
 		csmode = CSMODE_INIT_VAL;
@@ -664,28 +664,28 @@ static void fsl_espi_init_regs(struct device *dev, bool initial)
 static int fsl_espi_probe(struct device *dev, struct resource *mem,
 			  unsigned int irq, unsigned int num_cs)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct fsl_espi *espi;
 	int ret;
 
-	master = spi_alloc_master(dev, sizeof(struct fsl_espi));
-	if (!master)
+	host = spi_alloc_host(dev, sizeof(struct fsl_espi));
+	if (!host)
 		return -ENOMEM;
 
-	dev_set_drvdata(dev, master);
+	dev_set_drvdata(dev, host);
 
-	master->mode_bits = SPI_RX_DUAL | SPI_CPOL | SPI_CPHA | SPI_CS_HIGH |
-			    SPI_LSB_FIRST | SPI_LOOP;
-	master->dev.of_node = dev->of_node;
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
-	master->setup = fsl_espi_setup;
-	master->cleanup = fsl_espi_cleanup;
-	master->transfer_one_message = fsl_espi_do_one_msg;
-	master->auto_runtime_pm = true;
-	master->max_message_size = fsl_espi_max_message_size;
-	master->num_chipselect = num_cs;
+	host->mode_bits = SPI_RX_DUAL | SPI_CPOL | SPI_CPHA | SPI_CS_HIGH |
+			  SPI_LSB_FIRST | SPI_LOOP;
+	host->dev.of_node = dev->of_node;
+	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
+	host->setup = fsl_espi_setup;
+	host->cleanup = fsl_espi_cleanup;
+	host->transfer_one_message = fsl_espi_do_one_msg;
+	host->auto_runtime_pm = true;
+	host->max_message_size = fsl_espi_max_message_size;
+	host->num_chipselect = num_cs;
 
-	espi = spi_master_get_devdata(master);
+	espi = spi_controller_get_devdata(host);
 	spin_lock_init(&espi->lock);
 
 	espi->dev = dev;
@@ -696,8 +696,8 @@ static int fsl_espi_probe(struct device *dev, struct resource *mem,
 		goto err_probe;
 	}
 	/* determined by clock divider fields DIV16/PM in register SPMODEx */
-	master->min_speed_hz = DIV_ROUND_UP(espi->spibrg, 4 * 16 * 16);
-	master->max_speed_hz = DIV_ROUND_UP(espi->spibrg, 4);
+	host->min_speed_hz = DIV_ROUND_UP(espi->spibrg, 4 * 16 * 16);
+	host->max_speed_hz = DIV_ROUND_UP(espi->spibrg, 4);
 
 	init_completion(&espi->done);
 
@@ -720,7 +720,7 @@ static int fsl_espi_probe(struct device *dev, struct resource *mem,
 	pm_runtime_enable(dev);
 	pm_runtime_get_sync(dev);
 
-	ret = devm_spi_register_master(dev, master);
+	ret = devm_spi_register_controller(dev, host);
 	if (ret < 0)
 		goto err_pm;
 
@@ -736,7 +736,7 @@ err_pm:
 	pm_runtime_disable(dev);
 	pm_runtime_set_suspended(dev);
 err_probe:
-	spi_master_put(master);
+	spi_controller_put(host);
 	return ret;
 }
 
@@ -791,10 +791,10 @@ static void of_fsl_espi_remove(struct platform_device *dev)
 #ifdef CONFIG_PM_SLEEP
 static int of_fsl_espi_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
+	struct spi_controller *host = dev_get_drvdata(dev);
 	int ret;
 
-	ret = spi_master_suspend(master);
+	ret = spi_controller_suspend(host);
 	if (ret)
 		return ret;
 
@@ -803,7 +803,7 @@ static int of_fsl_espi_suspend(struct device *dev)
 
 static int of_fsl_espi_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
+	struct spi_controller *host = dev_get_drvdata(dev);
 	int ret;
 
 	fsl_espi_init_regs(dev, false);
@@ -812,7 +812,7 @@ static int of_fsl_espi_resume(struct device *dev)
 	if (ret < 0)
 		return ret;
 
-	return spi_master_resume(master);
+	return spi_controller_resume(host);
 }
 #endif /* CONFIG_PM_SLEEP */
 

From 2e2af40b57ad4464eb6c3bffa1fd500551a32c4a Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:53 +0800
Subject: [PATCH 1061/2047] spi: fsl-lpspi: switch to use modern name

Change legacy name master/target to modern name host/target

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-9-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-lpspi.c | 54 ++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index e32e8cab5aa8..11991eb12636 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -72,7 +72,7 @@
 #define CFGR1_PINCFG	(BIT(24)|BIT(25))
 #define CFGR1_PCSPOL	BIT(8)
 #define CFGR1_NOSTALL	BIT(3)
-#define CFGR1_MASTER	BIT(0)
+#define CFGR1_HOST	BIT(0)
 #define FSR_TXCOUNT	(0xFF)
 #define RSR_RXEMPTY	BIT(1)
 #define TCR_CPOL	BIT(31)
@@ -96,7 +96,7 @@ struct fsl_lpspi_data {
 	unsigned long base_phys;
 	struct clk *clk_ipg;
 	struct clk *clk_per;
-	bool is_slave;
+	bool is_target;
 	bool is_only_cs1;
 	bool is_first_byte;
 
@@ -113,7 +113,7 @@ struct fsl_lpspi_data {
 	struct lpspi_config config;
 	struct completion xfer_done;
 
-	bool slave_aborted;
+	bool target_aborted;
 
 	/* DMA */
 	bool usedma;
@@ -234,7 +234,7 @@ static void fsl_lpspi_write_tx_fifo(struct fsl_lpspi_data *fsl_lpspi)
 	}
 
 	if (txfifo_cnt < fsl_lpspi->txfifosize) {
-		if (!fsl_lpspi->is_slave) {
+		if (!fsl_lpspi->is_target) {
 			temp = readl(fsl_lpspi->base + IMX7ULP_TCR);
 			temp &= ~TCR_CONTC;
 			writel(temp, fsl_lpspi->base + IMX7ULP_TCR);
@@ -258,7 +258,7 @@ static void fsl_lpspi_set_cmd(struct fsl_lpspi_data *fsl_lpspi)
 	temp |= fsl_lpspi->config.bpw - 1;
 	temp |= (fsl_lpspi->config.mode & 0x3) << 30;
 	temp |= (fsl_lpspi->config.chip_select & 0x3) << 24;
-	if (!fsl_lpspi->is_slave) {
+	if (!fsl_lpspi->is_target) {
 		temp |= fsl_lpspi->config.prescale << 27;
 		/*
 		 * Set TCR_CONT will keep SS asserted after current transfer.
@@ -385,7 +385,7 @@ static int fsl_lpspi_config(struct fsl_lpspi_data *fsl_lpspi)
 	u32 temp;
 	int ret;
 
-	if (!fsl_lpspi->is_slave) {
+	if (!fsl_lpspi->is_target) {
 		ret = fsl_lpspi_set_bitrate(fsl_lpspi);
 		if (ret)
 			return ret;
@@ -393,8 +393,8 @@ static int fsl_lpspi_config(struct fsl_lpspi_data *fsl_lpspi)
 
 	fsl_lpspi_set_watermark(fsl_lpspi);
 
-	if (!fsl_lpspi->is_slave)
-		temp = CFGR1_MASTER;
+	if (!fsl_lpspi->is_target)
+		temp = CFGR1_HOST;
 	else
 		temp = CFGR1_PINCFG;
 	if (fsl_lpspi->config.mode & SPI_CS_HIGH)
@@ -461,12 +461,12 @@ static int fsl_lpspi_setup_transfer(struct spi_controller *controller,
 	return fsl_lpspi_config(fsl_lpspi);
 }
 
-static int fsl_lpspi_slave_abort(struct spi_controller *controller)
+static int fsl_lpspi_target_abort(struct spi_controller *controller)
 {
 	struct fsl_lpspi_data *fsl_lpspi =
 				spi_controller_get_devdata(controller);
 
-	fsl_lpspi->slave_aborted = true;
+	fsl_lpspi->target_aborted = true;
 	if (!fsl_lpspi->usedma)
 		complete(&fsl_lpspi->xfer_done);
 	else {
@@ -482,9 +482,9 @@ static int fsl_lpspi_wait_for_completion(struct spi_controller *controller)
 	struct fsl_lpspi_data *fsl_lpspi =
 				spi_controller_get_devdata(controller);
 
-	if (fsl_lpspi->is_slave) {
+	if (fsl_lpspi->is_target) {
 		if (wait_for_completion_interruptible(&fsl_lpspi->xfer_done) ||
-			fsl_lpspi->slave_aborted) {
+			fsl_lpspi->target_aborted) {
 			dev_dbg(fsl_lpspi->dev, "interrupted\n");
 			return -EINTR;
 		}
@@ -587,9 +587,9 @@ static int fsl_lpspi_dma_transfer(struct spi_controller *controller,
 	reinit_completion(&fsl_lpspi->dma_tx_completion);
 	dma_async_issue_pending(controller->dma_tx);
 
-	fsl_lpspi->slave_aborted = false;
+	fsl_lpspi->target_aborted = false;
 
-	if (!fsl_lpspi->is_slave) {
+	if (!fsl_lpspi->is_target) {
 		transfer_timeout = fsl_lpspi_calculate_timeout(fsl_lpspi,
 							       transfer->len);
 
@@ -615,7 +615,7 @@ static int fsl_lpspi_dma_transfer(struct spi_controller *controller,
 		}
 	} else {
 		if (wait_for_completion_interruptible(&fsl_lpspi->dma_tx_completion) ||
-			fsl_lpspi->slave_aborted) {
+			fsl_lpspi->target_aborted) {
 			dev_dbg(fsl_lpspi->dev,
 				"I/O Error in DMA TX interrupted\n");
 			dmaengine_terminate_all(controller->dma_tx);
@@ -625,7 +625,7 @@ static int fsl_lpspi_dma_transfer(struct spi_controller *controller,
 		}
 
 		if (wait_for_completion_interruptible(&fsl_lpspi->dma_rx_completion) ||
-			fsl_lpspi->slave_aborted) {
+			fsl_lpspi->target_aborted) {
 			dev_dbg(fsl_lpspi->dev,
 				"I/O Error in DMA RX interrupted\n");
 			dmaengine_terminate_all(controller->dma_tx);
@@ -700,7 +700,7 @@ static int fsl_lpspi_pio_transfer(struct spi_controller *controller,
 	fsl_lpspi->remain = t->len;
 
 	reinit_completion(&fsl_lpspi->xfer_done);
-	fsl_lpspi->slave_aborted = false;
+	fsl_lpspi->target_aborted = false;
 
 	fsl_lpspi_write_tx_fifo(fsl_lpspi);
 
@@ -826,15 +826,15 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
 	int ret, irq;
 	u32 num_cs;
 	u32 temp;
-	bool is_slave;
+	bool is_target;
 
-	is_slave = of_property_read_bool((&pdev->dev)->of_node, "spi-slave");
-	if (is_slave)
-		controller = spi_alloc_slave(&pdev->dev,
-					sizeof(struct fsl_lpspi_data));
+	is_target = of_property_read_bool((&pdev->dev)->of_node, "spi-slave");
+	if (is_target)
+		controller = spi_alloc_target(&pdev->dev,
+					      sizeof(struct fsl_lpspi_data));
 	else
-		controller = spi_alloc_master(&pdev->dev,
-					sizeof(struct fsl_lpspi_data));
+		controller = spi_alloc_host(&pdev->dev,
+					    sizeof(struct fsl_lpspi_data));
 
 	if (!controller)
 		return -ENOMEM;
@@ -843,7 +843,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
 
 	fsl_lpspi = spi_controller_get_devdata(controller);
 	fsl_lpspi->dev = &pdev->dev;
-	fsl_lpspi->is_slave = is_slave;
+	fsl_lpspi->is_target = is_target;
 	fsl_lpspi->is_only_cs1 = of_property_read_bool((&pdev->dev)->of_node,
 						"fsl,spi-only-use-cs1-sel");
 
@@ -912,8 +912,8 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
 	controller->dev.of_node = pdev->dev.of_node;
 	controller->bus_num = pdev->id;
 	controller->num_chipselect = num_cs;
-	controller->slave_abort = fsl_lpspi_slave_abort;
-	if (!fsl_lpspi->is_slave)
+	controller->target_abort = fsl_lpspi_target_abort;
+	if (!fsl_lpspi->is_target)
 		controller->use_gpio_descriptors = true;
 
 	ret = fsl_lpspi_dma_init(&pdev->dev, fsl_lpspi, controller);

From ec6a79529670bf6bea02ff134d881e165cd085ab Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:54 +0800
Subject: [PATCH 1062/2047] spi: fsl-qspi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-10-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-qspi.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c
index e3de81248893..79bac30e79af 100644
--- a/drivers/spi/spi-fsl-qspi.c
+++ b/drivers/spi/spi-fsl-qspi.c
@@ -367,7 +367,7 @@ static int fsl_qspi_check_buswidth(struct fsl_qspi *q, u8 width)
 static bool fsl_qspi_supports_op(struct spi_mem *mem,
 				 const struct spi_mem_op *op)
 {
-	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->controller);
 	int ret;
 
 	ret = fsl_qspi_check_buswidth(q, op->cmd.buswidth);
@@ -640,7 +640,7 @@ static int fsl_qspi_readl_poll_tout(struct fsl_qspi *q, void __iomem *base,
 
 static int fsl_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
 {
-	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->controller);
 	void __iomem *base = q->iobase;
 	u32 addr_offset = 0;
 	int err = 0;
@@ -702,7 +702,7 @@ static int fsl_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
 
 static int fsl_qspi_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
 {
-	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->controller);
 
 	if (op->data.dir == SPI_MEM_DATA_OUT) {
 		if (op->data.nbytes > q->devtype_data->txfifo)
@@ -808,7 +808,7 @@ static int fsl_qspi_default_setup(struct fsl_qspi *q)
 
 static const char *fsl_qspi_get_name(struct spi_mem *mem)
 {
-	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->master);
+	struct fsl_qspi *q = spi_controller_get_devdata(mem->spi->controller);
 	struct device *dev = &mem->spi->dev;
 	const char *name;
 
@@ -848,7 +848,7 @@ static int fsl_qspi_probe(struct platform_device *pdev)
 	struct fsl_qspi *q;
 	int ret;
 
-	ctlr = spi_alloc_master(&pdev->dev, sizeof(*q));
+	ctlr = spi_alloc_host(&pdev->dev, sizeof(*q));
 	if (!ctlr)
 		return -ENOMEM;
 

From d32382ca5614614df0cbb91a1b7fab9c47bfef9b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:55 +0800
Subject: [PATCH 1063/2047] spi: fsl-spi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-11-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-spi.c | 76 +++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 9933842ab727..97faf984801f 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -88,7 +88,7 @@ static int fsl_spi_get_type(struct device *dev)
 
 static void fsl_spi_change_mode(struct spi_device *spi)
 {
-	struct mpc8xxx_spi *mspi = spi_master_get_devdata(spi->master);
+	struct mpc8xxx_spi *mspi = spi_controller_get_devdata(spi->controller);
 	struct spi_mpc8xxx_cs *cs = spi->controller_state;
 	struct fsl_spi_reg __iomem *reg_base = mspi->reg_base;
 	__be32 __iomem *mode = &reg_base->mode;
@@ -183,7 +183,7 @@ static int fsl_spi_setup_transfer(struct spi_device *spi,
 	u32 hz = 0;
 	struct spi_mpc8xxx_cs	*cs = spi->controller_state;
 
-	mpc8xxx_spi = spi_master_get_devdata(spi->master);
+	mpc8xxx_spi = spi_controller_get_devdata(spi->controller);
 
 	if (t) {
 		bits_per_word = t->bits_per_word;
@@ -252,7 +252,7 @@ static int fsl_spi_cpu_bufs(struct mpc8xxx_spi *mspi,
 static int fsl_spi_bufs(struct spi_device *spi, struct spi_transfer *t,
 			    bool is_dma_mapped)
 {
-	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
+	struct mpc8xxx_spi *mpc8xxx_spi = spi_controller_get_devdata(spi->controller);
 	struct fsl_spi_reg __iomem *reg_base;
 	unsigned int len = t->len;
 	u8 bits_per_word;
@@ -385,7 +385,7 @@ static int fsl_spi_setup(struct spi_device *spi)
 		spi_set_ctldata(spi, cs);
 		initial_setup = true;
 	}
-	mpc8xxx_spi = spi_master_get_devdata(spi->master);
+	mpc8xxx_spi = spi_controller_get_devdata(spi->controller);
 
 	reg_base = mpc8xxx_spi->reg_base;
 
@@ -479,7 +479,7 @@ static irqreturn_t fsl_spi_irq(s32 irq, void *context_data)
 
 static void fsl_spi_grlib_cs_control(struct spi_device *spi, bool on)
 {
-	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
+	struct mpc8xxx_spi *mpc8xxx_spi = spi_controller_get_devdata(spi->controller);
 	struct fsl_spi_reg __iomem *reg_base = mpc8xxx_spi->reg_base;
 	u32 slvsel;
 	u16 cs = spi_get_chipselect(spi, 0);
@@ -493,8 +493,8 @@ static void fsl_spi_grlib_cs_control(struct spi_device *spi, bool on)
 
 static void fsl_spi_grlib_probe(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct mpc8xxx_spi *mpc8xxx_spi = spi_controller_get_devdata(host);
 	struct fsl_spi_reg __iomem *reg_base = mpc8xxx_spi->reg_base;
 	int mbits;
 	u32 capabilities;
@@ -511,8 +511,8 @@ static void fsl_spi_grlib_probe(struct device *dev)
 		mpc8xxx_spi->native_chipselects = SPCAP_SSSZ(capabilities);
 		mpc8xxx_spi_write_reg(&reg_base->slvsel, 0xffffffff);
 	}
-	master->num_chipselect = mpc8xxx_spi->native_chipselects;
-	master->set_cs = fsl_spi_grlib_cs_control;
+	host->num_chipselect = mpc8xxx_spi->native_chipselects;
+	host->set_cs = fsl_spi_grlib_cs_control;
 }
 
 static void fsl_spi_cs_control(struct spi_device *spi, bool on)
@@ -526,35 +526,35 @@ static void fsl_spi_cs_control(struct spi_device *spi, bool on)
 	iowrite32be(on ? 0 : SPI_BOOT_SEL_BIT, pinfo->immr_spi_cs);
 }
 
-static struct spi_master *fsl_spi_probe(struct device *dev,
+static struct spi_controller *fsl_spi_probe(struct device *dev,
 		struct resource *mem, unsigned int irq)
 {
 	struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct mpc8xxx_spi *mpc8xxx_spi;
 	struct fsl_spi_reg __iomem *reg_base;
 	u32 regval;
 	int ret = 0;
 
-	master = spi_alloc_master(dev, sizeof(struct mpc8xxx_spi));
-	if (master == NULL) {
+	host = spi_alloc_host(dev, sizeof(struct mpc8xxx_spi));
+	if (host == NULL) {
 		ret = -ENOMEM;
 		goto err;
 	}
 
-	dev_set_drvdata(dev, master);
+	dev_set_drvdata(dev, host);
 
 	mpc8xxx_spi_probe(dev, mem, irq);
 
-	master->setup = fsl_spi_setup;
-	master->cleanup = fsl_spi_cleanup;
-	master->prepare_message = fsl_spi_prepare_message;
-	master->transfer_one = fsl_spi_transfer_one;
-	master->unprepare_message = fsl_spi_unprepare_message;
-	master->use_gpio_descriptors = true;
-	master->set_cs = fsl_spi_cs_control;
+	host->setup = fsl_spi_setup;
+	host->cleanup = fsl_spi_cleanup;
+	host->prepare_message = fsl_spi_prepare_message;
+	host->transfer_one = fsl_spi_transfer_one;
+	host->unprepare_message = fsl_spi_unprepare_message;
+	host->use_gpio_descriptors = true;
+	host->set_cs = fsl_spi_cs_control;
 
-	mpc8xxx_spi = spi_master_get_devdata(master);
+	mpc8xxx_spi = spi_controller_get_devdata(host);
 	mpc8xxx_spi->max_bits_per_word = 32;
 	mpc8xxx_spi->type = fsl_spi_get_type(dev);
 
@@ -572,13 +572,13 @@ static struct spi_master *fsl_spi_probe(struct device *dev,
 		fsl_spi_grlib_probe(dev);
 
 	if (mpc8xxx_spi->flags & SPI_CPM_MODE)
-		master->bits_per_word_mask =
+		host->bits_per_word_mask =
 			(SPI_BPW_RANGE_MASK(4, 8) | SPI_BPW_MASK(16) | SPI_BPW_MASK(32));
 	else
-		master->bits_per_word_mask =
+		host->bits_per_word_mask =
 			(SPI_BPW_RANGE_MASK(4, 16) | SPI_BPW_MASK(32));
 
-	master->bits_per_word_mask &=
+	host->bits_per_word_mask &=
 		SPI_BPW_RANGE_MASK(1, mpc8xxx_spi->max_bits_per_word);
 
 	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE)
@@ -615,19 +615,19 @@ static struct spi_master *fsl_spi_probe(struct device *dev,
 
 	mpc8xxx_spi_write_reg(&reg_base->mode, regval);
 
-	ret = devm_spi_register_master(dev, master);
+	ret = devm_spi_register_controller(dev, host);
 	if (ret < 0)
 		goto err_probe;
 
 	dev_info(dev, "at 0x%p (irq = %d), %s mode\n", reg_base,
 		 mpc8xxx_spi->irq, mpc8xxx_spi_strmode(mpc8xxx_spi->flags));
 
-	return master;
+	return host;
 
 err_probe:
 	fsl_spi_cpm_free(mpc8xxx_spi);
 err_cpm_init:
-	spi_master_put(master);
+	spi_controller_put(host);
 err:
 	return ERR_PTR(ret);
 }
@@ -636,7 +636,7 @@ static int of_fsl_spi_probe(struct platform_device *ofdev)
 {
 	struct device *dev = &ofdev->dev;
 	struct device_node *np = ofdev->dev.of_node;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct resource mem;
 	int irq, type;
 	int ret;
@@ -689,9 +689,9 @@ static int of_fsl_spi_probe(struct platform_device *ofdev)
 		goto unmap_out;
 	}
 
-	master = fsl_spi_probe(dev, &mem, irq);
+	host = fsl_spi_probe(dev, &mem, irq);
 
-	return PTR_ERR_OR_ZERO(master);
+	return PTR_ERR_OR_ZERO(host);
 
 unmap_out:
 #if IS_ENABLED(CONFIG_FSL_SOC)
@@ -703,8 +703,8 @@ unmap_out:
 
 static void of_fsl_spi_remove(struct platform_device *ofdev)
 {
-	struct spi_master *master = platform_get_drvdata(ofdev);
-	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(ofdev);
+	struct mpc8xxx_spi *mpc8xxx_spi = spi_controller_get_devdata(host);
 
 	fsl_spi_cpm_free(mpc8xxx_spi);
 }
@@ -730,7 +730,7 @@ static int plat_mpc8xxx_spi_probe(struct platform_device *pdev)
 {
 	struct resource *mem;
 	int irq;
-	struct spi_master *master;
+	struct spi_controller *host;
 
 	if (!dev_get_platdata(&pdev->dev))
 		return -EINVAL;
@@ -743,14 +743,14 @@ static int plat_mpc8xxx_spi_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	master = fsl_spi_probe(&pdev->dev, mem, irq);
-	return PTR_ERR_OR_ZERO(master);
+	host = fsl_spi_probe(&pdev->dev, mem, irq);
+	return PTR_ERR_OR_ZERO(host);
 }
 
 static void plat_mpc8xxx_spi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct mpc8xxx_spi *mpc8xxx_spi = spi_controller_get_devdata(host);
 
 	fsl_spi_cpm_free(mpc8xxx_spi);
 }

From 20becf43e8ca3a9ff00ed24e19695347e0a2f0e7 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:56 +0800
Subject: [PATCH 1064/2047] spi: gpio: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-12-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-gpio.c | 72 +++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index 8aa905e407a8..d8db4564b406 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * SPI master driver using generic bitbanged GPIO
+ * SPI host driver using generic bitbanged GPIO
  *
  * Copyright (C) 2006,2008 David Brownell
  * Copyright (C) 2017 Linus Walleij
@@ -17,7 +17,7 @@
 
 
 /*
- * This bitbanging SPI master driver should help make systems usable
+ * This bitbanging SPI host driver should help make systems usable
  * when a native hardware SPI engine is not available, perhaps because
  * its driver isn't yet working or because the I/O pins it requires
  * are used for other purposes.
@@ -26,7 +26,7 @@
  *
  * spi->controller_state ... reserved for bitbang framework code
  *
- * spi->master->dev.driver_data ... points to spi_gpio->bitbang
+ * spi->controller->dev.driver_data ... points to spi_gpio->bitbang
  */
 
 struct spi_gpio {
@@ -77,7 +77,7 @@ spi_to_spi_gpio(const struct spi_device *spi)
 	const struct spi_bitbang	*bang;
 	struct spi_gpio			*spi_gpio;
 
-	bang = spi_master_get_devdata(spi->master);
+	bang = spi_controller_get_devdata(spi->controller);
 	spi_gpio = container_of(bang, struct spi_gpio, bitbang);
 	return spi_gpio;
 }
@@ -180,7 +180,7 @@ static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi,
 static u32 spi_gpio_spec_txrx_word_mode0(struct spi_device *spi,
 		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	flags = spi->master->flags;
+	flags = spi->controller->flags;
 	if (unlikely(spi->mode & SPI_LSB_FIRST))
 		return bitbang_txrx_le_cpha0(spi, nsecs, 0, flags, word, bits);
 	else
@@ -190,7 +190,7 @@ static u32 spi_gpio_spec_txrx_word_mode0(struct spi_device *spi,
 static u32 spi_gpio_spec_txrx_word_mode1(struct spi_device *spi,
 		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	flags = spi->master->flags;
+	flags = spi->controller->flags;
 	if (unlikely(spi->mode & SPI_LSB_FIRST))
 		return bitbang_txrx_le_cpha1(spi, nsecs, 0, flags, word, bits);
 	else
@@ -200,7 +200,7 @@ static u32 spi_gpio_spec_txrx_word_mode1(struct spi_device *spi,
 static u32 spi_gpio_spec_txrx_word_mode2(struct spi_device *spi,
 		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	flags = spi->master->flags;
+	flags = spi->controller->flags;
 	if (unlikely(spi->mode & SPI_LSB_FIRST))
 		return bitbang_txrx_le_cpha0(spi, nsecs, 1, flags, word, bits);
 	else
@@ -210,7 +210,7 @@ static u32 spi_gpio_spec_txrx_word_mode2(struct spi_device *spi,
 static u32 spi_gpio_spec_txrx_word_mode3(struct spi_device *spi,
 		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	flags = spi->master->flags;
+	flags = spi->controller->flags;
 	if (unlikely(spi->mode & SPI_LSB_FIRST))
 		return bitbang_txrx_le_cpha1(spi, nsecs, 1, flags, word, bits);
 	else
@@ -310,7 +310,7 @@ static void spi_gpio_cleanup(struct spi_device *spi)
  * On platforms which can do so, configure MISO with a weak pullup unless
  * there's an external pullup on that signal.  That saves power by avoiding
  * floating signals.  (A weak pulldown would save power too, but many
- * drivers expect to see all-ones data as the no slave "response".)
+ * drivers expect to see all-ones data as the no target "response".)
  */
 static int spi_gpio_request(struct device *dev, struct spi_gpio *spi_gpio)
 {
@@ -334,27 +334,27 @@ static const struct of_device_id spi_gpio_dt_ids[] = {
 MODULE_DEVICE_TABLE(of, spi_gpio_dt_ids);
 
 static int spi_gpio_probe_dt(struct platform_device *pdev,
-			     struct spi_master *master)
+			     struct spi_controller *host)
 {
-	master->dev.of_node = pdev->dev.of_node;
-	master->use_gpio_descriptors = true;
+	host->dev.of_node = pdev->dev.of_node;
+	host->use_gpio_descriptors = true;
 
 	return 0;
 }
 #else
 static inline int spi_gpio_probe_dt(struct platform_device *pdev,
-				    struct spi_master *master)
+				    struct spi_controller *host)
 {
 	return 0;
 }
 #endif
 
 static int spi_gpio_probe_pdata(struct platform_device *pdev,
-				struct spi_master *master)
+				struct spi_controller *host)
 {
 	struct device *dev = &pdev->dev;
 	struct spi_gpio_platform_data *pdata = dev_get_platdata(dev);
-	struct spi_gpio *spi_gpio = spi_master_get_devdata(master);
+	struct spi_gpio *spi_gpio = spi_controller_get_devdata(host);
 	int i;
 
 #ifdef GENERIC_BITBANG
@@ -362,18 +362,18 @@ static int spi_gpio_probe_pdata(struct platform_device *pdev,
 		return -ENODEV;
 #endif
 	/*
-	 * The master needs to think there is a chipselect even if not
+	 * The host needs to think there is a chipselect even if not
 	 * connected
 	 */
-	master->num_chipselect = pdata->num_chipselect ?: 1;
+	host->num_chipselect = pdata->num_chipselect ?: 1;
 
-	spi_gpio->cs_gpios = devm_kcalloc(dev, master->num_chipselect,
+	spi_gpio->cs_gpios = devm_kcalloc(dev, host->num_chipselect,
 					  sizeof(*spi_gpio->cs_gpios),
 					  GFP_KERNEL);
 	if (!spi_gpio->cs_gpios)
 		return -ENOMEM;
 
-	for (i = 0; i < master->num_chipselect; i++) {
+	for (i = 0; i < host->num_chipselect; i++) {
 		spi_gpio->cs_gpios[i] = devm_gpiod_get_index(dev, "cs", i,
 							     GPIOD_OUT_HIGH);
 		if (IS_ERR(spi_gpio->cs_gpios[i]))
@@ -386,31 +386,31 @@ static int spi_gpio_probe_pdata(struct platform_device *pdev,
 static int spi_gpio_probe(struct platform_device *pdev)
 {
 	int				status;
-	struct spi_master		*master;
+	struct spi_controller		*host;
 	struct spi_gpio			*spi_gpio;
 	struct device			*dev = &pdev->dev;
 	struct spi_bitbang		*bb;
 
-	master = devm_spi_alloc_master(dev, sizeof(*spi_gpio));
-	if (!master)
+	host = devm_spi_alloc_host(dev, sizeof(*spi_gpio));
+	if (!host)
 		return -ENOMEM;
 
 	if (pdev->dev.of_node)
-		status = spi_gpio_probe_dt(pdev, master);
+		status = spi_gpio_probe_dt(pdev, host);
 	else
-		status = spi_gpio_probe_pdata(pdev, master);
+		status = spi_gpio_probe_pdata(pdev, host);
 
 	if (status)
 		return status;
 
-	spi_gpio = spi_master_get_devdata(master);
+	spi_gpio = spi_controller_get_devdata(host);
 
 	status = spi_gpio_request(dev, spi_gpio);
 	if (status)
 		return status;
 
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
-	master->mode_bits = SPI_3WIRE | SPI_3WIRE_HIZ | SPI_CPHA | SPI_CPOL |
+	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
+	host->mode_bits = SPI_3WIRE | SPI_3WIRE_HIZ | SPI_CPHA | SPI_CPOL |
 			    SPI_CS_HIGH | SPI_LSB_FIRST;
 	if (!spi_gpio->mosi) {
 		/* HW configuration without MOSI pin
@@ -419,25 +419,25 @@ static int spi_gpio_probe(struct platform_device *pdev)
 		 * a MOSI pin connected the host can still do RX by
 		 * changing the direction of the line.
 		 */
-		master->flags = SPI_CONTROLLER_NO_TX;
+		host->flags = SPI_CONTROLLER_NO_TX;
 	}
 
-	master->bus_num = pdev->id;
-	master->setup = spi_gpio_setup;
-	master->cleanup = spi_gpio_cleanup;
+	host->bus_num = pdev->id;
+	host->setup = spi_gpio_setup;
+	host->cleanup = spi_gpio_cleanup;
 
 	bb = &spi_gpio->bitbang;
-	bb->master = master;
+	bb->master = host;
 	/*
 	 * There is some additional business, apart from driving the CS GPIO
 	 * line, that we need to do on selection. This makes the local
 	 * callback for chipselect always get called.
 	 */
-	master->flags |= SPI_CONTROLLER_GPIO_SS;
+	host->flags |= SPI_CONTROLLER_GPIO_SS;
 	bb->chipselect = spi_gpio_chipselect;
 	bb->set_line_direction = spi_gpio_set_direction;
 
-	if (master->flags & SPI_CONTROLLER_NO_TX) {
+	if (host->flags & SPI_CONTROLLER_NO_TX) {
 		bb->txrx_word[SPI_MODE_0] = spi_gpio_spec_txrx_word_mode0;
 		bb->txrx_word[SPI_MODE_1] = spi_gpio_spec_txrx_word_mode1;
 		bb->txrx_word[SPI_MODE_2] = spi_gpio_spec_txrx_word_mode2;
@@ -454,7 +454,7 @@ static int spi_gpio_probe(struct platform_device *pdev)
 	if (status)
 		return status;
 
-	return devm_spi_register_master(&pdev->dev, master);
+	return devm_spi_register_controller(&pdev->dev, host);
 }
 
 MODULE_ALIAS("platform:" DRIVER_NAME);
@@ -468,6 +468,6 @@ static struct platform_driver spi_gpio_driver = {
 };
 module_platform_driver(spi_gpio_driver);
 
-MODULE_DESCRIPTION("SPI master driver using generic bitbanged GPIO ");
+MODULE_DESCRIPTION("SPI host driver using generic bitbanged GPIO ");
 MODULE_AUTHOR("David Brownell");
 MODULE_LICENSE("GPL");

From 6588d43ae8e3046996b315cc6dcb2a7e0c2bb547 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:57 +0800
Subject: [PATCH 1065/2047] spi: gxp: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-13-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-gxp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-gxp.c b/drivers/spi/spi-gxp.c
index 88ab3032736b..fd2fac236bbd 100644
--- a/drivers/spi/spi-gxp.c
+++ b/drivers/spi/spi-gxp.c
@@ -199,7 +199,7 @@ static ssize_t gxp_spi_write(struct gxp_spi_chip *chip, const struct spi_mem_op
 
 static int do_gxp_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op)
 {
-	struct gxp_spi *spifi = spi_controller_get_devdata(mem->spi->master);
+	struct gxp_spi *spifi = spi_controller_get_devdata(mem->spi->controller);
 	struct gxp_spi_chip *chip = &spifi->chips[spi_get_chipselect(mem->spi, 0)];
 	int ret;
 
@@ -235,7 +235,7 @@ static const struct spi_controller_mem_ops gxp_spi_mem_ops = {
 
 static int gxp_spi_setup(struct spi_device *spi)
 {
-	struct gxp_spi *spifi = spi_controller_get_devdata(spi->master);
+	struct gxp_spi *spifi = spi_controller_get_devdata(spi->controller);
 	unsigned int cs = spi_get_chipselect(spi, 0);
 	struct gxp_spi_chip *chip = &spifi->chips[cs];
 
@@ -257,7 +257,7 @@ static int gxp_spifi_probe(struct platform_device *pdev)
 
 	data = of_device_get_match_data(&pdev->dev);
 
-	ctlr = devm_spi_alloc_master(dev, sizeof(*spifi));
+	ctlr = devm_spi_alloc_host(dev, sizeof(*spifi));
 	if (!ctlr)
 		return -ENOMEM;
 

From 3dc6e684b4f3854a867b5745db868d6f60466f7b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:58 +0800
Subject: [PATCH 1066/2047] spi: bcmbca-hsspi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-14-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcmbca-hsspi.c | 66 +++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/drivers/spi/spi-bcmbca-hsspi.c b/drivers/spi/spi-bcmbca-hsspi.c
index ca1b4741e9f4..9f64afd8164e 100644
--- a/drivers/spi/spi-bcmbca-hsspi.c
+++ b/drivers/spi/spi-bcmbca-hsspi.c
@@ -127,7 +127,7 @@ static ssize_t wait_mode_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
 	struct spi_controller *ctrl = dev_get_drvdata(dev);
-	struct bcmbca_hsspi *bs = spi_master_get_devdata(ctrl);
+	struct bcmbca_hsspi *bs = spi_controller_get_devdata(ctrl);
 
 	return sprintf(buf, "%d\n", bs->wait_mode);
 }
@@ -136,7 +136,7 @@ static ssize_t wait_mode_store(struct device *dev, struct device_attribute *attr
 			  const char *buf, size_t count)
 {
 	struct spi_controller *ctrl = dev_get_drvdata(dev);
-	struct bcmbca_hsspi *bs = spi_master_get_devdata(ctrl);
+	struct bcmbca_hsspi *bs = spi_controller_get_devdata(ctrl);
 	u32 val;
 
 	if (kstrtou32(buf, 10, &val))
@@ -250,7 +250,7 @@ static int bcmbca_hsspi_wait_cmd(struct bcmbca_hsspi *bs, unsigned int cs)
 static int bcmbca_hsspi_do_txrx(struct spi_device *spi, struct spi_transfer *t,
 								struct spi_message *msg)
 {
-	struct bcmbca_hsspi *bs = spi_master_get_devdata(spi->master);
+	struct bcmbca_hsspi *bs = spi_controller_get_devdata(spi->controller);
 	unsigned int chip_select = spi_get_chipselect(spi, 0);
 	u16 opcode = 0, val;
 	int pending = t->len;
@@ -328,7 +328,7 @@ static int bcmbca_hsspi_do_txrx(struct spi_device *spi, struct spi_transfer *t,
 
 static int bcmbca_hsspi_setup(struct spi_device *spi)
 {
-	struct bcmbca_hsspi *bs = spi_master_get_devdata(spi->master);
+	struct bcmbca_hsspi *bs = spi_controller_get_devdata(spi->controller);
 	u32 reg;
 
 	reg = __raw_readl(bs->regs +
@@ -366,10 +366,10 @@ static int bcmbca_hsspi_setup(struct spi_device *spi)
 	return 0;
 }
 
-static int bcmbca_hsspi_transfer_one(struct spi_master *master,
+static int bcmbca_hsspi_transfer_one(struct spi_controller *host,
 				      struct spi_message *msg)
 {
-	struct bcmbca_hsspi *bs = spi_master_get_devdata(master);
+	struct bcmbca_hsspi *bs = spi_controller_get_devdata(host);
 	struct spi_transfer *t;
 	struct spi_device *spi = msg->spi;
 	int status = -EINVAL;
@@ -409,7 +409,7 @@ static int bcmbca_hsspi_transfer_one(struct spi_master *master,
 		bcmbca_hsspi_set_cs(bs, spi_get_chipselect(spi, 0), false);
 
 	msg->status = status;
-	spi_finalize_current_message(master);
+	spi_finalize_current_message(host);
 
 	return 0;
 }
@@ -431,7 +431,7 @@ static irqreturn_t bcmbca_hsspi_interrupt(int irq, void *dev_id)
 
 static int bcmbca_hsspi_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct bcmbca_hsspi *bs;
 	struct resource *res_mem;
 	void __iomem *spim_ctrl;
@@ -487,13 +487,13 @@ static int bcmbca_hsspi_probe(struct platform_device *pdev)
 		}
 	}
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*bs));
-	if (!master) {
+	host = spi_alloc_host(&pdev->dev, sizeof(*bs));
+	if (!host) {
 		ret = -ENOMEM;
 		goto out_disable_pll_clk;
 	}
 
-	bs = spi_master_get_devdata(master);
+	bs = spi_controller_get_devdata(host);
 	bs->pdev = pdev;
 	bs->clk = clk;
 	bs->pll_clk = pll_clk;
@@ -507,9 +507,9 @@ static int bcmbca_hsspi_probe(struct platform_device *pdev)
 	mutex_init(&bs->msg_mutex);
 	init_completion(&bs->done);
 
-	master->dev.of_node = dev->of_node;
+	host->dev.of_node = dev->of_node;
 	if (!dev->of_node)
-		master->bus_num = HSSPI_BUS_NUM;
+		host->bus_num = HSSPI_BUS_NUM;
 
 	of_property_read_u32(dev->of_node, "num-cs", &num_cs);
 	if (num_cs > 8) {
@@ -517,15 +517,15 @@ static int bcmbca_hsspi_probe(struct platform_device *pdev)
 			 num_cs);
 		num_cs = HSSPI_SPI_MAX_CS;
 	}
-	master->num_chipselect = num_cs;
-	master->setup = bcmbca_hsspi_setup;
-	master->transfer_one_message = bcmbca_hsspi_transfer_one;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH |
-	    SPI_RX_DUAL | SPI_TX_DUAL;
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->auto_runtime_pm = true;
+	host->num_chipselect = num_cs;
+	host->setup = bcmbca_hsspi_setup;
+	host->transfer_one_message = bcmbca_hsspi_transfer_one;
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH |
+			  SPI_RX_DUAL | SPI_TX_DUAL;
+	host->bits_per_word_mask = SPI_BPW_MASK(8);
+	host->auto_runtime_pm = true;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
 	/* Initialize the hardware */
 	__raw_writel(0, bs->regs + HSSPI_INT_MASK_REG);
@@ -543,7 +543,7 @@ static int bcmbca_hsspi_probe(struct platform_device *pdev)
 		ret = devm_request_irq(dev, irq, bcmbca_hsspi_interrupt, IRQF_SHARED,
 			       pdev->name, bs);
 		if (ret)
-			goto out_put_master;
+			goto out_put_host;
 	}
 
 	pm_runtime_enable(&pdev->dev);
@@ -555,7 +555,7 @@ static int bcmbca_hsspi_probe(struct platform_device *pdev)
 	}
 
 	/* register and we are done */
-	ret = devm_spi_register_master(dev, master);
+	ret = devm_spi_register_controller(dev, host);
 	if (ret)
 		goto out_sysgroup_disable;
 
@@ -567,8 +567,8 @@ out_sysgroup_disable:
 	sysfs_remove_group(&pdev->dev.kobj, &bcmbca_hsspi_group);
 out_pm_disable:
 	pm_runtime_disable(&pdev->dev);
-out_put_master:
-	spi_master_put(master);
+out_put_host:
+	spi_controller_put(host);
 out_disable_pll_clk:
 	clk_disable_unprepare(pll_clk);
 out_disable_clk:
@@ -578,8 +578,8 @@ out_disable_clk:
 
 static void bcmbca_hsspi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct bcmbca_hsspi *bs = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct bcmbca_hsspi *bs = spi_controller_get_devdata(host);
 
 	/* reset the hardware and block queue progress */
 	__raw_writel(0, bs->regs + HSSPI_INT_MASK_REG);
@@ -591,10 +591,10 @@ static void bcmbca_hsspi_remove(struct platform_device *pdev)
 #ifdef CONFIG_PM_SLEEP
 static int bcmbca_hsspi_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct bcmbca_hsspi *bs = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct bcmbca_hsspi *bs = spi_controller_get_devdata(host);
 
-	spi_master_suspend(master);
+	spi_controller_suspend(host);
 	clk_disable_unprepare(bs->pll_clk);
 	clk_disable_unprepare(bs->clk);
 
@@ -603,8 +603,8 @@ static int bcmbca_hsspi_suspend(struct device *dev)
 
 static int bcmbca_hsspi_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct bcmbca_hsspi *bs = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct bcmbca_hsspi *bs = spi_controller_get_devdata(host);
 	int ret;
 
 	ret = clk_prepare_enable(bs->clk);
@@ -619,7 +619,7 @@ static int bcmbca_hsspi_resume(struct device *dev)
 		}
 	}
 
-	spi_master_resume(master);
+	spi_controller_resume(host);
 
 	return 0;
 }

From f64e6ee3725ec41049a2c724b624e19f87192c7d Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:40:59 +0800
Subject: [PATCH 1067/2047] spi: hisi-sfc-v3xx: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-15-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-hisi-sfc-v3xx.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/spi/spi-hisi-sfc-v3xx.c b/drivers/spi/spi-hisi-sfc-v3xx.c
index 7cbcb065bb44..9d22018f7985 100644
--- a/drivers/spi/spi-hisi-sfc-v3xx.c
+++ b/drivers/spi/spi-hisi-sfc-v3xx.c
@@ -152,7 +152,7 @@ static int hisi_sfc_v3xx_adjust_op_size(struct spi_mem *mem,
 	uintptr_t addr = (uintptr_t)op->data.buf.in;
 	int max_byte_count;
 
-	host = spi_controller_get_devdata(spi->master);
+	host = spi_controller_get_devdata(spi->controller);
 
 	max_byte_count = host->max_cmd_dword * 4;
 
@@ -174,7 +174,7 @@ static bool hisi_sfc_v3xx_supports_op(struct spi_mem *mem,
 	struct spi_device *spi = mem->spi;
 	struct hisi_sfc_v3xx_host *host;
 
-	host = spi_controller_get_devdata(spi->master);
+	host = spi_controller_get_devdata(spi->controller);
 
 	if (op->data.buswidth > 4 || op->dummy.buswidth > 4 ||
 	    op->addr.buswidth > 4 || op->cmd.buswidth > 4)
@@ -363,7 +363,7 @@ static int hisi_sfc_v3xx_exec_op(struct spi_mem *mem,
 	struct spi_device *spi = mem->spi;
 	u8 chip_select = spi_get_chipselect(spi, 0);
 
-	host = spi_controller_get_devdata(spi->master);
+	host = spi_controller_get_devdata(spi->controller);
 
 	return hisi_sfc_v3xx_generic_exec_op(host, op, chip_select);
 }
@@ -431,7 +431,7 @@ static int hisi_sfc_v3xx_probe(struct platform_device *pdev)
 	u32 version, glb_config;
 	int ret;
 
-	ctlr = spi_alloc_master(&pdev->dev, sizeof(*host));
+	ctlr = spi_alloc_host(&pdev->dev, sizeof(*host));
 	if (!ctlr)
 		return -ENOMEM;
 
@@ -448,13 +448,13 @@ static int hisi_sfc_v3xx_probe(struct platform_device *pdev)
 	host->regbase = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(host->regbase)) {
 		ret = PTR_ERR(host->regbase);
-		goto err_put_master;
+		goto err_put_host;
 	}
 
 	host->irq = platform_get_irq_optional(pdev, 0);
 	if (host->irq == -EPROBE_DEFER) {
 		ret = -EPROBE_DEFER;
-		goto err_put_master;
+		goto err_put_host;
 	}
 
 	hisi_sfc_v3xx_disable_int(host);
@@ -496,15 +496,15 @@ static int hisi_sfc_v3xx_probe(struct platform_device *pdev)
 
 	ret = devm_spi_register_controller(dev, ctlr);
 	if (ret)
-		goto err_put_master;
+		goto err_put_host;
 
 	dev_info(&pdev->dev, "hw version 0x%x, %s mode.\n",
 		 version, host->irq ? "irq" : "polling");
 
 	return 0;
 
-err_put_master:
-	spi_master_put(ctlr);
+err_put_host:
+	spi_controller_put(ctlr);
 	return ret;
 }
 

From d6e19216e8167adbd6c44f5e0f8aa127267a8b24 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:41:00 +0800
Subject: [PATCH 1068/2047] spi: img-spfi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-16-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-img-spfi.c | 118 ++++++++++++++++++-------------------
 1 file changed, 59 insertions(+), 59 deletions(-)

diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c
index d775f87770e3..d8360f94d3b7 100644
--- a/drivers/spi/spi-img-spfi.c
+++ b/drivers/spi/spi-img-spfi.c
@@ -86,7 +86,7 @@
 
 struct img_spfi {
 	struct device *dev;
-	struct spi_master *master;
+	struct spi_controller *host;
 	spinlock_t lock;
 
 	void __iomem *regs;
@@ -221,11 +221,11 @@ static unsigned int spfi_pio_read8(struct img_spfi *spfi, u8 *buf,
 	return count;
 }
 
-static int img_spfi_start_pio(struct spi_master *master,
+static int img_spfi_start_pio(struct spi_controller *host,
 			       struct spi_device *spi,
 			       struct spi_transfer *xfer)
 {
-	struct img_spfi *spfi = spi_master_get_devdata(spi->master);
+	struct img_spfi *spfi = spi_controller_get_devdata(spi->controller);
 	unsigned int tx_bytes = 0, rx_bytes = 0;
 	const void *tx_buf = xfer->tx_buf;
 	void *rx_buf = xfer->rx_buf;
@@ -285,7 +285,7 @@ static void img_spfi_dma_rx_cb(void *data)
 	spin_lock_irqsave(&spfi->lock, flags);
 	spfi->rx_dma_busy = false;
 	if (!spfi->tx_dma_busy)
-		spi_finalize_current_transfer(spfi->master);
+		spi_finalize_current_transfer(spfi->host);
 	spin_unlock_irqrestore(&spfi->lock, flags);
 }
 
@@ -299,15 +299,15 @@ static void img_spfi_dma_tx_cb(void *data)
 	spin_lock_irqsave(&spfi->lock, flags);
 	spfi->tx_dma_busy = false;
 	if (!spfi->rx_dma_busy)
-		spi_finalize_current_transfer(spfi->master);
+		spi_finalize_current_transfer(spfi->host);
 	spin_unlock_irqrestore(&spfi->lock, flags);
 }
 
-static int img_spfi_start_dma(struct spi_master *master,
+static int img_spfi_start_dma(struct spi_controller *host,
 			      struct spi_device *spi,
 			      struct spi_transfer *xfer)
 {
-	struct img_spfi *spfi = spi_master_get_devdata(spi->master);
+	struct img_spfi *spfi = spi_controller_get_devdata(spi->controller);
 	struct dma_async_tx_descriptor *rxdesc = NULL, *txdesc = NULL;
 	struct dma_slave_config rxconf, txconf;
 
@@ -384,10 +384,10 @@ stop_dma:
 	return -EIO;
 }
 
-static void img_spfi_handle_err(struct spi_master *master,
+static void img_spfi_handle_err(struct spi_controller *host,
 				struct spi_message *msg)
 {
-	struct img_spfi *spfi = spi_master_get_devdata(master);
+	struct img_spfi *spfi = spi_controller_get_devdata(host);
 	unsigned long flags;
 
 	/*
@@ -405,9 +405,9 @@ static void img_spfi_handle_err(struct spi_master *master,
 	spin_unlock_irqrestore(&spfi->lock, flags);
 }
 
-static int img_spfi_prepare(struct spi_master *master, struct spi_message *msg)
+static int img_spfi_prepare(struct spi_controller *host, struct spi_message *msg)
 {
-	struct img_spfi *spfi = spi_master_get_devdata(master);
+	struct img_spfi *spfi = spi_controller_get_devdata(host);
 	u32 val;
 
 	val = spfi_readl(spfi, SPFI_PORT_STATE);
@@ -427,20 +427,20 @@ static int img_spfi_prepare(struct spi_master *master, struct spi_message *msg)
 	return 0;
 }
 
-static int img_spfi_unprepare(struct spi_master *master,
+static int img_spfi_unprepare(struct spi_controller *host,
 			      struct spi_message *msg)
 {
-	struct img_spfi *spfi = spi_master_get_devdata(master);
+	struct img_spfi *spfi = spi_controller_get_devdata(host);
 
 	spfi_reset(spfi);
 
 	return 0;
 }
 
-static void img_spfi_config(struct spi_master *master, struct spi_device *spi,
+static void img_spfi_config(struct spi_controller *host, struct spi_device *spi,
 			    struct spi_transfer *xfer)
 {
-	struct img_spfi *spfi = spi_master_get_devdata(spi->master);
+	struct img_spfi *spfi = spi_controller_get_devdata(spi->controller);
 	u32 val, div;
 
 	/*
@@ -476,11 +476,11 @@ static void img_spfi_config(struct spi_master *master, struct spi_device *spi,
 	spfi_writel(spfi, val, SPFI_CONTROL);
 }
 
-static int img_spfi_transfer_one(struct spi_master *master,
+static int img_spfi_transfer_one(struct spi_controller *host,
 				 struct spi_device *spi,
 				 struct spi_transfer *xfer)
 {
-	struct img_spfi *spfi = spi_master_get_devdata(spi->master);
+	struct img_spfi *spfi = spi_controller_get_devdata(spi->controller);
 	int ret;
 
 	if (xfer->len > SPFI_TRANSACTION_TSIZE_MASK) {
@@ -490,16 +490,16 @@ static int img_spfi_transfer_one(struct spi_master *master,
 		return -EINVAL;
 	}
 
-	img_spfi_config(master, spi, xfer);
-	if (master->can_dma && master->can_dma(master, spi, xfer))
-		ret = img_spfi_start_dma(master, spi, xfer);
+	img_spfi_config(host, spi, xfer);
+	if (host->can_dma && host->can_dma(host, spi, xfer))
+		ret = img_spfi_start_dma(host, spi, xfer);
 	else
-		ret = img_spfi_start_pio(master, spi, xfer);
+		ret = img_spfi_start_pio(host, spi, xfer);
 
 	return ret;
 }
 
-static bool img_spfi_can_dma(struct spi_master *master, struct spi_device *spi,
+static bool img_spfi_can_dma(struct spi_controller *host, struct spi_device *spi,
 			     struct spi_transfer *xfer)
 {
 	if (xfer->len > SPFI_32BIT_FIFO_SIZE)
@@ -524,20 +524,20 @@ static irqreturn_t img_spfi_irq(int irq, void *dev_id)
 
 static int img_spfi_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct img_spfi *spfi;
 	struct resource *res;
 	int ret;
 	u32 max_speed_hz;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*spfi));
-	if (!master)
+	host = spi_alloc_host(&pdev->dev, sizeof(*spfi));
+	if (!host)
 		return -ENOMEM;
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
-	spfi = spi_master_get_devdata(master);
+	spfi = spi_controller_get_devdata(host);
 	spfi->dev = &pdev->dev;
-	spfi->master = master;
+	spfi->host = host;
 	spin_lock_init(&spfi->lock);
 
 	spfi->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
@@ -582,15 +582,15 @@ static int img_spfi_probe(struct platform_device *pdev)
 	 */
 	spfi_writel(spfi, SPFI_INTERRUPT_IACCESS, SPFI_INTERRUPT_ENABLE);
 
-	master->auto_runtime_pm = true;
-	master->bus_num = pdev->id;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_TX_DUAL | SPI_RX_DUAL;
+	host->auto_runtime_pm = true;
+	host->bus_num = pdev->id;
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_TX_DUAL | SPI_RX_DUAL;
 	if (of_property_read_bool(spfi->dev->of_node, "img,supports-quad-mode"))
-		master->mode_bits |= SPI_TX_QUAD | SPI_RX_QUAD;
-	master->dev.of_node = pdev->dev.of_node;
-	master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(8);
-	master->max_speed_hz = clk_get_rate(spfi->spfi_clk) / 4;
-	master->min_speed_hz = clk_get_rate(spfi->spfi_clk) / 512;
+		host->mode_bits |= SPI_TX_QUAD | SPI_RX_QUAD;
+	host->dev.of_node = pdev->dev.of_node;
+	host->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(8);
+	host->max_speed_hz = clk_get_rate(spfi->spfi_clk) / 4;
+	host->min_speed_hz = clk_get_rate(spfi->spfi_clk) / 512;
 
 	/*
 	 * Maximum speed supported by spfi is limited to the lower value
@@ -601,15 +601,15 @@ static int img_spfi_probe(struct platform_device *pdev)
 	 */
 	if (!of_property_read_u32(spfi->dev->of_node, "spfi-max-frequency",
 				  &max_speed_hz)) {
-		if (master->max_speed_hz > max_speed_hz)
-			master->max_speed_hz = max_speed_hz;
+		if (host->max_speed_hz > max_speed_hz)
+			host->max_speed_hz = max_speed_hz;
 	}
 
-	master->transfer_one = img_spfi_transfer_one;
-	master->prepare_message = img_spfi_prepare;
-	master->unprepare_message = img_spfi_unprepare;
-	master->handle_err = img_spfi_handle_err;
-	master->use_gpio_descriptors = true;
+	host->transfer_one = img_spfi_transfer_one;
+	host->prepare_message = img_spfi_prepare;
+	host->unprepare_message = img_spfi_unprepare;
+	host->handle_err = img_spfi_handle_err;
+	host->use_gpio_descriptors = true;
 
 	spfi->tx_ch = dma_request_chan(spfi->dev, "tx");
 	if (IS_ERR(spfi->tx_ch)) {
@@ -636,15 +636,15 @@ static int img_spfi_probe(struct platform_device *pdev)
 		spfi->rx_ch = NULL;
 		dev_warn(spfi->dev, "Failed to get DMA channels, falling back to PIO mode\n");
 	} else {
-		master->dma_tx = spfi->tx_ch;
-		master->dma_rx = spfi->rx_ch;
-		master->can_dma = img_spfi_can_dma;
+		host->dma_tx = spfi->tx_ch;
+		host->dma_rx = spfi->rx_ch;
+		host->can_dma = img_spfi_can_dma;
 	}
 
 	pm_runtime_set_active(spfi->dev);
 	pm_runtime_enable(spfi->dev);
 
-	ret = devm_spi_register_master(spfi->dev, master);
+	ret = devm_spi_register_controller(spfi->dev, host);
 	if (ret)
 		goto disable_pm;
 
@@ -660,15 +660,15 @@ disable_pm:
 disable_pclk:
 	clk_disable_unprepare(spfi->sys_clk);
 put_spi:
-	spi_master_put(master);
+	spi_controller_put(host);
 
 	return ret;
 }
 
 static void img_spfi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct img_spfi *spfi = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct img_spfi *spfi = spi_controller_get_devdata(host);
 
 	if (spfi->tx_ch)
 		dma_release_channel(spfi->tx_ch);
@@ -685,8 +685,8 @@ static void img_spfi_remove(struct platform_device *pdev)
 #ifdef CONFIG_PM
 static int img_spfi_runtime_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct img_spfi *spfi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct img_spfi *spfi = spi_controller_get_devdata(host);
 
 	clk_disable_unprepare(spfi->spfi_clk);
 	clk_disable_unprepare(spfi->sys_clk);
@@ -696,8 +696,8 @@ static int img_spfi_runtime_suspend(struct device *dev)
 
 static int img_spfi_runtime_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct img_spfi *spfi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct img_spfi *spfi = spi_controller_get_devdata(host);
 	int ret;
 
 	ret = clk_prepare_enable(spfi->sys_clk);
@@ -716,15 +716,15 @@ static int img_spfi_runtime_resume(struct device *dev)
 #ifdef CONFIG_PM_SLEEP
 static int img_spfi_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
+	struct spi_controller *host = dev_get_drvdata(dev);
 
-	return spi_master_suspend(master);
+	return spi_controller_suspend(host);
 }
 
 static int img_spfi_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct img_spfi *spfi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct img_spfi *spfi = spi_controller_get_devdata(host);
 	int ret;
 
 	ret = pm_runtime_resume_and_get(dev);
@@ -733,7 +733,7 @@ static int img_spfi_resume(struct device *dev)
 	spfi_reset(spfi);
 	pm_runtime_put(dev);
 
-	return spi_master_resume(master);
+	return spi_controller_resume(host);
 }
 #endif /* CONFIG_PM_SLEEP */
 

From 756d5bf0770662ebf14f0eb274d862be5cf3e044 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:41:01 +0800
Subject: [PATCH 1069/2047] spi: imx: switch to use modern name

Change legacy name master/slave to modern name host/target.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-17-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-imx.c | 114 +++++++++++++++++++++---------------------
 1 file changed, 57 insertions(+), 57 deletions(-)

diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 3634fe8f6d68..a8a74c7cb79f 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -52,7 +52,7 @@ MODULE_PARM_DESC(polling_limit_us,
 /* The maximum bytes that a sdma BD can transfer. */
 #define MAX_SDMA_BD_BYTES (1 << 15)
 #define MX51_ECSPI_CTRL_MAX_BURST	512
-/* The maximum bytes that IMX53_ECSPI can transfer in slave mode.*/
+/* The maximum bytes that IMX53_ECSPI can transfer in target mode.*/
 #define MX53_MAX_TRANSFER_BYTES		512
 
 enum spi_imx_devtype {
@@ -77,7 +77,7 @@ struct spi_imx_devtype_data {
 	void (*setup_wml)(struct spi_imx_data *spi_imx);
 	void (*disable)(struct spi_imx_data *spi_imx);
 	bool has_dmamode;
-	bool has_slavemode;
+	bool has_targetmode;
 	unsigned int fifo_size;
 	bool dynamic_burst;
 	/*
@@ -113,10 +113,10 @@ struct spi_imx_data {
 	unsigned int dynamic_burst;
 	bool rx_only;
 
-	/* Slave mode */
-	bool slave_mode;
-	bool slave_aborted;
-	unsigned int slave_burst;
+	/* Target mode */
+	bool target_mode;
+	bool target_aborted;
+	unsigned int target_burst;
 
 	/* DMA */
 	bool usedma;
@@ -240,7 +240,7 @@ static bool spi_imx_can_dma(struct spi_controller *controller, struct spi_device
 	if (!controller->dma_rx)
 		return false;
 
-	if (spi_imx->slave_mode)
+	if (spi_imx->target_mode)
 		return false;
 
 	if (transfer->len < spi_imx->devtype_data->fifo_size)
@@ -404,12 +404,12 @@ static void spi_imx_buf_tx_swap(struct spi_imx_data *spi_imx)
 	writel(val, spi_imx->base + MXC_CSPITXDATA);
 }
 
-static void mx53_ecspi_rx_slave(struct spi_imx_data *spi_imx)
+static void mx53_ecspi_rx_target(struct spi_imx_data *spi_imx)
 {
 	u32 val = be32_to_cpu(readl(spi_imx->base + MXC_CSPIRXDATA));
 
 	if (spi_imx->rx_buf) {
-		int n_bytes = spi_imx->slave_burst % sizeof(val);
+		int n_bytes = spi_imx->target_burst % sizeof(val);
 
 		if (!n_bytes)
 			n_bytes = sizeof(val);
@@ -418,13 +418,13 @@ static void mx53_ecspi_rx_slave(struct spi_imx_data *spi_imx)
 		       ((u8 *)&val) + sizeof(val) - n_bytes, n_bytes);
 
 		spi_imx->rx_buf += n_bytes;
-		spi_imx->slave_burst -= n_bytes;
+		spi_imx->target_burst -= n_bytes;
 	}
 
 	spi_imx->remainder -= sizeof(u32);
 }
 
-static void mx53_ecspi_tx_slave(struct spi_imx_data *spi_imx)
+static void mx53_ecspi_tx_target(struct spi_imx_data *spi_imx)
 {
 	u32 val = 0;
 	int n_bytes = spi_imx->count % sizeof(val);
@@ -535,8 +535,8 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx,
 	u32 current_cfg = cfg;
 	int channel = mx51_ecspi_channel(spi);
 
-	/* set Master or Slave mode */
-	if (spi_imx->slave_mode)
+	/* set Host or Target mode */
+	if (spi_imx->target_mode)
 		ctrl &= ~MX51_ECSPI_CTRL_MODE_MASK;
 	else
 		ctrl |= MX51_ECSPI_CTRL_MODE_MASK;
@@ -564,11 +564,11 @@ static int mx51_ecspi_prepare_message(struct spi_imx_data *spi_imx,
 	writel(testreg, spi_imx->base + MX51_ECSPI_TESTREG);
 
 	/*
-	 * eCSPI burst completion by Chip Select signal in Slave mode
+	 * eCSPI burst completion by Chip Select signal in Target mode
 	 * is not functional for imx53 Soc, config SPI burst completed when
 	 * BURST_LENGTH + 1 bits are received
 	 */
-	if (spi_imx->slave_mode && is_imx53_ecspi(spi_imx))
+	if (spi_imx->target_mode && is_imx53_ecspi(spi_imx))
 		cfg &= ~MX51_ECSPI_CONFIG_SBBCTRL(channel);
 	else
 		cfg |= MX51_ECSPI_CONFIG_SBBCTRL(channel);
@@ -655,8 +655,8 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx,
 
 	/* Clear BL field and set the right value */
 	ctrl &= ~MX51_ECSPI_CTRL_BL_MASK;
-	if (spi_imx->slave_mode && is_imx53_ecspi(spi_imx))
-		ctrl |= (spi_imx->slave_burst * 8 - 1)
+	if (spi_imx->target_mode && is_imx53_ecspi(spi_imx))
+		ctrl |= (spi_imx->target_burst * 8 - 1)
 			<< MX51_ECSPI_CTRL_BL_OFFSET;
 	else {
 		if (spi_imx->count >= 512)
@@ -721,7 +721,7 @@ static void mx51_ecspi_reset(struct spi_imx_data *spi_imx)
 #define MX31_INTREG_RREN	(1 << 3)
 
 #define MX31_CSPICTRL_ENABLE	(1 << 0)
-#define MX31_CSPICTRL_MASTER	(1 << 1)
+#define MX31_CSPICTRL_HOST	(1 << 1)
 #define MX31_CSPICTRL_XCH	(1 << 2)
 #define MX31_CSPICTRL_SMC	(1 << 3)
 #define MX31_CSPICTRL_POL	(1 << 4)
@@ -778,7 +778,7 @@ static int mx31_prepare_message(struct spi_imx_data *spi_imx,
 static int mx31_prepare_transfer(struct spi_imx_data *spi_imx,
 				 struct spi_device *spi)
 {
-	unsigned int reg = MX31_CSPICTRL_ENABLE | MX31_CSPICTRL_MASTER;
+	unsigned int reg = MX31_CSPICTRL_ENABLE | MX31_CSPICTRL_HOST;
 	unsigned int clk;
 
 	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, spi_imx->spi_bus_clk, &clk) <<
@@ -848,7 +848,7 @@ static void mx31_reset(struct spi_imx_data *spi_imx)
 #define MX21_CSPICTRL_SSPOL	(1 << 8)
 #define MX21_CSPICTRL_XCH	(1 << 9)
 #define MX21_CSPICTRL_ENABLE	(1 << 10)
-#define MX21_CSPICTRL_MASTER	(1 << 11)
+#define MX21_CSPICTRL_HOST	(1 << 11)
 #define MX21_CSPICTRL_DR_SHIFT	14
 #define MX21_CSPICTRL_CS_SHIFT	19
 
@@ -882,7 +882,7 @@ static int mx21_prepare_message(struct spi_imx_data *spi_imx,
 static int mx21_prepare_transfer(struct spi_imx_data *spi_imx,
 				 struct spi_device *spi)
 {
-	unsigned int reg = MX21_CSPICTRL_ENABLE | MX21_CSPICTRL_MASTER;
+	unsigned int reg = MX21_CSPICTRL_ENABLE | MX21_CSPICTRL_HOST;
 	unsigned int max = is_imx27_cspi(spi_imx) ? 16 : 18;
 	unsigned int clk;
 
@@ -924,7 +924,7 @@ static void mx21_reset(struct spi_imx_data *spi_imx)
 #define MX1_CSPICTRL_PHA	(1 << 5)
 #define MX1_CSPICTRL_XCH	(1 << 8)
 #define MX1_CSPICTRL_ENABLE	(1 << 9)
-#define MX1_CSPICTRL_MASTER	(1 << 10)
+#define MX1_CSPICTRL_HOST	(1 << 10)
 #define MX1_CSPICTRL_DR_SHIFT	13
 
 static void mx1_intctrl(struct spi_imx_data *spi_imx, int enable)
@@ -957,7 +957,7 @@ static int mx1_prepare_message(struct spi_imx_data *spi_imx,
 static int mx1_prepare_transfer(struct spi_imx_data *spi_imx,
 				struct spi_device *spi)
 {
-	unsigned int reg = MX1_CSPICTRL_ENABLE | MX1_CSPICTRL_MASTER;
+	unsigned int reg = MX1_CSPICTRL_ENABLE | MX1_CSPICTRL_HOST;
 	unsigned int clk;
 
 	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, spi_imx->spi_bus_clk, &clk) <<
@@ -996,7 +996,7 @@ static struct spi_imx_devtype_data imx1_cspi_devtype_data = {
 	.fifo_size = 8,
 	.has_dmamode = false,
 	.dynamic_burst = false,
-	.has_slavemode = false,
+	.has_targetmode = false,
 	.devtype = IMX1_CSPI,
 };
 
@@ -1010,7 +1010,7 @@ static struct spi_imx_devtype_data imx21_cspi_devtype_data = {
 	.fifo_size = 8,
 	.has_dmamode = false,
 	.dynamic_burst = false,
-	.has_slavemode = false,
+	.has_targetmode = false,
 	.devtype = IMX21_CSPI,
 };
 
@@ -1025,7 +1025,7 @@ static struct spi_imx_devtype_data imx27_cspi_devtype_data = {
 	.fifo_size = 8,
 	.has_dmamode = false,
 	.dynamic_burst = false,
-	.has_slavemode = false,
+	.has_targetmode = false,
 	.devtype = IMX27_CSPI,
 };
 
@@ -1039,7 +1039,7 @@ static struct spi_imx_devtype_data imx31_cspi_devtype_data = {
 	.fifo_size = 8,
 	.has_dmamode = false,
 	.dynamic_burst = false,
-	.has_slavemode = false,
+	.has_targetmode = false,
 	.devtype = IMX31_CSPI,
 };
 
@@ -1054,7 +1054,7 @@ static struct spi_imx_devtype_data imx35_cspi_devtype_data = {
 	.fifo_size = 8,
 	.has_dmamode = true,
 	.dynamic_burst = false,
-	.has_slavemode = false,
+	.has_targetmode = false,
 	.devtype = IMX35_CSPI,
 };
 
@@ -1069,7 +1069,7 @@ static struct spi_imx_devtype_data imx51_ecspi_devtype_data = {
 	.fifo_size = 64,
 	.has_dmamode = true,
 	.dynamic_burst = true,
-	.has_slavemode = true,
+	.has_targetmode = true,
 	.disable = mx51_ecspi_disable,
 	.devtype = IMX51_ECSPI,
 };
@@ -1083,7 +1083,7 @@ static struct spi_imx_devtype_data imx53_ecspi_devtype_data = {
 	.reset = mx51_ecspi_reset,
 	.fifo_size = 64,
 	.has_dmamode = true,
-	.has_slavemode = true,
+	.has_targetmode = true,
 	.disable = mx51_ecspi_disable,
 	.devtype = IMX53_ECSPI,
 };
@@ -1099,7 +1099,7 @@ static struct spi_imx_devtype_data imx6ul_ecspi_devtype_data = {
 	.fifo_size = 64,
 	.has_dmamode = true,
 	.dynamic_burst = true,
-	.has_slavemode = true,
+	.has_targetmode = true,
 	.tx_glitch_fixed = true,
 	.disable = mx51_ecspi_disable,
 	.devtype = IMX51_ECSPI,
@@ -1164,7 +1164,7 @@ static void spi_imx_push(struct spi_imx_data *spi_imx)
 		spi_imx->txfifo++;
 	}
 
-	if (!spi_imx->slave_mode)
+	if (!spi_imx->target_mode)
 		spi_imx->devtype_data->trigger(spi_imx);
 }
 
@@ -1268,7 +1268,7 @@ static int spi_imx_setupxfer(struct spi_device *spi,
 	 * words, we have to use multiple word-size bursts, we can't use
 	 * dynamic_burst in that case.
 	 */
-	if (spi_imx->devtype_data->dynamic_burst && !spi_imx->slave_mode &&
+	if (spi_imx->devtype_data->dynamic_burst && !spi_imx->target_mode &&
 	    !(spi->mode & SPI_CS_WORD) &&
 	    (spi_imx->bits_per_word == 8 ||
 	    spi_imx->bits_per_word == 16 ||
@@ -1300,10 +1300,10 @@ static int spi_imx_setupxfer(struct spi_device *spi,
 	spi_imx->rx_only = ((t->tx_buf == NULL)
 			|| (t->tx_buf == spi->controller->dummy_tx));
 
-	if (is_imx53_ecspi(spi_imx) && spi_imx->slave_mode) {
-		spi_imx->rx = mx53_ecspi_rx_slave;
-		spi_imx->tx = mx53_ecspi_tx_slave;
-		spi_imx->slave_burst = t->len;
+	if (is_imx53_ecspi(spi_imx) && spi_imx->target_mode) {
+		spi_imx->rx = mx53_ecspi_rx_target;
+		spi_imx->tx = mx53_ecspi_tx_target;
+		spi_imx->target_burst = t->len;
 	}
 
 	spi_imx->devtype_data->prepare_transfer(spi_imx, spi);
@@ -1568,8 +1568,8 @@ static int spi_imx_poll_transfer(struct spi_device *spi,
 	return 0;
 }
 
-static int spi_imx_pio_transfer_slave(struct spi_device *spi,
-				      struct spi_transfer *transfer)
+static int spi_imx_pio_transfer_target(struct spi_device *spi,
+				       struct spi_transfer *transfer)
 {
 	struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller);
 	int ret = 0;
@@ -1588,22 +1588,22 @@ static int spi_imx_pio_transfer_slave(struct spi_device *spi,
 	spi_imx->remainder = 0;
 
 	reinit_completion(&spi_imx->xfer_done);
-	spi_imx->slave_aborted = false;
+	spi_imx->target_aborted = false;
 
 	spi_imx_push(spi_imx);
 
 	spi_imx->devtype_data->intctrl(spi_imx, MXC_INT_TE | MXC_INT_RDR);
 
 	if (wait_for_completion_interruptible(&spi_imx->xfer_done) ||
-	    spi_imx->slave_aborted) {
+	    spi_imx->target_aborted) {
 		dev_dbg(&spi->dev, "interrupted\n");
 		ret = -EINTR;
 	}
 
-	/* ecspi has a HW issue when works in Slave mode,
+	/* ecspi has a HW issue when works in Target mode,
 	 * after 64 words writtern to TXFIFO, even TXFIFO becomes empty,
 	 * ECSPI_TXDATA keeps shift out the last word data,
-	 * so we have to disable ECSPI when in slave mode after the
+	 * so we have to disable ECSPI when in target mode after the
 	 * transfer completes
 	 */
 	if (spi_imx->devtype_data->disable)
@@ -1626,8 +1626,8 @@ static int spi_imx_transfer_one(struct spi_controller *controller,
 	while (spi_imx->devtype_data->rx_available(spi_imx))
 		readl(spi_imx->base + MXC_CSPIRXDATA);
 
-	if (spi_imx->slave_mode)
-		return spi_imx_pio_transfer_slave(spi, transfer);
+	if (spi_imx->target_mode)
+		return spi_imx_pio_transfer_target(spi, transfer);
 
 	/*
 	 * If we decided in spi_imx_can_dma() that we want to do a DMA
@@ -1693,11 +1693,11 @@ spi_imx_unprepare_message(struct spi_controller *controller, struct spi_message
 	return 0;
 }
 
-static int spi_imx_slave_abort(struct spi_controller *controller)
+static int spi_imx_target_abort(struct spi_controller *controller)
 {
 	struct spi_imx_data *spi_imx = spi_controller_get_devdata(controller);
 
-	spi_imx->slave_aborted = true;
+	spi_imx->target_aborted = true;
 	complete(&spi_imx->xfer_done);
 
 	return 0;
@@ -1712,17 +1712,17 @@ static int spi_imx_probe(struct platform_device *pdev)
 	int ret, irq, spi_drctl;
 	const struct spi_imx_devtype_data *devtype_data =
 			of_device_get_match_data(&pdev->dev);
-	bool slave_mode;
+	bool target_mode;
 	u32 val;
 
-	slave_mode = devtype_data->has_slavemode &&
-			of_property_read_bool(np, "spi-slave");
-	if (slave_mode)
-		controller = spi_alloc_slave(&pdev->dev,
-					     sizeof(struct spi_imx_data));
-	else
-		controller = spi_alloc_master(&pdev->dev,
+	target_mode = devtype_data->has_targetmode &&
+		      of_property_read_bool(np, "spi-slave");
+	if (target_mode)
+		controller = spi_alloc_target(&pdev->dev,
 					      sizeof(struct spi_imx_data));
+	else
+		controller = spi_alloc_host(&pdev->dev,
+					    sizeof(struct spi_imx_data));
 	if (!controller)
 		return -ENOMEM;
 
@@ -1741,7 +1741,7 @@ static int spi_imx_probe(struct platform_device *pdev)
 	spi_imx = spi_controller_get_devdata(controller);
 	spi_imx->controller = controller;
 	spi_imx->dev = &pdev->dev;
-	spi_imx->slave_mode = slave_mode;
+	spi_imx->target_mode = target_mode;
 
 	spi_imx->devtype_data = devtype_data;
 
@@ -1761,7 +1761,7 @@ static int spi_imx_probe(struct platform_device *pdev)
 	controller->cleanup = spi_imx_cleanup;
 	controller->prepare_message = spi_imx_prepare_message;
 	controller->unprepare_message = spi_imx_unprepare_message;
-	controller->slave_abort = spi_imx_slave_abort;
+	controller->target_abort = spi_imx_target_abort;
 	controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_NO_CS |
 				SPI_MOSI_IDLE_LOW;
 

From 452edead18d7ba14b018f85f9eda0922a23e7412 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:41:02 +0800
Subject: [PATCH 1070/2047] spi: ingenic: switch to use devm_spi_alloc_host()

Switch to use modern name function devm_spi_alloc_host().

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-18-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ingenic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-ingenic.c b/drivers/spi/spi-ingenic.c
index cfa665a80bc6..cc366936d72b 100644
--- a/drivers/spi/spi-ingenic.c
+++ b/drivers/spi/spi-ingenic.c
@@ -392,7 +392,7 @@ static int spi_ingenic_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	ctlr = devm_spi_alloc_master(dev, sizeof(*priv));
+	ctlr = devm_spi_alloc_host(dev, sizeof(*priv));
 	if (!ctlr) {
 		dev_err(dev, "Unable to allocate SPI controller.\n");
 		return -ENOMEM;

From 5fa0ade1851f7766e75ebb453ef8d6de089318d4 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:41:03 +0800
Subject: [PATCH 1071/2047] spi: intel: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-19-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-intel.c | 42 ++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/spi/spi-intel.c b/drivers/spi/spi-intel.c
index bc6d22149e7e..98ec4dc22b81 100644
--- a/drivers/spi/spi-intel.c
+++ b/drivers/spi/spi-intel.c
@@ -143,7 +143,7 @@
  * @base: Beginning of MMIO space
  * @pregs: Start of protection registers
  * @sregs: Start of software sequencer registers
- * @master: Pointer to the SPI controller structure
+ * @host: Pointer to the SPI controller structure
  * @nregions: Maximum number of regions
  * @pr_num: Maximum number of protected range registers
  * @chip0_size: Size of the first flash chip in bytes
@@ -161,7 +161,7 @@ struct intel_spi {
 	void __iomem *base;
 	void __iomem *pregs;
 	void __iomem *sregs;
-	struct spi_controller *master;
+	struct spi_controller *host;
 	size_t nregions;
 	size_t pr_num;
 	size_t chip0_size;
@@ -747,7 +747,7 @@ intel_spi_match_mem_op(struct intel_spi *ispi, const struct spi_mem_op *op)
 static bool intel_spi_supports_mem_op(struct spi_mem *mem,
 				      const struct spi_mem_op *op)
 {
-	struct intel_spi *ispi = spi_master_get_devdata(mem->spi->master);
+	struct intel_spi *ispi = spi_controller_get_devdata(mem->spi->controller);
 	const struct intel_spi_mem_op *iop;
 
 	iop = intel_spi_match_mem_op(ispi, op);
@@ -778,7 +778,7 @@ static bool intel_spi_supports_mem_op(struct spi_mem *mem,
 
 static int intel_spi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op)
 {
-	struct intel_spi *ispi = spi_master_get_devdata(mem->spi->master);
+	struct intel_spi *ispi = spi_controller_get_devdata(mem->spi->controller);
 	const struct intel_spi_mem_op *iop;
 
 	iop = intel_spi_match_mem_op(ispi, op);
@@ -790,7 +790,7 @@ static int intel_spi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *o
 
 static const char *intel_spi_get_name(struct spi_mem *mem)
 {
-	const struct intel_spi *ispi = spi_master_get_devdata(mem->spi->master);
+	const struct intel_spi *ispi = spi_controller_get_devdata(mem->spi->controller);
 
 	/*
 	 * Return name of the flash controller device to be compatible
@@ -801,7 +801,7 @@ static const char *intel_spi_get_name(struct spi_mem *mem)
 
 static int intel_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)
 {
-	struct intel_spi *ispi = spi_master_get_devdata(desc->mem->spi->master);
+	struct intel_spi *ispi = spi_controller_get_devdata(desc->mem->spi->controller);
 	const struct intel_spi_mem_op *iop;
 
 	iop = intel_spi_match_mem_op(ispi, &desc->info.op_tmpl);
@@ -815,7 +815,7 @@ static int intel_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)
 static ssize_t intel_spi_dirmap_read(struct spi_mem_dirmap_desc *desc, u64 offs,
 				     size_t len, void *buf)
 {
-	struct intel_spi *ispi = spi_master_get_devdata(desc->mem->spi->master);
+	struct intel_spi *ispi = spi_controller_get_devdata(desc->mem->spi->controller);
 	const struct intel_spi_mem_op *iop = desc->priv;
 	struct spi_mem_op op = desc->info.op_tmpl;
 	int ret;
@@ -832,7 +832,7 @@ static ssize_t intel_spi_dirmap_read(struct spi_mem_dirmap_desc *desc, u64 offs,
 static ssize_t intel_spi_dirmap_write(struct spi_mem_dirmap_desc *desc, u64 offs,
 				      size_t len, const void *buf)
 {
-	struct intel_spi *ispi = spi_master_get_devdata(desc->mem->spi->master);
+	struct intel_spi *ispi = spi_controller_get_devdata(desc->mem->spi->controller);
 	const struct intel_spi_mem_op *iop = desc->priv;
 	struct spi_mem_op op = desc->info.op_tmpl;
 	int ret;
@@ -1332,14 +1332,14 @@ static int intel_spi_read_desc(struct intel_spi *ispi)
 
 	nc = (buf[1] & FLMAP0_NC_MASK) >> FLMAP0_NC_SHIFT;
 	if (!nc)
-		ispi->master->num_chipselect = 1;
+		ispi->host->num_chipselect = 1;
 	else if (nc == 1)
-		ispi->master->num_chipselect = 2;
+		ispi->host->num_chipselect = 2;
 	else
 		return -EINVAL;
 
 	dev_dbg(ispi->dev, "%u flash components found\n",
-		ispi->master->num_chipselect);
+		ispi->host->num_chipselect);
 	return 0;
 }
 
@@ -1365,7 +1365,7 @@ static int intel_spi_populate_chip(struct intel_spi *ispi)
 	snprintf(chip.modalias, 8, "spi-nor");
 	chip.platform_data = pdata;
 
-	if (!spi_new_device(ispi->master, &chip))
+	if (!spi_new_device(ispi->host, &chip))
 		return -ENODEV;
 
 	ret = intel_spi_read_desc(ispi);
@@ -1373,13 +1373,13 @@ static int intel_spi_populate_chip(struct intel_spi *ispi)
 		return ret;
 
 	/* Add the second chip if present */
-	if (ispi->master->num_chipselect < 2)
+	if (ispi->host->num_chipselect < 2)
 		return 0;
 
 	chip.platform_data = NULL;
 	chip.chip_select = 1;
 
-	if (!spi_new_device(ispi->master, &chip))
+	if (!spi_new_device(ispi->host, &chip))
 		return -ENODEV;
 	return 0;
 }
@@ -1396,31 +1396,31 @@ static int intel_spi_populate_chip(struct intel_spi *ispi)
 int intel_spi_probe(struct device *dev, struct resource *mem,
 		    const struct intel_spi_boardinfo *info)
 {
-	struct spi_controller *master;
+	struct spi_controller *host;
 	struct intel_spi *ispi;
 	int ret;
 
-	master = devm_spi_alloc_master(dev, sizeof(*ispi));
-	if (!master)
+	host = devm_spi_alloc_host(dev, sizeof(*ispi));
+	if (!host)
 		return -ENOMEM;
 
-	master->mem_ops = &intel_spi_mem_ops;
+	host->mem_ops = &intel_spi_mem_ops;
 
-	ispi = spi_master_get_devdata(master);
+	ispi = spi_controller_get_devdata(host);
 
 	ispi->base = devm_ioremap_resource(dev, mem);
 	if (IS_ERR(ispi->base))
 		return PTR_ERR(ispi->base);
 
 	ispi->dev = dev;
-	ispi->master = master;
+	ispi->host = host;
 	ispi->info = info;
 
 	ret = intel_spi_init(ispi);
 	if (ret)
 		return ret;
 
-	ret = devm_spi_register_master(dev, master);
+	ret = devm_spi_register_controller(dev, host);
 	if (ret)
 		return ret;
 

From 810ee62dd81d05c2305132804c021c2ac406b2b5 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:41:04 +0800
Subject: [PATCH 1072/2047] spi: jcore: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-20-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-jcore.c | 44 ++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/spi/spi-jcore.c b/drivers/spi/spi-jcore.c
index c42a3358e8c9..e37ca22e04ba 100644
--- a/drivers/spi/spi-jcore.c
+++ b/drivers/spi/spi-jcore.c
@@ -33,7 +33,7 @@
 #define JCORE_SPI_WAIT_RDY_MAX_LOOP	2000000
 
 struct jcore_spi {
-	struct spi_master *master;
+	struct spi_controller *host;
 	void __iomem *base;
 	unsigned int cs_reg;
 	unsigned int speed_reg;
@@ -59,7 +59,7 @@ static void jcore_spi_program(struct jcore_spi *hw)
 	void __iomem *ctrl_reg = hw->base + CTRL_REG;
 
 	if (jcore_spi_wait(ctrl_reg))
-		dev_err(hw->master->dev.parent,
+		dev_err(hw->host->dev.parent,
 			"timeout waiting to program ctrl reg.\n");
 
 	writel(hw->cs_reg | hw->speed_reg, ctrl_reg);
@@ -67,10 +67,10 @@ static void jcore_spi_program(struct jcore_spi *hw)
 
 static void jcore_spi_chipsel(struct spi_device *spi, bool value)
 {
-	struct jcore_spi *hw = spi_master_get_devdata(spi->master);
+	struct jcore_spi *hw = spi_controller_get_devdata(spi->controller);
 	u32 csbit = 1U << (2 * spi_get_chipselect(spi, 0));
 
-	dev_dbg(hw->master->dev.parent, "chipselect %d\n", spi_get_chipselect(spi, 0));
+	dev_dbg(hw->host->dev.parent, "chipselect %d\n", spi_get_chipselect(spi, 0));
 
 	if (value)
 		hw->cs_reg |= csbit;
@@ -90,14 +90,14 @@ static void jcore_spi_baudrate(struct jcore_spi *hw, int speed)
 	else
 		hw->speed_reg = ((hw->clock_freq / 2 / speed) - 1) << 27;
 	jcore_spi_program(hw);
-	dev_dbg(hw->master->dev.parent, "speed=%d reg=0x%x\n",
+	dev_dbg(hw->host->dev.parent, "speed=%d reg=0x%x\n",
 		speed, hw->speed_reg);
 }
 
-static int jcore_spi_txrx(struct spi_master *master, struct spi_device *spi,
+static int jcore_spi_txrx(struct spi_controller *host, struct spi_device *spi,
 			  struct spi_transfer *t)
 {
-	struct jcore_spi *hw = spi_master_get_devdata(master);
+	struct jcore_spi *hw = spi_controller_get_devdata(host);
 
 	void __iomem *ctrl_reg = hw->base + CTRL_REG;
 	void __iomem *data_reg = hw->base + DATA_REG;
@@ -130,7 +130,7 @@ static int jcore_spi_txrx(struct spi_master *master, struct spi_device *spi,
 			*rx++ = readl(data_reg);
 	}
 
-	spi_finalize_current_transfer(master);
+	spi_finalize_current_transfer(host);
 
 	if (count < len)
 		return -EREMOTEIO;
@@ -142,26 +142,26 @@ static int jcore_spi_probe(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
 	struct jcore_spi *hw;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct resource *res;
 	u32 clock_freq;
 	struct clk *clk;
 	int err = -ENODEV;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(struct jcore_spi));
-	if (!master)
+	host = spi_alloc_host(&pdev->dev, sizeof(struct jcore_spi));
+	if (!host)
 		return err;
 
-	/* Setup the master state. */
-	master->num_chipselect = 3;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
-	master->transfer_one = jcore_spi_txrx;
-	master->set_cs = jcore_spi_chipsel;
-	master->dev.of_node = node;
-	master->bus_num = pdev->id;
+	/* Setup the host state. */
+	host->num_chipselect = 3;
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	host->transfer_one = jcore_spi_txrx;
+	host->set_cs = jcore_spi_chipsel;
+	host->dev.of_node = node;
+	host->bus_num = pdev->id;
 
-	hw = spi_master_get_devdata(master);
-	hw->master = master;
+	hw = spi_controller_get_devdata(host);
+	hw->host = host;
 	platform_set_drvdata(pdev, hw);
 
 	/* Find and map our resources */
@@ -200,7 +200,7 @@ static int jcore_spi_probe(struct platform_device *pdev)
 	jcore_spi_baudrate(hw, 400000);
 
 	/* Register our spi controller */
-	err = devm_spi_register_master(&pdev->dev, master);
+	err = devm_spi_register_controller(&pdev->dev, host);
 	if (err)
 		goto exit;
 
@@ -209,7 +209,7 @@ static int jcore_spi_probe(struct platform_device *pdev)
 exit_busy:
 	err = -EBUSY;
 exit:
-	spi_master_put(master);
+	spi_controller_put(host);
 	return err;
 }
 

From ea11a8bb79d6c2655d145ec003deb77ab03dcb71 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Aug 2023 20:41:05 +0800
Subject: [PATCH 1073/2047] spi: lantiq: switch to use modern name

Change legacy name master/slave to modern name host/target or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230807124105.3429709-21-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-lantiq-ssc.c | 96 ++++++++++++++++++------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/drivers/spi/spi-lantiq-ssc.c b/drivers/spi/spi-lantiq-ssc.c
index 65568272cfd9..938e9e577e4f 100644
--- a/drivers/spi/spi-lantiq-ssc.c
+++ b/drivers/spi/spi-lantiq-ssc.c
@@ -92,7 +92,7 @@
 #define LTQ_SPI_STAT_RE		BIT(9)	/* Receive error flag */
 #define LTQ_SPI_STAT_TE		BIT(8)	/* Transmit error flag */
 #define LTQ_SPI_STAT_ME		BIT(7)	/* Mode error flag */
-#define LTQ_SPI_STAT_MS		BIT(1)	/* Master/slave select bit */
+#define LTQ_SPI_STAT_MS		BIT(1)	/* Host/target select bit */
 #define LTQ_SPI_STAT_EN		BIT(0)	/* Enable bit */
 #define LTQ_SPI_STAT_ERRORS	(LTQ_SPI_STAT_ME | LTQ_SPI_STAT_TE | \
 				 LTQ_SPI_STAT_RE | LTQ_SPI_STAT_AE | \
@@ -110,8 +110,8 @@
 #define LTQ_SPI_WHBSTATE_CLRME	BIT(6)	/* Clear mode error flag */
 #define LTQ_SPI_WHBSTATE_SETRUE	BIT(5)	/* Set receive underflow error flag */
 #define LTQ_SPI_WHBSTATE_CLRRUE	BIT(4)	/* Clear receive underflow error flag */
-#define LTQ_SPI_WHBSTATE_SETMS	BIT(3)	/* Set master select bit */
-#define LTQ_SPI_WHBSTATE_CLRMS	BIT(2)	/* Clear master select bit */
+#define LTQ_SPI_WHBSTATE_SETMS	BIT(3)	/* Set host select bit */
+#define LTQ_SPI_WHBSTATE_CLRMS	BIT(2)	/* Clear host select bit */
 #define LTQ_SPI_WHBSTATE_SETEN	BIT(1)	/* Set enable bit (operational mode) */
 #define LTQ_SPI_WHBSTATE_CLREN	BIT(0)	/* Clear enable bit (config mode */
 #define LTQ_SPI_WHBSTATE_CLR_ERRORS	(LTQ_SPI_WHBSTATE_CLRRUE | \
@@ -163,7 +163,7 @@ struct lantiq_ssc_hwcfg {
 };
 
 struct lantiq_ssc_spi {
-	struct spi_master		*master;
+	struct spi_controller		*host;
 	struct device			*dev;
 	void __iomem			*regbase;
 	struct clk			*spi_clk;
@@ -367,7 +367,7 @@ static void lantiq_ssc_hw_init(const struct lantiq_ssc_spi *spi)
 	hw_setup_bits_per_word(spi, spi->bits_per_word);
 	hw_setup_clock_mode(spi, SPI_MODE_0);
 
-	/* Enable master mode and clear error flags */
+	/* Enable host mode and clear error flags */
 	lantiq_ssc_writel(spi, LTQ_SPI_WHBSTATE_SETMS |
 			       LTQ_SPI_WHBSTATE_CLR_ERRORS,
 			       LTQ_SPI_WHBSTATE);
@@ -387,8 +387,8 @@ static void lantiq_ssc_hw_init(const struct lantiq_ssc_spi *spi)
 
 static int lantiq_ssc_setup(struct spi_device *spidev)
 {
-	struct spi_master *master = spidev->master;
-	struct lantiq_ssc_spi *spi = spi_master_get_devdata(master);
+	struct spi_controller *host = spidev->controller;
+	struct lantiq_ssc_spi *spi = spi_controller_get_devdata(host);
 	unsigned int cs = spi_get_chipselect(spidev, 0);
 	u32 gpocon;
 
@@ -416,10 +416,10 @@ static int lantiq_ssc_setup(struct spi_device *spidev)
 	return 0;
 }
 
-static int lantiq_ssc_prepare_message(struct spi_master *master,
+static int lantiq_ssc_prepare_message(struct spi_controller *host,
 				      struct spi_message *message)
 {
-	struct lantiq_ssc_spi *spi = spi_master_get_devdata(master);
+	struct lantiq_ssc_spi *spi = spi_controller_get_devdata(host);
 
 	hw_enter_config_mode(spi);
 	hw_setup_clock_mode(spi, message->spi->mode);
@@ -461,10 +461,10 @@ static void hw_setup_transfer(struct lantiq_ssc_spi *spi,
 	lantiq_ssc_writel(spi, con, LTQ_SPI_CON);
 }
 
-static int lantiq_ssc_unprepare_message(struct spi_master *master,
+static int lantiq_ssc_unprepare_message(struct spi_controller *host,
 					struct spi_message *message)
 {
-	struct lantiq_ssc_spi *spi = spi_master_get_devdata(master);
+	struct lantiq_ssc_spi *spi = spi_controller_get_devdata(host);
 
 	flush_workqueue(spi->wq);
 
@@ -693,8 +693,8 @@ static irqreturn_t lantiq_ssc_err_interrupt(int irq, void *data)
 	lantiq_ssc_maskl(spi, 0, LTQ_SPI_WHBSTATE_CLR_ERRORS, LTQ_SPI_WHBSTATE);
 
 	/* set bad status so it can be retried */
-	if (spi->master->cur_msg)
-		spi->master->cur_msg->status = -EIO;
+	if (spi->host->cur_msg)
+		spi->host->cur_msg->status = -EIO;
 	queue_work(spi->wq, &spi->work);
 	spin_unlock(&spi->lock);
 
@@ -772,22 +772,22 @@ static void lantiq_ssc_bussy_work(struct work_struct *work)
 		u32 stat = lantiq_ssc_readl(spi, LTQ_SPI_STAT);
 
 		if (!(stat & LTQ_SPI_STAT_BSY)) {
-			spi_finalize_current_transfer(spi->master);
+			spi_finalize_current_transfer(spi->host);
 			return;
 		}
 
 		cond_resched();
 	} while (!time_after_eq(jiffies, end));
 
-	if (spi->master->cur_msg)
-		spi->master->cur_msg->status = -EIO;
-	spi_finalize_current_transfer(spi->master);
+	if (spi->host->cur_msg)
+		spi->host->cur_msg->status = -EIO;
+	spi_finalize_current_transfer(spi->host);
 }
 
-static void lantiq_ssc_handle_err(struct spi_master *master,
+static void lantiq_ssc_handle_err(struct spi_controller *host,
 				  struct spi_message *message)
 {
-	struct lantiq_ssc_spi *spi = spi_master_get_devdata(master);
+	struct lantiq_ssc_spi *spi = spi_controller_get_devdata(host);
 
 	/* flush FIFOs on timeout */
 	rx_fifo_flush(spi);
@@ -796,7 +796,7 @@ static void lantiq_ssc_handle_err(struct spi_master *master,
 
 static void lantiq_ssc_set_cs(struct spi_device *spidev, bool enable)
 {
-	struct lantiq_ssc_spi *spi = spi_master_get_devdata(spidev->master);
+	struct lantiq_ssc_spi *spi = spi_controller_get_devdata(spidev->controller);
 	unsigned int cs = spi_get_chipselect(spidev, 0);
 	u32 fgpo;
 
@@ -808,11 +808,11 @@ static void lantiq_ssc_set_cs(struct spi_device *spidev, bool enable)
 	lantiq_ssc_writel(spi, fgpo, LTQ_SPI_FPGO);
 }
 
-static int lantiq_ssc_transfer_one(struct spi_master *master,
+static int lantiq_ssc_transfer_one(struct spi_controller *host,
 				   struct spi_device *spidev,
 				   struct spi_transfer *t)
 {
-	struct lantiq_ssc_spi *spi = spi_master_get_devdata(master);
+	struct lantiq_ssc_spi *spi = spi_controller_get_devdata(host);
 
 	hw_setup_transfer(spi, spidev, t);
 
@@ -904,7 +904,7 @@ MODULE_DEVICE_TABLE(of, lantiq_ssc_match);
 static int lantiq_ssc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct lantiq_ssc_spi *spi;
 	const struct lantiq_ssc_hwcfg *hwcfg;
 	u32 id, supports_dma, revision;
@@ -913,33 +913,33 @@ static int lantiq_ssc_probe(struct platform_device *pdev)
 
 	hwcfg = of_device_get_match_data(dev);
 
-	master = spi_alloc_master(dev, sizeof(struct lantiq_ssc_spi));
-	if (!master)
+	host = spi_alloc_host(dev, sizeof(struct lantiq_ssc_spi));
+	if (!host)
 		return -ENOMEM;
 
-	spi = spi_master_get_devdata(master);
-	spi->master = master;
+	spi = spi_controller_get_devdata(host);
+	spi->host = host;
 	spi->dev = dev;
 	spi->hwcfg = hwcfg;
 	platform_set_drvdata(pdev, spi);
 	spi->regbase = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(spi->regbase)) {
 		err = PTR_ERR(spi->regbase);
-		goto err_master_put;
+		goto err_host_put;
 	}
 
 	err = hwcfg->cfg_irq(pdev, spi);
 	if (err)
-		goto err_master_put;
+		goto err_host_put;
 
 	spi->spi_clk = devm_clk_get(dev, "gate");
 	if (IS_ERR(spi->spi_clk)) {
 		err = PTR_ERR(spi->spi_clk);
-		goto err_master_put;
+		goto err_host_put;
 	}
 	err = clk_prepare_enable(spi->spi_clk);
 	if (err)
-		goto err_master_put;
+		goto err_host_put;
 
 	/*
 	 * Use the old clk_get_fpi() function on Lantiq platform, till it
@@ -965,19 +965,19 @@ static int lantiq_ssc_probe(struct platform_device *pdev)
 	spi->bits_per_word = 8;
 	spi->speed_hz = 0;
 
-	master->dev.of_node = pdev->dev.of_node;
-	master->num_chipselect = num_cs;
-	master->use_gpio_descriptors = true;
-	master->setup = lantiq_ssc_setup;
-	master->set_cs = lantiq_ssc_set_cs;
-	master->handle_err = lantiq_ssc_handle_err;
-	master->prepare_message = lantiq_ssc_prepare_message;
-	master->unprepare_message = lantiq_ssc_unprepare_message;
-	master->transfer_one = lantiq_ssc_transfer_one;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST | SPI_CS_HIGH |
-				SPI_LOOP;
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 8) |
-				     SPI_BPW_MASK(16) | SPI_BPW_MASK(32);
+	host->dev.of_node = pdev->dev.of_node;
+	host->num_chipselect = num_cs;
+	host->use_gpio_descriptors = true;
+	host->setup = lantiq_ssc_setup;
+	host->set_cs = lantiq_ssc_set_cs;
+	host->handle_err = lantiq_ssc_handle_err;
+	host->prepare_message = lantiq_ssc_prepare_message;
+	host->unprepare_message = lantiq_ssc_unprepare_message;
+	host->transfer_one = lantiq_ssc_transfer_one;
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST | SPI_CS_HIGH |
+			  SPI_LOOP;
+	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 8) |
+				   SPI_BPW_MASK(16) | SPI_BPW_MASK(32);
 
 	spi->wq = alloc_ordered_workqueue(dev_name(dev), WQ_MEM_RECLAIM);
 	if (!spi->wq) {
@@ -998,9 +998,9 @@ static int lantiq_ssc_probe(struct platform_device *pdev)
 		"Lantiq SSC SPI controller (Rev %i, TXFS %u, RXFS %u, DMA %u)\n",
 		revision, spi->tx_fifo_size, spi->rx_fifo_size, supports_dma);
 
-	err = devm_spi_register_master(dev, master);
+	err = devm_spi_register_controller(dev, host);
 	if (err) {
-		dev_err(dev, "failed to register spi_master\n");
+		dev_err(dev, "failed to register spi host\n");
 		goto err_wq_destroy;
 	}
 
@@ -1012,8 +1012,8 @@ err_clk_put:
 	clk_put(spi->fpi_clk);
 err_clk_disable:
 	clk_disable_unprepare(spi->spi_clk);
-err_master_put:
-	spi_master_put(master);
+err_host_put:
+	spi_controller_put(host);
 
 	return err;
 }

From 0f9440a646fd1245a9206f171a4de25e6b092d42 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:12:45 +0200
Subject: [PATCH 1074/2047] spi: sc18is602: fix Wvoid-pointer-to-enum-cast
 warning

'id' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  spi-sc18is602.c:269:12: error: cast to smaller integer type 'enum chips' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810091247.70149-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sc18is602.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index c67a24daaaf9..9f98bf350697 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -266,7 +266,7 @@ static int sc18is602_probe(struct i2c_client *client)
 	hw->ctrl = 0xff;
 
 	if (client->dev.of_node)
-		hw->id = (enum chips)of_device_get_match_data(&client->dev);
+		hw->id = (uintptr_t)of_device_get_match_data(&client->dev);
 	else
 		hw->id = id->driver_data;
 

From 94d6cf7e72a3ef90d7e16d770c3dc1b9cd40d8dd Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:12:46 +0200
Subject: [PATCH 1075/2047] spi: pxa2xx: fix Wvoid-pointer-to-enum-cast warning

'type' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  spi-pxa2xx.c:1347:10: error: cast to smaller integer type 'enum pxa_ssp_type' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810091247.70149-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 1bab18a0f262..21f156587669 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1344,7 +1344,7 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev)
 
 	match = device_get_match_data(dev);
 	if (match)
-		type = (enum pxa_ssp_type)match;
+		type = (uintptr_t)match;
 	else if (is_lpss_priv) {
 		u32 value;
 

From 675b8e35b5cbf4aaa1339079f44b88b9f1bb2f1b Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:12:47 +0200
Subject: [PATCH 1076/2047] spi: amd: fix Wvoid-pointer-to-enum-cast warning

'version' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  spi-amd.c:401:21: error: cast to smaller integer type 'enum amd_spi_versions' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810091247.70149-3-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
index e4345ad5cd36..5d9b246b6963 100644
--- a/drivers/spi/spi-amd.c
+++ b/drivers/spi/spi-amd.c
@@ -398,7 +398,7 @@ static int amd_spi_probe(struct platform_device *pdev)
 
 	dev_dbg(dev, "io_remap_address: %p\n", amd_spi->io_remap_addr);
 
-	amd_spi->version = (enum amd_spi_versions) device_get_match_data(dev);
+	amd_spi->version = (uintptr_t) device_get_match_data(dev);
 
 	/* Initialize the spi_controller fields */
 	host->bus_num = 0;

From c7fcb99877f9f542c918509b2801065adcaf46fa Mon Sep 17 00:00:00 2001
From: Cyril Hrubis <chrubis@suse.cz>
Date: Wed, 2 Aug 2023 17:19:05 +0200
Subject: [PATCH 1077/2047] sched/rt: Fix sysctl_sched_rr_timeslice intial
 value

There is a 10% rounding error in the intial value of the
sysctl_sched_rr_timeslice with CONFIG_HZ_300=y.

This was found with LTP test sched_rr_get_interval01:

sched_rr_get_interval01.c:57: TPASS: sched_rr_get_interval() passed
sched_rr_get_interval01.c:64: TPASS: Time quantum 0s 99999990ns
sched_rr_get_interval01.c:72: TFAIL: /proc/sys/kernel/sched_rr_timeslice_ms != 100 got 90
sched_rr_get_interval01.c:57: TPASS: sched_rr_get_interval() passed
sched_rr_get_interval01.c:64: TPASS: Time quantum 0s 99999990ns
sched_rr_get_interval01.c:72: TFAIL: /proc/sys/kernel/sched_rr_timeslice_ms != 100 got 90

What this test does is to compare the return value from the
sched_rr_get_interval() and the sched_rr_timeslice_ms sysctl file and
fails if they do not match.

The problem it found is the intial sysctl file value which was computed as:

static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;

which works fine as long as MSEC_PER_SEC is multiple of HZ, however it
introduces 10% rounding error for CONFIG_HZ_300:

(MSEC_PER_SEC / HZ) * (100 * HZ / 1000)

(1000 / 300) * (100 * 300 / 1000)

3 * 30 = 90

This can be easily fixed by reversing the order of the multiplication
and division. After this fix we get:

(MSEC_PER_SEC * (100 * HZ / 1000)) / HZ

(1000 * (100 * 300 / 1000)) / 300

(1000 * 30) / 300 = 100

Fixes: 975e155ed873 ("sched/rt: Show the 'sched_rr_timeslice' SCHED_RR timeslice tuning knob in milliseconds")
Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Petr Vorel <pvorel@suse.cz>
Acked-by: Mel Gorman <mgorman@suse.de>
Tested-by: Petr Vorel <pvorel@suse.cz>
Link: https://lore.kernel.org/r/20230802151906.25258-2-chrubis@suse.cz
---
 kernel/sched/rt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 00e0e5074115..185d3d749f6b 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -25,7 +25,7 @@ unsigned int sysctl_sched_rt_period = 1000000;
 int sysctl_sched_rt_runtime = 950000;
 
 #ifdef CONFIG_SYSCTL
-static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
+static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ;
 static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos);
 static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,

From c1fc6484e1fb7cc2481d169bfef129a1b0676abe Mon Sep 17 00:00:00 2001
From: Cyril Hrubis <chrubis@suse.cz>
Date: Wed, 2 Aug 2023 17:19:06 +0200
Subject: [PATCH 1078/2047] sched/rt: sysctl_sched_rr_timeslice show default
 timeslice after reset

The sched_rr_timeslice can be reset to default by writing value that is
<= 0. However after reading from this file we always got the last value
written, which is not useful at all.

$ echo -1 > /proc/sys/kernel/sched_rr_timeslice_ms
$ cat /proc/sys/kernel/sched_rr_timeslice_ms
-1

Fix this by setting the variable that holds the sysctl file value to the
jiffies_to_msecs(RR_TIMESLICE) in case that <= 0 value was written.

Signed-off-by: Cyril Hrubis <chrubis@suse.cz>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Petr Vorel <pvorel@suse.cz>
Acked-by: Mel Gorman <mgorman@suse.de>
Tested-by: Petr Vorel <pvorel@suse.cz>
Link: https://lore.kernel.org/r/20230802151906.25258-3-chrubis@suse.cz
---
 kernel/sched/rt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 185d3d749f6b..0597ba0f85ff 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -3062,6 +3062,9 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
 		sched_rr_timeslice =
 			sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
 			msecs_to_jiffies(sysctl_sched_rr_timeslice);
+
+		if (sysctl_sched_rr_timeslice <= 0)
+			sysctl_sched_rr_timeslice = jiffies_to_msecs(RR_TIMESLICE);
 	}
 	mutex_unlock(&mutex);
 

From 7537b90c0036759e0b1b43dfbc6224dc5e900b13 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 1 Aug 2023 22:41:22 +0200
Subject: [PATCH 1079/2047] sched: Simplify get_nohz_timer_target()

Use guards to reduce gotos and simplify control flow.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lore.kernel.org/r/20230801211811.828443100@infradead.org
---
 kernel/sched/core.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a97eab3e775a..6cda29655cb0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1097,25 +1097,22 @@ int get_nohz_timer_target(void)
 
 	hk_mask = housekeeping_cpumask(HK_TYPE_TIMER);
 
-	rcu_read_lock();
+	guard(rcu)();
+
 	for_each_domain(cpu, sd) {
 		for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
 			if (cpu == i)
 				continue;
 
-			if (!idle_cpu(i)) {
-				cpu = i;
-				goto unlock;
-			}
+			if (!idle_cpu(i))
+				return i;
 		}
 	}
 
 	if (default_cpu == -1)
 		default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER);
-	cpu = default_cpu;
-unlock:
-	rcu_read_unlock();
-	return cpu;
+
+	return default_cpu;
 }
 
 /*

From 0f92cdf36f848f1c077924f857a49789e00331c0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 1 Aug 2023 22:41:23 +0200
Subject: [PATCH 1080/2047] sched: Simplify sysctl_sched_uclamp_handler()

Use guards to reduce gotos and simplify control flow.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lore.kernel.org/r/20230801211811.896559109@infradead.org
---
 kernel/sched/core.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6cda29655cb0..6e8a8e9d2cad 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1827,7 +1827,8 @@ static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
 	int old_min, old_max, old_min_rt;
 	int result;
 
-	mutex_lock(&uclamp_mutex);
+	guard(mutex)(&uclamp_mutex);
+
 	old_min = sysctl_sched_uclamp_util_min;
 	old_max = sysctl_sched_uclamp_util_max;
 	old_min_rt = sysctl_sched_uclamp_util_min_rt_default;
@@ -1836,7 +1837,7 @@ static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
 	if (result)
 		goto undo;
 	if (!write)
-		goto done;
+		return 0;
 
 	if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max ||
 	    sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE	||
@@ -1872,16 +1873,12 @@ static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
 	 * Otherwise, keep it simple and do just a lazy update at each next
 	 * task enqueue time.
 	 */
-
-	goto done;
+	return 0;
 
 undo:
 	sysctl_sched_uclamp_util_min = old_min;
 	sysctl_sched_uclamp_util_max = old_max;
 	sysctl_sched_uclamp_util_min_rt_default = old_min_rt;
-done:
-	mutex_unlock(&uclamp_mutex);
-
 	return result;
 }
 #endif

From 5bb76f1ddf2a7dd98f5a89d7755600ed1b4a7fcd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 1 Aug 2023 22:41:24 +0200
Subject: [PATCH 1081/2047] sched: Simplify: migrate_swap_stop()

Use guards to reduce gotos and simplify control flow.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lore.kernel.org/r/20230801211811.964370836@infradead.org
---
 kernel/sched/core.c  | 23 +++++++----------------
 kernel/sched/sched.h | 20 ++++++++++++++++++++
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6e8a8e9d2cad..66478a62f217 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3433,7 +3433,6 @@ static int migrate_swap_stop(void *data)
 {
 	struct migration_swap_arg *arg = data;
 	struct rq *src_rq, *dst_rq;
-	int ret = -EAGAIN;
 
 	if (!cpu_active(arg->src_cpu) || !cpu_active(arg->dst_cpu))
 		return -EAGAIN;
@@ -3441,33 +3440,25 @@ static int migrate_swap_stop(void *data)
 	src_rq = cpu_rq(arg->src_cpu);
 	dst_rq = cpu_rq(arg->dst_cpu);
 
-	double_raw_lock(&arg->src_task->pi_lock,
-			&arg->dst_task->pi_lock);
-	double_rq_lock(src_rq, dst_rq);
+	guard(double_raw_spinlock)(&arg->src_task->pi_lock, &arg->dst_task->pi_lock);
+	guard(double_rq_lock)(src_rq, dst_rq);
 
 	if (task_cpu(arg->dst_task) != arg->dst_cpu)
-		goto unlock;
+		return -EAGAIN;
 
 	if (task_cpu(arg->src_task) != arg->src_cpu)
-		goto unlock;
+		return -EAGAIN;
 
 	if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
-		goto unlock;
+		return -EAGAIN;
 
 	if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
-		goto unlock;
+		return -EAGAIN;
 
 	__migrate_swap_task(arg->src_task, arg->dst_cpu);
 	__migrate_swap_task(arg->dst_task, arg->src_cpu);
 
-	ret = 0;
-
-unlock:
-	double_rq_unlock(src_rq, dst_rq);
-	raw_spin_unlock(&arg->dst_task->pi_lock);
-	raw_spin_unlock(&arg->src_task->pi_lock);
-
-	return ret;
+	return 0;
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9c5035ca3b06..c299a585d38f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2614,6 +2614,12 @@ static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
 static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
 #endif
 
+#define DEFINE_LOCK_GUARD_2(name, type, _lock, _unlock, ...)		\
+__DEFINE_UNLOCK_GUARD(name, type, _unlock, type *lock2; __VA_ARGS__) \
+static inline class_##name##_t class_##name##_constructor(type *lock, type *lock2) \
+{ class_##name##_t _t = { .lock = lock, .lock2 = lock2 }, *_T = &_t;	\
+  _lock; return _t; }
+
 #ifdef CONFIG_SMP
 
 static inline bool rq_order_less(struct rq *rq1, struct rq *rq2)
@@ -2743,6 +2749,16 @@ static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
 	raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
 }
 
+static inline void double_raw_unlock(raw_spinlock_t *l1, raw_spinlock_t *l2)
+{
+	raw_spin_unlock(l1);
+	raw_spin_unlock(l2);
+}
+
+DEFINE_LOCK_GUARD_2(double_raw_spinlock, raw_spinlock_t,
+		    double_raw_lock(_T->lock, _T->lock2),
+		    double_raw_unlock(_T->lock, _T->lock2))
+
 /*
  * double_rq_unlock - safely unlock two runqueues
  *
@@ -2800,6 +2816,10 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 
 #endif
 
+DEFINE_LOCK_GUARD_2(double_rq_lock, struct rq,
+		    double_rq_lock(_T->lock, _T->lock2),
+		    double_rq_unlock(_T->lock, _T->lock2))
+
 extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
 extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
 

From 4eb054f92b066ec0a0cba6896ee8eff4c91dfc9e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 1 Aug 2023 22:41:25 +0200
Subject: [PATCH 1082/2047] sched: Simplify wake_up_if_idle()

Use guards to reduce gotos and simplify control flow.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lore.kernel.org/r/20230801211812.032678917@infradead.org
---
 kernel/sched/core.c  | 20 ++++++--------------
 kernel/sched/sched.h | 15 +++++++++++++++
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 66478a62f217..65ebf43206b6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3939,21 +3939,13 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
 void wake_up_if_idle(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
-	struct rq_flags rf;
 
-	rcu_read_lock();
-
-	if (!is_idle_task(rcu_dereference(rq->curr)))
-		goto out;
-
-	rq_lock_irqsave(rq, &rf);
-	if (is_idle_task(rq->curr))
-		resched_curr(rq);
-	/* Else CPU is not idle, do nothing here: */
-	rq_unlock_irqrestore(rq, &rf);
-
-out:
-	rcu_read_unlock();
+	guard(rcu)();
+	if (is_idle_task(rcu_dereference(rq->curr))) {
+		guard(rq_lock_irqsave)(rq);
+		if (is_idle_task(rq->curr))
+			resched_curr(rq);
+	}
 }
 
 bool cpus_share_cache(int this_cpu, int that_cpu)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c299a585d38f..3a01b7a2bf66 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1705,6 +1705,21 @@ rq_unlock(struct rq *rq, struct rq_flags *rf)
 	raw_spin_rq_unlock(rq);
 }
 
+DEFINE_LOCK_GUARD_1(rq_lock, struct rq,
+		    rq_lock(_T->lock, &_T->rf),
+		    rq_unlock(_T->lock, &_T->rf),
+		    struct rq_flags rf)
+
+DEFINE_LOCK_GUARD_1(rq_lock_irq, struct rq,
+		    rq_lock_irq(_T->lock, &_T->rf),
+		    rq_unlock_irq(_T->lock, &_T->rf),
+		    struct rq_flags rf)
+
+DEFINE_LOCK_GUARD_1(rq_lock_irqsave, struct rq,
+		    rq_lock_irqsave(_T->lock, &_T->rf),
+		    rq_unlock_irqrestore(_T->lock, &_T->rf),
+		    struct rq_flags rf)
+
 static inline struct rq *
 this_rq_lock_irq(struct rq_flags *rf)
 	__acquires(rq->lock)

From 857d315f1201cfcf60e5849c96d2b4dd20f90ebf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 1 Aug 2023 22:41:26 +0200
Subject: [PATCH 1083/2047] sched: Simplify ttwu()

Use guards to reduce gotos and simplify control flow.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lore.kernel.org/r/20230801211812.101069260@infradead.org
---
 kernel/sched/core.c | 221 ++++++++++++++++++++++----------------------
 1 file changed, 109 insertions(+), 112 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 65ebf43206b6..68bd68d351a5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3733,14 +3733,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 		struct sched_domain *sd;
 
 		__schedstat_inc(p->stats.nr_wakeups_remote);
-		rcu_read_lock();
+
+		guard(rcu)();
 		for_each_domain(rq->cpu, sd) {
 			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 				__schedstat_inc(sd->ttwu_wake_remote);
 				break;
 			}
 		}
-		rcu_read_unlock();
 	}
 
 	if (wake_flags & WF_MIGRATED)
@@ -4199,10 +4199,9 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
 static int
 try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 {
-	unsigned long flags;
+	guard(preempt)();
 	int cpu, success = 0;
 
-	preempt_disable();
 	if (p == current) {
 		/*
 		 * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
@@ -4229,129 +4228,127 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 * reordered with p->state check below. This pairs with smp_store_mb()
 	 * in set_current_state() that the waiting thread does.
 	 */
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	smp_mb__after_spinlock();
-	if (!ttwu_state_match(p, state, &success))
-		goto unlock;
+	scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
+		smp_mb__after_spinlock();
+		if (!ttwu_state_match(p, state, &success))
+			break;
 
-	trace_sched_waking(p);
+		trace_sched_waking(p);
 
-	/*
-	 * Ensure we load p->on_rq _after_ p->state, otherwise it would
-	 * be possible to, falsely, observe p->on_rq == 0 and get stuck
-	 * in smp_cond_load_acquire() below.
-	 *
-	 * sched_ttwu_pending()			try_to_wake_up()
-	 *   STORE p->on_rq = 1			  LOAD p->state
-	 *   UNLOCK rq->lock
-	 *
-	 * __schedule() (switch to task 'p')
-	 *   LOCK rq->lock			  smp_rmb();
-	 *   smp_mb__after_spinlock();
-	 *   UNLOCK rq->lock
-	 *
-	 * [task p]
-	 *   STORE p->state = UNINTERRUPTIBLE	  LOAD p->on_rq
-	 *
-	 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
-	 * __schedule().  See the comment for smp_mb__after_spinlock().
-	 *
-	 * A similar smb_rmb() lives in try_invoke_on_locked_down_task().
-	 */
-	smp_rmb();
-	if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
-		goto unlock;
+		/*
+		 * Ensure we load p->on_rq _after_ p->state, otherwise it would
+		 * be possible to, falsely, observe p->on_rq == 0 and get stuck
+		 * in smp_cond_load_acquire() below.
+		 *
+		 * sched_ttwu_pending()			try_to_wake_up()
+		 *   STORE p->on_rq = 1			  LOAD p->state
+		 *   UNLOCK rq->lock
+		 *
+		 * __schedule() (switch to task 'p')
+		 *   LOCK rq->lock			  smp_rmb();
+		 *   smp_mb__after_spinlock();
+		 *   UNLOCK rq->lock
+		 *
+		 * [task p]
+		 *   STORE p->state = UNINTERRUPTIBLE	  LOAD p->on_rq
+		 *
+		 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+		 * __schedule().  See the comment for smp_mb__after_spinlock().
+		 *
+		 * A similar smb_rmb() lives in try_invoke_on_locked_down_task().
+		 */
+		smp_rmb();
+		if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
+			break;
 
 #ifdef CONFIG_SMP
-	/*
-	 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
-	 * possible to, falsely, observe p->on_cpu == 0.
-	 *
-	 * One must be running (->on_cpu == 1) in order to remove oneself
-	 * from the runqueue.
-	 *
-	 * __schedule() (switch to task 'p')	try_to_wake_up()
-	 *   STORE p->on_cpu = 1		  LOAD p->on_rq
-	 *   UNLOCK rq->lock
-	 *
-	 * __schedule() (put 'p' to sleep)
-	 *   LOCK rq->lock			  smp_rmb();
-	 *   smp_mb__after_spinlock();
-	 *   STORE p->on_rq = 0			  LOAD p->on_cpu
-	 *
-	 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
-	 * __schedule().  See the comment for smp_mb__after_spinlock().
-	 *
-	 * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
-	 * schedule()'s deactivate_task() has 'happened' and p will no longer
-	 * care about it's own p->state. See the comment in __schedule().
-	 */
-	smp_acquire__after_ctrl_dep();
+		/*
+		 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
+		 * possible to, falsely, observe p->on_cpu == 0.
+		 *
+		 * One must be running (->on_cpu == 1) in order to remove oneself
+		 * from the runqueue.
+		 *
+		 * __schedule() (switch to task 'p')	try_to_wake_up()
+		 *   STORE p->on_cpu = 1		  LOAD p->on_rq
+		 *   UNLOCK rq->lock
+		 *
+		 * __schedule() (put 'p' to sleep)
+		 *   LOCK rq->lock			  smp_rmb();
+		 *   smp_mb__after_spinlock();
+		 *   STORE p->on_rq = 0			  LOAD p->on_cpu
+		 *
+		 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+		 * __schedule().  See the comment for smp_mb__after_spinlock().
+		 *
+		 * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
+		 * schedule()'s deactivate_task() has 'happened' and p will no longer
+		 * care about it's own p->state. See the comment in __schedule().
+		 */
+		smp_acquire__after_ctrl_dep();
 
-	/*
-	 * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
-	 * == 0), which means we need to do an enqueue, change p->state to
-	 * TASK_WAKING such that we can unlock p->pi_lock before doing the
-	 * enqueue, such as ttwu_queue_wakelist().
-	 */
-	WRITE_ONCE(p->__state, TASK_WAKING);
+		/*
+		 * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
+		 * == 0), which means we need to do an enqueue, change p->state to
+		 * TASK_WAKING such that we can unlock p->pi_lock before doing the
+		 * enqueue, such as ttwu_queue_wakelist().
+		 */
+		WRITE_ONCE(p->__state, TASK_WAKING);
 
-	/*
-	 * If the owning (remote) CPU is still in the middle of schedule() with
-	 * this task as prev, considering queueing p on the remote CPUs wake_list
-	 * which potentially sends an IPI instead of spinning on p->on_cpu to
-	 * let the waker make forward progress. This is safe because IRQs are
-	 * disabled and the IPI will deliver after on_cpu is cleared.
-	 *
-	 * Ensure we load task_cpu(p) after p->on_cpu:
-	 *
-	 * set_task_cpu(p, cpu);
-	 *   STORE p->cpu = @cpu
-	 * __schedule() (switch to task 'p')
-	 *   LOCK rq->lock
-	 *   smp_mb__after_spin_lock()		smp_cond_load_acquire(&p->on_cpu)
-	 *   STORE p->on_cpu = 1		LOAD p->cpu
-	 *
-	 * to ensure we observe the correct CPU on which the task is currently
-	 * scheduling.
-	 */
-	if (smp_load_acquire(&p->on_cpu) &&
-	    ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
-		goto unlock;
+		/*
+		 * If the owning (remote) CPU is still in the middle of schedule() with
+		 * this task as prev, considering queueing p on the remote CPUs wake_list
+		 * which potentially sends an IPI instead of spinning on p->on_cpu to
+		 * let the waker make forward progress. This is safe because IRQs are
+		 * disabled and the IPI will deliver after on_cpu is cleared.
+		 *
+		 * Ensure we load task_cpu(p) after p->on_cpu:
+		 *
+		 * set_task_cpu(p, cpu);
+		 *   STORE p->cpu = @cpu
+		 * __schedule() (switch to task 'p')
+		 *   LOCK rq->lock
+		 *   smp_mb__after_spin_lock()		smp_cond_load_acquire(&p->on_cpu)
+		 *   STORE p->on_cpu = 1		LOAD p->cpu
+		 *
+		 * to ensure we observe the correct CPU on which the task is currently
+		 * scheduling.
+		 */
+		if (smp_load_acquire(&p->on_cpu) &&
+		    ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
+			break;
 
-	/*
-	 * If the owning (remote) CPU is still in the middle of schedule() with
-	 * this task as prev, wait until it's done referencing the task.
-	 *
-	 * Pairs with the smp_store_release() in finish_task().
-	 *
-	 * This ensures that tasks getting woken will be fully ordered against
-	 * their previous state and preserve Program Order.
-	 */
-	smp_cond_load_acquire(&p->on_cpu, !VAL);
+		/*
+		 * If the owning (remote) CPU is still in the middle of schedule() with
+		 * this task as prev, wait until it's done referencing the task.
+		 *
+		 * Pairs with the smp_store_release() in finish_task().
+		 *
+		 * This ensures that tasks getting woken will be fully ordered against
+		 * their previous state and preserve Program Order.
+		 */
+		smp_cond_load_acquire(&p->on_cpu, !VAL);
 
-	cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
-	if (task_cpu(p) != cpu) {
-		if (p->in_iowait) {
-			delayacct_blkio_end(p);
-			atomic_dec(&task_rq(p)->nr_iowait);
+		cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
+		if (task_cpu(p) != cpu) {
+			if (p->in_iowait) {
+				delayacct_blkio_end(p);
+				atomic_dec(&task_rq(p)->nr_iowait);
+			}
+
+			wake_flags |= WF_MIGRATED;
+			psi_ttwu_dequeue(p);
+			set_task_cpu(p, cpu);
 		}
-
-		wake_flags |= WF_MIGRATED;
-		psi_ttwu_dequeue(p);
-		set_task_cpu(p, cpu);
-	}
 #else
-	cpu = task_cpu(p);
+		cpu = task_cpu(p);
 #endif /* CONFIG_SMP */
 
-	ttwu_queue(p, cpu, wake_flags);
-unlock:
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		ttwu_queue(p, cpu, wake_flags);
+	}
 out:
 	if (success)
 		ttwu_stat(p, task_cpu(p), wake_flags);
-	preempt_enable();
 
 	return success;
 }

From 4bdada79f3464d85f6e187213c088e7c934e0554 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 1 Aug 2023 22:41:27 +0200
Subject: [PATCH 1084/2047] sched: Simplify sched_exec()

Use guards to reduce gotos and simplify control flow.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lore.kernel.org/r/20230801211812.168490417@infradead.org
---
 kernel/sched/core.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 68bd68d351a5..cd7f2ed1377d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5498,23 +5498,20 @@ unsigned int nr_iowait(void)
 void sched_exec(void)
 {
 	struct task_struct *p = current;
-	unsigned long flags;
+	struct migration_arg arg;
 	int dest_cpu;
 
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), WF_EXEC);
-	if (dest_cpu == smp_processor_id())
-		goto unlock;
+	scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
+		dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), WF_EXEC);
+		if (dest_cpu == smp_processor_id())
+			return;
 
-	if (likely(cpu_active(dest_cpu))) {
-		struct migration_arg arg = { p, dest_cpu };
+		if (unlikely(!cpu_active(dest_cpu)))
+			return;
 
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
-		stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
-		return;
+		arg = (struct migration_arg){ p, dest_cpu };
 	}
-unlock:
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+	stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
 }
 
 #endif

From 6dafc713e3b0d8ffbd696d200d8c9dd212ddcdfc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 1 Aug 2023 22:41:28 +0200
Subject: [PATCH 1085/2047] sched: Simplify sched_tick_remote()

Use guards to reduce gotos and simplify control flow.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lore.kernel.org/r/20230801211812.236247952@infradead.org
---
 kernel/sched/core.c | 39 ++++++++++++++++-----------------------
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cd7f2ed1377d..1b2fa91a1ef5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5721,9 +5721,6 @@ static void sched_tick_remote(struct work_struct *work)
 	struct tick_work *twork = container_of(dwork, struct tick_work, work);
 	int cpu = twork->cpu;
 	struct rq *rq = cpu_rq(cpu);
-	struct task_struct *curr;
-	struct rq_flags rf;
-	u64 delta;
 	int os;
 
 	/*
@@ -5733,30 +5730,26 @@ static void sched_tick_remote(struct work_struct *work)
 	 * statistics and checks timeslices in a time-independent way, regardless
 	 * of when exactly it is running.
 	 */
-	if (!tick_nohz_tick_stopped_cpu(cpu))
-		goto out_requeue;
+	if (tick_nohz_tick_stopped_cpu(cpu)) {
+		guard(rq_lock_irq)(rq);
+		struct task_struct *curr = rq->curr;
 
-	rq_lock_irq(rq, &rf);
-	curr = rq->curr;
-	if (cpu_is_offline(cpu))
-		goto out_unlock;
+		if (cpu_online(cpu)) {
+			update_rq_clock(rq);
 
-	update_rq_clock(rq);
+			if (!is_idle_task(curr)) {
+				/*
+				 * Make sure the next tick runs within a
+				 * reasonable amount of time.
+				 */
+				u64 delta = rq_clock_task(rq) - curr->se.exec_start;
+				WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+			}
+			curr->sched_class->task_tick(rq, curr, 0);
 
-	if (!is_idle_task(curr)) {
-		/*
-		 * Make sure the next tick runs within a reasonable
-		 * amount of time.
-		 */
-		delta = rq_clock_task(rq) - curr->se.exec_start;
-		WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+			calc_load_nohz_remote(rq);
+		}
 	}
-	curr->sched_class->task_tick(rq, curr, 0);
-
-	calc_load_nohz_remote(rq);
-out_unlock:
-	rq_unlock_irq(rq, &rf);
-out_requeue:
 
 	/*
 	 * Run the remote tick once per second (1Hz). This arbitrary

From b4e1fa1e14286f7a825b10d8ebb2e9c0f77c241b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 1 Aug 2023 22:41:29 +0200
Subject: [PATCH 1086/2047] sched: Simplify try_steal_cookie()

Use guards to reduce gotos and simplify control flow.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lore.kernel.org/r/20230801211812.304154828@infradead.org
---
 kernel/sched/core.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1b2fa91a1ef5..f113a4449fde 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6298,19 +6298,19 @@ static bool try_steal_cookie(int this, int that)
 	unsigned long cookie;
 	bool success = false;
 
-	local_irq_disable();
-	double_rq_lock(dst, src);
+	guard(irq)();
+	guard(double_rq_lock)(dst, src);
 
 	cookie = dst->core->core_cookie;
 	if (!cookie)
-		goto unlock;
+		return false;
 
 	if (dst->curr != dst->idle)
-		goto unlock;
+		return false;
 
 	p = sched_core_find(src, cookie);
 	if (!p)
-		goto unlock;
+		return false;
 
 	do {
 		if (p == src->core_pick || p == src->curr)
@@ -6322,9 +6322,10 @@ static bool try_steal_cookie(int this, int that)
 		if (p->core_occupation > dst->idle->core_occupation)
 			goto next;
 		/*
-		 * sched_core_find() and sched_core_next() will ensure that task @p
-		 * is not throttled now, we also need to check whether the runqueue
-		 * of the destination CPU is being throttled.
+		 * sched_core_find() and sched_core_next() will ensure
+		 * that task @p is not throttled now, we also need to
+		 * check whether the runqueue of the destination CPU is
+		 * being throttled.
 		 */
 		if (sched_task_is_throttled(p, this))
 			goto next;
@@ -6342,10 +6343,6 @@ next:
 		p = sched_core_next(p, cookie);
 	} while (p);
 
-unlock:
-	double_rq_unlock(dst, src);
-	local_irq_enable();
-
 	return success;
 }
 

From 7170509cadbb76e5fa7d7b090d2cbdb93d56a2de Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 1 Aug 2023 22:41:30 +0200
Subject: [PATCH 1087/2047] sched: Simplify
 sched_core_cpu_{starting,deactivate}()

Use guards to reduce gotos and simplify control flow.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lore.kernel.org/r/20230801211812.371787909@infradead.org
---
 kernel/sched/core.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f113a4449fde..efe3848978a0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6400,20 +6400,24 @@ static void queue_core_balance(struct rq *rq)
 	queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu), sched_core_balance);
 }
 
+DEFINE_LOCK_GUARD_1(core_lock, int,
+		    sched_core_lock(*_T->lock, &_T->flags),
+		    sched_core_unlock(*_T->lock, &_T->flags),
+		    unsigned long flags)
+
 static void sched_core_cpu_starting(unsigned int cpu)
 {
 	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
 	struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
-	unsigned long flags;
 	int t;
 
-	sched_core_lock(cpu, &flags);
+	guard(core_lock)(&cpu);
 
 	WARN_ON_ONCE(rq->core != rq);
 
 	/* if we're the first, we'll be our own leader */
 	if (cpumask_weight(smt_mask) == 1)
-		goto unlock;
+		return;
 
 	/* find the leader */
 	for_each_cpu(t, smt_mask) {
@@ -6427,7 +6431,7 @@ static void sched_core_cpu_starting(unsigned int cpu)
 	}
 
 	if (WARN_ON_ONCE(!core_rq)) /* whoopsie */
-		goto unlock;
+		return;
 
 	/* install and validate core_rq */
 	for_each_cpu(t, smt_mask) {
@@ -6438,29 +6442,25 @@ static void sched_core_cpu_starting(unsigned int cpu)
 
 		WARN_ON_ONCE(rq->core != core_rq);
 	}
-
-unlock:
-	sched_core_unlock(cpu, &flags);
 }
 
 static void sched_core_cpu_deactivate(unsigned int cpu)
 {
 	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
 	struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
-	unsigned long flags;
 	int t;
 
-	sched_core_lock(cpu, &flags);
+	guard(core_lock)(&cpu);
 
 	/* if we're the last man standing, nothing to do */
 	if (cpumask_weight(smt_mask) == 1) {
 		WARN_ON_ONCE(rq->core != rq);
-		goto unlock;
+		return;
 	}
 
 	/* if we're not the leader, nothing to do */
 	if (rq->core != rq)
-		goto unlock;
+		return;
 
 	/* find a new leader */
 	for_each_cpu(t, smt_mask) {
@@ -6471,7 +6471,7 @@ static void sched_core_cpu_deactivate(unsigned int cpu)
 	}
 
 	if (WARN_ON_ONCE(!core_rq)) /* impossible */
-		goto unlock;
+		return;
 
 	/* copy the shared state to the new leader */
 	core_rq->core_task_seq             = rq->core_task_seq;
@@ -6493,9 +6493,6 @@ static void sched_core_cpu_deactivate(unsigned int cpu)
 		rq = cpu_rq(t);
 		rq->core = core_rq;
 	}
-
-unlock:
-	sched_core_unlock(cpu, &flags);
 }
 
 static inline void sched_core_cpu_dying(unsigned int cpu)

From 89eed1ab1161e7d60595917e3b982e03dfcc0f8d Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Sun, 30 Jul 2023 00:03:16 +0200
Subject: [PATCH 1088/2047] rust: upgrade to Rust 1.71.1

This is the second upgrade to the Rust toolchain, from 1.68.2 to 1.71.1
(i.e. the latest).

See the upgrade policy [1] and the comments on the first upgrade in
commit 3ed03f4da06e ("rust: upgrade to Rust 1.68.2").

# Unstable features

No unstable features (that we use) were stabilized.

Therefore, the only unstable feature allowed to be used outside
the `kernel` crate is still `new_uninit`, though other code to be
upstreamed may increase the list.

Please see [2] for details.

# Required changes

For the upgrade, this patch requires the following changes:

  - Removal of the `__rust_*` allocator functions, together with
    the addition of the `__rust_no_alloc_shim_is_unstable` static.
    See [3] for details.

  - Some more compiler builtins added due to `<f{32,64}>::midpoint()`
    that got added in Rust 1.71 [4].

# `alloc` upgrade and reviewing

The vast majority of changes are due to our `alloc` fork being upgraded
at once.

There are two kinds of changes to be aware of: the ones coming from
upstream, which we should follow as closely as possible, and the updates
needed in our added fallible APIs to keep them matching the newer
infallible APIs coming from upstream.

Instead of taking a look at the diff of this patch, an alternative
approach is reviewing a diff of the changes between upstream `alloc` and
the kernel's. This allows to easily inspect the kernel additions only,
especially to check if the fallible methods we already have still match
the infallible ones in the new version coming from upstream.

Another approach is reviewing the changes introduced in the additions in
the kernel fork between the two versions. This is useful to spot
potentially unintended changes to our additions.

To apply these approaches, one may follow steps similar to the following
to generate a pair of patches that show the differences between upstream
Rust and the kernel (for the subset of `alloc` we use) before and after
applying this patch:

    # Get the difference with respect to the old version.
    git -C rust checkout $(linux/scripts/min-tool-version.sh rustc)
    git -C linux ls-tree -r --name-only HEAD -- rust/alloc |
        cut -d/ -f3- |
        grep -Fv README.md |
        xargs -IPATH cp rust/library/alloc/src/PATH linux/rust/alloc/PATH
    git -C linux diff --patch-with-stat --summary -R > old.patch
    git -C linux restore rust/alloc

    # Apply this patch.
    git -C linux am rust-upgrade.patch

    # Get the difference with respect to the new version.
    git -C rust checkout $(linux/scripts/min-tool-version.sh rustc)
    git -C linux ls-tree -r --name-only HEAD -- rust/alloc |
        cut -d/ -f3- |
        grep -Fv README.md |
        xargs -IPATH cp rust/library/alloc/src/PATH linux/rust/alloc/PATH
    git -C linux diff --patch-with-stat --summary -R > new.patch
    git -C linux restore rust/alloc

Now one may check the `new.patch` to take a look at the additions (first
approach) or at the difference between those two patches (second
approach). For the latter, a side-by-side tool is recommended.

Link: https://rust-for-linux.com/rust-version-policy [1]
Link: https://github.com/Rust-for-Linux/linux/issues/2 [2]
Link: https://github.com/rust-lang/rust/pull/86844 [3]
Link: https://github.com/rust-lang/rust/pull/92048 [4]
Closes: https://github.com/Rust-for-Linux/linux/issues/68
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Trevor Gross <tmgross@umich.edu>
Link: https://lore.kernel.org/r/20230729220317.416771-1-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 Documentation/process/changes.rst |   2 +-
 rust/Makefile                     |   4 +-
 rust/alloc/alloc.rs               |  20 +++--
 rust/alloc/boxed.rs               | 131 +++++++++++++++---------------
 rust/alloc/lib.rs                 |  50 ++++++------
 rust/alloc/raw_vec.rs             |  18 ++--
 rust/alloc/slice.rs               |  43 +++++++---
 rust/alloc/vec/drain.rs           |   8 +-
 rust/alloc/vec/drain_filter.rs    |   8 +-
 rust/alloc/vec/into_iter.rs       |  35 +++++---
 rust/alloc/vec/mod.rs             |  84 +++++--------------
 rust/compiler_builtins.rs         |   7 ++
 rust/kernel/allocator.rs          |  51 +-----------
 scripts/min-tool-version.sh       |   2 +-
 14 files changed, 209 insertions(+), 254 deletions(-)

diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 5561dae94f85..1382bccc8818 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -31,7 +31,7 @@ you probably needn't concern yourself with pcmciautils.
 ====================== ===============  ========================================
 GNU C                  5.1              gcc --version
 Clang/LLVM (optional)  11.0.0           clang --version
-Rust (optional)        1.68.2           rustc --version
+Rust (optional)        1.71.1           rustc --version
 bindgen (optional)     0.56.0           bindgen --version
 GNU make               3.82             make --version
 bash                   4.2              bash --version
diff --git a/rust/Makefile b/rust/Makefile
index 1ffdff6c2a34..b278908c19e5 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -379,8 +379,8 @@ rust-analyzer:
 		$(if $(KBUILD_EXTMOD),$(extmod_prefix),$(objtree))/rust-project.json
 
 redirect-intrinsics = \
-	__eqsf2 __gesf2 __lesf2 __nesf2 __unordsf2 \
-	__unorddf2 \
+	__addsf3 __eqsf2 __gesf2 __lesf2 __ltsf2 __mulsf3 __nesf2 __unordsf2 \
+	__adddf3 __ledf2 __ltdf2 __muldf3 __unorddf2 \
 	__muloti4 __multi3 \
 	__udivmodti4 __udivti3 __umodti3
 
diff --git a/rust/alloc/alloc.rs b/rust/alloc/alloc.rs
index acf22d45e6f2..0b6bf5b6da43 100644
--- a/rust/alloc/alloc.rs
+++ b/rust/alloc/alloc.rs
@@ -16,8 +16,6 @@ use core::ptr::{self, NonNull};
 #[doc(inline)]
 pub use core::alloc::*;
 
-use core::marker::Destruct;
-
 #[cfg(test)]
 mod tests;
 
@@ -41,6 +39,9 @@ extern "Rust" {
     #[rustc_allocator_zeroed]
     #[rustc_nounwind]
     fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8;
+
+    #[cfg(not(bootstrap))]
+    static __rust_no_alloc_shim_is_unstable: u8;
 }
 
 /// The global memory allocator.
@@ -94,7 +95,14 @@ pub use std::alloc::Global;
 #[must_use = "losing the pointer will leak memory"]
 #[inline]
 pub unsafe fn alloc(layout: Layout) -> *mut u8 {
-    unsafe { __rust_alloc(layout.size(), layout.align()) }
+    unsafe {
+        // Make sure we don't accidentally allow omitting the allocator shim in
+        // stable code until it is actually stabilized.
+        #[cfg(not(bootstrap))]
+        core::ptr::read_volatile(&__rust_no_alloc_shim_is_unstable);
+
+        __rust_alloc(layout.size(), layout.align())
+    }
 }
 
 /// Deallocate memory with the global allocator.
@@ -333,16 +341,12 @@ unsafe fn exchange_malloc(size: usize, align: usize) -> *mut u8 {
 
 #[cfg_attr(not(test), lang = "box_free")]
 #[inline]
-#[rustc_const_unstable(feature = "const_box", issue = "92521")]
 // This signature has to be the same as `Box`, otherwise an ICE will happen.
 // When an additional parameter to `Box` is added (like `A: Allocator`), this has to be added here as
 // well.
 // For example if `Box` is changed to  `struct Box<T: ?Sized, A: Allocator>(Unique<T>, A)`,
 // this function has to be changed to `fn box_free<T: ?Sized, A: Allocator>(Unique<T>, A)` as well.
-pub(crate) const unsafe fn box_free<T: ?Sized, A: ~const Allocator + ~const Destruct>(
-    ptr: Unique<T>,
-    alloc: A,
-) {
+pub(crate) unsafe fn box_free<T: ?Sized, A: Allocator>(ptr: Unique<T>, alloc: A) {
     unsafe {
         let size = size_of_val(ptr.as_ref());
         let align = min_align_of_val(ptr.as_ref());
diff --git a/rust/alloc/boxed.rs b/rust/alloc/boxed.rs
index 14af9860c36c..c8173cea8317 100644
--- a/rust/alloc/boxed.rs
+++ b/rust/alloc/boxed.rs
@@ -152,16 +152,13 @@ use core::any::Any;
 use core::async_iter::AsyncIterator;
 use core::borrow;
 use core::cmp::Ordering;
-use core::convert::{From, TryFrom};
 use core::error::Error;
 use core::fmt;
 use core::future::Future;
 use core::hash::{Hash, Hasher};
-#[cfg(not(no_global_oom_handling))]
-use core::iter::FromIterator;
-use core::iter::{FusedIterator, Iterator};
+use core::iter::FusedIterator;
 use core::marker::Tuple;
-use core::marker::{Destruct, Unpin, Unsize};
+use core::marker::Unsize;
 use core::mem;
 use core::ops::{
     CoerceUnsized, Deref, DerefMut, DispatchFromDyn, Generator, GeneratorState, Receiver,
@@ -218,6 +215,7 @@ impl<T> Box<T> {
     #[inline(always)]
     #[stable(feature = "rust1", since = "1.0.0")]
     #[must_use]
+    #[rustc_diagnostic_item = "box_new"]
     pub fn new(x: T) -> Self {
         #[rustc_box]
         Box::new(x)
@@ -287,9 +285,7 @@ impl<T> Box<T> {
     #[must_use]
     #[inline(always)]
     pub fn pin(x: T) -> Pin<Box<T>> {
-        (#[rustc_box]
-        Box::new(x))
-        .into()
+        Box::new(x).into()
     }
 
     /// Allocates memory on the heap then places `x` into it,
@@ -381,12 +377,11 @@ impl<T, A: Allocator> Box<T, A> {
     /// ```
     #[cfg(not(no_global_oom_handling))]
     #[unstable(feature = "allocator_api", issue = "32838")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
     #[must_use]
     #[inline]
-    pub const fn new_in(x: T, alloc: A) -> Self
+    pub fn new_in(x: T, alloc: A) -> Self
     where
-        A: ~const Allocator + ~const Destruct,
+        A: Allocator,
     {
         let mut boxed = Self::new_uninit_in(alloc);
         unsafe {
@@ -411,12 +406,10 @@ impl<T, A: Allocator> Box<T, A> {
     /// # Ok::<(), std::alloc::AllocError>(())
     /// ```
     #[unstable(feature = "allocator_api", issue = "32838")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
     #[inline]
-    pub const fn try_new_in(x: T, alloc: A) -> Result<Self, AllocError>
+    pub fn try_new_in(x: T, alloc: A) -> Result<Self, AllocError>
     where
-        T: ~const Destruct,
-        A: ~const Allocator + ~const Destruct,
+        A: Allocator,
     {
         let mut boxed = Self::try_new_uninit_in(alloc)?;
         unsafe {
@@ -446,13 +439,12 @@ impl<T, A: Allocator> Box<T, A> {
     /// assert_eq!(*five, 5)
     /// ```
     #[unstable(feature = "allocator_api", issue = "32838")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
     #[cfg(not(no_global_oom_handling))]
     #[must_use]
     // #[unstable(feature = "new_uninit", issue = "63291")]
-    pub const fn new_uninit_in(alloc: A) -> Box<mem::MaybeUninit<T>, A>
+    pub fn new_uninit_in(alloc: A) -> Box<mem::MaybeUninit<T>, A>
     where
-        A: ~const Allocator + ~const Destruct,
+        A: Allocator,
     {
         let layout = Layout::new::<mem::MaybeUninit<T>>();
         // NOTE: Prefer match over unwrap_or_else since closure sometimes not inlineable.
@@ -487,10 +479,9 @@ impl<T, A: Allocator> Box<T, A> {
     /// ```
     #[unstable(feature = "allocator_api", issue = "32838")]
     // #[unstable(feature = "new_uninit", issue = "63291")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
-    pub const fn try_new_uninit_in(alloc: A) -> Result<Box<mem::MaybeUninit<T>, A>, AllocError>
+    pub fn try_new_uninit_in(alloc: A) -> Result<Box<mem::MaybeUninit<T>, A>, AllocError>
     where
-        A: ~const Allocator + ~const Destruct,
+        A: Allocator,
     {
         let layout = Layout::new::<mem::MaybeUninit<T>>();
         let ptr = alloc.allocate(layout)?.cast();
@@ -518,13 +509,12 @@ impl<T, A: Allocator> Box<T, A> {
     ///
     /// [zeroed]: mem::MaybeUninit::zeroed
     #[unstable(feature = "allocator_api", issue = "32838")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
     #[cfg(not(no_global_oom_handling))]
     // #[unstable(feature = "new_uninit", issue = "63291")]
     #[must_use]
-    pub const fn new_zeroed_in(alloc: A) -> Box<mem::MaybeUninit<T>, A>
+    pub fn new_zeroed_in(alloc: A) -> Box<mem::MaybeUninit<T>, A>
     where
-        A: ~const Allocator + ~const Destruct,
+        A: Allocator,
     {
         let layout = Layout::new::<mem::MaybeUninit<T>>();
         // NOTE: Prefer match over unwrap_or_else since closure sometimes not inlineable.
@@ -559,10 +549,9 @@ impl<T, A: Allocator> Box<T, A> {
     /// [zeroed]: mem::MaybeUninit::zeroed
     #[unstable(feature = "allocator_api", issue = "32838")]
     // #[unstable(feature = "new_uninit", issue = "63291")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
-    pub const fn try_new_zeroed_in(alloc: A) -> Result<Box<mem::MaybeUninit<T>, A>, AllocError>
+    pub fn try_new_zeroed_in(alloc: A) -> Result<Box<mem::MaybeUninit<T>, A>, AllocError>
     where
-        A: ~const Allocator + ~const Destruct,
+        A: Allocator,
     {
         let layout = Layout::new::<mem::MaybeUninit<T>>();
         let ptr = alloc.allocate_zeroed(layout)?.cast();
@@ -578,12 +567,11 @@ impl<T, A: Allocator> Box<T, A> {
     /// construct a (pinned) `Box` in a different way than with [`Box::new_in`].
     #[cfg(not(no_global_oom_handling))]
     #[unstable(feature = "allocator_api", issue = "32838")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
     #[must_use]
     #[inline(always)]
-    pub const fn pin_in(x: T, alloc: A) -> Pin<Self>
+    pub fn pin_in(x: T, alloc: A) -> Pin<Self>
     where
-        A: 'static + ~const Allocator + ~const Destruct,
+        A: 'static + Allocator,
     {
         Self::into_pin(Self::new_in(x, alloc))
     }
@@ -592,8 +580,7 @@ impl<T, A: Allocator> Box<T, A> {
     ///
     /// This conversion does not allocate on the heap and happens in place.
     #[unstable(feature = "box_into_boxed_slice", issue = "71582")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
-    pub const fn into_boxed_slice(boxed: Self) -> Box<[T], A> {
+    pub fn into_boxed_slice(boxed: Self) -> Box<[T], A> {
         let (raw, alloc) = Box::into_raw_with_allocator(boxed);
         unsafe { Box::from_raw_in(raw as *mut [T; 1], alloc) }
     }
@@ -610,12 +597,8 @@ impl<T, A: Allocator> Box<T, A> {
     /// assert_eq!(Box::into_inner(c), 5);
     /// ```
     #[unstable(feature = "box_into_inner", issue = "80437")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
     #[inline]
-    pub const fn into_inner(boxed: Self) -> T
-    where
-        Self: ~const Destruct,
-    {
+    pub fn into_inner(boxed: Self) -> T {
         *boxed
     }
 }
@@ -829,9 +812,8 @@ impl<T, A: Allocator> Box<mem::MaybeUninit<T>, A> {
     /// assert_eq!(*five, 5)
     /// ```
     #[unstable(feature = "new_uninit", issue = "63291")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
     #[inline]
-    pub const unsafe fn assume_init(self) -> Box<T, A> {
+    pub unsafe fn assume_init(self) -> Box<T, A> {
         let (raw, alloc) = Box::into_raw_with_allocator(self);
         unsafe { Box::from_raw_in(raw as *mut T, alloc) }
     }
@@ -864,9 +846,8 @@ impl<T, A: Allocator> Box<mem::MaybeUninit<T>, A> {
     /// }
     /// ```
     #[unstable(feature = "new_uninit", issue = "63291")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
     #[inline]
-    pub const fn write(mut boxed: Self, value: T) -> Box<T, A> {
+    pub fn write(mut boxed: Self, value: T) -> Box<T, A> {
         unsafe {
             (*boxed).write(value);
             boxed.assume_init()
@@ -1110,9 +1091,8 @@ impl<T: ?Sized, A: Allocator> Box<T, A> {
     ///
     /// [memory layout]: self#memory-layout
     #[unstable(feature = "allocator_api", issue = "32838")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
     #[inline]
-    pub const fn into_raw_with_allocator(b: Self) -> (*mut T, A) {
+    pub fn into_raw_with_allocator(b: Self) -> (*mut T, A) {
         let (leaked, alloc) = Box::into_unique(b);
         (leaked.as_ptr(), alloc)
     }
@@ -1122,10 +1102,9 @@ impl<T: ?Sized, A: Allocator> Box<T, A> {
         issue = "none",
         reason = "use `Box::leak(b).into()` or `Unique::from(Box::leak(b))` instead"
     )]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
     #[inline]
     #[doc(hidden)]
-    pub const fn into_unique(b: Self) -> (Unique<T>, A) {
+    pub fn into_unique(b: Self) -> (Unique<T>, A) {
         // Box is recognized as a "unique pointer" by Stacked Borrows, but internally it is a
         // raw pointer for the type system. Turning it directly into a raw pointer would not be
         // recognized as "releasing" the unique pointer to permit aliased raw accesses,
@@ -1183,9 +1162,8 @@ impl<T: ?Sized, A: Allocator> Box<T, A> {
     /// assert_eq!(*static_ref, [4, 2, 3]);
     /// ```
     #[stable(feature = "box_leak", since = "1.26.0")]
-    #[rustc_const_unstable(feature = "const_box", issue = "92521")]
     #[inline]
-    pub const fn leak<'a>(b: Self) -> &'a mut T
+    pub fn leak<'a>(b: Self) -> &'a mut T
     where
         A: 'a,
     {
@@ -1246,16 +1224,16 @@ unsafe impl<#[may_dangle] T: ?Sized, A: Allocator> Drop for Box<T, A> {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<T: Default> Default for Box<T> {
     /// Creates a `Box<T>`, with the `Default` value for T.
+    #[inline]
     fn default() -> Self {
-        #[rustc_box]
         Box::new(T::default())
     }
 }
 
 #[cfg(not(no_global_oom_handling))]
 #[stable(feature = "rust1", since = "1.0.0")]
-#[rustc_const_unstable(feature = "const_default_impls", issue = "87864")]
-impl<T> const Default for Box<[T]> {
+impl<T> Default for Box<[T]> {
+    #[inline]
     fn default() -> Self {
         let ptr: Unique<[T]> = Unique::<[T; 0]>::dangling();
         Box(ptr, Global)
@@ -1264,8 +1242,8 @@ impl<T> const Default for Box<[T]> {
 
 #[cfg(not(no_global_oom_handling))]
 #[stable(feature = "default_box_extra", since = "1.17.0")]
-#[rustc_const_unstable(feature = "const_default_impls", issue = "87864")]
-impl const Default for Box<str> {
+impl Default for Box<str> {
+    #[inline]
     fn default() -> Self {
         // SAFETY: This is the same as `Unique::cast<U>` but with an unsized `U = str`.
         let ptr: Unique<str> = unsafe {
@@ -1461,8 +1439,7 @@ impl<T> From<T> for Box<T> {
 }
 
 #[stable(feature = "pin", since = "1.33.0")]
-#[rustc_const_unstable(feature = "const_box", issue = "92521")]
-impl<T: ?Sized, A: Allocator> const From<Box<T, A>> for Pin<Box<T, A>>
+impl<T: ?Sized, A: Allocator> From<Box<T, A>> for Pin<Box<T, A>>
 where
     A: 'static,
 {
@@ -1482,9 +1459,36 @@ where
     }
 }
 
+/// Specialization trait used for `From<&[T]>`.
+#[cfg(not(no_global_oom_handling))]
+trait BoxFromSlice<T> {
+    fn from_slice(slice: &[T]) -> Self;
+}
+
+#[cfg(not(no_global_oom_handling))]
+impl<T: Clone> BoxFromSlice<T> for Box<[T]> {
+    #[inline]
+    default fn from_slice(slice: &[T]) -> Self {
+        slice.to_vec().into_boxed_slice()
+    }
+}
+
+#[cfg(not(no_global_oom_handling))]
+impl<T: Copy> BoxFromSlice<T> for Box<[T]> {
+    #[inline]
+    fn from_slice(slice: &[T]) -> Self {
+        let len = slice.len();
+        let buf = RawVec::with_capacity(len);
+        unsafe {
+            ptr::copy_nonoverlapping(slice.as_ptr(), buf.ptr(), len);
+            buf.into_box(slice.len()).assume_init()
+        }
+    }
+}
+
 #[cfg(not(no_global_oom_handling))]
 #[stable(feature = "box_from_slice", since = "1.17.0")]
-impl<T: Copy> From<&[T]> for Box<[T]> {
+impl<T: Clone> From<&[T]> for Box<[T]> {
     /// Converts a `&[T]` into a `Box<[T]>`
     ///
     /// This conversion allocates on the heap
@@ -1498,19 +1502,15 @@ impl<T: Copy> From<&[T]> for Box<[T]> {
     ///
     /// println!("{boxed_slice:?}");
     /// ```
+    #[inline]
     fn from(slice: &[T]) -> Box<[T]> {
-        let len = slice.len();
-        let buf = RawVec::with_capacity(len);
-        unsafe {
-            ptr::copy_nonoverlapping(slice.as_ptr(), buf.ptr(), len);
-            buf.into_box(slice.len()).assume_init()
-        }
+        <Self as BoxFromSlice<T>>::from_slice(slice)
     }
 }
 
 #[cfg(not(no_global_oom_handling))]
 #[stable(feature = "box_from_cow", since = "1.45.0")]
-impl<T: Copy> From<Cow<'_, [T]>> for Box<[T]> {
+impl<T: Clone> From<Cow<'_, [T]>> for Box<[T]> {
     /// Converts a `Cow<'_, [T]>` into a `Box<[T]>`
     ///
     /// When `cow` is the `Cow::Borrowed` variant, this
@@ -1620,7 +1620,6 @@ impl<T, const N: usize> From<[T; N]> for Box<[T]> {
     /// println!("{boxed:?}");
     /// ```
     fn from(array: [T; N]) -> Box<[T]> {
-        #[rustc_box]
         Box::new(array)
     }
 }
@@ -1899,8 +1898,7 @@ impl<T: ?Sized, A: Allocator> fmt::Pointer for Box<T, A> {
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
-#[rustc_const_unstable(feature = "const_box", issue = "92521")]
-impl<T: ?Sized, A: Allocator> const Deref for Box<T, A> {
+impl<T: ?Sized, A: Allocator> Deref for Box<T, A> {
     type Target = T;
 
     fn deref(&self) -> &T {
@@ -1909,8 +1907,7 @@ impl<T: ?Sized, A: Allocator> const Deref for Box<T, A> {
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
-#[rustc_const_unstable(feature = "const_box", issue = "92521")]
-impl<T: ?Sized, A: Allocator> const DerefMut for Box<T, A> {
+impl<T: ?Sized, A: Allocator> DerefMut for Box<T, A> {
     fn deref_mut(&mut self) -> &mut T {
         &mut **self
     }
diff --git a/rust/alloc/lib.rs b/rust/alloc/lib.rs
index 5f374378b0d4..85e91356ecb3 100644
--- a/rust/alloc/lib.rs
+++ b/rust/alloc/lib.rs
@@ -89,35 +89,37 @@
 #![warn(missing_debug_implementations)]
 #![warn(missing_docs)]
 #![allow(explicit_outlives_requirements)]
+#![warn(multiple_supertrait_upcastable)]
 //
 // Library features:
+// tidy-alphabetical-start
+#![cfg_attr(not(no_global_oom_handling), feature(const_alloc_error))]
+#![cfg_attr(not(no_global_oom_handling), feature(const_btree_len))]
+#![cfg_attr(test, feature(is_sorted))]
+#![cfg_attr(test, feature(new_uninit))]
 #![feature(alloc_layout_extra)]
 #![feature(allocator_api)]
 #![feature(array_chunks)]
 #![feature(array_into_iter_constructors)]
 #![feature(array_methods)]
 #![feature(array_windows)]
+#![feature(ascii_char)]
 #![feature(assert_matches)]
 #![feature(async_iterator)]
 #![feature(coerce_unsized)]
-#![cfg_attr(not(no_global_oom_handling), feature(const_alloc_error))]
-#![feature(const_box)]
-#![cfg_attr(not(no_global_oom_handling), feature(const_btree_len))]
-#![cfg_attr(not(no_borrow), feature(const_cow_is_borrowed))]
-#![feature(const_convert)]
-#![feature(const_size_of_val)]
 #![feature(const_align_of_val)]
-#![feature(const_ptr_read)]
-#![feature(const_maybe_uninit_zeroed)]
-#![feature(const_maybe_uninit_write)]
+#![feature(const_box)]
+#![cfg_attr(not(no_borrow), feature(const_cow_is_borrowed))]
+#![feature(const_eval_select)]
 #![feature(const_maybe_uninit_as_mut_ptr)]
+#![feature(const_maybe_uninit_write)]
+#![feature(const_maybe_uninit_zeroed)]
+#![feature(const_pin)]
 #![feature(const_refs_to_cell)]
+#![feature(const_size_of_val)]
+#![feature(const_waker)]
 #![feature(core_intrinsics)]
 #![feature(core_panic)]
-#![feature(const_eval_select)]
-#![feature(const_pin)]
-#![feature(const_waker)]
-#![feature(cstr_from_bytes_until_nul)]
 #![feature(dispatch_from_dyn)]
 #![feature(error_generic_member_access)]
 #![feature(error_in_core)]
@@ -128,7 +130,6 @@
 #![feature(hasher_prefixfree_extras)]
 #![feature(inline_const)]
 #![feature(inplace_iteration)]
-#![cfg_attr(test, feature(is_sorted))]
 #![feature(iter_advance_by)]
 #![feature(iter_next_chunk)]
 #![feature(iter_repeat_n)]
@@ -136,8 +137,6 @@
 #![feature(maybe_uninit_slice)]
 #![feature(maybe_uninit_uninit_array)]
 #![feature(maybe_uninit_uninit_array_transpose)]
-#![cfg_attr(test, feature(new_uninit))]
-#![feature(nonnull_slice_from_raw_parts)]
 #![feature(pattern)]
 #![feature(pointer_byte_offsets)]
 #![feature(provide_any)]
@@ -153,6 +152,7 @@
 #![feature(slice_ptr_get)]
 #![feature(slice_ptr_len)]
 #![feature(slice_range)]
+#![feature(std_internals)]
 #![feature(str_internals)]
 #![feature(strict_provenance)]
 #![feature(trusted_len)]
@@ -163,40 +163,42 @@
 #![feature(unicode_internals)]
 #![feature(unsize)]
 #![feature(utf8_chunks)]
-#![feature(std_internals)]
+// tidy-alphabetical-end
 //
 // Language features:
+// tidy-alphabetical-start
+#![cfg_attr(not(test), feature(generator_trait))]
+#![cfg_attr(test, feature(panic_update_hook))]
+#![cfg_attr(test, feature(test))]
 #![feature(allocator_internals)]
 #![feature(allow_internal_unstable)]
 #![feature(associated_type_bounds)]
+#![feature(c_unwind)]
 #![feature(cfg_sanitize)]
-#![feature(const_deref)]
 #![feature(const_mut_refs)]
-#![feature(const_ptr_write)]
 #![feature(const_precise_live_drops)]
+#![feature(const_ptr_write)]
 #![feature(const_trait_impl)]
 #![feature(const_try)]
 #![feature(dropck_eyepatch)]
 #![feature(exclusive_range_pattern)]
 #![feature(fundamental)]
-#![cfg_attr(not(test), feature(generator_trait))]
 #![feature(hashmap_internals)]
 #![feature(lang_items)]
 #![feature(min_specialization)]
+#![feature(multiple_supertrait_upcastable)]
 #![feature(negative_impls)]
 #![feature(never_type)]
+#![feature(pointer_is_aligned)]
 #![feature(rustc_allow_const_fn_unstable)]
 #![feature(rustc_attrs)]
-#![feature(pointer_is_aligned)]
 #![feature(slice_internals)]
 #![feature(staged_api)]
 #![feature(stmt_expr_attributes)]
-#![cfg_attr(test, feature(test))]
 #![feature(unboxed_closures)]
 #![feature(unsized_fn_params)]
-#![feature(c_unwind)]
 #![feature(with_negative_coherence)]
-#![cfg_attr(test, feature(panic_update_hook))]
+// tidy-alphabetical-end
 //
 // Rustdoc features:
 #![feature(doc_cfg)]
diff --git a/rust/alloc/raw_vec.rs b/rust/alloc/raw_vec.rs
index 5db87eac53b7..65d5ce15828e 100644
--- a/rust/alloc/raw_vec.rs
+++ b/rust/alloc/raw_vec.rs
@@ -6,7 +6,6 @@ use core::alloc::LayoutError;
 use core::cmp;
 use core::intrinsics;
 use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties};
-use core::ops::Drop;
 use core::ptr::{self, NonNull, Unique};
 use core::slice;
 
@@ -274,10 +273,15 @@ impl<T, A: Allocator> RawVec<T, A> {
         if T::IS_ZST || self.cap == 0 {
             None
         } else {
-            // We have an allocated chunk of memory, so we can bypass runtime
-            // checks to get our current layout.
+            // We could use Layout::array here which ensures the absence of isize and usize overflows
+            // and could hypothetically handle differences between stride and size, but this memory
+            // has already been allocated so we know it can't overflow and currently rust does not
+            // support such types. So we can do better by skipping some checks and avoid an unwrap.
+            let _: () = const { assert!(mem::size_of::<T>() % mem::align_of::<T>() == 0) };
             unsafe {
-                let layout = Layout::array::<T>(self.cap).unwrap_unchecked();
+                let align = mem::align_of::<T>();
+                let size = mem::size_of::<T>().unchecked_mul(self.cap);
+                let layout = Layout::from_size_align_unchecked(size, align);
                 Some((self.ptr.cast().into(), layout))
             }
         }
@@ -465,11 +469,13 @@ impl<T, A: Allocator> RawVec<T, A> {
         assert!(cap <= self.capacity(), "Tried to shrink to a larger capacity");
 
         let (ptr, layout) = if let Some(mem) = self.current_memory() { mem } else { return Ok(()) };
-
+        // See current_memory() why this assert is here
+        let _: () = const { assert!(mem::size_of::<T>() % mem::align_of::<T>() == 0) };
         let ptr = unsafe {
             // `Layout::array` cannot overflow here because it would have
             // overflowed earlier when capacity was larger.
-            let new_layout = Layout::array::<T>(cap).unwrap_unchecked();
+            let new_size = mem::size_of::<T>().unchecked_mul(cap);
+            let new_layout = Layout::from_size_align_unchecked(new_size, layout.align());
             self.alloc
                 .shrink(ptr, layout, new_layout)
                 .map_err(|_| AllocError { layout: new_layout, non_exhaustive: () })?
diff --git a/rust/alloc/slice.rs b/rust/alloc/slice.rs
index 245e01590df7..6ac463bd3edc 100644
--- a/rust/alloc/slice.rs
+++ b/rust/alloc/slice.rs
@@ -784,6 +784,38 @@ impl<T, A: Allocator> BorrowMut<[T]> for Vec<T, A> {
     }
 }
 
+// Specializable trait for implementing ToOwned::clone_into. This is
+// public in the crate and has the Allocator parameter so that
+// vec::clone_from use it too.
+#[cfg(not(no_global_oom_handling))]
+pub(crate) trait SpecCloneIntoVec<T, A: Allocator> {
+    fn clone_into(&self, target: &mut Vec<T, A>);
+}
+
+#[cfg(not(no_global_oom_handling))]
+impl<T: Clone, A: Allocator> SpecCloneIntoVec<T, A> for [T] {
+    default fn clone_into(&self, target: &mut Vec<T, A>) {
+        // drop anything in target that will not be overwritten
+        target.truncate(self.len());
+
+        // target.len <= self.len due to the truncate above, so the
+        // slices here are always in-bounds.
+        let (init, tail) = self.split_at(target.len());
+
+        // reuse the contained values' allocations/resources.
+        target.clone_from_slice(init);
+        target.extend_from_slice(tail);
+    }
+}
+
+#[cfg(not(no_global_oom_handling))]
+impl<T: Copy, A: Allocator> SpecCloneIntoVec<T, A> for [T] {
+    fn clone_into(&self, target: &mut Vec<T, A>) {
+        target.clear();
+        target.extend_from_slice(self);
+    }
+}
+
 #[cfg(not(no_global_oom_handling))]
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<T: Clone> ToOwned for [T] {
@@ -799,16 +831,7 @@ impl<T: Clone> ToOwned for [T] {
     }
 
     fn clone_into(&self, target: &mut Vec<T>) {
-        // drop anything in target that will not be overwritten
-        target.truncate(self.len());
-
-        // target.len <= self.len due to the truncate above, so the
-        // slices here are always in-bounds.
-        let (init, tail) = self.split_at(target.len());
-
-        // reuse the contained values' allocations/resources.
-        target.clone_from_slice(init);
-        target.extend_from_slice(tail);
+        SpecCloneIntoVec::clone_into(self, target);
     }
 }
 
diff --git a/rust/alloc/vec/drain.rs b/rust/alloc/vec/drain.rs
index d503d2f478ce..78177a9e2ad0 100644
--- a/rust/alloc/vec/drain.rs
+++ b/rust/alloc/vec/drain.rs
@@ -18,7 +18,7 @@ use super::Vec;
 ///
 /// ```
 /// let mut v = vec![0, 1, 2];
-/// let iter: std::vec::Drain<_> = v.drain(..);
+/// let iter: std::vec::Drain<'_, _> = v.drain(..);
 /// ```
 #[stable(feature = "drain", since = "1.6.0")]
 pub struct Drain<
@@ -114,9 +114,7 @@ impl<'a, T, A: Allocator> Drain<'a, T, A> {
             let unyielded_ptr = this.iter.as_slice().as_ptr();
 
             // ZSTs have no identity, so we don't need to move them around.
-            let needs_move = mem::size_of::<T>() != 0;
-
-            if needs_move {
+            if !T::IS_ZST {
                 let start_ptr = source_vec.as_mut_ptr().add(start);
 
                 // memmove back unyielded elements
@@ -199,7 +197,7 @@ impl<T, A: Allocator> Drop for Drain<'_, T, A> {
             }
         }
 
-        let iter = mem::replace(&mut self.iter, (&mut []).iter());
+        let iter = mem::take(&mut self.iter);
         let drop_len = iter.len();
 
         let mut vec = self.vec;
diff --git a/rust/alloc/vec/drain_filter.rs b/rust/alloc/vec/drain_filter.rs
index 4b019220657d..09efff090e42 100644
--- a/rust/alloc/vec/drain_filter.rs
+++ b/rust/alloc/vec/drain_filter.rs
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: Apache-2.0 OR MIT
 
 use crate::alloc::{Allocator, Global};
-use core::mem::{self, ManuallyDrop};
+use core::mem::{ManuallyDrop, SizedTypeProperties};
 use core::ptr;
 use core::slice;
 
@@ -18,7 +18,7 @@ use super::Vec;
 /// #![feature(drain_filter)]
 ///
 /// let mut v = vec![0, 1, 2];
-/// let iter: std::vec::DrainFilter<_, _> = v.drain_filter(|x| *x % 2 == 0);
+/// let iter: std::vec::DrainFilter<'_, _, _> = v.drain_filter(|x| *x % 2 == 0);
 /// ```
 #[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")]
 #[derive(Debug)]
@@ -98,9 +98,7 @@ where
 
         unsafe {
             // ZSTs have no identity, so we don't need to move them around.
-            let needs_move = mem::size_of::<T>() != 0;
-
-            if needs_move && this.idx < this.old_len && this.del > 0 {
+            if !T::IS_ZST && this.idx < this.old_len && this.del > 0 {
                 let ptr = this.vec.as_mut_ptr();
                 let src = ptr.add(this.idx);
                 let dst = src.sub(this.del);
diff --git a/rust/alloc/vec/into_iter.rs b/rust/alloc/vec/into_iter.rs
index 34a2a70d6ded..aac0ec16aef1 100644
--- a/rust/alloc/vec/into_iter.rs
+++ b/rust/alloc/vec/into_iter.rs
@@ -13,6 +13,7 @@ use core::iter::{
 };
 use core::marker::PhantomData;
 use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties};
+use core::num::NonZeroUsize;
 #[cfg(not(no_global_oom_handling))]
 use core::ops::Deref;
 use core::ptr::{self, NonNull};
@@ -109,7 +110,7 @@ impl<T, A: Allocator> IntoIter<T, A> {
     /// ```
     /// # let mut into_iter = Vec::<u8>::with_capacity(10).into_iter();
     /// let mut into_iter = std::mem::replace(&mut into_iter, Vec::new().into_iter());
-    /// (&mut into_iter).for_each(core::mem::drop);
+    /// (&mut into_iter).for_each(drop);
     /// std::mem::forget(into_iter);
     /// ```
     ///
@@ -215,7 +216,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
     }
 
     #[inline]
-    fn advance_by(&mut self, n: usize) -> Result<(), usize> {
+    fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
         let step_size = self.len().min(n);
         let to_drop = ptr::slice_from_raw_parts_mut(self.ptr as *mut T, step_size);
         if T::IS_ZST {
@@ -229,10 +230,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
         unsafe {
             ptr::drop_in_place(to_drop);
         }
-        if step_size < n {
-            return Err(step_size);
-        }
-        Ok(())
+        NonZeroUsize::new(n - step_size).map_or(Ok(()), Err)
     }
 
     #[inline]
@@ -315,7 +313,7 @@ impl<T, A: Allocator> DoubleEndedIterator for IntoIter<T, A> {
     }
 
     #[inline]
-    fn advance_back_by(&mut self, n: usize) -> Result<(), usize> {
+    fn advance_back_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
         let step_size = self.len().min(n);
         if T::IS_ZST {
             // SAFETY: same as for advance_by()
@@ -329,10 +327,7 @@ impl<T, A: Allocator> DoubleEndedIterator for IntoIter<T, A> {
         unsafe {
             ptr::drop_in_place(to_drop);
         }
-        if step_size < n {
-            return Err(step_size);
-        }
-        Ok(())
+        NonZeroUsize::new(n - step_size).map_or(Ok(()), Err)
     }
 }
 
@@ -349,6 +344,24 @@ impl<T, A: Allocator> FusedIterator for IntoIter<T, A> {}
 #[unstable(feature = "trusted_len", issue = "37572")]
 unsafe impl<T, A: Allocator> TrustedLen for IntoIter<T, A> {}
 
+#[stable(feature = "default_iters", since = "1.70.0")]
+impl<T, A> Default for IntoIter<T, A>
+where
+    A: Allocator + Default,
+{
+    /// Creates an empty `vec::IntoIter`.
+    ///
+    /// ```
+    /// # use std::vec;
+    /// let iter: vec::IntoIter<u8> = Default::default();
+    /// assert_eq!(iter.len(), 0);
+    /// assert_eq!(iter.as_slice(), &[]);
+    /// ```
+    fn default() -> Self {
+        super::Vec::new_in(Default::default()).into_iter()
+    }
+}
+
 #[doc(hidden)]
 #[unstable(issue = "none", feature = "std_internals")]
 #[rustc_unsafe_specialization_marker]
diff --git a/rust/alloc/vec/mod.rs b/rust/alloc/vec/mod.rs
index 94995913566b..05c70de0227e 100644
--- a/rust/alloc/vec/mod.rs
+++ b/rust/alloc/vec/mod.rs
@@ -58,13 +58,9 @@
 #[cfg(not(no_global_oom_handling))]
 use core::cmp;
 use core::cmp::Ordering;
-use core::convert::TryFrom;
 use core::fmt;
 use core::hash::{Hash, Hasher};
-use core::intrinsics::assume;
 use core::iter;
-#[cfg(not(no_global_oom_handling))]
-use core::iter::FromIterator;
 use core::marker::PhantomData;
 use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties};
 use core::ops::{self, Index, IndexMut, Range, RangeBounds};
@@ -381,8 +377,8 @@ mod spec_extend;
 /// Currently, `Vec` does not guarantee the order in which elements are dropped.
 /// The order has changed in the past and may change again.
 ///
-/// [`get`]: ../../std/vec/struct.Vec.html#method.get
-/// [`get_mut`]: ../../std/vec/struct.Vec.html#method.get_mut
+/// [`get`]: slice::get
+/// [`get_mut`]: slice::get_mut
 /// [`String`]: crate::string::String
 /// [`&str`]: type@str
 /// [`shrink_to_fit`]: Vec::shrink_to_fit
@@ -708,14 +704,14 @@ impl<T, A: Allocator> Vec<T, A> {
     ///
     /// // The vector contains no items, even though it has capacity for more
     /// assert_eq!(vec.len(), 0);
-    /// assert_eq!(vec.capacity(), 10);
+    /// assert!(vec.capacity() >= 10);
     ///
     /// // These are all done without reallocating...
     /// for i in 0..10 {
     ///     vec.push(i);
     /// }
     /// assert_eq!(vec.len(), 10);
-    /// assert_eq!(vec.capacity(), 10);
+    /// assert!(vec.capacity() >= 10);
     ///
     /// // ...but this may make the vector reallocate
     /// vec.push(11);
@@ -766,14 +762,14 @@ impl<T, A: Allocator> Vec<T, A> {
     ///
     /// // The vector contains no items, even though it has capacity for more
     /// assert_eq!(vec.len(), 0);
-    /// assert_eq!(vec.capacity(), 10);
+    /// assert!(vec.capacity() >= 10);
     ///
     /// // These are all done without reallocating...
     /// for i in 0..10 {
     ///     vec.push(i);
     /// }
     /// assert_eq!(vec.len(), 10);
-    /// assert_eq!(vec.capacity(), 10);
+    /// assert!(vec.capacity() >= 10);
     ///
     /// // ...but this may make the vector reallocate
     /// vec.push(11);
@@ -999,7 +995,7 @@ impl<T, A: Allocator> Vec<T, A> {
     /// ```
     /// let mut vec: Vec<i32> = Vec::with_capacity(10);
     /// vec.push(42);
-    /// assert_eq!(vec.capacity(), 10);
+    /// assert!(vec.capacity() >= 10);
     /// ```
     #[inline]
     #[stable(feature = "rust1", since = "1.0.0")]
@@ -1150,7 +1146,7 @@ impl<T, A: Allocator> Vec<T, A> {
     /// ```
     /// let mut vec = Vec::with_capacity(10);
     /// vec.extend([1, 2, 3]);
-    /// assert_eq!(vec.capacity(), 10);
+    /// assert!(vec.capacity() >= 10);
     /// vec.shrink_to_fit();
     /// assert!(vec.capacity() >= 3);
     /// ```
@@ -1177,7 +1173,7 @@ impl<T, A: Allocator> Vec<T, A> {
     /// ```
     /// let mut vec = Vec::with_capacity(10);
     /// vec.extend([1, 2, 3]);
-    /// assert_eq!(vec.capacity(), 10);
+    /// assert!(vec.capacity() >= 10);
     /// vec.shrink_to(4);
     /// assert!(vec.capacity() >= 4);
     /// vec.shrink_to(0);
@@ -1212,7 +1208,7 @@ impl<T, A: Allocator> Vec<T, A> {
     /// let mut vec = Vec::with_capacity(10);
     /// vec.extend([1, 2, 3]);
     ///
-    /// assert_eq!(vec.capacity(), 10);
+    /// assert!(vec.capacity() >= 10);
     /// let slice = vec.into_boxed_slice();
     /// assert_eq!(slice.into_vec().capacity(), 3);
     /// ```
@@ -1358,11 +1354,7 @@ impl<T, A: Allocator> Vec<T, A> {
     pub fn as_ptr(&self) -> *const T {
         // We shadow the slice method of the same name to avoid going through
         // `deref`, which creates an intermediate reference.
-        let ptr = self.buf.ptr();
-        unsafe {
-            assume(!ptr.is_null());
-        }
-        ptr
+        self.buf.ptr()
     }
 
     /// Returns an unsafe mutable pointer to the vector's buffer, or a dangling
@@ -1395,11 +1387,7 @@ impl<T, A: Allocator> Vec<T, A> {
     pub fn as_mut_ptr(&mut self) -> *mut T {
         // We shadow the slice method of the same name to avoid going through
         // `deref_mut`, which creates an intermediate reference.
-        let ptr = self.buf.ptr();
-        unsafe {
-            assume(!ptr.is_null());
-        }
-        ptr
+        self.buf.ptr()
     }
 
     /// Returns a reference to the underlying allocator.
@@ -2891,35 +2879,6 @@ impl<T, A: Allocator> ops::DerefMut for Vec<T, A> {
     }
 }
 
-#[cfg(not(no_global_oom_handling))]
-trait SpecCloneFrom {
-    fn clone_from(this: &mut Self, other: &Self);
-}
-
-#[cfg(not(no_global_oom_handling))]
-impl<T: Clone, A: Allocator> SpecCloneFrom for Vec<T, A> {
-    default fn clone_from(this: &mut Self, other: &Self) {
-        // drop anything that will not be overwritten
-        this.truncate(other.len());
-
-        // self.len <= other.len due to the truncate above, so the
-        // slices here are always in-bounds.
-        let (init, tail) = other.split_at(this.len());
-
-        // reuse the contained values' allocations/resources.
-        this.clone_from_slice(init);
-        this.extend_from_slice(tail);
-    }
-}
-
-#[cfg(not(no_global_oom_handling))]
-impl<T: Copy, A: Allocator> SpecCloneFrom for Vec<T, A> {
-    fn clone_from(this: &mut Self, other: &Self) {
-        this.clear();
-        this.extend_from_slice(other);
-    }
-}
-
 #[cfg(not(no_global_oom_handling))]
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<T: Clone, A: Allocator + Clone> Clone for Vec<T, A> {
@@ -2940,7 +2899,7 @@ impl<T: Clone, A: Allocator + Clone> Clone for Vec<T, A> {
     }
 
     fn clone_from(&mut self, other: &Self) {
-        SpecCloneFrom::clone_from(self, other)
+        crate::slice::SpecCloneIntoVec::clone_into(other.as_slice(), self);
     }
 }
 
@@ -2948,7 +2907,6 @@ impl<T: Clone, A: Allocator + Clone> Clone for Vec<T, A> {
 /// as required by the `core::borrow::Borrow` implementation.
 ///
 /// ```
-/// #![feature(build_hasher_simple_hash_one)]
 /// use std::hash::BuildHasher;
 ///
 /// let b = std::collections::hash_map::RandomState::new();
@@ -3330,7 +3288,7 @@ impl<'a, T: Copy + 'a, A: Allocator + 'a> Extend<&'a T> for Vec<T, A> {
     }
 }
 
-/// Implements comparison of vectors, [lexicographically](core::cmp::Ord#lexicographical-comparison).
+/// Implements comparison of vectors, [lexicographically](Ord#lexicographical-comparison).
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<T: PartialOrd, A: Allocator> PartialOrd for Vec<T, A> {
     #[inline]
@@ -3342,7 +3300,7 @@ impl<T: PartialOrd, A: Allocator> PartialOrd for Vec<T, A> {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<T: Eq, A: Allocator> Eq for Vec<T, A> {}
 
-/// Implements ordering of vectors, [lexicographically](core::cmp::Ord#lexicographical-comparison).
+/// Implements ordering of vectors, [lexicographically](Ord#lexicographical-comparison).
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<T: Ord, A: Allocator> Ord for Vec<T, A> {
     #[inline]
@@ -3365,8 +3323,7 @@ unsafe impl<#[may_dangle] T, A: Allocator> Drop for Vec<T, A> {
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
-#[rustc_const_unstable(feature = "const_default_impls", issue = "87864")]
-impl<T> const Default for Vec<T> {
+impl<T> Default for Vec<T> {
     /// Creates an empty `Vec<T>`.
     ///
     /// The vector will not allocate until elements are pushed onto it.
@@ -3462,10 +3419,7 @@ impl<T, const N: usize> From<[T; N]> for Vec<T> {
     /// ```
     #[cfg(not(test))]
     fn from(s: [T; N]) -> Vec<T> {
-        <[T]>::into_vec(
-            #[rustc_box]
-            Box::new(s),
-        )
+        <[T]>::into_vec(Box::new(s))
     }
 
     #[cfg(test)]
@@ -3490,8 +3444,8 @@ where
     ///
     /// ```
     /// # use std::borrow::Cow;
-    /// let o: Cow<[i32]> = Cow::Owned(vec![1, 2, 3]);
-    /// let b: Cow<[i32]> = Cow::Borrowed(&[1, 2, 3]);
+    /// let o: Cow<'_, [i32]> = Cow::Owned(vec![1, 2, 3]);
+    /// let b: Cow<'_, [i32]> = Cow::Borrowed(&[1, 2, 3]);
     /// assert_eq!(Vec::from(o), Vec::from(b));
     /// ```
     fn from(s: Cow<'a, [T]>) -> Vec<T> {
diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs
index 43378357ece9..fb8ac3f211de 100644
--- a/rust/compiler_builtins.rs
+++ b/rust/compiler_builtins.rs
@@ -37,14 +37,21 @@ macro_rules! define_panicking_intrinsics(
 );
 
 define_panicking_intrinsics!("`f32` should not be used", {
+    __addsf3,
     __eqsf2,
     __gesf2,
     __lesf2,
+    __ltsf2,
+    __mulsf3,
     __nesf2,
     __unordsf2,
 });
 
 define_panicking_intrinsics!("`f64` should not be used", {
+    __adddf3,
+    __ledf2,
+    __ltdf2,
+    __muldf3,
     __unorddf2,
 });
 
diff --git a/rust/kernel/allocator.rs b/rust/kernel/allocator.rs
index 72d66c23883c..a8f3d5be1af1 100644
--- a/rust/kernel/allocator.rs
+++ b/rust/kernel/allocator.rs
@@ -83,53 +83,6 @@ unsafe impl GlobalAlloc for KernelAllocator {
 #[global_allocator]
 static ALLOCATOR: KernelAllocator = KernelAllocator;
 
-// `rustc` only generates these for some crate types. Even then, we would need
-// to extract the object file that has them from the archive. For the moment,
-// let's generate them ourselves instead.
-//
-// Note: Although these are *safe* functions, they are called by the compiler
-// with parameters that obey the same `GlobalAlloc` function safety
-// requirements: size and align should form a valid layout, and size is
-// greater than 0.
-//
-// Note that `#[no_mangle]` implies exported too, nowadays.
+// See <https://github.com/rust-lang/rust/pull/86844>.
 #[no_mangle]
-fn __rust_alloc(size: usize, align: usize) -> *mut u8 {
-    // SAFETY: See assumption above.
-    let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
-
-    // SAFETY: `ptr::null_mut()` is null, per assumption above the size of `layout` is greater
-    // than 0.
-    unsafe { krealloc_aligned(ptr::null_mut(), layout, bindings::GFP_KERNEL) }
-}
-
-#[no_mangle]
-fn __rust_dealloc(ptr: *mut u8, _size: usize, _align: usize) {
-    unsafe { bindings::kfree(ptr as *const core::ffi::c_void) };
-}
-
-#[no_mangle]
-fn __rust_realloc(ptr: *mut u8, _old_size: usize, align: usize, new_size: usize) -> *mut u8 {
-    // SAFETY: See assumption above.
-    let new_layout = unsafe { Layout::from_size_align_unchecked(new_size, align) };
-
-    // SAFETY: Per assumption above, `ptr` is allocated by `__rust_*` before, and the size of
-    // `new_layout` is greater than 0.
-    unsafe { krealloc_aligned(ptr, new_layout, bindings::GFP_KERNEL) }
-}
-
-#[no_mangle]
-fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8 {
-    // SAFETY: See assumption above.
-    let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
-
-    // SAFETY: `ptr::null_mut()` is null, per assumption above the size of `layout` is greater
-    // than 0.
-    unsafe {
-        krealloc_aligned(
-            ptr::null_mut(),
-            layout,
-            bindings::GFP_KERNEL | bindings::__GFP_ZERO,
-        )
-    }
-}
+static __rust_no_alloc_shim_is_unstable: u8 = 0;
diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh
index 2ade63149466..8da3b53fe36a 100755
--- a/scripts/min-tool-version.sh
+++ b/scripts/min-tool-version.sh
@@ -31,7 +31,7 @@ llvm)
 	fi
 	;;
 rustc)
-	echo 1.68.2
+	echo 1.71.1
 	;;
 bindgen)
 	echo 0.56.0

From 9418e686047421dcd416f694ea72fb9edeef3688 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Sun, 30 Jul 2023 00:03:17 +0200
Subject: [PATCH 1089/2047] rust: enable `no_mangle_with_rust_abi` Clippy lint

Introduced in Rust 1.69.0 [1], this lint prevents forgetting to set
the C ABI when using `#[no_mangle]` (or thinking it is implied).

For instance, it would have prevented the issue [2] fixed by commit
c682e4c37d2b ("rust: kernel: Mark rust_fmt_argument as extern "C"").

    error: `#[no_mangle]` set on a function with the default (`Rust`) ABI
      --> rust/kernel/print.rs:21:1
       |
    21 | / unsafe fn rust_fmt_argument(
    22 | |     buf: *mut c_char,
    23 | |     end: *mut c_char,
    24 | |     ptr: *const c_void,
    25 | | ) -> *mut c_char {
       | |________________^
       |
       = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#no_mangle_with_rust_abi
       = note: requested on the command line with `-D clippy::no-mangle-with-rust-abi`
    help: set an ABI
       |
    21 | unsafe extern "C" fn rust_fmt_argument(
       |        ++++++++++
    help: or explicitly set the default
       |
    21 | unsafe extern "Rust" fn rust_fmt_argument(
       |        +++++++++++++

Thus enable it.

In rare cases, we may need to use the Rust ABI even with `#[no_mangle]`
(e.g. one case, before 1.71.0, would have been the `__rust_*`
functions). In those cases, we would need to `#[allow(...)]` the lint,
since using `extern "Rust"` explicitly (as the compiler suggests)
currently gets overwritten by `rustfmt` [3].

Link: https://github.com/rust-lang/rust-clippy/issues/10347 [1]
Link: https://github.com/Rust-for-Linux/linux/pull/967 [2]
Link: https://github.com/rust-lang/rustfmt/issues/5701 [3]
Reviewed-by: Trevor Gross <tmgross@umich.edu>
Reviewed-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Link: https://lore.kernel.org/r/20230729220317.416771-2-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index a5880570a526..0c7f6240df2e 100644
--- a/Makefile
+++ b/Makefile
@@ -467,6 +467,7 @@ export rust_common_flags := --edition=2021 \
 			    -Dclippy::let_unit_value -Dclippy::mut_mut \
 			    -Dclippy::needless_bitwise_bool \
 			    -Dclippy::needless_continue \
+			    -Dclippy::no_mangle_with_rust_abi \
 			    -Wclippy::dbg_macro
 
 KBUILD_HOSTCFLAGS   := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS)

From e062abaec65b970c4d7ecf26bc1558a1b6f92970 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Wed, 2 Aug 2023 13:57:03 +0200
Subject: [PATCH 1090/2047] super: remove get_tree_single_reconf()

The get_tree_single_reconf() helper isn't used anywhere. Remove it.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>
Message-Id: <20230802-vfs-super-exclusive-v2-1-95dc4e41b870@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/super.c                 | 28 +++++-----------------------
 include/linux/fs_context.h |  3 ---
 2 files changed, 5 insertions(+), 26 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index e781226e2880..96bec768684a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1136,7 +1136,7 @@ static int test_single_super(struct super_block *s, struct fs_context *fc)
 	return 1;
 }
 
-static int vfs_get_super(struct fs_context *fc, bool reconf,
+static int vfs_get_super(struct fs_context *fc,
 		int (*test)(struct super_block *, struct fs_context *),
 		int (*fill_super)(struct super_block *sb,
 				  struct fs_context *fc))
@@ -1154,19 +1154,9 @@ static int vfs_get_super(struct fs_context *fc, bool reconf,
 			goto error;
 
 		sb->s_flags |= SB_ACTIVE;
-		fc->root = dget(sb->s_root);
-	} else {
-		fc->root = dget(sb->s_root);
-		if (reconf) {
-			err = reconfigure_super(fc);
-			if (err < 0) {
-				dput(fc->root);
-				fc->root = NULL;
-				goto error;
-			}
-		}
 	}
 
+	fc->root = dget(sb->s_root);
 	return 0;
 
 error:
@@ -1178,7 +1168,7 @@ int get_tree_nodev(struct fs_context *fc,
 		  int (*fill_super)(struct super_block *sb,
 				    struct fs_context *fc))
 {
-	return vfs_get_super(fc, false, NULL, fill_super);
+	return vfs_get_super(fc, NULL, fill_super);
 }
 EXPORT_SYMBOL(get_tree_nodev);
 
@@ -1186,25 +1176,17 @@ int get_tree_single(struct fs_context *fc,
 		  int (*fill_super)(struct super_block *sb,
 				    struct fs_context *fc))
 {
-	return vfs_get_super(fc, false, test_single_super, fill_super);
+	return vfs_get_super(fc, test_single_super, fill_super);
 }
 EXPORT_SYMBOL(get_tree_single);
 
-int get_tree_single_reconf(struct fs_context *fc,
-		  int (*fill_super)(struct super_block *sb,
-				    struct fs_context *fc))
-{
-	return vfs_get_super(fc, true, test_single_super, fill_super);
-}
-EXPORT_SYMBOL(get_tree_single_reconf);
-
 int get_tree_keyed(struct fs_context *fc,
 		  int (*fill_super)(struct super_block *sb,
 				    struct fs_context *fc),
 		void *key)
 {
 	fc->s_fs_info = key;
-	return vfs_get_super(fc, false, test_keyed_super, fill_super);
+	return vfs_get_super(fc, test_keyed_super, fill_super);
 }
 EXPORT_SYMBOL(get_tree_keyed);
 
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index ff6341e09925..851b3fe2549c 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -150,9 +150,6 @@ extern int get_tree_nodev(struct fs_context *fc,
 extern int get_tree_single(struct fs_context *fc,
 			 int (*fill_super)(struct super_block *sb,
 					   struct fs_context *fc));
-extern int get_tree_single_reconf(struct fs_context *fc,
-			 int (*fill_super)(struct super_block *sb,
-					   struct fs_context *fc));
 extern int get_tree_keyed(struct fs_context *fc,
 			 int (*fill_super)(struct super_block *sb,
 					   struct fs_context *fc),

From dae8b08d5d83b7550917af06cfba76f0b908bf15 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Wed, 2 Aug 2023 13:57:04 +0200
Subject: [PATCH 1091/2047] fs: add vfs_cmd_create()

Split the steps to create a superblock into a tiny helper. This will
make the next patch easier to follow.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>
Message-Id: <20230802-vfs-super-exclusive-v2-2-95dc4e41b870@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fsopen.c | 51 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 17 deletions(-)

diff --git a/fs/fsopen.c b/fs/fsopen.c
index fc9d2d9fd234..1de2b3576958 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -209,6 +209,39 @@ err:
 	return ret;
 }
 
+static int vfs_cmd_create(struct fs_context *fc)
+{
+	struct super_block *sb;
+	int ret;
+
+	if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
+		return -EBUSY;
+
+	if (!mount_capable(fc))
+		return -EPERM;
+
+	fc->phase = FS_CONTEXT_CREATING;
+
+	ret = vfs_get_tree(fc);
+	if (ret) {
+		fc->phase = FS_CONTEXT_FAILED;
+		return ret;
+	}
+
+	sb = fc->root->d_sb;
+	ret = security_sb_kern_mount(sb);
+	if (unlikely(ret)) {
+		fc_drop_locked(fc);
+		fc->phase = FS_CONTEXT_FAILED;
+		return ret;
+	}
+
+	/* vfs_get_tree() callchains will have grabbed @s_umount */
+	up_write(&sb->s_umount);
+	fc->phase = FS_CONTEXT_AWAITING_MOUNT;
+	return 0;
+}
+
 /*
  * Check the state and apply the configuration.  Note that this function is
  * allowed to 'steal' the value by setting param->xxx to NULL before returning.
@@ -224,23 +257,7 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
 		return ret;
 	switch (cmd) {
 	case FSCONFIG_CMD_CREATE:
-		if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
-			return -EBUSY;
-		if (!mount_capable(fc))
-			return -EPERM;
-		fc->phase = FS_CONTEXT_CREATING;
-		ret = vfs_get_tree(fc);
-		if (ret)
-			break;
-		sb = fc->root->d_sb;
-		ret = security_sb_kern_mount(sb);
-		if (unlikely(ret)) {
-			fc_drop_locked(fc);
-			break;
-		}
-		up_write(&sb->s_umount);
-		fc->phase = FS_CONTEXT_AWAITING_MOUNT;
-		return 0;
+		return vfs_cmd_create(fc);
 	case FSCONFIG_CMD_RECONFIGURE:
 		if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
 			return -EBUSY;

From 11a51d8c13a75f6b24cffeda8e5e11fc8a749f1e Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Wed, 2 Aug 2023 13:57:05 +0200
Subject: [PATCH 1092/2047] fs: add vfs_cmd_reconfigure()

Split the steps to reconfigure a superblock into a tiny helper instead
of open-coding it in the switch.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>
Message-Id: <20230802-vfs-super-exclusive-v2-3-95dc4e41b870@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fsopen.c | 47 +++++++++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/fs/fsopen.c b/fs/fsopen.c
index 1de2b3576958..a69b7c9cc59c 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -242,6 +242,34 @@ static int vfs_cmd_create(struct fs_context *fc)
 	return 0;
 }
 
+static int vfs_cmd_reconfigure(struct fs_context *fc)
+{
+	struct super_block *sb;
+	int ret;
+
+	if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
+		return -EBUSY;
+
+	fc->phase = FS_CONTEXT_RECONFIGURING;
+
+	sb = fc->root->d_sb;
+	if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
+		fc->phase = FS_CONTEXT_FAILED;
+		return -EPERM;
+	}
+
+	down_write(&sb->s_umount);
+	ret = reconfigure_super(fc);
+	up_write(&sb->s_umount);
+	if (ret) {
+		fc->phase = FS_CONTEXT_FAILED;
+		return ret;
+	}
+
+	vfs_clean_context(fc);
+	return 0;
+}
+
 /*
  * Check the state and apply the configuration.  Note that this function is
  * allowed to 'steal' the value by setting param->xxx to NULL before returning.
@@ -249,7 +277,6 @@ static int vfs_cmd_create(struct fs_context *fc)
 static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
 			       struct fs_parameter *param)
 {
-	struct super_block *sb;
 	int ret;
 
 	ret = finish_clean_context(fc);
@@ -259,21 +286,7 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
 	case FSCONFIG_CMD_CREATE:
 		return vfs_cmd_create(fc);
 	case FSCONFIG_CMD_RECONFIGURE:
-		if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
-			return -EBUSY;
-		fc->phase = FS_CONTEXT_RECONFIGURING;
-		sb = fc->root->d_sb;
-		if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
-			ret = -EPERM;
-			break;
-		}
-		down_write(&sb->s_umount);
-		ret = reconfigure_super(fc);
-		up_write(&sb->s_umount);
-		if (ret)
-			break;
-		vfs_clean_context(fc);
-		return 0;
+		return vfs_cmd_reconfigure(fc);
 	default:
 		if (fc->phase != FS_CONTEXT_CREATE_PARAMS &&
 		    fc->phase != FS_CONTEXT_RECONF_PARAMS)
@@ -281,8 +294,6 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
 
 		return vfs_parse_fs_param(fc, param);
 	}
-	fc->phase = FS_CONTEXT_FAILED;
-	return ret;
 }
 
 /**

From 22ed7ecdaefe0cac0c6e6295e83048af60435b13 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Wed, 2 Aug 2023 13:57:06 +0200
Subject: [PATCH 1093/2047] fs: add FSCONFIG_CMD_CREATE_EXCL

Summary
=======

This introduces FSCONFIG_CMD_CREATE_EXCL which will allows userspace to
implement something like mount -t ext4 --exclusive /dev/sda /B which
fails if a superblock for the requested filesystem does already exist:

Before this patch
-----------------

$ sudo ./move-mount -f xfs -o source=/dev/sda4 /A
Requesting filesystem type xfs
Mount options requested: source=/dev/sda4
Attaching mount at /A
Moving single attached mount
Setting key(source) with val(/dev/sda4)

$ sudo ./move-mount -f xfs -o source=/dev/sda4 /B
Requesting filesystem type xfs
Mount options requested: source=/dev/sda4
Attaching mount at /B
Moving single attached mount
Setting key(source) with val(/dev/sda4)

After this patch with --exclusive as a switch for FSCONFIG_CMD_CREATE_EXCL
--------------------------------------------------------------------------

$ sudo ./move-mount -f xfs --exclusive -o source=/dev/sda4 /A
Requesting filesystem type xfs
Request exclusive superblock creation
Mount options requested: source=/dev/sda4
Attaching mount at /A
Moving single attached mount
Setting key(source) with val(/dev/sda4)

$ sudo ./move-mount -f xfs --exclusive -o source=/dev/sda4 /B
Requesting filesystem type xfs
Request exclusive superblock creation
Mount options requested: source=/dev/sda4
Attaching mount at /B
Moving single attached mount
Setting key(source) with val(/dev/sda4)
Device or resource busy | move-mount.c: 300: do_fsconfig: i xfs: reusing existing filesystem not allowed

Details
=======

As mentioned on the list (cf. [1]-[3]) mount requests like
mount -t ext4 /dev/sda /A are ambigous for userspace. Either a new
superblock has been created and mounted or an existing superblock has
been reused and a bind-mount has been created.

This becomes clear in the following example where two processes create
the same mount for the same block device:

P1                                                              P2
fd_fs = fsopen("ext4");                                         fd_fs = fsopen("ext4");
fsconfig(fd_fs, FSCONFIG_SET_STRING, "source", "/dev/sda");     fsconfig(fd_fs, FSCONFIG_SET_STRING, "source", "/dev/sda");
fsconfig(fd_fs, FSCONFIG_SET_STRING, "dax", "always");          fsconfig(fd_fs, FSCONFIG_SET_STRING, "resuid", "1000");

// wins and creates superblock
fsconfig(fd_fs, FSCONFIG_CMD_CREATE, ...)
                                                                // finds compatible superblock of P1
                                                                // spins until P1 sets SB_BORN and grabs a reference
                                                                fsconfig(fd_fs, FSCONFIG_CMD_CREATE, ...)

fd_mnt1 = fsmount(fd_fs);                                       fd_mnt2 = fsmount(fd_fs);
move_mount(fd_mnt1, "/A")                                       move_mount(fd_mnt2, "/B")

Not just does P2 get a bind-mount but the mount options that P2
requestes are silently ignored. The VFS itself doesn't, can't and
shouldn't enforce filesystem specific mount option compatibility. It
only enforces incompatibility for read-only <-> read-write transitions:

mount -t ext4       /dev/sda /A
mount -t ext4 -o ro /dev/sda /B

The read-only request will fail with EBUSY as the VFS can't just
silently transition a superblock from read-write to read-only or vica
versa without risking security issues.

To userspace this silent superblock reuse can become a security issue in
because there is currently no straightforward way for userspace to know
that they did indeed manage to create a new superblock and didn't just
reuse an existing one.

This adds a new FSCONFIG_CMD_CREATE_EXCL command to fsconfig() that
returns EBUSY if an existing superblock would be reused. Userspace that
needs to be sure that it did create a new superblock with the requested
mount options can request superblock creation using this command. If the
command succeeds they can be sure that they did create a new superblock
with the requested mount options.

This requires the new mount api. With the old mount api it would be
necessary to plumb this through every legacy filesystem's
file_system_type->mount() method. If they want this feature they are
most welcome to switch to the new mount api.

Following is an analysis of the effect of FSCONFIG_CMD_CREATE_EXCL on
each high-level superblock creation helper:

(1) get_tree_nodev()

    Always allocate new superblock. Hence, FSCONFIG_CMD_CREATE and
    FSCONFIG_CMD_CREATE_EXCL are equivalent.

    The binderfs or overlayfs filesystems are examples.

(4) get_tree_keyed()

    Finds an existing superblock based on sb->s_fs_info. Hence,
    FSCONFIG_CMD_CREATE would reuse an existing superblock whereas
    FSCONFIG_CMD_CREATE_EXCL would reject it with EBUSY.

    The mqueue or nfsd filesystems are examples.

(2) get_tree_bdev()

    This effectively works like get_tree_keyed().

    The ext4 or xfs filesystems are examples.

(3) get_tree_single()

    Only one superblock of this filesystem type can ever exist.
    Hence, FSCONFIG_CMD_CREATE would reuse an existing superblock
    whereas FSCONFIG_CMD_CREATE_EXCL would reject it with EBUSY.

    The securityfs or configfs filesystems are examples.

    Note that some single-instance filesystems never destroy the
    superblock once it has been created during the first mount. For
    example, if securityfs has been mounted at least onces then the
    created superblock will never be destroyed again as long as there is
    still an LSM making use it. Consequently, even if securityfs is
    unmounted and the superblock seemingly destroyed it really isn't
    which means that FSCONFIG_CMD_CREATE_EXCL will continue rejecting
    reusing an existing superblock.

    This is acceptable thugh since special purpose filesystems such as
    this shouldn't have a need to use FSCONFIG_CMD_CREATE_EXCL anyway
    and if they do it's probably to make sure that mount options aren't
    ignored.

Following is an analysis of the effect of FSCONFIG_CMD_CREATE_EXCL on
filesystems that make use of the low-level sget_fc() helper directly.
They're all effectively variants on get_tree_keyed(), get_tree_bdev(),
or get_tree_nodev():

(5) mtd_get_sb()

    Similar logic to get_tree_keyed().

(6) afs_get_tree()

    Similar logic to get_tree_keyed().

(7) ceph_get_tree()

    Similar logic to get_tree_keyed().

    Already explicitly allows forcing the allocation of a new superblock
    via CEPH_OPT_NOSHARE. This turns it into get_tree_nodev().

(8) fuse_get_tree_submount()

    Similar logic to get_tree_nodev().

(9) fuse_get_tree()

    Forces reuse of existing FUSE superblock.

    Forces reuse of existing superblock if passed in file refers to an
    existing FUSE connection.
    If FSCONFIG_CMD_CREATE_EXCL is specified together with an fd
    referring to an existing FUSE connections this would cause the
    superblock reusal to fail. If reusing is the intent then
    FSCONFIG_CMD_CREATE_EXCL shouldn't be specified.

(10) fuse_get_tree()
     -> get_tree_nodev()

    Same logic as in get_tree_nodev().

(11) fuse_get_tree()
     -> get_tree_bdev()

    Same logic as in get_tree_bdev().

(12) virtio_fs_get_tree()

     Same logic as get_tree_keyed().

(13) gfs2_meta_get_tree()

     Forces reuse of existing gfs2 superblock.

     Mounting gfs2meta enforces that a gf2s superblock must already
     exist. If not, it will error out. Consequently, mounting gfs2meta
     with FSCONFIG_CMD_CREATE_EXCL would always fail. If reusing is the
     intent then FSCONFIG_CMD_CREATE_EXCL shouldn't be specified.

(14) kernfs_get_tree()

     Similar logic to get_tree_keyed().

(15) nfs_get_tree_common()

    Similar logic to get_tree_keyed().

    Already explicitly allows forcing the allocation of a new superblock
    via NFS_MOUNT_UNSHARED. This effectively turns it into
    get_tree_nodev().

Link: [1] https://lore.kernel.org/linux-block/20230704-fasching-wertarbeit-7c6ffb01c83d@brauner
Link: [2] https://lore.kernel.org/linux-block/20230705-pumpwerk-vielversprechend-a4b1fd947b65@brauner
Link: [3] https://lore.kernel.org/linux-fsdevel/20230725-einnahmen-warnschilder-17779aec0a97@brauner
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>
Message-Id: <20230802-vfs-super-exclusive-v2-4-95dc4e41b870@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fs_context.c            |  1 +
 fs/fsopen.c                | 12 ++++++++++--
 fs/super.c                 | 36 +++++++++++++++++++++++++++---------
 include/linux/fs_context.h |  1 +
 include/uapi/linux/mount.h |  3 ++-
 5 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/fs/fs_context.c b/fs/fs_context.c
index 851214d1d013..30d82d2979af 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -692,6 +692,7 @@ void vfs_clean_context(struct fs_context *fc)
 	security_free_mnt_opts(&fc->security);
 	kfree(fc->source);
 	fc->source = NULL;
+	fc->exclusive = false;
 
 	fc->purpose = FS_CONTEXT_FOR_RECONFIGURE;
 	fc->phase = FS_CONTEXT_AWAITING_RECONF;
diff --git a/fs/fsopen.c b/fs/fsopen.c
index a69b7c9cc59c..ce03f6521c88 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -209,7 +209,7 @@ err:
 	return ret;
 }
 
-static int vfs_cmd_create(struct fs_context *fc)
+static int vfs_cmd_create(struct fs_context *fc, bool exclusive)
 {
 	struct super_block *sb;
 	int ret;
@@ -220,7 +220,12 @@ static int vfs_cmd_create(struct fs_context *fc)
 	if (!mount_capable(fc))
 		return -EPERM;
 
+	/* require the new mount api */
+	if (exclusive && fc->ops == &legacy_fs_context_ops)
+		return -EOPNOTSUPP;
+
 	fc->phase = FS_CONTEXT_CREATING;
+	fc->exclusive = exclusive;
 
 	ret = vfs_get_tree(fc);
 	if (ret) {
@@ -284,7 +289,9 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
 		return ret;
 	switch (cmd) {
 	case FSCONFIG_CMD_CREATE:
-		return vfs_cmd_create(fc);
+		return vfs_cmd_create(fc, false);
+	case FSCONFIG_CMD_CREATE_EXCL:
+		return vfs_cmd_create(fc, true);
 	case FSCONFIG_CMD_RECONFIGURE:
 		return vfs_cmd_reconfigure(fc);
 	default:
@@ -381,6 +388,7 @@ SYSCALL_DEFINE5(fsconfig,
 			return -EINVAL;
 		break;
 	case FSCONFIG_CMD_CREATE:
+	case FSCONFIG_CMD_CREATE_EXCL:
 	case FSCONFIG_CMD_RECONFIGURE:
 		if (_key || _value || aux)
 			return -EINVAL;
diff --git a/fs/super.c b/fs/super.c
index 96bec768684a..e04a403dbffd 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -546,17 +546,31 @@ bool mount_capable(struct fs_context *fc)
  * @test: Comparison callback
  * @set: Setup callback
  *
- * Find or create a superblock using the parameters stored in the filesystem
- * context and the two callback functions.
+ * Create a new superblock or find an existing one.
  *
- * If an extant superblock is matched, then that will be returned with an
- * elevated reference count that the caller must transfer or discard.
+ * The @test callback is used to find a matching existing superblock.
+ * Whether or not the requested parameters in @fc are taken into account
+ * is specific to the @test callback that is used. They may even be
+ * completely ignored.
+ *
+ * If an extant superblock is matched, it will be returned unless:
+ *
+ * (1) the namespace the filesystem context @fc and the extant
+ *     superblock's namespace differ
+ *
+ * (2) the filesystem context @fc has requested that reusing an extant
+ *     superblock is not allowed
+ *
+ * In both cases EBUSY will be returned.
  *
  * If no match is made, a new superblock will be allocated and basic
- * initialisation will be performed (s_type, s_fs_info and s_id will be set and
- * the set() callback will be invoked), the superblock will be published and it
- * will be returned in a partially constructed state with SB_BORN and SB_ACTIVE
- * as yet unset.
+ * initialisation will be performed (s_type, s_fs_info and s_id will be
+ * set and the @set callback will be invoked), the superblock will be
+ * published and it will be returned in a partially constructed state
+ * with SB_BORN and SB_ACTIVE as yet unset.
+ *
+ * Return: On success, an extant or newly created superblock is
+ *         returned. On failure an error pointer is returned.
  */
 struct super_block *sget_fc(struct fs_context *fc,
 			    int (*test)(struct super_block *, struct fs_context *),
@@ -603,9 +617,13 @@ retry:
 	return s;
 
 share_extant_sb:
-	if (user_ns != old->s_user_ns) {
+	if (user_ns != old->s_user_ns || fc->exclusive) {
 		spin_unlock(&sb_lock);
 		destroy_unused_super(s);
+		if (fc->exclusive)
+			warnfc(fc, "reusing existing filesystem not allowed");
+		else
+			warnfc(fc, "reusing existing filesystem in another namespace not allowed");
 		return ERR_PTR(-EBUSY);
 	}
 	if (!grab_super(old))
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 851b3fe2549c..a33a3b1d9016 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -109,6 +109,7 @@ struct fs_context {
 	bool			need_free:1;	/* Need to call ops->free() */
 	bool			global:1;	/* Goes into &init_user_ns */
 	bool			oldapi:1;	/* Coming from mount(2) */
+	bool			exclusive:1;    /* create new superblock, reject existing one */
 };
 
 struct fs_context_operations {
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 8eb0d7b758d2..bb242fdcfe6b 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -100,8 +100,9 @@ enum fsconfig_command {
 	FSCONFIG_SET_PATH	= 3,	/* Set parameter, supplying an object by path */
 	FSCONFIG_SET_PATH_EMPTY	= 4,	/* Set parameter, supplying an object by (empty) path */
 	FSCONFIG_SET_FD		= 5,	/* Set parameter, supplying an object by fd */
-	FSCONFIG_CMD_CREATE	= 6,	/* Invoke superblock creation */
+	FSCONFIG_CMD_CREATE	= 6,	/* Create new or reuse existing superblock */
 	FSCONFIG_CMD_RECONFIGURE = 7,	/* Invoke superblock reconfiguration */
+	FSCONFIG_CMD_CREATE_EXCL = 8,	/* Create new superblock, fail if reusing existing superblock */
 };
 
 /*

From edff54ac96eb25f01d26cc68923a6dbbb5b2f440 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Mon, 14 Aug 2023 23:07:59 +0200
Subject: [PATCH 1094/2047] regulator: rtq2208: Switch back to use struct
 i2c_driver's .probe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

struct i2c_driver::probe_new is about to go away. Switch the driver to
use the probe callback with the same prototype.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20230814210759.26395-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rtq2208-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/rtq2208-regulator.c b/drivers/regulator/rtq2208-regulator.c
index 2463aea4192c..2d54844c4226 100644
--- a/drivers/regulator/rtq2208-regulator.c
+++ b/drivers/regulator/rtq2208-regulator.c
@@ -574,7 +574,7 @@ static struct i2c_driver rtq2208_driver = {
 		.name = "rtq2208",
 		.of_match_table = rtq2208_device_tables,
 	},
-	.probe_new = rtq2208_probe,
+	.probe = rtq2208_probe,
 };
 module_i2c_driver(rtq2208_driver);
 

From 9d0cce2bc3874dd03f7471ec00ae4acb5a77e43c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 1 Aug 2023 12:11:18 -0700
Subject: [PATCH 1095/2047] rcu-tasks: Fix boot-time RCU tasks debug-only
 deadlock

In kernels built with CONFIG_PROVE_RCU=y (for example, lockdep kernels),
the following sequence of events can occur:

o	rcu_init_tasks_generic() is invoked just before init is spawned.
	It invokes rcu_spawn_tasks_kthread() and friends.

o	rcu_spawn_tasks_kthread() invokes rcu_spawn_tasks_kthread_generic(),
	which uses kthread_run() to create the needed kthread.

o	Control returns to rcu_init_tasks_generic(), which, because this
	is a CONFIG_PROVE_RCU=y kernel, invokes the version of the
	rcu_tasks_initiate_self_tests() function that actually does
	something, including invoking synchronize_rcu_tasks(), which
	in turn invokes synchronize_rcu_tasks_generic().

o	synchronize_rcu_tasks_generic() sees that the ->kthread_ptr is
	still NULL, because the newly spawned kthread has not yet
	started.

o	The new kthread starts, preempting synchronize_rcu_tasks_generic()
	just after its check.  This kthread invokes rcu_tasks_one_gp(),
	which acquires ->tasks_gp_mutex, and, seeing no work, blocks
	in rcuwait_wait_event().  Note that this step requires either
	a preemptible kernel or a fault-injection-style sleep at the
	beginning of mutex_lock().

o	synchronize_rcu_tasks_generic() resumes and invokes rcu_tasks_one_gp().

o	rcu_tasks_one_gp() attempts to acquire ->tasks_gp_mutex, which
	is still held by the newly spawned kthread's rcu_tasks_one_gp()
	function.  Deadlock.

Because the only reason for ->tasks_gp_mutex is to handle pre-kthread
synchronous grace periods, this commit avoids this deadlock by having
rcu_tasks_one_gp() momentarily release ->tasks_gp_mutex while invoking
rcuwait_wait_event().  This allows the call to rcu_tasks_one_gp() from
synchronize_rcu_tasks_generic() proceed.

Note that it is not necessary to release the mutex anywhere else in
rcu_tasks_one_gp() because rcuwait_wait_event() is the only function
that can block indefinitely.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Roy Hopkins <rhopkins@suse.de>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: Roy Hopkins <rhopkins@suse.de>
---
 kernel/rcu/tasks.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index a4cd4ea2402c..5572f4a22996 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -570,10 +570,12 @@ static void rcu_tasks_one_gp(struct rcu_tasks *rtp, bool midboot)
 	if (unlikely(midboot)) {
 		needgpcb = 0x2;
 	} else {
+		mutex_unlock(&rtp->tasks_gp_mutex);
 		set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
 		rcuwait_wait_event(&rtp->cbs_wait,
 				   (needgpcb = rcu_tasks_need_gpcb(rtp)),
 				   TASK_IDLE);
+		mutex_lock(&rtp->tasks_gp_mutex);
 	}
 
 	if (needgpcb & 0x2) {

From 67d5404d274376890d6d095a10e6565854918f8e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 19 Jul 2023 15:50:07 -0700
Subject: [PATCH 1096/2047] torture: Add a kthread-creation callback to
 _torture_create_kthread()

This commit adds a kthread-creation callback to the
_torture_create_kthread() function, which allows callers of a new
torture_create_kthread_cb() macro to specify a function to be invoked
after the kthread is created but before it is awakened for the first time.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: kernel-team@android.com
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Acked-by: John Stultz <jstultz@google.com>
---
 include/linux/torture.h | 7 +++++--
 kernel/torture.c        | 6 +++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/linux/torture.h b/include/linux/torture.h
index 7038104463e4..bb466eec01e4 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -108,12 +108,15 @@ bool torture_must_stop(void);
 bool torture_must_stop_irq(void);
 void torture_kthread_stopping(char *title);
 int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
-			     char *f, struct task_struct **tp);
+			     char *f, struct task_struct **tp, void (*cbf)(struct task_struct *tp));
 void _torture_stop_kthread(char *m, struct task_struct **tp);
 
 #define torture_create_kthread(n, arg, tp) \
 	_torture_create_kthread(n, (arg), #n, "Creating " #n " task", \
-				"Failed to create " #n, &(tp))
+				"Failed to create " #n, &(tp), NULL)
+#define torture_create_kthread_cb(n, arg, tp, cbf) \
+	_torture_create_kthread(n, (arg), #n, "Creating " #n " task", \
+				"Failed to create " #n, &(tp), cbf)
 #define torture_stop_kthread(n, tp) \
 	_torture_stop_kthread("Stopping " #n " task", &(tp))
 
diff --git a/kernel/torture.c b/kernel/torture.c
index 8be83fdc6be1..b88a1a86d9da 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -932,7 +932,7 @@ EXPORT_SYMBOL_GPL(torture_kthread_stopping);
  * it starts, you will need to open-code your own.
  */
 int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
-			    char *f, struct task_struct **tp)
+			    char *f, struct task_struct **tp, void (*cbf)(struct task_struct *tp))
 {
 	int ret = 0;
 
@@ -944,6 +944,10 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
 		*tp = NULL;
 		return ret;
 	}
+
+	if (cbf)
+		cbf(*tp);
+
 	wake_up_process(*tp);  // Process is sleeping, so ordering provided.
 	torture_shuffle_task_register(*tp);
 	return ret;

From 5d248bb39fe1388943acb6510f8f48fa5570e0ec Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
Date: Fri, 2 Jun 2023 22:02:10 +0000
Subject: [PATCH 1097/2047] torture: Add lock_torture writer_fifo module
 parameter

This commit adds a module parameter that causes the locktorture writer
to run at real-time priority.

To use it:
insmod /lib/modules/torture.ko random_shuffle=1
insmod /lib/modules/locktorture.ko torture_type=mutex_lock rt_boost=1 rt_boost_factor=50 nested_locks=3 writer_fifo=1
													^^^^^^^^^^^^^

A predecessor to this patch has been helpful to uncover issues with the
proxy-execution series.

[ paulmck: Remove locktorture-specific code from kernel/torture.c. ]

Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: kernel-team@android.com
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
[jstultz: Include header change to build, reword commit message]
Signed-off-by: John Stultz <jstultz@google.com>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/admin-guide/kernel-parameters.txt |  4 ++++
 kernel/locking/locktorture.c                    | 12 +++++++-----
 kernel/torture.c                                |  3 ++-
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1457995fd41..7b94455e9ae2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2918,6 +2918,10 @@
 	locktorture.torture_type= [KNL]
 			Specify the locking implementation to test.
 
+	locktorture.writer_fifo= [KNL]
+			Run the write-side locktorture kthreads at
+			sched_set_fifo() real-time priority.
+
 	locktorture.verbose= [KNL]
 			Enable additional printk() statements.
 
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 949d3deae506..270c7f80ce84 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -45,6 +45,7 @@ torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
 torture_param(int, rt_boost, 2,
 		   "Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
 torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens.");
+torture_param(int, writer_fifo, 0, "Run writers at sched_set_fifo() priority");
 torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 torture_param(int, nested_locks, 0, "Number of nested locks (max = 8)");
 /* Going much higher trips "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!" errors */
@@ -809,7 +810,8 @@ static int lock_torture_writer(void *arg)
 	bool skip_main_lock;
 
 	VERBOSE_TOROUT_STRING("lock_torture_writer task started");
-	set_user_nice(current, MAX_NICE);
+	if (!rt_task(current))
+		set_user_nice(current, MAX_NICE);
 
 	do {
 		if ((torture_random(&rand) & 0xfffff) == 0)
@@ -1015,8 +1017,7 @@ static void lock_torture_cleanup(void)
 
 	if (writer_tasks) {
 		for (i = 0; i < cxt.nrealwriters_stress; i++)
-			torture_stop_kthread(lock_torture_writer,
-					     writer_tasks[i]);
+			torture_stop_kthread(lock_torture_writer, writer_tasks[i]);
 		kfree(writer_tasks);
 		writer_tasks = NULL;
 	}
@@ -1244,8 +1245,9 @@ static int __init lock_torture_init(void)
 			goto create_reader;
 
 		/* Create writer. */
-		firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i],
-						  writer_tasks[i]);
+		firsterr = torture_create_kthread_cb(lock_torture_writer, &cxt.lwsa[i],
+						     writer_tasks[i],
+						     writer_fifo ? sched_set_fifo : NULL);
 		if (torture_init_error(firsterr))
 			goto unwind;
 
diff --git a/kernel/torture.c b/kernel/torture.c
index b88a1a86d9da..a1ac493488e2 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -37,6 +37,7 @@
 #include <linux/ktime.h>
 #include <asm/byteorder.h>
 #include <linux/torture.h>
+#include <linux/sched/rt.h>
 #include "rcu/rcu.h"
 
 MODULE_LICENSE("GPL");
@@ -734,7 +735,7 @@ bool stutter_wait(const char *title)
 	cond_resched_tasks_rcu_qs();
 	spt = READ_ONCE(stutter_pause_test);
 	for (; spt; spt = READ_ONCE(stutter_pause_test)) {
-		if (!ret) {
+		if (!ret && !rt_task(current)) {
 			sched_set_normal(current, MAX_NICE);
 			ret = true;
 		}

From 872948c665f50a1446e8a34b1ed57bb0b3a9ca4a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 26 Jun 2023 14:59:29 -0700
Subject: [PATCH 1098/2047] torture: Make torture_hrtimeout_*() use TASK_IDLE

Given that it is expected that more code will use torture_hrtimeout_*(),
including for longer timeouts, make it use TASK_IDLE instead of
TASK_UNINTERRUPTIBLE.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/torture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/torture.c b/kernel/torture.c
index a1ac493488e2..68c831a43a58 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -93,7 +93,7 @@ int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_s
 
 	if (trsp)
 		hto += (torture_random(trsp) >> 3) % fuzzt_ns;
-	set_current_state(TASK_UNINTERRUPTIBLE);
+	set_current_state(TASK_IDLE);
 	return schedule_hrtimeout(&hto, HRTIMER_MODE_REL);
 }
 EXPORT_SYMBOL_GPL(torture_hrtimeout_ns);

From 3f0c06e1cba6207703733474bc7b55292bc86c6a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 26 Jun 2023 15:19:56 -0700
Subject: [PATCH 1099/2047] torture: Move torture_onoff() timeouts to hrtimers

In order to gain better race coverage, move the CPU-hotplug-related
timed waits in torture_onoff() to torture_hrtimeout_jiffies().

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/torture.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/torture.c b/kernel/torture.c
index 68c831a43a58..2c441d5a1bdd 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -354,12 +354,12 @@ torture_onoff(void *arg)
 
 	if (onoff_holdoff > 0) {
 		VERBOSE_TOROUT_STRING("torture_onoff begin holdoff");
-		schedule_timeout_interruptible(onoff_holdoff);
+		torture_hrtimeout_jiffies(onoff_holdoff, &rand);
 		VERBOSE_TOROUT_STRING("torture_onoff end holdoff");
 	}
 	while (!torture_must_stop()) {
 		if (disable_onoff_at_boot && !rcu_inkernel_boot_has_ended()) {
-			schedule_timeout_interruptible(HZ / 10);
+			torture_hrtimeout_jiffies(HZ / 10, &rand);
 			continue;
 		}
 		cpu = (torture_random(&rand) >> 4) % (maxcpu + 1);
@@ -369,7 +369,7 @@ torture_onoff(void *arg)
 			torture_online(cpu,
 				       &n_online_attempts, &n_online_successes,
 				       &sum_online, &min_online, &max_online);
-		schedule_timeout_interruptible(onoff_interval);
+		torture_hrtimeout_jiffies(onoff_interval, &rand);
 	}
 
 stop:

From dea81dcfd3497e75eb23e7543434f88c34289d31 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 26 Jun 2023 16:27:02 -0700
Subject: [PATCH 1100/2047] torture: Move torture_shuffle() timeouts to
 hrtimers

In order to gain better race coverage, move the CPU-migration timed
waits in torture_shuffle() to torture_hrtimeout_jiffies().

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/torture.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/torture.c b/kernel/torture.c
index 2c441d5a1bdd..c5311154bc4d 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -557,9 +557,11 @@ static void torture_shuffle_tasks(void)
  */
 static int torture_shuffle(void *arg)
 {
+	DEFINE_TORTURE_RANDOM(rand);
+
 	VERBOSE_TOROUT_STRING("torture_shuffle task started");
 	do {
-		schedule_timeout_interruptible(shuffle_interval);
+		torture_hrtimeout_jiffies(shuffle_interval, &rand);
 		torture_shuffle_tasks();
 		torture_shutdown_absorb("torture_shuffle");
 	} while (!torture_must_stop());

From 10af43671e8bf4ac153c4991a17cdf57bc6d2cfe Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 26 Jun 2023 16:52:35 -0700
Subject: [PATCH 1101/2047] torture: Move stutter_wait() timeouts to hrtimers

In order to gain better race coverage, move the test start/stop
waits in stutter_wait() to torture_hrtimeout_jiffies().

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/torture.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/torture.c b/kernel/torture.c
index c5311154bc4d..984651de4b55 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -742,7 +742,7 @@ bool stutter_wait(const char *title)
 			ret = true;
 		}
 		if (spt == 1) {
-			schedule_timeout_interruptible(1);
+			torture_hrtimeout_jiffies(1, NULL);
 		} else if (spt == 2) {
 			while (READ_ONCE(stutter_pause_test)) {
 				if (!(i++ & 0xffff))
@@ -750,7 +750,7 @@ bool stutter_wait(const char *title)
 				cond_resched();
 			}
 		} else {
-			schedule_timeout_interruptible(round_jiffies_relative(HZ));
+			torture_hrtimeout_jiffies(round_jiffies_relative(HZ), NULL);
 		}
 		torture_shutdown_absorb(title);
 	}

From 6cab60ceb1d32b795ae22fea5719fb6150e4cda9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 12 Jul 2023 14:56:51 -0700
Subject: [PATCH 1102/2047] torture: Stop right-shifting torture_random()
 return values

Now that torture_random() uses swahw32(), its callers no longer see
not-so-random low-order bits, as these are now swapped up into the upper
16 bits of the torture_random() function's return value.  This commit
therefore removes the right-shifting of torture_random() return values.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/torture.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/torture.c b/kernel/torture.c
index 984651de4b55..b28b05bbef02 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -92,7 +92,7 @@ int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_s
 	ktime_t hto = baset_ns;
 
 	if (trsp)
-		hto += (torture_random(trsp) >> 3) % fuzzt_ns;
+		hto += torture_random(trsp) % fuzzt_ns;
 	set_current_state(TASK_IDLE);
 	return schedule_hrtimeout(&hto, HRTIMER_MODE_REL);
 }
@@ -362,7 +362,7 @@ torture_onoff(void *arg)
 			torture_hrtimeout_jiffies(HZ / 10, &rand);
 			continue;
 		}
-		cpu = (torture_random(&rand) >> 4) % (maxcpu + 1);
+		cpu = torture_random(&rand) % (maxcpu + 1);
 		if (!torture_offline(cpu,
 				     &n_offline_attempts, &n_offline_successes,
 				     &sum_offline, &min_offline, &max_offline))

From bc19e86e285f679d39e1671e72e2f02ac04de999 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 12 Jul 2023 16:16:37 -0700
Subject: [PATCH 1103/2047] rcutorture: Stop right-shifting torture_random()
 return values

Now that torture_random() uses swahw32(), its callers no longer see
not-so-random low-order bits, as these are now swapped up into the upper
16 bits of the torture_random() function's return value.  This commit
therefore removes the right-shifting of torture_random() return values.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcutorture.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d291a1438c30..ade42d6a9d9b 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1877,7 +1877,7 @@ static int
 rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
 {
 	int mask = rcutorture_extend_mask_max();
-	unsigned long randmask1 = torture_random(trsp) >> 8;
+	unsigned long randmask1 = torture_random(trsp);
 	unsigned long randmask2 = randmask1 >> 3;
 	unsigned long preempts = RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED;
 	unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ;
@@ -1936,7 +1936,7 @@ rcutorture_loop_extend(int *readstate, struct torture_random_state *trsp,
 	if (!((mask - 1) & mask))
 		return rtrsp;  /* Current RCU reader not extendable. */
 	/* Bias towards larger numbers of loops. */
-	i = (torture_random(trsp) >> 3);
+	i = torture_random(trsp);
 	i = ((i | (i >> 3)) & RCUTORTURE_RDR_MAX_LOOPS) + 1;
 	for (j = 0; j < i; j++) {
 		mask = rcutorture_extend_mask(*readstate, trsp);
@@ -2137,7 +2137,7 @@ static int rcu_nocb_toggle(void *arg)
 		toggle_fuzz = NSEC_PER_USEC;
 	do {
 		r = torture_random(&rand);
-		cpu = (r >> 4) % (maxcpu + 1);
+		cpu = (r >> 1) % (maxcpu + 1);
 		if (r & 0x1) {
 			rcu_nocb_cpu_offload(cpu);
 			atomic_long_inc(&n_nocb_offload);

From 08ab786556ff177086ce93b26daf2a58edd10968 Mon Sep 17 00:00:00 2001
From: Aakash Sen Sharma <aakashsensharma@gmail.com>
Date: Tue, 13 Jun 2023 01:13:11 +0530
Subject: [PATCH 1104/2047] rust: bindgen: upgrade to 0.65.1

In LLVM 16, anonymous items may return names like `(unnamed union at ..)`
rather than empty names [1], which breaks Rust-enabled builds because
bindgen assumed an empty name instead of detecting them via
`clang_Cursor_isAnonymous` [2]:

    $ make rustdoc LLVM=1 CLIPPY=1 -j$(nproc)
      RUSTC L rust/core.o
      BINDGEN rust/bindings/bindings_generated.rs
      BINDGEN rust/bindings/bindings_helpers_generated.rs
      BINDGEN rust/uapi/uapi_generated.rs
    thread 'main' panicked at '"ftrace_branch_data_union_(anonymous_at__/_/include/linux/compiler_types_h_146_2)" is not a valid Ident', .../proc-macro2-1.0.24/src/fallback.rs:693:9
    ...
    thread 'main' panicked at '"ftrace_branch_data_union_(anonymous_at__/_/include/linux/compiler_types_h_146_2)" is not a valid Ident', .../proc-macro2-1.0.24/src/fallback.rs:693:9
    ...

This was fixed in bindgen 0.62.0. Therefore, upgrade bindgen to
a more recent version, 0.65.1, to support LLVM 16.

Since bindgen 0.58.0 changed the `--{white,black}list-*` flags to
`--{allow,block}list-*` [3], update them on our side too.

In addition, bindgen 0.61.0 moved its CLI utility into a binary crate
called `bindgen-cli` [4]. Thus update the installation command in the
Quick Start guide.

Moreover, bindgen 0.61.0 changed the default functionality to bind
`size_t` to `usize` [5] and added the `--no-size_t-is-usize` flag
to not bind `size_t` as `usize`. Then bindgen 0.65.0 removed
the `--size_t-is-usize` flag [6]. Thus stop passing the flag to bindgen.

Finally, bindgen 0.61.0 added support for the `noreturn` attribute (in
its different forms) [7]. Thus remove the infinite loop in our Rust
panic handler after calling `BUG()`, since bindgen now correctly
generates a `BUG()` binding that returns `!` instead of `()`.

Link: https://github.com/llvm/llvm-project/commit/19e984ef8f49bc3ccced15621989fa9703b2cd5b [1]
Link: https://github.com/rust-lang/rust-bindgen/pull/2319 [2]
Link: https://github.com/rust-lang/rust-bindgen/pull/1990 [3]
Link: https://github.com/rust-lang/rust-bindgen/pull/2284 [4]
Link: https://github.com/rust-lang/rust-bindgen/commit/cc78b6fdb6e829e5fb8fa1639f2182cb49333569 [5]
Link: https://github.com/rust-lang/rust-bindgen/pull/2408 [6]
Link: https://github.com/rust-lang/rust-bindgen/issues/2094 [7]
Signed-off-by: Aakash Sen Sharma <aakashsensharma@gmail.com>
Closes: https://github.com/Rust-for-Linux/linux/issues/1013
Tested-by: Ariel Miculas <amiculas@cisco.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Link: https://lore.kernel.org/r/20230612194311.24826-1-aakashsensharma@gmail.com
[ Reworded commit message. Mentioned the `bindgen-cli` binary crate
  change, linked to it and updated the Quick Start guide. Re-added a
  deleted "as" word in a code comment and reflowed comment to respect
  the maximum length. ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 Documentation/process/changes.rst  |  2 +-
 Documentation/rust/quick-start.rst |  2 +-
 rust/Makefile                      |  6 +++---
 rust/helpers.c                     | 15 +++++++--------
 rust/kernel/lib.rs                 |  3 ---
 scripts/min-tool-version.sh        |  2 +-
 6 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 1382bccc8818..0bbd040f6a55 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -32,7 +32,7 @@ you probably needn't concern yourself with pcmciautils.
 GNU C                  5.1              gcc --version
 Clang/LLVM (optional)  11.0.0           clang --version
 Rust (optional)        1.71.1           rustc --version
-bindgen (optional)     0.56.0           bindgen --version
+bindgen (optional)     0.65.1           bindgen --version
 GNU make               3.82             make --version
 bash                   4.2              bash --version
 binutils               2.25             ld -v
diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst
index 58a183bb90b1..2cdf75eeae1d 100644
--- a/Documentation/rust/quick-start.rst
+++ b/Documentation/rust/quick-start.rst
@@ -98,7 +98,7 @@ the ``bindgen`` tool. A particular version is required.
 
 Install it via (note that this will download and build the tool from source)::
 
-	cargo install --locked --version $(scripts/min-tool-version.sh bindgen) bindgen
+	cargo install --locked --version $(scripts/min-tool-version.sh bindgen) bindgen-cli
 
 ``bindgen`` needs to find a suitable ``libclang`` in order to work. If it is
 not found (or a different ``libclang`` than the one found should be used),
diff --git a/rust/Makefile b/rust/Makefile
index b278908c19e5..c3dfb3f6071b 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -300,7 +300,7 @@ quiet_cmd_bindgen = BINDGEN $@
 	$(BINDGEN) $< $(bindgen_target_flags) \
 		--use-core --with-derive-default --ctypes-prefix core::ffi --no-layout-tests \
 		--no-debug '.*' \
-		--size_t-is-usize -o $@ -- $(bindgen_c_flags_final) -DMODULE \
+		-o $@ -- $(bindgen_c_flags_final) -DMODULE \
 		$(bindgen_target_cflags) $(bindgen_target_extra)
 
 $(obj)/bindings/bindings_generated.rs: private bindgen_target_flags = \
@@ -320,8 +320,8 @@ $(obj)/uapi/uapi_generated.rs: $(src)/uapi/uapi_helper.h \
 # given it is `libclang`; but for consistency, future Clang changes and/or
 # a potential future GCC backend for `bindgen`, we disable it too.
 $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_flags = \
-    --blacklist-type '.*' --whitelist-var '' \
-    --whitelist-function 'rust_helper_.*'
+    --blocklist-type '.*' --allowlist-var '' \
+    --allowlist-function 'rust_helper_.*'
 $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_cflags = \
     -I$(objtree)/$(obj) -Wno-missing-prototypes -Wno-missing-declarations
 $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_extra = ; \
diff --git a/rust/helpers.c b/rust/helpers.c
index f946f2ea640a..ebd69490127b 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -138,19 +138,18 @@ void rust_helper_put_task_struct(struct task_struct *t)
 EXPORT_SYMBOL_GPL(rust_helper_put_task_struct);
 
 /*
- * We use `bindgen`'s `--size_t-is-usize` option to bind the C `size_t` type
- * as the Rust `usize` type, so we can use it in contexts where Rust
- * expects a `usize` like slice (array) indices. `usize` is defined to be
- * the same as C's `uintptr_t` type (can hold any pointer) but not
- * necessarily the same as `size_t` (can hold the size of any single
- * object). Most modern platforms use the same concrete integer type for
+ * `bindgen` binds the C `size_t` type as the Rust `usize` type, so we can
+ * use it in contexts where Rust expects a `usize` like slice (array) indices.
+ * `usize` is defined to be the same as C's `uintptr_t` type (can hold any
+ * pointer) but not necessarily the same as `size_t` (can hold the size of any
+ * single object). Most modern platforms use the same concrete integer type for
  * both of them, but in case we find ourselves on a platform where
  * that's not true, fail early instead of risking ABI or
  * integer-overflow issues.
  *
  * If your platform fails this assertion, it means that you are in
- * danger of integer-overflow bugs (even if you attempt to remove
- * `--size_t-is-usize`). It may be easiest to change the kernel ABI on
+ * danger of integer-overflow bugs (even if you attempt to add
+ * `--no-size_t-is-usize`). It may be easiest to change the kernel ABI on
  * your platform such that `size_t` matches `uintptr_t` (i.e., to increase
  * `size_t`, because `uintptr_t` has to be at least as big as `size_t`).
  */
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 85b261209977..d59041ff5ff2 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -93,7 +93,4 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! {
     pr_emerg!("{}\n", info);
     // SAFETY: FFI call.
     unsafe { bindings::BUG() };
-    // Bindgen currently does not recognize `__noreturn` so `BUG` returns `()`
-    // instead of `!`. See <https://github.com/rust-lang/rust-bindgen/issues/2094>.
-    loop {}
 }
diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh
index 8da3b53fe36a..d65ab8bfeaf4 100755
--- a/scripts/min-tool-version.sh
+++ b/scripts/min-tool-version.sh
@@ -34,7 +34,7 @@ rustc)
 	echo 1.71.1
 	;;
 bindgen)
-	echo 0.56.0
+	echo 0.65.1
 	;;
 *)
 	echo "$1: unknown tool" >&2

From d80a8f1b58c2bc8d7c6bfb65401ea4f7ec8cddc2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 8 Aug 2023 07:34:20 -0400
Subject: [PATCH 1105/2047] vfs, security: Fix automount superblock LSM init
 problem, preventing NFS sb sharing

When NFS superblocks are created by automounting, their LSM parameters
aren't set in the fs_context struct prior to sget_fc() being called,
leading to failure to match existing superblocks.

This bug leads to messages like the following appearing in dmesg when
fscache is enabled:

    NFS: Cache volume key already in use (nfs,4.2,2,108,106a8c0,1,,,,100000,100000,2ee,3a98,1d4c,3a98,1)

Fix this by adding a new LSM hook to load fc->security for submount
creation.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/165962680944.3334508.6610023900349142034.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165962729225.3357250.14350728846471527137.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/165970659095.2812394.6868894171102318796.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/166133579016.3678898.6283195019480567275.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/217595.1662033775@warthog.procyon.org.uk/ # v5
Fixes: 9bc61ab18b1d ("vfs: Introduce fs_context, switch vfs_kern_mount() to it.")
Fixes: 779df6a5480f ("NFS: Ensure security label is set for root inode")
Tested-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: "Christian Brauner (Microsoft)" <brauner@kernel.org>
Acked-by: Paul Moore <paul@paul-moore.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Message-Id: <20230808-master-v9-1-e0ecde888221@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fs_context.c               | 23 +++++++++++++++-
 include/linux/lsm_hook_defs.h |  1 +
 include/linux/security.h      |  6 +++++
 security/security.c           | 14 ++++++++++
 security/selinux/hooks.c      | 22 +++++++++++++++
 security/smack/smack_lsm.c    | 51 +++++++++++++++++++++++++++++++++++
 6 files changed, 116 insertions(+), 1 deletion(-)

diff --git a/fs/fs_context.c b/fs/fs_context.c
index 851214d1d013..375023e40161 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -315,10 +315,31 @@ struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
 }
 EXPORT_SYMBOL(fs_context_for_reconfigure);
 
+/**
+ * fs_context_for_submount: allocate a new fs_context for a submount
+ * @type: file_system_type of the new context
+ * @reference: reference dentry from which to copy relevant info
+ *
+ * Allocate a new fs_context suitable for a submount. This also ensures that
+ * the fc->security object is inherited from @reference (if needed).
+ */
 struct fs_context *fs_context_for_submount(struct file_system_type *type,
 					   struct dentry *reference)
 {
-	return alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT);
+	struct fs_context *fc;
+	int ret;
+
+	fc = alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT);
+	if (IS_ERR(fc))
+		return fc;
+
+	ret = security_fs_context_submount(fc, reference->d_sb);
+	if (ret) {
+		put_fs_context(fc);
+		return ERR_PTR(ret);
+	}
+
+	return fc;
 }
 EXPORT_SYMBOL(fs_context_for_submount);
 
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 7308a1a7599b..af796986baee 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -54,6 +54,7 @@ LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, struct file *f
 LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm)
 LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm)
 LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm)
+LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference)
 LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc,
 	 struct fs_context *src_sc)
 LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc,
diff --git a/include/linux/security.h b/include/linux/security.h
index 32828502f09e..bac98ea18f78 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -293,6 +293,7 @@ int security_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file);
 int security_bprm_check(struct linux_binprm *bprm);
 void security_bprm_committing_creds(struct linux_binprm *bprm);
 void security_bprm_committed_creds(struct linux_binprm *bprm);
+int security_fs_context_submount(struct fs_context *fc, struct super_block *reference);
 int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc);
 int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param);
 int security_sb_alloc(struct super_block *sb);
@@ -629,6 +630,11 @@ static inline void security_bprm_committed_creds(struct linux_binprm *bprm)
 {
 }
 
+static inline int security_fs_context_submount(struct fs_context *fc,
+					   struct super_block *reference)
+{
+	return 0;
+}
 static inline int security_fs_context_dup(struct fs_context *fc,
 					  struct fs_context *src_fc)
 {
diff --git a/security/security.c b/security/security.c
index b720424ca37d..549104a447e3 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1138,6 +1138,20 @@ void security_bprm_committed_creds(struct linux_binprm *bprm)
 	call_void_hook(bprm_committed_creds, bprm);
 }
 
+/**
+ * security_fs_context_submount() - Initialise fc->security
+ * @fc: new filesystem context
+ * @reference: dentry reference for submount/remount
+ *
+ * Fill out the ->security field for a new fs_context.
+ *
+ * Return: Returns 0 on success or negative error code on failure.
+ */
+int security_fs_context_submount(struct fs_context *fc, struct super_block *reference)
+{
+	return call_int_hook(fs_context_submount, 0, fc, reference);
+}
+
 /**
  * security_fs_context_dup() - Duplicate a fs_context LSM blob
  * @fc: destination filesystem context
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d06e350fedee..afd663744041 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2745,6 +2745,27 @@ static int selinux_umount(struct vfsmount *mnt, int flags)
 				   FILESYSTEM__UNMOUNT, NULL);
 }
 
+static int selinux_fs_context_submount(struct fs_context *fc,
+				   struct super_block *reference)
+{
+	const struct superblock_security_struct *sbsec;
+	struct selinux_mnt_opts *opts;
+
+	opts = kzalloc(sizeof(*opts), GFP_KERNEL);
+	if (!opts)
+		return -ENOMEM;
+
+	sbsec = selinux_superblock(reference);
+	if (sbsec->flags & FSCONTEXT_MNT)
+		opts->fscontext_sid = sbsec->sid;
+	if (sbsec->flags & CONTEXT_MNT)
+		opts->context_sid = sbsec->mntpoint_sid;
+	if (sbsec->flags & DEFCONTEXT_MNT)
+		opts->defcontext_sid = sbsec->def_sid;
+	fc->security = opts;
+	return 0;
+}
+
 static int selinux_fs_context_dup(struct fs_context *fc,
 				  struct fs_context *src_fc)
 {
@@ -7182,6 +7203,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
 	/*
 	 * PUT "CLONING" (ACCESSING + ALLOCATING) HOOKS HERE
 	 */
+	LSM_HOOK_INIT(fs_context_submount, selinux_fs_context_submount),
 	LSM_HOOK_INIT(fs_context_dup, selinux_fs_context_dup),
 	LSM_HOOK_INIT(fs_context_parse_param, selinux_fs_context_parse_param),
 	LSM_HOOK_INIT(sb_eat_lsm_opts, selinux_sb_eat_lsm_opts),
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 6e270cf3fd30..a8201cf22f20 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -614,6 +614,56 @@ out_opt_err:
 	return -EINVAL;
 }
 
+/**
+ * smack_fs_context_submount - Initialise security data for a filesystem context
+ * @fc: The filesystem context.
+ * @reference: reference superblock
+ *
+ * Returns 0 on success or -ENOMEM on error.
+ */
+static int smack_fs_context_submount(struct fs_context *fc,
+				 struct super_block *reference)
+{
+	struct superblock_smack *sbsp;
+	struct smack_mnt_opts *ctx;
+	struct inode_smack *isp;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+	fc->security = ctx;
+
+	sbsp = smack_superblock(reference);
+	isp = smack_inode(reference->s_root->d_inode);
+
+	if (sbsp->smk_default) {
+		ctx->fsdefault = kstrdup(sbsp->smk_default->smk_known, GFP_KERNEL);
+		if (!ctx->fsdefault)
+			return -ENOMEM;
+	}
+
+	if (sbsp->smk_floor) {
+		ctx->fsfloor = kstrdup(sbsp->smk_floor->smk_known, GFP_KERNEL);
+		if (!ctx->fsfloor)
+			return -ENOMEM;
+	}
+
+	if (sbsp->smk_hat) {
+		ctx->fshat = kstrdup(sbsp->smk_hat->smk_known, GFP_KERNEL);
+		if (!ctx->fshat)
+			return -ENOMEM;
+	}
+
+	if (isp->smk_flags & SMK_INODE_TRANSMUTE) {
+		if (sbsp->smk_root) {
+			ctx->fstransmute = kstrdup(sbsp->smk_root->smk_known, GFP_KERNEL);
+			if (!ctx->fstransmute)
+				return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
 /**
  * smack_fs_context_dup - Duplicate the security data on fs_context duplication
  * @fc: The new filesystem context.
@@ -4876,6 +4926,7 @@ static struct security_hook_list smack_hooks[] __ro_after_init = {
 	LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme),
 	LSM_HOOK_INIT(syslog, smack_syslog),
 
+	LSM_HOOK_INIT(fs_context_submount, smack_fs_context_submount),
 	LSM_HOOK_INIT(fs_context_dup, smack_fs_context_dup),
 	LSM_HOOK_INIT(fs_context_parse_param, smack_fs_context_parse_param),
 

From 8a237adf213d73671992266eff7437f1b9f40567 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 27 Jun 2023 17:08:15 -0300
Subject: [PATCH 1106/2047] fs/buffer.c: disable per-CPU buffer_head cache for
 isolated CPUs

For certain types of applications (for example PLC software or
RAN processing), upon occurrence of an event, it is necessary to
complete a certain task in a maximum amount of time (deadline).

One way to express this requirement is with a pair of numbers,
deadline time and execution time, where:

        * deadline time: length of time between event and deadline.
        * execution time: length of time it takes for processing of event
                          to occur on a particular hardware platform
                          (uninterrupted).

The particular values depend on use-case. For the case
where the realtime application executes in a virtualized
guest, an IPI which must be serviced in the host will cause
the following sequence of events:

        1) VM-exit
        2) execution of IPI (and function call)
        3) VM-entry

Which causes an excess of 50us latency as observed by cyclictest
(this violates the latency requirement of vRAN application with 1ms TTI,
for example).

invalidate_bh_lrus calls an IPI on each CPU that has non empty
per-CPU cache:

        on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);

The performance when using the per-CPU LRU cache is as follows:

 42 ns per __find_get_block
 68 ns per __find_get_block_slow

Given that the main use cases for latency sensitive applications
do not involve block I/O (data necessary for program operation is
locked in RAM), disable per-CPU buffer_head caches for isolated CPUs.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Acked-by: Frederic Weisbecker <frederic@kernel.org>
Message-Id: <ZJtBrybavtb1x45V@tpad>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/buffer.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index bd091329026c..3aa2380a75e1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -49,6 +49,7 @@
 #include <trace/events/block.h>
 #include <linux/fscrypt.h>
 #include <linux/fsverity.h>
+#include <linux/sched/isolation.h>
 
 #include "internal.h"
 
@@ -1352,7 +1353,7 @@ static void bh_lru_install(struct buffer_head *bh)
 	 * failing page migration.
 	 * Skip putting upcoming bh into bh_lru until migration is done.
 	 */
-	if (lru_cache_disabled()) {
+	if (lru_cache_disabled() || cpu_is_isolated(smp_processor_id())) {
 		bh_lru_unlock();
 		return;
 	}
@@ -1382,6 +1383,10 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
 
 	check_irqs_on();
 	bh_lru_lock();
+	if (cpu_is_isolated(smp_processor_id())) {
+		bh_lru_unlock();
+		return NULL;
+	}
 	for (i = 0; i < BH_LRU_SIZE; i++) {
 		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
 

From d220efa20bba1ecc3fba3b14b2bf404a1557acd0 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Date: Sun, 25 Jun 2023 20:20:47 +0200
Subject: [PATCH 1107/2047] docs: filesystems: idmappings: clarify from where
 idmappings are taken

Let's clarify from where we take idmapping of each type:
- caller
- filesystem
- mount

Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Christian Brauner <brauner@kernel.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Message-Id: <20230625182047.26854-1-aleksandr.mikhalitsyn@canonical.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/idmappings.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst
index ad6d21640576..f3d168c9f0b9 100644
--- a/Documentation/filesystems/idmappings.rst
+++ b/Documentation/filesystems/idmappings.rst
@@ -373,6 +373,13 @@ kernel maps the caller's userspace id down into a kernel id according to the
 caller's idmapping and then maps that kernel id up according to the
 filesystem's idmapping.
 
+From the implementation point it's worth mentioning how idmappings are represented.
+All idmappings are taken from the corresponding user namespace.
+
+    - caller's idmapping (usually taken from ``current_user_ns()``)
+    - filesystem's idmapping (``sb->s_user_ns``)
+    - mount's idmapping (``mnt_idmap(vfsmnt)``)
+
 Let's see some examples with caller/filesystem idmapping but without mount
 idmappings. This will exhibit some problems we can hit. After that we will
 revisit/reconsider these examples, this time using mount idmappings, to see how

From 89cbd4c036ba44eadc495820e72c5c36273abb76 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 11 Aug 2023 09:43:59 +0800
Subject: [PATCH 1108/2047] fs: Fix one kernel-doc comment

Fix one kernel-doc comment to silence the warning:

fs/read_write.c:88: warning: Function parameter or member 'maxsize' not described in 'generic_file_llseek_size'

Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Message-Id: <20230811014359.4960-1-yang.lee@linux.alibaba.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/read_write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index b07de77ef126..4771701c896b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -71,7 +71,7 @@ EXPORT_SYMBOL(vfs_setpos);
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
  * @whence:	type of seek
- * @size:	max size of this file in file system
+ * @maxsize:	max size of this file in file system
  * @eof:	offset used for SEEK_END position
  *
  * This is a variant of generic_file_llseek that allows passing in a custom

From 45e0d4b95b65fedd18aa3e1b6571e16a2e6e0aa9 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Fri, 11 Aug 2023 21:48:14 +0200
Subject: [PATCH 1109/2047] vfs: fix up the assert in i_readcount_dec

Drops a race where 2 threads could spot a positive value and both
proceed to dec to -1, without reporting anything.

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Message-Id: <20230811194814.1612336-1-mjguzik@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 11055d00fab8..57e2a0a9eea1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2607,8 +2607,7 @@ static inline bool inode_is_open_for_write(const struct inode *inode)
 #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
 static inline void i_readcount_dec(struct inode *inode)
 {
-	BUG_ON(!atomic_read(&inode->i_readcount));
-	atomic_dec(&inode->i_readcount);
+	BUG_ON(atomic_dec_return(&inode->i_readcount) < 0);
 }
 static inline void i_readcount_inc(struct inode *inode)
 {

From 45071e1c2897138f337000e12f2393e769f125b3 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Sun, 13 Aug 2023 10:23:49 +0200
Subject: [PATCH 1110/2047] init: Add support for rootwait timeout parameter

Add an optional timeout arg to 'rootwait' as the maximum time in
seconds to wait for the root device to show up before attempting
forced mount of the root filesystem.

Use case:
In case of device mapper usage for the rootfs (e.g. root=/dev/dm-0),
if the mapper is not able to create the virtual block for any reason
(wrong arguments, bad dm-verity signature, etc), the `rootwait` param
causes the kernel to wait forever. It may however be desirable to only
wait for a given time and then panic (force mount) to cause device reset.
This gives the bootloader a chance to detect the problem and to take some
measures, such as marking the booted partition as bad (for A/B case) or
entering a recovery mode.

In success case, mounting happens as soon as the root device is ready,
unlike the existing 'rootdelay' parameter which performs an unconditional
pause.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Message-Id: <20230813082349.513386-1-loic.poulain@linaro.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 .../admin-guide/kernel-parameters.txt         |  4 ++
 init/do_mounts.c                              | 38 ++++++++++++++++++-
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1457995fd41..387cf9c2a2c5 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5501,6 +5501,10 @@
 			Useful for devices that are detected asynchronously
 			(e.g. USB and MMC devices).
 
+	rootwait=	[KNL] Maximum time (in seconds) to wait for root device
+			to show up before attempting to mount the root
+			filesystem.
+
 	rproc_mem=nn[KMG][@address]
 			[KNL,ARM,CMA] Remoteproc physical memory block.
 			Memory area to be used by remote processor image,
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 1aa015883519..5dfd30b13f48 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/ramfs.h>
 #include <linux/shmem_fs.h>
+#include <linux/ktime.h>
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs_sb.h>
@@ -71,12 +72,37 @@ static int __init rootwait_setup(char *str)
 {
 	if (*str)
 		return 0;
-	root_wait = 1;
+	root_wait = -1;
 	return 1;
 }
 
 __setup("rootwait", rootwait_setup);
 
+static int __init rootwait_timeout_setup(char *str)
+{
+	int sec;
+
+	if (kstrtoint(str, 0, &sec) || sec < 0) {
+		pr_warn("ignoring invalid rootwait value\n");
+		goto ignore;
+	}
+
+	if (check_mul_overflow(sec, MSEC_PER_SEC, &root_wait)) {
+		pr_warn("ignoring excessive rootwait value\n");
+		goto ignore;
+	}
+
+	return 1;
+
+ignore:
+	/* Fallback to indefinite wait */
+	root_wait = -1;
+
+	return 1;
+}
+
+__setup("rootwait=", rootwait_timeout_setup);
+
 static char * __initdata root_mount_data;
 static int __init root_data_setup(char *str)
 {
@@ -384,14 +410,22 @@ void __init mount_root(char *root_device_name)
 /* wait for any asynchronous scanning to complete */
 static void __init wait_for_root(char *root_device_name)
 {
+	ktime_t end;
+
 	if (ROOT_DEV != 0)
 		return;
 
 	pr_info("Waiting for root device %s...\n", root_device_name);
 
+	end = ktime_add_ms(ktime_get_raw(), root_wait);
+
 	while (!driver_probe_done() ||
-	       early_lookup_bdev(root_device_name, &ROOT_DEV) < 0)
+	       early_lookup_bdev(root_device_name, &ROOT_DEV) < 0) {
 		msleep(5);
+		if (root_wait > 0 && ktime_after(ktime_get_raw(), end))
+			break;
+	}
+
 	async_synchronize_full();
 
 }

From 805946a2ce31241299a725adef8debce56069495 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 14:59:50 +0800
Subject: [PATCH 1111/2047] mmc: sunxi: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Link: https://lore.kernel.org/r/20230727070051.17778-1-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sunxi-mmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
index 69dcb8805e05..d3bd0ac99ec4 100644
--- a/drivers/mmc/host/sunxi-mmc.c
+++ b/drivers/mmc/host/sunxi-mmc.c
@@ -1486,7 +1486,7 @@ error_free_host:
 	return ret;
 }
 
-static int sunxi_mmc_remove(struct platform_device *pdev)
+static void sunxi_mmc_remove(struct platform_device *pdev)
 {
 	struct mmc_host	*mmc = platform_get_drvdata(pdev);
 	struct sunxi_mmc_host *host = mmc_priv(mmc);
@@ -1499,8 +1499,6 @@ static int sunxi_mmc_remove(struct platform_device *pdev)
 	}
 	dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
 	mmc_free_host(mmc);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -1556,7 +1554,7 @@ static struct platform_driver sunxi_mmc_driver = {
 		.pm = &sunxi_mmc_pm_ops,
 	},
 	.probe		= sunxi_mmc_probe,
-	.remove		= sunxi_mmc_remove,
+	.remove_new	= sunxi_mmc_remove,
 };
 module_platform_driver(sunxi_mmc_driver);
 

From cac6d238834d82ea50b977b918962e558de74420 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 14:59:51 +0800
Subject: [PATCH 1112/2047] mmc: bcm2835: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://lore.kernel.org/r/20230727070051.17778-2-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/bcm2835.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c
index eea208856ce0..35d8fdea668b 100644
--- a/drivers/mmc/host/bcm2835.c
+++ b/drivers/mmc/host/bcm2835.c
@@ -1431,7 +1431,7 @@ err:
 	return ret;
 }
 
-static int bcm2835_remove(struct platform_device *pdev)
+static void bcm2835_remove(struct platform_device *pdev)
 {
 	struct bcm2835_host *host = platform_get_drvdata(pdev);
 	struct mmc_host *mmc = mmc_from_priv(host);
@@ -1449,8 +1449,6 @@ static int bcm2835_remove(struct platform_device *pdev)
 		dma_release_channel(host->dma_chan_rxtx);
 
 	mmc_free_host(mmc);
-
-	return 0;
 }
 
 static const struct of_device_id bcm2835_match[] = {
@@ -1461,7 +1459,7 @@ MODULE_DEVICE_TABLE(of, bcm2835_match);
 
 static struct platform_driver bcm2835_driver = {
 	.probe      = bcm2835_probe,
-	.remove     = bcm2835_remove,
+	.remove_new = bcm2835_remove,
 	.driver     = {
 		.name		= "sdhost-bcm2835",
 		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,

From 67ad8238d016027ffaffbbe1edab3fbc7e641edc Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 14:59:52 +0800
Subject: [PATCH 1113/2047] mmc: jz4740: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/20230727070051.17778-3-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/jz4740_mmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index 1846a05210e3..f379ce5b582d 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -1163,7 +1163,7 @@ err_free_host:
 	return ret;
 }
 
-static int jz4740_mmc_remove(struct platform_device *pdev)
+static void jz4740_mmc_remove(struct platform_device *pdev)
 {
 	struct jz4740_mmc_host *host = platform_get_drvdata(pdev);
 
@@ -1179,8 +1179,6 @@ static int jz4740_mmc_remove(struct platform_device *pdev)
 		jz4740_mmc_release_dma_channels(host);
 
 	mmc_free_host(host->mmc);
-
-	return 0;
 }
 
 static int jz4740_mmc_suspend(struct device *dev)
@@ -1198,7 +1196,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(jz4740_mmc_pm_ops, jz4740_mmc_suspend,
 
 static struct platform_driver jz4740_mmc_driver = {
 	.probe = jz4740_mmc_probe,
-	.remove = jz4740_mmc_remove,
+	.remove_new = jz4740_mmc_remove,
 	.driver = {
 		.name = "jz4740-mmc",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From ab02d58f242185f765f2264811600db6a7963eb4 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 14:59:53 +0800
Subject: [PATCH 1114/2047] mmc: litex_mmc: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Gabriel Somlo <gsomlo@gmail.com>
Link: https://lore.kernel.org/r/20230727070051.17778-4-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/litex_mmc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/litex_mmc.c b/drivers/mmc/host/litex_mmc.c
index 9af6b0902efe..4ec8072dc60b 100644
--- a/drivers/mmc/host/litex_mmc.c
+++ b/drivers/mmc/host/litex_mmc.c
@@ -629,12 +629,11 @@ static int litex_mmc_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int litex_mmc_remove(struct platform_device *pdev)
+static void litex_mmc_remove(struct platform_device *pdev)
 {
 	struct litex_mmc_host *host = platform_get_drvdata(pdev);
 
 	mmc_remove_host(host->mmc);
-	return 0;
 }
 
 static const struct of_device_id litex_match[] = {
@@ -645,7 +644,7 @@ MODULE_DEVICE_TABLE(of, litex_match);
 
 static struct platform_driver litex_mmc_driver = {
 	.probe = litex_mmc_probe,
-	.remove = litex_mmc_remove,
+	.remove_new = litex_mmc_remove,
 	.driver = {
 		.name = "litex-mmc",
 		.of_match_table = litex_match,

From 19334c5369cd60907fae3983d88a08986994514b Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 14:59:54 +0800
Subject: [PATCH 1115/2047] mmc: mtk-sd: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20230727070051.17778-5-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mtk-sd.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 6c68c642ef61..5392200cfdf7 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -2885,7 +2885,7 @@ host_free:
 	return ret;
 }
 
-static int msdc_drv_remove(struct platform_device *pdev)
+static void msdc_drv_remove(struct platform_device *pdev)
 {
 	struct mmc_host *mmc;
 	struct msdc_host *host;
@@ -2909,8 +2909,6 @@ static int msdc_drv_remove(struct platform_device *pdev)
 			host->dma.bd, host->dma.bd_addr);
 
 	mmc_free_host(mmc);
-
-	return 0;
 }
 
 static void msdc_save_reg(struct msdc_host *host)
@@ -3052,7 +3050,7 @@ static const struct dev_pm_ops msdc_dev_pm_ops = {
 
 static struct platform_driver mt_msdc_driver = {
 	.probe = msdc_drv_probe,
-	.remove = msdc_drv_remove,
+	.remove_new = msdc_drv_remove,
 	.driver = {
 		.name = "mtk-msdc",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From 49f96e466d419516d75e93d58c79b90a9cf4f57b Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 14:59:55 +0800
Subject: [PATCH 1116/2047] mmc: cb710: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Link: https://lore.kernel.org/r/20230727070051.17778-6-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/cb710-mmc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/cb710-mmc.c b/drivers/mmc/host/cb710-mmc.c
index 6d623b2681c3..0aec33b88bef 100644
--- a/drivers/mmc/host/cb710-mmc.c
+++ b/drivers/mmc/host/cb710-mmc.c
@@ -745,7 +745,7 @@ err_free_mmc:
 	return err;
 }
 
-static int cb710_mmc_exit(struct platform_device *pdev)
+static void cb710_mmc_exit(struct platform_device *pdev)
 {
 	struct cb710_slot *slot = cb710_pdev_to_slot(pdev);
 	struct mmc_host *mmc = cb710_slot_to_mmc(slot);
@@ -766,13 +766,12 @@ static int cb710_mmc_exit(struct platform_device *pdev)
 	tasklet_kill(&reader->finish_req_tasklet);
 
 	mmc_free_host(mmc);
-	return 0;
 }
 
 static struct platform_driver cb710_mmc_driver = {
 	.driver.name = "cb710-mmc",
 	.probe = cb710_mmc_init,
-	.remove = cb710_mmc_exit,
+	.remove_new = cb710_mmc_exit,
 #ifdef CONFIG_PM
 	.suspend = cb710_mmc_suspend,
 	.resume = cb710_mmc_resume,

From bc1711e8332da03648d8fe1950189237e66313af Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 14:59:56 +0800
Subject: [PATCH 1117/2047] mmc: davinci_mmc: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-7-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/davinci_mmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index 9e99de1206ef..ee3b1a4e0848 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -1344,7 +1344,7 @@ ioremap_fail:
 	return ret;
 }
 
-static int __exit davinci_mmcsd_remove(struct platform_device *pdev)
+static void __exit davinci_mmcsd_remove(struct platform_device *pdev)
 {
 	struct mmc_davinci_host *host = platform_get_drvdata(pdev);
 
@@ -1353,8 +1353,6 @@ static int __exit davinci_mmcsd_remove(struct platform_device *pdev)
 	davinci_release_dma_channels(host);
 	clk_disable_unprepare(host->clk);
 	mmc_free_host(host->mmc);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -1401,7 +1399,7 @@ static struct platform_driver davinci_mmcsd_driver = {
 		.of_match_table = davinci_mmc_dt_ids,
 	},
 	.probe		= davinci_mmcsd_probe,
-	.remove		= __exit_p(davinci_mmcsd_remove),
+	.remove_new	= __exit_p(davinci_mmcsd_remove),
 	.id_table	= davinci_mmc_devtype,
 };
 

From c4a7b258ddadbd1c44b4bf064835d5a52edb5429 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 14:59:57 +0800
Subject: [PATCH 1118/2047] mmc: dw_mmc: hi3798cv200: Convert to platform
 remove callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-8-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/dw_mmc-hi3798cv200.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/dw_mmc-hi3798cv200.c b/drivers/mmc/host/dw_mmc-hi3798cv200.c
index 6f22fe054087..e9470c50a348 100644
--- a/drivers/mmc/host/dw_mmc-hi3798cv200.c
+++ b/drivers/mmc/host/dw_mmc-hi3798cv200.c
@@ -171,7 +171,7 @@ static int dw_mci_hi3798cv200_probe(struct platform_device *pdev)
 	return dw_mci_pltfm_register(pdev, &hi3798cv200_data);
 }
 
-static int dw_mci_hi3798cv200_remove(struct platform_device *pdev)
+static void dw_mci_hi3798cv200_remove(struct platform_device *pdev)
 {
 	struct dw_mci *host = platform_get_drvdata(pdev);
 	struct hi3798cv200_priv *priv = host->priv;
@@ -180,8 +180,6 @@ static int dw_mci_hi3798cv200_remove(struct platform_device *pdev)
 	clk_disable_unprepare(priv->sample_clk);
 
 	dw_mci_pltfm_remove(pdev);
-
-	return 0;
 }
 
 static const struct of_device_id dw_mci_hi3798cv200_match[] = {
@@ -192,7 +190,7 @@ static const struct of_device_id dw_mci_hi3798cv200_match[] = {
 MODULE_DEVICE_TABLE(of, dw_mci_hi3798cv200_match);
 static struct platform_driver dw_mci_hi3798cv200_driver = {
 	.probe = dw_mci_hi3798cv200_probe,
-	.remove = dw_mci_hi3798cv200_remove,
+	.remove_new = dw_mci_hi3798cv200_remove,
 	.driver = {
 		.name = "dwmmc_hi3798cv200",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From 603b72754f49694f7297fc40c8a43c23efb14e3c Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 14:59:58 +0800
Subject: [PATCH 1119/2047] mmc: sdhci-pic32: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-9-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pic32.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-pic32.c b/drivers/mmc/host/sdhci-pic32.c
index 6696b6bdd88e..7a0351a9c74e 100644
--- a/drivers/mmc/host/sdhci-pic32.c
+++ b/drivers/mmc/host/sdhci-pic32.c
@@ -210,7 +210,7 @@ err:
 	return ret;
 }
 
-static int pic32_sdhci_remove(struct platform_device *pdev)
+static void pic32_sdhci_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct pic32_sdhci_priv *sdhci_pdata = sdhci_priv(host);
@@ -221,8 +221,6 @@ static int pic32_sdhci_remove(struct platform_device *pdev)
 	clk_disable_unprepare(sdhci_pdata->base_clk);
 	clk_disable_unprepare(sdhci_pdata->sys_clk);
 	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 
 static const struct of_device_id pic32_sdhci_id_table[] = {
@@ -238,7 +236,7 @@ static struct platform_driver pic32_sdhci_driver = {
 		.of_match_table = of_match_ptr(pic32_sdhci_id_table),
 	},
 	.probe		= pic32_sdhci_probe,
-	.remove		= pic32_sdhci_remove,
+	.remove_new	= pic32_sdhci_remove,
 };
 
 module_platform_driver(pic32_sdhci_driver);

From 9479a6313108aa3c9ec0433056debf65733c346b Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 14:59:59 +0800
Subject: [PATCH 1120/2047] mmc: sdhci: milbeaut: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-10-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-milbeaut.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-milbeaut.c b/drivers/mmc/host/sdhci-milbeaut.c
index 701b11a14c6f..83706edc9796 100644
--- a/drivers/mmc/host/sdhci-milbeaut.c
+++ b/drivers/mmc/host/sdhci-milbeaut.c
@@ -313,7 +313,7 @@ err:
 	return ret;
 }
 
-static int sdhci_milbeaut_remove(struct platform_device *pdev)
+static void sdhci_milbeaut_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct f_sdhost_priv *priv = sdhci_priv(host);
@@ -326,8 +326,6 @@ static int sdhci_milbeaut_remove(struct platform_device *pdev)
 
 	sdhci_free_host(host);
 	platform_set_drvdata(pdev, NULL);
-
-	return 0;
 }
 
 static struct platform_driver sdhci_milbeaut_driver = {
@@ -337,7 +335,7 @@ static struct platform_driver sdhci_milbeaut_driver = {
 		.of_match_table = mlb_dt_ids,
 	},
 	.probe	= sdhci_milbeaut_probe,
-	.remove	= sdhci_milbeaut_remove,
+	.remove_new = sdhci_milbeaut_remove,
 };
 
 module_platform_driver(sdhci_milbeaut_driver);

From b76028c767511d74911c0f53f3b48fa0378f3213 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:00 +0800
Subject: [PATCH 1121/2047] mmc: omap_hsmmc: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-11-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/omap_hsmmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 93de00a06aba..e120aeb869b8 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1979,7 +1979,7 @@ err:
 	return ret;
 }
 
-static int omap_hsmmc_remove(struct platform_device *pdev)
+static void omap_hsmmc_remove(struct platform_device *pdev)
 {
 	struct omap_hsmmc_host *host = platform_get_drvdata(pdev);
 
@@ -1997,8 +1997,6 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
 	clk_disable_unprepare(host->dbclk);
 
 	mmc_free_host(host->mmc);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -2123,7 +2121,7 @@ static const struct dev_pm_ops omap_hsmmc_dev_pm_ops = {
 
 static struct platform_driver omap_hsmmc_driver = {
 	.probe		= omap_hsmmc_probe,
-	.remove		= omap_hsmmc_remove,
+	.remove_new	= omap_hsmmc_remove,
 	.driver		= {
 		.name = DRIVER_NAME,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From 738eeb967e53ab929669f07aa82e6c43caf36b59 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:01 +0800
Subject: [PATCH 1122/2047] mmc: sdhci-of-at91: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Claudiu Beznea <claudiu.beznea@tuxon.dev>
Link: https://lore.kernel.org/r/20230727070051.17778-12-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-at91.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index 808307a211f2..db01ea9fa203 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -443,7 +443,7 @@ sdhci_pltfm_free:
 	return ret;
 }
 
-static int sdhci_at91_remove(struct platform_device *pdev)
+static void sdhci_at91_remove(struct platform_device *pdev)
 {
 	struct sdhci_host	*host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host	*pltfm_host = sdhci_priv(host);
@@ -461,8 +461,6 @@ static int sdhci_at91_remove(struct platform_device *pdev)
 	clk_disable_unprepare(gck);
 	clk_disable_unprepare(hclock);
 	clk_disable_unprepare(mainck);
-
-	return 0;
 }
 
 static struct platform_driver sdhci_at91_driver = {
@@ -473,7 +471,7 @@ static struct platform_driver sdhci_at91_driver = {
 		.pm	= &sdhci_at91_dev_pm_ops,
 	},
 	.probe		= sdhci_at91_probe,
-	.remove		= sdhci_at91_remove,
+	.remove_new	= sdhci_at91_remove,
 };
 
 module_platform_driver(sdhci_at91_driver);

From e5ae9c1e5bcfe1745fc0207731c445514b96de79 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:02 +0800
Subject: [PATCH 1123/2047] mmc: omap: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-13-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/omap.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 6a259563690d..9fb8995b43a1 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -1506,7 +1506,7 @@ err_free_iclk:
 	return ret;
 }
 
-static int mmc_omap_remove(struct platform_device *pdev)
+static void mmc_omap_remove(struct platform_device *pdev)
 {
 	struct mmc_omap_host *host = platform_get_drvdata(pdev);
 	int i;
@@ -1532,8 +1532,6 @@ static int mmc_omap_remove(struct platform_device *pdev)
 		dma_release_channel(host->dma_rx);
 
 	destroy_workqueue(host->mmc_omap_wq);
-
-	return 0;
 }
 
 #if IS_BUILTIN(CONFIG_OF)
@@ -1546,7 +1544,7 @@ MODULE_DEVICE_TABLE(of, mmc_omap_match);
 
 static struct platform_driver mmc_omap_driver = {
 	.probe		= mmc_omap_probe,
-	.remove		= mmc_omap_remove,
+	.remove_new	= mmc_omap_remove,
 	.driver		= {
 		.name	= DRIVER_NAME,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From 41a734a7c64827dc3fff65b55f31052724dbd19c Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:03 +0800
Subject: [PATCH 1124/2047] mmc: dw_mmc: exynos: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-14-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/dw_mmc-exynos.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c
index 9f20ac524c8b..698408e8bad0 100644
--- a/drivers/mmc/host/dw_mmc-exynos.c
+++ b/drivers/mmc/host/dw_mmc-exynos.c
@@ -664,15 +664,13 @@ static int dw_mci_exynos_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int dw_mci_exynos_remove(struct platform_device *pdev)
+static void dw_mci_exynos_remove(struct platform_device *pdev)
 {
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_set_suspended(&pdev->dev);
 	pm_runtime_put_noidle(&pdev->dev);
 
 	dw_mci_pltfm_remove(pdev);
-
-	return 0;
 }
 
 static const struct dev_pm_ops dw_mci_exynos_pmops = {
@@ -685,7 +683,7 @@ static const struct dev_pm_ops dw_mci_exynos_pmops = {
 
 static struct platform_driver dw_mci_exynos_pltfm_driver = {
 	.probe		= dw_mci_exynos_probe,
-	.remove		= dw_mci_exynos_remove,
+	.remove_new	= dw_mci_exynos_remove,
 	.driver		= {
 		.name		= "dwmmc_exynos",
 		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,

From c61394aa928180f0296c64907c8fff5a6bdd01aa Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:04 +0800
Subject: [PATCH 1125/2047] mmc: sdhci-pxav3: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-15-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pxav3.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index e39dcc998772..3af43ac05825 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -470,7 +470,7 @@ err_clk_get:
 	return ret;
 }
 
-static int sdhci_pxav3_remove(struct platform_device *pdev)
+static void sdhci_pxav3_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -486,8 +486,6 @@ static int sdhci_pxav3_remove(struct platform_device *pdev)
 	clk_disable_unprepare(pxa->clk_core);
 
 	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -570,7 +568,7 @@ static struct platform_driver sdhci_pxav3_driver = {
 		.pm	= &sdhci_pxav3_pmops,
 	},
 	.probe		= sdhci_pxav3_probe,
-	.remove		= sdhci_pxav3_remove,
+	.remove_new	= sdhci_pxav3_remove,
 };
 
 module_platform_driver(sdhci_pxav3_driver);

From aeaa4cb3097212462031ca5e1f073c759d5fad52 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:05 +0800
Subject: [PATCH 1126/2047] mmc: rtsx_pci: Drop if block with always false
 condition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

rtsx_pci_sdmmc_drv_remove() is only called for a device after
rtsx_pci_sdmmc_drv_probe() returned 0. In that case platform_set_drvdata()
was called with a non-NULL value and so platform_get_drvdata()
won't return NULL.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20230727070051.17778-16-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/rtsx_pci_sdmmc.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 8098726dcc0b..062d07c2ae5e 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -1529,9 +1529,6 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
 	struct rtsx_pcr *pcr;
 	struct mmc_host *mmc;
 
-	if (!host)
-		return 0;
-
 	pcr = host->pcr;
 	pcr->slots[RTSX_SD_CARD].p_dev = NULL;
 	pcr->slots[RTSX_SD_CARD].card_event = NULL;

From dda6da1b851847f71996954b6d26a37593962b94 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:06 +0800
Subject: [PATCH 1127/2047] mmc: rtsx_pci: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-17-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/rtsx_pci_sdmmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 062d07c2ae5e..87d78432a1e0 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -1523,7 +1523,7 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
+static void rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
 {
 	struct realtek_pci_sdmmc *host = platform_get_drvdata(pdev);
 	struct rtsx_pcr *pcr;
@@ -1563,8 +1563,6 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
 
 	dev_dbg(&(pdev->dev),
 		": Realtek PCI-E SDMMC controller has been removed\n");
-
-	return 0;
 }
 
 static const struct platform_device_id rtsx_pci_sdmmc_ids[] = {
@@ -1578,7 +1576,7 @@ MODULE_DEVICE_TABLE(platform, rtsx_pci_sdmmc_ids);
 
 static struct platform_driver rtsx_pci_sdmmc_driver = {
 	.probe		= rtsx_pci_sdmmc_drv_probe,
-	.remove		= rtsx_pci_sdmmc_drv_remove,
+	.remove_new	= rtsx_pci_sdmmc_drv_remove,
 	.id_table       = rtsx_pci_sdmmc_ids,
 	.driver		= {
 		.name	= DRV_NAME_RTSX_PCI_SDMMC,

From 8fc8c82dfa92485c5f8c6a6b12c65191c62c02da Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:07 +0800
Subject: [PATCH 1128/2047] mmc: sh_mmcif: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-18-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sh_mmcif.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 838dab179724..077d711e964e 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -1508,7 +1508,7 @@ err_host:
 	return ret;
 }
 
-static int sh_mmcif_remove(struct platform_device *pdev)
+static void sh_mmcif_remove(struct platform_device *pdev)
 {
 	struct sh_mmcif_host *host = platform_get_drvdata(pdev);
 
@@ -1532,8 +1532,6 @@ static int sh_mmcif_remove(struct platform_device *pdev)
 	mmc_free_host(host->mmc);
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -1560,7 +1558,7 @@ static const struct dev_pm_ops sh_mmcif_dev_pm_ops = {
 
 static struct platform_driver sh_mmcif_driver = {
 	.probe		= sh_mmcif_probe,
-	.remove		= sh_mmcif_remove,
+	.remove_new	= sh_mmcif_remove,
 	.driver		= {
 		.name	= DRIVER_NAME,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From e50aed55be4befe9c4dcae6f39b5e6de5fb4587f Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:08 +0800
Subject: [PATCH 1129/2047] mmc: meson-gx: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20230727070051.17778-19-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/meson-gx-mmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 49e3d1418902..9837dab096e6 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -1294,7 +1294,7 @@ err_init_clk:
 	return ret;
 }
 
-static int meson_mmc_remove(struct platform_device *pdev)
+static void meson_mmc_remove(struct platform_device *pdev)
 {
 	struct meson_host *host = dev_get_drvdata(&pdev->dev);
 
@@ -1305,8 +1305,6 @@ static int meson_mmc_remove(struct platform_device *pdev)
 	free_irq(host->irq, host);
 
 	clk_disable_unprepare(host->mmc_clk);
-
-	return 0;
 }
 
 static const struct meson_mmc_data meson_gx_data = {
@@ -1337,7 +1335,7 @@ MODULE_DEVICE_TABLE(of, meson_mmc_of_match);
 
 static struct platform_driver meson_mmc_driver = {
 	.probe		= meson_mmc_probe,
-	.remove		= meson_mmc_remove,
+	.remove_new	= meson_mmc_remove,
 	.driver		= {
 		.name = DRIVER_NAME,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From f0cdeb7cc2e9513a109458f99cbad0a2e9af5a72 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:09 +0800
Subject: [PATCH 1130/2047] mmc: xenon: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-20-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-xenon.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c
index 08e838400b52..25ba7aecc3be 100644
--- a/drivers/mmc/host/sdhci-xenon.c
+++ b/drivers/mmc/host/sdhci-xenon.c
@@ -578,7 +578,7 @@ free_pltfm:
 	return err;
 }
 
-static int xenon_remove(struct platform_device *pdev)
+static void xenon_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -595,8 +595,6 @@ static int xenon_remove(struct platform_device *pdev)
 	clk_disable_unprepare(pltfm_host->clk);
 
 	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -705,7 +703,7 @@ static struct platform_driver sdhci_xenon_driver = {
 		.pm = &sdhci_xenon_dev_pm_ops,
 	},
 	.probe	= xenon_probe,
-	.remove	= xenon_remove,
+	.remove_new = xenon_remove,
 };
 
 module_platform_driver(sdhci_xenon_driver);

From 444176665f37856ab0df4996b089abb7ab1664b2 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:10 +0800
Subject: [PATCH 1131/2047] mmc: sdhci-s3c: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@kernel.org>
Link: https://lore.kernel.org/r/20230727070051.17778-21-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-s3c.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index 6245fb4584fe..0e8a8ac14e56 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -667,7 +667,7 @@ static int sdhci_s3c_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int sdhci_s3c_remove(struct platform_device *pdev)
+static void sdhci_s3c_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host =  platform_get_drvdata(pdev);
 	struct sdhci_s3c *sc = sdhci_priv(host);
@@ -687,8 +687,6 @@ static int sdhci_s3c_remove(struct platform_device *pdev)
 	clk_disable_unprepare(sc->clk_io);
 
 	sdhci_free_host(host);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -775,7 +773,7 @@ MODULE_DEVICE_TABLE(of, sdhci_s3c_dt_match);
 
 static struct platform_driver sdhci_s3c_driver = {
 	.probe		= sdhci_s3c_probe,
-	.remove		= sdhci_s3c_remove,
+	.remove_new	= sdhci_s3c_remove,
 	.id_table	= sdhci_s3c_driver_ids,
 	.driver		= {
 		.name	= "s3c-sdhci",

From 3372487a4c298843f257c9c30f7281092d1757bc Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:11 +0800
Subject: [PATCH 1132/2047] mmc: meson-mx-sdhc: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20230727070051.17778-22-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/meson-mx-sdhc-mmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/meson-mx-sdhc-mmc.c b/drivers/mmc/host/meson-mx-sdhc-mmc.c
index 97168cdfa8e9..528ec8166e7c 100644
--- a/drivers/mmc/host/meson-mx-sdhc-mmc.c
+++ b/drivers/mmc/host/meson-mx-sdhc-mmc.c
@@ -880,7 +880,7 @@ err_disable_pclk:
 	return ret;
 }
 
-static int meson_mx_sdhc_remove(struct platform_device *pdev)
+static void meson_mx_sdhc_remove(struct platform_device *pdev)
 {
 	struct meson_mx_sdhc_host *host = platform_get_drvdata(pdev);
 
@@ -889,8 +889,6 @@ static int meson_mx_sdhc_remove(struct platform_device *pdev)
 	meson_mx_sdhc_disable_clks(host->mmc);
 
 	clk_disable_unprepare(host->pclk);
-
-	return 0;
 }
 
 static const struct meson_mx_sdhc_data meson_mx_sdhc_data_meson8 = {
@@ -925,7 +923,7 @@ MODULE_DEVICE_TABLE(of, meson_mx_sdhc_of_match);
 
 static struct platform_driver meson_mx_sdhc_driver = {
 	.probe   = meson_mx_sdhc_probe,
-	.remove  = meson_mx_sdhc_remove,
+	.remove_new = meson_mx_sdhc_remove,
 	.driver  = {
 		.name = "meson-mx-sdhc",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From f8c9b4158e7c37d12f30f33dd97ad3ab067ac28c Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:12 +0800
Subject: [PATCH 1133/2047] mmc: rtsx_usb_sdmmc: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-23-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/rtsx_usb_sdmmc.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c
index 2c650cd58693..ded9b6849e35 100644
--- a/drivers/mmc/host/rtsx_usb_sdmmc.c
+++ b/drivers/mmc/host/rtsx_usb_sdmmc.c
@@ -1379,13 +1379,13 @@ static int rtsx_usb_sdmmc_drv_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int rtsx_usb_sdmmc_drv_remove(struct platform_device *pdev)
+static void rtsx_usb_sdmmc_drv_remove(struct platform_device *pdev)
 {
 	struct rtsx_usb_sdmmc *host = platform_get_drvdata(pdev);
 	struct mmc_host *mmc;
 
 	if (!host)
-		return 0;
+		return;
 
 	mmc = host->mmc;
 	host->host_removal = true;
@@ -1415,8 +1415,6 @@ static int rtsx_usb_sdmmc_drv_remove(struct platform_device *pdev)
 
 	dev_dbg(&(pdev->dev),
 		": Realtek USB SD/MMC module has been removed\n");
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -1455,7 +1453,7 @@ MODULE_DEVICE_TABLE(platform, rtsx_usb_sdmmc_ids);
 
 static struct platform_driver rtsx_usb_sdmmc_driver = {
 	.probe		= rtsx_usb_sdmmc_drv_probe,
-	.remove		= rtsx_usb_sdmmc_drv_remove,
+	.remove_new	= rtsx_usb_sdmmc_drv_remove,
 	.id_table       = rtsx_usb_sdmmc_ids,
 	.driver		= {
 		.name	= "rtsx_usb_sdmmc",

From 3f347f2c92fc386e0849aa20b045353e25b6996a Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:13 +0800
Subject: [PATCH 1134/2047] mmc: mxs-mmc: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-24-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mxs-mmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index 951c6c48b4f7..6751da9b60f9 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -673,7 +673,7 @@ out_mmc_free:
 	return ret;
 }
 
-static int mxs_mmc_remove(struct platform_device *pdev)
+static void mxs_mmc_remove(struct platform_device *pdev)
 {
 	struct mmc_host *mmc = platform_get_drvdata(pdev);
 	struct mxs_mmc_host *host = mmc_priv(mmc);
@@ -687,8 +687,6 @@ static int mxs_mmc_remove(struct platform_device *pdev)
 	clk_disable_unprepare(ssp->clk);
 
 	mmc_free_host(mmc);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -716,7 +714,7 @@ static SIMPLE_DEV_PM_OPS(mxs_mmc_pm_ops, mxs_mmc_suspend, mxs_mmc_resume);
 
 static struct platform_driver mxs_mmc_driver = {
 	.probe		= mxs_mmc_probe,
-	.remove		= mxs_mmc_remove,
+	.remove_new	= mxs_mmc_remove,
 	.driver		= {
 		.name	= DRIVER_NAME,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From 3de205a07a70cb0a025adbb9797c1102e6f3f376 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:14 +0800
Subject: [PATCH 1135/2047] mmc: sdhci-of-arasan: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Michal Simek <michal.simek@amd.com>
Link: https://lore.kernel.org/r/20230727070051.17778-25-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-arasan.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index 2f9de2518942..15025af326a2 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -2016,7 +2016,7 @@ err_pltfm_free:
 	return ret;
 }
 
-static int sdhci_arasan_remove(struct platform_device *pdev)
+static void sdhci_arasan_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -2034,8 +2034,6 @@ static int sdhci_arasan_remove(struct platform_device *pdev)
 	sdhci_pltfm_unregister(pdev);
 
 	clk_disable_unprepare(clk_ahb);
-
-	return 0;
 }
 
 static struct platform_driver sdhci_arasan_driver = {
@@ -2046,7 +2044,7 @@ static struct platform_driver sdhci_arasan_driver = {
 		.pm = &sdhci_arasan_dev_pm_ops,
 	},
 	.probe = sdhci_arasan_probe,
-	.remove = sdhci_arasan_remove,
+	.remove_new = sdhci_arasan_remove,
 };
 
 module_platform_driver(sdhci_arasan_driver);

From 5905a1f1f21cc6c9f4ac976920723da667fb18cc Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:15 +0800
Subject: [PATCH 1136/2047] mmc: sdhci-of-dwcmshc: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-26-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-dwcmshc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index 55e1e2d0494d..31c1892f4ecd 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -574,7 +574,7 @@ free_pltfm:
 	return err;
 }
 
-static int dwcmshc_remove(struct platform_device *pdev)
+static void dwcmshc_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -589,8 +589,6 @@ static int dwcmshc_remove(struct platform_device *pdev)
 		clk_bulk_disable_unprepare(RK35xx_MAX_CLKS,
 					   rk_priv->rockchip_clks);
 	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -657,7 +655,7 @@ static struct platform_driver sdhci_dwcmshc_driver = {
 		.pm = &dwcmshc_pmops,
 	},
 	.probe	= dwcmshc_probe,
-	.remove	= dwcmshc_remove,
+	.remove_new = dwcmshc_remove,
 };
 module_platform_driver(sdhci_dwcmshc_driver);
 

From c7d255148d2ac4f24adb91418ce824b785e13811 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:16 +0800
Subject: [PATCH 1137/2047] mmc: au1xmmc: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-27-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/au1xmmc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c
index 82dd0ae40305..b5a5c6a2fe8b 100644
--- a/drivers/mmc/host/au1xmmc.c
+++ b/drivers/mmc/host/au1xmmc.c
@@ -1114,7 +1114,7 @@ out0:
 	return ret;
 }
 
-static int au1xmmc_remove(struct platform_device *pdev)
+static void au1xmmc_remove(struct platform_device *pdev)
 {
 	struct au1xmmc_host *host = platform_get_drvdata(pdev);
 
@@ -1153,7 +1153,6 @@ static int au1xmmc_remove(struct platform_device *pdev)
 
 		mmc_free_host(host->mmc);
 	}
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -1185,7 +1184,7 @@ static int au1xmmc_resume(struct platform_device *pdev)
 
 static struct platform_driver au1xmmc_driver = {
 	.probe         = au1xmmc_probe,
-	.remove        = au1xmmc_remove,
+	.remove_new    = au1xmmc_remove,
 	.suspend       = au1xmmc_suspend,
 	.resume        = au1xmmc_resume,
 	.driver        = {

From fcbeadbec96df15339ee29e9fdc5a826e0621b15 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:17 +0800
Subject: [PATCH 1138/2047] mmc: cavium-octeon: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-28-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/cavium-octeon.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c
index 02429f7afb42..060ec4f4800f 100644
--- a/drivers/mmc/host/cavium-octeon.c
+++ b/drivers/mmc/host/cavium-octeon.c
@@ -296,7 +296,7 @@ error:
 	return ret;
 }
 
-static int octeon_mmc_remove(struct platform_device *pdev)
+static void octeon_mmc_remove(struct platform_device *pdev)
 {
 	struct cvm_mmc_host *host = platform_get_drvdata(pdev);
 	u64 dma_cfg;
@@ -311,7 +311,6 @@ static int octeon_mmc_remove(struct platform_device *pdev)
 	writeq(dma_cfg, host->dma_base + MIO_EMM_DMA_CFG(host));
 
 	octeon_mmc_set_shared_power(host, 0);
-	return 0;
 }
 
 static const struct of_device_id octeon_mmc_match[] = {
@@ -327,7 +326,7 @@ MODULE_DEVICE_TABLE(of, octeon_mmc_match);
 
 static struct platform_driver octeon_mmc_driver = {
 	.probe		= octeon_mmc_probe,
-	.remove		= octeon_mmc_remove,
+	.remove_new	= octeon_mmc_remove,
 	.driver		= {
 		.name	= KBUILD_MODNAME,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From fac44eb82692a236819947e02ed37c84815e15c5 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:18 +0800
Subject: [PATCH 1139/2047] mmc: pxamci: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-29-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/pxamci.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index bd53b17efe12..fae3192c3a14 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -782,7 +782,7 @@ out:
 	return ret;
 }
 
-static int pxamci_remove(struct platform_device *pdev)
+static void pxamci_remove(struct platform_device *pdev)
 {
 	struct mmc_host *mmc = platform_get_drvdata(pdev);
 
@@ -806,13 +806,11 @@ static int pxamci_remove(struct platform_device *pdev)
 
 		mmc_free_host(mmc);
 	}
-
-	return 0;
 }
 
 static struct platform_driver pxamci_driver = {
 	.probe		= pxamci_probe,
-	.remove		= pxamci_remove,
+	.remove_new	= pxamci_remove,
 	.driver		= {
 		.name	= DRIVER_NAME,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From 19d38f77216568947bf9c5d51514b39e53c230b5 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:19 +0800
Subject: [PATCH 1140/2047] mmc: moxart: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-30-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/moxart-mmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index 43c2a95bc29a..5cfdd3a86e54 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -687,7 +687,7 @@ out_mmc:
 	return ret;
 }
 
-static int moxart_remove(struct platform_device *pdev)
+static void moxart_remove(struct platform_device *pdev)
 {
 	struct mmc_host *mmc = dev_get_drvdata(&pdev->dev);
 	struct moxart_host *host = mmc_priv(mmc);
@@ -703,8 +703,6 @@ static int moxart_remove(struct platform_device *pdev)
 	writel(readl(host->base + REG_CLOCK_CONTROL) | CLK_OFF,
 	       host->base + REG_CLOCK_CONTROL);
 	mmc_free_host(mmc);
-
-	return 0;
 }
 
 static const struct of_device_id moxart_mmc_match[] = {
@@ -716,7 +714,7 @@ MODULE_DEVICE_TABLE(of, moxart_mmc_match);
 
 static struct platform_driver moxart_mmc_driver = {
 	.probe      = moxart_probe,
-	.remove     = moxart_remove,
+	.remove_new = moxart_remove,
 	.driver     = {
 		.name		= "mmc-moxart",
 		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,

From b9c3ea46595094eed23f9768b634e4e6f017e0d9 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:20 +0800
Subject: [PATCH 1141/2047] mmc: sdhci-omap: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-31-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-omap.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index 2593e3fbd0d9..1e0bc7bace1b 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -1393,7 +1393,7 @@ err_pltfm_free:
 	return ret;
 }
 
-static int sdhci_omap_remove(struct platform_device *pdev)
+static void sdhci_omap_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct sdhci_host *host = platform_get_drvdata(pdev);
@@ -1407,8 +1407,6 @@ static int sdhci_omap_remove(struct platform_device *pdev)
 	/* Ensure device gets disabled despite userspace sysfs config */
 	pm_runtime_force_suspend(dev);
 	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -1477,7 +1475,7 @@ static const struct dev_pm_ops sdhci_omap_dev_pm_ops = {
 
 static struct platform_driver sdhci_omap_driver = {
 	.probe = sdhci_omap_probe,
-	.remove = sdhci_omap_remove,
+	.remove_new = sdhci_omap_remove,
 	.driver = {
 		   .name = "sdhci-omap",
 		   .probe_type = PROBE_PREFER_ASYNCHRONOUS,

From f67cd7f6d3678dbe8a370655f41d4a9d85c73bbc Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:21 +0800
Subject: [PATCH 1142/2047] mmc: sdhci-of-aspeed: remove unneeded variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The variable 'dead' is redundant, let's remove it.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20230727070051.17778-32-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-aspeed.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sdhci-of-aspeed.c b/drivers/mmc/host/sdhci-of-aspeed.c
index 25b4073f698b..b4867bb4a564 100644
--- a/drivers/mmc/host/sdhci-of-aspeed.c
+++ b/drivers/mmc/host/sdhci-of-aspeed.c
@@ -454,12 +454,11 @@ static int aspeed_sdhci_remove(struct platform_device *pdev)
 {
 	struct sdhci_pltfm_host *pltfm_host;
 	struct sdhci_host *host;
-	int dead = 0;
 
 	host = platform_get_drvdata(pdev);
 	pltfm_host = sdhci_priv(host);
 
-	sdhci_remove_host(host, dead);
+	sdhci_remove_host(host, 0);
 
 	clk_disable_unprepare(pltfm_host->clk);
 

From a29e8b51a07e1785bb2a0cf334c801f2af7d69c5 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:22 +0800
Subject: [PATCH 1143/2047] mmc: sdhci-of-aspeed: Convert to platform remove
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-33-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-aspeed.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/mmc/host/sdhci-of-aspeed.c b/drivers/mmc/host/sdhci-of-aspeed.c
index b4867bb4a564..42d54532cabe 100644
--- a/drivers/mmc/host/sdhci-of-aspeed.c
+++ b/drivers/mmc/host/sdhci-of-aspeed.c
@@ -450,7 +450,7 @@ err_pltfm_free:
 	return ret;
 }
 
-static int aspeed_sdhci_remove(struct platform_device *pdev)
+static void aspeed_sdhci_remove(struct platform_device *pdev)
 {
 	struct sdhci_pltfm_host *pltfm_host;
 	struct sdhci_host *host;
@@ -463,8 +463,6 @@ static int aspeed_sdhci_remove(struct platform_device *pdev)
 	clk_disable_unprepare(pltfm_host->clk);
 
 	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 
 static const struct aspeed_sdhci_pdata ast2400_sdhci_pdata = {
@@ -520,7 +518,7 @@ static struct platform_driver aspeed_sdhci_driver = {
 		.of_match_table = aspeed_sdhci_of_match,
 	},
 	.probe		= aspeed_sdhci_probe,
-	.remove		= aspeed_sdhci_remove,
+	.remove_new	= aspeed_sdhci_remove,
 };
 
 static int aspeed_sdc_probe(struct platform_device *pdev)
@@ -573,13 +571,11 @@ err_clk:
 	return ret;
 }
 
-static int aspeed_sdc_remove(struct platform_device *pdev)
+static void aspeed_sdc_remove(struct platform_device *pdev)
 {
 	struct aspeed_sdc *sdc = dev_get_drvdata(&pdev->dev);
 
 	clk_disable_unprepare(sdc->clk);
-
-	return 0;
 }
 
 static const struct of_device_id aspeed_sdc_of_match[] = {
@@ -599,7 +595,7 @@ static struct platform_driver aspeed_sdc_driver = {
 		.of_match_table = aspeed_sdc_of_match,
 	},
 	.probe		= aspeed_sdc_probe,
-	.remove		= aspeed_sdc_remove,
+	.remove_new	= aspeed_sdc_remove,
 };
 
 #if defined(CONFIG_MMC_SDHCI_OF_ASPEED_TEST)

From 20c57c3c0f1c5982c33edc6d5ad9f920a97a9e08 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:23 +0800
Subject: [PATCH 1144/2047] mmc: meson-mx-sdio: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20230727070051.17778-34-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/meson-mx-sdio.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/meson-mx-sdio.c b/drivers/mmc/host/meson-mx-sdio.c
index 3a19a05ef55a..a11577f2ee69 100644
--- a/drivers/mmc/host/meson-mx-sdio.c
+++ b/drivers/mmc/host/meson-mx-sdio.c
@@ -728,7 +728,7 @@ error_unregister_slot_pdev:
 	return ret;
 }
 
-static int meson_mx_mmc_remove(struct platform_device *pdev)
+static void meson_mx_mmc_remove(struct platform_device *pdev)
 {
 	struct meson_mx_mmc_host *host = platform_get_drvdata(pdev);
 	struct device *slot_dev = mmc_dev(host->mmc);
@@ -743,8 +743,6 @@ static int meson_mx_mmc_remove(struct platform_device *pdev)
 	clk_disable_unprepare(host->core_clk);
 
 	mmc_free_host(host->mmc);
-
-	return 0;
 }
 
 static const struct of_device_id meson_mx_mmc_of_match[] = {
@@ -756,7 +754,7 @@ MODULE_DEVICE_TABLE(of, meson_mx_mmc_of_match);
 
 static struct platform_driver meson_mx_mmc_driver = {
 	.probe   = meson_mx_mmc_probe,
-	.remove  = meson_mx_mmc_remove,
+	.remove_new = meson_mx_mmc_remove,
 	.driver  = {
 		.name = "meson-mx-sdio",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From c618ba0f419d8231c23868893af6fdb42c128567 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:24 +0800
Subject: [PATCH 1145/2047] mmc: sdhci-sprd: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Chunyan Zhang <zhang.lyra@gmail.com>
Link: https://lore.kernel.org/r/20230727070051.17778-35-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-sprd.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c
index a21d6524d7ca..28639d6621fd 100644
--- a/drivers/mmc/host/sdhci-sprd.c
+++ b/drivers/mmc/host/sdhci-sprd.c
@@ -719,7 +719,7 @@ pltfm_free:
 	return ret;
 }
 
-static int sdhci_sprd_remove(struct platform_device *pdev)
+static void sdhci_sprd_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_sprd_host *sprd_host = TO_SPRD_HOST(host);
@@ -731,8 +731,6 @@ static int sdhci_sprd_remove(struct platform_device *pdev)
 	clk_disable_unprepare(sprd_host->clk_2x_enable);
 
 	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 
 static const struct of_device_id sdhci_sprd_of_match[] = {
@@ -799,7 +797,7 @@ static const struct dev_pm_ops sdhci_sprd_pm_ops = {
 
 static struct platform_driver sdhci_sprd_driver = {
 	.probe = sdhci_sprd_probe,
-	.remove = sdhci_sprd_remove,
+	.remove_new = sdhci_sprd_remove,
 	.driver = {
 		.name = "sdhci_sprd_r11",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From 8d9b1788ea4232d4f6dbd7fdbf9a983d6b630810 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:25 +0800
Subject: [PATCH 1146/2047] mmc: sdhci-tegra: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-36-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-tegra.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 1e1eac953691..1ad0a6b3a2eb 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -1817,7 +1817,7 @@ err_parse_dt:
 	return rc;
 }
 
-static int sdhci_tegra_remove(struct platform_device *pdev)
+static void sdhci_tegra_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -1833,8 +1833,6 @@ static int sdhci_tegra_remove(struct platform_device *pdev)
 
 	clk_disable_unprepare(tegra_host->tmclk);
 	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 
 static int __maybe_unused sdhci_tegra_runtime_suspend(struct device *dev)
@@ -1932,7 +1930,7 @@ static struct platform_driver sdhci_tegra_driver = {
 		.pm	= &sdhci_tegra_dev_pm_ops,
 	},
 	.probe		= sdhci_tegra_probe,
-	.remove		= sdhci_tegra_remove,
+	.remove_new	= sdhci_tegra_remove,
 };
 
 module_platform_driver(sdhci_tegra_driver);

From a2b6de8072c3012d50f9851ee80ba129cac246b3 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:26 +0800
Subject: [PATCH 1147/2047] mmc: sdhci-acpi: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-37-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-acpi.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index edf2e6c14dc6..acf5fc3ad7e4 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -917,7 +917,7 @@ err_free:
 	return err;
 }
 
-static int sdhci_acpi_remove(struct platform_device *pdev)
+static void sdhci_acpi_remove(struct platform_device *pdev)
 {
 	struct sdhci_acpi_host *c = platform_get_drvdata(pdev);
 	struct device *dev = &pdev->dev;
@@ -939,8 +939,6 @@ static int sdhci_acpi_remove(struct platform_device *pdev)
 		c->slot->free_slot(pdev);
 
 	sdhci_free_host(c->host);
-
-	return 0;
 }
 
 static void __maybe_unused sdhci_acpi_reset_signal_voltage_if_needed(
@@ -1033,7 +1031,7 @@ static struct platform_driver sdhci_acpi_driver = {
 		.pm			= &sdhci_acpi_pm_ops,
 	},
 	.probe	= sdhci_acpi_probe,
-	.remove	= sdhci_acpi_remove,
+	.remove_new = sdhci_acpi_remove,
 };
 
 module_platform_driver(sdhci_acpi_driver);

From 53b9222e3d74bf5f0f851741f0e42efe463bf0d6 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:27 +0800
Subject: [PATCH 1148/2047] mmc: sdhci-esdhc-imx: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-38-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index e767fe058023..a8bdb3c5ecd5 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -1802,7 +1802,7 @@ free_sdhci:
 	return err;
 }
 
-static int sdhci_esdhc_imx_remove(struct platform_device *pdev)
+static void sdhci_esdhc_imx_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -1824,8 +1824,6 @@ static int sdhci_esdhc_imx_remove(struct platform_device *pdev)
 		cpu_latency_qos_remove_request(&imx_data->pm_qos_req);
 
 	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -1986,7 +1984,7 @@ static struct platform_driver sdhci_esdhc_imx_driver = {
 		.pm	= &sdhci_esdhc_pmops,
 	},
 	.probe		= sdhci_esdhc_imx_probe,
-	.remove		= sdhci_esdhc_imx_remove,
+	.remove_new	= sdhci_esdhc_imx_remove,
 };
 
 module_platform_driver(sdhci_esdhc_imx_driver);

From a7dde463c701e89d51989582ee101a848d8c12ad Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:28 +0800
Subject: [PATCH 1149/2047] mmc: sdhci-msm: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-39-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-msm.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index c0ce187e740a..668e0aceeeba 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -2668,7 +2668,7 @@ pltfm_free:
 	return ret;
 }
 
-static int sdhci_msm_remove(struct platform_device *pdev)
+static void sdhci_msm_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -2687,7 +2687,6 @@ static int sdhci_msm_remove(struct platform_device *pdev)
 	if (!IS_ERR(msm_host->bus_clk))
 		clk_disable_unprepare(msm_host->bus_clk);
 	sdhci_pltfm_free(pdev);
-	return 0;
 }
 
 static __maybe_unused int sdhci_msm_runtime_suspend(struct device *dev)
@@ -2740,7 +2739,7 @@ static const struct dev_pm_ops sdhci_msm_pm_ops = {
 
 static struct platform_driver sdhci_msm_driver = {
 	.probe = sdhci_msm_probe,
-	.remove = sdhci_msm_remove,
+	.remove_new = sdhci_msm_remove,
 	.driver = {
 		   .name = "sdhci_msm",
 		   .of_match_table = sdhci_msm_dt_match,

From 9f13caa4cb4aec8268af53a732873b5d833a18a5 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:29 +0800
Subject: [PATCH 1150/2047] mmc: alcor: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-40-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/alcor.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/alcor.c b/drivers/mmc/host/alcor.c
index d01df01d4b4d..42aa43740ba8 100644
--- a/drivers/mmc/host/alcor.c
+++ b/drivers/mmc/host/alcor.c
@@ -1125,7 +1125,7 @@ free_host:
 	return ret;
 }
 
-static int alcor_pci_sdmmc_drv_remove(struct platform_device *pdev)
+static void alcor_pci_sdmmc_drv_remove(struct platform_device *pdev)
 {
 	struct alcor_sdmmc_host *host = dev_get_drvdata(&pdev->dev);
 	struct mmc_host *mmc = mmc_from_priv(host);
@@ -1136,8 +1136,6 @@ static int alcor_pci_sdmmc_drv_remove(struct platform_device *pdev)
 	alcor_hw_uninit(host);
 	mmc_remove_host(mmc);
 	mmc_free_host(mmc);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -1177,7 +1175,7 @@ MODULE_DEVICE_TABLE(platform, alcor_pci_sdmmc_ids);
 
 static struct platform_driver alcor_pci_sdmmc_driver = {
 	.probe		= alcor_pci_sdmmc_drv_probe,
-	.remove		= alcor_pci_sdmmc_drv_remove,
+	.remove_new	= alcor_pci_sdmmc_drv_remove,
 	.id_table	= alcor_pci_sdmmc_ids,
 	.driver		= {
 		.name	= DRV_NAME_ALCOR_PCI_SDMMC,

From 3a1d0a8d1cd6c3a7b7c7bbc35dae7443752a2344 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:30 +0800
Subject: [PATCH 1151/2047] mmc: dw_mmc: rockchip: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20230727070051.17778-41-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/dw_mmc-rockchip.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c
index 2a99f15f527f..b07190ba4b7a 100644
--- a/drivers/mmc/host/dw_mmc-rockchip.c
+++ b/drivers/mmc/host/dw_mmc-rockchip.c
@@ -371,15 +371,13 @@ static int dw_mci_rockchip_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int dw_mci_rockchip_remove(struct platform_device *pdev)
+static void dw_mci_rockchip_remove(struct platform_device *pdev)
 {
 	pm_runtime_get_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_put_noidle(&pdev->dev);
 
 	dw_mci_pltfm_remove(pdev);
-
-	return 0;
 }
 
 static const struct dev_pm_ops dw_mci_rockchip_dev_pm_ops = {
@@ -392,7 +390,7 @@ static const struct dev_pm_ops dw_mci_rockchip_dev_pm_ops = {
 
 static struct platform_driver dw_mci_rockchip_pltfm_driver = {
 	.probe		= dw_mci_rockchip_probe,
-	.remove		= dw_mci_rockchip_remove,
+	.remove_new	= dw_mci_rockchip_remove,
 	.driver		= {
 		.name		= "dwmmc_rockchip",
 		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,

From 65c86da4b1b7775e8c293b20c004b127913e66f6 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:31 +0800
Subject: [PATCH 1152/2047] mmc: owl: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-42-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/owl-mmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/owl-mmc.c b/drivers/mmc/host/owl-mmc.c
index 62d2993573d0..fc08f25c34eb 100644
--- a/drivers/mmc/host/owl-mmc.c
+++ b/drivers/mmc/host/owl-mmc.c
@@ -668,7 +668,7 @@ err_free_host:
 	return ret;
 }
 
-static int owl_mmc_remove(struct platform_device *pdev)
+static void owl_mmc_remove(struct platform_device *pdev)
 {
 	struct mmc_host	*mmc = platform_get_drvdata(pdev);
 	struct owl_mmc_host *owl_host = mmc_priv(mmc);
@@ -677,8 +677,6 @@ static int owl_mmc_remove(struct platform_device *pdev)
 	disable_irq(owl_host->irq);
 	dma_release_channel(owl_host->dma);
 	mmc_free_host(mmc);
-
-	return 0;
 }
 
 static const struct of_device_id owl_mmc_of_match[] = {
@@ -694,7 +692,7 @@ static struct platform_driver owl_mmc_driver = {
 		.of_match_table = owl_mmc_of_match,
 	},
 	.probe		= owl_mmc_probe,
-	.remove		= owl_mmc_remove,
+	.remove_new	= owl_mmc_remove,
 };
 module_platform_driver(owl_mmc_driver);
 

From f9b85b78656b5c132066d24690de2ff5c72e5b90 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:32 +0800
Subject: [PATCH 1153/2047] mmc: wbsd: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-43-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/wbsd.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index 0589d169e8fb..001a468bc149 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -1754,11 +1754,9 @@ static int wbsd_probe(struct platform_device *dev)
 	return wbsd_init(&dev->dev, param_io, param_irq, param_dma, 0);
 }
 
-static int wbsd_remove(struct platform_device *dev)
+static void wbsd_remove(struct platform_device *dev)
 {
 	wbsd_shutdown(&dev->dev, 0);
-
-	return 0;
 }
 
 /*
@@ -1900,8 +1898,7 @@ static struct platform_device *wbsd_device;
 
 static struct platform_driver wbsd_driver = {
 	.probe		= wbsd_probe,
-	.remove		= wbsd_remove,
-
+	.remove_new	= wbsd_remove,
 	.suspend	= wbsd_platform_suspend,
 	.resume		= wbsd_platform_resume,
 	.driver		= {

From 7a0b007f00a3d69ad08d47cfc52fabf97eca092d Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:33 +0800
Subject: [PATCH 1154/2047] mmc: usdhi60rol0: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Link: https://lore.kernel.org/r/20230727070051.17778-44-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/usdhi6rol0.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
index 2e17903658fc..6e421445d56c 100644
--- a/drivers/mmc/host/usdhi6rol0.c
+++ b/drivers/mmc/host/usdhi6rol0.c
@@ -1884,7 +1884,7 @@ e_free_mmc:
 	return ret;
 }
 
-static int usdhi6_remove(struct platform_device *pdev)
+static void usdhi6_remove(struct platform_device *pdev)
 {
 	struct usdhi6_host *host = platform_get_drvdata(pdev);
 
@@ -1895,13 +1895,11 @@ static int usdhi6_remove(struct platform_device *pdev)
 	usdhi6_dma_release(host);
 	clk_disable_unprepare(host->clk);
 	mmc_free_host(host->mmc);
-
-	return 0;
 }
 
 static struct platform_driver usdhi6_driver = {
 	.probe		= usdhi6_probe,
-	.remove		= usdhi6_remove,
+	.remove_new	= usdhi6_remove,
 	.driver		= {
 		.name	= "usdhi6rol0",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From ee65ea2b8302f0dc189356f7d130fa0509b1c252 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:34 +0800
Subject: [PATCH 1155/2047] mmc: atmel-mci: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Claudiu Beznea <claudiu.beznea@tuxon.dev>
Link: https://lore.kernel.org/r/20230727070051.17778-45-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/atmel-mci.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 2fdf352b3d86..1f89d02c4685 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -2599,7 +2599,7 @@ err_dma_probe_defer:
 	return ret;
 }
 
-static int atmci_remove(struct platform_device *pdev)
+static void atmci_remove(struct platform_device *pdev)
 {
 	struct atmel_mci	*host = platform_get_drvdata(pdev);
 	unsigned int		i;
@@ -2629,8 +2629,6 @@ static int atmci_remove(struct platform_device *pdev)
 
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_put_noidle(&pdev->dev);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -2663,7 +2661,7 @@ static const struct dev_pm_ops atmci_dev_pm_ops = {
 
 static struct platform_driver atmci_driver = {
 	.probe		= atmci_probe,
-	.remove		= atmci_remove,
+	.remove_new	= atmci_remove,
 	.driver		= {
 		.name		= "atmel_mci",
 		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,

From 887c1331ef2570a7ae58f41a33d3aaddfe2e5587 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:35 +0800
Subject: [PATCH 1156/2047] mmc: sdhci-st: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-46-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-st.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
index 97f350953c4b..14fc2c386bd3 100644
--- a/drivers/mmc/host/sdhci-st.c
+++ b/drivers/mmc/host/sdhci-st.c
@@ -431,7 +431,7 @@ err_pltfm_init:
 	return ret;
 }
 
-static int sdhci_st_remove(struct platform_device *pdev)
+static void sdhci_st_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -443,8 +443,6 @@ static int sdhci_st_remove(struct platform_device *pdev)
 	clk_disable_unprepare(pdata->icnclk);
 
 	reset_control_assert(rstc);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -507,7 +505,7 @@ MODULE_DEVICE_TABLE(of, st_sdhci_match);
 
 static struct platform_driver sdhci_st_driver = {
 	.probe = sdhci_st_probe,
-	.remove = sdhci_st_remove,
+	.remove_new = sdhci_st_remove,
 	.driver = {
 		   .name = "sdhci-st",
 		   .probe_type = PROBE_PREFER_ASYNCHRONOUS,

From 2ababbdc933836937359e0ec7ed0bdfda96000a0 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:36 +0800
Subject: [PATCH 1157/2047] mmc: wmt-sdmmc: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-47-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/wmt-sdmmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
index f860f363be56..77d5f1d24489 100644
--- a/drivers/mmc/host/wmt-sdmmc.c
+++ b/drivers/mmc/host/wmt-sdmmc.c
@@ -879,7 +879,7 @@ fail1:
 	return ret;
 }
 
-static int wmt_mci_remove(struct platform_device *pdev)
+static void wmt_mci_remove(struct platform_device *pdev)
 {
 	struct mmc_host *mmc;
 	struct wmt_mci_priv *priv;
@@ -917,8 +917,6 @@ static int wmt_mci_remove(struct platform_device *pdev)
 	mmc_free_host(mmc);
 
 	dev_info(&pdev->dev, "WMT MCI device removed\n");
-
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -988,7 +986,7 @@ static const struct dev_pm_ops wmt_mci_pm = {
 
 static struct platform_driver wmt_mci_driver = {
 	.probe = wmt_mci_probe,
-	.remove = wmt_mci_remove,
+	.remove_new = wmt_mci_remove,
 	.driver = {
 		.name = DRIVER_NAME,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From bd0e512bff1a264d924c80dd10c2f53770849f2f Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:37 +0800
Subject: [PATCH 1158/2047] mmc: sdhci-esdhc-mcf: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Angelo Dureghello <angelo.dureghello@timesys.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-48-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-esdhc-mcf.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-esdhc-mcf.c b/drivers/mmc/host/sdhci-esdhc-mcf.c
index 05926bf5ecf9..a07f8333cd6b 100644
--- a/drivers/mmc/host/sdhci-esdhc-mcf.c
+++ b/drivers/mmc/host/sdhci-esdhc-mcf.c
@@ -489,7 +489,7 @@ err_exit:
 	return err;
 }
 
-static int sdhci_esdhc_mcf_remove(struct platform_device *pdev)
+static void sdhci_esdhc_mcf_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -502,8 +502,6 @@ static int sdhci_esdhc_mcf_remove(struct platform_device *pdev)
 	clk_disable_unprepare(mcf_data->clk_per);
 
 	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 
 static struct platform_driver sdhci_esdhc_mcf_driver = {
@@ -512,7 +510,7 @@ static struct platform_driver sdhci_esdhc_mcf_driver = {
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 	},
 	.probe = sdhci_esdhc_mcf_probe,
-	.remove = sdhci_esdhc_mcf_remove,
+	.remove_new = sdhci_esdhc_mcf_remove,
 };
 
 module_platform_driver(sdhci_esdhc_mcf_driver);

From 3ed9c648eacdaff791c429b75d121a20f15e6967 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:38 +0800
Subject: [PATCH 1159/2047] mmc: sunplus-mmc: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-49-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sunplus-mmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sunplus-mmc.c b/drivers/mmc/host/sunplus-mmc.c
index 37ebed040807..13c7cc0b6180 100644
--- a/drivers/mmc/host/sunplus-mmc.c
+++ b/drivers/mmc/host/sunplus-mmc.c
@@ -939,7 +939,7 @@ clk_disable:
 	return ret;
 }
 
-static int spmmc_drv_remove(struct platform_device *dev)
+static void spmmc_drv_remove(struct platform_device *dev)
 {
 	struct spmmc_host *host = platform_get_drvdata(dev);
 
@@ -948,8 +948,6 @@ static int spmmc_drv_remove(struct platform_device *dev)
 	clk_disable_unprepare(host->clk);
 	pm_runtime_put_noidle(&dev->dev);
 	pm_runtime_disable(&dev->dev);
-
-	return 0;
 }
 
 static int spmmc_pm_runtime_suspend(struct device *dev)
@@ -984,7 +982,7 @@ MODULE_DEVICE_TABLE(of, spmmc_of_table);
 
 static struct platform_driver spmmc_driver = {
 	.probe = spmmc_drv_probe,
-	.remove = spmmc_drv_remove,
+	.remove_new = spmmc_drv_remove,
 	.driver = {
 		.name = "spmmc",
 		.pm = pm_ptr(&spmmc_pm_ops),

From ba082d6ed5d0857d57644f61acb20e4cf508c1a3 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:39 +0800
Subject: [PATCH 1160/2047] mmc: sdhci-spear: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-50-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-spear.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
index 73ca07cdfaa9..c81bdfa97b89 100644
--- a/drivers/mmc/host/sdhci-spear.c
+++ b/drivers/mmc/host/sdhci-spear.c
@@ -117,7 +117,7 @@ err:
 	return ret;
 }
 
-static int sdhci_remove(struct platform_device *pdev)
+static void sdhci_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct spear_sdhci *sdhci = sdhci_priv(host);
@@ -131,8 +131,6 @@ static int sdhci_remove(struct platform_device *pdev)
 	sdhci_remove_host(host, dead);
 	clk_disable_unprepare(sdhci->clk);
 	sdhci_free_host(host);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -184,7 +182,7 @@ static struct platform_driver sdhci_driver = {
 		.of_match_table = sdhci_spear_id_table,
 	},
 	.probe		= sdhci_probe,
-	.remove		= sdhci_remove,
+	.remove_new	= sdhci_remove,
 };
 
 module_platform_driver(sdhci_driver);

From 0484ed31748e293e4c4897faa8c3e3e2a4f398e5 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:40 +0800
Subject: [PATCH 1161/2047] mmc: mxcmmc: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-51-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mxcmmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index 9b6bdc1b0758..5b3ab0e20505 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -1162,7 +1162,7 @@ out_free:
 	return ret;
 }
 
-static int mxcmci_remove(struct platform_device *pdev)
+static void mxcmci_remove(struct platform_device *pdev)
 {
 	struct mmc_host *mmc = platform_get_drvdata(pdev);
 	struct mxcmci_host *host = mmc_priv(mmc);
@@ -1179,8 +1179,6 @@ static int mxcmci_remove(struct platform_device *pdev)
 	clk_disable_unprepare(host->clk_ipg);
 
 	mmc_free_host(mmc);
-
-	return 0;
 }
 
 static int mxcmci_suspend(struct device *dev)
@@ -1214,7 +1212,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume);
 
 static struct platform_driver mxcmci_driver = {
 	.probe		= mxcmci_probe,
-	.remove		= mxcmci_remove,
+	.remove_new	= mxcmci_remove,
 	.driver		= {
 		.name		= DRIVER_NAME,
 		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,

From 0fbfbfbaafb45fd9919568fb86e6c0f0bbc5668a Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:41 +0800
Subject: [PATCH 1162/2047] mmc: mvsdio: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-52-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mvsdio.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index b4f6a0a2fcb5..ca01b7d204ba 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -796,7 +796,7 @@ out:
 	return ret;
 }
 
-static int mvsd_remove(struct platform_device *pdev)
+static void mvsd_remove(struct platform_device *pdev)
 {
 	struct mmc_host *mmc = platform_get_drvdata(pdev);
 
@@ -809,8 +809,6 @@ static int mvsd_remove(struct platform_device *pdev)
 	if (!IS_ERR(host->clk))
 		clk_disable_unprepare(host->clk);
 	mmc_free_host(mmc);
-
-	return 0;
 }
 
 static const struct of_device_id mvsdio_dt_ids[] = {
@@ -821,7 +819,7 @@ MODULE_DEVICE_TABLE(of, mvsdio_dt_ids);
 
 static struct platform_driver mvsd_driver = {
 	.probe		= mvsd_probe,
-	.remove		= mvsd_remove,
+	.remove_new	= mvsd_remove,
 	.driver		= {
 		.name	= DRIVER_NAME,
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From 7f6b0361bdbc6438400d4610d6eb63f6f3fb4a38 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:42 +0800
Subject: [PATCH 1163/2047] mmc: pwrseq_simple: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-53-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/pwrseq_simple.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index 3bac1e71411b..df9588503ad0 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -142,18 +142,16 @@ static int mmc_pwrseq_simple_probe(struct platform_device *pdev)
 	return mmc_pwrseq_register(&pwrseq->pwrseq);
 }
 
-static int mmc_pwrseq_simple_remove(struct platform_device *pdev)
+static void mmc_pwrseq_simple_remove(struct platform_device *pdev)
 {
 	struct mmc_pwrseq_simple *pwrseq = platform_get_drvdata(pdev);
 
 	mmc_pwrseq_unregister(&pwrseq->pwrseq);
-
-	return 0;
 }
 
 static struct platform_driver mmc_pwrseq_simple_driver = {
 	.probe = mmc_pwrseq_simple_probe,
-	.remove = mmc_pwrseq_simple_remove,
+	.remove_new = mmc_pwrseq_simple_remove,
 	.driver = {
 		.name = "pwrseq_simple",
 		.of_match_table = mmc_pwrseq_simple_of_match,

From 884e869602e37c9cfd9bc5183c0b8e9f5b7435a9 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:43 +0800
Subject: [PATCH 1164/2047] mmc: pwrseq: sd8787: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-54-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/pwrseq_sd8787.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/core/pwrseq_sd8787.c b/drivers/mmc/core/pwrseq_sd8787.c
index 0c5f5e371e1f..0c5808fc3206 100644
--- a/drivers/mmc/core/pwrseq_sd8787.c
+++ b/drivers/mmc/core/pwrseq_sd8787.c
@@ -113,18 +113,16 @@ static int mmc_pwrseq_sd8787_probe(struct platform_device *pdev)
 	return mmc_pwrseq_register(&pwrseq->pwrseq);
 }
 
-static int mmc_pwrseq_sd8787_remove(struct platform_device *pdev)
+static void mmc_pwrseq_sd8787_remove(struct platform_device *pdev)
 {
 	struct mmc_pwrseq_sd8787 *pwrseq = platform_get_drvdata(pdev);
 
 	mmc_pwrseq_unregister(&pwrseq->pwrseq);
-
-	return 0;
 }
 
 static struct platform_driver mmc_pwrseq_sd8787_driver = {
 	.probe = mmc_pwrseq_sd8787_probe,
-	.remove = mmc_pwrseq_sd8787_remove,
+	.remove_new = mmc_pwrseq_sd8787_remove,
 	.driver = {
 		.name = "pwrseq_sd8787",
 		.of_match_table = mmc_pwrseq_sd8787_of_match,

From 5c57f6c47da2f228854cd9547f005efad8a0e07b Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:44 +0800
Subject: [PATCH 1165/2047] mmc: pwrseq: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-55-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/pwrseq_emmc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/core/pwrseq_emmc.c b/drivers/mmc/core/pwrseq_emmc.c
index f6dde9edd7a3..3b6d69cefb4e 100644
--- a/drivers/mmc/core/pwrseq_emmc.c
+++ b/drivers/mmc/core/pwrseq_emmc.c
@@ -90,14 +90,12 @@ static int mmc_pwrseq_emmc_probe(struct platform_device *pdev)
 	return mmc_pwrseq_register(&pwrseq->pwrseq);
 }
 
-static int mmc_pwrseq_emmc_remove(struct platform_device *pdev)
+static void mmc_pwrseq_emmc_remove(struct platform_device *pdev)
 {
 	struct mmc_pwrseq_emmc *pwrseq = platform_get_drvdata(pdev);
 
 	unregister_restart_handler(&pwrseq->reset_nb);
 	mmc_pwrseq_unregister(&pwrseq->pwrseq);
-
-	return 0;
 }
 
 static const struct of_device_id mmc_pwrseq_emmc_of_match[] = {
@@ -109,7 +107,7 @@ MODULE_DEVICE_TABLE(of, mmc_pwrseq_emmc_of_match);
 
 static struct platform_driver mmc_pwrseq_emmc_driver = {
 	.probe = mmc_pwrseq_emmc_probe,
-	.remove = mmc_pwrseq_emmc_remove,
+	.remove_new = mmc_pwrseq_emmc_remove,
 	.driver = {
 		.name = "pwrseq_emmc",
 		.of_match_table = mmc_pwrseq_emmc_of_match,

From 80c602b1144f926839d5a74e16fc0fb3c1284649 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:45 +0800
Subject: [PATCH 1166/2047] mmc: renesas_sdhi: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230727070051.17778-56-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/renesas_sdhi.h               | 2 +-
 drivers/mmc/host/renesas_sdhi_core.c          | 4 +---
 drivers/mmc/host/renesas_sdhi_internal_dmac.c | 2 +-
 drivers/mmc/host/renesas_sdhi_sys_dmac.c      | 2 +-
 4 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/host/renesas_sdhi.h b/drivers/mmc/host/renesas_sdhi.h
index 68da3da9e2e5..c1fb9740eab0 100644
--- a/drivers/mmc/host/renesas_sdhi.h
+++ b/drivers/mmc/host/renesas_sdhi.h
@@ -101,5 +101,5 @@ int renesas_sdhi_probe(struct platform_device *pdev,
 		       const struct tmio_mmc_dma_ops *dma_ops,
 		       const struct renesas_sdhi_of_data *of_data,
 		       const struct renesas_sdhi_quirks *quirks);
-int renesas_sdhi_remove(struct platform_device *pdev);
+void renesas_sdhi_remove(struct platform_device *pdev);
 #endif
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index 10dbdb4abc93..c675dec587ef 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -1149,15 +1149,13 @@ efree:
 }
 EXPORT_SYMBOL_GPL(renesas_sdhi_probe);
 
-int renesas_sdhi_remove(struct platform_device *pdev)
+void renesas_sdhi_remove(struct platform_device *pdev)
 {
 	struct tmio_mmc_host *host = platform_get_drvdata(pdev);
 
 	tmio_mmc_host_remove(host);
 	renesas_sdhi_clk_disable(host);
 	tmio_mmc_host_free(host);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(renesas_sdhi_remove);
 
diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
index 4cc5bcb3213a..53d34c3eddce 100644
--- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
@@ -610,7 +610,7 @@ static struct platform_driver renesas_internal_dmac_sdhi_driver = {
 		.of_match_table = renesas_sdhi_internal_dmac_of_match,
 	},
 	.probe		= renesas_sdhi_internal_dmac_probe,
-	.remove		= renesas_sdhi_remove,
+	.remove_new	= renesas_sdhi_remove,
 };
 
 module_platform_driver(renesas_internal_dmac_sdhi_driver);
diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c
index 452982e670d2..9cf7f9feab72 100644
--- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c
@@ -471,7 +471,7 @@ static struct platform_driver renesas_sys_dmac_sdhi_driver = {
 		.of_match_table = renesas_sdhi_sys_dmac_of_match,
 	},
 	.probe		= renesas_sdhi_sys_dmac_probe,
-	.remove		= renesas_sdhi_remove,
+	.remove_new	= renesas_sdhi_remove,
 };
 
 module_platform_driver(renesas_sys_dmac_sdhi_driver);

From 1691c261aec44f5262dbcf8846100dc4f256f3dd Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:46 +0800
Subject: [PATCH 1167/2047] mmc: Convert to platform remove callback returning
 void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Florian Fainelli <florian.fainelli@broadcom.com> # Broadcom
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-57-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-bcm-kona.c  | 2 +-
 drivers/mmc/host/sdhci-brcmstb.c   | 2 +-
 drivers/mmc/host/sdhci-cadence.c   | 2 +-
 drivers/mmc/host/sdhci-dove.c      | 2 +-
 drivers/mmc/host/sdhci-iproc.c     | 2 +-
 drivers/mmc/host/sdhci-of-esdhc.c  | 2 +-
 drivers/mmc/host/sdhci-of-hlwd.c   | 2 +-
 drivers/mmc/host/sdhci-of-sparx5.c | 2 +-
 drivers/mmc/host/sdhci-pltfm.c     | 4 +---
 drivers/mmc/host/sdhci-pltfm.h     | 2 +-
 drivers/mmc/host/sdhci-pxav2.c     | 2 +-
 11 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/mmc/host/sdhci-bcm-kona.c b/drivers/mmc/host/sdhci-bcm-kona.c
index ef1fb383e1e7..02bb871e838f 100644
--- a/drivers/mmc/host/sdhci-bcm-kona.c
+++ b/drivers/mmc/host/sdhci-bcm-kona.c
@@ -318,7 +318,7 @@ static struct platform_driver sdhci_bcm_kona_driver = {
 		.of_match_table = sdhci_bcm_kona_of_match,
 	},
 	.probe		= sdhci_bcm_kona_probe,
-	.remove		= sdhci_pltfm_unregister,
+	.remove_new	= sdhci_pltfm_unregister,
 };
 module_platform_driver(sdhci_bcm_kona_driver);
 
diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c
index 4c22337199cf..a2b6d8f2eeb6 100644
--- a/drivers/mmc/host/sdhci-brcmstb.c
+++ b/drivers/mmc/host/sdhci-brcmstb.c
@@ -430,7 +430,7 @@ static struct platform_driver sdhci_brcmstb_driver = {
 		.of_match_table = of_match_ptr(sdhci_brcm_of_match),
 	},
 	.probe		= sdhci_brcmstb_probe,
-	.remove		= sdhci_pltfm_unregister,
+	.remove_new	= sdhci_pltfm_unregister,
 	.shutdown	= sdhci_brcmstb_shutdown,
 };
 
diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c
index 25d9849ef0d1..8cdc178ec1ba 100644
--- a/drivers/mmc/host/sdhci-cadence.c
+++ b/drivers/mmc/host/sdhci-cadence.c
@@ -617,7 +617,7 @@ static struct platform_driver sdhci_cdns_driver = {
 		.of_match_table = sdhci_cdns_match,
 	},
 	.probe = sdhci_cdns_probe,
-	.remove = sdhci_pltfm_unregister,
+	.remove_new = sdhci_pltfm_unregister,
 };
 module_platform_driver(sdhci_cdns_driver);
 
diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c
index 5e5bf82e5976..75335dbf223c 100644
--- a/drivers/mmc/host/sdhci-dove.c
+++ b/drivers/mmc/host/sdhci-dove.c
@@ -110,7 +110,7 @@ static struct platform_driver sdhci_dove_driver = {
 		.of_match_table = sdhci_dove_of_match_table,
 	},
 	.probe		= sdhci_dove_probe,
-	.remove		= sdhci_pltfm_unregister,
+	.remove_new	= sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_dove_driver);
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index 6a93b1ee0b30..b1547bcbec32 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -432,7 +432,7 @@ static struct platform_driver sdhci_iproc_driver = {
 		.pm = &sdhci_pltfm_pmops,
 	},
 	.probe = sdhci_iproc_probe,
-	.remove = sdhci_pltfm_unregister,
+	.remove_new = sdhci_pltfm_unregister,
 	.shutdown = sdhci_iproc_shutdown,
 };
 module_platform_driver(sdhci_iproc_driver);
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 48ca1cf15b19..5072b59f6165 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -1521,7 +1521,7 @@ static struct platform_driver sdhci_esdhc_driver = {
 		.pm = &esdhc_of_dev_pm_ops,
 	},
 	.probe = sdhci_esdhc_probe,
-	.remove = sdhci_pltfm_unregister,
+	.remove_new = sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_esdhc_driver);
diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c
index 12675797b296..cba3ba48e9dc 100644
--- a/drivers/mmc/host/sdhci-of-hlwd.c
+++ b/drivers/mmc/host/sdhci-of-hlwd.c
@@ -85,7 +85,7 @@ static struct platform_driver sdhci_hlwd_driver = {
 		.pm = &sdhci_pltfm_pmops,
 	},
 	.probe = sdhci_hlwd_probe,
-	.remove = sdhci_pltfm_unregister,
+	.remove_new = sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_hlwd_driver);
diff --git a/drivers/mmc/host/sdhci-of-sparx5.c b/drivers/mmc/host/sdhci-of-sparx5.c
index 2e983cf49bc6..653480dbe6bb 100644
--- a/drivers/mmc/host/sdhci-of-sparx5.c
+++ b/drivers/mmc/host/sdhci-of-sparx5.c
@@ -260,7 +260,7 @@ static struct platform_driver sdhci_sparx5_driver = {
 		.pm = &sdhci_pltfm_pmops,
 	},
 	.probe = sdhci_sparx5_probe,
-	.remove = sdhci_pltfm_unregister,
+	.remove_new = sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_sparx5_driver);
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index 673e750a8490..72d07b49b0a3 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -187,7 +187,7 @@ int sdhci_pltfm_register(struct platform_device *pdev,
 }
 EXPORT_SYMBOL_GPL(sdhci_pltfm_register);
 
-int sdhci_pltfm_unregister(struct platform_device *pdev)
+void sdhci_pltfm_unregister(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -196,8 +196,6 @@ int sdhci_pltfm_unregister(struct platform_device *pdev)
 	sdhci_remove_host(host, dead);
 	clk_disable_unprepare(pltfm_host->clk);
 	sdhci_pltfm_free(pdev);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(sdhci_pltfm_unregister);
 
diff --git a/drivers/mmc/host/sdhci-pltfm.h b/drivers/mmc/host/sdhci-pltfm.h
index 9bd717ff784b..6e6a443dafd9 100644
--- a/drivers/mmc/host/sdhci-pltfm.h
+++ b/drivers/mmc/host/sdhci-pltfm.h
@@ -102,7 +102,7 @@ extern void sdhci_pltfm_free(struct platform_device *pdev);
 extern int sdhci_pltfm_register(struct platform_device *pdev,
 				const struct sdhci_pltfm_data *pdata,
 				size_t priv_size);
-extern int sdhci_pltfm_unregister(struct platform_device *pdev);
+extern void sdhci_pltfm_unregister(struct platform_device *pdev);
 
 extern unsigned int sdhci_pltfm_clk_get_max_clock(struct sdhci_host *host);
 
diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
index 86b60b0447b3..a0c8470e9214 100644
--- a/drivers/mmc/host/sdhci-pxav2.c
+++ b/drivers/mmc/host/sdhci-pxav2.c
@@ -358,7 +358,7 @@ static struct platform_driver sdhci_pxav2_driver = {
 		.pm	= &sdhci_pltfm_pmops,
 	},
 	.probe		= sdhci_pxav2_probe,
-	.remove		= sdhci_pltfm_unregister,
+	.remove_new	= sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_pxav2_driver);

From 8d7770345db715660dd751e195d934960434c876 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:47 +0800
Subject: [PATCH 1168/2047] mmc: uniphier-sd: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230727070051.17778-58-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/uniphier-sd.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/uniphier-sd.c b/drivers/mmc/host/uniphier-sd.c
index b50cda4a9635..1404989e6151 100644
--- a/drivers/mmc/host/uniphier-sd.c
+++ b/drivers/mmc/host/uniphier-sd.c
@@ -726,15 +726,13 @@ free_host:
 	return ret;
 }
 
-static int uniphier_sd_remove(struct platform_device *pdev)
+static void uniphier_sd_remove(struct platform_device *pdev)
 {
 	struct tmio_mmc_host *host = platform_get_drvdata(pdev);
 
 	tmio_mmc_host_remove(host);
 	uniphier_sd_clk_disable(host);
 	tmio_mmc_host_free(host);
-
-	return 0;
 }
 
 static const struct of_device_id uniphier_sd_match[] = {
@@ -756,7 +754,7 @@ MODULE_DEVICE_TABLE(of, uniphier_sd_match);
 
 static struct platform_driver uniphier_sd_driver = {
 	.probe = uniphier_sd_probe,
-	.remove = uniphier_sd_remove,
+	.remove_new = uniphier_sd_remove,
 	.driver = {
 		.name = "uniphier-sd",
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,

From 854034e2bcccf932d0a0d3f9cf3149d6ebcf145c Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:48 +0800
Subject: [PATCH 1169/2047] mmc: sdhci_am654: Properly handle failures in
 .remove()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Returning an error code in a platform driver's remove function is wrong
most of the time and there is an effort to make the callback return
void. To prepare this rework the function not to exit early.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-59-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci_am654.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
index 7cdf0f54e3a5..1cc84739ef2f 100644
--- a/drivers/mmc/host/sdhci_am654.c
+++ b/drivers/mmc/host/sdhci_am654.c
@@ -870,16 +870,17 @@ static int sdhci_am654_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct device *dev = &pdev->dev;
 	int ret;
 
-	ret = pm_runtime_resume_and_get(&pdev->dev);
+	ret = pm_runtime_get_sync(dev);
 	if (ret < 0)
-		return ret;
+		dev_err(dev, "pm_runtime_get_sync() Failed\n");
 
 	sdhci_remove_host(host, true);
 	clk_disable_unprepare(pltfm_host->clk);
-	pm_runtime_disable(&pdev->dev);
-	pm_runtime_put_noidle(&pdev->dev);
+	pm_runtime_disable(dev);
+	pm_runtime_put_noidle(dev);
 	sdhci_pltfm_free(pdev);
 	return 0;
 }

From de29ade4fc35d27e22d4060f241c3dcae480d6c4 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:49 +0800
Subject: [PATCH 1170/2047] mmc: sdhci_am654: Convert to platform remove
 callback returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-60-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci_am654.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
index 1cc84739ef2f..c125485ba80e 100644
--- a/drivers/mmc/host/sdhci_am654.c
+++ b/drivers/mmc/host/sdhci_am654.c
@@ -866,7 +866,7 @@ err_pltfm_free:
 	return ret;
 }
 
-static int sdhci_am654_remove(struct platform_device *pdev)
+static void sdhci_am654_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -882,7 +882,6 @@ static int sdhci_am654_remove(struct platform_device *pdev)
 	pm_runtime_disable(dev);
 	pm_runtime_put_noidle(dev);
 	sdhci_pltfm_free(pdev);
-	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -994,7 +993,7 @@ static struct platform_driver sdhci_am654_driver = {
 		.of_match_table = sdhci_am654_of_match,
 	},
 	.probe = sdhci_am654_probe,
-	.remove = sdhci_am654_remove,
+	.remove_new = sdhci_am654_remove,
 };
 
 module_platform_driver(sdhci_am654_driver);

From 1930c059b2cb6e0696dc2fc0924a99854bdfc2e5 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Jul 2023 15:00:50 +0800
Subject: [PATCH 1171/2047] mmc: f-sdh30: Convert to platform remove callback
 returning void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is (mostly) ignored
and this typically results in resource leaks. To improve here there is a
quest to make the remove callback return void. In the first step of this
quest all drivers are converted to .remove_new() which already returns
void.

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230727070051.17778-61-frank.li@vivo.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci_f_sdh30.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/mmc/host/sdhci_f_sdh30.c b/drivers/mmc/host/sdhci_f_sdh30.c
index 3215063bcf86..47ae853f51aa 100644
--- a/drivers/mmc/host/sdhci_f_sdh30.c
+++ b/drivers/mmc/host/sdhci_f_sdh30.c
@@ -206,7 +206,7 @@ err:
 	return ret;
 }
 
-static int sdhci_f_sdh30_remove(struct platform_device *pdev)
+static void sdhci_f_sdh30_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
@@ -219,8 +219,6 @@ static int sdhci_f_sdh30_remove(struct platform_device *pdev)
 	reset_control_assert(rst);
 	clk_disable_unprepare(clk);
 	clk_disable_unprepare(clk_iface);
-
-	return 0;
 }
 
 #ifdef CONFIG_OF
@@ -248,8 +246,8 @@ static struct platform_driver sdhci_f_sdh30_driver = {
 		.acpi_match_table = ACPI_PTR(f_sdh30_acpi_ids),
 		.pm	= &sdhci_pltfm_pmops,
 	},
-	.probe	= sdhci_f_sdh30_probe,
-	.remove	= sdhci_f_sdh30_remove,
+	.probe = sdhci_f_sdh30_probe,
+	.remove_new = sdhci_f_sdh30_remove,
 };
 
 module_platform_driver(sdhci_f_sdh30_driver);

From 32261f9b728277f88a0affa022df64e6238c24a6 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:36 +0300
Subject: [PATCH 1172/2047] mmc: sdhci-pltfm: Add sdhci_pltfm_remove()

Add sdhci_pltfm_remove() to replace sdhci_pltfm_unregister().
The difference is that sdhci_pltfm_remove() does not do:
	clk_disable_unprepare(pltfm_host->clk);
which allows drivers to choose to use devm_clk_get_enabled() or
similar, for pltfm_host->clk.

Once all drivers using sdhci_pltfm_unregister() have been amended to use
sdhci_pltfm_remove() instead, sdhci_pltfm_unregister() will be removed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-2-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pltfm.c | 10 ++++++++++
 drivers/mmc/host/sdhci-pltfm.h |  1 +
 2 files changed, 11 insertions(+)

diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index 72d07b49b0a3..5a63c8818987 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -199,6 +199,16 @@ void sdhci_pltfm_unregister(struct platform_device *pdev)
 }
 EXPORT_SYMBOL_GPL(sdhci_pltfm_unregister);
 
+void sdhci_pltfm_remove(struct platform_device *pdev)
+{
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
+
+	sdhci_remove_host(host, dead);
+	sdhci_pltfm_free(pdev);
+}
+EXPORT_SYMBOL_GPL(sdhci_pltfm_remove);
+
 #ifdef CONFIG_PM_SLEEP
 int sdhci_pltfm_suspend(struct device *dev)
 {
diff --git a/drivers/mmc/host/sdhci-pltfm.h b/drivers/mmc/host/sdhci-pltfm.h
index 6e6a443dafd9..402f4edc6ca5 100644
--- a/drivers/mmc/host/sdhci-pltfm.h
+++ b/drivers/mmc/host/sdhci-pltfm.h
@@ -103,6 +103,7 @@ extern int sdhci_pltfm_register(struct platform_device *pdev,
 				const struct sdhci_pltfm_data *pdata,
 				size_t priv_size);
 extern void sdhci_pltfm_unregister(struct platform_device *pdev);
+extern void sdhci_pltfm_remove(struct platform_device *pdev);
 
 extern unsigned int sdhci_pltfm_clk_get_max_clock(struct sdhci_host *host);
 

From 8ebb607a747f25c81adf372c46c2a0745d4ef692 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:37 +0300
Subject: [PATCH 1173/2047] mmc: sdhci-bcm-kona: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() because
sdhci_pltfm_unregister() is going to be removed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-3-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-bcm-kona.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-bcm-kona.c b/drivers/mmc/host/sdhci-bcm-kona.c
index 02bb871e838f..cb9152c6a65d 100644
--- a/drivers/mmc/host/sdhci-bcm-kona.c
+++ b/drivers/mmc/host/sdhci-bcm-kona.c
@@ -310,6 +310,16 @@ err_pltfm_free:
 	return ret;
 }
 
+static void sdhci_bcm_kona_remove(struct platform_device *pdev)
+{
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct clk *clk = pltfm_host->clk;
+
+	sdhci_pltfm_remove(pdev);
+	clk_disable_unprepare(clk);
+}
+
 static struct platform_driver sdhci_bcm_kona_driver = {
 	.driver		= {
 		.name	= "sdhci-kona",
@@ -318,7 +328,7 @@ static struct platform_driver sdhci_bcm_kona_driver = {
 		.of_match_table = sdhci_bcm_kona_of_match,
 	},
 	.probe		= sdhci_bcm_kona_probe,
-	.remove_new	= sdhci_pltfm_unregister,
+	.remove_new	= sdhci_bcm_kona_remove,
 };
 module_platform_driver(sdhci_bcm_kona_driver);
 

From b6c90da3b0e1a8444cad92d496d409941195e2e4 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:38 +0300
Subject: [PATCH 1174/2047] mmc: sdhci-brcmstb: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() so that
devm_clk_get_optional_enabled() can be used for pltfm_host->clk.

This has the side effect that the order of operations on the error path
and remove path is not the same as it was before, but should be safe
nevertheless.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-4-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-brcmstb.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c
index a2b6d8f2eeb6..c23251bb95f3 100644
--- a/drivers/mmc/host/sdhci-brcmstb.c
+++ b/drivers/mmc/host/sdhci-brcmstb.c
@@ -264,23 +264,17 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev)
 
 	dev_dbg(&pdev->dev, "Probe found match for %s\n",  match->compatible);
 
-	clk = devm_clk_get_optional(&pdev->dev, NULL);
+	clk = devm_clk_get_optional_enabled(&pdev->dev, NULL);
 	if (IS_ERR(clk))
 		return dev_err_probe(&pdev->dev, PTR_ERR(clk),
-				     "Failed to get clock from Device Tree\n");
-
-	res = clk_prepare_enable(clk);
-	if (res)
-		return res;
+				     "Failed to get and enable clock from Device Tree\n");
 
 	memset(&brcmstb_pdata, 0, sizeof(brcmstb_pdata));
 	brcmstb_pdata.ops = match_priv->ops;
 	host = sdhci_pltfm_init(pdev, &brcmstb_pdata,
 				sizeof(struct sdhci_brcmstb_priv));
-	if (IS_ERR(host)) {
-		res = PTR_ERR(host);
-		goto err_clk;
-	}
+	if (IS_ERR(host))
+		return PTR_ERR(host);
 
 	pltfm_host = sdhci_priv(host);
 	priv = sdhci_pltfm_priv(pltfm_host);
@@ -369,9 +363,7 @@ add_host:
 
 err:
 	sdhci_pltfm_free(pdev);
-err_clk:
 	clk_disable_unprepare(base_clk);
-	clk_disable_unprepare(clk);
 	return res;
 }
 
@@ -430,7 +422,7 @@ static struct platform_driver sdhci_brcmstb_driver = {
 		.of_match_table = of_match_ptr(sdhci_brcm_of_match),
 	},
 	.probe		= sdhci_brcmstb_probe,
-	.remove_new	= sdhci_pltfm_unregister,
+	.remove_new	= sdhci_pltfm_remove,
 	.shutdown	= sdhci_brcmstb_shutdown,
 };
 

From 6996beab7120d4bc004ca663408b7bd66dbacfad Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:39 +0300
Subject: [PATCH 1175/2047] mmc: sdhci-cadence: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() so that
devm_clk_get_enabled() can be used for pltfm_host->clk.

This has the side effect that the order of operations on the error path
and remove path is not the same as it was before, but should be safe
nevertheless.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-5-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-cadence.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c
index 8cdc178ec1ba..be1505e8c536 100644
--- a/drivers/mmc/host/sdhci-cadence.c
+++ b/drivers/mmc/host/sdhci-cadence.c
@@ -487,14 +487,10 @@ static int sdhci_cdns_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	static const u16 version = SDHCI_SPEC_400 << SDHCI_SPEC_VER_SHIFT;
 
-	clk = devm_clk_get(dev, NULL);
+	clk = devm_clk_get_enabled(dev, NULL);
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
-	ret = clk_prepare_enable(clk);
-	if (ret)
-		return ret;
-
 	data = of_device_get_match_data(dev);
 	if (!data)
 		data = &sdhci_cdns_drv_data;
@@ -502,10 +498,8 @@ static int sdhci_cdns_probe(struct platform_device *pdev)
 	nr_phy_params = sdhci_cdns_phy_param_count(dev->of_node);
 	host = sdhci_pltfm_init(pdev, &data->pltfm_data,
 				struct_size(priv, phy_params, nr_phy_params));
-	if (IS_ERR(host)) {
-		ret = PTR_ERR(host);
-		goto disable_clk;
-	}
+	if (IS_ERR(host))
+		return PTR_ERR(host);
 
 	pltfm_host = sdhci_priv(host);
 	pltfm_host->clk = clk;
@@ -556,9 +550,6 @@ static int sdhci_cdns_probe(struct platform_device *pdev)
 	return 0;
 free:
 	sdhci_pltfm_free(pdev);
-disable_clk:
-	clk_disable_unprepare(clk);
-
 	return ret;
 }
 
@@ -617,7 +608,7 @@ static struct platform_driver sdhci_cdns_driver = {
 		.of_match_table = sdhci_cdns_match,
 	},
 	.probe = sdhci_cdns_probe,
-	.remove_new = sdhci_pltfm_unregister,
+	.remove_new = sdhci_pltfm_remove,
 };
 module_platform_driver(sdhci_cdns_driver);
 

From 584259d42434b067ef74b650d47ed651dd350325 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:40 +0300
Subject: [PATCH 1176/2047] mmc: sdhci-dove: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() so that
devm_clk_get_enabled() can be used for pltfm_host->clk.

This has the side effect that the order of operations on the error path
and remove path is not the same as it was before, but should be safe
nevertheless.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-6-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-dove.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c
index 75335dbf223c..88ec23417808 100644
--- a/drivers/mmc/host/sdhci-dove.c
+++ b/drivers/mmc/host/sdhci-dove.c
@@ -75,10 +75,7 @@ static int sdhci_dove_probe(struct platform_device *pdev)
 		return PTR_ERR(host);
 
 	pltfm_host = sdhci_priv(host);
-	pltfm_host->clk = devm_clk_get(&pdev->dev, NULL);
-
-	if (!IS_ERR(pltfm_host->clk))
-		clk_prepare_enable(pltfm_host->clk);
+	pltfm_host->clk = devm_clk_get_enabled(&pdev->dev, NULL);
 
 	ret = mmc_of_parse(host->mmc);
 	if (ret)
@@ -91,7 +88,6 @@ static int sdhci_dove_probe(struct platform_device *pdev)
 	return 0;
 
 err_sdhci_add:
-	clk_disable_unprepare(pltfm_host->clk);
 	sdhci_pltfm_free(pdev);
 	return ret;
 }
@@ -110,7 +106,7 @@ static struct platform_driver sdhci_dove_driver = {
 		.of_match_table = sdhci_dove_of_match_table,
 	},
 	.probe		= sdhci_dove_probe,
-	.remove_new	= sdhci_pltfm_unregister,
+	.remove_new	= sdhci_pltfm_remove,
 };
 
 module_platform_driver(sdhci_dove_driver);

From 080b5adf986de6bf95e2401ab5ed18a777cc19da Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:41 +0300
Subject: [PATCH 1177/2047] mmc: sdhci_f_sdh30: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() because
sdhci_pltfm_unregister() is going to be removed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-7-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci_f_sdh30.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci_f_sdh30.c b/drivers/mmc/host/sdhci_f_sdh30.c
index 47ae853f51aa..c58e7cb1e2a7 100644
--- a/drivers/mmc/host/sdhci_f_sdh30.c
+++ b/drivers/mmc/host/sdhci_f_sdh30.c
@@ -214,7 +214,7 @@ static void sdhci_f_sdh30_remove(struct platform_device *pdev)
 	struct reset_control *rst = priv->rst;
 	struct clk *clk = priv->clk;
 
-	sdhci_pltfm_unregister(pdev);
+	sdhci_pltfm_remove(pdev);
 
 	reset_control_assert(rst);
 	clk_disable_unprepare(clk);

From 3f3771341e4e2396232025e3f39875f3c0cdb039 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:42 +0300
Subject: [PATCH 1178/2047] mmc: sdhci-iproc: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() so that
devm_clk_get_enabled() can be used for pltfm_host->clk.

This has the side effect that the order of operations on the error path
and remove path is not the same as it was before, but should be safe
nevertheless.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-8-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-iproc.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index b1547bcbec32..10235fdff246 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -386,16 +386,11 @@ static int sdhci_iproc_probe(struct platform_device *pdev)
 	host->mmc->caps |= iproc_host->data->mmc_caps;
 
 	if (dev->of_node) {
-		pltfm_host->clk = devm_clk_get(dev, NULL);
+		pltfm_host->clk = devm_clk_get_enabled(dev, NULL);
 		if (IS_ERR(pltfm_host->clk)) {
 			ret = PTR_ERR(pltfm_host->clk);
 			goto err;
 		}
-		ret = clk_prepare_enable(pltfm_host->clk);
-		if (ret) {
-			dev_err(dev, "failed to enable host clk\n");
-			goto err;
-		}
 	}
 
 	if (iproc_host->data->missing_caps) {
@@ -406,13 +401,10 @@ static int sdhci_iproc_probe(struct platform_device *pdev)
 
 	ret = sdhci_add_host(host);
 	if (ret)
-		goto err_clk;
+		goto err;
 
 	return 0;
 
-err_clk:
-	if (dev->of_node)
-		clk_disable_unprepare(pltfm_host->clk);
 err:
 	sdhci_pltfm_free(pdev);
 	return ret;
@@ -432,7 +424,7 @@ static struct platform_driver sdhci_iproc_driver = {
 		.pm = &sdhci_pltfm_pmops,
 	},
 	.probe = sdhci_iproc_probe,
-	.remove_new = sdhci_pltfm_unregister,
+	.remove_new = sdhci_pltfm_remove,
 	.shutdown = sdhci_iproc_shutdown,
 };
 module_platform_driver(sdhci_iproc_driver);

From ef1c3a7ec03b57e407bf3003095a5d9acdf14358 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:43 +0300
Subject: [PATCH 1179/2047] mmc: sdhci-of-arasan: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() because
sdhci_pltfm_unregister() is going to be removed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-9-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-arasan.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index 15025af326a2..5edd024347bd 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -2022,6 +2022,7 @@ static void sdhci_arasan_remove(struct platform_device *pdev)
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
 	struct clk *clk_ahb = sdhci_arasan->clk_ahb;
+	struct clk *clk_xin = pltfm_host->clk;
 
 	if (!IS_ERR(sdhci_arasan->phy)) {
 		if (sdhci_arasan->is_phy_on)
@@ -2031,8 +2032,9 @@ static void sdhci_arasan_remove(struct platform_device *pdev)
 
 	sdhci_arasan_unregister_sdclk(&pdev->dev);
 
-	sdhci_pltfm_unregister(pdev);
+	sdhci_pltfm_remove(pdev);
 
+	clk_disable_unprepare(clk_xin);
 	clk_disable_unprepare(clk_ahb);
 }
 

From 774caef5dffdfb7f8cb4c27c3da91b9903ea6a6e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:44 +0300
Subject: [PATCH 1180/2047] mmc: sdhci-of-at91: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() because
sdhci_pltfm_unregister() is going to be removed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-10-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-at91.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index db01ea9fa203..23a9faad2ff8 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -456,7 +456,7 @@ static void sdhci_at91_remove(struct platform_device *pdev)
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_put_noidle(&pdev->dev);
 
-	sdhci_pltfm_unregister(pdev);
+	sdhci_pltfm_remove(pdev);
 
 	clk_disable_unprepare(gck);
 	clk_disable_unprepare(hclock);

From 4a035a41dff076fc56f8bd693145d55a338e9fc8 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:45 +0300
Subject: [PATCH 1181/2047] mmc: sdhci-of-esdhc: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() because
sdhci_pltfm_unregister() is going to be removed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-11-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-esdhc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 5072b59f6165..3ae9aa25745a 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -1521,7 +1521,7 @@ static struct platform_driver sdhci_esdhc_driver = {
 		.pm = &esdhc_of_dev_pm_ops,
 	},
 	.probe = sdhci_esdhc_probe,
-	.remove_new = sdhci_pltfm_unregister,
+	.remove_new = sdhci_pltfm_remove,
 };
 
 module_platform_driver(sdhci_esdhc_driver);

From ed581f291d6a9ec28d5950c1bc76cda3046ede0c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:46 +0300
Subject: [PATCH 1182/2047] mmc: sdhci-of-hlwd: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() because
sdhci_pltfm_unregister() is going to be removed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-12-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-hlwd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c
index cba3ba48e9dc..06e88bb22ea5 100644
--- a/drivers/mmc/host/sdhci-of-hlwd.c
+++ b/drivers/mmc/host/sdhci-of-hlwd.c
@@ -85,7 +85,7 @@ static struct platform_driver sdhci_hlwd_driver = {
 		.pm = &sdhci_pltfm_pmops,
 	},
 	.probe = sdhci_hlwd_probe,
-	.remove_new = sdhci_pltfm_unregister,
+	.remove_new = sdhci_pltfm_remove,
 };
 
 module_platform_driver(sdhci_hlwd_driver);

From 18ba91ac6d726556026febb7a04dcb248ac3571c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:47 +0300
Subject: [PATCH 1183/2047] mmc: sdhci-of-sparx5: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() so that
devm_clk_get_enabled() can be used for pltfm_host->clk.

This has the side effect that the order of operations on the error path
and remove path is not the same as it was before, but should be safe
nevertheless.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-13-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-sparx5.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/mmc/host/sdhci-of-sparx5.c b/drivers/mmc/host/sdhci-of-sparx5.c
index 653480dbe6bb..64b77e7d14cd 100644
--- a/drivers/mmc/host/sdhci-of-sparx5.c
+++ b/drivers/mmc/host/sdhci-of-sparx5.c
@@ -184,15 +184,12 @@ static int sdhci_sparx5_probe(struct platform_device *pdev)
 	sdhci_sparx5 = sdhci_pltfm_priv(pltfm_host);
 	sdhci_sparx5->host = host;
 
-	pltfm_host->clk = devm_clk_get(&pdev->dev, "core");
+	pltfm_host->clk = devm_clk_get_enabled(&pdev->dev, "core");
 	if (IS_ERR(pltfm_host->clk)) {
 		ret = PTR_ERR(pltfm_host->clk);
-		dev_err(&pdev->dev, "failed to get core clk: %d\n", ret);
+		dev_err(&pdev->dev, "failed to get and enable core clk: %d\n", ret);
 		goto free_pltfm;
 	}
-	ret = clk_prepare_enable(pltfm_host->clk);
-	if (ret)
-		goto free_pltfm;
 
 	if (!of_property_read_u32(np, "microchip,clock-delay", &value) &&
 	    (value > 0 && value <= MSHC_DLY_CC_MAX))
@@ -202,13 +199,13 @@ static int sdhci_sparx5_probe(struct platform_device *pdev)
 
 	ret = mmc_of_parse(host->mmc);
 	if (ret)
-		goto err_clk;
+		goto free_pltfm;
 
 	sdhci_sparx5->cpu_ctrl = syscon_regmap_lookup_by_compatible(syscon);
 	if (IS_ERR(sdhci_sparx5->cpu_ctrl)) {
 		dev_err(&pdev->dev, "No CPU syscon regmap !\n");
 		ret = PTR_ERR(sdhci_sparx5->cpu_ctrl);
-		goto err_clk;
+		goto free_pltfm;
 	}
 
 	if (sdhci_sparx5->delay_clock >= 0)
@@ -225,7 +222,7 @@ static int sdhci_sparx5_probe(struct platform_device *pdev)
 
 	ret = sdhci_add_host(host);
 	if (ret)
-		goto err_clk;
+		goto free_pltfm;
 
 	/* Set AXI bus master to use un-cached access (for DMA) */
 	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA) &&
@@ -239,8 +236,6 @@ static int sdhci_sparx5_probe(struct platform_device *pdev)
 
 	return ret;
 
-err_clk:
-	clk_disable_unprepare(pltfm_host->clk);
 free_pltfm:
 	sdhci_pltfm_free(pdev);
 	return ret;
@@ -260,7 +255,7 @@ static struct platform_driver sdhci_sparx5_driver = {
 		.pm = &sdhci_pltfm_pmops,
 	},
 	.probe = sdhci_sparx5_probe,
-	.remove_new = sdhci_pltfm_unregister,
+	.remove_new = sdhci_pltfm_remove,
 };
 
 module_platform_driver(sdhci_sparx5_driver);

From c21f1b0dc6418e029b52fad1c243ed46fea37328 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:48 +0300
Subject: [PATCH 1184/2047] mmc: sdhci-pxav2: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() so that
devm_clk_get_enabled() can be used for pltfm_host->clk.

This has the side effect that the order of operations on the error path
and remove path is not the same as it was before, but should be safe
nevertheless.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-14-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pxav2.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
index a0c8470e9214..b75cbea88b40 100644
--- a/drivers/mmc/host/sdhci-pxav2.c
+++ b/drivers/mmc/host/sdhci-pxav2.c
@@ -268,26 +268,21 @@ static int sdhci_pxav2_probe(struct platform_device *pdev)
 	pltfm_host = sdhci_priv(host);
 	pxav2_host = sdhci_pltfm_priv(pltfm_host);
 
-	clk = devm_clk_get(dev, "io");
-	if (IS_ERR(clk) && PTR_ERR(clk) != -EPROBE_DEFER)
-		clk = devm_clk_get(dev, NULL);
+	clk = devm_clk_get_optional_enabled(dev, "io");
+	if (!clk)
+		clk = devm_clk_get_enabled(dev, NULL);
 	if (IS_ERR(clk)) {
 		ret = PTR_ERR(clk);
 		dev_err_probe(dev, ret, "failed to get io clock\n");
 		goto free;
 	}
 	pltfm_host->clk = clk;
-	ret = clk_prepare_enable(clk);
-	if (ret) {
-		dev_err(dev, "failed to enable io clock\n");
-		goto free;
-	}
 
 	clk_core = devm_clk_get_optional_enabled(dev, "core");
 	if (IS_ERR(clk_core)) {
 		ret = PTR_ERR(clk_core);
 		dev_err_probe(dev, ret, "failed to enable core clock\n");
-		goto disable_clk;
+		goto free;
 	}
 
 	host->quirks = SDHCI_QUIRK_BROKEN_ADMA
@@ -339,12 +334,10 @@ static int sdhci_pxav2_probe(struct platform_device *pdev)
 
 	ret = sdhci_add_host(host);
 	if (ret)
-		goto disable_clk;
+		goto free;
 
 	return 0;
 
-disable_clk:
-	clk_disable_unprepare(clk);
 free:
 	sdhci_pltfm_free(pdev);
 	return ret;
@@ -358,7 +351,7 @@ static struct platform_driver sdhci_pxav2_driver = {
 		.pm	= &sdhci_pltfm_pmops,
 	},
 	.probe		= sdhci_pxav2_probe,
-	.remove_new	= sdhci_pltfm_unregister,
+	.remove_new	= sdhci_pltfm_remove,
 };
 
 module_platform_driver(sdhci_pxav2_driver);

From b1284d7c62f0b9ded4532717f29b16f2f199738e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:49 +0300
Subject: [PATCH 1185/2047] mmc: sdhci-st: Use sdhci_pltfm_remove()

Use sdhci_pltfm_remove() instead of sdhci_pltfm_unregister() because
sdhci_pltfm_unregister() is going to be removed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-15-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-st.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
index 14fc2c386bd3..d12532b96b51 100644
--- a/drivers/mmc/host/sdhci-st.c
+++ b/drivers/mmc/host/sdhci-st.c
@@ -437,10 +437,12 @@ static void sdhci_st_remove(struct platform_device *pdev)
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct st_mmc_platform_data *pdata = sdhci_pltfm_priv(pltfm_host);
 	struct reset_control *rstc = pdata->rstc;
+	struct clk *clk = pltfm_host->clk;
 
-	sdhci_pltfm_unregister(pdev);
+	sdhci_pltfm_remove(pdev);
 
 	clk_disable_unprepare(pdata->icnclk);
+	clk_disable_unprepare(clk);
 
 	reset_control_assert(rstc);
 }

From f0255cdca3972f01ac0bb9cfee83ed9548d19dd2 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:50 +0300
Subject: [PATCH 1186/2047] mmc: sdhci-pltfm: Remove sdhci_pltfm_unregister()

Now that sdhci_pltfm_unregister() is unused, remove it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-16-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pltfm.c | 12 ------------
 drivers/mmc/host/sdhci-pltfm.h |  1 -
 2 files changed, 13 deletions(-)

diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index 5a63c8818987..894f3bbe2b0f 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -187,18 +187,6 @@ int sdhci_pltfm_register(struct platform_device *pdev,
 }
 EXPORT_SYMBOL_GPL(sdhci_pltfm_register);
 
-void sdhci_pltfm_unregister(struct platform_device *pdev)
-{
-	struct sdhci_host *host = platform_get_drvdata(pdev);
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
-
-	sdhci_remove_host(host, dead);
-	clk_disable_unprepare(pltfm_host->clk);
-	sdhci_pltfm_free(pdev);
-}
-EXPORT_SYMBOL_GPL(sdhci_pltfm_unregister);
-
 void sdhci_pltfm_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
diff --git a/drivers/mmc/host/sdhci-pltfm.h b/drivers/mmc/host/sdhci-pltfm.h
index 402f4edc6ca5..bebc450d5098 100644
--- a/drivers/mmc/host/sdhci-pltfm.h
+++ b/drivers/mmc/host/sdhci-pltfm.h
@@ -102,7 +102,6 @@ extern void sdhci_pltfm_free(struct platform_device *pdev);
 extern int sdhci_pltfm_register(struct platform_device *pdev,
 				const struct sdhci_pltfm_data *pdata,
 				size_t priv_size);
-extern void sdhci_pltfm_unregister(struct platform_device *pdev);
 extern void sdhci_pltfm_remove(struct platform_device *pdev);
 
 extern unsigned int sdhci_pltfm_clk_get_max_clock(struct sdhci_host *host);

From 899171dc4e7ce9ac90b27610d79ea511402e60c0 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 11 Aug 2023 16:03:51 +0300
Subject: [PATCH 1187/2047] mmc: sdhci-pltfm: Rename sdhci_pltfm_register()

Now that sdhci_pltfm_unregister() has been removed, rename
sdhci_pltfm_register() to sdhci_pltfm_init_and_add_host() to better
reflect what it does.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20230811130351.7038-17-adrian.hunter@intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-hlwd.c | 2 +-
 drivers/mmc/host/sdhci-pltfm.c   | 8 ++++----
 drivers/mmc/host/sdhci-pltfm.h   | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c
index 06e88bb22ea5..9c1c0ce610ef 100644
--- a/drivers/mmc/host/sdhci-of-hlwd.c
+++ b/drivers/mmc/host/sdhci-of-hlwd.c
@@ -68,7 +68,7 @@ static const struct sdhci_pltfm_data sdhci_hlwd_pdata = {
 
 static int sdhci_hlwd_probe(struct platform_device *pdev)
 {
-	return sdhci_pltfm_register(pdev, &sdhci_hlwd_pdata, 0);
+	return sdhci_pltfm_init_and_add_host(pdev, &sdhci_hlwd_pdata, 0);
 }
 
 static const struct of_device_id sdhci_hlwd_of_match[] = {
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index 894f3bbe2b0f..a72e123a585d 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -166,9 +166,9 @@ void sdhci_pltfm_free(struct platform_device *pdev)
 }
 EXPORT_SYMBOL_GPL(sdhci_pltfm_free);
 
-int sdhci_pltfm_register(struct platform_device *pdev,
-			const struct sdhci_pltfm_data *pdata,
-			size_t priv_size)
+int sdhci_pltfm_init_and_add_host(struct platform_device *pdev,
+				  const struct sdhci_pltfm_data *pdata,
+				  size_t priv_size)
 {
 	struct sdhci_host *host;
 	int ret = 0;
@@ -185,7 +185,7 @@ int sdhci_pltfm_register(struct platform_device *pdev,
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(sdhci_pltfm_register);
+EXPORT_SYMBOL_GPL(sdhci_pltfm_init_and_add_host);
 
 void sdhci_pltfm_remove(struct platform_device *pdev)
 {
diff --git a/drivers/mmc/host/sdhci-pltfm.h b/drivers/mmc/host/sdhci-pltfm.h
index bebc450d5098..b81d5b0fd616 100644
--- a/drivers/mmc/host/sdhci-pltfm.h
+++ b/drivers/mmc/host/sdhci-pltfm.h
@@ -99,9 +99,9 @@ extern struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev,
 					  size_t priv_size);
 extern void sdhci_pltfm_free(struct platform_device *pdev);
 
-extern int sdhci_pltfm_register(struct platform_device *pdev,
-				const struct sdhci_pltfm_data *pdata,
-				size_t priv_size);
+extern int sdhci_pltfm_init_and_add_host(struct platform_device *pdev,
+					 const struct sdhci_pltfm_data *pdata,
+					 size_t priv_size);
 extern void sdhci_pltfm_remove(struct platform_device *pdev);
 
 extern unsigned int sdhci_pltfm_clk_get_max_clock(struct sdhci_host *host);

From 0242737dc4eb9f6e9a5ea594b3f93efa0b12f28d Mon Sep 17 00:00:00 2001
From: Yicong Yang <yangyicong@hisilicon.com>
Date: Mon, 14 Aug 2023 20:40:12 +0800
Subject: [PATCH 1188/2047] perf/smmuv3: Enable HiSilicon Erratum 162001900
 quirk for HIP08/09

Some HiSilicon SMMU PMCG suffers the erratum 162001900 that the PMU
disable control sometimes fail to disable the counters. This will lead
to error or inaccurate data since before we enable the counters the
counter's still counting for the event used in last perf session.

This patch tries to fix this by hardening the global disable process.
Before disable the PMU, writing an invalid event type (0xffff) to
focibly stop the counters. Correspondingly restore each events on
pmu::pmu_enable().

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Link: https://lore.kernel.org/r/20230814124012.58013-1-yangyicong@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/arch/arm64/silicon-errata.rst |  3 ++
 drivers/acpi/arm64/iort.c                   |  5 ++-
 drivers/perf/arm_smmuv3_pmu.c               | 46 ++++++++++++++++++++-
 include/linux/acpi_iort.h                   |  1 +
 4 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index 496cdca5cb99..d54626cfcbda 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -195,6 +195,9 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Hisilicon      | Hip08 SMMU PMCG | #162001800      | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon      | Hip08 SMMU PMCG | #162001900      | N/A                         |
+|                | Hip09 SMMU PMCG |                 |                             |
++----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
 | Qualcomm Tech. | Kryo/Falkor v1  | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 +----------------+-----------------+-----------------+-----------------------------+
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 3631230a61c8..2c1640fd2b16 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1711,7 +1711,10 @@ static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res,
 static struct acpi_platform_list pmcg_plat_info[] __initdata = {
 	/* HiSilicon Hip08 Platform */
 	{"HISI  ", "HIP08   ", 0, ACPI_SIG_IORT, greater_than_or_equal,
-	 "Erratum #162001800", IORT_SMMU_V3_PMCG_HISI_HIP08},
+	 "Erratum #162001800, Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP08},
+	/* HiSilicon Hip09 Platform */
+	{"HISI  ", "HIP09   ", 0, ACPI_SIG_IORT, greater_than_or_equal,
+	 "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09},
 	{ }
 };
 
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 25a269d431e4..0e17c57ddb87 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -115,6 +115,7 @@
 #define SMMU_PMCG_PA_SHIFT              12
 
 #define SMMU_PMCG_EVCNTR_RDONLY         BIT(0)
+#define SMMU_PMCG_HARDEN_DISABLE        BIT(1)
 
 static int cpuhp_state_num;
 
@@ -159,6 +160,20 @@ static inline void smmu_pmu_enable(struct pmu *pmu)
 	writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR);
 }
 
+static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
+				       struct perf_event *event, int idx);
+
+static inline void smmu_pmu_enable_quirk_hip08_09(struct pmu *pmu)
+{
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
+	unsigned int idx;
+
+	for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters)
+		smmu_pmu_apply_event_filter(smmu_pmu, smmu_pmu->events[idx], idx);
+
+	smmu_pmu_enable(pmu);
+}
+
 static inline void smmu_pmu_disable(struct pmu *pmu)
 {
 	struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
@@ -167,6 +182,22 @@ static inline void smmu_pmu_disable(struct pmu *pmu)
 	writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
 }
 
+static inline void smmu_pmu_disable_quirk_hip08_09(struct pmu *pmu)
+{
+	struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
+	unsigned int idx;
+
+	/*
+	 * The global disable of PMU sometimes fail to stop the counting.
+	 * Harden this by writing an invalid event type to each used counter
+	 * to forcibly stop counting.
+	 */
+	for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters)
+		writel(0xffff, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx));
+
+	smmu_pmu_disable(pmu);
+}
+
 static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu,
 					      u32 idx, u64 value)
 {
@@ -765,7 +796,10 @@ static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu)
 	switch (model) {
 	case IORT_SMMU_V3_PMCG_HISI_HIP08:
 		/* HiSilicon Erratum 162001800 */
-		smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY;
+		smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY | SMMU_PMCG_HARDEN_DISABLE;
+		break;
+	case IORT_SMMU_V3_PMCG_HISI_HIP09:
+		smmu_pmu->options |= SMMU_PMCG_HARDEN_DISABLE;
 		break;
 	}
 
@@ -890,6 +924,16 @@ static int smmu_pmu_probe(struct platform_device *pdev)
 	if (!dev->of_node)
 		smmu_pmu_get_acpi_options(smmu_pmu);
 
+	/*
+	 * For platforms suffer this quirk, the PMU disable sometimes fails to
+	 * stop the counters. This will leads to inaccurate or error counting.
+	 * Forcibly disable the counters with these quirk handler.
+	 */
+	if (smmu_pmu->options & SMMU_PMCG_HARDEN_DISABLE) {
+		smmu_pmu->pmu.pmu_enable = smmu_pmu_enable_quirk_hip08_09;
+		smmu_pmu->pmu.pmu_disable = smmu_pmu_disable_quirk_hip08_09;
+	}
+
 	/* Pick one CPU to be the preferred one to use */
 	smmu_pmu->on_cpu = raw_smp_processor_id();
 	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index ee7cb6aaff71..1cb65592c95d 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -21,6 +21,7 @@
  */
 #define IORT_SMMU_V3_PMCG_GENERIC        0x00000000 /* Generic SMMUv3 PMCG */
 #define IORT_SMMU_V3_PMCG_HISI_HIP08     0x00000001 /* HiSilicon HIP08 PMCG */
+#define IORT_SMMU_V3_PMCG_HISI_HIP09     0x00000002 /* HiSilicon HIP09 PMCG */
 
 int iort_register_domain_token(int trans_id, phys_addr_t base,
 			       struct fwnode_handle *fw_node);

From 1b0e3ea9301a422003d385cda8f8dee6c878ad05 Mon Sep 17 00:00:00 2001
From: Yicong Yang <yangyicong@hisilicon.com>
Date: Mon, 14 Aug 2023 21:16:42 +0800
Subject: [PATCH 1189/2047] perf/smmuv3: Add MODULE_ALIAS for module auto
 loading

On my ACPI based arm64 server, if the SMMUv3 PMU is configured as
module it won't be loaded automatically after booting even if the
device has already been scanned and added. It's because the module
lacks a platform alias, the uevent mechanism and userspace tools
like udevd make use of this to find the target driver module of the
device. This patch adds the missing platform alias of the module,
then module will be loaded automatically if device exists.

Before this patch:
[root@localhost tmp]# modinfo arm_smmuv3_pmu | grep alias
alias:          of:N*T*Carm,smmu-v3-pmcgC*
alias:          of:N*T*Carm,smmu-v3-pmcg

After this patch:
[root@localhost tmp]# modinfo arm_smmuv3_pmu | grep alias
alias:          platform:arm-smmu-v3-pmcg
alias:          of:N*T*Carm,smmu-v3-pmcgC*
alias:          of:N*T*Carm,smmu-v3-pmcg

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Link: https://lore.kernel.org/r/20230814131642.65263-1-yangyicong@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_smmuv3_pmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 0e17c57ddb87..6303b82566f9 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -1028,6 +1028,7 @@ static void __exit arm_smmu_pmu_exit(void)
 
 module_exit(arm_smmu_pmu_exit);
 
+MODULE_ALIAS("platform:arm-smmu-v3-pmcg");
 MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension");
 MODULE_AUTHOR("Neil Leeder <nleeder@codeaurora.org>");
 MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>");

From 0fc7769e54e747c8fd1b4899af2ac43cb68daa1c Mon Sep 17 00:00:00 2001
From: Kevin-Lu <kevin-lu@ti.com>
Date: Tue, 15 Aug 2023 17:56:31 +0800
Subject: [PATCH 1190/2047] MAINTAINERS: Add entries for TEXAS INSTRUMENTS ASoC
 DRIVERS

Add the MAINTAINERS entries for TEXAS INSTRUMENTS ASoC DRIVERS.

Signed-off-by: Kevin-Lu <kevin-lu@ti.com>
Link: https://lore.kernel.org/r/20230815095631.1655-1-kevin-lu@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 MAINTAINERS | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 53b7ca804465..88174736be51 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21063,6 +21063,39 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml
 F:	sound/soc/ti/
 
+TEXAS INSTRUMENTS AUDIO (ASoC/HDA) DRIVERS
+M:	Shenghao Ding <shenghao-ding@ti.com>
+M:	Kevin Lu <kevin-lu@ti.com>
+M:	Baojun Xu <x1077012@ti.com>
+L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
+S:	Maintained
+F:	Documentation/devicetree/bindings/sound/tas2552.txt
+F:	Documentation/devicetree/bindings/sound/tas2562.yaml
+F:	Documentation/devicetree/bindings/sound/tas2770.yaml
+F:	Documentation/devicetree/bindings/sound/tas27xx.yaml
+F:	Documentation/devicetree/bindings/sound/ti,pcm1681.txt
+F:	Documentation/devicetree/bindings/sound/ti,pcm3168a.yaml
+F:	Documentation/devicetree/bindings/sound/ti,tlv320*.yaml
+F:	Documentation/devicetree/bindings/sound/tlv320adcx140.yaml
+F:	Documentation/devicetree/bindings/sound/tlv320aic31xx.txt
+F:	Documentation/devicetree/bindings/sound/tpa6130a2.txt
+F:	include/sound/tas2*.h
+F:	include/sound/tlv320*.h
+F:	include/sound/tpa6130a2-plat.h
+F:	sound/pci/hda/tas2781_hda_i2c.c
+F:	sound/soc/codecs/pcm1681.c
+F:	sound/soc/codecs/pcm1789*.*
+F:	sound/soc/codecs/pcm179x*.*
+F:	sound/soc/codecs/pcm186x*.*
+F:	sound/soc/codecs/pcm3008.*
+F:	sound/soc/codecs/pcm3060*.*
+F:	sound/soc/codecs/pcm3168a*.*
+F:	sound/soc/codecs/pcm5102a.c
+F:	sound/soc/codecs/pcm512x*.*
+F:	sound/soc/codecs/tas2*.*
+F:	sound/soc/codecs/tlv320*.*
+F:	sound/soc/codecs/tpa6130a2.*
+
 TEXAS INSTRUMENTS DMA DRIVERS
 M:	Peter Ujfalusi <peter.ujfalusi@gmail.com>
 L:	dmaengine@vger.kernel.org

From ba0294df2dbdc006ffbf037da28bba64e9f6d709 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Sat, 12 Aug 2023 20:57:48 +0200
Subject: [PATCH 1191/2047] gpio: sim: replace memmove() + strstrip() with
 skip_spaces() + strim()

Turns out we can avoid the memmove() by using skip_spaces() and strim().
We did that in gpio-consumer, let's do it in gpio-sim.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-sim.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index 1fbbb49985f6..b7161c83c104 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -642,16 +642,13 @@ static bool gpio_sim_device_is_live_unlocked(struct gpio_sim_device *dev)
 
 static char *gpio_sim_strdup_trimmed(const char *str, size_t count)
 {
-	char *dup, *trimmed;
+	char *trimmed;
 
-	dup = kstrndup(str, count, GFP_KERNEL);
-	if (!dup)
+	trimmed = kstrndup(skip_spaces(str), count, GFP_KERNEL);
+	if (!trimmed)
 		return NULL;
 
-	trimmed = strstrip(dup);
-	memmove(dup, trimmed, strlen(trimmed) + 1);
-
-	return dup;
+	return strim(trimmed);
 }
 
 static ssize_t gpio_sim_device_config_dev_name_show(struct config_item *item,

From 373c36bf7914e3198ac2654dede499f340c52950 Mon Sep 17 00:00:00 2001
From: Alexander Danilenko <al.b.danilenko@gmail.com>
Date: Tue, 15 Aug 2023 12:20:58 +0300
Subject: [PATCH 1192/2047] spi: tegra114: Remove unnecessary NULL-pointer
 checks

cs_setup, cs_hold and cs_inactive points to fields of spi_device struct,
so there is no sense in checking them for NULL.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: 04e6bb0d6bb1 ("spi: modify set_cs_timing parameter")
Signed-off-by: Alexander Danilenko <al.b.danilenko@gmail.com>
Link: https://lore.kernel.org/r/20230815092058.4083-1-al.b.danilenko@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra114.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
index ca4bdd45f56e..460f232dad50 100644
--- a/drivers/spi/spi-tegra114.c
+++ b/drivers/spi/spi-tegra114.c
@@ -722,27 +722,23 @@ static int tegra_spi_set_hw_cs_timing(struct spi_device *spi)
 	struct spi_delay *setup = &spi->cs_setup;
 	struct spi_delay *hold = &spi->cs_hold;
 	struct spi_delay *inactive = &spi->cs_inactive;
-	u8 setup_dly, hold_dly, inactive_dly;
+	u8 setup_dly, hold_dly;
 	u32 setup_hold;
 	u32 spi_cs_timing;
 	u32 inactive_cycles;
 	u8 cs_state;
 
-	if ((setup && setup->unit != SPI_DELAY_UNIT_SCK) ||
-	    (hold && hold->unit != SPI_DELAY_UNIT_SCK) ||
-	    (inactive && inactive->unit != SPI_DELAY_UNIT_SCK)) {
+	if (setup->unit != SPI_DELAY_UNIT_SCK ||
+	    hold->unit != SPI_DELAY_UNIT_SCK ||
+	    inactive->unit != SPI_DELAY_UNIT_SCK) {
 		dev_err(&spi->dev,
 			"Invalid delay unit %d, should be SPI_DELAY_UNIT_SCK\n",
 			SPI_DELAY_UNIT_SCK);
 		return -EINVAL;
 	}
 
-	setup_dly = setup ? setup->value : 0;
-	hold_dly = hold ? hold->value : 0;
-	inactive_dly = inactive ? inactive->value : 0;
-
-	setup_dly = min_t(u8, setup_dly, MAX_SETUP_HOLD_CYCLES);
-	hold_dly = min_t(u8, hold_dly, MAX_SETUP_HOLD_CYCLES);
+	setup_dly = min_t(u8, setup->value, MAX_SETUP_HOLD_CYCLES);
+	hold_dly = min_t(u8, hold->value, MAX_SETUP_HOLD_CYCLES);
 	if (setup_dly && hold_dly) {
 		setup_hold = SPI_SETUP_HOLD(setup_dly - 1, hold_dly - 1);
 		spi_cs_timing = SPI_CS_SETUP_HOLD(tspi->spi_cs_timing1,
@@ -754,7 +750,7 @@ static int tegra_spi_set_hw_cs_timing(struct spi_device *spi)
 		}
 	}
 
-	inactive_cycles = min_t(u8, inactive_dly, MAX_INACTIVE_CYCLES);
+	inactive_cycles = min_t(u8, inactive->value, MAX_INACTIVE_CYCLES);
 	if (inactive_cycles)
 		inactive_cycles--;
 	cs_state = inactive_cycles ? 0 : 1;

From 7a0fd5e1678505534573b3c14c6ff69ed8592596 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 11 Aug 2023 17:18:38 +0200
Subject: [PATCH 1193/2047] compiler_types: Introduce the Clang __preserve_most
 function attribute

[1]: "On X86-64 and AArch64 targets, this attribute changes the calling
convention of a function. The preserve_most calling convention attempts
to make the code in the caller as unintrusive as possible. This
convention behaves identically to the C calling convention on how
arguments and return values are passed, but it uses a different set of
caller/callee-saved registers. This alleviates the burden of saving and
recovering a large register set before and after the call in the caller.
If the arguments are passed in callee-saved registers, then they will be
preserved by the callee across the call. This doesn't apply for values
returned in callee-saved registers.

 * On X86-64 the callee preserves all general purpose registers, except
   for R11. R11 can be used as a scratch register. Floating-point
   registers (XMMs/YMMs) are not preserved and need to be saved by the
   caller.

 * On AArch64 the callee preserve all general purpose registers, except
   x0-X8 and X16-X18."

[1] https://clang.llvm.org/docs/AttributeReference.html#preserve-most

Introduce the attribute to compiler_types.h as __preserve_most.

Use of this attribute results in better code generation for calls to
very rarely called functions, such as error-reporting functions, or
rarely executed slow paths.

Beware that the attribute conflicts with instrumentation calls inserted
on function entry which do not use __preserve_most themselves. Notably,
function tracing which assumes the normal C calling convention for the
given architecture.  Where the attribute is supported, __preserve_most
will imply notrace. It is recommended to restrict use of the attribute
to functions that should or already disable tracing.

Note: The additional preprocessor check against architecture should not
be necessary if __has_attribute() only returns true where supported;
also see https://github.com/ClangBuiltLinux/linux/issues/1908. But until
__has_attribute() does the right thing, we also guard by known-supported
architectures to avoid build warnings on other architectures.

The attribute may be supported by a future GCC version (see
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110899).

Signed-off-by: Marco Elver <elver@google.com>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20230811151847.1594958-1-elver@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/compiler_types.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 547ea1ff806e..c523c6683789 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -106,6 +106,34 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
 #define __cold
 #endif
 
+/*
+ * On x86-64 and arm64 targets, __preserve_most changes the calling convention
+ * of a function to make the code in the caller as unintrusive as possible. This
+ * convention behaves identically to the C calling convention on how arguments
+ * and return values are passed, but uses a different set of caller- and callee-
+ * saved registers.
+ *
+ * The purpose is to alleviates the burden of saving and recovering a large
+ * register set before and after the call in the caller.  This is beneficial for
+ * rarely taken slow paths, such as error-reporting functions that may be called
+ * from hot paths.
+ *
+ * Note: This may conflict with instrumentation inserted on function entry which
+ * does not use __preserve_most or equivalent convention (if in assembly). Since
+ * function tracing assumes the normal C calling convention, where the attribute
+ * is supported, __preserve_most implies notrace.  It is recommended to restrict
+ * use of the attribute to functions that should or already disable tracing.
+ *
+ * Optional: not supported by gcc.
+ *
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#preserve-most
+ */
+#if __has_attribute(__preserve_most__) && (defined(CONFIG_X86_64) || defined(CONFIG_ARM64))
+# define __preserve_most notrace __attribute__((__preserve_most__))
+#else
+# define __preserve_most
+#endif
+
 /* Builtins */
 
 /*

From b16c42c8fde808b4f047d94f1f2aeda93487670d Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 11 Aug 2023 17:18:39 +0200
Subject: [PATCH 1194/2047] list_debug: Introduce inline wrappers for debug
 checks

Turn the list debug checking functions __list_*_valid() into inline
functions that wrap the out-of-line functions. Care is taken to ensure
the inline wrappers are always inlined, so that additional compiler
instrumentation (such as sanitizers) does not result in redundant
outlining.

This change is preparation for performing checks in the inline wrappers.

No functional change intended.

Signed-off-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20230811151847.1594958-2-elver@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/arm64/kvm/hyp/nvhe/list_debug.c |  6 ++---
 include/linux/list.h                 | 37 +++++++++++++++++++++++++---
 lib/list_debug.c                     | 11 ++++-----
 3 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c
index d68abd7ea124..16266a939a4c 100644
--- a/arch/arm64/kvm/hyp/nvhe/list_debug.c
+++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c
@@ -26,8 +26,8 @@ static inline __must_check bool nvhe_check_data_corruption(bool v)
 
 /* The predicates checked here are taken from lib/list_debug.c. */
 
-bool __list_add_valid(struct list_head *new, struct list_head *prev,
-		      struct list_head *next)
+bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev,
+				struct list_head *next)
 {
 	if (NVHE_CHECK_DATA_CORRUPTION(next->prev != prev) ||
 	    NVHE_CHECK_DATA_CORRUPTION(prev->next != next) ||
@@ -37,7 +37,7 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev,
 	return true;
 }
 
-bool __list_del_entry_valid(struct list_head *entry)
+bool __list_del_entry_valid_or_report(struct list_head *entry)
 {
 	struct list_head *prev, *next;
 
diff --git a/include/linux/list.h b/include/linux/list.h
index f10344dbad4d..130c6a1bb45c 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -39,10 +39,39 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
 }
 
 #ifdef CONFIG_DEBUG_LIST
-extern bool __list_add_valid(struct list_head *new,
-			      struct list_head *prev,
-			      struct list_head *next);
-extern bool __list_del_entry_valid(struct list_head *entry);
+/*
+ * Performs the full set of list corruption checks before __list_add().
+ * On list corruption reports a warning, and returns false.
+ */
+extern bool __list_add_valid_or_report(struct list_head *new,
+				       struct list_head *prev,
+				       struct list_head *next);
+
+/*
+ * Performs list corruption checks before __list_add(). Returns false if a
+ * corruption is detected, true otherwise.
+ */
+static __always_inline bool __list_add_valid(struct list_head *new,
+					     struct list_head *prev,
+					     struct list_head *next)
+{
+	return __list_add_valid_or_report(new, prev, next);
+}
+
+/*
+ * Performs the full set of list corruption checks before __list_del_entry().
+ * On list corruption reports a warning, and returns false.
+ */
+extern bool __list_del_entry_valid_or_report(struct list_head *entry);
+
+/*
+ * Performs list corruption checks before __list_del_entry(). Returns false if a
+ * corruption is detected, true otherwise.
+ */
+static __always_inline bool __list_del_entry_valid(struct list_head *entry)
+{
+	return __list_del_entry_valid_or_report(entry);
+}
 #else
 static inline bool __list_add_valid(struct list_head *new,
 				struct list_head *prev,
diff --git a/lib/list_debug.c b/lib/list_debug.c
index d98d43f80958..2def33b1491f 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -17,8 +17,8 @@
  * attempt).
  */
 
-bool __list_add_valid(struct list_head *new, struct list_head *prev,
-		      struct list_head *next)
+bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev,
+				struct list_head *next)
 {
 	if (CHECK_DATA_CORRUPTION(prev == NULL,
 			"list_add corruption. prev is NULL.\n") ||
@@ -37,9 +37,9 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev,
 
 	return true;
 }
-EXPORT_SYMBOL(__list_add_valid);
+EXPORT_SYMBOL(__list_add_valid_or_report);
 
-bool __list_del_entry_valid(struct list_head *entry)
+bool __list_del_entry_valid_or_report(struct list_head *entry)
 {
 	struct list_head *prev, *next;
 
@@ -65,6 +65,5 @@ bool __list_del_entry_valid(struct list_head *entry)
 		return false;
 
 	return true;
-
 }
-EXPORT_SYMBOL(__list_del_entry_valid);
+EXPORT_SYMBOL(__list_del_entry_valid_or_report);

From aebc7b0d8d91bbc69e976909963046bc48bca4fd Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 11 Aug 2023 17:18:40 +0200
Subject: [PATCH 1195/2047] list: Introduce CONFIG_LIST_HARDENED

Numerous production kernel configs (see [1, 2]) are choosing to enable
CONFIG_DEBUG_LIST, which is also being recommended by KSPP for hardened
configs [3]. The motivation behind this is that the option can be used
as a security hardening feature (e.g. CVE-2019-2215 and CVE-2019-2025
are mitigated by the option [4]).

The feature has never been designed with performance in mind, yet common
list manipulation is happening across hot paths all over the kernel.

Introduce CONFIG_LIST_HARDENED, which performs list pointer checking
inline, and only upon list corruption calls the reporting slow path.

To generate optimal machine code with CONFIG_LIST_HARDENED:

  1. Elide checking for pointer values which upon dereference would
     result in an immediate access fault (i.e. minimal hardening
     checks).  The trade-off is lower-quality error reports.

  2. Use the __preserve_most function attribute (available with Clang,
     but not yet with GCC) to minimize the code footprint for calling
     the reporting slow path. As a result, function size of callers is
     reduced by avoiding saving registers before calling the rarely
     called reporting slow path.

     Note that all TUs in lib/Makefile already disable function tracing,
     including list_debug.c, and __preserve_most's implied notrace has
     no effect in this case.

  3. Because the inline checks are a subset of the full set of checks in
     __list_*_valid_or_report(), always return false if the inline
     checks failed.  This avoids redundant compare and conditional
     branch right after return from the slow path.

As a side-effect of the checks being inline, if the compiler can prove
some condition to always be true, it can completely elide some checks.

Since DEBUG_LIST is functionally a superset of LIST_HARDENED, the
Kconfig variables are changed to reflect that: DEBUG_LIST selects
LIST_HARDENED, whereas LIST_HARDENED itself has no dependency on
DEBUG_LIST.

Running netperf with CONFIG_LIST_HARDENED (using a Clang compiler with
"preserve_most") shows throughput improvements, in my case of ~7% on
average (up to 20-30% on some test cases).

Link: https://r.android.com/1266735 [1]
Link: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/blob/main/config [2]
Link: https://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project/Recommended_Settings [3]
Link: https://googleprojectzero.blogspot.com/2019/11/bad-binder-android-in-wild-exploit.html [4]
Signed-off-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20230811151847.1594958-3-elver@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/arm64/kvm/hyp/nvhe/Makefile     |  2 +-
 arch/arm64/kvm/hyp/nvhe/list_debug.c |  2 +
 drivers/misc/lkdtm/bugs.c            |  4 +-
 include/linux/list.h                 | 64 +++++++++++++++++++++++++---
 lib/Kconfig.debug                    |  9 +++-
 lib/Makefile                         |  2 +-
 lib/list_debug.c                     |  5 ++-
 security/Kconfig.hardening           | 13 ++++++
 8 files changed, 88 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 9ddc025e4b86..2250253a6429 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -25,7 +25,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
 	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
 hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
-hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
+hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
 hyp-obj-y += $(lib-objs)
 
 ##
diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c
index 16266a939a4c..46a2d4f2b3c6 100644
--- a/arch/arm64/kvm/hyp/nvhe/list_debug.c
+++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c
@@ -26,6 +26,7 @@ static inline __must_check bool nvhe_check_data_corruption(bool v)
 
 /* The predicates checked here are taken from lib/list_debug.c. */
 
+__list_valid_slowpath
 bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev,
 				struct list_head *next)
 {
@@ -37,6 +38,7 @@ bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev,
 	return true;
 }
 
+__list_valid_slowpath
 bool __list_del_entry_valid_or_report(struct list_head *entry)
 {
 	struct list_head *prev, *next;
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index 3c95600ab2f7..963b4dee6a7d 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -393,7 +393,7 @@ static void lkdtm_CORRUPT_LIST_ADD(void)
 		pr_err("Overwrite did not happen, but no BUG?!\n");
 	else {
 		pr_err("list_add() corruption not detected!\n");
-		pr_expected_config(CONFIG_DEBUG_LIST);
+		pr_expected_config(CONFIG_LIST_HARDENED);
 	}
 }
 
@@ -420,7 +420,7 @@ static void lkdtm_CORRUPT_LIST_DEL(void)
 		pr_err("Overwrite did not happen, but no BUG?!\n");
 	else {
 		pr_err("list_del() corruption not detected!\n");
-		pr_expected_config(CONFIG_DEBUG_LIST);
+		pr_expected_config(CONFIG_LIST_HARDENED);
 	}
 }
 
diff --git a/include/linux/list.h b/include/linux/list.h
index 130c6a1bb45c..164b4d0e9d2a 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -38,39 +38,91 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
 	WRITE_ONCE(list->prev, list);
 }
 
+#ifdef CONFIG_LIST_HARDENED
+
 #ifdef CONFIG_DEBUG_LIST
+# define __list_valid_slowpath
+#else
+# define __list_valid_slowpath __cold __preserve_most
+#endif
+
 /*
  * Performs the full set of list corruption checks before __list_add().
  * On list corruption reports a warning, and returns false.
  */
-extern bool __list_add_valid_or_report(struct list_head *new,
-				       struct list_head *prev,
-				       struct list_head *next);
+extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new,
+							     struct list_head *prev,
+							     struct list_head *next);
 
 /*
  * Performs list corruption checks before __list_add(). Returns false if a
  * corruption is detected, true otherwise.
+ *
+ * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking
+ * inline to catch non-faulting corruptions, and only if a corruption is
+ * detected calls the reporting function __list_add_valid_or_report().
  */
 static __always_inline bool __list_add_valid(struct list_head *new,
 					     struct list_head *prev,
 					     struct list_head *next)
 {
-	return __list_add_valid_or_report(new, prev, next);
+	bool ret = true;
+
+	if (!IS_ENABLED(CONFIG_DEBUG_LIST)) {
+		/*
+		 * With the hardening version, elide checking if next and prev
+		 * are NULL, since the immediate dereference of them below would
+		 * result in a fault if NULL.
+		 *
+		 * With the reduced set of checks, we can afford to inline the
+		 * checks, which also gives the compiler a chance to elide some
+		 * of them completely if they can be proven at compile-time. If
+		 * one of the pre-conditions does not hold, the slow-path will
+		 * show a report which pre-condition failed.
+		 */
+		if (likely(next->prev == prev && prev->next == next && new != prev && new != next))
+			return true;
+		ret = false;
+	}
+
+	ret &= __list_add_valid_or_report(new, prev, next);
+	return ret;
 }
 
 /*
  * Performs the full set of list corruption checks before __list_del_entry().
  * On list corruption reports a warning, and returns false.
  */
-extern bool __list_del_entry_valid_or_report(struct list_head *entry);
+extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry);
 
 /*
  * Performs list corruption checks before __list_del_entry(). Returns false if a
  * corruption is detected, true otherwise.
+ *
+ * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking
+ * inline to catch non-faulting corruptions, and only if a corruption is
+ * detected calls the reporting function __list_del_entry_valid_or_report().
  */
 static __always_inline bool __list_del_entry_valid(struct list_head *entry)
 {
-	return __list_del_entry_valid_or_report(entry);
+	bool ret = true;
+
+	if (!IS_ENABLED(CONFIG_DEBUG_LIST)) {
+		struct list_head *prev = entry->prev;
+		struct list_head *next = entry->next;
+
+		/*
+		 * With the hardening version, elide checking if next and prev
+		 * are NULL, LIST_POISON1 or LIST_POISON2, since the immediate
+		 * dereference of them below would result in a fault.
+		 */
+		if (likely(prev->next == entry && next->prev == entry))
+			return true;
+		ret = false;
+	}
+
+	ret &= __list_del_entry_valid_or_report(entry);
+	return ret;
 }
 #else
 static inline bool __list_add_valid(struct list_head *new,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index fbc89baf7de6..c38745ad46eb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1674,9 +1674,14 @@ menu "Debug kernel data structures"
 config DEBUG_LIST
 	bool "Debug linked list manipulation"
 	depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION
+	select LIST_HARDENED
 	help
-	  Enable this to turn on extended checks in the linked-list
-	  walking routines.
+	  Enable this to turn on extended checks in the linked-list walking
+	  routines.
+
+	  This option trades better quality error reports for performance, and
+	  is more suitable for kernel debugging. If you care about performance,
+	  you should only enable CONFIG_LIST_HARDENED instead.
 
 	  If unsure, say N.
 
diff --git a/lib/Makefile b/lib/Makefile
index 42d307ade225..110936c9a68b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -161,7 +161,7 @@ obj-$(CONFIG_BTREE) += btree.o
 obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o
 obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
+obj-$(CONFIG_LIST_HARDENED) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
 obj-$(CONFIG_BITREVERSE) += bitrev.o
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 2def33b1491f..db602417febf 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -2,7 +2,8 @@
  * Copyright 2006, Red Hat, Inc., Dave Jones
  * Released under the General Public License (GPL).
  *
- * This file contains the linked list validation for DEBUG_LIST.
+ * This file contains the linked list validation and error reporting for
+ * LIST_HARDENED and DEBUG_LIST.
  */
 
 #include <linux/export.h>
@@ -17,6 +18,7 @@
  * attempt).
  */
 
+__list_valid_slowpath
 bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev,
 				struct list_head *next)
 {
@@ -39,6 +41,7 @@ bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev,
 }
 EXPORT_SYMBOL(__list_add_valid_or_report);
 
+__list_valid_slowpath
 bool __list_del_entry_valid_or_report(struct list_head *entry)
 {
 	struct list_head *prev, *next;
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index 0f295961e773..ffc3c702b461 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -279,6 +279,19 @@ config ZERO_CALL_USED_REGS
 
 endmenu
 
+menu "Hardening of kernel data structures"
+
+config LIST_HARDENED
+	bool "Check integrity of linked list manipulation"
+	help
+	  Minimal integrity checking in the linked-list manipulation routines
+	  to catch memory corruptions that are not guaranteed to result in an
+	  immediate access fault.
+
+	  If unsure, say N.
+
+endmenu
+
 config CC_HAS_RANDSTRUCT
 	def_bool $(cc-option,-frandomize-layout-seed-file=/dev/null)
 	# Randstruct was first added in Clang 15, but it isn't safe to use until

From aa9f10d57056cea51d41283d3785bccbbb9f459e Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 11 Aug 2023 17:18:41 +0200
Subject: [PATCH 1196/2047] hardening: Move BUG_ON_DATA_CORRUPTION to hardening
 options

BUG_ON_DATA_CORRUPTION is turning detected corruptions of list data
structures from WARNings into BUGs. This can be useful to stop further
corruptions or even exploitation attempts.

However, the option has less to do with debugging than with hardening.
With the introduction of LIST_HARDENED, it makes more sense to move it
to the hardening options, where it selects LIST_HARDENED instead.

Without this change, combining BUG_ON_DATA_CORRUPTION with LIST_HARDENED
alone wouldn't be possible, because DEBUG_LIST would always be selected
by BUG_ON_DATA_CORRUPTION.

Signed-off-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20230811151847.1594958-4-elver@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 lib/Kconfig.debug          | 12 +-----------
 security/Kconfig.hardening | 10 ++++++++++
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c38745ad46eb..c7348d1fabe5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1673,7 +1673,7 @@ menu "Debug kernel data structures"
 
 config DEBUG_LIST
 	bool "Debug linked list manipulation"
-	depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION
+	depends on DEBUG_KERNEL
 	select LIST_HARDENED
 	help
 	  Enable this to turn on extended checks in the linked-list walking
@@ -1715,16 +1715,6 @@ config DEBUG_NOTIFIERS
 	  This is a relatively cheap check but if you care about maximum
 	  performance, say N.
 
-config BUG_ON_DATA_CORRUPTION
-	bool "Trigger a BUG when data corruption is detected"
-	select DEBUG_LIST
-	help
-	  Select this option if the kernel should BUG when it encounters
-	  data corruption in kernel memory structures when they get checked
-	  for validity.
-
-	  If unsure, say N.
-
 config DEBUG_MAPLE_TREE
 	bool "Debug maple trees"
 	depends on DEBUG_KERNEL
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index ffc3c702b461..2cff851ebfd7 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -290,6 +290,16 @@ config LIST_HARDENED
 
 	  If unsure, say N.
 
+config BUG_ON_DATA_CORRUPTION
+	bool "Trigger a BUG when data corruption is detected"
+	select LIST_HARDENED
+	help
+	  Select this option if the kernel should BUG when it encounters
+	  data corruption in kernel memory structures when they get checked
+	  for validity.
+
+	  If unsure, say N.
+
 endmenu
 
 config CC_HAS_RANDSTRUCT

From 967afdf808cf66908a55c55b8ec5937cc20676ce Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Wed, 2 Aug 2023 07:25:56 -0600
Subject: [PATCH 1197/2047] alpha: Replace one-element array with
 flexible-array member

One-element and zero-length arrays are deprecated. So, replace
one-element array in struct osf_dirent with flexible-array
member.

This results in no differences in binary output.

Signed-off-by: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/ZMpZZBShlLqyD3ax@work
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/alpha/kernel/osf_sys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index d98701ee36c6..5db88b627439 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -97,7 +97,7 @@ struct osf_dirent {
 	unsigned int d_ino;
 	unsigned short d_reclen;
 	unsigned short d_namlen;
-	char d_name[1];
+	char d_name[];
 };
 
 struct osf_dirent_callback {

From b93ec212bcaccf61313ddbf2527319a7585805ee Mon Sep 17 00:00:00 2001
From: "GONG, Ruiqi" <gongruiqi1@huawei.com>
Date: Wed, 16 Aug 2023 11:32:10 +0800
Subject: [PATCH 1198/2047] doc: idmappings: fix an error and rephrase a
 paragraph

If the modified paragraph is referring to the idmapping mentioned in the
previous paragraph (i.e. `u0:k10000:r10000`), then it is `u0` that the
upper idmapset starts with, not `u1000`.

Fix this error and rephrase this paragraph a bit to make this reference
more explicit.

Reported-by: Wang Lei <wanglei249@huawei.com>
Signed-off-by: GONG, Ruiqi <gongruiqi1@huawei.com>
Message-Id: <20230816033210.914262-1-gongruiqi@huaweicloud.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 Documentation/filesystems/idmappings.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst
index f3d168c9f0b9..d095c5838f94 100644
--- a/Documentation/filesystems/idmappings.rst
+++ b/Documentation/filesystems/idmappings.rst
@@ -146,9 +146,10 @@ For the rest of this document we will prefix all userspace ids with ``u`` and
 all kernel ids with ``k``. Ranges of idmappings will be prefixed with ``r``. So
 an idmapping will be written as ``u0:k10000:r10000``.
 
-For example, the id ``u1000`` is an id in the upper idmapset or "userspace
-idmapset" starting with ``u1000``. And it is mapped to ``k11000`` which is a
-kernel id in the lower idmapset or "kernel idmapset" starting with ``k10000``.
+For example, within this idmapping, the id ``u1000`` is an id in the upper
+idmapset or "userspace idmapset" starting with ``u0``. And it is mapped to
+``k11000`` which is a kernel id in the lower idmapset or "kernel idmapset"
+starting with ``k10000``.
 
 A kernel id is always created by an idmapping. Such idmappings are associated
 with user namespaces. Since we mainly care about how idmappings work we're not

From e221804dad4e6fe3a0cf192ba3c42cd2f328bdac Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@amd.com>
Date: Wed, 16 Aug 2023 12:21:22 +1000
Subject: [PATCH 1199/2047] x86/sev: Do not handle #VC for DR7 read/write

With MSR_AMD64_SEV_DEBUG_SWAP enabled, the guest is not expected to
receive a #VC for reads or writes of DR7.

Update the SNP_FEATURES_PRESENT mask with MSR_AMD64_SNP_DEBUG_SWAP so
an SNP guest doesn't gracefully terminate during SNP feature negotiation
if MSR_AMD64_SEV_DEBUG_SWAP is enabled.

Since a guest is not expected to receive a #VC on DR7 accesses when
MSR_AMD64_SEV_DEBUG_SWAP is enabled, return an error from the #VC
handler in this situation.

Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Carlos Bilbao <carlos.bilbao@amd.com>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Link: https://lore.kernel.org/r/20230816022122.981998-1-aik@amd.com
---
 arch/x86/boot/compressed/sev.c | 2 +-
 arch/x86/kernel/sev.c          | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index c3e343bd4760..e83e71089cf1 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -365,7 +365,7 @@ static void enforce_vmpl0(void)
  * by the guest kernel. As and when a new feature is implemented in the
  * guest kernel, a corresponding bit should be added to the mask.
  */
-#define SNP_FEATURES_PRESENT (0)
+#define SNP_FEATURES_PRESENT	MSR_AMD64_SNP_DEBUG_SWAP
 
 void snp_check_features(void)
 {
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 1ee7bed453de..d380c9399480 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1575,6 +1575,9 @@ static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
 	long val, *reg = vc_insn_get_rm(ctxt);
 	enum es_result ret;
 
+	if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
+		return ES_VMM_ERROR;
+
 	if (!reg)
 		return ES_DECODE_FAILED;
 
@@ -1612,6 +1615,9 @@ static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
 	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
 	long *reg = vc_insn_get_rm(ctxt);
 
+	if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
+		return ES_VMM_ERROR;
+
 	if (!reg)
 		return ES_DECODE_FAILED;
 

From ecd02b6dee29b3d3a180cfda39d989752d0e913a Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 16 Aug 2023 17:39:38 +0800
Subject: [PATCH 1200/2047] spi: dw-mmio: keep old name same as documentation

The documentation has not use the new name(host/target),
so keep the comment words same as documentation used.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230816093938.1274806-1-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-dw-mmio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
index 9e041a28ed47..805264c9c65c 100644
--- a/drivers/spi/spi-dw-mmio.c
+++ b/drivers/spi/spi-dw-mmio.c
@@ -68,7 +68,7 @@ struct dw_spi_mscc {
 		((((val) << 1) | BIT(0)) << ELBA_SPICS_OFFSET(cs))
 
 /*
- * The Designware SPI controller (referred to as host in the documentation)
+ * The Designware SPI controller (referred to as master in the documentation)
  * automatically deasserts chip select when the tx fifo is empty. The chip
  * selects then needs to be either driven as GPIOs or, for the first 4 using
  * the SPI boot controller registers. the final chip select is an OR gate
@@ -142,7 +142,7 @@ static int dw_spi_mscc_jaguar2_init(struct platform_device *pdev,
 }
 
 /*
- * The Designware SPI controller (referred to as host in the
+ * The Designware SPI controller (referred to as master in the
  * documentation) automatically deasserts chip select when the tx fifo
  * is empty. The chip selects then needs to be driven by a CS override
  * register. enable is an active low signal.

From 46f53bde6e69edf8a2e0943babb3f160b30ee436 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 16 Aug 2023 17:40:03 +0800
Subject: [PATCH 1201/2047] spi: rpc-if: switch to use devm_spi_alloc_host()

Switch to use modern name function devm_spi_alloc_host().

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230816094013.1275068-15-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rpc-if.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-rpc-if.c b/drivers/spi/spi-rpc-if.c
index 2f78124a1b59..e11146932828 100644
--- a/drivers/spi/spi-rpc-if.c
+++ b/drivers/spi/spi-rpc-if.c
@@ -134,7 +134,7 @@ static int rpcif_spi_probe(struct platform_device *pdev)
 	struct rpcif *rpc;
 	int error;
 
-	ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*rpc));
+	ctlr = devm_spi_alloc_host(&pdev->dev, sizeof(*rpc));
 	if (!ctlr)
 		return -ENOMEM;
 

From 8ddccc8a7d06f7ea4d8579970c95609d1b1de77b Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Thu, 6 Jul 2023 12:28:17 +0200
Subject: [PATCH 1202/2047] s390/boot: cleanup number of page table levels
 setup

The separate vmalloc area size check against _REGION2_SIZE
is needed in case user provided insanely large value using
vmalloc= kernel command line parameter. That could lead to
overflow and selecting 3 page table levels instead of 4.

Use size_add() for the overflow check and get rid of the
extra vmalloc area check.

With the current values of CONFIG_MAX_PHYSMEM_BITS and
PAGES_PER_SECTION the sum of maximal possible size of
identity mapping and vmemmap area (derived from these
macros) plus modules area size MODULES_LEN can not
overflow. Thus, that sum is used as first addend while
vmalloc area size is second addend for size_add().

Suggested-by: Heiko Carstens <hca@linux.ibm.com>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/boot/startup.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index a1f792fcc710..b058e2a575c1 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -177,6 +177,7 @@ static unsigned long setup_kernel_memory_layout(void)
 	unsigned long asce_limit;
 	unsigned long rte_size;
 	unsigned long pages;
+	unsigned long vsize;
 	unsigned long vmax;
 
 	pages = ident_map_size / PAGE_SIZE;
@@ -184,11 +185,9 @@ static unsigned long setup_kernel_memory_layout(void)
 	vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
 
 	/* choose kernel address space layout: 4 or 3 levels. */
-	vmemmap_start = round_up(ident_map_size, _REGION3_SIZE);
-	if (IS_ENABLED(CONFIG_KASAN) ||
-	    vmalloc_size > _REGION2_SIZE ||
-	    vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
-		    _REGION2_SIZE) {
+	vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size + MODULES_LEN;
+	vsize = size_add(vsize, vmalloc_size);
+	if (IS_ENABLED(CONFIG_KASAN) || (vsize > _REGION2_SIZE)) {
 		asce_limit = _REGION1_SIZE;
 		rte_size = _REGION2_SIZE;
 	} else {

From a984f27ec26323204045c306f8b25bc61e042626 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Fri, 4 Aug 2023 15:39:06 +0200
Subject: [PATCH 1203/2047] s390/mm: define Real Memory Copy size and mask
 macros

Make Real Memory Copy area size and mask explicit.
This does not bring any functional change and only
needed for clarity.

Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/boot/startup.c        | 2 +-
 arch/s390/include/asm/maccess.h | 3 +++
 arch/s390/mm/dump_pagetables.c  | 2 +-
 arch/s390/mm/maccess.c          | 7 ++++---
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index b058e2a575c1..d61428190cdd 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -204,7 +204,7 @@ static unsigned long setup_kernel_memory_layout(void)
 	/* force vmalloc and modules below kasan shadow */
 	vmax = min(vmax, KASAN_SHADOW_START);
 #endif
-	__memcpy_real_area = round_down(vmax - PAGE_SIZE, PAGE_SIZE);
+	__memcpy_real_area = round_down(vmax - MEMCPY_REAL_SIZE, PAGE_SIZE);
 	__abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
 				   sizeof(struct lowcore));
 	MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE);
diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h
index cfec3141fdba..50225940d971 100644
--- a/arch/s390/include/asm/maccess.h
+++ b/arch/s390/include/asm/maccess.h
@@ -4,6 +4,9 @@
 
 #include <linux/types.h>
 
+#define MEMCPY_REAL_SIZE	PAGE_SIZE
+#define MEMCPY_REAL_MASK	PAGE_MASK
+
 struct iov_iter;
 
 extern unsigned long __memcpy_real_area;
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index ba5f80268878..afa5db750d92 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -297,7 +297,7 @@ static int pt_dump_init(void)
 	address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
 	address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE;
 	address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area;
-	address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + PAGE_SIZE;
+	address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + MEMCPY_REAL_SIZE;
 	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
 	address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
 	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index cbe1df1e9c18..c805b3e2592b 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -86,11 +86,12 @@ size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
 	void *chunk;
 	pte_t pte;
 
+	BUILD_BUG_ON(MEMCPY_REAL_SIZE != PAGE_SIZE);
 	while (count) {
-		phys = src & PAGE_MASK;
-		offset = src & ~PAGE_MASK;
+		phys = src & MEMCPY_REAL_MASK;
+		offset = src & ~MEMCPY_REAL_MASK;
 		chunk = (void *)(__memcpy_real_area + offset);
-		len = min(count, PAGE_SIZE - offset);
+		len = min(count, MEMCPY_REAL_SIZE - offset);
 		pte = mk_pte_phys(phys, PAGE_KERNEL_RO);
 
 		mutex_lock(&memcpy_real_mutex);

From 09cd4ffafb2fbb1a18a88890b13520c501409d99 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Fri, 4 Aug 2023 15:24:28 +0200
Subject: [PATCH 1204/2047] s390/boot: account Real Memory Copy and Lowcore
 areas

Real Memory Copy and (absolute) Lowcore areas are
not accounted when virtual memory layout is set up.

Fixes: 4df29d2b9024 ("s390/smp: rework absolute lowcore access")
Fixes: 2f0e8aae26a2 ("s390/mm: rework memcpy_real() to avoid DAT-off mode")
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/boot/startup.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index d61428190cdd..a81f92563037 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -185,7 +185,8 @@ static unsigned long setup_kernel_memory_layout(void)
 	vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
 
 	/* choose kernel address space layout: 4 or 3 levels. */
-	vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size + MODULES_LEN;
+	vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size +
+		MODULES_LEN + MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE;
 	vsize = size_add(vsize, vmalloc_size);
 	if (IS_ENABLED(CONFIG_KASAN) || (vsize > _REGION2_SIZE)) {
 		asce_limit = _REGION1_SIZE;

From 5cfdff02e97a46f90b3ba408af62ad3dcb0dc586 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Sat, 5 Aug 2023 10:59:09 +0200
Subject: [PATCH 1205/2047] s390/boot: fix multi-line comments style

Make multi-line comment style consistent across the source.

Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/boot/startup.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index a81f92563037..b9681cb22753 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -195,8 +195,9 @@ static unsigned long setup_kernel_memory_layout(void)
 		asce_limit = _REGION2_SIZE;
 		rte_size = _REGION3_SIZE;
 	}
+
 	/*
-	 * forcing modules and vmalloc area under the ultravisor
+	 * Forcing modules and vmalloc area under the ultravisor
 	 * secure storage limit, so that any vmalloc allocation
 	 * we do could be used to back secure guest storage.
 	 */
@@ -288,8 +289,9 @@ void startup_kernel(void)
 
 	setup_lpp();
 	safe_addr = mem_safe_offset();
+
 	/*
-	 * reserve decompressor memory together with decompression heap, buffer and
+	 * Reserve decompressor memory together with decompression heap, buffer and
 	 * memory which might be occupied by uncompressed kernel at default 1Mb
 	 * position (if KASLR is off or failed).
 	 */

From 2d1494fb31405df0dfb6006fdb2b24e7880258cd Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sat, 12 Aug 2023 17:12:54 +0200
Subject: [PATCH 1206/2047] s390/mm: make virt_to_pfn() a static inline

Making virt_to_pfn() a static inline taking a strongly typed
(const void *) makes the contract of a passing a pointer of that
type to the function explicit and exposes any misuse of the
macro virt_to_pfn() acting polymorphic and accepting many types
such as (void *), (unitptr_t) or (unsigned long) as arguments
without warnings.

For symmetry do the same with pfn_to_virt() reflecting the
current layout in asm-generic/page.h.

Doing this reveals a number of offenders in the arch code and
the S390-specific drivers, so just bite the bullet and fix up
all of those as well.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Link: https://lore.kernel.org/r/20230812-virt-to-phys-s390-v2-1-6c40f31fe36f@linaro.org
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/kfence.h |  2 +-
 arch/s390/include/asm/page.h   | 12 ++++++++++--
 arch/s390/mm/cmm.c             |  2 +-
 arch/s390/mm/vmem.c            |  2 +-
 drivers/s390/block/scm_blk.c   |  2 +-
 drivers/s390/char/vmcp.c       |  2 +-
 6 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/arch/s390/include/asm/kfence.h b/arch/s390/include/asm/kfence.h
index d55ba878378b..e47fd8cbe701 100644
--- a/arch/s390/include/asm/kfence.h
+++ b/arch/s390/include/asm/kfence.h
@@ -35,7 +35,7 @@ static __always_inline void kfence_split_mapping(void)
 
 static inline bool kfence_protect_page(unsigned long addr, bool protect)
 {
-	__kernel_map_pages(virt_to_page(addr), 1, !protect);
+	__kernel_map_pages(virt_to_page((void *)addr), 1, !protect);
 	return true;
 }
 
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index a9c138fcd2ad..cfec0743314e 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -191,8 +191,16 @@ int arch_make_page_accessible(struct page *page);
 #define phys_to_page(phys)	pfn_to_page(phys_to_pfn(phys))
 #define page_to_phys(page)	pfn_to_phys(page_to_pfn(page))
 
-#define pfn_to_virt(pfn)	__va(pfn_to_phys(pfn))
-#define virt_to_pfn(kaddr)	(phys_to_pfn(__pa(kaddr)))
+static inline void *pfn_to_virt(unsigned long pfn)
+{
+	return __va(pfn_to_phys(pfn));
+}
+
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+	return phys_to_pfn(__pa(kaddr));
+}
+
 #define pfn_to_kaddr(pfn)	pfn_to_virt(pfn)
 
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 5300c6867d5e..f47515313226 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -90,7 +90,7 @@ static long cmm_alloc_pages(long nr, long *counter,
 			} else
 				free_page((unsigned long) npa);
 		}
-		diag10_range(virt_to_pfn(addr), 1);
+		diag10_range(virt_to_pfn((void *)addr), 1);
 		pa->pages[pa->index++] = addr;
 		(*counter)++;
 		spin_unlock(&cmm_lock);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index be69cb2d47eb..3391efb59641 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -36,7 +36,7 @@ static void vmem_free_pages(unsigned long addr, int order)
 {
 	/* We don't expect boot memory to be removed ever. */
 	if (!slab_is_available() ||
-	    WARN_ON_ONCE(PageReserved(virt_to_page(addr))))
+	    WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr))))
 		return;
 	free_pages(addr, order);
 }
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 0c1df1d5f1ac..3a9cc8a4a230 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -134,7 +134,7 @@ static void scm_request_done(struct scm_request *scmrq)
 
 		if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
 		    IS_ALIGNED(aidaw, PAGE_SIZE))
-			mempool_free(virt_to_page(aidaw), aidaw_pool);
+			mempool_free(virt_to_page((void *)aidaw), aidaw_pool);
 	}
 
 	spin_lock_irqsave(&list_lock, flags);
diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 4cebfaaa22b4..eb0520a9d4af 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -89,7 +89,7 @@ static void vmcp_response_free(struct vmcp_session *session)
 	order = get_order(session->bufsize);
 	nr_pages = ALIGN(session->bufsize, PAGE_SIZE) >> PAGE_SHIFT;
 	if (session->cma_alloc) {
-		page = virt_to_page((unsigned long)session->response);
+		page = virt_to_page(session->response);
 		cma_release(vmcp_cma, page, nr_pages);
 		session->cma_alloc = 0;
 	} else {

From c8f40a0bccefd613748d080147469a4652d6e74c Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Date: Thu, 10 Aug 2023 10:22:36 +0200
Subject: [PATCH 1207/2047] s390/dcssblk: fix kernel crash with list_add
 corruption

Commit fb08a1908cb1 ("dax: simplify the dax_device <-> gendisk
association") introduced new logic for gendisk association, requiring
drivers to explicitly call dax_add_host() and dax_remove_host().

For dcssblk driver, some dax_remove_host() calls were missing, e.g. in
device remove path. The commit also broke error handling for out_dax case
in device add path, resulting in an extra put_device() w/o the previous
get_device() in that case.

This lead to stale xarray entries after device add / remove cycles. In the
case when a previously used struct gendisk pointer (xarray index) would be
used again, because blk_alloc_disk() happened to return such a pointer, the
xa_insert() in dax_add_host() would fail and go to out_dax, doing the extra
put_device() in the error path. In combination with an already flawed error
handling in dcssblk (device_register() cleanup), which needs to be
addressed in a separate patch, this resulted in a missing device_del() /
klist_del(), and eventually in the kernel crash with list_add corruption on
a subsequent device_add() / klist_add().

Fix this by adding the missing dax_remove_host() calls, and also move the
put_device() in the error path to restore the previous logic.

Fixes: fb08a1908cb1 ("dax: simplify the dax_device <-> gendisk association")
Cc: <stable@vger.kernel.org> # 5.17+
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/block/dcssblk.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 6eafd0a34483..06bcb6c78909 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -412,6 +412,7 @@ removeseg:
 	}
 	list_del(&dev_info->lh);
 
+	dax_remove_host(dev_info->gd);
 	kill_dax(dev_info->dax_dev);
 	put_dax(dev_info->dax_dev);
 	del_gendisk(dev_info->gd);
@@ -707,9 +708,9 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 	goto out;
 
 out_dax_host:
+	put_device(&dev_info->dev);
 	dax_remove_host(dev_info->gd);
 out_dax:
-	put_device(&dev_info->dev);
 	kill_dax(dev_info->dax_dev);
 	put_dax(dev_info->dax_dev);
 put_dev:
@@ -789,6 +790,7 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch
 	}
 
 	list_del(&dev_info->lh);
+	dax_remove_host(dev_info->gd);
 	kill_dax(dev_info->dax_dev);
 	put_dax(dev_info->dax_dev);
 	del_gendisk(dev_info->gd);

From ea5717cb13468323a7c3dd394748301802991f39 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Tue, 15 Aug 2023 09:26:06 +0200
Subject: [PATCH 1208/2047] s390/ipl: add missing secure/has_secure file to ipl
 type 'unknown'

OS installers are relying on /sys/firmware/ipl/has_secure to be
present on machines supporting secure boot. This file is present
for all IPL types, but not the unknown type, which prevents a secure
installation when an LPAR is booted in HMC via FTP(s), because
this is an unknown IPL type in linux. While at it, also add the secure
file.

Fixes: c9896acc7851 ("s390/ipl: Provide has_secure sysfs attribute")
Cc: stable@vger.kernel.org
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/ipl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 85a00d97a314..dfcb2b563e2b 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -640,6 +640,8 @@ static struct attribute_group ipl_ccw_attr_group_lpar = {
 
 static struct attribute *ipl_unknown_attrs[] = {
 	&sys_ipl_type_attr.attr,
+	&sys_ipl_secure_attr.attr,
+	&sys_ipl_has_secure_attr.attr,
 	NULL,
 };
 

From 7645dcddc2666f57e37ed3dda50b8b273b62c88a Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Tue, 15 Aug 2023 10:27:08 +0200
Subject: [PATCH 1209/2047] s390/ipl: add common ipl parameter attribute group

All ipl types have 'secure','has_secure' and type parameters. Move
these to a common ipl parameter group so that they don't need to be
present in each ipl parameter group.

Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/ipl.c | 28 ++++++----------------------
 1 file changed, 6 insertions(+), 22 deletions(-)

diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index dfcb2b563e2b..7f3a84e414a4 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -557,15 +557,12 @@ static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
 	__ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
 
 static struct attribute *ipl_fcp_attrs[] = {
-	&sys_ipl_type_attr.attr,
 	&sys_ipl_device_attr.attr,
 	&sys_ipl_fcp_wwpn_attr.attr,
 	&sys_ipl_fcp_lun_attr.attr,
 	&sys_ipl_fcp_bootprog_attr.attr,
 	&sys_ipl_fcp_br_lba_attr.attr,
 	&sys_ipl_ccw_loadparm_attr.attr,
-	&sys_ipl_secure_attr.attr,
-	&sys_ipl_has_secure_attr.attr,
 	NULL,
 };
 
@@ -575,14 +572,11 @@ static struct attribute_group ipl_fcp_attr_group = {
 };
 
 static struct attribute *ipl_nvme_attrs[] = {
-	&sys_ipl_type_attr.attr,
 	&sys_ipl_nvme_fid_attr.attr,
 	&sys_ipl_nvme_nsid_attr.attr,
 	&sys_ipl_nvme_bootprog_attr.attr,
 	&sys_ipl_nvme_br_lba_attr.attr,
 	&sys_ipl_ccw_loadparm_attr.attr,
-	&sys_ipl_secure_attr.attr,
-	&sys_ipl_has_secure_attr.attr,
 	NULL,
 };
 
@@ -592,13 +586,10 @@ static struct attribute_group ipl_nvme_attr_group = {
 };
 
 static struct attribute *ipl_eckd_attrs[] = {
-	&sys_ipl_type_attr.attr,
 	&sys_ipl_eckd_bootprog_attr.attr,
 	&sys_ipl_eckd_br_chr_attr.attr,
 	&sys_ipl_ccw_loadparm_attr.attr,
 	&sys_ipl_device_attr.attr,
-	&sys_ipl_secure_attr.attr,
-	&sys_ipl_has_secure_attr.attr,
 	NULL,
 };
 
@@ -610,21 +601,15 @@ static struct attribute_group ipl_eckd_attr_group = {
 /* CCW ipl device attributes */
 
 static struct attribute *ipl_ccw_attrs_vm[] = {
-	&sys_ipl_type_attr.attr,
 	&sys_ipl_device_attr.attr,
 	&sys_ipl_ccw_loadparm_attr.attr,
 	&sys_ipl_vm_parm_attr.attr,
-	&sys_ipl_secure_attr.attr,
-	&sys_ipl_has_secure_attr.attr,
 	NULL,
 };
 
 static struct attribute *ipl_ccw_attrs_lpar[] = {
-	&sys_ipl_type_attr.attr,
 	&sys_ipl_device_attr.attr,
 	&sys_ipl_ccw_loadparm_attr.attr,
-	&sys_ipl_secure_attr.attr,
-	&sys_ipl_has_secure_attr.attr,
 	NULL,
 };
 
@@ -636,17 +621,15 @@ static struct attribute_group ipl_ccw_attr_group_lpar = {
 	.attrs = ipl_ccw_attrs_lpar
 };
 
-/* UNKNOWN ipl device attributes */
-
-static struct attribute *ipl_unknown_attrs[] = {
+static struct attribute *ipl_common_attrs[] = {
 	&sys_ipl_type_attr.attr,
 	&sys_ipl_secure_attr.attr,
 	&sys_ipl_has_secure_attr.attr,
 	NULL,
 };
 
-static struct attribute_group ipl_unknown_attr_group = {
-	.attrs = ipl_unknown_attrs,
+static struct attribute_group ipl_common_attr_group = {
+	.attrs = ipl_common_attrs,
 };
 
 static struct kset *ipl_kset;
@@ -670,6 +653,9 @@ static int __init ipl_init(void)
 		rc = -ENOMEM;
 		goto out;
 	}
+	rc = sysfs_create_group(&ipl_kset->kobj, &ipl_common_attr_group);
+	if (rc)
+		goto out;
 	switch (ipl_info.type) {
 	case IPL_TYPE_CCW:
 		if (MACHINE_IS_VM)
@@ -691,8 +677,6 @@ static int __init ipl_init(void)
 		rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group);
 		break;
 	default:
-		rc = sysfs_create_group(&ipl_kset->kobj,
-					&ipl_unknown_attr_group);
 		break;
 	}
 out:

From 4c1d2f56d685406fc6b452ca5f797bda62a06609 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Sat, 12 Aug 2023 19:55:49 -0400
Subject: [PATCH 1210/2047] perf/arm-dmc620: Fix
 dmc620_pmu_irqs_lock/cpu_hotplug_lock circular lock dependency

The following circular locking dependency was reported when running
cpus online/offline test on an arm64 system.

[   84.195923] Chain exists of:
                 dmc620_pmu_irqs_lock --> cpu_hotplug_lock --> cpuhp_state-down

[   84.207305]  Possible unsafe locking scenario:

[   84.213212]        CPU0                    CPU1
[   84.217729]        ----                    ----
[   84.222247]   lock(cpuhp_state-down);
[   84.225899]                                lock(cpu_hotplug_lock);
[   84.232068]                                lock(cpuhp_state-down);
[   84.238237]   lock(dmc620_pmu_irqs_lock);
[   84.242236]
                *** DEADLOCK ***

The following locking order happens when dmc620_pmu_get_irq() calls
cpuhp_state_add_instance_nocalls().

	lock(dmc620_pmu_irqs_lock) --> lock(cpu_hotplug_lock)

On the other hand, the calling sequence

  cpuhp_thread_fun()
    => cpuhp_invoke_callback()
      => dmc620_pmu_cpu_teardown()

leads to the locking sequence

	lock(cpuhp_state-down) => lock(dmc620_pmu_irqs_lock)

Here dmc620_pmu_irqs_lock protects both the dmc620_pmu_irqs and the
pmus_node lists in various dmc620_pmu instances. dmc620_pmu_get_irq()
requires protected access to dmc620_pmu_irqs whereas
dmc620_pmu_cpu_teardown() needs protection to the pmus_node lists.
Break this circular locking dependency by using two separate locks to
protect dmc620_pmu_irqs list and the pmus_node lists respectively.

Suggested-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Link: https://lore.kernel.org/r/20230812235549.494174-1-longman@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_dmc620_pmu.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 9d0f01c4455a..30cea6859574 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -66,8 +66,13 @@
 #define DMC620_PMU_COUNTERn_OFFSET(n) \
 	(DMC620_PMU_COUNTERS_BASE + 0x28 * (n))
 
-static LIST_HEAD(dmc620_pmu_irqs);
+/*
+ * dmc620_pmu_irqs_lock: protects dmc620_pmu_irqs list
+ * dmc620_pmu_node_lock: protects pmus_node lists in all dmc620_pmu instances
+ */
 static DEFINE_MUTEX(dmc620_pmu_irqs_lock);
+static DEFINE_MUTEX(dmc620_pmu_node_lock);
+static LIST_HEAD(dmc620_pmu_irqs);
 
 struct dmc620_pmu_irq {
 	struct hlist_node node;
@@ -475,9 +480,9 @@ static int dmc620_pmu_get_irq(struct dmc620_pmu *dmc620_pmu, int irq_num)
 		return PTR_ERR(irq);
 
 	dmc620_pmu->irq = irq;
-	mutex_lock(&dmc620_pmu_irqs_lock);
+	mutex_lock(&dmc620_pmu_node_lock);
 	list_add_rcu(&dmc620_pmu->pmus_node, &irq->pmus_node);
-	mutex_unlock(&dmc620_pmu_irqs_lock);
+	mutex_unlock(&dmc620_pmu_node_lock);
 
 	return 0;
 }
@@ -486,9 +491,11 @@ static void dmc620_pmu_put_irq(struct dmc620_pmu *dmc620_pmu)
 {
 	struct dmc620_pmu_irq *irq = dmc620_pmu->irq;
 
-	mutex_lock(&dmc620_pmu_irqs_lock);
+	mutex_lock(&dmc620_pmu_node_lock);
 	list_del_rcu(&dmc620_pmu->pmus_node);
+	mutex_unlock(&dmc620_pmu_node_lock);
 
+	mutex_lock(&dmc620_pmu_irqs_lock);
 	if (!refcount_dec_and_test(&irq->refcount)) {
 		mutex_unlock(&dmc620_pmu_irqs_lock);
 		return;
@@ -638,10 +645,10 @@ static int dmc620_pmu_cpu_teardown(unsigned int cpu,
 		return 0;
 
 	/* We're only reading, but this isn't the place to be involving RCU */
-	mutex_lock(&dmc620_pmu_irqs_lock);
+	mutex_lock(&dmc620_pmu_node_lock);
 	list_for_each_entry(dmc620_pmu, &irq->pmus_node, pmus_node)
 		perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
-	mutex_unlock(&dmc620_pmu_irqs_lock);
+	mutex_unlock(&dmc620_pmu_node_lock);
 
 	WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target)));
 	irq->cpu = target;

From 46862da15e37efedb7d2d21e167f506c0b533772 Mon Sep 17 00:00:00 2001
From: Ding Xiang <dingxiang@cmss.chinamobile.com>
Date: Tue, 15 Aug 2023 15:49:15 +0800
Subject: [PATCH 1211/2047] kselftest/arm64: fix a memleak in zt_regs_run()

If memcmp() does not return 0, "zeros" need to be freed to prevent memleak

Signed-off-by: Ding Xiang <dingxiang@cmss.chinamobile.com>
Link: https://lore.kernel.org/r/20230815074915.245528-1-dingxiang@cmss.chinamobile.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/signal/testcases/zt_regs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
index e1eb4d5c027a..2e384d731618 100644
--- a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
@@ -65,6 +65,7 @@ int zt_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
 	if (memcmp(zeros, (char *)zt + ZT_SIG_REGS_OFFSET,
 		   ZT_SIG_REGS_SIZE(zt->nregs)) != 0) {
 		fprintf(stderr, "ZT data invalid\n");
+		free(zeros);
 		return 1;
 	}
 

From 83a6d80c2bfd1d348e5e7079af21a924fdc5c972 Mon Sep 17 00:00:00 2001
From: Yicong Yang <yangyicong@hisilicon.com>
Date: Tue, 15 Aug 2023 21:10:10 +0800
Subject: [PATCH 1212/2047] drivers/perf: hisi: Schedule perf session according
 to locality

The PCIe PMUs locate on different NUMA node but currently we don't
consider it and likely stack all the sessions on the same CPU:

[root@localhost tmp]# cat /sys/devices/hisi_pcie*/cpumask
0
0
0
0
0
0

This can be optimize a bit to use a local CPU for the PMU.

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Link: https://lore.kernel.org/r/20230815131010.2147-1-yangyicong@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/hisilicon/hisi_pcie_pmu.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index e10fc7cb9493..5a00adb2de8c 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -665,8 +665,8 @@ static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
 	struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
 
 	if (pcie_pmu->on_cpu == -1) {
-		pcie_pmu->on_cpu = cpu;
-		WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)));
+		pcie_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(&pcie_pmu->pdev->dev));
+		WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(pcie_pmu->on_cpu)));
 	}
 
 	return 0;
@@ -676,14 +676,23 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 {
 	struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
 	unsigned int target;
+	cpumask_t mask;
+	int numa_node;
 
 	/* Nothing to do if this CPU doesn't own the PMU */
 	if (pcie_pmu->on_cpu != cpu)
 		return 0;
 
 	pcie_pmu->on_cpu = -1;
-	/* Choose a new CPU from all online cpus. */
-	target = cpumask_any_but(cpu_online_mask, cpu);
+
+	/* Choose a local CPU from all online cpus. */
+	numa_node = dev_to_node(&pcie_pmu->pdev->dev);
+	if (cpumask_and(&mask, cpumask_of_node(numa_node), cpu_online_mask) &&
+	    cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
+		target = cpumask_any(&mask);
+	else
+		target = cpumask_any_but(cpu_online_mask, cpu);
+
 	if (target >= nr_cpu_ids) {
 		pci_err(pcie_pmu->pdev, "There is no CPU to set\n");
 		return 0;

From e89ecd8368860bf05437eabd07d292c316221cfc Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2@nxp.com>
Date: Fri, 11 Aug 2023 09:54:37 +0800
Subject: [PATCH 1213/2047] perf/imx_ddr: speed up overflow frequency of cycle

For i.MX8MP, we cannot ensure that cycle counter overflow occurs at least
4 times as often as other events. Due to byte counters will count for any
event configured, it will overflow more often. And if byte counters
overflow that related counters would stop since they share the
COUNTER_CNTL. We can speed up cycle counter overflow frequency by setting
counter parameter (CP) field of cycle counter. In this way, we can avoid
stop counting byte counters when interrupt didn't come and the byte
counters can be fetched or updated from each cycle counter overflow
interrupt.

Because we initialize CP filed to shorten counter0 overflow time, the cycle
counter will start couting from a fixed/base value each time. We need to
remove the base from the result too. Therefore, we could get precise result
from cycle counter.

Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://lore.kernel.org/r/20230811015438.1999307-1-xu.yang_2@nxp.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/fsl_imx8_ddr_perf.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 1cb3861ab0e0..7d4e49565738 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -27,6 +27,8 @@
 #define CNTL_CLEAR_MASK		0xFFFFFFFD
 #define CNTL_OVER_MASK		0xFFFFFFFE
 
+#define CNTL_CP_SHIFT		16
+#define CNTL_CP_MASK		(0xFF << CNTL_CP_SHIFT)
 #define CNTL_CSV_SHIFT		24
 #define CNTL_CSV_MASK		(0xFFU << CNTL_CSV_SHIFT)
 
@@ -34,6 +36,8 @@
 #define EVENT_CYCLES_COUNTER	0
 #define NUM_COUNTERS		4
 
+/* For removing bias if cycle counter CNTL.CP is set to 0xf0 */
+#define CYCLES_COUNTER_MASK	0x0FFFFFFF
 #define AXI_MASKING_REVERT	0xffff0000	/* AXI_MASKING(MSB 16bits) + AXI_ID(LSB 16bits) */
 
 #define to_ddr_pmu(p)		container_of(p, struct ddr_pmu, pmu)
@@ -426,6 +430,17 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
 		writel(0, pmu->base + reg);
 		val = CNTL_EN | CNTL_CLEAR;
 		val |= FIELD_PREP(CNTL_CSV_MASK, config);
+
+		/*
+		 * On i.MX8MP we need to bias the cycle counter to overflow more often.
+		 * We do this by initializing bits [23:16] of the counter value via the
+		 * COUNTER_CTRL Counter Parameter (CP) field.
+		 */
+		if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+			if (counter == EVENT_CYCLES_COUNTER)
+				val |= FIELD_PREP(CNTL_CP_MASK, 0xf0);
+		}
+
 		writel(val, pmu->base + reg);
 	} else {
 		/* Disable counter */
@@ -465,6 +480,12 @@ static void ddr_perf_event_update(struct perf_event *event)
 	int ret;
 
 	new_raw_count = ddr_perf_read_counter(pmu, counter);
+	/* Remove the bias applied in ddr_perf_counter_enable(). */
+	if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+		if (counter == EVENT_CYCLES_COUNTER)
+			new_raw_count &= CYCLES_COUNTER_MASK;
+	}
+
 	local64_add(new_raw_count, &event->count);
 
 	/*

From f4e2bd91ddf5e8543cbe7ad80b3fba3d2dc63fa3 Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2@nxp.com>
Date: Fri, 11 Aug 2023 09:54:38 +0800
Subject: [PATCH 1214/2047] perf/imx_ddr: don't enable counter0 if none of 4
 counters are used

In current driver, counter0 will be enabled after ddr_perf_pmu_enable()
is called even though none of the 4 counters are used. This will cause
counter0 continue to count until ddr_perf_pmu_disabled() is called. If
pmu is not disabled all the time, the pmu interrupt will be asserted
from time to time due to counter0 will overflow and irq handler will
clear it. It's not an expected behavior. This patch will not enable
counter0 if none of 4 counters are used.

Fixes: 9a66d36cc7ac ("drivers/perf: imx_ddr: Add DDR performance counter support to perf")
Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://lore.kernel.org/r/20230811015438.1999307-2-xu.yang_2@nxp.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/fsl_imx8_ddr_perf.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 7d4e49565738..92611c98120f 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -104,6 +104,7 @@ struct ddr_pmu {
 	const struct fsl_ddr_devtype_data *devtype_data;
 	int irq;
 	int id;
+	int active_counter;
 };
 
 static ssize_t ddr_perf_identifier_show(struct device *dev,
@@ -515,6 +516,10 @@ static void ddr_perf_event_start(struct perf_event *event, int flags)
 
 	ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
 
+	if (!pmu->active_counter++)
+		ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID,
+			EVENT_CYCLES_COUNTER, true);
+
 	hwc->state = 0;
 }
 
@@ -568,6 +573,10 @@ static void ddr_perf_event_stop(struct perf_event *event, int flags)
 	ddr_perf_counter_enable(pmu, event->attr.config, counter, false);
 	ddr_perf_event_update(event);
 
+	if (!--pmu->active_counter)
+		ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID,
+			EVENT_CYCLES_COUNTER, false);
+
 	hwc->state |= PERF_HES_STOPPED;
 }
 
@@ -585,25 +594,10 @@ static void ddr_perf_event_del(struct perf_event *event, int flags)
 
 static void ddr_perf_pmu_enable(struct pmu *pmu)
 {
-	struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
-
-	/* enable cycle counter if cycle is not active event list */
-	if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
-		ddr_perf_counter_enable(ddr_pmu,
-				      EVENT_CYCLES_ID,
-				      EVENT_CYCLES_COUNTER,
-				      true);
 }
 
 static void ddr_perf_pmu_disable(struct pmu *pmu)
 {
-	struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
-
-	if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
-		ddr_perf_counter_enable(ddr_pmu,
-				      EVENT_CYCLES_ID,
-				      EVENT_CYCLES_COUNTER,
-				      false);
 }
 
 static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,

From e08302ee46c9a5101e3902350ba5e27fcb18894b Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Tue, 15 Aug 2023 15:59:31 +0100
Subject: [PATCH 1215/2047] kselftest/arm64: build BTI tests in output
 directory

The arm64 BTI selftests are currently built in the source directory,
then the generated binaries are copied to the output directory.
This leaves the object files around in a potentially otherwise pristine
source tree, tainting it for out-of-tree kernel builds.

Prepend $(OUTPUT) to every reference to an object file in the Makefile,
and remove the extra handling and copying. This puts all generated files
under the output directory.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230815145931.2522557-1-andre.przywara@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/bti/Makefile    | 45 +++++++++----------
 .../selftests/arm64/bti/gen/.gitignore        |  2 -
 2 files changed, 20 insertions(+), 27 deletions(-)
 delete mode 100644 tools/testing/selftests/arm64/bti/gen/.gitignore

diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile
index ccdac414ad94..05e4ee523a53 100644
--- a/tools/testing/selftests/arm64/bti/Makefile
+++ b/tools/testing/selftests/arm64/bti/Makefile
@@ -2,8 +2,6 @@
 
 TEST_GEN_PROGS := btitest nobtitest
 
-PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS))
-
 # These tests are built as freestanding binaries since otherwise BTI
 # support in ld.so is required which is not currently widespread; when
 # it is available it will still be useful to test this separately as the
@@ -18,44 +16,41 @@ CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS)
 BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $<
 NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $<
 
-%-bti.o: %.c
+$(OUTPUT)/%-bti.o: %.c
 	$(BTI_CC_COMMAND)
 
-%-bti.o: %.S
+$(OUTPUT)/%-bti.o: %.S
 	$(BTI_CC_COMMAND)
 
-%-nobti.o: %.c
+$(OUTPUT)/%-nobti.o: %.c
 	$(NOBTI_CC_COMMAND)
 
-%-nobti.o: %.S
+$(OUTPUT)/%-nobti.o: %.S
 	$(NOBTI_CC_COMMAND)
 
 BTI_OBJS =                                      \
-	test-bti.o                           \
-	signal-bti.o                            \
-	start-bti.o                             \
-	syscall-bti.o                           \
-	system-bti.o                            \
-	teststubs-bti.o                         \
-	trampoline-bti.o
-gen/btitest: $(BTI_OBJS)
+	$(OUTPUT)/test-bti.o                    \
+	$(OUTPUT)/signal-bti.o                  \
+	$(OUTPUT)/start-bti.o                   \
+	$(OUTPUT)/syscall-bti.o                 \
+	$(OUTPUT)/system-bti.o                  \
+	$(OUTPUT)/teststubs-bti.o               \
+	$(OUTPUT)/trampoline-bti.o
+$(OUTPUT)/btitest: $(BTI_OBJS)
 	$(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
 
 NOBTI_OBJS =                                    \
-	test-nobti.o                         \
-	signal-nobti.o                          \
-	start-nobti.o                           \
-	syscall-nobti.o                         \
-	system-nobti.o                          \
-	teststubs-nobti.o                       \
-	trampoline-nobti.o
-gen/nobtitest: $(NOBTI_OBJS)
+	$(OUTPUT)/test-nobti.o                  \
+	$(OUTPUT)/signal-nobti.o                \
+	$(OUTPUT)/start-nobti.o                 \
+	$(OUTPUT)/syscall-nobti.o               \
+	$(OUTPUT)/system-nobti.o                \
+	$(OUTPUT)/teststubs-nobti.o             \
+	$(OUTPUT)/trampoline-nobti.o
+$(OUTPUT)/nobtitest: $(NOBTI_OBJS)
 	$(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
 
 # Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
 # to account for any OUTPUT target-dirs optionally provided by
 # the toplevel makefile
 include ../../lib.mk
-
-$(TEST_GEN_PROGS): $(PROGS)
-	cp $(PROGS) $(OUTPUT)/
diff --git a/tools/testing/selftests/arm64/bti/gen/.gitignore b/tools/testing/selftests/arm64/bti/gen/.gitignore
deleted file mode 100644
index 73869fabada4..000000000000
--- a/tools/testing/selftests/arm64/bti/gen/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-btitest
-nobtitest

From 2d218b45848b92b03b220bf4d9bef29f058f866f Mon Sep 17 00:00:00 2001
From: Chao Song <chao.song@linux.intel.com>
Date: Wed, 16 Aug 2023 16:33:11 +0300
Subject: [PATCH 1216/2047] ASoC: SOF: ipc4-pcm: fix possible null pointer
 deference

The call to snd_sof_find_spcm_dai() could return NULL,
add nullable check for the return value to avoid null
pointer defenrece.

Fixes: 7cb19007baba ("ASoC: SOF: ipc4-pcm: add hw_params")
Signed-off-by: Chao Song <chao.song@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230816133311.7523-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc4-pcm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c
index 0c905bd0fab4..027416eb2f50 100644
--- a/sound/soc/sof/ipc4-pcm.c
+++ b/sound/soc/sof/ipc4-pcm.c
@@ -708,6 +708,9 @@ static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component,
 	struct snd_sof_pcm *spcm;
 
 	spcm = snd_sof_find_spcm_dai(component, rtd);
+	if (!spcm)
+		return -EINVAL;
+
 	time_info = spcm->stream[substream->stream].private;
 	/* delay calculation is not supported by current fw_reg ABI */
 	if (!time_info)

From 18b8f57a7f51b4c834e5e974ec38133c02e9eb58 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Tue, 15 Aug 2023 15:06:39 +0100
Subject: [PATCH 1217/2047] arm64: sysreg: Generate C compiler warnings on
 {read,write}_sysreg_s arguments

Evaluate the register before the asm section so that the C compiler
generates warnings when there is an issue with the register argument.

This will prevent possible future issues such as the one seen here [1]
where a missing bracket caused the shift and addition operators to be
evaluated in the wrong order, but no warning was emitted. The GNU
assembler has no warning for when expressions evaluate differently to C
due to different operator precedence, but the C compiler has some
warnings that may suggest something is wrong. For example in this case
the following warning would have been emitted:

  error: operator '>>' has lower precedence than '+'; '+' will be evaluated first [-Werror,-Wshift-op-parentheses]

There are currently no existing warnings that need to be fixed.

[1]: https://lore.kernel.org/linux-perf-users/20230728162011.GA22050@willie-the-truck/

Signed-off-by: James Clark <james.clark@arm.com>
Link: https://lore.kernel.org/r/20230815140639.614769-1-james.clark@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/sysreg.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index b481935e9314..16464bf9a8aa 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -803,15 +803,21 @@
 /*
  * For registers without architectural names, or simply unsupported by
  * GAS.
+ *
+ * __check_r forces warnings to be generated by the compiler when
+ * evaluating r which wouldn't normally happen due to being passed to
+ * the assembler via __stringify(r).
  */
 #define read_sysreg_s(r) ({						\
 	u64 __val;							\
+	u32 __maybe_unused __check_r = (u32)(r);			\
 	asm volatile(__mrs_s("%0", r) : "=r" (__val));			\
 	__val;								\
 })
 
 #define write_sysreg_s(v, r) do {					\
 	u64 __val = (u64)(v);						\
+	u32 __maybe_unused __check_r = (u32)(r);			\
 	asm volatile(__msr_s(r, "%x0") : : "rZ" (__val));		\
 } while (0)
 

From e21ac64e669e960688e79bf5babeed63132dac8a Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Wed, 16 Aug 2023 14:55:49 +0100
Subject: [PATCH 1218/2047] regulator: raa215300: Fix resource leak in case of
 error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The clk_register_clkdev() allocates memory by calling vclkdev_alloc() and
this memory is not freed in the error path. Similarly, resources allocated
by clk_register_fixed_rate() are not freed in the error path.

Fix these issues by using devm_clk_hw_register_fixed_rate() and
devm_clk_hw_register_clkdev().

After this, the static variable clk is not needed. Replace it with 
local variable hw in probe() and drop calling clk_unregister_fixed_rate()
from raa215300_rtc_unregister_device().

Fixes: 7bce16630837 ("regulator: Add Renesas PMIC RAA215300 driver")
Cc: stable@kernel.org
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Link: https://lore.kernel.org/r/20230816135550.146657-2-biju.das.jz@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/raa215300.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/regulator/raa215300.c b/drivers/regulator/raa215300.c
index 848a03a2fbd4..898f53cac02c 100644
--- a/drivers/regulator/raa215300.c
+++ b/drivers/regulator/raa215300.c
@@ -38,8 +38,6 @@
 #define RAA215300_REG_BLOCK_EN_RTC_EN	BIT(6)
 #define RAA215300_RTC_DEFAULT_ADDR	0x6f
 
-static struct clk *clk;
-
 static const struct regmap_config raa215300_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
@@ -49,10 +47,6 @@ static const struct regmap_config raa215300_regmap_config = {
 static void raa215300_rtc_unregister_device(void *data)
 {
 	i2c_unregister_device(data);
-	if (!clk) {
-		clk_unregister_fixed_rate(clk);
-		clk = NULL;
-	}
 }
 
 static int raa215300_clk_present(struct i2c_client *client, const char *name)
@@ -130,10 +124,16 @@ static int raa215300_i2c_probe(struct i2c_client *client)
 		u32 addr = RAA215300_RTC_DEFAULT_ADDR;
 		struct i2c_board_info info = {};
 		struct i2c_client *rtc_client;
+		struct clk_hw *hw;
 		ssize_t size;
 
-		clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 32000);
-		clk_register_clkdev(clk, clk_name, NULL);
+		hw = devm_clk_hw_register_fixed_rate(dev, clk_name, NULL, 0, 32000);
+		if (IS_ERR(hw))
+			return PTR_ERR(hw);
+
+		ret = devm_clk_hw_register_clkdev(dev, hw, clk_name, NULL);
+		if (ret)
+			return dev_err_probe(dev, ret, "Failed to initialize clkdev\n");
 
 		if (np) {
 			int i;

From 727d7c1c3695657873d62030b968ba97c8698c54 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Wed, 16 Aug 2023 14:55:50 +0100
Subject: [PATCH 1219/2047] regulator: raa215300: Add const definition

Add const definition to the initialized local variable name to avoid
overriding. Also the second parameter in strscpy is const char * instead of
char *.

Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Link: https://lore.kernel.org/r/20230816135550.146657-3-biju.das.jz@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/raa215300.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/raa215300.c b/drivers/regulator/raa215300.c
index 898f53cac02c..bdbf1e4ce468 100644
--- a/drivers/regulator/raa215300.c
+++ b/drivers/regulator/raa215300.c
@@ -119,7 +119,7 @@ static int raa215300_i2c_probe(struct i2c_client *client)
 	}
 
 	if (clk_name) {
-		char *name = pmic_version >= 0x12 ? "isl1208" : "raa215300_a0";
+		const char *name = pmic_version >= 0x12 ? "isl1208" : "raa215300_a0";
 		struct device_node *np = client->dev.of_node;
 		u32 addr = RAA215300_RTC_DEFAULT_ADDR;
 		struct i2c_board_info info = {};

From 7eb4ee66739ea187d3855f6dbf2ab48a512b4ae0 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Tue, 15 Aug 2023 12:09:12 +0800
Subject: [PATCH 1220/2047] kselftest/arm64: add SHA1 and related features to
 hwcap test

Add the SHA1 and related features check in the set of hwcap tests.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230815040915.3966955-2-zengheng4@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/hwcap.c | 39 +++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 112b6a219382..83d3b6dcc0d8 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -87,6 +87,24 @@ static void rng_sigill(void)
 	asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
 }
 
+static void sha1_sigill(void)
+{
+	/* SHA1H S0, S0 */
+	asm volatile(".inst 0x5e280800" : : : );
+}
+
+static void sha2_sigill(void)
+{
+	/* SHA256H Q0, Q0, V0.4S */
+	asm volatile(".inst 0x5e004000" : : : );
+}
+
+static void sha512_sigill(void)
+{
+	/* SHA512H Q0, Q0, V0.2D */
+	asm volatile(".inst 0xce608000" : : : );
+}
+
 static void sme_sigill(void)
 {
 	/* RDSVL x0, #0 */
@@ -327,6 +345,27 @@ static const struct hwcap_data {
 		.hwcap_bit = HWCAP2_RPRFM,
 		.cpuinfo = "rprfm",
 	},
+	{
+		.name = "SHA1",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SHA1,
+		.cpuinfo = "sha1",
+		.sigill_fn = sha1_sigill,
+	},
+	{
+		.name = "SHA2",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SHA2,
+		.cpuinfo = "sha2",
+		.sigill_fn = sha2_sigill,
+	},
+	{
+		.name = "SHA512",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SHA512,
+		.cpuinfo = "sha512",
+		.sigill_fn = sha512_sigill,
+	},
 	{
 		.name = "SME",
 		.at_hwcap = AT_HWCAP2,

From 3fc3c0d1246cfe440d9c44f2134fde432b311ab3 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Tue, 15 Aug 2023 12:09:13 +0800
Subject: [PATCH 1221/2047] kselftest/arm64: add AES feature check to hwcap
 test

Add the AES feature check in the set of hwcap tests.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230815040915.3966955-3-zengheng4@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/hwcap.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 83d3b6dcc0d8..96c2611f1ac0 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -34,6 +34,12 @@
  */
 typedef void (*sig_fn)(void);
 
+static void aes_sigill(void)
+{
+	/* AESE V0.16B, V0.16B */
+	asm volatile(".inst 0x4e284800" : : : );
+}
+
 static void atomics_sigill(void)
 {
 	/* STADD W0, [SP] */
@@ -273,6 +279,13 @@ static const struct hwcap_data {
 	sig_fn sigbus_fn;
 	bool sigbus_reliable;
 } hwcaps[] = {
+	{
+		.name = "AES",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_AES,
+		.cpuinfo = "aes",
+		.sigill_fn = aes_sigill,
+	},
 	{
 		.name = "CRC32",
 		.at_hwcap = AT_HWCAP,

From 2c3ce0e7dd5154ffa017cd4dab3b14c09272c4a5 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Tue, 15 Aug 2023 12:09:14 +0800
Subject: [PATCH 1222/2047] kselftest/arm64: add pmull feature to hwcap test

Add the pmull feature check in the set of hwcap tests.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230815040915.3966955-4-zengheng4@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/hwcap.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 96c2611f1ac0..b3492a944503 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -88,6 +88,12 @@ static void mops_sigill(void)
 		     : "cc", "memory");
 }
 
+static void pmull_sigill(void)
+{
+	/* PMULL V0.1Q, V0.1D, V0.1D */
+	asm volatile(".inst 0x0ee0e000" : : : );
+}
+
 static void rng_sigill(void)
 {
 	asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
@@ -345,6 +351,13 @@ static const struct hwcap_data {
 		.sigill_fn = mops_sigill,
 		.sigill_reliable = true,
 	},
+	{
+		.name = "PMULL",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_PMULL,
+		.cpuinfo = "pmull",
+		.sigill_fn = pmull_sigill,
+	},
 	{
 		.name = "RNG",
 		.at_hwcap = AT_HWCAP2,

From fcb0b51a5dca1f69f408dfe5fd970cf167d41538 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Tue, 15 Aug 2023 12:09:15 +0800
Subject: [PATCH 1223/2047] kselftest/arm64: add jscvt feature to hwcap test

Add the jscvt feature check in the set of hwcap tests.

Due to the requirement of jscvt feature, a compiler configuration
of v8.3 or above is needed to support assembly. Therefore, hand
encode is used here instead.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230815040915.3966955-5-zengheng4@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/hwcap.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index b3492a944503..fe844b825a1e 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -68,6 +68,12 @@ static void ilrcpc_sigill(void)
 	asm volatile(".inst 0x994083e0" : : : );
 }
 
+static void jscvt_sigill(void)
+{
+	/* FJCVTZS W0, D0 */
+	asm volatile(".inst 0x1e7e0000" : : : );
+}
+
 static void lrcpc_sigill(void)
 {
 	/* LDAPR W0, [SP, #0] */
@@ -313,6 +319,13 @@ static const struct hwcap_data {
 		.cpuinfo = "fp",
 		.sigill_fn = fp_sigill,
 	},
+	{
+		.name = "JSCVT",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_JSCVT,
+		.cpuinfo = "jscvt",
+		.sigill_fn = jscvt_sigill,
+	},
 	{
 		.name = "LRCPC",
 		.at_hwcap = AT_HWCAP,

From d232606773a0b09ec7f1ffc25f63abe801d011fd Mon Sep 17 00:00:00 2001
From: Justin Stitt <justinstitt@google.com>
Date: Fri, 11 Aug 2023 16:33:51 +0000
Subject: [PATCH 1224/2047] arm64/sysreg: refactor deprecated strncpy

`strncpy` is deprecated for use on NUL-terminated destination strings
[1]. Which seems to be the case here due to the forceful setting of `buf`'s
tail to 0.

A suitable replacement is `strscpy` [2] due to the fact that it
guarantees NUL-termination on its destination buffer argument which is
_not_ the case for `strncpy`!

In this case, we can simplify the logic and also check for any silent
truncation by using `strscpy`'s return value.

This should have no functional change and yet uses a more robust and
less ambiguous interface whilst reducing code complexity.

Link: www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings[1]
Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2]
Link: https://github.com/KSPP/linux/issues/90
Suggested-by: Kees Cook <keescook@chromium.org>
Cc: linux-hardening@vger.kernel.org
Signed-off-by: Justin Stitt <justinstitt@google.com>
Link: https://lore.kernel.org/r/20230811-strncpy-arch-arm64-v2-1-ba84eabffadb@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/idreg-override.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 2fe2491b692c..aee12c75b738 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -262,9 +262,9 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
 		if (!len)
 			return;
 
-		len = min(len, ARRAY_SIZE(buf) - 1);
-		strncpy(buf, cmdline, len);
-		buf[len] = 0;
+		len = strscpy(buf, cmdline, ARRAY_SIZE(buf));
+		if (len == -E2BIG)
+			len = ARRAY_SIZE(buf) - 1;
 
 		if (strcmp(buf, "--") == 0)
 			return;

From 1e9519b6fbe44caac13504724ef02c69124b3cd9 Mon Sep 17 00:00:00 2001
From: Hui Min Mina Chou <minachou@andestech.com>
Date: Tue, 18 Jul 2023 11:43:51 +0800
Subject: [PATCH 1225/2047] selftests/filesystems: Add six consecutive 'x'
 characters to mktemp

In busybox, the mktemp requires that the generated filename be
suffixed with at least six consecutive 'X' characters. Otherwise,
it will return an "Invalid argument" error.

Signed-off-by: Hui Min Mina Chou <minachou@andestech.com>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/filesystems/fat/run_fat_tests.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh
index 7f35dc3d15df..d61264d4795d 100755
--- a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh
+++ b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh
@@ -12,7 +12,7 @@ set -u
 set -o pipefail
 
 BASE_DIR="$(dirname $0)"
-TMP_DIR="$(mktemp -d /tmp/fat_tests_tmp.XXXX)"
+TMP_DIR="$(mktemp -d /tmp/fat_tests_tmp.XXXXXX)"
 IMG_PATH="${TMP_DIR}/fat.img"
 MNT_PATH="${TMP_DIR}/mnt"
 

From 68b4d2d58389f9d1db7ffc02644498f215ee8c70 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Sun, 6 Aug 2023 12:29:59 +0100
Subject: [PATCH 1226/2047] selftests/user_events: Reenable build

The user_events selftests were removed from the standard set of
selftests due to the uapi header it relies on having been temporarily
removed.  That header is now reinstated so we can reenable the tests.

Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/Makefile             | 1 +
 tools/testing/selftests/user_events/Makefile | 8 --------
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 666b56f22a41..4e221bb620b4 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -89,6 +89,7 @@ TARGETS += tmpfs
 TARGETS += tpm2
 TARGETS += tty
 TARGETS += user
+TARGETS += user_events
 TARGETS += vDSO
 TARGETS += mm
 TARGETS += x86
diff --git a/tools/testing/selftests/user_events/Makefile b/tools/testing/selftests/user_events/Makefile
index 9e95bd41b0b4..10fcd0066203 100644
--- a/tools/testing/selftests/user_events/Makefile
+++ b/tools/testing/selftests/user_events/Makefile
@@ -2,14 +2,6 @@
 CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
 LDLIBS += -lrt -lpthread -lm
 
-# Note:
-# This test depends on <linux/user_events.h> exported in uapi
-# The following commit removed user_events.h out of uapi:
-# commit 5cfff569cab8bf544bab62c911c5d6efd5af5e05
-# tracing: Move user_events.h temporarily out of include/uapi
-# This test will not compile until user_events.h is added
-# back to uapi.
-
 TEST_GEN_PROGS = ftrace_test dyn_test perf_test abi_test
 
 TEST_FILES := settings

From 888c72d581983ba935d73daed90a7ae4838fc6e1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Sun, 6 Aug 2023 00:14:07 +0100
Subject: [PATCH 1227/2047] selftests: Hook more tests into the build
 infrastructure

We have some dmabuf-heaps and perf_events tests but they are not hooked
up to the kselftest build infrastructure which is a bit of an obstacle
to running them in systems with generic infrastructure for selftests.
Add them to the top level kselftest Makefile so they get built as
standard.

Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 4e221bb620b4..a8573a3a5430 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -12,6 +12,7 @@ TARGETS += core
 TARGETS += cpufreq
 TARGETS += cpu-hotplug
 TARGETS += damon
+TARGETS += dmabuf-heaps
 TARGETS += drivers/dma-buf
 TARGETS += drivers/s390x/uvdevice
 TARGETS += drivers/net/bonding
@@ -56,6 +57,7 @@ TARGETS += net/mptcp
 TARGETS += net/openvswitch
 TARGETS += netfilter
 TARGETS += nsfs
+TARGETS += perf_events
 TARGETS += pidfd
 TARGETS += pid_namespace
 TARGETS += powerpc
@@ -88,6 +90,7 @@ endif
 TARGETS += tmpfs
 TARGETS += tpm2
 TARGETS += tty
+TARGETS += uevents
 TARGETS += user
 TARGETS += user_events
 TARGETS += vDSO

From fbf4dec702774286db409815ffb077711a96b824 Mon Sep 17 00:00:00 2001
From: "Nysal Jan K.A" <nysal@linux.ibm.com>
Date: Mon, 14 Aug 2023 13:39:27 +0530
Subject: [PATCH 1228/2047] selftests/futex: Order calls to futex_lock_pi

Observed occassional failures in the futex_wait_timeout test:

ok 1 futex_wait relative succeeds
ok 2 futex_wait_bitset realtime succeeds
ok 3 futex_wait_bitset monotonic succeeds
ok 4 futex_wait_requeue_pi realtime succeeds
ok 5 futex_wait_requeue_pi monotonic succeeds
not ok 6 futex_lock_pi realtime returned 0
......

The test expects the child thread to complete some steps before
the parent thread gets to run. There is an implicit expectation
of the order of invocation of futex_lock_pi between the child thread
and the parent thread. Make this order explicit. If the order is
not met, the futex_lock_pi call in the parent thread succeeds and
will not timeout.

Fixes: f4addd54b161 ("selftests: futex: Expand timeout test")
Signed-off-by: Nysal Jan K.A <nysal@linux.ibm.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 .../selftests/futex/functional/futex_wait_timeout.c        | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index 3651ce17beeb..d183f878360b 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -24,6 +24,7 @@
 
 static long timeout_ns = 100000;	/* 100us default timeout */
 static futex_t futex_pi;
+static pthread_barrier_t barrier;
 
 void usage(char *prog)
 {
@@ -48,6 +49,8 @@ void *get_pi_lock(void *arg)
 	if (ret != 0)
 		error("futex_lock_pi failed\n", ret);
 
+	pthread_barrier_wait(&barrier);
+
 	/* Blocks forever */
 	ret = futex_wait(&lock, 0, NULL, 0);
 	error("futex_wait failed\n", ret);
@@ -130,6 +133,7 @@ int main(int argc, char *argv[])
 	       basename(argv[0]));
 	ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
 
+	pthread_barrier_init(&barrier, NULL, 2);
 	pthread_create(&thread, NULL, get_pi_lock, NULL);
 
 	/* initialize relative timeout */
@@ -163,6 +167,9 @@ int main(int argc, char *argv[])
 	res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0);
 	test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
 
+	/* Wait until the other thread calls futex_lock_pi() */
+	pthread_barrier_wait(&barrier);
+	pthread_barrier_destroy(&barrier);
 	/*
 	 * FUTEX_LOCK_PI with CLOCK_REALTIME
 	 * Due to historical reasons, FUTEX_LOCK_PI supports only realtime

From 7fb10233e0617603402abffc0d8d2cbf5ef55560 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Tue, 15 Aug 2023 16:56:10 +0100
Subject: [PATCH 1229/2047] selftests: cachestat: properly link in librt

Libraries should be listed last on the compiler's command line, so that
the linker can look for and find still unresolved symbols. The librt
library, required for the shm_* functions, was announced using CFLAGS,
which puts the library *before* the source files, and fails compilation
on my system:
======================
gcc -isystem /src/linux-selftests/usr/include -Wall -lrt test_cachestat.c
  -o /src/linux-selftests/kselftest/cachestat/test_cachestat
/usr/bin/ld: /tmp/cceQWO3u.o: in function `test_cachestat_shmem':
test_cachestat.c:(.text+0x890): undefined reference to `shm_open'
/usr/bin/ld: test_cachestat.c:(.text+0x99c): undefined reference to `shm_unlink'
collect2: error: ld returned 1 exit status
make[4]: *** [../lib.mk:181: /src/linux-selftests/kselftest/cachestat/test_cachestat] Error 1
======================

Announce the library using the LDLIBS variable, which ensures the proper
ordering on the command line.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/cachestat/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/cachestat/Makefile b/tools/testing/selftests/cachestat/Makefile
index fca73aaa7d14..778b54ebb036 100644
--- a/tools/testing/selftests/cachestat/Makefile
+++ b/tools/testing/selftests/cachestat/Makefile
@@ -3,6 +3,6 @@ TEST_GEN_PROGS := test_cachestat
 
 CFLAGS += $(KHDR_INCLUDES)
 CFLAGS += -Wall
-CFLAGS += -lrt
+LDLIBS += -lrt
 
 include ../lib.mk

From 9b1db732866bee060b9bca9493e5ebf5e8874c48 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Tue, 15 Aug 2023 16:56:11 +0100
Subject: [PATCH 1230/2047] selftests: cachestat: use proper syscall number
 macro

At the moment the cachestat syscall number is hard coded into the test
source code.
Remove that and replace it with the proper __NR_cachestat macro.
That ensures compatibility should other architectures pick a different
number.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/cachestat/test_cachestat.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index 54d09b820ed4..a5a4ac8dcb76 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -19,7 +19,6 @@ static const char * const dev_files[] = {
 	"/dev/zero", "/dev/null", "/dev/urandom",
 	"/proc/version", "/proc"
 };
-static const int cachestat_nr = 451;
 
 void print_cachestat(struct cachestat *cs)
 {
@@ -126,7 +125,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
 		}
 	}
 
-	syscall_ret = syscall(cachestat_nr, fd, &cs_range, &cs, 0);
+	syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0);
 
 	ksft_print_msg("Cachestat call returned %ld\n", syscall_ret);
 
@@ -152,7 +151,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
 			ksft_print_msg("fsync fails.\n");
 			ret = false;
 		} else {
-			syscall_ret = syscall(cachestat_nr, fd, &cs_range, &cs, 0);
+			syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0);
 
 			ksft_print_msg("Cachestat call (after fsync) returned %ld\n",
 				syscall_ret);
@@ -213,7 +212,7 @@ bool test_cachestat_shmem(void)
 		goto close_fd;
 	}
 
-	syscall_ret = syscall(cachestat_nr, fd, &cs_range, &cs, 0);
+	syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0);
 
 	if (syscall_ret) {
 		ksft_print_msg("Cachestat returned non-zero.\n");

From b71645d6af10196c46cbe3732de2ea7d36b3ff6d Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Sat, 5 Aug 2023 11:38:15 +0800
Subject: [PATCH 1231/2047] tracing: Fix cpu buffers unavailable due to
 'record_disabled' missed

Trace ring buffer can no longer record anything after executing
following commands at the shell prompt:

  # cd /sys/kernel/tracing
  # cat tracing_cpumask
  fff
  # echo 0 > tracing_cpumask
  # echo 1 > snapshot
  # echo fff > tracing_cpumask
  # echo 1 > tracing_on
  # echo "hello world" > trace_marker
  -bash: echo: write error: Bad file descriptor

The root cause is that:
  1. After `echo 0 > tracing_cpumask`, 'record_disabled' of cpu buffers
     in 'tr->array_buffer.buffer' became 1 (see tracing_set_cpumask());
  2. After `echo 1 > snapshot`, 'tr->array_buffer.buffer' is swapped
     with 'tr->max_buffer.buffer', then the 'record_disabled' became 0
     (see update_max_tr());
  3. After `echo fff > tracing_cpumask`, the 'record_disabled' become -1;
Then array_buffer and max_buffer are both unavailable due to value of
'record_disabled' is not 0.

To fix it, enable or disable both array_buffer and max_buffer at the same
time in tracing_set_cpumask().

Link: https://lkml.kernel.org/r/20230805033816.3284594-2-zhengyejian1@huawei.com

Cc: <mhiramat@kernel.org>
Cc: <vnagarnaik@google.com>
Cc: <shuah@kernel.org>
Fixes: 71babb2705e2 ("tracing: change CPU ring buffer state from tracing_cpumask")
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b8870078ef58..b0e8eb6ea8ac 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5277,11 +5277,17 @@ int tracing_set_cpumask(struct trace_array *tr,
 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
+#ifdef CONFIG_TRACER_MAX_TRACE
+			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
+#endif
 		}
 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
+#ifdef CONFIG_TRACER_MAX_TRACE
+			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
+#endif
 		}
 	}
 	arch_spin_unlock(&tr->max_lock);

From 5450be6befc63670f2c05aa6773a8e437b39a3fe Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Sat, 5 Aug 2023 11:38:16 +0800
Subject: [PATCH 1232/2047] selftests/ftrace: Add a basic testcase for snapshot

This testcase is constrived to reproduce a problem that the cpu buffers
become unavailable which is due to 'record_disabled' of array_buffer and
max_buffer being messed up.

Local test result after bugfix:
  # ./ftracetest test.d/00basic/snapshot1.tc
  === Ftrace unit tests ===
  [1] Snapshot and tracing_cpumask        [PASS]
  [2] (instance)  Snapshot and tracing_cpumask    [PASS]

  # of passed:  2
  # of failed:  0
  # of unresolved:  0
  # of untested:  0
  # of unsupported:  0
  # of xfailed:  0
  # of undefined(test bug):  0

Link: https://lkml.kernel.org/r/20230805033816.3284594-3-zhengyejian1@huawei.com

Cc: <mhiramat@kernel.org>
Cc: <vnagarnaik@google.com>
Cc: <shuah@kernel.org>
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 .../ftrace/test.d/00basic/snapshot1.tc        | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc

diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
new file mode 100644
index 000000000000..63b76cf2a360
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Snapshot and tracing_cpumask
+# requires: trace_marker tracing_cpumask snapshot
+# flags: instance
+
+# This testcase is constrived to reproduce a problem that the cpu buffers
+# become unavailable which is due to 'record_disabled' of array_buffer and
+# max_buffer being messed up.
+
+# Store origin cpumask
+ORIG_CPUMASK=`cat tracing_cpumask`
+
+# Stop tracing all cpu
+echo 0 > tracing_cpumask
+
+# Take a snapshot of the main buffer
+echo 1 > snapshot
+
+# Restore origin cpumask, note that there should be some cpus being traced
+echo ${ORIG_CPUMASK} > tracing_cpumask
+
+# Set tracing on
+echo 1 > tracing_on
+
+# Write a log into buffer
+echo "test input 1" > trace_marker
+
+# Ensure the log writed so that cpu buffers are still available
+grep -q "test input 1" trace
+exit 0

From ddeea494a16f32522bce16ee65f191d05d4b8282 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Wed, 16 Aug 2023 17:49:26 +0200
Subject: [PATCH 1233/2047] tracing/synthetic: Use union instead of casts

The current code uses a lot of casts to access the fields member in struct
synth_trace_events with different sizes.  This makes the code hard to
read, and had already introduced an endianness bug. Use a union and struct
instead.

Link: https://lkml.kernel.org/r/20230816154928.4171614-2-svens@linux.ibm.com

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Fixes: 00cf3d672a9dd ("tracing: Allow synthetic events to pass around stacktraces")
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_events.h      | 11 ++++
 kernel/trace/trace.h              |  8 +++
 kernel/trace/trace_events_synth.c | 87 +++++++++++++------------------
 3 files changed, 56 insertions(+), 50 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 3930e676436c..1e8bbdb8da90 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -59,6 +59,17 @@ int trace_raw_output_prep(struct trace_iterator *iter,
 extern __printf(2, 3)
 void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...);
 
+/* Used to find the offset and length of dynamic fields in trace events */
+struct trace_dynamic_info {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	u16	offset;
+	u16	len;
+#else
+	u16	len;
+	u16	offset;
+#endif
+};
+
 /*
  * The trace entry - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e1edc2197fc8..95956f75bea5 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1295,6 +1295,14 @@ static inline void trace_branch_disable(void)
 /* set ring buffers to default size if not already done so */
 int tracing_update_buffers(void);
 
+union trace_synth_field {
+	u8				as_u8;
+	u16				as_u16;
+	u32				as_u32;
+	u64				as_u64;
+	struct trace_dynamic_info	as_dynamic;
+};
+
 struct ftrace_event_field {
 	struct list_head	link;
 	const char		*name;
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index dd398afc8e25..7fff8235075f 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -127,7 +127,7 @@ static bool synth_event_match(const char *system, const char *event,
 
 struct synth_trace_event {
 	struct trace_entry	ent;
-	u64			fields[];
+	union trace_synth_field	fields[];
 };
 
 static int synth_event_define_fields(struct trace_event_call *call)
@@ -321,19 +321,19 @@ static const char *synth_field_fmt(char *type)
 
 static void print_synth_event_num_val(struct trace_seq *s,
 				      char *print_fmt, char *name,
-				      int size, u64 val, char *space)
+				      int size, union trace_synth_field *val, char *space)
 {
 	switch (size) {
 	case 1:
-		trace_seq_printf(s, print_fmt, name, (u8)val, space);
+		trace_seq_printf(s, print_fmt, name, val->as_u8, space);
 		break;
 
 	case 2:
-		trace_seq_printf(s, print_fmt, name, (u16)val, space);
+		trace_seq_printf(s, print_fmt, name, val->as_u16, space);
 		break;
 
 	case 4:
-		trace_seq_printf(s, print_fmt, name, (u32)val, space);
+		trace_seq_printf(s, print_fmt, name, val->as_u32, space);
 		break;
 
 	default:
@@ -374,36 +374,26 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
 		/* parameter values */
 		if (se->fields[i]->is_string) {
 			if (se->fields[i]->is_dynamic) {
-				u32 offset, data_offset;
-				char *str_field;
-
-				offset = (u32)entry->fields[n_u64];
-				data_offset = offset & 0xffff;
-
-				str_field = (char *)entry + data_offset;
+				union trace_synth_field *data = &entry->fields[n_u64];
 
 				trace_seq_printf(s, print_fmt, se->fields[i]->name,
 						 STR_VAR_LEN_MAX,
-						 str_field,
+						 (char *)entry + data->as_dynamic.offset,
 						 i == se->n_fields - 1 ? "" : " ");
 				n_u64++;
 			} else {
 				trace_seq_printf(s, print_fmt, se->fields[i]->name,
 						 STR_VAR_LEN_MAX,
-						 (char *)&entry->fields[n_u64],
+						 (char *)&entry->fields[n_u64].as_u64,
 						 i == se->n_fields - 1 ? "" : " ");
 				n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
 			}
 		} else if (se->fields[i]->is_stack) {
-			u32 offset, data_offset, len;
 			unsigned long *p, *end;
+			union trace_synth_field *data = &entry->fields[n_u64];
 
-			offset = (u32)entry->fields[n_u64];
-			data_offset = offset & 0xffff;
-			len = offset >> 16;
-
-			p = (void *)entry + data_offset;
-			end = (void *)p + len - (sizeof(long) - 1);
+			p = (void *)entry + data->as_dynamic.offset;
+			end = (void *)p + data->as_dynamic.len - (sizeof(long) - 1);
 
 			trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name);
 
@@ -419,13 +409,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
 			print_synth_event_num_val(s, print_fmt,
 						  se->fields[i]->name,
 						  se->fields[i]->size,
-						  entry->fields[n_u64],
+						  &entry->fields[n_u64],
 						  space);
 
 			if (strcmp(se->fields[i]->type, "gfp_t") == 0) {
 				trace_seq_puts(s, " (");
 				trace_print_flags_seq(s, "|",
-						      entry->fields[n_u64],
+						      entry->fields[n_u64].as_u64,
 						      __flags);
 				trace_seq_putc(s, ')');
 			}
@@ -454,21 +444,16 @@ static unsigned int trace_string(struct synth_trace_event *entry,
 	int ret;
 
 	if (is_dynamic) {
-		u32 data_offset;
+		union trace_synth_field *data = &entry->fields[*n_u64];
 
-		data_offset = struct_size(entry, fields, event->n_u64);
-		data_offset += data_size;
-
-		len = fetch_store_strlen((unsigned long)str_val);
-
-		data_offset |= len << 16;
-		*(u32 *)&entry->fields[*n_u64] = data_offset;
+		data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size;
+		data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val);
 
 		ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry);
 
 		(*n_u64)++;
 	} else {
-		str_field = (char *)&entry->fields[*n_u64];
+		str_field = (char *)&entry->fields[*n_u64].as_u64;
 
 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 		if ((unsigned long)str_val < TASK_SIZE)
@@ -492,6 +477,7 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
 				 unsigned int data_size,
 				 unsigned int *n_u64)
 {
+	union trace_synth_field *data = &entry->fields[*n_u64];
 	unsigned int len;
 	u32 data_offset;
 	void *data_loc;
@@ -515,8 +501,9 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
 	memcpy(data_loc, stack, len);
 
 	/* Fill in the field that holds the offset/len combo */
-	data_offset |= len << 16;
-	*(u32 *)&entry->fields[*n_u64] = data_offset;
+
+	data->as_dynamic.offset = data_offset;
+	data->as_dynamic.len = len;
 
 	(*n_u64)++;
 
@@ -592,19 +579,19 @@ static notrace void trace_event_raw_event_synth(void *__data,
 
 			switch (field->size) {
 			case 1:
-				*(u8 *)&entry->fields[n_u64] = (u8)val;
+				entry->fields[n_u64].as_u8 = (u8)val;
 				break;
 
 			case 2:
-				*(u16 *)&entry->fields[n_u64] = (u16)val;
+				entry->fields[n_u64].as_u16 = (u16)val;
 				break;
 
 			case 4:
-				*(u32 *)&entry->fields[n_u64] = (u32)val;
+				entry->fields[n_u64].as_u32 = (u32)val;
 				break;
 
 			default:
-				entry->fields[n_u64] = val;
+				entry->fields[n_u64].as_u64 = val;
 				break;
 			}
 			n_u64++;
@@ -1791,19 +1778,19 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
 
 			switch (field->size) {
 			case 1:
-				*(u8 *)&state.entry->fields[n_u64] = (u8)val;
+				state.entry->fields[n_u64].as_u8 = (u8)val;
 				break;
 
 			case 2:
-				*(u16 *)&state.entry->fields[n_u64] = (u16)val;
+				state.entry->fields[n_u64].as_u16 = (u16)val;
 				break;
 
 			case 4:
-				*(u32 *)&state.entry->fields[n_u64] = (u32)val;
+				state.entry->fields[n_u64].as_u32 = (u32)val;
 				break;
 
 			default:
-				state.entry->fields[n_u64] = val;
+				state.entry->fields[n_u64].as_u64 = val;
 				break;
 			}
 			n_u64++;
@@ -1884,19 +1871,19 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
 
 			switch (field->size) {
 			case 1:
-				*(u8 *)&state.entry->fields[n_u64] = (u8)val;
+				state.entry->fields[n_u64].as_u8 = (u8)val;
 				break;
 
 			case 2:
-				*(u16 *)&state.entry->fields[n_u64] = (u16)val;
+				state.entry->fields[n_u64].as_u16 = (u16)val;
 				break;
 
 			case 4:
-				*(u32 *)&state.entry->fields[n_u64] = (u32)val;
+				state.entry->fields[n_u64].as_u32 = (u32)val;
 				break;
 
 			default:
-				state.entry->fields[n_u64] = val;
+				state.entry->fields[n_u64].as_u64 = val;
 				break;
 			}
 			n_u64++;
@@ -2031,19 +2018,19 @@ static int __synth_event_add_val(const char *field_name, u64 val,
 	} else {
 		switch (field->size) {
 		case 1:
-			*(u8 *)&trace_state->entry->fields[field->offset] = (u8)val;
+			trace_state->entry->fields[field->offset].as_u8 = (u8)val;
 			break;
 
 		case 2:
-			*(u16 *)&trace_state->entry->fields[field->offset] = (u16)val;
+			trace_state->entry->fields[field->offset].as_u16 = (u16)val;
 			break;
 
 		case 4:
-			*(u32 *)&trace_state->entry->fields[field->offset] = (u32)val;
+			trace_state->entry->fields[field->offset].as_u32 = (u32)val;
 			break;
 
 		default:
-			trace_state->entry->fields[field->offset] = val;
+			trace_state->entry->fields[field->offset].as_u64 = val;
 			break;
 		}
 	}

From 887f92e09ef34a949745ad26ce82be69e2dabcf6 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Wed, 16 Aug 2023 17:49:27 +0200
Subject: [PATCH 1234/2047] tracing/synthetic: Skip first entry for stack
 traces

While debugging another issue I noticed that the stack trace output
contains the number of entries on top:

         <idle>-0       [000] d..4.   203.322502: wake_lat: pid=0 delta=2268270616 stack=STACK:
=> 0x10
=> __schedule+0xac6/0x1a98
=> schedule+0x126/0x2c0
=> schedule_timeout+0x242/0x2c0
=> __wait_for_common+0x434/0x680
=> __wait_rcu_gp+0x198/0x3e0
=> synchronize_rcu+0x112/0x138
=> ring_buffer_reset_online_cpus+0x140/0x2e0
=> tracing_reset_online_cpus+0x15c/0x1d0
=> tracing_set_clock+0x180/0x1d8
=> hist_register_trigger+0x486/0x670
=> event_hist_trigger_parse+0x494/0x1318
=> trigger_process_regex+0x1d4/0x258
=> event_trigger_write+0xb4/0x170
=> vfs_write+0x210/0xad0
=> ksys_write+0x122/0x208

Fix this by skipping the first element. Also replace the pointer
logic with an index variable which is easier to read.

Link: https://lkml.kernel.org/r/20230816154928.4171614-3-svens@linux.ibm.com

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Fixes: 00cf3d672a9d ("tracing: Allow synthetic events to pass around stacktraces")
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 7fff8235075f..80a2a832f857 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -350,7 +350,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
 	struct trace_seq *s = &iter->seq;
 	struct synth_trace_event *entry;
 	struct synth_event *se;
-	unsigned int i, n_u64;
+	unsigned int i, j, n_u64;
 	char print_fmt[32];
 	const char *fmt;
 
@@ -389,18 +389,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
 				n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
 			}
 		} else if (se->fields[i]->is_stack) {
-			unsigned long *p, *end;
 			union trace_synth_field *data = &entry->fields[n_u64];
-
-			p = (void *)entry + data->as_dynamic.offset;
-			end = (void *)p + data->as_dynamic.len - (sizeof(long) - 1);
+			unsigned long *p = (void *)entry + data->as_dynamic.offset;
 
 			trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name);
-
-			for (; *p && p < end; p++)
-				trace_seq_printf(s, "=> %pS\n", (void *)*p);
+			for (j = 1; j < data->as_dynamic.len / sizeof(long); j++)
+				trace_seq_printf(s, "=> %pS\n", (void *)p[j]);
 			n_u64++;
-
 		} else {
 			struct trace_print_flags __flags[] = {
 			    __def_gfpflag_names, {-1, NULL} };
@@ -490,10 +485,6 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
 			break;
 	}
 
-	/* Include the zero'd element if it fits */
-	if (len < HIST_STACKTRACE_DEPTH)
-		len++;
-
 	len *= sizeof(long);
 
 	/* Find the dynamic section to copy the stack into. */

From c4d6b5438116c184027b2e911c0f2c7c406fb47c Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Wed, 16 Aug 2023 17:49:28 +0200
Subject: [PATCH 1235/2047] tracing/synthetic: Allocate one additional element
 for size

While debugging another issue I noticed that the stack trace contains one
invalid entry at the end:

<idle>-0       [008] d..4.    26.484201: wake_lat: pid=0 delta=2629976084 000000009cc24024 stack=STACK:
=> __schedule+0xac6/0x1a98
=> schedule+0x126/0x2c0
=> schedule_timeout+0x150/0x2c0
=> kcompactd+0x9ca/0xc20
=> kthread+0x2f6/0x3d8
=> __ret_from_fork+0x8a/0xe8
=> 0x6b6b6b6b6b6b6b6b

This is because the code failed to add the one element containing the
number of entries to field_size.

Link: https://lkml.kernel.org/r/20230816154928.4171614-4-svens@linux.ibm.com

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Fixes: 00cf3d672a9d ("tracing: Allow synthetic events to pass around stacktraces")
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 80a2a832f857..9897d0bfcab7 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -528,7 +528,8 @@ static notrace void trace_event_raw_event_synth(void *__data,
 		str_val = (char *)(long)var_ref_vals[val_idx];
 
 		if (event->dynamic_fields[i]->is_stack) {
-			len = *((unsigned long *)str_val);
+			/* reserve one extra element for size */
+			len = *((unsigned long *)str_val) + 1;
 			len *= sizeof(unsigned long);
 		} else {
 			len = fetch_store_strlen((unsigned long)str_val);

From 30bed99e0c6335b711119b9fda806da7b4031dfb Mon Sep 17 00:00:00 2001
From: Justin Stitt <justinstitt@google.com>
Date: Mon, 7 Aug 2023 18:22:30 +0000
Subject: [PATCH 1236/2047] um: vector: refactor deprecated strncpy

`strncpy` is deprecated for use on NUL-terminated destination strings [1].

A suitable replacement is `strscpy` [2] due to the fact that it
guarantees NUL-termination on its destination buffer argument which is
_not_ the case for `strncpy`!

In this case, we are able to drop the now superfluous `... - 1`
instances because `strscpy` will automatically truncate the last byte by
setting it to a NUL byte if the source size exceeds the destination size
or if the source string is not NUL-terminated.

I've also opted to remove the seemingly useless char* casts. I'm not
sure why they're present at all since (after expanding the `ifr_name`
macro) `ifr.ifr_ifrn.ifrn_name` is a char* already.

All in all, `strscpy` is a more robust and less ambiguous interface
while also letting us remove some `... -1`'s which cleans things up a
bit.

[1]: www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings
[2]: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html

Link: https://github.com/KSPP/linux/issues/90
Cc: linux-hardening@vger.kernel.org
Signed-off-by: Justin Stitt <justinstitt@google.com>
Acked-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Link: https://lore.kernel.org/r/20230807-arch-um-drivers-v1-1-10d602c5577a@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/um/drivers/vector_user.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
index c650e428432b..c719e1ec4645 100644
--- a/arch/um/drivers/vector_user.c
+++ b/arch/um/drivers/vector_user.c
@@ -141,7 +141,7 @@ static int create_tap_fd(char *iface)
 	}
 	memset(&ifr, 0, sizeof(ifr));
 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
-	strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+	strscpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
 
 	err = ioctl(fd, TUNSETIFF, (void *) &ifr);
 	if (err != 0) {
@@ -171,7 +171,7 @@ static int create_raw_fd(char *iface, int flags, int proto)
 		goto raw_fd_cleanup;
 	}
 	memset(&ifr, 0, sizeof(ifr));
-	strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+	strscpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
 	if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
 		err = -errno;
 		goto raw_fd_cleanup;

From be8dffa04de3894bad0bf31d1531a2f06353b70e Mon Sep 17 00:00:00 2001
From: Justin Stitt <justinstitt@google.com>
Date: Wed, 9 Aug 2023 18:19:32 +0000
Subject: [PATCH 1237/2047] um: refactor deprecated strncpy to memcpy

Use `memcpy` since `console_buf` is not expected to be NUL-terminated
and it more accurately describes what is happening with the buffers
`console_buf` and `string` as per Kees' analysis [1].

Also mark char buffer as `__nonstring` as per Kees' suggestion [2].

This change now makes it more clear what this code does and that
`console_buf` is not expected to be NUL-terminated.

Link: https://lore.kernel.org/all/202308081708.D5ADC80F@keescook/ [1]
Link: https://github.com/KSPP/linux/issues/90 [2]
Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings
Cc: linux-hardening@vger.kernel.org
Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Justin Stitt <justinstitt@google.com>
Link: https://lore.kernel.org/r/20230809-arch-um-v3-1-f63e1122d77e@google.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/um/drivers/mconsole_kern.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 5026e7b9adfe..ff4bda95b9c7 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -554,7 +554,7 @@ struct mconsole_output {
 
 static DEFINE_SPINLOCK(client_lock);
 static LIST_HEAD(clients);
-static char console_buf[MCONSOLE_MAX_DATA];
+static char console_buf[MCONSOLE_MAX_DATA] __nonstring;
 
 static void console_write(struct console *console, const char *string,
 			  unsigned int len)
@@ -567,7 +567,7 @@ static void console_write(struct console *console, const char *string,
 
 	while (len > 0) {
 		n = min((size_t) len, ARRAY_SIZE(console_buf));
-		strncpy(console_buf, string, n);
+		memcpy(console_buf, string, n);
 		string += n;
 		len -= n;
 

From cf007647475b5090819c5fe8da771073145c7334 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 10 Aug 2023 12:54:18 -0700
Subject: [PATCH 1238/2047] ARM: ptrace: Restore syscall restart tracing

Since commit 4e57a4ddf6b0 ("ARM: 9107/1: syscall: always store
thread_info->abi_syscall"), the seccomp selftests "syscall_restart" has
been broken. This was caused by the restart syscall not being stored to
"abi_syscall" during restart setup before branching to the "local_restart"
label. Tracers would see the wrong syscall, and scno would get overwritten
while returning from the TIF_WORK path. Add the missing store.

Cc: Russell King <linux@armlinux.org.uk>
Cc: Arnd Bergmann <arnd@kernel.org>
Cc: Lecopzer Chen <lecopzer.chen@mediatek.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: linux-arm-kernel@lists.infradead.org
Fixes: 4e57a4ddf6b0 ("ARM: 9107/1: syscall: always store thread_info->abi_syscall")
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20230810195422.2304827-1-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/arm/kernel/entry-common.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index bcc4c9ec3aa4..5c31e9de7a60 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -90,6 +90,7 @@ slow_work_pending:
 	cmp	r0, #0
 	beq	no_work_pending
 	movlt	scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
+	str	scno, [tsk, #TI_ABI_SYSCALL]	@ make sure tracers see update
 	ldmia	sp, {r0 - r6}			@ have to reload r0 - r6
 	b	local_restart			@ ... and off we go
 ENDPROC(ret_fast_syscall)

From 4697b5848bd933f68ebd04836362c8de0cacaf71 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 10 Aug 2023 12:54:19 -0700
Subject: [PATCH 1239/2047] ARM: ptrace: Restore syscall skipping for tracers

Since commit 4e57a4ddf6b0 ("ARM: 9107/1: syscall: always store
thread_info->abi_syscall"), the seccomp selftests "syscall_errno"
and "syscall_faked" have been broken. Both seccomp and PTRACE depend
on using the special value of "-1" for skipping syscalls. This value
wasn't working because it was getting masked by __NR_SYSCALL_MASK in
both PTRACE_SET_SYSCALL and get_syscall_nr().

Explicitly test for -1 in PTRACE_SET_SYSCALL and get_syscall_nr(),
leaving it exposed when present, allowing tracers to skip syscalls
again.

Cc: Russell King <linux@armlinux.org.uk>
Cc: Arnd Bergmann <arnd@kernel.org>
Cc: Lecopzer Chen <lecopzer.chen@mediatek.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: linux-arm-kernel@lists.infradead.org
Fixes: 4e57a4ddf6b0 ("ARM: 9107/1: syscall: always store thread_info->abi_syscall")
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20230810195422.2304827-2-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/arm/include/asm/syscall.h | 3 +++
 arch/arm/kernel/ptrace.c       | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index dfeed440254a..fe4326d938c1 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -25,6 +25,9 @@ static inline int syscall_get_nr(struct task_struct *task,
 	if (IS_ENABLED(CONFIG_AEABI) && !IS_ENABLED(CONFIG_OABI_COMPAT))
 		return task_thread_info(task)->abi_syscall;
 
+	if (task_thread_info(task)->abi_syscall == -1)
+		return -1;
+
 	return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK;
 }
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 2d8e2516906b..fef32d73f912 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -783,8 +783,9 @@ long arch_ptrace(struct task_struct *child, long request,
 			break;
 
 		case PTRACE_SET_SYSCALL:
-			task_thread_info(child)->abi_syscall = data &
-							__NR_SYSCALL_MASK;
+			if (data != -1)
+				data &= __NR_SYSCALL_MASK;
+			task_thread_info(child)->abi_syscall = data;
 			ret = 0;
 			break;
 

From ed2b9e1b6d82c729b2757b449097f28f108b023b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 16 Jun 2023 16:07:24 -0700
Subject: [PATCH 1240/2047] srcu,notifier: Remove #ifdefs in favor of SRCU Tiny
 srcu_usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit removes two #ifdef directives from include/linux/notifier.h
by causing SRCU Tiny to provide a dummy srcu_usage structure and a dummy
__SRCU_USAGE_INIT() macro.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: "Michał Mirosław" <mirq-linux@rere.qmqm.pl>
Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Cc: Sachin Sant <sachinp@linux.ibm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/notifier.h | 11 -----------
 include/linux/srcutiny.h |  4 ++++
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 86544707236a..45702bdcbceb 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -73,9 +73,7 @@ struct raw_notifier_head {
 
 struct srcu_notifier_head {
 	struct mutex mutex;
-#ifdef CONFIG_TREE_SRCU
 	struct srcu_usage srcuu;
-#endif
 	struct srcu_struct srcu;
 	struct notifier_block __rcu *head;
 };
@@ -106,7 +104,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
 #define RAW_NOTIFIER_INIT(name)	{				\
 		.head = NULL }
 
-#ifdef CONFIG_TREE_SRCU
 #define SRCU_NOTIFIER_INIT(name, pcpu)				\
 	{							\
 		.mutex = __MUTEX_INITIALIZER(name.mutex),	\
@@ -114,14 +111,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
 		.srcuu = __SRCU_USAGE_INIT(name.srcuu),		\
 		.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
 	}
-#else
-#define SRCU_NOTIFIER_INIT(name, pcpu)				\
-	{							\
-		.mutex = __MUTEX_INITIALIZER(name.mutex),	\
-		.head = NULL,					\
-		.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
-	}
-#endif
 
 #define ATOMIC_NOTIFIER_HEAD(name)				\
 	struct atomic_notifier_head name =			\
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index ebd72491af99..447133171d95 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -48,6 +48,10 @@ void srcu_drive_gp(struct work_struct *wp);
 #define DEFINE_STATIC_SRCU(name) \
 	static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name, name)
 
+// Dummy structure for srcu_notifier_head.
+struct srcu_usage { };
+#define __SRCU_USAGE_INIT(name) { }
+
 void synchronize_srcu(struct srcu_struct *ssp);
 
 /*

From 343640cb5b4ed349b3656ee4b100b36e2ae7e2da Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 26 Jun 2023 18:37:05 -0700
Subject: [PATCH 1241/2047] rcu: Mark __rcu_irq_enter_check_tick()
 ->rcu_urgent_qs load

The rcu_request_urgent_qs_task() function does a cross-CPU store
to ->rcu_urgent_qs, so this commit therefore marks the load in
__rcu_irq_enter_check_tick() with READ_ONCE().

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/rcu/tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f788981272f8..cb1caefa8bd0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -632,7 +632,7 @@ void __rcu_irq_enter_check_tick(void)
 	// prevents self-deadlock.  So we can safely recheck under the lock.
 	// Note that the nohz_full state currently cannot change.
 	raw_spin_lock_rcu_node(rdp->mynode);
-	if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
+	if (READ_ONCE(rdp->rcu_urgent_qs) && !rdp->rcu_forced_tick) {
 		// A nohz_full CPU is in the kernel and RCU needs a
 		// quiescent state.  Turn on the tick!
 		WRITE_ONCE(rdp->rcu_forced_tick, true);

From 3292ba0229dbe5f3e79b78b230354ada2888dc29 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 5 Jul 2023 16:20:58 -0700
Subject: [PATCH 1242/2047] rcu: Make the rcu_nocb_poll boot parameter usable
 via boot config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The rcu_nocb_poll kernel boot parameter is defined via early_param(),
whose parsing functions are invoked from parse_early_param() which
is in turn invoked by setup_arch(), which is very early indeed.  It
is invoked so early that the console output timestamps read 0.000000,
in other words, before time begins.

This use of early_param() means that the rcu_nocb_poll kernel boot
parameter cannot usefully be embedded into the kernel image.  Yes, you
can embed it, but setup_boot_config() is invoked from start_kernel()
too late for it to be parsed.

But it makes no sense to parse this parameter so early.  After all,
it cannot do anything until the rcuog kthreads are created, which is
long after rcu_init() time, let alone setup_boot_config() time.

This commit therefore switches the rcu_nocb_poll kernel boot parameter
from early_param() to __setup(), which allows boot-config parsing of
this parameter, in turn allowing it to be embedded into the kernel image.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/rcu/tree_nocb.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 43229d2b0c44..5598212d1f27 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -77,9 +77,9 @@ __setup("rcu_nocbs", rcu_nocb_setup);
 static int __init parse_rcu_nocb_poll(char *arg)
 {
 	rcu_nocb_poll = true;
-	return 0;
+	return 1;
 }
-early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
+__setup("rcu_nocb_poll", parse_rcu_nocb_poll);
 
 /*
  * Don't bother bypassing ->cblist if the call_rcu() rate is low.

From efd04f8a8b45b8b98704b5860e363bab239b8bae Mon Sep 17 00:00:00 2001
From: Alan Huang <mmpgouride@gmail.com>
Date: Thu, 6 Jul 2023 10:28:48 +0000
Subject: [PATCH 1243/2047] rcu: Use WRITE_ONCE() for assignments to ->next for
 rculist_nulls

When the objects managed by rculist_nulls are allocated with
SLAB_TYPESAFE_BY_RCU, old readers may still hold references to an object
even though it is just now being added, which means the modification of
->next is visible to readers.  This patch therefore uses WRITE_ONCE()
for assignments to ->next.

Signed-off-by: Alan Huang <mmpgouride@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 include/linux/rculist_nulls.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index ba4c00dd8005..89186c499dd4 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -101,7 +101,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
 {
 	struct hlist_nulls_node *first = h->first;
 
-	n->next = first;
+	WRITE_ONCE(n->next, first);
 	WRITE_ONCE(n->pprev, &h->first);
 	rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
 	if (!is_a_nulls(first))
@@ -137,7 +137,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
 		last = i;
 
 	if (last) {
-		n->next = last->next;
+		WRITE_ONCE(n->next, last->next);
 		n->pprev = &last->next;
 		rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
 	} else {

From ba6ea77d0e75113b52fdae6e01473476cb48a83c Mon Sep 17 00:00:00 2001
From: Liao Chang <liaochang1@huawei.com>
Date: Tue, 15 Aug 2023 09:40:02 +0800
Subject: [PATCH 1244/2047] cpufreq: Prefer to print cpuid in MIN/MAX QoS
 register error message

When a cpufreq_policy is allocated, the cpus, related_cpus and real_cpus
of policy are still unset. Therefore, it is preferable to print the
passed 'cpu' parameter instead of a empty 'cpus' cpumask in error
message when registering MIN/MAX QoS notifier fails.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 50bbc969ffe5..a757f90aa9d6 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1234,16 +1234,16 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
 	ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MIN,
 				    &policy->nb_min);
 	if (ret) {
-		dev_err(dev, "Failed to register MIN QoS notifier: %d (%*pbl)\n",
-			ret, cpumask_pr_args(policy->cpus));
+		dev_err(dev, "Failed to register MIN QoS notifier: %d (CPU%u)\n",
+			ret, cpu);
 		goto err_kobj_remove;
 	}
 
 	ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MAX,
 				    &policy->nb_max);
 	if (ret) {
-		dev_err(dev, "Failed to register MAX QoS notifier: %d (%*pbl)\n",
-			ret, cpumask_pr_args(policy->cpus));
+		dev_err(dev, "Failed to register MAX QoS notifier: %d (CPU%u)\n",
+			ret, cpu);
 		goto err_min_qos_notifier;
 	}
 

From 6a4fec4f6d30a325a1b27be70729145484e6fe9f Mon Sep 17 00:00:00 2001
From: Liao Chang <liaochang1@huawei.com>
Date: Tue, 15 Aug 2023 09:40:17 +0800
Subject: [PATCH 1245/2047] cpufreq: cppc: cppc_cpufreq_get_rate() returns zero
 in all error cases.

The cpufreq framework used to use the zero of return value to reflect
the cppc_cpufreq_get_rate() had failed to get current frequecy and treat
all positive integer to be succeed. Since cppc_get_perf_ctrs() returns a
negative integer in error case, so it is better to convert the value to
zero as the return value of cppc_cpufreq_get_rate().

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cppc_cpufreq.c | 4 ++--
 include/linux/cpufreq.h        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 022e3555407c..05642759faee 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -849,13 +849,13 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 
 	ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
 	if (ret)
-		return ret;
+		return 0;
 
 	udelay(2); /* 2usec delay between sampling */
 
 	ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
 	if (ret)
-		return ret;
+		return 0;
 
 	delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
 					       &fb_ctrs_t1);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 172ff51c1b2a..99da27466b8f 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -370,7 +370,7 @@ struct cpufreq_driver {
 	int		(*target_intermediate)(struct cpufreq_policy *policy,
 					       unsigned int index);
 
-	/* should be defined, if possible */
+	/* should be defined, if possible, return 0 on error */
 	unsigned int	(*get)(unsigned int cpu);
 
 	/* Called to update policy limits on firmware notifications. */

From e613d8cff54736d4fa73730ccaa0ecbe39140782 Mon Sep 17 00:00:00 2001
From: Liao Chang <liaochang1@huawei.com>
Date: Thu, 17 Aug 2023 07:47:56 +0000
Subject: [PATCH 1246/2047] cpufreq: cppc: Set fie_disabled to FIE_DISABLED if
 fails to create kworker_fie

The function cppc_freq_invariance_init() may failed to create
kworker_fie, make it more robust by setting fie_disabled to FIE_DISBALED
to prevent an invalid pointer dereference in kthread_destroy_worker(),
which called from cppc_freq_invariance_exit().

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cppc_cpufreq.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 05642759faee..fe08ca419b3d 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -249,15 +249,19 @@ static void __init cppc_freq_invariance_init(void)
 		return;
 
 	kworker_fie = kthread_create_worker(0, "cppc_fie");
-	if (IS_ERR(kworker_fie))
+	if (IS_ERR(kworker_fie)) {
+		pr_warn("%s: failed to create kworker_fie: %ld\n", __func__,
+			PTR_ERR(kworker_fie));
+		fie_disabled = FIE_DISABLED;
 		return;
+	}
 
 	ret = sched_setattr_nocheck(kworker_fie->task, &attr);
 	if (ret) {
 		pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__,
 			ret);
 		kthread_destroy_worker(kworker_fie);
-		return;
+		fie_disabled = FIE_DISABLED;
 	}
 }
 
@@ -267,7 +271,6 @@ static void cppc_freq_invariance_exit(void)
 		return;
 
 	kthread_destroy_worker(kworker_fie);
-	kworker_fie = NULL;
 }
 
 #else

From 9a99a996d1ec53819580eba93d60b38d72befe3d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 17 Aug 2023 11:23:32 +0200
Subject: [PATCH 1247/2047] thermal: core: Introduce thermal_zone_device_exec()

Introduce a new helper function, thermal_zone_device_exec(), that can
be used by drivers to run a given callback routine under the zone lock.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_core.c | 19 +++++++++++++++++++
 include/linux/thermal.h        |  4 ++++
 2 files changed, 23 insertions(+)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index b38abe8a080a..6b273e11ab2f 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -497,6 +497,25 @@ void thermal_zone_device_update(struct thermal_zone_device *tz,
 }
 EXPORT_SYMBOL_GPL(thermal_zone_device_update);
 
+/**
+ * thermal_zone_device_exec - Run a callback under the zone lock.
+ * @tz: Thermal zone.
+ * @cb: Callback to run.
+ * @data: Data to pass to the callback.
+ */
+void thermal_zone_device_exec(struct thermal_zone_device *tz,
+			      void (*cb)(struct thermal_zone_device *,
+					 unsigned long),
+			      unsigned long data)
+{
+	mutex_lock(&tz->lock);
+
+	cb(tz, data);
+
+	mutex_unlock(&tz->lock);
+}
+EXPORT_SYMBOL_GPL(thermal_zone_device_exec);
+
 static void thermal_zone_device_check(struct work_struct *work)
 {
 	struct thermal_zone_device *tz = container_of(work, struct
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 87837094d549..4d40bfa14cdd 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -323,6 +323,10 @@ int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int,
 				       struct thermal_cooling_device *);
 void thermal_zone_device_update(struct thermal_zone_device *,
 				enum thermal_notify_event);
+void thermal_zone_device_exec(struct thermal_zone_device *tz,
+			      void (*cb)(struct thermal_zone_device *,
+					 unsigned long),
+			      unsigned long data);
 
 struct thermal_cooling_device *thermal_cooling_device_register(const char *,
 		void *, const struct thermal_cooling_device_ops *);

From cba440fab30106c9ebd30dbbb64d4fb7b997a8e8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 7 Aug 2023 20:04:47 +0200
Subject: [PATCH 1248/2047] thermal: core: Add priv pointer to struct
 thermal_trip

Add a new field called priv to struct thermal_trip to allow thermal
drivers to store pointers to their local data associated with trip
points.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 include/linux/thermal.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 4d40bfa14cdd..f1b43d8a7bd3 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -81,11 +81,13 @@ struct thermal_zone_device_ops {
  * @temperature: temperature value in miliCelsius
  * @hysteresis: relative hysteresis in miliCelsius
  * @type: trip point type
+ * @priv: pointer to driver data associated with this trip
  */
 struct thermal_trip {
 	int temperature;
 	int hysteresis;
 	enum thermal_trip_type type;
+	void *priv;
 };
 
 struct thermal_cooling_device_ops {

From bf07b4a3ba3446ba4bbe1ddac235162dbda2ce57 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 7 Aug 2023 20:06:13 +0200
Subject: [PATCH 1249/2047] ACPI: thermal: Clean up
 acpi_thermal_register_thermal_zone()

Rename the trips variable in acpi_thermal_register_thermal_zone() to
trip_count so its name better reflects the purpose, rearrange white
space in the loop over active trips for clarity and reduce code
duplication related to calling thermal_zone_device_register() by
using an extra local variable to store the passive delay value.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/thermal.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 45a32b0148d9..4db33da40144 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -740,33 +740,29 @@ static void acpi_thermal_zone_sysfs_remove(struct acpi_thermal *tz)
 
 static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz)
 {
-	int trips = 0;
+	int passive_delay = 0;
+	int trip_count = 0;
 	int result;
 	int i;
 
 	if (tz->trips.critical.valid)
-		trips++;
+		trip_count++;
 
 	if (tz->trips.hot.valid)
-		trips++;
+		trip_count++;
 
-	if (tz->trips.passive.valid)
-		trips++;
+	if (tz->trips.passive.valid) {
+		trip_count++;
+		passive_delay = tz->trips.passive.tsp * 100;
+	}
 
-	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].valid;
-	     i++, trips++);
-
-	if (tz->trips.passive.valid)
-		tz->thermal_zone = thermal_zone_device_register("acpitz", trips, 0, tz,
-								&acpi_thermal_zone_ops, NULL,
-								tz->trips.passive.tsp * 100,
-								tz->polling_frequency * 100);
-	else
-		tz->thermal_zone =
-			thermal_zone_device_register("acpitz", trips, 0, tz,
-						     &acpi_thermal_zone_ops, NULL,
-						     0, tz->polling_frequency * 100);
+	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].valid; i++)
+		trip_count++;
 
+	tz->thermal_zone = thermal_zone_device_register("acpitz", trip_count, 0,
+							tz, &acpi_thermal_zone_ops,
+							NULL, passive_delay,
+							tz->polling_frequency * 100);
 	if (IS_ERR(tz->thermal_zone))
 		return -ENODEV;
 

From 68f4f0378c0cee5c6b6308c1fd6eb7677f354cad Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 17 Aug 2023 11:24:27 +0200
Subject: [PATCH 1250/2047] ACPI: thermal: Carry out trip point updates under
 zone lock

There is a race condition between acpi_thermal_trips_update() and
acpi_thermal_check_fn(), because the trip points may get updated while
the latter is running which in theory may lead to inconsistent results.
For example, if two trips are updated together, using the temperature
value of one of them from before the update and the temperature value
of the other one from after the update may not lead to the expected
outcome.

Moreover, if thermal_get_trend() runs when a trip points update is in
progress, it may end up using stale trip point temperatures.

To address this, make acpi_thermal_trips_update() call
thermal_zone_device_exec() to carry out the trip points update and
use a new  acpi_thermal_adjust_thermal_zone() wrapper around
__acpi_thermal_trips_update() as the callback function for the latter.

While at it, change the acpi_thermal_trips_update() return data type
to void as that function always returns 0 anyway.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/thermal.c | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 4db33da40144..582984c0e8ea 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -185,7 +185,7 @@ static int acpi_thermal_get_polling_frequency(struct acpi_thermal *tz)
 	return 0;
 }
 
-static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
+static void __acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 {
 	acpi_status status;
 	unsigned long long tmp;
@@ -393,17 +393,40 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 			ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "device");
 		}
 	}
+}
 
-	return 0;
+static void acpi_thermal_adjust_thermal_zone(struct thermal_zone_device *thermal,
+					     unsigned long data)
+{
+	__acpi_thermal_trips_update(thermal_zone_device_priv(thermal), data);
+}
+
+static void acpi_queue_thermal_check(struct acpi_thermal *tz)
+{
+	if (!work_pending(&tz->thermal_check_work))
+		queue_work(acpi_thermal_pm_queue, &tz->thermal_check_work);
+}
+
+static void acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
+{
+	/*
+	 * Use thermal_zone_device_exec() to carry out the trip points
+	 * update, so as to protect thermal_get_trend() from getting stale
+	 * trip point temperatures and to prevent thermal_zone_device_update()
+	 * invoked from acpi_thermal_check_fn() from producing inconsistent
+	 * results.
+	 */
+	thermal_zone_device_exec(tz->thermal_zone,
+				 acpi_thermal_adjust_thermal_zone, flag);
+	acpi_queue_thermal_check(tz);
 }
 
 static int acpi_thermal_get_trip_points(struct acpi_thermal *tz)
 {
-	int i, ret = acpi_thermal_trips_update(tz, ACPI_TRIPS_INIT);
 	bool valid;
+	int i;
 
-	if (ret)
-		return ret;
+	__acpi_thermal_trips_update(tz, ACPI_TRIPS_INIT);
 
 	valid = tz->trips.critical.valid |
 		tz->trips.hot.valid |
@@ -799,12 +822,6 @@ static void acpi_thermal_unregister_thermal_zone(struct acpi_thermal *tz)
                                  Driver Interface
    -------------------------------------------------------------------------- */
 
-static void acpi_queue_thermal_check(struct acpi_thermal *tz)
-{
-	if (!work_pending(&tz->thermal_check_work))
-		queue_work(acpi_thermal_pm_queue, &tz->thermal_check_work);
-}
-
 static void acpi_thermal_notify(acpi_handle handle, u32 event, void *data)
 {
 	struct acpi_device *device = data;
@@ -819,13 +836,11 @@ static void acpi_thermal_notify(acpi_handle handle, u32 event, void *data)
 		break;
 	case ACPI_THERMAL_NOTIFY_THRESHOLDS:
 		acpi_thermal_trips_update(tz, ACPI_TRIPS_THRESHOLDS);
-		acpi_queue_thermal_check(tz);
 		acpi_bus_generate_netlink_event(device->pnp.device_class,
 						dev_name(&device->dev), event, 0);
 		break;
 	case ACPI_THERMAL_NOTIFY_DEVICES:
 		acpi_thermal_trips_update(tz, ACPI_TRIPS_DEVICES);
-		acpi_queue_thermal_check(tz);
 		acpi_bus_generate_netlink_event(device->pnp.device_class,
 						dev_name(&device->dev), event, 0);
 		break;

From 68b77785a1db293582558bc1460b19e8355b8f7a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 7 Aug 2023 20:10:06 +0200
Subject: [PATCH 1251/2047] ACPI: thermal: Introduce struct acpi_thermal_trip

Add struct acpi_thermal_trip to contain the temperature and valid flag
of each trip point in the driver's local data structures.

This helps to make the subsequent changes more straightforward.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/thermal.c | 96 ++++++++++++++++++++----------------------
 1 file changed, 45 insertions(+), 51 deletions(-)

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 582984c0e8ea..bfbbbbdc658c 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -92,34 +92,27 @@ MODULE_PARM_DESC(psv, "Disable or override all passive trip points.");
 
 static struct workqueue_struct *acpi_thermal_pm_queue;
 
-struct acpi_thermal_critical {
-	unsigned long temperature;
-	bool valid;
-};
-
-struct acpi_thermal_hot {
+struct acpi_thermal_trip {
 	unsigned long temperature;
 	bool valid;
 };
 
 struct acpi_thermal_passive {
+	struct acpi_thermal_trip trip;
 	struct acpi_handle_list devices;
-	unsigned long temperature;
 	unsigned long tc1;
 	unsigned long tc2;
 	unsigned long tsp;
-	bool valid;
 };
 
 struct acpi_thermal_active {
+	struct acpi_thermal_trip trip;
 	struct acpi_handle_list devices;
-	unsigned long temperature;
-	bool valid;
 };
 
 struct acpi_thermal_trips {
-	struct acpi_thermal_critical critical;
-	struct acpi_thermal_hot hot;
+	struct acpi_thermal_trip critical;
+	struct acpi_thermal_trip hot;
 	struct acpi_thermal_passive passive;
 	struct acpi_thermal_active active[ACPI_THERMAL_MAX_ACTIVE];
 };
@@ -250,9 +243,9 @@ static void __acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 	}
 
 	/* Passive (optional) */
-	if (((flag & ACPI_TRIPS_PASSIVE) && tz->trips.passive.valid) ||
+	if (((flag & ACPI_TRIPS_PASSIVE) && tz->trips.passive.trip.valid) ||
 	    flag == ACPI_TRIPS_INIT) {
-		valid = tz->trips.passive.valid;
+		valid = tz->trips.passive.trip.valid;
 		if (psv == -1) {
 			status = AE_SUPPORT;
 		} else if (psv > 0) {
@@ -264,44 +257,44 @@ static void __acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 		}
 
 		if (ACPI_FAILURE(status)) {
-			tz->trips.passive.valid = false;
+			tz->trips.passive.trip.valid = false;
 		} else {
-			tz->trips.passive.temperature = tmp;
-			tz->trips.passive.valid = true;
+			tz->trips.passive.trip.temperature = tmp;
+			tz->trips.passive.trip.valid = true;
 			if (flag == ACPI_TRIPS_INIT) {
 				status = acpi_evaluate_integer(tz->device->handle,
 							       "_TC1", NULL, &tmp);
 				if (ACPI_FAILURE(status))
-					tz->trips.passive.valid = false;
+					tz->trips.passive.trip.valid = false;
 				else
 					tz->trips.passive.tc1 = tmp;
 
 				status = acpi_evaluate_integer(tz->device->handle,
 							       "_TC2", NULL, &tmp);
 				if (ACPI_FAILURE(status))
-					tz->trips.passive.valid = false;
+					tz->trips.passive.trip.valid = false;
 				else
 					tz->trips.passive.tc2 = tmp;
 
 				status = acpi_evaluate_integer(tz->device->handle,
 							       "_TSP", NULL, &tmp);
 				if (ACPI_FAILURE(status))
-					tz->trips.passive.valid = false;
+					tz->trips.passive.trip.valid = false;
 				else
 					tz->trips.passive.tsp = tmp;
 			}
 		}
 	}
-	if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.passive.valid) {
+	if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.passive.trip.valid) {
 		memset(&devices, 0, sizeof(struct acpi_handle_list));
 		status = acpi_evaluate_reference(tz->device->handle, "_PSL",
 						 NULL, &devices);
 		if (ACPI_FAILURE(status)) {
 			acpi_handle_info(tz->device->handle,
 					 "Invalid passive threshold\n");
-			tz->trips.passive.valid = false;
+			tz->trips.passive.trip.valid = false;
 		} else {
-			tz->trips.passive.valid = true;
+			tz->trips.passive.trip.valid = true;
 		}
 
 		if (memcmp(&tz->trips.passive.devices, &devices,
@@ -312,24 +305,24 @@ static void __acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 		}
 	}
 	if ((flag & ACPI_TRIPS_PASSIVE) || (flag & ACPI_TRIPS_DEVICES)) {
-		if (valid != tz->trips.passive.valid)
+		if (valid != tz->trips.passive.trip.valid)
 			ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "state");
 	}
 
 	/* Active (optional) */
 	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
 		char name[5] = { '_', 'A', 'C', ('0' + i), '\0' };
-		valid = tz->trips.active[i].valid;
+		valid = tz->trips.active[i].trip.valid;
 
 		if (act == -1)
 			break; /* disable all active trip points */
 
 		if (flag == ACPI_TRIPS_INIT || ((flag & ACPI_TRIPS_ACTIVE) &&
-		    tz->trips.active[i].valid)) {
+		    tz->trips.active[i].trip.valid)) {
 			status = acpi_evaluate_integer(tz->device->handle,
 						       name, NULL, &tmp);
 			if (ACPI_FAILURE(status)) {
-				tz->trips.active[i].valid = false;
+				tz->trips.active[i].trip.valid = false;
 				if (i == 0)
 					break;
 
@@ -337,35 +330,36 @@ static void __acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 					break;
 
 				if (i == 1)
-					tz->trips.active[0].temperature = celsius_to_deci_kelvin(act);
+					tz->trips.active[0].trip.temperature =
+							celsius_to_deci_kelvin(act);
 				else
 					/*
 					 * Don't allow override higher than
 					 * the next higher trip point
 					 */
-					tz->trips.active[i-1].temperature =
+					tz->trips.active[i-1].trip.temperature =
 						min_t(unsigned long,
-						      tz->trips.active[i-2].temperature,
+						      tz->trips.active[i-2].trip.temperature,
 						      celsius_to_deci_kelvin(act));
 
 				break;
 			} else {
-				tz->trips.active[i].temperature = tmp;
-				tz->trips.active[i].valid = true;
+				tz->trips.active[i].trip.temperature = tmp;
+				tz->trips.active[i].trip.valid = true;
 			}
 		}
 
 		name[2] = 'L';
-		if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.active[i].valid) {
+		if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.active[i].trip.valid) {
 			memset(&devices, 0, sizeof(struct acpi_handle_list));
 			status = acpi_evaluate_reference(tz->device->handle,
 							 name, NULL, &devices);
 			if (ACPI_FAILURE(status)) {
 				acpi_handle_info(tz->device->handle,
 						 "Invalid active%d threshold\n", i);
-				tz->trips.active[i].valid = false;
+				tz->trips.active[i].trip.valid = false;
 			} else {
-				tz->trips.active[i].valid = true;
+				tz->trips.active[i].trip.valid = true;
 			}
 
 			if (memcmp(&tz->trips.active[i].devices, &devices,
@@ -376,10 +370,10 @@ static void __acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 			}
 		}
 		if ((flag & ACPI_TRIPS_ACTIVE) || (flag & ACPI_TRIPS_DEVICES))
-			if (valid != tz->trips.active[i].valid)
+			if (valid != tz->trips.active[i].trip.valid)
 				ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "state");
 
-		if (!tz->trips.active[i].valid)
+		if (!tz->trips.active[i].trip.valid)
 			break;
 	}
 
@@ -430,10 +424,10 @@ static int acpi_thermal_get_trip_points(struct acpi_thermal *tz)
 
 	valid = tz->trips.critical.valid |
 		tz->trips.hot.valid |
-		tz->trips.passive.valid;
+		tz->trips.passive.trip.valid;
 
 	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++)
-		valid = valid || tz->trips.active[i].valid;
+		valid = valid || tz->trips.active[i].trip.valid;
 
 	if (!valid) {
 		pr_warn(FW_BUG "No valid trip found\n");
@@ -486,7 +480,7 @@ static int thermal_get_trip_type(struct thermal_zone_device *thermal,
 		trip--;
 	}
 
-	if (tz->trips.passive.valid) {
+	if (tz->trips.passive.trip.valid) {
 		if (!trip) {
 			*type = THERMAL_TRIP_PASSIVE;
 			return 0;
@@ -494,7 +488,7 @@ static int thermal_get_trip_type(struct thermal_zone_device *thermal,
 		trip--;
 	}
 
-	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].valid; i++) {
+	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].trip.valid; i++) {
 		if (!trip) {
 			*type = THERMAL_TRIP_ACTIVE;
 			return 0;
@@ -534,10 +528,10 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
 		trip--;
 	}
 
-	if (tz->trips.passive.valid) {
+	if (tz->trips.passive.trip.valid) {
 		if (!trip) {
 			*temp = deci_kelvin_to_millicelsius_with_offset(
-					tz->trips.passive.temperature,
+					tz->trips.passive.trip.temperature,
 					tz->kelvin_offset);
 			return 0;
 		}
@@ -545,10 +539,10 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
 	}
 
 	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE &&
-		tz->trips.active[i].valid; i++) {
+		tz->trips.active[i].trip.valid; i++) {
 		if (!trip) {
 			*temp = deci_kelvin_to_millicelsius_with_offset(
-					tz->trips.active[i].temperature,
+					tz->trips.active[i].trip.temperature,
 					tz->kelvin_offset);
 			return 0;
 		}
@@ -604,7 +598,7 @@ static int thermal_get_trend(struct thermal_zone_device *thermal,
 	 * before this callback being invoked
 	 */
 	i = tz->trips.passive.tc1 * (tz->temperature - tz->last_temperature) +
-	    tz->trips.passive.tc2 * (tz->temperature - tz->trips.passive.temperature);
+	    tz->trips.passive.tc2 * (tz->temperature - tz->trips.passive.trip.temperature);
 
 	if (i > 0)
 		*trend = THERMAL_TREND_RAISING;
@@ -655,7 +649,7 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
 	if (tz->trips.hot.valid)
 		trip++;
 
-	if (tz->trips.passive.valid) {
+	if (tz->trips.passive.trip.valid) {
 		trip++;
 		for (i = 0; i < tz->trips.passive.devices.count; i++) {
 			handle = tz->trips.passive.devices.handles[i];
@@ -680,7 +674,7 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
 	}
 
 	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
-		if (!tz->trips.active[i].valid)
+		if (!tz->trips.active[i].trip.valid)
 			break;
 
 		trip++;
@@ -774,12 +768,12 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz)
 	if (tz->trips.hot.valid)
 		trip_count++;
 
-	if (tz->trips.passive.valid) {
+	if (tz->trips.passive.trip.valid) {
 		trip_count++;
 		passive_delay = tz->trips.passive.tsp * 100;
 	}
 
-	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].valid; i++)
+	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].trip.valid; i++)
 		trip_count++;
 
 	tz->thermal_zone = thermal_zone_device_register("acpitz", trip_count, 0,
@@ -1049,7 +1043,7 @@ static int acpi_thermal_resume(struct device *dev)
 		return -EINVAL;
 
 	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
-		if (!tz->trips.active[i].valid)
+		if (!tz->trips.active[i].trip.valid)
 			break;
 
 		for (j = 0; j < tz->trips.active[i].devices.count; j++) {

From 96b8b4365db44af0a94b91a805cd5beed45d24fa Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 7 Aug 2023 20:11:07 +0200
Subject: [PATCH 1252/2047] thermal: core: Rework and rename
 __for_each_thermal_trip()

Rework the currently unused __for_each_thermal_trip() to pass original
pointers to struct thermal_trip objects to the callback, so it can be
used for updating trip data (e.g. temperatures), rename it to
for_each_thermal_trip() and make it available to modular drivers.

Suggested-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_core.h |  4 ----
 drivers/thermal/thermal_trip.c | 18 ++++++++----------
 include/linux/thermal.h        |  3 +++
 3 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 17c1bbed734d..04513f9fbfa1 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -54,10 +54,6 @@ int for_each_thermal_cooling_device(int (*cb)(struct thermal_cooling_device *,
 int for_each_thermal_governor(int (*cb)(struct thermal_governor *, void *),
 			      void *thermal_governor);
 
-int __for_each_thermal_trip(struct thermal_zone_device *,
-			    int (*cb)(struct thermal_trip *, void *),
-			    void *);
-
 struct thermal_zone_device *thermal_zone_get_by_id(int id);
 
 struct thermal_attr {
diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c
index 907f3a4d7bc8..53115cfdfd42 100644
--- a/drivers/thermal/thermal_trip.c
+++ b/drivers/thermal/thermal_trip.c
@@ -9,28 +9,26 @@
  */
 #include "thermal_core.h"
 
-int __for_each_thermal_trip(struct thermal_zone_device *tz,
-			    int (*cb)(struct thermal_trip *, void *),
-			    void *data)
+int for_each_thermal_trip(struct thermal_zone_device *tz,
+			  int (*cb)(struct thermal_trip *, void *),
+			  void *data)
 {
 	int i, ret;
-	struct thermal_trip trip;
 
 	lockdep_assert_held(&tz->lock);
 
+	if (!tz->trips)
+		return -ENODATA;
+
 	for (i = 0; i < tz->num_trips; i++) {
-
-		ret = __thermal_zone_get_trip(tz, i, &trip);
-		if (ret)
-			return ret;
-
-		ret = cb(&trip, data);
+		ret = cb(&tz->trips[i], data);
 		if (ret)
 			return ret;
 	}
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(for_each_thermal_trip);
 
 int thermal_zone_get_num_trips(struct thermal_zone_device *tz)
 {
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index f1b43d8a7bd3..f1d73642177f 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -289,6 +289,9 @@ int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
 int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id,
 			  const struct thermal_trip *trip);
 
+int for_each_thermal_trip(struct thermal_zone_device *tz,
+			  int (*cb)(struct thermal_trip *, void *),
+			  void *data);
 int thermal_zone_get_num_trips(struct thermal_zone_device *tz);
 
 int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp);

From ec23c1c462ded022ec0c3ed3fa92af6ca25b9599 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 9 Aug 2023 13:07:23 +0200
Subject: [PATCH 1253/2047] ACPI: thermal: Use trip point table to register
 thermal zones

Make the ACPI thermal driver use thermal_zone_device_register_with_trips()
to register its thermal zones.

For this purpose, make it create a trip point table that will be passed to
thermal_zone_device_register_with_trips() as an argument.

Also use the thermal_zone_update_trip_temp() helper introduced
previously to update temperatures of the passive and active trip
points after a trip points change notification from the platform
firmware.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/thermal.c | 93 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 86 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index bfbbbbdc658c..c2fe806fd3d7 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -125,6 +125,7 @@ struct acpi_thermal {
 	unsigned long polling_frequency;
 	volatile u8 zombie;
 	struct acpi_thermal_trips trips;
+	struct thermal_trip *trip_table;
 	struct acpi_handle_list devices;
 	struct thermal_zone_device *thermal_zone;
 	int kelvin_offset;	/* in millidegrees */
@@ -178,6 +179,15 @@ static int acpi_thermal_get_polling_frequency(struct acpi_thermal *tz)
 	return 0;
 }
 
+static int acpi_thermal_temp(struct acpi_thermal *tz, int temp_deci_k)
+{
+	if (temp_deci_k == THERMAL_TEMP_INVALID)
+		return THERMAL_TEMP_INVALID;
+
+	return deci_kelvin_to_millicelsius_with_offset(temp_deci_k,
+						       tz->kelvin_offset);
+}
+
 static void __acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 {
 	acpi_status status;
@@ -389,10 +399,30 @@ static void __acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 	}
 }
 
+static int acpi_thermal_adjust_trip(struct thermal_trip *trip, void *data)
+{
+	struct acpi_thermal_trip *acpi_trip = trip->priv;
+	struct acpi_thermal *tz = data;
+
+	if (!acpi_trip)
+		return 0;
+
+	if (acpi_trip->valid)
+		trip->temperature = acpi_thermal_temp(tz, acpi_trip->temperature);
+	else
+		trip->temperature = THERMAL_TEMP_INVALID;
+
+	return 0;
+}
+
 static void acpi_thermal_adjust_thermal_zone(struct thermal_zone_device *thermal,
 					     unsigned long data)
 {
-	__acpi_thermal_trips_update(thermal_zone_device_priv(thermal), data);
+	struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
+
+	__acpi_thermal_trips_update(tz, data);
+
+	for_each_thermal_trip(tz->thermal_zone, acpi_thermal_adjust_trip, tz);
 }
 
 static void acpi_queue_thermal_check(struct acpi_thermal *tz)
@@ -757,6 +787,8 @@ static void acpi_thermal_zone_sysfs_remove(struct acpi_thermal *tz)
 
 static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz)
 {
+	struct acpi_thermal_trip *acpi_trip;
+	struct thermal_trip *trip;
 	int passive_delay = 0;
 	int trip_count = 0;
 	int result;
@@ -776,12 +808,56 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz)
 	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].trip.valid; i++)
 		trip_count++;
 
-	tz->thermal_zone = thermal_zone_device_register("acpitz", trip_count, 0,
-							tz, &acpi_thermal_zone_ops,
-							NULL, passive_delay,
-							tz->polling_frequency * 100);
-	if (IS_ERR(tz->thermal_zone))
-		return -ENODEV;
+	trip = kcalloc(trip_count, sizeof(*trip), GFP_KERNEL);
+	if (!trip)
+		return -ENOMEM;
+
+	tz->trip_table = trip;
+
+	if (tz->trips.critical.valid) {
+		trip->type = THERMAL_TRIP_CRITICAL;
+		trip->temperature = acpi_thermal_temp(tz, tz->trips.critical.temperature);
+		trip++;
+	}
+
+	if (tz->trips.hot.valid) {
+		trip->type = THERMAL_TRIP_HOT;
+		trip->temperature = acpi_thermal_temp(tz, tz->trips.hot.temperature);
+		trip++;
+	}
+
+	acpi_trip = &tz->trips.passive.trip;
+	if (acpi_trip->valid) {
+		trip->type = THERMAL_TRIP_PASSIVE;
+		trip->temperature = acpi_thermal_temp(tz, acpi_trip->temperature);
+		trip->priv = acpi_trip;
+		trip++;
+	}
+
+	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
+		acpi_trip = &tz->trips.active[i].trip;
+
+		if (!acpi_trip->valid)
+			break;
+
+		trip->type = THERMAL_TRIP_ACTIVE;
+		trip->temperature = acpi_thermal_temp(tz, acpi_trip->temperature);
+		trip->priv = acpi_trip;
+		trip++;
+	}
+
+	tz->thermal_zone = thermal_zone_device_register_with_trips("acpitz",
+								   tz->trip_table,
+								   trip_count,
+								   0, tz,
+								   &acpi_thermal_zone_ops,
+								   NULL,
+								   passive_delay,
+								   tz->polling_frequency * 100);
+	if (IS_ERR(tz->thermal_zone)) {
+		result = PTR_ERR(tz->thermal_zone);
+		goto free_trip_table;
+	}
 
 	result = acpi_thermal_zone_sysfs_add(tz);
 	if (result)
@@ -800,6 +876,8 @@ remove_links:
 	acpi_thermal_zone_sysfs_remove(tz);
 unregister_tzd:
 	thermal_zone_device_unregister(tz->thermal_zone);
+free_trip_table:
+	kfree(tz->trip_table);
 
 	return result;
 }
@@ -808,6 +886,7 @@ static void acpi_thermal_unregister_thermal_zone(struct acpi_thermal *tz)
 {
 	acpi_thermal_zone_sysfs_remove(tz);
 	thermal_zone_device_unregister(tz->thermal_zone);
+	kfree(tz->trip_table);
 	tz->thermal_zone = NULL;
 }
 

From 9caaad2ca02cb31fc235249f971cabdf8e11e28d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 7 Aug 2023 20:17:07 +0200
Subject: [PATCH 1254/2047] ACPI: thermal: Rework thermal_get_trend()

Rework the ACPI thermal driver's .get_trend() callback function,
thermal_get_trend(), so that it does not call thermal_get_trip_type()
and thermal_get_trip_temp() which are going to be dropped.

This reduces the overhead of the function too, because it will always
carry out a trip point lookup once after the change.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/thermal.c | 68 +++++++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 30 deletions(-)

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index c2fe806fd3d7..bd540e4fb4d0 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -598,46 +598,54 @@ static int thermal_get_crit_temp(struct thermal_zone_device *thermal,
 }
 
 static int thermal_get_trend(struct thermal_zone_device *thermal,
-			     int trip, enum thermal_trend *trend)
+			     int trip_index, enum thermal_trend *trend)
 {
 	struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
-	enum thermal_trip_type type;
-	int i;
+	struct acpi_thermal_trip *acpi_trip;
+	int t, i;
 
-	if (thermal_get_trip_type(thermal, trip, &type))
+	if (!tz || trip_index < 0)
 		return -EINVAL;
 
-	if (type == THERMAL_TRIP_ACTIVE) {
-		int trip_temp;
-		int temp = deci_kelvin_to_millicelsius_with_offset(
-					tz->temperature, tz->kelvin_offset);
-		if (thermal_get_trip_temp(thermal, trip, &trip_temp))
-			return -EINVAL;
+	if (tz->trips.critical.valid)
+		trip_index--;
 
-		if (temp > trip_temp) {
+	if (tz->trips.hot.valid)
+		trip_index--;
+
+	if (trip_index < 0)
+		return -EINVAL;
+
+	acpi_trip = &tz->trips.passive.trip;
+	if (acpi_trip->valid && !trip_index--) {
+		t = tz->trips.passive.tc1 * (tz->temperature -
+						tz->last_temperature) +
+			tz->trips.passive.tc2 * (tz->temperature -
+						acpi_trip->temperature);
+		if (t > 0)
 			*trend = THERMAL_TREND_RAISING;
-			return 0;
-		} else {
-			/* Fall back on default trend */
-			return -EINVAL;
+		else if (t < 0)
+			*trend = THERMAL_TREND_DROPPING;
+		else
+			*trend = THERMAL_TREND_STABLE;
+
+		return 0;
+	}
+
+	t = acpi_thermal_temp(tz, tz->temperature);
+
+	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
+		acpi_trip = &tz->trips.active[i].trip;
+		if (acpi_trip->valid && !trip_index--) {
+			if (t > acpi_thermal_temp(tz, acpi_trip->temperature)) {
+				*trend = THERMAL_TREND_RAISING;
+				return 0;
+			}
+			break;
 		}
 	}
 
-	/*
-	 * tz->temperature has already been updated by generic thermal layer,
-	 * before this callback being invoked
-	 */
-	i = tz->trips.passive.tc1 * (tz->temperature - tz->last_temperature) +
-	    tz->trips.passive.tc2 * (tz->temperature - tz->trips.passive.trip.temperature);
-
-	if (i > 0)
-		*trend = THERMAL_TREND_RAISING;
-	else if (i < 0)
-		*trend = THERMAL_TREND_DROPPING;
-	else
-		*trend = THERMAL_TREND_STABLE;
-
-	return 0;
+	return -EINVAL;
 }
 
 static void acpi_thermal_zone_device_hot(struct thermal_zone_device *thermal)

From 2a74c4aca1d70c4ec8104997470fda2fed5de322 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 7 Aug 2023 20:18:49 +0200
Subject: [PATCH 1255/2047] ACPI: thermal: Drop unnecessary thermal zone
 callbacks

Drop the .get_trip_type(), .get_trip_temp() and .get_crit_temp() thermal
zone callbacks that are not necessary any more from the ACPI thermal
driver along with the corresponding callback functions.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/thermal.c | 115 -----------------------------------------
 1 file changed, 115 deletions(-)

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index bd540e4fb4d0..2b4cb06d00ba 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -485,118 +485,6 @@ static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp)
 	return 0;
 }
 
-static int thermal_get_trip_type(struct thermal_zone_device *thermal,
-				 int trip, enum thermal_trip_type *type)
-{
-	struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
-	int i;
-
-	if (!tz || trip < 0)
-		return -EINVAL;
-
-	if (tz->trips.critical.valid) {
-		if (!trip) {
-			*type = THERMAL_TRIP_CRITICAL;
-			return 0;
-		}
-		trip--;
-	}
-
-	if (tz->trips.hot.valid) {
-		if (!trip) {
-			*type = THERMAL_TRIP_HOT;
-			return 0;
-		}
-		trip--;
-	}
-
-	if (tz->trips.passive.trip.valid) {
-		if (!trip) {
-			*type = THERMAL_TRIP_PASSIVE;
-			return 0;
-		}
-		trip--;
-	}
-
-	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].trip.valid; i++) {
-		if (!trip) {
-			*type = THERMAL_TRIP_ACTIVE;
-			return 0;
-		}
-		trip--;
-	}
-
-	return -EINVAL;
-}
-
-static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
-				 int trip, int *temp)
-{
-	struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
-	int i;
-
-	if (!tz || trip < 0)
-		return -EINVAL;
-
-	if (tz->trips.critical.valid) {
-		if (!trip) {
-			*temp = deci_kelvin_to_millicelsius_with_offset(
-					tz->trips.critical.temperature,
-					tz->kelvin_offset);
-			return 0;
-		}
-		trip--;
-	}
-
-	if (tz->trips.hot.valid) {
-		if (!trip) {
-			*temp = deci_kelvin_to_millicelsius_with_offset(
-					tz->trips.hot.temperature,
-					tz->kelvin_offset);
-			return 0;
-		}
-		trip--;
-	}
-
-	if (tz->trips.passive.trip.valid) {
-		if (!trip) {
-			*temp = deci_kelvin_to_millicelsius_with_offset(
-					tz->trips.passive.trip.temperature,
-					tz->kelvin_offset);
-			return 0;
-		}
-		trip--;
-	}
-
-	for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE &&
-		tz->trips.active[i].trip.valid; i++) {
-		if (!trip) {
-			*temp = deci_kelvin_to_millicelsius_with_offset(
-					tz->trips.active[i].trip.temperature,
-					tz->kelvin_offset);
-			return 0;
-		}
-		trip--;
-	}
-
-	return -EINVAL;
-}
-
-static int thermal_get_crit_temp(struct thermal_zone_device *thermal,
-				int *temperature)
-{
-	struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
-
-	if (tz->trips.critical.valid) {
-		*temperature = deci_kelvin_to_millicelsius_with_offset(
-					tz->trips.critical.temperature,
-					tz->kelvin_offset);
-		return 0;
-	}
-
-	return -EINVAL;
-}
-
 static int thermal_get_trend(struct thermal_zone_device *thermal,
 			     int trip_index, enum thermal_trend *trend)
 {
@@ -759,9 +647,6 @@ static struct thermal_zone_device_ops acpi_thermal_zone_ops = {
 	.bind = acpi_thermal_bind_cooling_device,
 	.unbind	= acpi_thermal_unbind_cooling_device,
 	.get_temp = thermal_get_temp,
-	.get_trip_type = thermal_get_trip_type,
-	.get_trip_temp = thermal_get_trip_temp,
-	.get_crit_temp = thermal_get_crit_temp,
 	.get_trend = thermal_get_trend,
 	.hot = acpi_thermal_zone_device_hot,
 	.critical = acpi_thermal_zone_device_critical,

From 4ab4b3b11dca4cda61b38147f4dd81b1b7b39e95 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 17 Aug 2023 11:25:29 +0200
Subject: [PATCH 1256/2047] ACPI: thermal: Eliminate code duplication from
 acpi_thermal_notify()

Move the acpi_bus_generate_netlink_event() invocation into
acpi_thermal_trips_update() which allows the code duplication in
acpi_thermal_notify() to be cleaned up, but for this purpose the
event value needs to be passed to acpi_thermal_trips_update() and
from there to acpi_thermal_adjust_thermal_zone() which has to
determine the flag value for __acpi_thermal_trips_update() by
itself.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/thermal.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 2b4cb06d00ba..419590f41ed5 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -419,8 +419,10 @@ static void acpi_thermal_adjust_thermal_zone(struct thermal_zone_device *thermal
 					     unsigned long data)
 {
 	struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
+	int flag = data == ACPI_THERMAL_NOTIFY_THRESHOLDS ?
+				ACPI_TRIPS_THRESHOLDS : ACPI_TRIPS_DEVICES;
 
-	__acpi_thermal_trips_update(tz, data);
+	__acpi_thermal_trips_update(tz, flag);
 
 	for_each_thermal_trip(tz->thermal_zone, acpi_thermal_adjust_trip, tz);
 }
@@ -431,8 +433,10 @@ static void acpi_queue_thermal_check(struct acpi_thermal *tz)
 		queue_work(acpi_thermal_pm_queue, &tz->thermal_check_work);
 }
 
-static void acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
+static void acpi_thermal_trips_update(struct acpi_thermal *tz, u32 event)
 {
+	struct acpi_device *adev = tz->device;
+
 	/*
 	 * Use thermal_zone_device_exec() to carry out the trip points
 	 * update, so as to protect thermal_get_trend() from getting stale
@@ -441,8 +445,10 @@ static void acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 	 * results.
 	 */
 	thermal_zone_device_exec(tz->thermal_zone,
-				 acpi_thermal_adjust_thermal_zone, flag);
+				 acpi_thermal_adjust_thermal_zone, event);
 	acpi_queue_thermal_check(tz);
+	acpi_bus_generate_netlink_event(adev->pnp.device_class,
+					dev_name(&adev->dev), event, 0);
 }
 
 static int acpi_thermal_get_trip_points(struct acpi_thermal *tz)
@@ -801,14 +807,8 @@ static void acpi_thermal_notify(acpi_handle handle, u32 event, void *data)
 		acpi_queue_thermal_check(tz);
 		break;
 	case ACPI_THERMAL_NOTIFY_THRESHOLDS:
-		acpi_thermal_trips_update(tz, ACPI_TRIPS_THRESHOLDS);
-		acpi_bus_generate_netlink_event(device->pnp.device_class,
-						dev_name(&device->dev), event, 0);
-		break;
 	case ACPI_THERMAL_NOTIFY_DEVICES:
-		acpi_thermal_trips_update(tz, ACPI_TRIPS_DEVICES);
-		acpi_bus_generate_netlink_event(device->pnp.device_class,
-						dev_name(&device->dev), event, 0);
+		acpi_thermal_trips_update(tz, event);
 		break;
 	default:
 		acpi_handle_debug(device->handle, "Unsupported event [0x%x]\n",

From 5484e31bbbff285f9505c4766373f840ffb746e5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 10 Aug 2023 20:36:36 +0200
Subject: [PATCH 1257/2047] cpuidle: menu: Skip tick_nohz_get_sleep_length()
 call in some cases

Because the cost of calling tick_nohz_get_sleep_length() may increase
in the future, reorder the code in menu_select() so it first uses the
statistics to determine the expected idle duration.  If that value is
higher than RESIDENCY_THRESHOLD_NS, tick_nohz_get_sleep_length() will
be called to obtain the time till the closest timer and refine the
idle duration prediction if necessary.

This causes the governor to always take the full overhead of
get_typical_interval() with the assumption that the cost will be
amortized by skipping the tick_nohz_get_sleep_length() call in the
cases when the predicted idle duration is relatively very small.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Doug Smythies <dsmythies@telus.net>
---
 drivers/cpuidle/governors/gov.h  | 14 +++++++
 drivers/cpuidle/governors/menu.c | 65 +++++++++++++++++++-------------
 drivers/cpuidle/governors/teo.c  |  9 +----
 3 files changed, 54 insertions(+), 34 deletions(-)
 create mode 100644 drivers/cpuidle/governors/gov.h

diff --git a/drivers/cpuidle/governors/gov.h b/drivers/cpuidle/governors/gov.h
new file mode 100644
index 000000000000..99e067d9668c
--- /dev/null
+++ b/drivers/cpuidle/governors/gov.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Common definitions for cpuidle governors. */
+
+#ifndef __CPUIDLE_GOVERNOR_H
+#define __CPUIDLE_GOVERNOR_H
+
+/*
+ * Idle state target residency threshold used for deciding whether or not to
+ * check the time till the closest expected timer event.
+ */
+#define RESIDENCY_THRESHOLD_NS	(15 * NSEC_PER_USEC)
+
+#endif /* __CPUIDLE_GOVERNOR_H */
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index c4922684f305..b96e3da0fedd 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -19,6 +19,8 @@
 #include <linux/sched/stat.h>
 #include <linux/math64.h>
 
+#include "gov.h"
+
 #define BUCKETS 12
 #define INTERVAL_SHIFT 3
 #define INTERVALS (1UL << INTERVAL_SHIFT)
@@ -166,8 +168,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
  * of points is below a threshold. If it is... then use the
  * average of these 8 points as the estimated value.
  */
-static unsigned int get_typical_interval(struct menu_device *data,
-					 unsigned int predicted_us)
+static unsigned int get_typical_interval(struct menu_device *data)
 {
 	int i, divisor;
 	unsigned int min, max, thresh, avg;
@@ -195,11 +196,7 @@ again:
 		}
 	}
 
-	/*
-	 * If the result of the computation is going to be discarded anyway,
-	 * avoid the computation altogether.
-	 */
-	if (min >= predicted_us)
+	if (!max)
 		return UINT_MAX;
 
 	if (divisor == INTERVALS)
@@ -267,7 +264,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
-	unsigned int predicted_us;
 	u64 predicted_ns;
 	u64 interactivity_req;
 	unsigned int nr_iowaiters;
@@ -279,16 +275,41 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		data->needs_update = 0;
 	}
 
-	/* determine the expected residency time, round up */
-	delta = tick_nohz_get_sleep_length(&delta_tick);
-	if (unlikely(delta < 0)) {
-		delta = 0;
-		delta_tick = 0;
-	}
-	data->next_timer_ns = delta;
-
 	nr_iowaiters = nr_iowait_cpu(dev->cpu);
-	data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+
+	/* Find the shortest expected idle interval. */
+	predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
+	if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
+		unsigned int timer_us;
+
+		/* Determine the time till the closest timer. */
+		delta = tick_nohz_get_sleep_length(&delta_tick);
+		if (unlikely(delta < 0)) {
+			delta = 0;
+			delta_tick = 0;
+		}
+
+		data->next_timer_ns = delta;
+		data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+
+		/* Round up the result for half microseconds. */
+		timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
+					data->next_timer_ns *
+						data->correction_factor[data->bucket],
+				   RESOLUTION * DECAY * NSEC_PER_USEC);
+		/* Use the lowest expected idle interval to pick the idle state. */
+		predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns);
+	} else {
+		/*
+		 * Because the next timer event is not going to be determined
+		 * in this case, assume that without the tick the closest timer
+		 * will be in distant future and that the closest tick will occur
+		 * after 1/2 of the tick period.
+		 */
+		data->next_timer_ns = KTIME_MAX;
+		delta_tick = TICK_NSEC / 2;
+		data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
+	}
 
 	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
 	    ((data->next_timer_ns < drv->states[1].target_residency_ns ||
@@ -303,16 +324,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		return 0;
 	}
 
-	/* Round up the result for half microseconds. */
-	predicted_us = div_u64(data->next_timer_ns *
-			       data->correction_factor[data->bucket] +
-			       (RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
-			       RESOLUTION * DECAY * NSEC_PER_USEC);
-	/* Use the lowest expected idle interval to pick the idle state. */
-	predicted_ns = (u64)min(predicted_us,
-				get_typical_interval(data, predicted_us)) *
-				NSEC_PER_USEC;
-
 	if (tick_nohz_tick_stopped()) {
 		/*
 		 * If the tick is already stopped, the cost of possible short
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 70846b669255..31050fdc633f 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -140,6 +140,8 @@
 #include <linux/sched/topology.h>
 #include <linux/tick.h>
 
+#include "gov.h"
+
 /*
  * The number of bits to shift the CPU's capacity by in order to determine
  * the utilized threshold.
@@ -152,7 +154,6 @@
  */
 #define UTIL_THRESHOLD_SHIFT 6
 
-
 /*
  * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
  * is used for decreasing metrics on a regular basis.
@@ -166,12 +167,6 @@
  */
 #define NR_RECENT	9
 
-/*
- * Idle state target residency threshold used for deciding whether or not to
- * check the time till the closest expected timer event.
- */
-#define RESIDENCY_THRESHOLD_NS	(15 * NSEC_PER_USEC)
-
 /**
  * struct teo_bin - Metrics used by the TEO cpuidle governor.
  * @intercepts: The "intercepts" metric.

From 12a95123bfe1dd1a6020a35f5e67a560591bb02a Mon Sep 17 00:00:00 2001
From: Lucas Tanure <tanureal@opensource.cirrus.com>
Date: Fri, 4 Aug 2023 11:45:57 +0100
Subject: [PATCH 1258/2047] soundwire: bus: Allow SoundWire peripherals to
 register IRQ handlers

Currently the in-band alerts for SoundWire peripherals can only
be communicated to the driver through the interrupt_callback
function. This however is slightly inconvenient for devices that wish
to share IRQ handling code between SoundWire and I2C/SPI, the later
would normally register an IRQ handler with the IRQ subsystem. However
there is no reason the SoundWire in-band IRQs can not also be
communicated as an actual IRQ to the driver.

Add support for SoundWire peripherals to register a normal IRQ
handler to receive SoundWire in-band alerts, allowing code to be
shared across control buses. Note that we allow users to use both the
interrupt_callback and the IRQ handler, this is useful for devices
which must clear additional chip specific SoundWire registers that are
not a part of the normal IRQ flow, or the SoundWire specification.

Signed-off-by: Lucas Tanure <tanureal@opensource.cirrus.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Acked-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Acked-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20230804104602.395892-2-ckeepax@opensource.cirrus.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 drivers/soundwire/bus.c       | 32 ++++++++++++++++++++++++++++++++
 drivers/soundwire/bus_type.c  | 12 ++++++++++++
 include/linux/soundwire/sdw.h |  9 +++++++++
 3 files changed, 53 insertions(+)

diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
index dba920ec88f6..cf55386256f3 100644
--- a/drivers/soundwire/bus.c
+++ b/drivers/soundwire/bus.c
@@ -3,6 +3,7 @@
 
 #include <linux/acpi.h>
 #include <linux/delay.h>
+#include <linux/irq.h>
 #include <linux/mod_devicetable.h>
 #include <linux/pm_runtime.h>
 #include <linux/soundwire/sdw_registers.h>
@@ -25,6 +26,23 @@ static int sdw_get_id(struct sdw_bus *bus)
 	return 0;
 }
 
+static int sdw_irq_map(struct irq_domain *h, unsigned int virq,
+		       irq_hw_number_t hw)
+{
+	struct sdw_bus *bus = h->host_data;
+
+	irq_set_chip_data(virq, bus);
+	irq_set_chip(virq, &bus->irq_chip);
+	irq_set_nested_thread(virq, 1);
+	irq_set_noprobe(virq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops sdw_domain_ops = {
+	.map	= sdw_irq_map,
+};
+
 /**
  * sdw_bus_master_add() - add a bus Master instance
  * @bus: bus instance
@@ -151,6 +169,14 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent,
 	bus->params.curr_bank = SDW_BANK0;
 	bus->params.next_bank = SDW_BANK1;
 
+	bus->irq_chip.name = dev_name(bus->dev);
+	bus->domain = irq_domain_create_linear(fwnode, SDW_MAX_DEVICES,
+					       &sdw_domain_ops, bus);
+	if (!bus->domain) {
+		dev_err(bus->dev, "Failed to add IRQ domain\n");
+		return -EINVAL;
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(sdw_bus_master_add);
@@ -187,6 +213,9 @@ static int sdw_delete_slave(struct device *dev, void *data)
 void sdw_bus_master_delete(struct sdw_bus *bus)
 {
 	device_for_each_child(bus->dev, NULL, sdw_delete_slave);
+
+	irq_domain_remove(bus->domain);
+
 	sdw_master_device_del(bus);
 
 	sdw_bus_debugfs_exit(bus);
@@ -1725,6 +1754,9 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave)
 				struct device *dev = &slave->dev;
 				struct sdw_driver *drv = drv_to_sdw_driver(dev->driver);
 
+				if (slave->prop.use_domain_irq && slave->irq)
+					handle_nested_irq(slave->irq);
+
 				if (drv->ops && drv->ops->interrupt_callback) {
 					slave_intr.sdca_cascade = sdca_cascade;
 					slave_intr.control_port = clear;
diff --git a/drivers/soundwire/bus_type.c b/drivers/soundwire/bus_type.c
index 1f43ee848eac..fafbc284e82d 100644
--- a/drivers/soundwire/bus_type.c
+++ b/drivers/soundwire/bus_type.c
@@ -122,6 +122,12 @@ static int sdw_drv_probe(struct device *dev)
 	if (drv->ops && drv->ops->read_prop)
 		drv->ops->read_prop(slave);
 
+	if (slave->prop.use_domain_irq) {
+		slave->irq = irq_create_mapping(slave->bus->domain, slave->dev_num);
+		if (!slave->irq)
+			dev_warn(dev, "Failed to map IRQ\n");
+	}
+
 	/* init the sysfs as we have properties now */
 	ret = sdw_slave_sysfs_init(slave);
 	if (ret < 0)
@@ -166,7 +172,13 @@ static int sdw_drv_remove(struct device *dev)
 	int ret = 0;
 
 	mutex_lock(&slave->sdw_dev_lock);
+
 	slave->probed = false;
+
+	if (slave->prop.use_domain_irq)
+		irq_dispose_mapping(irq_find_mapping(slave->bus->domain,
+						     slave->dev_num));
+
 	mutex_unlock(&slave->sdw_dev_lock);
 
 	if (drv->remove)
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index f523ceabd059..8923387a7405 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -6,6 +6,8 @@
 
 #include <linux/bug.h>
 #include <linux/lockdep_types.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/mod_devicetable.h>
 #include <linux/bitfield.h>
 
@@ -370,6 +372,7 @@ struct sdw_dpn_prop {
  * @clock_reg_supported: the Peripheral implements the clock base and scale
  * registers introduced with the SoundWire 1.2 specification. SDCA devices
  * do not need to set this boolean property as the registers are required.
+ * @use_domain_irq: call actual IRQ handler on slave, as well as callback
  */
 struct sdw_slave_prop {
 	u32 mipi_revision;
@@ -394,6 +397,7 @@ struct sdw_slave_prop {
 	u8 scp_int1_mask;
 	u32 quirks;
 	bool clock_reg_supported;
+	bool use_domain_irq;
 };
 
 #define SDW_SLAVE_QUIRKS_INVALID_INITIAL_PARITY	BIT(0)
@@ -641,6 +645,7 @@ struct sdw_slave_ops {
  * struct sdw_slave - SoundWire Slave
  * @id: MIPI device ID
  * @dev: Linux device
+ * @irq: IRQ number
  * @status: Status reported by the Slave
  * @bus: Bus handle
  * @prop: Slave properties
@@ -670,6 +675,7 @@ struct sdw_slave_ops {
 struct sdw_slave {
 	struct sdw_slave_id id;
 	struct device dev;
+	int irq;
 	enum sdw_slave_status status;
 	struct sdw_bus *bus;
 	struct sdw_slave_prop prop;
@@ -885,6 +891,7 @@ struct sdw_master_ops {
  * is used to compute and program bus bandwidth, clock, frame shape,
  * transport and port parameters
  * @debugfs: Bus debugfs
+ * @domain: IRQ domain
  * @defer_msg: Defer message
  * @clk_stop_timeout: Clock stop timeout computed
  * @bank_switch_timeout: Bank switch timeout computed
@@ -920,6 +927,8 @@ struct sdw_bus {
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debugfs;
 #endif
+	struct irq_chip irq_chip;
+	struct irq_domain *domain;
 	struct sdw_defer defer_msg;
 	unsigned int clk_stop_timeout;
 	u32 bank_switch_timeout;

From ec77cad8d55c0fb39c0b17a682f78df4b92373a5 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Fri, 4 Aug 2023 11:45:58 +0100
Subject: [PATCH 1259/2047] dt-bindings: mfd: cirrus,cs42l43: Add initial DT
 binding

The CS42L43 is an audio CODEC with integrated MIPI SoundWire interface
(Version 1.2.1 compliant), I2C, SPI, and I2S/TDM interfaces designed
for portable applications. It provides a high dynamic range, stereo
DAC for headphone output, two integrated Class D amplifiers for
loudspeakers, and two ADCs for wired headset microphone input or
stereo line input. PDM inputs are provided for digital microphones.

Add a YAML DT binding document for this device.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20230804104602.395892-3-ckeepax@opensource.cirrus.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 .../bindings/sound/cirrus,cs42l43.yaml        | 313 ++++++++++++++++++
 1 file changed, 313 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml

diff --git a/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml b/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml
new file mode 100644
index 000000000000..7a6de938b11d
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml
@@ -0,0 +1,313 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/cirrus,cs42l43.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cirrus Logic CS42L43 Audio CODEC
+
+maintainers:
+  - patches@opensource.cirrus.com
+
+description: |
+  The CS42L43 is an audio CODEC with integrated MIPI SoundWire interface
+  (Version 1.2.1 compliant), I2C, SPI, and I2S/TDM interfaces designed
+  for portable applications. It provides a high dynamic range, stereo
+  DAC for headphone output, two integrated Class D amplifiers for
+  loudspeakers, and two ADCs for wired headset microphone input or
+  stereo line input. PDM inputs are provided for digital microphones.
+
+allOf:
+  - $ref: dai-common.yaml#
+
+properties:
+  compatible:
+    enum:
+      - cirrus,cs42l43
+
+  reg:
+    maxItems: 1
+
+  vdd-p-supply:
+    description:
+      Power supply for the high voltage interface.
+
+  vdd-a-supply:
+    description:
+      Power supply for internal analog circuits.
+
+  vdd-d-supply:
+    description:
+      Power supply for internal digital circuits. Can be internally supplied.
+
+  vdd-io-supply:
+    description:
+      Power supply for external interface and internal digital logic.
+
+  vdd-cp-supply:
+    description:
+      Power supply for the amplifier 3 and 4 charge pump.
+
+  vdd-amp-supply:
+    description:
+      Power supply for amplifier 1 and 2.
+
+  reset-gpios:
+    maxItems: 1
+
+  interrupt-controller: true
+
+  "#interrupt-cells":
+    const: 2
+
+  interrupts:
+    maxItems: 1
+
+  "#sound-dai-cells":
+    const: 1
+
+  clocks:
+    items:
+      - description: Synchronous audio clock provided on mclk_in.
+
+  clock-names:
+    const: mclk
+
+  cirrus,bias-low:
+    type: boolean
+    description:
+      Select a 1.8V headset micbias rather than 2.8V.
+
+  cirrus,bias-sense-microamp:
+    description:
+      Current at which the headset micbias sense clamp will engage, 0 to
+      disable.
+    enum: [ 0, 14, 23, 41, 50, 60, 68, 86, 95 ]
+    default: 0
+
+  cirrus,bias-ramp-ms:
+    description:
+      Time in milliseconds the hardware allows for the headset micbias to
+      ramp up.
+    enum: [ 10, 40, 90, 170 ]
+    default: 170
+
+  cirrus,detect-us:
+    description:
+      Time in microseconds the type detection will run for. Long values will
+      cause more audible effects, but give more accurate detection.
+    enum: [ 20, 100, 1000, 10000, 50000, 75000, 100000, 200000 ]
+    default: 10000
+
+  cirrus,button-automute:
+    type: boolean
+    description:
+      Enable the hardware automuting of decimator 1 when a headset button is
+      pressed.
+
+  cirrus,buttons-ohms:
+    description:
+      Impedance in Ohms for each headset button, these should be listed in
+      ascending order.
+    minItems: 1
+    maxItems: 6
+
+  cirrus,tip-debounce-ms:
+    description:
+      Software debounce on tip sense triggering in milliseconds.
+    default: 0
+
+  cirrus,tip-invert:
+    type: boolean
+    description:
+      Indicates tip detect polarity, inverted implies open-circuit whilst the
+      jack is inserted.
+
+  cirrus,tip-disable-pullup:
+    type: boolean
+    description:
+      Indicates if the internal pullup on the tip detect should be disabled.
+
+  cirrus,tip-fall-db-ms:
+    description:
+      Time in milliseconds a falling edge on the tip detect should be hardware
+      debounced for. Note the falling edge is considered after the invert.
+    enum: [ 0, 125, 250, 500, 750, 1000, 1250, 1500 ]
+    default: 500
+
+  cirrus,tip-rise-db-ms:
+    description:
+      Time in milliseconds a rising edge on the tip detect should be hardware
+      debounced for. Note the rising edge is considered after the invert.
+    enum: [ 0, 125, 250, 500, 750, 1000, 1250, 1500 ]
+    default: 500
+
+  cirrus,use-ring-sense:
+    type: boolean
+    description:
+      Indicates if the ring sense should be used.
+
+  cirrus,ring-invert:
+    type: boolean
+    description:
+      Indicates ring detect polarity, inverted implies open-circuit whilst the
+      jack is inserted.
+
+  cirrus,ring-disable-pullup:
+    type: boolean
+    description:
+      Indicates if the internal pullup on the ring detect should be disabled.
+
+  cirrus,ring-fall-db-ms:
+    description:
+      Time in milliseconds a falling edge on the ring detect should be hardware
+      debounced for. Note the falling edge is considered after the invert.
+    enum: [ 0, 125, 250, 500, 750, 1000, 1250, 1500 ]
+    default: 500
+
+  cirrus,ring-rise-db-ms:
+    description:
+      Time in milliseconds a rising edge on the ring detect should be hardware
+      debounced for. Note the rising edge is considered after the invert.
+    enum: [ 0, 125, 250, 500, 750, 1000, 1250, 1500 ]
+    default: 500
+
+  pinctrl:
+    type: object
+    $ref: /schemas/pinctrl/pinctrl.yaml#
+    additionalProperties: false
+
+    properties:
+      gpio-controller: true
+
+      "#gpio-cells":
+        const: 2
+
+      gpio-ranges:
+        items:
+          - description: A phandle to the CODEC pinctrl node
+            minimum: 0
+          - const: 0
+          - const: 0
+          - const: 3
+
+    patternProperties:
+      "-state$":
+        oneOf:
+          - $ref: "#/$defs/cirrus-cs42l43-state"
+          - patternProperties:
+              "-pins$":
+                $ref: "#/$defs/cirrus-cs42l43-state"
+            additionalProperties: false
+
+  spi:
+    type: object
+    $ref: /schemas/spi/spi-controller.yaml#
+    unevaluatedProperties: false
+
+$defs:
+  cirrus-cs42l43-state:
+    type: object
+
+    allOf:
+      - $ref: /schemas/pinctrl/pincfg-node.yaml#
+      - $ref: /schemas/pinctrl/pinmux-node.yaml#
+
+    oneOf:
+      - required: [ groups ]
+      - required: [ pins ]
+
+    additionalProperties: false
+
+    properties:
+      groups:
+        enum: [ gpio1, gpio2, gpio3, asp, pdmout2, pdmout1, i2c, spi ]
+
+      pins:
+        enum: [ gpio1, gpio2, gpio3,
+                asp_dout, asp_fsync, asp_bclk,
+                pdmout2_clk, pdmout2_data, pdmout1_clk, pdmout1_data,
+                i2c_sda, i2c_scl,
+                spi_miso, spi_sck, spi_ssb ]
+
+      function:
+        enum: [ gpio, spdif, irq, mic-shutter, spk-shutter ]
+
+      drive-strength:
+        description: Set drive strength in mA
+        enum: [ 1, 2, 4, 8, 9, 10, 12, 16 ]
+
+      input-debounce:
+        description: Set input debounce in uS
+        enum: [ 0, 85 ]
+
+required:
+  - compatible
+  - reg
+  - vdd-p-supply
+  - vdd-a-supply
+  - vdd-io-supply
+  - vdd-cp-supply
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        cs42l43: codec@1a {
+            compatible = "cirrus,cs42l43";
+            reg = <0x1a>;
+
+            vdd-p-supply = <&vdd5v0>;
+            vdd-a-supply = <&vdd1v8>;
+            vdd-io-supply = <&vdd1v8>;
+            vdd-cp-supply = <&vdd1v8>;
+            vdd-amp-supply = <&vdd5v0>;
+
+            reset-gpios = <&gpio 0>;
+
+            interrupt-controller;
+            #interrupt-cells = <2>;
+            interrupt-parent = <&gpio>;
+            interrupts = <56 IRQ_TYPE_LEVEL_LOW>;
+
+            #sound-dai-cells = <1>;
+
+            clocks = <&clks 0>;
+            clock-names = "mclk";
+
+            cs42l43_pins: pinctrl {
+                gpio-controller;
+                #gpio-cells = <2>;
+                gpio-ranges = <&cs42l43_pins 0 0 3>;
+
+                pinctrl-names = "default";
+                pinctrl-0 = <&pinsettings>;
+
+                pinsettings: default-state {
+                    shutter-pins {
+                        groups = "gpio3";
+                        function = "mic-shutter";
+                    };
+                };
+            };
+
+            spi {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                cs-gpios = <&cs42l43_pins 1 0>;
+
+                sensor@0 {
+                    compatible = "bosch,bme680";
+                    reg = <0>;
+                    spi-max-frequency = <1400000>;
+                };
+            };
+        };
+    };

From ace6d14481386ec6c1b63cc2b24c71433a583dc2 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Fri, 4 Aug 2023 11:45:59 +0100
Subject: [PATCH 1260/2047] mfd: cs42l43: Add support for cs42l43 core driver

The CS42L43 is an audio CODEC with integrated MIPI SoundWire interface
(Version 1.2.1 compliant), I2C, SPI, and I2S/TDM interfaces designed
for portable applications. It provides a high dynamic range, stereo
DAC for headphone output, two integrated Class D amplifiers for
loudspeakers, and two ADCs for wired headset microphone input or
stereo line input. PDM inputs are provided for digital microphones.

The MFD component registers and initialises the device and provides
PM/system power management.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20230804104602.395892-4-ckeepax@opensource.cirrus.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 MAINTAINERS                      |    2 +
 drivers/mfd/Kconfig              |   23 +
 drivers/mfd/Makefile             |    3 +
 drivers/mfd/cs42l43-i2c.c        |   98 +++
 drivers/mfd/cs42l43-sdw.c        |  239 ++++++
 drivers/mfd/cs42l43.c            | 1188 ++++++++++++++++++++++++++++++
 drivers/mfd/cs42l43.h            |   28 +
 include/linux/mfd/cs42l43-regs.h | 1184 +++++++++++++++++++++++++++++
 include/linux/mfd/cs42l43.h      |  102 +++
 9 files changed, 2867 insertions(+)
 create mode 100644 drivers/mfd/cs42l43-i2c.c
 create mode 100644 drivers/mfd/cs42l43-sdw.c
 create mode 100644 drivers/mfd/cs42l43.c
 create mode 100644 drivers/mfd/cs42l43.h
 create mode 100644 include/linux/mfd/cs42l43-regs.h
 create mode 100644 include/linux/mfd/cs42l43.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 3be1bdfe8ecc..45aa18c45d20 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4879,7 +4879,9 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 L:	patches@opensource.cirrus.com
 S:	Maintained
 F:	Documentation/devicetree/bindings/sound/cirrus,cs*
+F:	drivers/mfd/cs42l43*
 F:	include/dt-bindings/sound/cs*
+F:	include/linux/mfd/cs42l43*
 F:	include/sound/cs*
 F:	sound/pci/hda/cs*
 F:	sound/pci/hda/hda_cs_dsp_ctl.*
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 6f5b259a6d6a..85be64579fc9 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -237,6 +237,29 @@ config MFD_CROS_EC_DEV
 	  To compile this driver as a module, choose M here: the module will be
 	  called cros-ec-dev.
 
+config MFD_CS42L43
+	tristate
+	select MFD_CORE
+	select REGMAP
+
+config MFD_CS42L43_I2C
+	tristate "Cirrus Logic CS42L43 (I2C)"
+	depends on I2C
+	select REGMAP_I2C
+	select MFD_CS42L43
+	help
+	  Select this to support the Cirrus Logic CS42L43 PC CODEC with
+	  headphone and class D speaker drivers over I2C.
+
+config MFD_CS42L43_SDW
+	tristate "Cirrus Logic CS42L43 (SoundWire)"
+	depends on SOUNDWIRE
+	select REGMAP_SOUNDWIRE
+	select MFD_CS42L43
+	help
+	  Select this to support the Cirrus Logic CS42L43 PC CODEC with
+	  headphone and class D speaker drivers over SoundWire.
+
 config MFD_MADERA
 	tristate "Cirrus Logic Madera codecs"
 	select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index f3d1f1dc73b5..c66f07edcd0e 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -13,6 +13,9 @@ obj-$(CONFIG_ARCH_BCM2835)	+= bcm2835-pm.o
 obj-$(CONFIG_MFD_BCM590XX)	+= bcm590xx.o
 obj-$(CONFIG_MFD_BD9571MWV)	+= bd9571mwv.o
 obj-$(CONFIG_MFD_CROS_EC_DEV)	+= cros_ec_dev.o
+obj-$(CONFIG_MFD_CS42L43)	+= cs42l43.o
+obj-$(CONFIG_MFD_CS42L43_I2C)	+= cs42l43-i2c.o
+obj-$(CONFIG_MFD_CS42L43_SDW)	+= cs42l43-sdw.o
 obj-$(CONFIG_MFD_ENE_KB3930)	+= ene-kb3930.o
 obj-$(CONFIG_MFD_EXYNOS_LPASS)	+= exynos-lpass.o
 obj-$(CONFIG_MFD_GATEWORKS_GSC)	+= gateworks-gsc.o
diff --git a/drivers/mfd/cs42l43-i2c.c b/drivers/mfd/cs42l43-i2c.c
new file mode 100644
index 000000000000..4922211680c9
--- /dev/null
+++ b/drivers/mfd/cs42l43-i2c.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CS42L43 I2C driver
+ *
+ * Copyright (C) 2022-2023 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/mfd/cs42l43-regs.h>
+#include <linux/module.h>
+
+#include "cs42l43.h"
+
+static const struct regmap_config cs42l43_i2c_regmap = {
+	.reg_bits		= 32,
+	.reg_stride		= 4,
+	.val_bits		= 32,
+	.reg_format_endian	= REGMAP_ENDIAN_BIG,
+	.val_format_endian	= REGMAP_ENDIAN_BIG,
+
+	.max_register		= CS42L43_MCU_RAM_MAX,
+	.readable_reg		= cs42l43_readable_register,
+	.volatile_reg		= cs42l43_volatile_register,
+	.precious_reg		= cs42l43_precious_register,
+
+	.cache_type		= REGCACHE_MAPLE,
+	.reg_defaults		= cs42l43_reg_default,
+	.num_reg_defaults	= ARRAY_SIZE(cs42l43_reg_default),
+};
+
+static int cs42l43_i2c_probe(struct i2c_client *i2c)
+{
+	struct cs42l43 *cs42l43;
+	int ret;
+
+	cs42l43 = devm_kzalloc(&i2c->dev, sizeof(*cs42l43), GFP_KERNEL);
+	if (!cs42l43)
+		return -ENOMEM;
+
+	cs42l43->dev = &i2c->dev;
+	cs42l43->irq = i2c->irq;
+	/* A device on an I2C is always attached by definition. */
+	cs42l43->attached = true;
+
+	cs42l43->regmap = devm_regmap_init_i2c(i2c, &cs42l43_i2c_regmap);
+	if (IS_ERR(cs42l43->regmap)) {
+		ret = PTR_ERR(cs42l43->regmap);
+		dev_err(cs42l43->dev, "Failed to allocate regmap: %d\n", ret);
+		return ret;
+	}
+
+	return cs42l43_dev_probe(cs42l43);
+}
+
+static void cs42l43_i2c_remove(struct i2c_client *i2c)
+{
+	struct cs42l43 *cs42l43 = dev_get_drvdata(&i2c->dev);
+
+	cs42l43_dev_remove(cs42l43);
+}
+
+#if IS_ENABLED(CONFIG_OF)
+static const struct of_device_id cs42l43_of_match[] = {
+	{ .compatible = "cirrus,cs42l43", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, cs42l43_of_match);
+#endif
+
+#if IS_ENABLED(CONFIG_ACPI)
+static const struct acpi_device_id cs42l43_acpi_match[] = {
+	{ "CSC4243", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, cs42l43_acpi_match);
+#endif
+
+static struct i2c_driver cs42l43_i2c_driver = {
+	.driver = {
+		.name			= "cs42l43",
+		.pm			= pm_ptr(&cs42l43_pm_ops),
+		.of_match_table		= of_match_ptr(cs42l43_of_match),
+		.acpi_match_table	= ACPI_PTR(cs42l43_acpi_match),
+	},
+
+	.probe		= cs42l43_i2c_probe,
+	.remove		= cs42l43_i2c_remove,
+};
+module_i2c_driver(cs42l43_i2c_driver);
+
+MODULE_IMPORT_NS(MFD_CS42L43);
+
+MODULE_DESCRIPTION("CS42L43 I2C Driver");
+MODULE_AUTHOR("Charles Keepax <ckeepax@opensource.cirrus.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/cs42l43-sdw.c b/drivers/mfd/cs42l43-sdw.c
new file mode 100644
index 000000000000..7392b3d2e6b9
--- /dev/null
+++ b/drivers/mfd/cs42l43-sdw.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CS42L43 SoundWire driver
+ *
+ * Copyright (C) 2022-2023 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/mfd/cs42l43-regs.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/soundwire/sdw.h>
+#include <linux/soundwire/sdw_registers.h>
+#include <linux/soundwire/sdw_type.h>
+
+#include "cs42l43.h"
+
+enum cs42l43_sdw_ports {
+	CS42L43_DMIC_DEC_ASP_PORT = 1,
+	CS42L43_SPK_TX_PORT,
+	CS42L43_SPDIF_HP_PORT,
+	CS42L43_SPK_RX_PORT,
+	CS42L43_ASP_PORT,
+};
+
+static const struct regmap_config cs42l43_sdw_regmap = {
+	.reg_bits		= 32,
+	.reg_stride		= 4,
+	.val_bits		= 32,
+	.reg_format_endian	= REGMAP_ENDIAN_LITTLE,
+	.val_format_endian	= REGMAP_ENDIAN_LITTLE,
+
+	.max_register		= CS42L43_MCU_RAM_MAX,
+	.readable_reg		= cs42l43_readable_register,
+	.volatile_reg		= cs42l43_volatile_register,
+	.precious_reg		= cs42l43_precious_register,
+
+	.cache_type		= REGCACHE_MAPLE,
+	.reg_defaults		= cs42l43_reg_default,
+	.num_reg_defaults	= ARRAY_SIZE(cs42l43_reg_default),
+};
+
+static int cs42l43_read_prop(struct sdw_slave *sdw)
+{
+	struct sdw_slave_prop *prop = &sdw->prop;
+	struct device *dev = &sdw->dev;
+	struct sdw_dpn_prop *dpn;
+	unsigned long addr;
+	int nval;
+	int i;
+	u32 bit;
+
+	prop->use_domain_irq = true;
+	prop->paging_support = true;
+	prop->wake_capable = true;
+	prop->source_ports = BIT(CS42L43_DMIC_DEC_ASP_PORT) | BIT(CS42L43_SPK_TX_PORT);
+	prop->sink_ports = BIT(CS42L43_SPDIF_HP_PORT) |
+			   BIT(CS42L43_SPK_RX_PORT) | BIT(CS42L43_ASP_PORT);
+	prop->quirks = SDW_SLAVE_QUIRKS_INVALID_INITIAL_PARITY;
+	prop->scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY |
+			      SDW_SCP_INT1_IMPL_DEF;
+
+	nval = hweight32(prop->source_ports);
+	prop->src_dpn_prop = devm_kcalloc(dev, nval, sizeof(*prop->src_dpn_prop),
+					  GFP_KERNEL);
+	if (!prop->src_dpn_prop)
+		return -ENOMEM;
+
+	i = 0;
+	dpn = prop->src_dpn_prop;
+	addr = prop->source_ports;
+	for_each_set_bit(bit, &addr, 32) {
+		dpn[i].num = bit;
+		dpn[i].max_ch = 2;
+		dpn[i].type = SDW_DPN_FULL;
+		dpn[i].max_word = 24;
+		i++;
+	}
+	/*
+	 * All ports are 2 channels max, except the first one,
+	 * CS42L43_DMIC_DEC_ASP_PORT.
+	 */
+	dpn[CS42L43_DMIC_DEC_ASP_PORT].max_ch = 4;
+
+	nval = hweight32(prop->sink_ports);
+	prop->sink_dpn_prop = devm_kcalloc(dev, nval, sizeof(*prop->sink_dpn_prop),
+					   GFP_KERNEL);
+	if (!prop->sink_dpn_prop)
+		return -ENOMEM;
+
+	i = 0;
+	dpn = prop->sink_dpn_prop;
+	addr = prop->sink_ports;
+	for_each_set_bit(bit, &addr, 32) {
+		dpn[i].num = bit;
+		dpn[i].max_ch = 2;
+		dpn[i].type = SDW_DPN_FULL;
+		dpn[i].max_word = 24;
+		i++;
+	}
+
+	return 0;
+}
+
+static int cs42l43_sdw_update_status(struct sdw_slave *sdw, enum sdw_slave_status status)
+{
+	struct cs42l43 *cs42l43 = dev_get_drvdata(&sdw->dev);
+
+	switch (status) {
+	case SDW_SLAVE_ATTACHED:
+		dev_dbg(cs42l43->dev, "Device attach\n");
+
+		sdw_write_no_pm(sdw, CS42L43_GEN_INT_MASK_1,
+				CS42L43_INT_STAT_GEN1_MASK);
+
+		cs42l43->attached = true;
+
+		complete(&cs42l43->device_attach);
+		break;
+	case SDW_SLAVE_UNATTACHED:
+		dev_dbg(cs42l43->dev, "Device detach\n");
+
+		cs42l43->attached = false;
+
+		reinit_completion(&cs42l43->device_attach);
+		complete(&cs42l43->device_detach);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int cs42l43_sdw_interrupt(struct sdw_slave *sdw,
+				 struct sdw_slave_intr_status *status)
+{
+	/*
+	 * The IRQ itself was handled through the regmap_irq handler, this is
+	 * just clearing up the additional Cirrus SoundWire registers that are
+	 * not covered by the SoundWire framework or the IRQ handler itself.
+	 * There is only a single bit in GEN_INT_STAT_1 and it doesn't clear if
+	 * IRQs are still pending so doing a read/write here after handling the
+	 * IRQ is fine.
+	 */
+	sdw_read_no_pm(sdw, CS42L43_GEN_INT_STAT_1);
+	sdw_write_no_pm(sdw, CS42L43_GEN_INT_STAT_1, CS42L43_INT_STAT_GEN1_MASK);
+
+	return 0;
+}
+
+static int cs42l43_sdw_bus_config(struct sdw_slave *sdw,
+				  struct sdw_bus_params *params)
+{
+	struct cs42l43 *cs42l43 = dev_get_drvdata(&sdw->dev);
+	int ret = 0;
+
+	mutex_lock(&cs42l43->pll_lock);
+
+	if (cs42l43->sdw_freq != params->curr_dr_freq / 2) {
+		if (cs42l43->sdw_pll_active) {
+			dev_err(cs42l43->dev,
+				"PLL active can't change SoundWire bus clock\n");
+			ret = -EBUSY;
+		} else {
+			cs42l43->sdw_freq = params->curr_dr_freq / 2;
+		}
+	}
+
+	mutex_unlock(&cs42l43->pll_lock);
+
+	return ret;
+}
+
+static const struct sdw_slave_ops cs42l43_sdw_ops = {
+	.read_prop		= cs42l43_read_prop,
+	.update_status		= cs42l43_sdw_update_status,
+	.interrupt_callback	= cs42l43_sdw_interrupt,
+	.bus_config		= cs42l43_sdw_bus_config,
+};
+
+static int cs42l43_sdw_probe(struct sdw_slave *sdw, const struct sdw_device_id *id)
+{
+	struct cs42l43 *cs42l43;
+	struct device *dev = &sdw->dev;
+	int ret;
+
+	cs42l43 = devm_kzalloc(dev, sizeof(*cs42l43), GFP_KERNEL);
+	if (!cs42l43)
+		return -ENOMEM;
+
+	cs42l43->dev = dev;
+	cs42l43->sdw = sdw;
+
+	cs42l43->regmap = devm_regmap_init_sdw(sdw, &cs42l43_sdw_regmap);
+	if (IS_ERR(cs42l43->regmap)) {
+		ret = PTR_ERR(cs42l43->regmap);
+		dev_err(cs42l43->dev, "Failed to allocate regmap: %d\n", ret);
+		return ret;
+	}
+
+	return cs42l43_dev_probe(cs42l43);
+}
+
+static int cs42l43_sdw_remove(struct sdw_slave *sdw)
+{
+	struct cs42l43 *cs42l43 = dev_get_drvdata(&sdw->dev);
+
+	cs42l43_dev_remove(cs42l43);
+
+	return 0;
+}
+
+static const struct sdw_device_id cs42l43_sdw_id[] = {
+	SDW_SLAVE_ENTRY(0x01FA, 0x4243, 0),
+	{}
+};
+MODULE_DEVICE_TABLE(sdw, cs42l43_sdw_id);
+
+static struct sdw_driver cs42l43_sdw_driver = {
+	.driver = {
+		.name		= "cs42l43",
+		.pm		= pm_ptr(&cs42l43_pm_ops),
+	},
+
+	.probe		= cs42l43_sdw_probe,
+	.remove		= cs42l43_sdw_remove,
+	.id_table	= cs42l43_sdw_id,
+	.ops		= &cs42l43_sdw_ops,
+};
+module_sdw_driver(cs42l43_sdw_driver);
+
+MODULE_IMPORT_NS(MFD_CS42L43);
+
+MODULE_DESCRIPTION("CS42L43 SoundWire Driver");
+MODULE_AUTHOR("Lucas Tanure <tanureal@opensource.cirrus.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/cs42l43.c b/drivers/mfd/cs42l43.c
new file mode 100644
index 000000000000..37b23e9bae82
--- /dev/null
+++ b/drivers/mfd/cs42l43.c
@@ -0,0 +1,1188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CS42L43 core driver
+ *
+ * Copyright (C) 2022-2023 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ */
+
+#include <linux/bitops.h>
+#include <linux/build_bug.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/firmware.h>
+#include <linux/jiffies.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/cs42l43-regs.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+#include <linux/soundwire/sdw.h>
+
+#include "cs42l43.h"
+
+#define CS42L43_RESET_DELAY			20
+
+#define CS42L43_SDW_ATTACH_TIMEOUT		500
+#define CS42L43_SDW_DETACH_TIMEOUT		100
+
+#define CS42L43_MCU_BOOT_STAGE1			1
+#define CS42L43_MCU_BOOT_STAGE2			2
+#define CS42L43_MCU_BOOT_STAGE3			3
+#define CS42L43_MCU_BOOT_STAGE4			4
+#define CS42L43_MCU_POLL			5000
+#define CS42L43_MCU_CMD_TIMEOUT			20000
+#define CS42L43_MCU_UPDATE_FORMAT		3
+#define CS42L43_MCU_UPDATE_OFFSET		0x100000
+#define CS42L43_MCU_UPDATE_TIMEOUT		500000
+#define CS42L43_MCU_UPDATE_RETRIES		5
+
+#define CS42L43_MCU_SUPPORTED_REV		0x2105
+#define CS42L43_MCU_SHADOW_REGS_REQUIRED_REV	0x2200
+#define CS42L43_MCU_SUPPORTED_BIOS_REV		0x0001
+
+#define CS42L43_VDDP_DELAY			50
+#define CS42L43_VDDD_DELAY			1000
+
+#define CS42L43_AUTOSUSPEND_TIME		250
+
+struct cs42l43_patch_header {
+	__le16 version;
+	__le16 size;
+	u8 reserved;
+	u8 secure;
+	__le16 bss_size;
+	__le32 apply_addr;
+	__le32 checksum;
+	__le32 sha;
+	__le16 swrev;
+	__le16 patchid;
+	__le16 ipxid;
+	__le16 romver;
+	__le32 load_addr;
+} __packed;
+
+static const struct reg_sequence cs42l43_reva_patch[] = {
+	{ 0x4000,					0x00000055 },
+	{ 0x4000,					0x000000AA },
+	{ 0x10084,					0x00000000 },
+	{ 0x1741C,					0x00CD2000 },
+	{ 0x1718C,					0x00000003 },
+	{ 0x4000,					0x00000000 },
+	{ CS42L43_CCM_BLK_CLK_CONTROL,			0x00000002 },
+	{ CS42L43_HPPATHVOL,				0x011B011B },
+	{ CS42L43_OSC_DIV_SEL,				0x00000001 },
+	{ CS42L43_DACCNFG2,				0x00000005 },
+	{ CS42L43_MIC_DETECT_CONTROL_ANDROID,		0x80790079 },
+	{ CS42L43_RELID,				0x0000000F },
+};
+
+const struct reg_default cs42l43_reg_default[CS42L43_N_DEFAULTS] = {
+	{ CS42L43_DRV_CTRL1,				0x000186C0 },
+	{ CS42L43_DRV_CTRL3,				0x286DB018 },
+	{ CS42L43_DRV_CTRL4,				0x000006D8 },
+	{ CS42L43_DRV_CTRL_5,				0x136C00C0 },
+	{ CS42L43_GPIO_CTRL1,				0x00000707 },
+	{ CS42L43_GPIO_CTRL2,				0x00000000 },
+	{ CS42L43_GPIO_FN_SEL,				0x00000000 },
+	{ CS42L43_MCLK_SRC_SEL,				0x00000000 },
+	{ CS42L43_SAMPLE_RATE1,				0x00000003 },
+	{ CS42L43_SAMPLE_RATE2,				0x00000003 },
+	{ CS42L43_SAMPLE_RATE3,				0x00000003 },
+	{ CS42L43_SAMPLE_RATE4,				0x00000003 },
+	{ CS42L43_PLL_CONTROL,				0x00000000 },
+	{ CS42L43_FS_SELECT1,				0x00000000 },
+	{ CS42L43_FS_SELECT2,				0x00000000 },
+	{ CS42L43_FS_SELECT3,				0x00000000 },
+	{ CS42L43_FS_SELECT4,				0x00000000 },
+	{ CS42L43_PDM_CONTROL,				0x00000000 },
+	{ CS42L43_ASP_CLK_CONFIG1,			0x00010001 },
+	{ CS42L43_ASP_CLK_CONFIG2,			0x00000000 },
+	{ CS42L43_OSC_DIV_SEL,				0x00000001 },
+	{ CS42L43_ADC_B_CTRL1,				0x00000000 },
+	{ CS42L43_ADC_B_CTRL2,				0x00000000 },
+	{ CS42L43_DECIM_HPF_WNF_CTRL1,			0x00000001 },
+	{ CS42L43_DECIM_HPF_WNF_CTRL2,			0x00000001 },
+	{ CS42L43_DECIM_HPF_WNF_CTRL3,			0x00000001 },
+	{ CS42L43_DECIM_HPF_WNF_CTRL4,			0x00000001 },
+	{ CS42L43_DMIC_PDM_CTRL,			0x00000000 },
+	{ CS42L43_DECIM_VOL_CTRL_CH1_CH2,		0x20122012 },
+	{ CS42L43_DECIM_VOL_CTRL_CH3_CH4,		0x20122012 },
+	{ CS42L43_INTP_VOLUME_CTRL1,			0x00000180 },
+	{ CS42L43_INTP_VOLUME_CTRL2,			0x00000180 },
+	{ CS42L43_AMP1_2_VOL_RAMP,			0x00000022 },
+	{ CS42L43_ASP_CTRL,				0x00000004 },
+	{ CS42L43_ASP_FSYNC_CTRL1,			0x000000FA },
+	{ CS42L43_ASP_FSYNC_CTRL2,			0x00000001 },
+	{ CS42L43_ASP_FSYNC_CTRL3,			0x00000000 },
+	{ CS42L43_ASP_FSYNC_CTRL4,			0x000001F4 },
+	{ CS42L43_ASP_DATA_CTRL,			0x0000003A },
+	{ CS42L43_ASP_RX_EN,				0x00000000 },
+	{ CS42L43_ASP_TX_EN,				0x00000000 },
+	{ CS42L43_ASP_RX_CH1_CTRL,			0x00170001 },
+	{ CS42L43_ASP_RX_CH2_CTRL,			0x00170031 },
+	{ CS42L43_ASP_RX_CH3_CTRL,			0x00170061 },
+	{ CS42L43_ASP_RX_CH4_CTRL,			0x00170091 },
+	{ CS42L43_ASP_RX_CH5_CTRL,			0x001700C1 },
+	{ CS42L43_ASP_RX_CH6_CTRL,			0x001700F1 },
+	{ CS42L43_ASP_TX_CH1_CTRL,			0x00170001 },
+	{ CS42L43_ASP_TX_CH2_CTRL,			0x00170031 },
+	{ CS42L43_ASP_TX_CH3_CTRL,			0x00170061 },
+	{ CS42L43_ASP_TX_CH4_CTRL,			0x00170091 },
+	{ CS42L43_ASP_TX_CH5_CTRL,			0x001700C1 },
+	{ CS42L43_ASP_TX_CH6_CTRL,			0x001700F1 },
+	{ CS42L43_ASPTX1_INPUT,				0x00800000 },
+	{ CS42L43_ASPTX2_INPUT,				0x00800000 },
+	{ CS42L43_ASPTX3_INPUT,				0x00800000 },
+	{ CS42L43_ASPTX4_INPUT,				0x00800000 },
+	{ CS42L43_ASPTX5_INPUT,				0x00800000 },
+	{ CS42L43_ASPTX6_INPUT,				0x00800000 },
+	{ CS42L43_SWIRE_DP1_CH1_INPUT,			0x00800000 },
+	{ CS42L43_SWIRE_DP1_CH2_INPUT,			0x00800000 },
+	{ CS42L43_SWIRE_DP1_CH3_INPUT,			0x00800000 },
+	{ CS42L43_SWIRE_DP1_CH4_INPUT,			0x00800000 },
+	{ CS42L43_SWIRE_DP2_CH1_INPUT,			0x00800000 },
+	{ CS42L43_SWIRE_DP2_CH2_INPUT,			0x00800000 },
+	{ CS42L43_SWIRE_DP3_CH1_INPUT,			0x00800000 },
+	{ CS42L43_SWIRE_DP3_CH2_INPUT,			0x00800000 },
+	{ CS42L43_SWIRE_DP4_CH1_INPUT,			0x00800000 },
+	{ CS42L43_SWIRE_DP4_CH2_INPUT,			0x00800000 },
+	{ CS42L43_ASRC_INT1_INPUT1,			0x00800000 },
+	{ CS42L43_ASRC_INT2_INPUT1,			0x00800000 },
+	{ CS42L43_ASRC_INT3_INPUT1,			0x00800000 },
+	{ CS42L43_ASRC_INT4_INPUT1,			0x00800000 },
+	{ CS42L43_ASRC_DEC1_INPUT1,			0x00800000 },
+	{ CS42L43_ASRC_DEC2_INPUT1,			0x00800000 },
+	{ CS42L43_ASRC_DEC3_INPUT1,			0x00800000 },
+	{ CS42L43_ASRC_DEC4_INPUT1,			0x00800000 },
+	{ CS42L43_ISRC1INT1_INPUT1,			0x00800000 },
+	{ CS42L43_ISRC1INT2_INPUT1,			0x00800000 },
+	{ CS42L43_ISRC1DEC1_INPUT1,			0x00800000 },
+	{ CS42L43_ISRC1DEC2_INPUT1,			0x00800000 },
+	{ CS42L43_ISRC2INT1_INPUT1,			0x00800000 },
+	{ CS42L43_ISRC2INT2_INPUT1,			0x00800000 },
+	{ CS42L43_ISRC2DEC1_INPUT1,			0x00800000 },
+	{ CS42L43_ISRC2DEC2_INPUT1,			0x00800000 },
+	{ CS42L43_EQ1MIX_INPUT1,			0x00800000 },
+	{ CS42L43_EQ1MIX_INPUT2,			0x00800000 },
+	{ CS42L43_EQ1MIX_INPUT3,			0x00800000 },
+	{ CS42L43_EQ1MIX_INPUT4,			0x00800000 },
+	{ CS42L43_EQ2MIX_INPUT1,			0x00800000 },
+	{ CS42L43_EQ2MIX_INPUT2,			0x00800000 },
+	{ CS42L43_EQ2MIX_INPUT3,			0x00800000 },
+	{ CS42L43_EQ2MIX_INPUT4,			0x00800000 },
+	{ CS42L43_SPDIF1_INPUT1,			0x00800000 },
+	{ CS42L43_SPDIF2_INPUT1,			0x00800000 },
+	{ CS42L43_AMP1MIX_INPUT1,			0x00800000 },
+	{ CS42L43_AMP1MIX_INPUT2,			0x00800000 },
+	{ CS42L43_AMP1MIX_INPUT3,			0x00800000 },
+	{ CS42L43_AMP1MIX_INPUT4,			0x00800000 },
+	{ CS42L43_AMP2MIX_INPUT1,			0x00800000 },
+	{ CS42L43_AMP2MIX_INPUT2,			0x00800000 },
+	{ CS42L43_AMP2MIX_INPUT3,			0x00800000 },
+	{ CS42L43_AMP2MIX_INPUT4,			0x00800000 },
+	{ CS42L43_AMP3MIX_INPUT1,			0x00800000 },
+	{ CS42L43_AMP3MIX_INPUT2,			0x00800000 },
+	{ CS42L43_AMP3MIX_INPUT3,			0x00800000 },
+	{ CS42L43_AMP3MIX_INPUT4,			0x00800000 },
+	{ CS42L43_AMP4MIX_INPUT1,			0x00800000 },
+	{ CS42L43_AMP4MIX_INPUT2,			0x00800000 },
+	{ CS42L43_AMP4MIX_INPUT3,			0x00800000 },
+	{ CS42L43_AMP4MIX_INPUT4,			0x00800000 },
+	{ CS42L43_ASRC_INT_ENABLES,			0x00000100 },
+	{ CS42L43_ASRC_DEC_ENABLES,			0x00000100 },
+	{ CS42L43_PDNCNTL,				0x00000000 },
+	{ CS42L43_RINGSENSE_DEB_CTRL,			0x0000001B },
+	{ CS42L43_TIPSENSE_DEB_CTRL,			0x0000001B },
+	{ CS42L43_HS2,					0x050106F3 },
+	{ CS42L43_STEREO_MIC_CTRL,			0x00000000 },
+	{ CS42L43_STEREO_MIC_CLAMP_CTRL,		0x00000001 },
+	{ CS42L43_BLOCK_EN2,				0x00000000 },
+	{ CS42L43_BLOCK_EN3,				0x00000000 },
+	{ CS42L43_BLOCK_EN4,				0x00000000 },
+	{ CS42L43_BLOCK_EN5,				0x00000000 },
+	{ CS42L43_BLOCK_EN6,				0x00000000 },
+	{ CS42L43_BLOCK_EN7,				0x00000000 },
+	{ CS42L43_BLOCK_EN8,				0x00000000 },
+	{ CS42L43_BLOCK_EN9,				0x00000000 },
+	{ CS42L43_BLOCK_EN10,				0x00000000 },
+	{ CS42L43_BLOCK_EN11,				0x00000000 },
+	{ CS42L43_TONE_CH1_CTRL,			0x00000000 },
+	{ CS42L43_TONE_CH2_CTRL,			0x00000000 },
+	{ CS42L43_MIC_DETECT_CONTROL_1,			0x00000003 },
+	{ CS42L43_HS_BIAS_SENSE_AND_CLAMP_AUTOCONTROL,	0x02000003 },
+	{ CS42L43_MIC_DETECT_CONTROL_ANDROID,		0x80790079 },
+	{ CS42L43_ISRC1_CTRL,				0x00000000 },
+	{ CS42L43_ISRC2_CTRL,				0x00000000 },
+	{ CS42L43_CTRL_REG,				0x00000006 },
+	{ CS42L43_FDIV_FRAC,				0x40000000 },
+	{ CS42L43_CAL_RATIO,				0x00000080 },
+	{ CS42L43_SPI_CLK_CONFIG1,			0x00000000 },
+	{ CS42L43_SPI_CONFIG1,				0x00000000 },
+	{ CS42L43_SPI_CONFIG2,				0x00000000 },
+	{ CS42L43_SPI_CONFIG3,				0x00000001 },
+	{ CS42L43_SPI_CONFIG4,				0x00000000 },
+	{ CS42L43_TRAN_CONFIG3,				0x00000000 },
+	{ CS42L43_TRAN_CONFIG4,				0x00000000 },
+	{ CS42L43_TRAN_CONFIG5,				0x00000000 },
+	{ CS42L43_TRAN_CONFIG6,				0x00000000 },
+	{ CS42L43_TRAN_CONFIG7,				0x00000000 },
+	{ CS42L43_TRAN_CONFIG8,				0x00000000 },
+	{ CS42L43_DACCNFG1,				0x00000008 },
+	{ CS42L43_DACCNFG2,				0x00000005 },
+	{ CS42L43_HPPATHVOL,				0x011B011B },
+	{ CS42L43_PGAVOL,				0x00003470 },
+	{ CS42L43_LOADDETENA,				0x00000000 },
+	{ CS42L43_CTRL,					0x00000037 },
+	{ CS42L43_COEFF_DATA_IN0,			0x00000000 },
+	{ CS42L43_COEFF_RD_WR0,				0x00000000 },
+	{ CS42L43_START_EQZ0,				0x00000000 },
+	{ CS42L43_MUTE_EQ_IN0,				0x00000000 },
+	{ CS42L43_DECIM_MASK,				0x0000000F },
+	{ CS42L43_EQ_MIX_MASK,				0x0000000F },
+	{ CS42L43_ASP_MASK,				0x000000FF },
+	{ CS42L43_PLL_MASK,				0x00000003 },
+	{ CS42L43_SOFT_MASK,				0x0000FFFF },
+	{ CS42L43_SWIRE_MASK,				0x00007FFF },
+	{ CS42L43_MSM_MASK,				0x00000FFF },
+	{ CS42L43_ACC_DET_MASK,				0x00000FFF },
+	{ CS42L43_I2C_TGT_MASK,				0x00000003 },
+	{ CS42L43_SPI_MSTR_MASK,			0x00000007 },
+	{ CS42L43_SW_TO_SPI_BRIDGE_MASK,		0x00000001 },
+	{ CS42L43_OTP_MASK,				0x00000007 },
+	{ CS42L43_CLASS_D_AMP_MASK,			0x00003FFF },
+	{ CS42L43_GPIO_INT_MASK,			0x0000003F },
+	{ CS42L43_ASRC_MASK,				0x0000000F },
+	{ CS42L43_HPOUT_MASK,				0x00000003 },
+};
+EXPORT_SYMBOL_NS_GPL(cs42l43_reg_default, MFD_CS42L43);
+
+bool cs42l43_readable_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case CS42L43_DEVID:
+	case CS42L43_REVID:
+	case CS42L43_RELID:
+	case CS42L43_SFT_RESET:
+	case CS42L43_DRV_CTRL1:
+	case CS42L43_DRV_CTRL3:
+	case CS42L43_DRV_CTRL4:
+	case CS42L43_DRV_CTRL_5:
+	case CS42L43_GPIO_CTRL1:
+	case CS42L43_GPIO_CTRL2:
+	case CS42L43_GPIO_STS:
+	case CS42L43_GPIO_FN_SEL:
+	case CS42L43_MCLK_SRC_SEL:
+	case CS42L43_SAMPLE_RATE1 ... CS42L43_SAMPLE_RATE4:
+	case CS42L43_PLL_CONTROL:
+	case CS42L43_FS_SELECT1 ... CS42L43_FS_SELECT4:
+	case CS42L43_PDM_CONTROL:
+	case CS42L43_ASP_CLK_CONFIG1 ... CS42L43_ASP_CLK_CONFIG2:
+	case CS42L43_OSC_DIV_SEL:
+	case CS42L43_ADC_B_CTRL1 ...  CS42L43_ADC_B_CTRL2:
+	case CS42L43_DECIM_HPF_WNF_CTRL1 ... CS42L43_DECIM_HPF_WNF_CTRL4:
+	case CS42L43_DMIC_PDM_CTRL:
+	case CS42L43_DECIM_VOL_CTRL_CH1_CH2 ... CS42L43_DECIM_VOL_CTRL_CH3_CH4:
+	case CS42L43_INTP_VOLUME_CTRL1 ... CS42L43_INTP_VOLUME_CTRL2:
+	case CS42L43_AMP1_2_VOL_RAMP:
+	case CS42L43_ASP_CTRL:
+	case CS42L43_ASP_FSYNC_CTRL1 ... CS42L43_ASP_FSYNC_CTRL4:
+	case CS42L43_ASP_DATA_CTRL:
+	case CS42L43_ASP_RX_EN ... CS42L43_ASP_TX_EN:
+	case CS42L43_ASP_RX_CH1_CTRL ... CS42L43_ASP_RX_CH6_CTRL:
+	case CS42L43_ASP_TX_CH1_CTRL ... CS42L43_ASP_TX_CH6_CTRL:
+	case CS42L43_OTP_REVISION_ID:
+	case CS42L43_ASPTX1_INPUT:
+	case CS42L43_ASPTX2_INPUT:
+	case CS42L43_ASPTX3_INPUT:
+	case CS42L43_ASPTX4_INPUT:
+	case CS42L43_ASPTX5_INPUT:
+	case CS42L43_ASPTX6_INPUT:
+	case CS42L43_SWIRE_DP1_CH1_INPUT:
+	case CS42L43_SWIRE_DP1_CH2_INPUT:
+	case CS42L43_SWIRE_DP1_CH3_INPUT:
+	case CS42L43_SWIRE_DP1_CH4_INPUT:
+	case CS42L43_SWIRE_DP2_CH1_INPUT:
+	case CS42L43_SWIRE_DP2_CH2_INPUT:
+	case CS42L43_SWIRE_DP3_CH1_INPUT:
+	case CS42L43_SWIRE_DP3_CH2_INPUT:
+	case CS42L43_SWIRE_DP4_CH1_INPUT:
+	case CS42L43_SWIRE_DP4_CH2_INPUT:
+	case CS42L43_ASRC_INT1_INPUT1:
+	case CS42L43_ASRC_INT2_INPUT1:
+	case CS42L43_ASRC_INT3_INPUT1:
+	case CS42L43_ASRC_INT4_INPUT1:
+	case CS42L43_ASRC_DEC1_INPUT1:
+	case CS42L43_ASRC_DEC2_INPUT1:
+	case CS42L43_ASRC_DEC3_INPUT1:
+	case CS42L43_ASRC_DEC4_INPUT1:
+	case CS42L43_ISRC1INT1_INPUT1:
+	case CS42L43_ISRC1INT2_INPUT1:
+	case CS42L43_ISRC1DEC1_INPUT1:
+	case CS42L43_ISRC1DEC2_INPUT1:
+	case CS42L43_ISRC2INT1_INPUT1:
+	case CS42L43_ISRC2INT2_INPUT1:
+	case CS42L43_ISRC2DEC1_INPUT1:
+	case CS42L43_ISRC2DEC2_INPUT1:
+	case CS42L43_EQ1MIX_INPUT1 ... CS42L43_EQ1MIX_INPUT4:
+	case CS42L43_EQ2MIX_INPUT1 ... CS42L43_EQ2MIX_INPUT4:
+	case CS42L43_SPDIF1_INPUT1:
+	case CS42L43_SPDIF2_INPUT1:
+	case CS42L43_AMP1MIX_INPUT1 ... CS42L43_AMP1MIX_INPUT4:
+	case CS42L43_AMP2MIX_INPUT1 ... CS42L43_AMP2MIX_INPUT4:
+	case CS42L43_AMP3MIX_INPUT1 ... CS42L43_AMP3MIX_INPUT4:
+	case CS42L43_AMP4MIX_INPUT1 ... CS42L43_AMP4MIX_INPUT4:
+	case CS42L43_ASRC_INT_ENABLES ... CS42L43_ASRC_DEC_ENABLES:
+	case CS42L43_PDNCNTL:
+	case CS42L43_RINGSENSE_DEB_CTRL:
+	case CS42L43_TIPSENSE_DEB_CTRL:
+	case CS42L43_TIP_RING_SENSE_INTERRUPT_STATUS:
+	case CS42L43_HS2:
+	case CS42L43_HS_STAT:
+	case CS42L43_MCU_SW_INTERRUPT:
+	case CS42L43_STEREO_MIC_CTRL:
+	case CS42L43_STEREO_MIC_CLAMP_CTRL:
+	case CS42L43_BLOCK_EN2 ... CS42L43_BLOCK_EN11:
+	case CS42L43_TONE_CH1_CTRL ... CS42L43_TONE_CH2_CTRL:
+	case CS42L43_MIC_DETECT_CONTROL_1:
+	case CS42L43_DETECT_STATUS_1:
+	case CS42L43_HS_BIAS_SENSE_AND_CLAMP_AUTOCONTROL:
+	case CS42L43_MIC_DETECT_CONTROL_ANDROID:
+	case CS42L43_ISRC1_CTRL:
+	case CS42L43_ISRC2_CTRL:
+	case CS42L43_CTRL_REG:
+	case CS42L43_FDIV_FRAC:
+	case CS42L43_CAL_RATIO:
+	case CS42L43_SPI_CLK_CONFIG1:
+	case CS42L43_SPI_CONFIG1 ... CS42L43_SPI_CONFIG4:
+	case CS42L43_SPI_STATUS1 ... CS42L43_SPI_STATUS2:
+	case CS42L43_TRAN_CONFIG1 ... CS42L43_TRAN_CONFIG8:
+	case CS42L43_TRAN_STATUS1 ... CS42L43_TRAN_STATUS3:
+	case CS42L43_TX_DATA:
+	case CS42L43_RX_DATA:
+	case CS42L43_DACCNFG1 ... CS42L43_DACCNFG2:
+	case CS42L43_HPPATHVOL:
+	case CS42L43_PGAVOL:
+	case CS42L43_LOADDETRESULTS:
+	case CS42L43_LOADDETENA:
+	case CS42L43_CTRL:
+	case CS42L43_COEFF_DATA_IN0:
+	case CS42L43_COEFF_RD_WR0:
+	case CS42L43_INIT_DONE0:
+	case CS42L43_START_EQZ0:
+	case CS42L43_MUTE_EQ_IN0:
+	case CS42L43_DECIM_INT ... CS42L43_HPOUT_INT:
+	case CS42L43_DECIM_MASK ... CS42L43_HPOUT_MASK:
+	case CS42L43_DECIM_INT_SHADOW ... CS42L43_HP_OUT_SHADOW:
+	case CS42L43_BOOT_CONTROL:
+	case CS42L43_BLOCK_EN:
+	case CS42L43_SHUTTER_CONTROL:
+	case CS42L43_MCU_SW_REV ... CS42L43_MCU_RAM_MAX:
+		return true;
+	default:
+		return false;
+	}
+}
+EXPORT_SYMBOL_NS_GPL(cs42l43_readable_register, MFD_CS42L43);
+
+bool cs42l43_precious_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case CS42L43_SFT_RESET:
+	case CS42L43_TX_DATA:
+	case CS42L43_RX_DATA:
+	case CS42L43_DECIM_INT ... CS42L43_HPOUT_INT:
+	case CS42L43_MCU_SW_REV ... CS42L43_MCU_RAM_MAX:
+		return true;
+	default:
+		return false;
+	}
+}
+EXPORT_SYMBOL_NS_GPL(cs42l43_precious_register, MFD_CS42L43);
+
+bool cs42l43_volatile_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case CS42L43_DEVID:
+	case CS42L43_REVID:
+	case CS42L43_RELID:
+	case CS42L43_GPIO_STS:
+	case CS42L43_OTP_REVISION_ID:
+	case CS42L43_TIP_RING_SENSE_INTERRUPT_STATUS:
+	case CS42L43_HS_STAT:
+	case CS42L43_MCU_SW_INTERRUPT:
+	case CS42L43_DETECT_STATUS_1:
+	case CS42L43_SPI_STATUS1 ... CS42L43_SPI_STATUS2:
+	case CS42L43_TRAN_CONFIG1 ... CS42L43_TRAN_CONFIG2:
+	case CS42L43_TRAN_CONFIG8:
+	case CS42L43_TRAN_STATUS1 ... CS42L43_TRAN_STATUS3:
+	case CS42L43_LOADDETRESULTS:
+	case CS42L43_INIT_DONE0:
+	case CS42L43_DECIM_INT_SHADOW ... CS42L43_HP_OUT_SHADOW:
+	case CS42L43_BOOT_CONTROL:
+	case CS42L43_BLOCK_EN:
+		return true;
+	default:
+		return cs42l43_precious_register(dev, reg);
+	}
+}
+EXPORT_SYMBOL_NS_GPL(cs42l43_volatile_register, MFD_CS42L43);
+
+#define CS42L43_IRQ_OFFSET(reg) ((CS42L43_##reg##_INT) - CS42L43_DECIM_INT)
+
+#define CS42L43_IRQ_REG(name, reg) REGMAP_IRQ_REG(CS42L43_##name, \
+						  CS42L43_IRQ_OFFSET(reg), \
+						  CS42L43_##name##_INT_MASK)
+
+static const struct regmap_irq cs42l43_regmap_irqs[] = {
+	CS42L43_IRQ_REG(PLL_LOST_LOCK,				PLL),
+	CS42L43_IRQ_REG(PLL_READY,				PLL),
+
+	CS42L43_IRQ_REG(HP_STARTUP_DONE,			MSM),
+	CS42L43_IRQ_REG(HP_SHUTDOWN_DONE,			MSM),
+	CS42L43_IRQ_REG(HSDET_DONE,				MSM),
+	CS42L43_IRQ_REG(TIPSENSE_UNPLUG_DB,			MSM),
+	CS42L43_IRQ_REG(TIPSENSE_PLUG_DB,			MSM),
+	CS42L43_IRQ_REG(RINGSENSE_UNPLUG_DB,			MSM),
+	CS42L43_IRQ_REG(RINGSENSE_PLUG_DB,			MSM),
+	CS42L43_IRQ_REG(TIPSENSE_UNPLUG_PDET,			MSM),
+	CS42L43_IRQ_REG(TIPSENSE_PLUG_PDET,			MSM),
+	CS42L43_IRQ_REG(RINGSENSE_UNPLUG_PDET,			MSM),
+	CS42L43_IRQ_REG(RINGSENSE_PLUG_PDET,			MSM),
+
+	CS42L43_IRQ_REG(HS2_BIAS_SENSE,				ACC_DET),
+	CS42L43_IRQ_REG(HS1_BIAS_SENSE,				ACC_DET),
+	CS42L43_IRQ_REG(DC_DETECT1_FALSE,			ACC_DET),
+	CS42L43_IRQ_REG(DC_DETECT1_TRUE,			ACC_DET),
+	CS42L43_IRQ_REG(HSBIAS_CLAMPED,				ACC_DET),
+	CS42L43_IRQ_REG(HS3_4_BIAS_SENSE,			ACC_DET),
+
+	CS42L43_IRQ_REG(AMP2_CLK_STOP_FAULT,			CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP1_CLK_STOP_FAULT,			CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP2_VDDSPK_FAULT,			CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP1_VDDSPK_FAULT,			CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP2_SHUTDOWN_DONE,			CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP1_SHUTDOWN_DONE,			CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP2_STARTUP_DONE,			CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP1_STARTUP_DONE,			CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP2_THERM_SHDN,			CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP1_THERM_SHDN,			CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP2_THERM_WARN,			CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP1_THERM_WARN,			CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP2_SCDET,				CLASS_D_AMP),
+	CS42L43_IRQ_REG(AMP1_SCDET,				CLASS_D_AMP),
+
+	CS42L43_IRQ_REG(GPIO3_FALL,				GPIO),
+	CS42L43_IRQ_REG(GPIO3_RISE,				GPIO),
+	CS42L43_IRQ_REG(GPIO2_FALL,				GPIO),
+	CS42L43_IRQ_REG(GPIO2_RISE,				GPIO),
+	CS42L43_IRQ_REG(GPIO1_FALL,				GPIO),
+	CS42L43_IRQ_REG(GPIO1_RISE,				GPIO),
+
+	CS42L43_IRQ_REG(HP_ILIMIT,				HPOUT),
+	CS42L43_IRQ_REG(HP_LOADDET_DONE,			HPOUT),
+};
+
+static const struct regmap_irq_chip cs42l43_irq_chip = {
+	.name = "cs42l43",
+
+	.status_base = CS42L43_DECIM_INT,
+	.mask_base = CS42L43_DECIM_MASK,
+	.num_regs = 16,
+
+	.irqs = cs42l43_regmap_irqs,
+	.num_irqs = ARRAY_SIZE(cs42l43_regmap_irqs),
+
+	.runtime_pm = true,
+};
+
+static const char * const cs42l43_core_supplies[] = {
+	"vdd-a", "vdd-io", "vdd-cp",
+};
+
+static const char * const cs42l43_parent_supplies[] = { "vdd-amp" };
+
+static const struct mfd_cell cs42l43_devs[] = {
+	{ .name = "cs42l43-pinctrl", },
+	{ .name = "cs42l43-spi", },
+	{
+		.name = "cs42l43-codec",
+		.parent_supplies = cs42l43_parent_supplies,
+		.num_parent_supplies = ARRAY_SIZE(cs42l43_parent_supplies),
+	},
+};
+
+/*
+ * If the device is connected over Soundwire, as well as soft resetting the
+ * device, this function will also way for the device to detach from the bus
+ * before returning.
+ */
+static int cs42l43_soft_reset(struct cs42l43 *cs42l43)
+{
+	static const struct reg_sequence reset[] = {
+		{ CS42L43_SFT_RESET, CS42L43_SFT_RESET_VAL },
+	};
+
+	reinit_completion(&cs42l43->device_detach);
+
+	/*
+	 * Apply cache only because the soft reset will cause the device to
+	 * detach from the soundwire bus.
+	 */
+	regcache_cache_only(cs42l43->regmap, true);
+	regmap_multi_reg_write_bypassed(cs42l43->regmap, reset, ARRAY_SIZE(reset));
+
+	msleep(CS42L43_RESET_DELAY);
+
+	if (cs42l43->sdw) {
+		unsigned long timeout = msecs_to_jiffies(CS42L43_SDW_DETACH_TIMEOUT);
+		unsigned long time;
+
+		time = wait_for_completion_timeout(&cs42l43->device_detach, timeout);
+		if (!time) {
+			dev_err(cs42l43->dev, "Timed out waiting for device detach\n");
+			return -ETIMEDOUT;
+		}
+	}
+
+	return -EAGAIN;
+}
+
+/*
+ * This function is essentially a no-op on I2C, but will wait for the device to
+ * attach when the device is used on a SoundWire bus.
+ */
+static int cs42l43_wait_for_attach(struct cs42l43 *cs42l43)
+{
+	if (!cs42l43->attached) {
+		unsigned long timeout = msecs_to_jiffies(CS42L43_SDW_ATTACH_TIMEOUT);
+		unsigned long time;
+
+		time = wait_for_completion_timeout(&cs42l43->device_attach, timeout);
+		if (!time) {
+			dev_err(cs42l43->dev, "Timed out waiting for device re-attach\n");
+			return -ETIMEDOUT;
+		}
+	}
+
+	regcache_cache_only(cs42l43->regmap, false);
+
+	/* The hardware requires enabling OSC_DIV before doing any SoundWire reads. */
+	if (cs42l43->sdw)
+		regmap_write(cs42l43->regmap, CS42L43_OSC_DIV_SEL,
+			     CS42L43_OSC_DIV2_EN_MASK);
+
+	return 0;
+}
+
+/*
+ * This function will advance the firmware into boot stage 3 from boot stage 2.
+ * Boot stage 3 is required to send commands to the firmware. This is achieved
+ * by setting the firmware NEED configuration register to zero, this indicates
+ * no configuration is required forcing the firmware to advance to boot stage 3.
+ *
+ * Later revisions of the firmware require the use of an alternative register
+ * for this purpose, which is indicated through the shadow flag.
+ */
+static int cs42l43_mcu_stage_2_3(struct cs42l43 *cs42l43, bool shadow)
+{
+	unsigned int need_reg = CS42L43_NEED_CONFIGS;
+	unsigned int val;
+	int ret;
+
+	if (shadow)
+		need_reg = CS42L43_FW_SH_BOOT_CFG_NEED_CONFIGS;
+
+	regmap_write(cs42l43->regmap, need_reg, 0);
+
+	ret = regmap_read_poll_timeout(cs42l43->regmap, CS42L43_BOOT_STATUS,
+				       val, (val == CS42L43_MCU_BOOT_STAGE3),
+				       CS42L43_MCU_POLL, CS42L43_MCU_CMD_TIMEOUT);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to move to stage 3: %d, 0x%x\n", ret, val);
+		return ret;
+	}
+
+	return -EAGAIN;
+}
+
+/*
+ * This function will return the firmware to boot stage 2 from boot stage 3.
+ * Boot stage 2 is required to apply updates to the firmware. This is achieved
+ * by setting the firmware NEED configuration register to FW_PATCH_NEED_CFG,
+ * setting the HAVE configuration register to 0, and soft resetting. The
+ * firmware will see it is missing a patch configuration and will pause in boot
+ * stage 2.
+ *
+ * Note: Unlike cs42l43_mcu_stage_2_3 there is no need to consider the shadow
+ * register here as the driver will only return to boot stage 2 if the firmware
+ * requires update which means the revision does not include shadow register
+ * support.
+ */
+static int cs42l43_mcu_stage_3_2(struct cs42l43 *cs42l43)
+{
+	regmap_write(cs42l43->regmap, CS42L43_FW_MISSION_CTRL_NEED_CONFIGS,
+		     CS42L43_FW_PATCH_NEED_CFG_MASK);
+	regmap_write(cs42l43->regmap, CS42L43_FW_MISSION_CTRL_HAVE_CONFIGS, 0);
+
+	return cs42l43_soft_reset(cs42l43);
+}
+
+/*
+ * Disable the firmware running on the device such that the driver can access
+ * the registers without fear of the MCU changing them under it.
+ */
+static int cs42l43_mcu_disable(struct cs42l43 *cs42l43)
+{
+	unsigned int val;
+	int ret;
+
+	regmap_write(cs42l43->regmap, CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG,
+		     CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_DISABLE_VAL);
+	regmap_write(cs42l43->regmap, CS42L43_FW_MISSION_CTRL_MM_CTRL_SELECTION,
+		     CS42L43_FW_MM_CTRL_MCU_SEL_MASK);
+	regmap_write(cs42l43->regmap, CS42L43_MCU_SW_INTERRUPT, CS42L43_CONTROL_IND_MASK);
+	regmap_write(cs42l43->regmap, CS42L43_MCU_SW_INTERRUPT, 0);
+
+	ret = regmap_read_poll_timeout(cs42l43->regmap, CS42L43_SOFT_INT_SHADOW, val,
+				       (val & CS42L43_CONTROL_APPLIED_INT_MASK),
+				       CS42L43_MCU_POLL, CS42L43_MCU_CMD_TIMEOUT);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to disable firmware: %d, 0x%x\n", ret, val);
+		return ret;
+	}
+
+	/* Soft reset to clear any register state the firmware left behind. */
+	return cs42l43_soft_reset(cs42l43);
+}
+
+/*
+ * Callback to load firmware updates.
+ */
+static void cs42l43_mcu_load_firmware(const struct firmware *firmware, void *context)
+{
+	struct cs42l43 *cs42l43 = context;
+	const struct cs42l43_patch_header *hdr;
+	unsigned int loadaddr, val;
+	int ret;
+
+	if (!firmware) {
+		dev_err(cs42l43->dev, "Failed to load firmware\n");
+		cs42l43->firmware_error = -ENODEV;
+		goto err;
+	}
+
+	hdr = (const struct cs42l43_patch_header *)&firmware->data[0];
+	loadaddr = le32_to_cpu(hdr->load_addr);
+
+	if (le16_to_cpu(hdr->version) != CS42L43_MCU_UPDATE_FORMAT) {
+		dev_err(cs42l43->dev, "Bad firmware file format: %d\n", hdr->version);
+		cs42l43->firmware_error = -EINVAL;
+		goto err_release;
+	}
+
+	regmap_write(cs42l43->regmap, CS42L43_PATCH_START_ADDR, loadaddr);
+	regmap_bulk_write(cs42l43->regmap, loadaddr + CS42L43_MCU_UPDATE_OFFSET,
+			  &firmware->data[0], firmware->size / sizeof(u32));
+
+	regmap_write(cs42l43->regmap, CS42L43_MCU_SW_INTERRUPT, CS42L43_PATCH_IND_MASK);
+	regmap_write(cs42l43->regmap, CS42L43_MCU_SW_INTERRUPT, 0);
+
+	ret = regmap_read_poll_timeout(cs42l43->regmap, CS42L43_SOFT_INT_SHADOW, val,
+				       (val & CS42L43_PATCH_APPLIED_INT_MASK),
+				       CS42L43_MCU_POLL, CS42L43_MCU_UPDATE_TIMEOUT);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to update firmware: %d, 0x%x\n", ret, val);
+		cs42l43->firmware_error = ret;
+		goto err_release;
+	}
+
+err_release:
+	release_firmware(firmware);
+err:
+	complete(&cs42l43->firmware_download);
+}
+
+/*
+ * The process of updating the firmware is split into a series of steps, at the
+ * end of each step a soft reset of the device might be required which will
+ * require the driver to wait for the device to re-attach on the SoundWire bus,
+ * if that control bus is being used.
+ */
+static int cs42l43_mcu_update_step(struct cs42l43 *cs42l43)
+{
+	unsigned int mcu_rev, bios_rev, boot_status, secure_cfg;
+	bool patched, shadow;
+	int ret;
+
+	/* Clear any stale software interrupt bits. */
+	regmap_read(cs42l43->regmap, CS42L43_SOFT_INT, &mcu_rev);
+
+	ret = regmap_read(cs42l43->regmap, CS42L43_BOOT_STATUS, &boot_status);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to read boot status: %d\n", ret);
+		return ret;
+	}
+
+	ret = regmap_read(cs42l43->regmap, CS42L43_MCU_SW_REV, &mcu_rev);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to read firmware revision: %d\n", ret);
+		return ret;
+	}
+
+	bios_rev = (((mcu_rev & CS42L43_BIOS_MAJOR_REV_MASK) << 12) |
+		    ((mcu_rev & CS42L43_BIOS_MINOR_REV_MASK) << 4) |
+		    ((mcu_rev & CS42L43_BIOS_SUBMINOR_REV_MASK) >> 8)) >>
+		   CS42L43_BIOS_MAJOR_REV_SHIFT;
+	mcu_rev = ((mcu_rev & CS42L43_FW_MAJOR_REV_MASK) << 12) |
+		  ((mcu_rev & CS42L43_FW_MINOR_REV_MASK) << 4) |
+		  ((mcu_rev & CS42L43_FW_SUBMINOR_REV_MASK) >> 8);
+
+	/*
+	 * The firmware has two revision numbers bringing either of them up to a
+	 * supported version will provide the features the driver requires.
+	 */
+	patched = mcu_rev >= CS42L43_MCU_SUPPORTED_REV ||
+		  bios_rev >= CS42L43_MCU_SUPPORTED_BIOS_REV;
+	/*
+	 * Later versions of the firmwware require the driver to access some
+	 * features through a set of shadow registers.
+	 */
+	shadow = mcu_rev >= CS42L43_MCU_SHADOW_REGS_REQUIRED_REV;
+
+	ret = regmap_read(cs42l43->regmap, CS42L43_BOOT_CONTROL, &secure_cfg);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to read security settings: %d\n", ret);
+		return ret;
+	}
+
+	cs42l43->hw_lock = secure_cfg & CS42L43_LOCK_HW_STS_MASK;
+
+	if (!patched && cs42l43->hw_lock) {
+		dev_err(cs42l43->dev, "Unpatched secure device\n");
+		return -EPERM;
+	}
+
+	dev_dbg(cs42l43->dev, "Firmware(0x%x, 0x%x) in boot stage %d\n",
+		mcu_rev, bios_rev, boot_status);
+
+	switch (boot_status) {
+	case CS42L43_MCU_BOOT_STAGE2:
+		if (!patched) {
+			ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_UEVENT,
+						      "cs42l43.bin", cs42l43->dev,
+						      GFP_KERNEL, cs42l43,
+						      cs42l43_mcu_load_firmware);
+			if (ret) {
+				dev_err(cs42l43->dev, "Failed to request firmware: %d\n", ret);
+				return ret;
+			}
+
+			wait_for_completion(&cs42l43->firmware_download);
+
+			if (cs42l43->firmware_error)
+				return cs42l43->firmware_error;
+
+			return -EAGAIN;
+		} else {
+			return cs42l43_mcu_stage_2_3(cs42l43, shadow);
+		}
+	case CS42L43_MCU_BOOT_STAGE3:
+		if (patched)
+			return cs42l43_mcu_disable(cs42l43);
+		else
+			return cs42l43_mcu_stage_3_2(cs42l43);
+	case CS42L43_MCU_BOOT_STAGE4:
+		return 0;
+	default:
+		dev_err(cs42l43->dev, "Invalid boot status: %d\n", boot_status);
+		return -EINVAL;
+	}
+}
+
+/*
+ * Update the firmware running on the device.
+ */
+static int cs42l43_mcu_update(struct cs42l43 *cs42l43)
+{
+	int i, ret;
+
+	for (i = 0; i < CS42L43_MCU_UPDATE_RETRIES; i++) {
+		ret = cs42l43_mcu_update_step(cs42l43);
+		if (ret != -EAGAIN)
+			return ret;
+
+		ret = cs42l43_wait_for_attach(cs42l43);
+		if (ret)
+			return ret;
+	}
+
+	dev_err(cs42l43->dev, "Failed retrying update\n");
+	return -ETIMEDOUT;
+}
+
+static int cs42l43_irq_config(struct cs42l43 *cs42l43)
+{
+	struct irq_data *irq_data;
+	unsigned long irq_flags;
+	int ret;
+
+	if (cs42l43->sdw)
+		cs42l43->irq = cs42l43->sdw->irq;
+
+	cs42l43->irq_chip = cs42l43_irq_chip;
+	cs42l43->irq_chip.irq_drv_data = cs42l43;
+
+	irq_data = irq_get_irq_data(cs42l43->irq);
+	if (!irq_data) {
+		dev_err(cs42l43->dev, "Invalid IRQ: %d\n", cs42l43->irq);
+		return -EINVAL;
+	}
+
+	irq_flags = irqd_get_trigger_type(irq_data);
+	switch (irq_flags) {
+	case IRQF_TRIGGER_LOW:
+	case IRQF_TRIGGER_HIGH:
+	case IRQF_TRIGGER_RISING:
+	case IRQF_TRIGGER_FALLING:
+		break;
+	case IRQ_TYPE_NONE:
+	default:
+		irq_flags = IRQF_TRIGGER_LOW;
+		break;
+	}
+
+	irq_flags |= IRQF_ONESHOT;
+
+	ret = devm_regmap_add_irq_chip(cs42l43->dev, cs42l43->regmap,
+				       cs42l43->irq, irq_flags, 0,
+				       &cs42l43->irq_chip, &cs42l43->irq_data);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to add IRQ chip: %d\n", ret);
+		return ret;
+	}
+
+	dev_dbg(cs42l43->dev, "Configured IRQ %d with flags 0x%lx\n",
+		cs42l43->irq, irq_flags);
+
+	return 0;
+}
+
+static void cs42l43_boot_work(struct work_struct *work)
+{
+	struct cs42l43 *cs42l43 = container_of(work, struct cs42l43, boot_work);
+	unsigned int devid, revid, otp;
+	int ret;
+
+	ret = cs42l43_wait_for_attach(cs42l43);
+	if (ret)
+		goto err;
+
+	ret = regmap_read(cs42l43->regmap, CS42L43_DEVID, &devid);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to read devid: %d\n", ret);
+		goto err;
+	}
+
+	switch (devid) {
+	case CS42L43_DEVID_VAL:
+		break;
+	default:
+		dev_err(cs42l43->dev, "Unrecognised devid: 0x%06x\n", devid);
+		goto err;
+	}
+
+	ret = regmap_read(cs42l43->regmap, CS42L43_REVID, &revid);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to read rev: %d\n", ret);
+		goto err;
+	}
+
+	ret = regmap_read(cs42l43->regmap, CS42L43_OTP_REVISION_ID, &otp);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to read otp rev: %d\n", ret);
+		goto err;
+	}
+
+	dev_info(cs42l43->dev,
+		 "devid: 0x%06x, rev: 0x%02x, otp: 0x%02x\n", devid, revid, otp);
+
+	ret = cs42l43_mcu_update(cs42l43);
+	if (ret)
+		goto err;
+
+	ret = regmap_register_patch(cs42l43->regmap, cs42l43_reva_patch,
+				    ARRAY_SIZE(cs42l43_reva_patch));
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to apply register patch: %d\n", ret);
+		goto err;
+	}
+
+	ret = cs42l43_irq_config(cs42l43);
+	if (ret)
+		goto err;
+
+	ret = devm_mfd_add_devices(cs42l43->dev, PLATFORM_DEVID_NONE,
+				   cs42l43_devs, ARRAY_SIZE(cs42l43_devs),
+				   NULL, 0, NULL);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to add subdevices: %d\n", ret);
+		goto err;
+	}
+
+	pm_runtime_mark_last_busy(cs42l43->dev);
+	pm_runtime_put_autosuspend(cs42l43->dev);
+
+	return;
+
+err:
+	pm_runtime_put_sync(cs42l43->dev);
+	cs42l43_dev_remove(cs42l43);
+}
+
+static int cs42l43_power_up(struct cs42l43 *cs42l43)
+{
+	int ret;
+
+	ret = regulator_enable(cs42l43->vdd_p);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to enable vdd-p: %d\n", ret);
+		return ret;
+	}
+
+	/* vdd-p must be on for 50uS before any other supply */
+	usleep_range(CS42L43_VDDP_DELAY, 2 * CS42L43_VDDP_DELAY);
+
+	gpiod_set_value_cansleep(cs42l43->reset, 1);
+
+	ret = regulator_bulk_enable(CS42L43_N_SUPPLIES, cs42l43->core_supplies);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to enable core supplies: %d\n", ret);
+		goto err_reset;
+	}
+
+	ret = regulator_enable(cs42l43->vdd_d);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to enable vdd-d: %d\n", ret);
+		goto err_core_supplies;
+	}
+
+	usleep_range(CS42L43_VDDD_DELAY, 2 * CS42L43_VDDD_DELAY);
+
+	return 0;
+
+err_core_supplies:
+	regulator_bulk_disable(CS42L43_N_SUPPLIES, cs42l43->core_supplies);
+err_reset:
+	gpiod_set_value_cansleep(cs42l43->reset, 0);
+	regulator_disable(cs42l43->vdd_p);
+
+	return ret;
+}
+
+static int cs42l43_power_down(struct cs42l43 *cs42l43)
+{
+	int ret;
+
+	ret = regulator_disable(cs42l43->vdd_d);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to disable vdd-d: %d\n", ret);
+		return ret;
+	}
+
+	ret = regulator_bulk_disable(CS42L43_N_SUPPLIES, cs42l43->core_supplies);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to disable core supplies: %d\n", ret);
+		return ret;
+	}
+
+	gpiod_set_value_cansleep(cs42l43->reset, 0);
+
+	ret = regulator_disable(cs42l43->vdd_p);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to disable vdd-p: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int cs42l43_dev_probe(struct cs42l43 *cs42l43)
+{
+	int i, ret;
+
+	dev_set_drvdata(cs42l43->dev, cs42l43);
+
+	mutex_init(&cs42l43->pll_lock);
+	init_completion(&cs42l43->device_attach);
+	init_completion(&cs42l43->device_detach);
+	init_completion(&cs42l43->firmware_download);
+	INIT_WORK(&cs42l43->boot_work, cs42l43_boot_work);
+
+	regcache_cache_only(cs42l43->regmap, true);
+
+	cs42l43->reset = devm_gpiod_get_optional(cs42l43->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(cs42l43->reset))
+		return dev_err_probe(cs42l43->dev, PTR_ERR(cs42l43->reset),
+				     "Failed to get reset\n");
+
+	cs42l43->vdd_p = devm_regulator_get(cs42l43->dev, "vdd-p");
+	if (IS_ERR(cs42l43->vdd_p))
+		return dev_err_probe(cs42l43->dev, PTR_ERR(cs42l43->vdd_p),
+				     "Failed to get vdd-p\n");
+
+	cs42l43->vdd_d = devm_regulator_get(cs42l43->dev, "vdd-d");
+	if (IS_ERR(cs42l43->vdd_d))
+		return dev_err_probe(cs42l43->dev, PTR_ERR(cs42l43->vdd_d),
+				     "Failed to get vdd-d\n");
+
+	BUILD_BUG_ON(ARRAY_SIZE(cs42l43_core_supplies) != CS42L43_N_SUPPLIES);
+
+	for (i = 0; i < CS42L43_N_SUPPLIES; i++)
+		cs42l43->core_supplies[i].supply = cs42l43_core_supplies[i];
+
+	ret = devm_regulator_bulk_get(cs42l43->dev, CS42L43_N_SUPPLIES,
+				      cs42l43->core_supplies);
+	if (ret)
+		return dev_err_probe(cs42l43->dev, ret,
+				     "Failed to get core supplies\n");
+
+	ret = cs42l43_power_up(cs42l43);
+	if (ret)
+		return ret;
+
+	pm_runtime_set_autosuspend_delay(cs42l43->dev, CS42L43_AUTOSUSPEND_TIME);
+	pm_runtime_use_autosuspend(cs42l43->dev);
+	pm_runtime_set_active(cs42l43->dev);
+	/*
+	 * The device is already powered up, but keep it from suspending until
+	 * the boot work runs.
+	 */
+	pm_runtime_get_noresume(cs42l43->dev);
+	devm_pm_runtime_enable(cs42l43->dev);
+
+	queue_work(system_long_wq, &cs42l43->boot_work);
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cs42l43_dev_probe, MFD_CS42L43);
+
+void cs42l43_dev_remove(struct cs42l43 *cs42l43)
+{
+	cs42l43_power_down(cs42l43);
+}
+EXPORT_SYMBOL_NS_GPL(cs42l43_dev_remove, MFD_CS42L43);
+
+static int cs42l43_suspend(struct device *dev)
+{
+	struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+	int ret;
+
+	/*
+	 * Don't care about being resumed here, but the driver does want
+	 * force_resume to always trigger an actual resume, so that register
+	 * state for the MCU/GPIOs is returned as soon as possible after system
+	 * resume. force_resume will resume if the reference count is resumed on
+	 * suspend hence the get_noresume.
+	 */
+	pm_runtime_get_noresume(dev);
+
+	ret = pm_runtime_force_suspend(dev);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to force suspend: %d\n", ret);
+		pm_runtime_put_noidle(dev);
+		return ret;
+	}
+
+	pm_runtime_put_noidle(dev);
+
+	ret = cs42l43_power_down(cs42l43);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int cs42l43_resume(struct device *dev)
+{
+	struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+	int ret;
+
+	ret = cs42l43_power_up(cs42l43);
+	if (ret)
+		return ret;
+
+	ret = pm_runtime_force_resume(dev);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to force resume: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int cs42l43_runtime_suspend(struct device *dev)
+{
+	struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+
+	/*
+	 * Whilst the driver doesn't power the chip down here, going into runtime
+	 * suspend lets the SoundWire bus power down, which means the driver
+	 * can't communicate with the device any more.
+	 */
+	regcache_cache_only(cs42l43->regmap, true);
+
+	return 0;
+}
+
+static int cs42l43_runtime_resume(struct device *dev)
+{
+	struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+	unsigned int reset_canary;
+	int ret;
+
+	ret = cs42l43_wait_for_attach(cs42l43);
+	if (ret)
+		return ret;
+
+	ret = regmap_read(cs42l43->regmap, CS42L43_RELID, &reset_canary);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to check reset canary: %d\n", ret);
+		goto err;
+	}
+
+	if (!reset_canary) {
+		/*
+		 * If the canary has cleared the chip has reset, re-handle the
+		 * MCU and mark the cache as dirty to indicate the chip reset.
+		 */
+		ret = cs42l43_mcu_update(cs42l43);
+		if (ret)
+			goto err;
+
+		regcache_mark_dirty(cs42l43->regmap);
+	}
+
+	ret = regcache_sync(cs42l43->regmap);
+	if (ret) {
+		dev_err(cs42l43->dev, "Failed to restore register cache: %d\n", ret);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	regcache_cache_only(cs42l43->regmap, true);
+
+	return ret;
+}
+
+EXPORT_NS_GPL_DEV_PM_OPS(cs42l43_pm_ops, MFD_CS42L43) = {
+	SET_SYSTEM_SLEEP_PM_OPS(cs42l43_suspend, cs42l43_resume)
+	SET_RUNTIME_PM_OPS(cs42l43_runtime_suspend, cs42l43_runtime_resume, NULL)
+};
+
+MODULE_DESCRIPTION("CS42L43 Core Driver");
+MODULE_AUTHOR("Charles Keepax <ckeepax@opensource.cirrus.com>");
+MODULE_LICENSE("GPL");
+MODULE_FIRMWARE("cs42l43.bin");
diff --git a/drivers/mfd/cs42l43.h b/drivers/mfd/cs42l43.h
new file mode 100644
index 000000000000..eb4caf393833
--- /dev/null
+++ b/drivers/mfd/cs42l43.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CS42L43 core driver internal data
+ *
+ * Copyright (C) 2022-2023 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ */
+
+#include <linux/mfd/cs42l43.h>
+#include <linux/pm.h>
+#include <linux/regmap.h>
+
+#ifndef CS42L43_CORE_INT_H
+#define CS42L43_CORE_INT_H
+
+#define CS42L43_N_DEFAULTS 176
+
+extern const struct dev_pm_ops cs42l43_pm_ops;
+extern const struct reg_default cs42l43_reg_default[CS42L43_N_DEFAULTS];
+
+bool cs42l43_readable_register(struct device *dev, unsigned int reg);
+bool cs42l43_precious_register(struct device *dev, unsigned int reg);
+bool cs42l43_volatile_register(struct device *dev, unsigned int reg);
+
+int cs42l43_dev_probe(struct cs42l43 *cs42l43);
+void cs42l43_dev_remove(struct cs42l43 *cs42l43);
+
+#endif /* CS42L43_CORE_INT_H */
diff --git a/include/linux/mfd/cs42l43-regs.h b/include/linux/mfd/cs42l43-regs.h
new file mode 100644
index 000000000000..c39a49269cb7
--- /dev/null
+++ b/include/linux/mfd/cs42l43-regs.h
@@ -0,0 +1,1184 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * cs42l43 register definitions
+ *
+ * Copyright (c) 2022-2023 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ */
+
+#ifndef CS42L43_CORE_REGS_H
+#define CS42L43_CORE_REGS_H
+
+/* Registers */
+#define CS42L43_GEN_INT_STAT_1					0x000000C0
+#define CS42L43_GEN_INT_MASK_1					0x000000C1
+#define CS42L43_DEVID						0x00003000
+#define CS42L43_REVID						0x00003004
+#define CS42L43_RELID						0x0000300C
+#define CS42L43_SFT_RESET					0x00003020
+#define CS42L43_DRV_CTRL1					0x00006004
+#define CS42L43_DRV_CTRL3					0x0000600C
+#define CS42L43_DRV_CTRL4					0x00006010
+#define CS42L43_DRV_CTRL_5					0x00006014
+#define CS42L43_GPIO_CTRL1					0x00006034
+#define CS42L43_GPIO_CTRL2					0x00006038
+#define CS42L43_GPIO_STS					0x0000603C
+#define CS42L43_GPIO_FN_SEL					0x00006040
+#define CS42L43_MCLK_SRC_SEL					0x00007004
+#define CS42L43_CCM_BLK_CLK_CONTROL				0x00007010
+#define CS42L43_SAMPLE_RATE1					0x00007014
+#define CS42L43_SAMPLE_RATE2					0x00007018
+#define CS42L43_SAMPLE_RATE3					0x0000701C
+#define CS42L43_SAMPLE_RATE4					0x00007020
+#define CS42L43_PLL_CONTROL					0x00007034
+#define CS42L43_FS_SELECT1					0x00007038
+#define CS42L43_FS_SELECT2					0x0000703C
+#define CS42L43_FS_SELECT3					0x00007040
+#define CS42L43_FS_SELECT4					0x00007044
+#define CS42L43_PDM_CONTROL					0x0000704C
+#define CS42L43_ASP_CLK_CONFIG1					0x00007058
+#define CS42L43_ASP_CLK_CONFIG2					0x0000705C
+#define CS42L43_OSC_DIV_SEL					0x00007068
+#define CS42L43_ADC_B_CTRL1					0x00008000
+#define CS42L43_ADC_B_CTRL2					0x00008004
+#define CS42L43_DECIM_HPF_WNF_CTRL1				0x0000803C
+#define CS42L43_DECIM_HPF_WNF_CTRL2				0x00008040
+#define CS42L43_DECIM_HPF_WNF_CTRL3				0x00008044
+#define CS42L43_DECIM_HPF_WNF_CTRL4				0x00008048
+#define CS42L43_DMIC_PDM_CTRL					0x0000804C
+#define CS42L43_DECIM_VOL_CTRL_CH1_CH2				0x00008050
+#define CS42L43_DECIM_VOL_CTRL_CH3_CH4				0x00008054
+#define CS42L43_DECIM_VOL_CTRL_UPDATE				0x00008058
+#define CS42L43_INTP_VOLUME_CTRL1				0x00009008
+#define CS42L43_INTP_VOLUME_CTRL2				0x0000900C
+#define CS42L43_AMP1_2_VOL_RAMP					0x00009010
+#define CS42L43_ASP_CTRL					0x0000A000
+#define CS42L43_ASP_FSYNC_CTRL1					0x0000A004
+#define CS42L43_ASP_FSYNC_CTRL2					0x0000A008
+#define CS42L43_ASP_FSYNC_CTRL3					0x0000A00C
+#define CS42L43_ASP_FSYNC_CTRL4					0x0000A010
+#define CS42L43_ASP_DATA_CTRL					0x0000A018
+#define CS42L43_ASP_RX_EN					0x0000A020
+#define CS42L43_ASP_TX_EN					0x0000A024
+#define CS42L43_ASP_RX_CH1_CTRL					0x0000A028
+#define CS42L43_ASP_RX_CH2_CTRL					0x0000A02C
+#define CS42L43_ASP_RX_CH3_CTRL					0x0000A030
+#define CS42L43_ASP_RX_CH4_CTRL					0x0000A034
+#define CS42L43_ASP_RX_CH5_CTRL					0x0000A038
+#define CS42L43_ASP_RX_CH6_CTRL					0x0000A03C
+#define CS42L43_ASP_TX_CH1_CTRL					0x0000A068
+#define CS42L43_ASP_TX_CH2_CTRL					0x0000A06C
+#define CS42L43_ASP_TX_CH3_CTRL					0x0000A070
+#define CS42L43_ASP_TX_CH4_CTRL					0x0000A074
+#define CS42L43_ASP_TX_CH5_CTRL					0x0000A078
+#define CS42L43_ASP_TX_CH6_CTRL					0x0000A07C
+#define CS42L43_OTP_REVISION_ID					0x0000B02C
+#define CS42L43_ASPTX1_INPUT					0x0000C200
+#define CS42L43_ASPTX2_INPUT					0x0000C210
+#define CS42L43_ASPTX3_INPUT					0x0000C220
+#define CS42L43_ASPTX4_INPUT					0x0000C230
+#define CS42L43_ASPTX5_INPUT					0x0000C240
+#define CS42L43_ASPTX6_INPUT					0x0000C250
+#define CS42L43_SWIRE_DP1_CH1_INPUT				0x0000C280
+#define CS42L43_SWIRE_DP1_CH2_INPUT				0x0000C290
+#define CS42L43_SWIRE_DP1_CH3_INPUT				0x0000C2A0
+#define CS42L43_SWIRE_DP1_CH4_INPUT				0x0000C2B0
+#define CS42L43_SWIRE_DP2_CH1_INPUT				0x0000C2C0
+#define CS42L43_SWIRE_DP2_CH2_INPUT				0x0000C2D0
+#define CS42L43_SWIRE_DP3_CH1_INPUT				0x0000C2E0
+#define CS42L43_SWIRE_DP3_CH2_INPUT				0x0000C2F0
+#define CS42L43_SWIRE_DP4_CH1_INPUT				0x0000C300
+#define CS42L43_SWIRE_DP4_CH2_INPUT				0x0000C310
+#define CS42L43_ASRC_INT1_INPUT1				0x0000C400
+#define CS42L43_ASRC_INT2_INPUT1				0x0000C410
+#define CS42L43_ASRC_INT3_INPUT1				0x0000C420
+#define CS42L43_ASRC_INT4_INPUT1				0x0000C430
+#define CS42L43_ASRC_DEC1_INPUT1				0x0000C440
+#define CS42L43_ASRC_DEC2_INPUT1				0x0000C450
+#define CS42L43_ASRC_DEC3_INPUT1				0x0000C460
+#define CS42L43_ASRC_DEC4_INPUT1				0x0000C470
+#define CS42L43_ISRC1INT1_INPUT1				0x0000C500
+#define CS42L43_ISRC1INT2_INPUT1				0x0000C510
+#define CS42L43_ISRC1DEC1_INPUT1				0x0000C520
+#define CS42L43_ISRC1DEC2_INPUT1				0x0000C530
+#define CS42L43_ISRC2INT1_INPUT1				0x0000C540
+#define CS42L43_ISRC2INT2_INPUT1				0x0000C550
+#define CS42L43_ISRC2DEC1_INPUT1				0x0000C560
+#define CS42L43_ISRC2DEC2_INPUT1				0x0000C570
+#define CS42L43_EQ1MIX_INPUT1					0x0000C580
+#define CS42L43_EQ1MIX_INPUT2					0x0000C584
+#define CS42L43_EQ1MIX_INPUT3					0x0000C588
+#define CS42L43_EQ1MIX_INPUT4					0x0000C58C
+#define CS42L43_EQ2MIX_INPUT1					0x0000C590
+#define CS42L43_EQ2MIX_INPUT2					0x0000C594
+#define CS42L43_EQ2MIX_INPUT3					0x0000C598
+#define CS42L43_EQ2MIX_INPUT4					0x0000C59C
+#define CS42L43_SPDIF1_INPUT1					0x0000C600
+#define CS42L43_SPDIF2_INPUT1					0x0000C610
+#define CS42L43_AMP1MIX_INPUT1					0x0000C620
+#define CS42L43_AMP1MIX_INPUT2					0x0000C624
+#define CS42L43_AMP1MIX_INPUT3					0x0000C628
+#define CS42L43_AMP1MIX_INPUT4					0x0000C62C
+#define CS42L43_AMP2MIX_INPUT1					0x0000C630
+#define CS42L43_AMP2MIX_INPUT2					0x0000C634
+#define CS42L43_AMP2MIX_INPUT3					0x0000C638
+#define CS42L43_AMP2MIX_INPUT4					0x0000C63C
+#define CS42L43_AMP3MIX_INPUT1					0x0000C640
+#define CS42L43_AMP3MIX_INPUT2					0x0000C644
+#define CS42L43_AMP3MIX_INPUT3					0x0000C648
+#define CS42L43_AMP3MIX_INPUT4					0x0000C64C
+#define CS42L43_AMP4MIX_INPUT1					0x0000C650
+#define CS42L43_AMP4MIX_INPUT2					0x0000C654
+#define CS42L43_AMP4MIX_INPUT3					0x0000C658
+#define CS42L43_AMP4MIX_INPUT4					0x0000C65C
+#define CS42L43_ASRC_INT_ENABLES				0x0000E000
+#define CS42L43_ASRC_DEC_ENABLES				0x0000E004
+#define CS42L43_PDNCNTL						0x00010000
+#define CS42L43_RINGSENSE_DEB_CTRL				0x0001001C
+#define CS42L43_TIPSENSE_DEB_CTRL				0x00010020
+#define CS42L43_TIP_RING_SENSE_INTERRUPT_STATUS			0x00010028
+#define CS42L43_HS2						0x00010040
+#define CS42L43_HS_STAT						0x00010048
+#define CS42L43_MCU_SW_INTERRUPT				0x00010094
+#define CS42L43_STEREO_MIC_CTRL					0x000100A4
+#define CS42L43_STEREO_MIC_CLAMP_CTRL				0x000100C4
+#define CS42L43_BLOCK_EN2					0x00010104
+#define CS42L43_BLOCK_EN3					0x00010108
+#define CS42L43_BLOCK_EN4					0x0001010C
+#define CS42L43_BLOCK_EN5					0x00010110
+#define CS42L43_BLOCK_EN6					0x00010114
+#define CS42L43_BLOCK_EN7					0x00010118
+#define CS42L43_BLOCK_EN8					0x0001011C
+#define CS42L43_BLOCK_EN9					0x00010120
+#define CS42L43_BLOCK_EN10					0x00010124
+#define CS42L43_BLOCK_EN11					0x00010128
+#define CS42L43_TONE_CH1_CTRL					0x00010134
+#define CS42L43_TONE_CH2_CTRL					0x00010138
+#define CS42L43_MIC_DETECT_CONTROL_1				0x00011074
+#define CS42L43_DETECT_STATUS_1					0x0001107C
+#define CS42L43_HS_BIAS_SENSE_AND_CLAMP_AUTOCONTROL		0x00011090
+#define CS42L43_MIC_DETECT_CONTROL_ANDROID			0x000110B0
+#define CS42L43_ISRC1_CTRL					0x00012004
+#define CS42L43_ISRC2_CTRL					0x00013004
+#define CS42L43_CTRL_REG					0x00014000
+#define CS42L43_FDIV_FRAC					0x00014004
+#define CS42L43_CAL_RATIO					0x00014008
+#define CS42L43_SPI_CLK_CONFIG1					0x00016004
+#define CS42L43_SPI_CONFIG1					0x00016010
+#define CS42L43_SPI_CONFIG2					0x00016014
+#define CS42L43_SPI_CONFIG3					0x00016018
+#define CS42L43_SPI_CONFIG4					0x00016024
+#define CS42L43_SPI_STATUS1					0x00016100
+#define CS42L43_SPI_STATUS2					0x00016104
+#define CS42L43_TRAN_CONFIG1					0x00016200
+#define CS42L43_TRAN_CONFIG2					0x00016204
+#define CS42L43_TRAN_CONFIG3					0x00016208
+#define CS42L43_TRAN_CONFIG4					0x0001620C
+#define CS42L43_TRAN_CONFIG5					0x00016220
+#define CS42L43_TRAN_CONFIG6					0x00016224
+#define CS42L43_TRAN_CONFIG7					0x00016228
+#define CS42L43_TRAN_CONFIG8					0x0001622C
+#define CS42L43_TRAN_STATUS1					0x00016300
+#define CS42L43_TRAN_STATUS2					0x00016304
+#define CS42L43_TRAN_STATUS3					0x00016308
+#define CS42L43_TX_DATA						0x00016400
+#define CS42L43_RX_DATA						0x00016600
+#define CS42L43_DACCNFG1					0x00017000
+#define CS42L43_DACCNFG2					0x00017004
+#define CS42L43_HPPATHVOL					0x0001700C
+#define CS42L43_PGAVOL						0x00017014
+#define CS42L43_LOADDETRESULTS					0x00017018
+#define CS42L43_LOADDETENA					0x00017024
+#define CS42L43_CTRL						0x00017028
+#define CS42L43_COEFF_DATA_IN0					0x00018000
+#define CS42L43_COEFF_RD_WR0					0x00018008
+#define CS42L43_INIT_DONE0					0x00018010
+#define CS42L43_START_EQZ0					0x00018014
+#define CS42L43_MUTE_EQ_IN0					0x0001801C
+#define CS42L43_DECIM_INT					0x0001B000
+#define CS42L43_EQ_INT						0x0001B004
+#define CS42L43_ASP_INT						0x0001B008
+#define CS42L43_PLL_INT						0x0001B00C
+#define CS42L43_SOFT_INT					0x0001B010
+#define CS42L43_SWIRE_INT					0x0001B014
+#define CS42L43_MSM_INT						0x0001B018
+#define CS42L43_ACC_DET_INT					0x0001B01C
+#define CS42L43_I2C_TGT_INT					0x0001B020
+#define CS42L43_SPI_MSTR_INT					0x0001B024
+#define CS42L43_SW_TO_SPI_BRIDGE_INT				0x0001B028
+#define CS42L43_OTP_INT						0x0001B02C
+#define CS42L43_CLASS_D_AMP_INT					0x0001B030
+#define CS42L43_GPIO_INT					0x0001B034
+#define CS42L43_ASRC_INT					0x0001B038
+#define CS42L43_HPOUT_INT					0x0001B03C
+#define CS42L43_DECIM_MASK					0x0001B0A0
+#define CS42L43_EQ_MIX_MASK					0x0001B0A4
+#define CS42L43_ASP_MASK					0x0001B0A8
+#define CS42L43_PLL_MASK					0x0001B0AC
+#define CS42L43_SOFT_MASK					0x0001B0B0
+#define CS42L43_SWIRE_MASK					0x0001B0B4
+#define CS42L43_MSM_MASK					0x0001B0B8
+#define CS42L43_ACC_DET_MASK					0x0001B0BC
+#define CS42L43_I2C_TGT_MASK					0x0001B0C0
+#define CS42L43_SPI_MSTR_MASK					0x0001B0C4
+#define CS42L43_SW_TO_SPI_BRIDGE_MASK				0x0001B0C8
+#define CS42L43_OTP_MASK					0x0001B0CC
+#define CS42L43_CLASS_D_AMP_MASK				0x0001B0D0
+#define CS42L43_GPIO_INT_MASK					0x0001B0D4
+#define CS42L43_ASRC_MASK					0x0001B0D8
+#define CS42L43_HPOUT_MASK					0x0001B0DC
+#define CS42L43_DECIM_INT_SHADOW				0x0001B300
+#define CS42L43_EQ_MIX_INT_SHADOW				0x0001B304
+#define CS42L43_ASP_INT_SHADOW					0x0001B308
+#define CS42L43_PLL_INT_SHADOW					0x0001B30C
+#define CS42L43_SOFT_INT_SHADOW					0x0001B310
+#define CS42L43_SWIRE_INT_SHADOW				0x0001B314
+#define CS42L43_MSM_INT_SHADOW					0x0001B318
+#define CS42L43_ACC_DET_INT_SHADOW				0x0001B31C
+#define CS42L43_I2C_TGT_INT_SHADOW				0x0001B320
+#define CS42L43_SPI_MSTR_INT_SHADOW				0x0001B324
+#define CS42L43_SW_TO_SPI_BRIDGE_SHADOW				0x0001B328
+#define CS42L43_OTP_INT_SHADOW					0x0001B32C
+#define CS42L43_CLASS_D_AMP_INT_SHADOW				0x0001B330
+#define CS42L43_GPIO_SHADOW					0x0001B334
+#define CS42L43_ASRC_SHADOW					0x0001B338
+#define CS42L43_HP_OUT_SHADOW					0x0001B33C
+#define CS42L43_BOOT_CONTROL					0x00101000
+#define CS42L43_BLOCK_EN					0x00101008
+#define CS42L43_SHUTTER_CONTROL					0x0010100C
+#define CS42L43_MCU_SW_REV					0x00114000
+#define CS42L43_PATCH_START_ADDR				0x00114004
+#define CS42L43_NEED_CONFIGS					0x0011400C
+#define CS42L43_BOOT_STATUS					0x0011401C
+#define CS42L43_FW_SH_BOOT_CFG_NEED_CONFIGS			0x0011F8F8
+#define CS42L43_FW_MISSION_CTRL_NEED_CONFIGS			0x0011FE00
+#define CS42L43_FW_MISSION_CTRL_HAVE_CONFIGS			0x0011FE04
+#define CS42L43_FW_MISSION_CTRL_MM_CTRL_SELECTION		0x0011FE0C
+#define CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG			0x0011FE10
+#define CS42L43_MCU_RAM_MAX					0x0011FFFF
+
+/* CS42L43_DEVID */
+#define CS42L43_DEVID_VAL					0x00042A43
+
+/* CS42L43_GEN_INT_STAT_1 */
+#define CS42L43_INT_STAT_GEN1_MASK				0x00000001
+#define CS42L43_INT_STAT_GEN1_SHIFT				0
+
+/* CS42L43_SFT_RESET */
+#define CS42L43_SFT_RESET_MASK					0xFF000000
+#define CS42L43_SFT_RESET_SHIFT					24
+
+#define CS42L43_SFT_RESET_VAL					0x5A000000
+
+/* CS42L43_DRV_CTRL1 */
+#define CS42L43_ASP_DOUT_DRV_MASK				0x00038000
+#define CS42L43_ASP_DOUT_DRV_SHIFT				15
+#define CS42L43_ASP_FSYNC_DRV_MASK				0x00000E00
+#define CS42L43_ASP_FSYNC_DRV_SHIFT				9
+#define CS42L43_ASP_BCLK_DRV_MASK				0x000001C0
+#define CS42L43_ASP_BCLK_DRV_SHIFT				6
+
+/* CS42L43_DRV_CTRL3 */
+#define CS42L43_I2C_ADDR_DRV_MASK				0x30000000
+#define CS42L43_I2C_ADDR_DRV_SHIFT				28
+#define CS42L43_I2C_SDA_DRV_MASK				0x0C000000
+#define CS42L43_I2C_SDA_DRV_SHIFT				26
+#define CS42L43_PDMOUT2_CLK_DRV_MASK				0x00E00000
+#define CS42L43_PDMOUT2_CLK_DRV_SHIFT				21
+#define CS42L43_PDMOUT2_DATA_DRV_MASK				0x001C0000
+#define CS42L43_PDMOUT2_DATA_DRV_SHIFT				18
+#define CS42L43_PDMOUT1_CLK_DRV_MASK				0x00038000
+#define CS42L43_PDMOUT1_CLK_DRV_SHIFT				15
+#define CS42L43_PDMOUT1_DATA_DRV_MASK				0x00007000
+#define CS42L43_PDMOUT1_DATA_DRV_SHIFT				12
+#define CS42L43_SPI_MISO_DRV_MASK				0x00000038
+#define CS42L43_SPI_MISO_DRV_SHIFT				3
+
+/* CS42L43_DRV_CTRL4 */
+#define CS42L43_GPIO3_DRV_MASK					0x00000E00
+#define CS42L43_GPIO3_DRV_SHIFT					9
+#define CS42L43_GPIO2_DRV_MASK					0x000001C0
+#define CS42L43_GPIO2_DRV_SHIFT					6
+#define CS42L43_GPIO1_DRV_MASK					0x00000038
+#define CS42L43_GPIO1_DRV_SHIFT					3
+
+/* CS42L43_DRV_CTRL_5 */
+#define CS42L43_I2C_SCL_DRV_MASK				0x18000000
+#define CS42L43_I2C_SCL_DRV_SHIFT				27
+#define CS42L43_SPI_SCK_DRV_MASK				0x07000000
+#define CS42L43_SPI_SCK_DRV_SHIFT				24
+#define CS42L43_SPI_MOSI_DRV_MASK				0x00E00000
+#define CS42L43_SPI_MOSI_DRV_SHIFT				21
+#define CS42L43_SPI_SSB_DRV_MASK				0x001C0000
+#define CS42L43_SPI_SSB_DRV_SHIFT				18
+#define CS42L43_ASP_DIN_DRV_MASK				0x000001C0
+#define CS42L43_ASP_DIN_DRV_SHIFT				6
+
+/* CS42L43_GPIO_CTRL1 */
+#define CS42L43_GPIO3_POL_MASK					0x00040000
+#define CS42L43_GPIO3_POL_SHIFT					18
+#define CS42L43_GPIO2_POL_MASK					0x00020000
+#define CS42L43_GPIO2_POL_SHIFT					17
+#define CS42L43_GPIO1_POL_MASK					0x00010000
+#define CS42L43_GPIO1_POL_SHIFT					16
+#define CS42L43_GPIO3_LVL_MASK					0x00000400
+#define CS42L43_GPIO3_LVL_SHIFT					10
+#define CS42L43_GPIO2_LVL_MASK					0x00000200
+#define CS42L43_GPIO2_LVL_SHIFT					9
+#define CS42L43_GPIO1_LVL_MASK					0x00000100
+#define CS42L43_GPIO1_LVL_SHIFT					8
+#define CS42L43_GPIO3_DIR_MASK					0x00000004
+#define CS42L43_GPIO3_DIR_SHIFT					2
+#define CS42L43_GPIO2_DIR_MASK					0x00000002
+#define CS42L43_GPIO2_DIR_SHIFT					1
+#define CS42L43_GPIO1_DIR_MASK					0x00000001
+#define CS42L43_GPIO1_DIR_SHIFT					0
+
+/* CS42L43_GPIO_CTRL2 */
+#define CS42L43_GPIO3_DEGLITCH_BYP_MASK				0x00000004
+#define CS42L43_GPIO3_DEGLITCH_BYP_SHIFT			2
+#define CS42L43_GPIO2_DEGLITCH_BYP_MASK				0x00000002
+#define CS42L43_GPIO2_DEGLITCH_BYP_SHIFT			1
+#define CS42L43_GPIO1_DEGLITCH_BYP_MASK				0x00000001
+#define CS42L43_GPIO1_DEGLITCH_BYP_SHIFT			0
+
+/* CS42L43_GPIO_STS */
+#define CS42L43_GPIO3_STS_MASK					0x00000004
+#define CS42L43_GPIO3_STS_SHIFT					2
+#define CS42L43_GPIO2_STS_MASK					0x00000002
+#define CS42L43_GPIO2_STS_SHIFT					1
+#define CS42L43_GPIO1_STS_MASK					0x00000001
+#define CS42L43_GPIO1_STS_SHIFT					0
+
+/* CS42L43_GPIO_FN_SEL */
+#define CS42L43_GPIO3_FN_SEL_MASK				0x00000004
+#define CS42L43_GPIO3_FN_SEL_SHIFT				2
+#define CS42L43_GPIO1_FN_SEL_MASK				0x00000001
+#define CS42L43_GPIO1_FN_SEL_SHIFT				0
+
+/* CS42L43_MCLK_SRC_SEL */
+#define CS42L43_OSC_PLL_MCLK_SEL_MASK				0x00000001
+#define CS42L43_OSC_PLL_MCLK_SEL_SHIFT				0
+
+/* CS42L43_SAMPLE_RATE1..CS42L43_SAMPLE_RATE4 */
+#define CS42L43_SAMPLE_RATE_MASK				0x0000001F
+#define CS42L43_SAMPLE_RATE_SHIFT				0
+
+/* CS42L43_PLL_CONTROL */
+#define CS42L43_PLL_REFCLK_EN_MASK				0x00000008
+#define CS42L43_PLL_REFCLK_EN_SHIFT				3
+#define CS42L43_PLL_REFCLK_DIV_MASK				0x00000006
+#define CS42L43_PLL_REFCLK_DIV_SHIFT				1
+#define CS42L43_PLL_REFCLK_SRC_MASK				0x00000001
+#define CS42L43_PLL_REFCLK_SRC_SHIFT				0
+
+/* CS42L43_FS_SELECT1 */
+#define CS42L43_ASP_RATE_MASK					0x00000003
+#define CS42L43_ASP_RATE_SHIFT					0
+
+/* CS42L43_FS_SELECT2 */
+#define CS42L43_ASRC_DEC_OUT_RATE_MASK				0x000000C0
+#define CS42L43_ASRC_DEC_OUT_RATE_SHIFT				6
+#define CS42L43_ASRC_INT_OUT_RATE_MASK				0x00000030
+#define CS42L43_ASRC_INT_OUT_RATE_SHIFT				4
+#define CS42L43_ASRC_DEC_IN_RATE_MASK				0x0000000C
+#define CS42L43_ASRC_DEC_IN_RATE_SHIFT				2
+#define CS42L43_ASRC_INT_IN_RATE_MASK				0x00000003
+#define CS42L43_ASRC_INT_IN_RATE_SHIFT				0
+
+/* CS42L43_FS_SELECT3 */
+#define CS42L43_HPOUT_RATE_MASK					0x0000C000
+#define CS42L43_HPOUT_RATE_SHIFT				14
+#define CS42L43_EQZ_RATE_MASK					0x00003000
+#define CS42L43_EQZ_RATE_SHIFT					12
+#define CS42L43_DIAGGEN_RATE_MASK				0x00000C00
+#define CS42L43_DIAGGEN_RATE_SHIFT				10
+#define CS42L43_DECIM_CH4_RATE_MASK				0x00000300
+#define CS42L43_DECIM_CH4_RATE_SHIFT				8
+#define CS42L43_DECIM_CH3_RATE_MASK				0x000000C0
+#define CS42L43_DECIM_CH3_RATE_SHIFT				6
+#define CS42L43_DECIM_CH2_RATE_MASK				0x00000030
+#define CS42L43_DECIM_CH2_RATE_SHIFT				4
+#define CS42L43_DECIM_CH1_RATE_MASK				0x0000000C
+#define CS42L43_DECIM_CH1_RATE_SHIFT				2
+#define CS42L43_AMP1_2_RATE_MASK				0x00000003
+#define CS42L43_AMP1_2_RATE_SHIFT				0
+
+/* CS42L43_FS_SELECT4 */
+#define CS42L43_SW_DP7_RATE_MASK				0x00C00000
+#define CS42L43_SW_DP7_RATE_SHIFT				22
+#define CS42L43_SW_DP6_RATE_MASK				0x00300000
+#define CS42L43_SW_DP6_RATE_SHIFT				20
+#define CS42L43_SPDIF_RATE_MASK					0x000C0000
+#define CS42L43_SPDIF_RATE_SHIFT				18
+#define CS42L43_SW_DP5_RATE_MASK				0x00030000
+#define CS42L43_SW_DP5_RATE_SHIFT				16
+#define CS42L43_SW_DP4_RATE_MASK				0x0000C000
+#define CS42L43_SW_DP4_RATE_SHIFT				14
+#define CS42L43_SW_DP3_RATE_MASK				0x00003000
+#define CS42L43_SW_DP3_RATE_SHIFT				12
+#define CS42L43_SW_DP2_RATE_MASK				0x00000C00
+#define CS42L43_SW_DP2_RATE_SHIFT				10
+#define CS42L43_SW_DP1_RATE_MASK				0x00000300
+#define CS42L43_SW_DP1_RATE_SHIFT				8
+#define CS42L43_ISRC2_LOW_RATE_MASK				0x000000C0
+#define CS42L43_ISRC2_LOW_RATE_SHIFT				6
+#define CS42L43_ISRC2_HIGH_RATE_MASK				0x00000030
+#define CS42L43_ISRC2_HIGH_RATE_SHIFT				4
+#define CS42L43_ISRC1_LOW_RATE_MASK				0x0000000C
+#define CS42L43_ISRC1_LOW_RATE_SHIFT				2
+#define CS42L43_ISRC1_HIGH_RATE_MASK				0x00000003
+#define CS42L43_ISRC1_HIGH_RATE_SHIFT				0
+
+/* CS42L43_PDM_CONTROL */
+#define CS42L43_PDM2_CLK_DIV_MASK				0x0000000C
+#define CS42L43_PDM2_CLK_DIV_SHIFT				2
+#define CS42L43_PDM1_CLK_DIV_MASK				0x00000003
+#define CS42L43_PDM1_CLK_DIV_SHIFT				0
+
+/* CS42L43_ASP_CLK_CONFIG1 */
+#define CS42L43_ASP_BCLK_N_MASK					0x03FF0000
+#define CS42L43_ASP_BCLK_N_SHIFT				16
+#define CS42L43_ASP_BCLK_M_MASK					0x000003FF
+#define CS42L43_ASP_BCLK_M_SHIFT				0
+
+/* CS42L43_ASP_CLK_CONFIG2 */
+#define CS42L43_ASP_MASTER_MODE_MASK				0x00000002
+#define CS42L43_ASP_MASTER_MODE_SHIFT				1
+#define CS42L43_ASP_BCLK_INV_MASK				0x00000001
+#define CS42L43_ASP_BCLK_INV_SHIFT				0
+
+/* CS42L43_OSC_DIV_SEL */
+#define CS42L43_OSC_DIV2_EN_MASK				0x00000001
+#define CS42L43_OSC_DIV2_EN_SHIFT				0
+
+/* CS42L43_ADC_B_CTRL1..CS42L43_ADC_B_CTRL1 */
+#define CS42L43_PGA_WIDESWING_MODE_EN_MASK			0x00000080
+#define CS42L43_PGA_WIDESWING_MODE_EN_SHIFT			7
+#define CS42L43_ADC_AIN_SEL_MASK				0x00000010
+#define CS42L43_ADC_AIN_SEL_SHIFT				4
+#define CS42L43_ADC_PGA_GAIN_MASK				0x0000000F
+#define CS42L43_ADC_PGA_GAIN_SHIFT				0
+
+/* CS42L43_DECIM_HPF_WNF_CTRL1..CS42L43_DECIM_HPF_WNF_CTRL4 */
+#define CS42L43_DECIM_WNF_CF_MASK				0x00000070
+#define CS42L43_DECIM_WNF_CF_SHIFT				4
+#define CS42L43_DECIM_WNF_EN_MASK				0x00000008
+#define CS42L43_DECIM_WNF_EN_SHIFT				3
+#define CS42L43_DECIM_HPF_CF_MASK				0x00000006
+#define CS42L43_DECIM_HPF_CF_SHIFT				1
+#define CS42L43_DECIM_HPF_EN_MASK				0x00000001
+#define CS42L43_DECIM_HPF_EN_SHIFT				0
+
+/* CS42L43_DMIC_PDM_CTRL */
+#define CS42L43_PDM2R_INV_MASK					0x00000020
+#define CS42L43_PDM2R_INV_SHIFT					5
+#define CS42L43_PDM2L_INV_MASK					0x00000010
+#define CS42L43_PDM2L_INV_SHIFT					4
+#define CS42L43_PDM1R_INV_MASK					0x00000008
+#define CS42L43_PDM1R_INV_SHIFT					3
+#define CS42L43_PDM1L_INV_MASK					0x00000004
+#define CS42L43_PDM1L_INV_SHIFT					2
+
+/* CS42L43_DECIM_VOL_CTRL_CH1_CH2 */
+#define CS42L43_DECIM2_MUTE_MASK				0x80000000
+#define CS42L43_DECIM2_MUTE_SHIFT				31
+#define CS42L43_DECIM2_VOL_MASK					0x3FC00000
+#define CS42L43_DECIM2_VOL_SHIFT				22
+#define CS42L43_DECIM2_VD_RAMP_MASK				0x00380000
+#define CS42L43_DECIM2_VD_RAMP_SHIFT				19
+#define CS42L43_DECIM2_VI_RAMP_MASK				0x00070000
+#define CS42L43_DECIM2_VI_RAMP_SHIFT				16
+#define CS42L43_DECIM1_MUTE_MASK				0x00008000
+#define CS42L43_DECIM1_MUTE_SHIFT				15
+#define CS42L43_DECIM1_VOL_MASK					0x00003FC0
+#define CS42L43_DECIM1_VOL_SHIFT				6
+#define CS42L43_DECIM1_VD_RAMP_MASK				0x00000038
+#define CS42L43_DECIM1_VD_RAMP_SHIFT				3
+#define CS42L43_DECIM1_VI_RAMP_MASK				0x00000007
+#define CS42L43_DECIM1_VI_RAMP_SHIFT				0
+
+/* CS42L43_DECIM_VOL_CTRL_CH3_CH4 */
+#define CS42L43_DECIM4_MUTE_MASK				0x80000000
+#define CS42L43_DECIM4_MUTE_SHIFT				31
+#define CS42L43_DECIM4_VOL_MASK					0x3FC00000
+#define CS42L43_DECIM4_VOL_SHIFT				22
+#define CS42L43_DECIM4_VD_RAMP_MASK				0x00380000
+#define CS42L43_DECIM4_VD_RAMP_SHIFT				19
+#define CS42L43_DECIM4_VI_RAMP_MASK				0x00070000
+#define CS42L43_DECIM4_VI_RAMP_SHIFT				16
+#define CS42L43_DECIM3_MUTE_MASK				0x00008000
+#define CS42L43_DECIM3_MUTE_SHIFT				15
+#define CS42L43_DECIM3_VOL_MASK					0x00003FC0
+#define CS42L43_DECIM3_VOL_SHIFT				6
+#define CS42L43_DECIM3_VD_RAMP_MASK				0x00000038
+#define CS42L43_DECIM3_VD_RAMP_SHIFT				3
+#define CS42L43_DECIM3_VI_RAMP_MASK				0x00000007
+#define CS42L43_DECIM3_VI_RAMP_SHIFT				0
+
+/* CS42L43_DECIM_VOL_CTRL_UPDATE */
+#define CS42L43_DECIM4_VOL_UPDATE_MASK				0x00000008
+#define CS42L43_DECIM4_VOL_UPDATE_SHIFT				3
+#define CS42L43_DECIM3_VOL_UPDATE_MASK				0x00000004
+#define CS42L43_DECIM3_VOL_UPDATE_SHIFT				2
+#define CS42L43_DECIM2_VOL_UPDATE_MASK				0x00000002
+#define CS42L43_DECIM2_VOL_UPDATE_SHIFT				1
+#define CS42L43_DECIM1_VOL_UPDATE_MASK				0x00000001
+#define CS42L43_DECIM1_VOL_UPDATE_SHIFT				0
+
+/* CS42L43_INTP_VOLUME_CTRL1..CS42L43_INTP_VOLUME_CTRL2 */
+#define CS42L43_AMP1_2_VU_MASK					0x00000200
+#define CS42L43_AMP1_2_VU_SHIFT					9
+#define CS42L43_AMP_MUTE_MASK					0x00000100
+#define CS42L43_AMP_MUTE_SHIFT					8
+#define CS42L43_AMP_VOL_MASK					0x000000FF
+#define CS42L43_AMP_VOL_SHIFT					0
+
+/* CS42L43_AMP1_2_VOL_RAMP */
+#define CS42L43_AMP1_2_VD_RAMP_MASK				0x00000070
+#define CS42L43_AMP1_2_VD_RAMP_SHIFT				4
+#define CS42L43_AMP1_2_VI_RAMP_MASK				0x00000007
+#define CS42L43_AMP1_2_VI_RAMP_SHIFT				0
+
+/* CS42L43_ASP_CTRL */
+#define CS42L43_ASP_FSYNC_MODE_MASK				0x00000004
+#define CS42L43_ASP_FSYNC_MODE_SHIFT				2
+#define CS42L43_ASP_BCLK_EN_MASK				0x00000002
+#define CS42L43_ASP_BCLK_EN_SHIFT				1
+#define CS42L43_ASP_FSYNC_EN_MASK				0x00000001
+#define CS42L43_ASP_FSYNC_EN_SHIFT				0
+
+/* CS42L43_ASP_FSYNC_CTRL1 */
+#define CS42L43_ASP_FSYNC_M_MASK				0x0007FFFF
+#define CS42L43_ASP_FSYNC_M_SHIFT				0
+
+/* CS42L43_ASP_FSYNC_CTRL3 */
+#define CS42L43_ASP_FSYNC_IN_INV_MASK				0x00000002
+#define CS42L43_ASP_FSYNC_IN_INV_SHIFT				1
+#define CS42L43_ASP_FSYNC_OUT_INV_MASK				0x00000001
+#define CS42L43_ASP_FSYNC_OUT_INV_SHIFT				0
+
+/* CS42L43_ASP_FSYNC_CTRL4 */
+#define CS42L43_ASP_NUM_BCLKS_PER_FSYNC_MASK			0x00001FFE
+#define CS42L43_ASP_NUM_BCLKS_PER_FSYNC_SHIFT			1
+
+/* CS42L43_ASP_DATA_CTRL */
+#define CS42L43_ASP_FSYNC_FRAME_START_PHASE_MASK		0x00000008
+#define CS42L43_ASP_FSYNC_FRAME_START_PHASE_SHIFT		3
+#define CS42L43_ASP_FSYNC_FRAME_START_DLY_MASK			0x00000007
+#define CS42L43_ASP_FSYNC_FRAME_START_DLY_SHIFT			0
+
+/* CS42L43_ASP_RX_EN */
+#define CS42L43_ASP_RX_CH6_EN_MASK				0x00000020
+#define CS42L43_ASP_RX_CH6_EN_SHIFT				5
+#define CS42L43_ASP_RX_CH5_EN_MASK				0x00000010
+#define CS42L43_ASP_RX_CH5_EN_SHIFT				4
+#define CS42L43_ASP_RX_CH4_EN_MASK				0x00000008
+#define CS42L43_ASP_RX_CH4_EN_SHIFT				3
+#define CS42L43_ASP_RX_CH3_EN_MASK				0x00000004
+#define CS42L43_ASP_RX_CH3_EN_SHIFT				2
+#define CS42L43_ASP_RX_CH2_EN_MASK				0x00000002
+#define CS42L43_ASP_RX_CH2_EN_SHIFT				1
+#define CS42L43_ASP_RX_CH1_EN_MASK				0x00000001
+#define CS42L43_ASP_RX_CH1_EN_SHIFT				0
+
+/* CS42L43_ASP_TX_EN */
+#define CS42L43_ASP_TX_CH6_EN_MASK				0x00000020
+#define CS42L43_ASP_TX_CH6_EN_SHIFT				5
+#define CS42L43_ASP_TX_CH5_EN_MASK				0x00000010
+#define CS42L43_ASP_TX_CH5_EN_SHIFT				4
+#define CS42L43_ASP_TX_CH4_EN_MASK				0x00000008
+#define CS42L43_ASP_TX_CH4_EN_SHIFT				3
+#define CS42L43_ASP_TX_CH3_EN_MASK				0x00000004
+#define CS42L43_ASP_TX_CH3_EN_SHIFT				2
+#define CS42L43_ASP_TX_CH2_EN_MASK				0x00000002
+#define CS42L43_ASP_TX_CH2_EN_SHIFT				1
+#define CS42L43_ASP_TX_CH1_EN_MASK				0x00000001
+#define CS42L43_ASP_TX_CH1_EN_SHIFT				0
+
+/* CS42L43_ASP_RX_CH1_CTRL..CS42L43_ASP_TX_CH6_CTRL */
+#define CS42L43_ASP_CH_WIDTH_MASK				0x001F0000
+#define CS42L43_ASP_CH_WIDTH_SHIFT				16
+#define CS42L43_ASP_CH_SLOT_MASK				0x00001FFE
+#define CS42L43_ASP_CH_SLOT_SHIFT				1
+#define CS42L43_ASP_CH_SLOT_PHASE_MASK				0x00000001
+#define CS42L43_ASP_CH_SLOT_PHASE_SHIFT				0
+
+/* CS42L43_ASPTX1_INPUT..CS42L43_AMP4MIX_INPUT4 */
+#define CS42L43_MIXER_VOL_MASK					0x00FE0000
+#define CS42L43_MIXER_VOL_SHIFT					17
+#define CS42L43_MIXER_SRC_MASK					0x000001FF
+#define CS42L43_MIXER_SRC_SHIFT					0
+
+/* CS42L43_ASRC_INT_ENABLES */
+#define CS42L43_ASRC_INT4_EN_MASK				0x00000008
+#define CS42L43_ASRC_INT4_EN_SHIFT				3
+#define CS42L43_ASRC_INT3_EN_MASK				0x00000004
+#define CS42L43_ASRC_INT3_EN_SHIFT				2
+#define CS42L43_ASRC_INT2_EN_MASK				0x00000002
+#define CS42L43_ASRC_INT2_EN_SHIFT				1
+#define CS42L43_ASRC_INT1_EN_MASK				0x00000001
+#define CS42L43_ASRC_INT1_EN_SHIFT				0
+
+/* CS42L43_ASRC_DEC_ENABLES */
+#define CS42L43_ASRC_DEC4_EN_MASK				0x00000008
+#define CS42L43_ASRC_DEC4_EN_SHIFT				3
+#define CS42L43_ASRC_DEC3_EN_MASK				0x00000004
+#define CS42L43_ASRC_DEC3_EN_SHIFT				2
+#define CS42L43_ASRC_DEC2_EN_MASK				0x00000002
+#define CS42L43_ASRC_DEC2_EN_SHIFT				1
+#define CS42L43_ASRC_DEC1_EN_MASK				0x00000001
+#define CS42L43_ASRC_DEC1_EN_SHIFT				0
+
+/* CS42L43_PDNCNTL */
+#define CS42L43_RING_SENSE_EN_MASK				0x00000002
+#define CS42L43_RING_SENSE_EN_SHIFT				1
+
+/* CS42L43_RINGSENSE_DEB_CTRL */
+#define CS42L43_RINGSENSE_INV_MASK				0x00000080
+#define CS42L43_RINGSENSE_INV_SHIFT				7
+#define CS42L43_RINGSENSE_PULLUP_PDNB_MASK			0x00000040
+#define CS42L43_RINGSENSE_PULLUP_PDNB_SHIFT			6
+#define CS42L43_RINGSENSE_FALLING_DB_TIME_MASK			0x00000038
+#define CS42L43_RINGSENSE_FALLING_DB_TIME_SHIFT			3
+#define CS42L43_RINGSENSE_RISING_DB_TIME_MASK			0x00000007
+#define CS42L43_RINGSENSE_RISING_DB_TIME_SHIFT			0
+
+/* CS42L43_TIPSENSE_DEB_CTRL */
+#define CS42L43_TIPSENSE_INV_MASK				0x00000080
+#define CS42L43_TIPSENSE_INV_SHIFT				7
+#define CS42L43_TIPSENSE_FALLING_DB_TIME_MASK			0x00000038
+#define CS42L43_TIPSENSE_FALLING_DB_TIME_SHIFT			3
+#define CS42L43_TIPSENSE_RISING_DB_TIME_MASK			0x00000007
+#define CS42L43_TIPSENSE_RISING_DB_TIME_SHIFT			0
+
+/* CS42L43_TIP_RING_SENSE_INTERRUPT_STATUS */
+#define CS42L43_TIPSENSE_UNPLUG_DB_STS_MASK			0x00000008
+#define CS42L43_TIPSENSE_UNPLUG_DB_STS_SHIFT			3
+#define CS42L43_TIPSENSE_PLUG_DB_STS_MASK			0x00000004
+#define CS42L43_TIPSENSE_PLUG_DB_STS_SHIFT			2
+#define CS42L43_RINGSENSE_UNPLUG_DB_STS_MASK			0x00000002
+#define CS42L43_RINGSENSE_UNPLUG_DB_STS_SHIFT			1
+#define CS42L43_RINGSENSE_PLUG_DB_STS_MASK			0x00000001
+#define CS42L43_RINGSENSE_PLUG_DB_STS_SHIFT			0
+
+/* CS42L43_HS2 */
+#define CS42L43_HS_CLAMP_DISABLE_MASK				0x10000000
+#define CS42L43_HS_CLAMP_DISABLE_SHIFT				28
+#define CS42L43_HSBIAS_RAMP_MASK				0x0C000000
+#define CS42L43_HSBIAS_RAMP_SHIFT				26
+#define CS42L43_HSDET_MODE_MASK					0x00018000
+#define CS42L43_HSDET_MODE_SHIFT				15
+#define CS42L43_HSDET_MANUAL_MODE_MASK				0x00006000
+#define CS42L43_HSDET_MANUAL_MODE_SHIFT				13
+#define CS42L43_AUTO_HSDET_TIME_MASK				0x00000700
+#define CS42L43_AUTO_HSDET_TIME_SHIFT				8
+#define CS42L43_AMP3_4_GNDREF_HS3_SEL_MASK			0x00000080
+#define CS42L43_AMP3_4_GNDREF_HS3_SEL_SHIFT			7
+#define CS42L43_AMP3_4_GNDREF_HS4_SEL_MASK			0x00000040
+#define CS42L43_AMP3_4_GNDREF_HS4_SEL_SHIFT			6
+#define CS42L43_HSBIAS_GNDREF_HS3_SEL_MASK			0x00000020
+#define CS42L43_HSBIAS_GNDREF_HS3_SEL_SHIFT			5
+#define CS42L43_HSBIAS_GNDREF_HS4_SEL_MASK			0x00000010
+#define CS42L43_HSBIAS_GNDREF_HS4_SEL_SHIFT			4
+#define CS42L43_HSBIAS_OUT_HS3_SEL_MASK				0x00000008
+#define CS42L43_HSBIAS_OUT_HS3_SEL_SHIFT			3
+#define CS42L43_HSBIAS_OUT_HS4_SEL_MASK				0x00000004
+#define CS42L43_HSBIAS_OUT_HS4_SEL_SHIFT			2
+#define CS42L43_HSGND_HS3_SEL_MASK				0x00000002
+#define CS42L43_HSGND_HS3_SEL_SHIFT				1
+#define CS42L43_HSGND_HS4_SEL_MASK				0x00000001
+#define CS42L43_HSGND_HS4_SEL_SHIFT				0
+
+/* CS42L43_HS_STAT */
+#define CS42L43_HSDET_TYPE_STS_MASK				0x00000007
+#define CS42L43_HSDET_TYPE_STS_SHIFT				0
+
+/* CS42L43_MCU_SW_INTERRUPT */
+#define CS42L43_CONTROL_IND_MASK				0x00000004
+#define CS42L43_CONTROL_IND_SHIFT				2
+#define CS42L43_CONFIGS_IND_MASK				0x00000002
+#define CS42L43_CONFIGS_IND_SHIFT				1
+#define CS42L43_PATCH_IND_MASK					0x00000001
+#define CS42L43_PATCH_IND_SHIFT					0
+
+/* CS42L43_STEREO_MIC_CTRL */
+#define CS42L43_HS2_BIAS_SENSE_EN_MASK				0x00000020
+#define CS42L43_HS2_BIAS_SENSE_EN_SHIFT				5
+#define CS42L43_HS1_BIAS_SENSE_EN_MASK				0x00000010
+#define CS42L43_HS1_BIAS_SENSE_EN_SHIFT				4
+#define CS42L43_HS2_BIAS_EN_MASK				0x00000008
+#define CS42L43_HS2_BIAS_EN_SHIFT				3
+#define CS42L43_HS1_BIAS_EN_MASK				0x00000004
+#define CS42L43_HS1_BIAS_EN_SHIFT				2
+#define CS42L43_JACK_STEREO_CONFIG_MASK				0x00000003
+#define CS42L43_JACK_STEREO_CONFIG_SHIFT			0
+
+/* CS42L43_STEREO_MIC_CLAMP_CTRL */
+#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK		0x00000002
+#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_SHIFT		1
+#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_MASK			0x00000001
+#define CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_SHIFT			0
+
+/* CS42L43_BLOCK_EN2 */
+#define CS42L43_SPI_MSTR_EN_MASK				0x00000001
+#define CS42L43_SPI_MSTR_EN_SHIFT				0
+
+/* CS42L43_BLOCK_EN3 */
+#define CS42L43_PDM2_DIN_R_EN_MASK				0x00000020
+#define CS42L43_PDM2_DIN_R_EN_SHIFT				5
+#define CS42L43_PDM2_DIN_L_EN_MASK				0x00000010
+#define CS42L43_PDM2_DIN_L_EN_SHIFT				4
+#define CS42L43_PDM1_DIN_R_EN_MASK				0x00000008
+#define CS42L43_PDM1_DIN_R_EN_SHIFT				3
+#define CS42L43_PDM1_DIN_L_EN_MASK				0x00000004
+#define CS42L43_PDM1_DIN_L_EN_SHIFT				2
+#define CS42L43_ADC2_EN_MASK					0x00000002
+#define CS42L43_ADC2_EN_SHIFT					1
+#define CS42L43_ADC1_EN_MASK					0x00000001
+#define CS42L43_ADC1_EN_SHIFT					0
+
+/* CS42L43_BLOCK_EN4 */
+#define CS42L43_ASRC_DEC_BANK_EN_MASK				0x00000002
+#define CS42L43_ASRC_DEC_BANK_EN_SHIFT				1
+#define CS42L43_ASRC_INT_BANK_EN_MASK				0x00000001
+#define CS42L43_ASRC_INT_BANK_EN_SHIFT				0
+
+/* CS42L43_BLOCK_EN5 */
+#define CS42L43_ISRC2_BANK_EN_MASK				0x00000002
+#define CS42L43_ISRC2_BANK_EN_SHIFT				1
+#define CS42L43_ISRC1_BANK_EN_MASK				0x00000001
+#define CS42L43_ISRC1_BANK_EN_SHIFT				0
+
+/* CS42L43_BLOCK_EN6 */
+#define CS42L43_MIXER_EN_MASK					0x00000001
+#define CS42L43_MIXER_EN_SHIFT					0
+
+/* CS42L43_BLOCK_EN7 */
+#define CS42L43_EQ_EN_MASK					0x00000001
+#define CS42L43_EQ_EN_SHIFT					0
+
+/* CS42L43_BLOCK_EN8 */
+#define CS42L43_HP_EN_MASK					0x00000001
+#define CS42L43_HP_EN_SHIFT					0
+
+/* CS42L43_BLOCK_EN9 */
+#define CS42L43_TONE_EN_MASK					0x00000001
+#define CS42L43_TONE_EN_SHIFT					0
+
+/* CS42L43_BLOCK_EN10 */
+#define CS42L43_AMP2_EN_MASK					0x00000002
+#define CS42L43_AMP2_EN_SHIFT					1
+#define CS42L43_AMP1_EN_MASK					0x00000001
+#define CS42L43_AMP1_EN_SHIFT					0
+
+/* CS42L43_BLOCK_EN11 */
+#define CS42L43_SPDIF_EN_MASK					0x00000001
+#define CS42L43_SPDIF_EN_SHIFT					0
+
+/* CS42L43_TONE_CH1_CTRL..CS42L43_TONE_CH2_CTRL  */
+#define CS42L43_TONE_FREQ_MASK					0x00000070
+#define CS42L43_TONE_FREQ_SHIFT					4
+#define CS42L43_TONE_SEL_MASK					0x0000000F
+#define CS42L43_TONE_SEL_SHIFT					0
+
+/* CS42L43_MIC_DETECT_CONTROL_1 */
+#define CS42L43_BUTTON_DETECT_MODE_MASK				0x00000018
+#define CS42L43_BUTTON_DETECT_MODE_SHIFT			3
+#define CS42L43_HSBIAS_MODE_MASK				0x00000006
+#define CS42L43_HSBIAS_MODE_SHIFT				1
+#define CS42L43_MIC_LVL_DET_DISABLE_MASK			0x00000001
+#define CS42L43_MIC_LVL_DET_DISABLE_SHIFT			0
+
+/* CS42L43_DETECT_STATUS_1 */
+#define CS42L43_HSDET_DC_STS_MASK				0x01FF0000
+#define CS42L43_HSDET_DC_STS_SHIFT				16
+#define CS42L43_JACKDET_STS_MASK				0x00000080
+#define CS42L43_JACKDET_STS_SHIFT				7
+#define CS42L43_HSBIAS_CLAMP_STS_MASK				0x00000040
+#define CS42L43_HSBIAS_CLAMP_STS_SHIFT				6
+
+/* CS42L43_HS_BIAS_SENSE_AND_CLAMP_AUTOCONTROL */
+#define CS42L43_JACKDET_MODE_MASK				0xC0000000
+#define CS42L43_JACKDET_MODE_SHIFT				30
+#define CS42L43_JACKDET_INV_MASK				0x20000000
+#define CS42L43_JACKDET_INV_SHIFT				29
+#define CS42L43_JACKDET_DB_TIME_MASK				0x03000000
+#define CS42L43_JACKDET_DB_TIME_SHIFT				24
+#define CS42L43_S0_AUTO_ADCMUTE_DISABLE_MASK			0x00800000
+#define CS42L43_S0_AUTO_ADCMUTE_DISABLE_SHIFT			23
+#define CS42L43_HSBIAS_SENSE_EN_MASK				0x00000080
+#define CS42L43_HSBIAS_SENSE_EN_SHIFT				7
+#define CS42L43_AUTO_HSBIAS_CLAMP_EN_MASK			0x00000040
+#define CS42L43_AUTO_HSBIAS_CLAMP_EN_SHIFT			6
+#define CS42L43_JACKDET_SENSE_EN_MASK				0x00000020
+#define CS42L43_JACKDET_SENSE_EN_SHIFT				5
+#define CS42L43_HSBIAS_SENSE_TRIP_MASK				0x00000007
+#define CS42L43_HSBIAS_SENSE_TRIP_SHIFT				0
+
+/* CS42L43_MIC_DETECT_CONTROL_ANDROID */
+#define CS42L43_HSDET_LVL_COMBWIDTH_MASK			0xC0000000
+#define CS42L43_HSDET_LVL_COMBWIDTH_SHIFT			30
+#define CS42L43_HSDET_LVL2_THRESH_MASK				0x01FF0000
+#define CS42L43_HSDET_LVL2_THRESH_SHIFT				16
+#define CS42L43_HSDET_LVL1_THRESH_MASK				0x000001FF
+#define CS42L43_HSDET_LVL1_THRESH_SHIFT				0
+
+/* CS42L43_ISRC1_CTRL..CS42L43_ISRC2_CTRL */
+#define CS42L43_ISRC_INT2_EN_MASK				0x00000200
+#define CS42L43_ISRC_INT2_EN_SHIFT				9
+#define CS42L43_ISRC_INT1_EN_MASK				0x00000100
+#define CS42L43_ISRC_INT1_EN_SHIFT				8
+#define CS42L43_ISRC_DEC2_EN_MASK				0x00000002
+#define CS42L43_ISRC_DEC2_EN_SHIFT				1
+#define CS42L43_ISRC_DEC1_EN_MASK				0x00000001
+#define CS42L43_ISRC_DEC1_EN_SHIFT				0
+
+/* CS42L43_CTRL_REG */
+#define CS42L43_PLL_MODE_BYPASS_500_MASK			0x00000004
+#define CS42L43_PLL_MODE_BYPASS_500_SHIFT			2
+#define CS42L43_PLL_MODE_BYPASS_1029_MASK			0x00000002
+#define CS42L43_PLL_MODE_BYPASS_1029_SHIFT			1
+#define CS42L43_PLL_EN_MASK					0x00000001
+#define CS42L43_PLL_EN_SHIFT					0
+
+/* CS42L43_FDIV_FRAC */
+#define CS42L43_PLL_DIV_INT_MASK				0xFF000000
+#define CS42L43_PLL_DIV_INT_SHIFT				24
+#define CS42L43_PLL_DIV_FRAC_BYTE2_MASK				0x00FF0000
+#define CS42L43_PLL_DIV_FRAC_BYTE2_SHIFT			16
+#define CS42L43_PLL_DIV_FRAC_BYTE1_MASK				0x0000FF00
+#define CS42L43_PLL_DIV_FRAC_BYTE1_SHIFT			8
+#define CS42L43_PLL_DIV_FRAC_BYTE0_MASK				0x000000FF
+#define CS42L43_PLL_DIV_FRAC_BYTE0_SHIFT			0
+
+/* CS42L43_CAL_RATIO */
+#define CS42L43_PLL_CAL_RATIO_MASK				0x000000FF
+#define CS42L43_PLL_CAL_RATIO_SHIFT				0
+
+/* CS42L43_SPI_CLK_CONFIG1 */
+#define CS42L43_SCLK_DIV_MASK					0x0000000F
+#define CS42L43_SCLK_DIV_SHIFT					0
+
+/* CS42L43_SPI_CONFIG1 */
+#define CS42L43_SPI_SS_IDLE_DUR_MASK				0x0F000000
+#define CS42L43_SPI_SS_IDLE_DUR_SHIFT				24
+#define CS42L43_SPI_SS_DELAY_DUR_MASK				0x000F0000
+#define CS42L43_SPI_SS_DELAY_DUR_SHIFT				16
+#define CS42L43_SPI_THREE_WIRE_MASK				0x00000100
+#define CS42L43_SPI_THREE_WIRE_SHIFT				8
+#define CS42L43_SPI_DPHA_MASK					0x00000040
+#define CS42L43_SPI_DPHA_SHIFT					6
+#define CS42L43_SPI_CPHA_MASK					0x00000020
+#define CS42L43_SPI_CPHA_SHIFT					5
+#define CS42L43_SPI_CPOL_MASK					0x00000010
+#define CS42L43_SPI_CPOL_SHIFT					4
+#define CS42L43_SPI_SS_SEL_MASK					0x00000007
+#define CS42L43_SPI_SS_SEL_SHIFT				0
+
+/* CS42L43_SPI_CONFIG2 */
+#define CS42L43_SPI_SS_FRC_MASK					0x00000001
+#define CS42L43_SPI_SS_FRC_SHIFT				0
+
+/* CS42L43_SPI_CONFIG3 */
+#define CS42L43_SPI_WDT_ENA_MASK				0x00000001
+#define CS42L43_SPI_WDT_ENA_SHIFT				0
+
+/* CS42L43_SPI_CONFIG4 */
+#define CS42L43_SPI_STALL_ENA_MASK				0x00010000
+#define CS42L43_SPI_STALL_ENA_SHIFT				16
+
+/* CS42L43_SPI_STATUS1 */
+#define CS42L43_SPI_ABORT_STS_MASK				0x00000002
+#define CS42L43_SPI_ABORT_STS_SHIFT				1
+#define CS42L43_SPI_DONE_STS_MASK				0x00000001
+#define CS42L43_SPI_DONE_STS_SHIFT				0
+
+/* CS42L43_SPI_STATUS2 */
+#define CS42L43_SPI_RX_DONE_STS_MASK				0x00000010
+#define CS42L43_SPI_RX_DONE_STS_SHIFT				4
+#define CS42L43_SPI_TX_DONE_STS_MASK				0x00000001
+#define CS42L43_SPI_TX_DONE_STS_SHIFT				0
+
+/* CS42L43_TRAN_CONFIG1 */
+#define CS42L43_SPI_START_MASK					0x00000001
+#define CS42L43_SPI_START_SHIFT					0
+
+/* CS42L43_TRAN_CONFIG2 */
+#define CS42L43_SPI_ABORT_MASK					0x00000001
+#define CS42L43_SPI_ABORT_SHIFT					0
+
+/* CS42L43_TRAN_CONFIG3 */
+#define CS42L43_SPI_WORD_SIZE_MASK				0x00070000
+#define CS42L43_SPI_WORD_SIZE_SHIFT				16
+#define CS42L43_SPI_CMD_MASK					0x00000003
+#define CS42L43_SPI_CMD_SHIFT					0
+
+/* CS42L43_TRAN_CONFIG4 */
+#define CS42L43_SPI_TX_LENGTH_MASK				0x0000FFFF
+#define CS42L43_SPI_TX_LENGTH_SHIFT				0
+
+/* CS42L43_TRAN_CONFIG5 */
+#define CS42L43_SPI_RX_LENGTH_MASK				0x0000FFFF
+#define CS42L43_SPI_RX_LENGTH_SHIFT				0
+
+/* CS42L43_TRAN_CONFIG6 */
+#define CS42L43_SPI_TX_BLOCK_LENGTH_MASK			0x0000000F
+#define CS42L43_SPI_TX_BLOCK_LENGTH_SHIFT			0
+
+/* CS42L43_TRAN_CONFIG7 */
+#define CS42L43_SPI_RX_BLOCK_LENGTH_MASK			0x0000000F
+#define CS42L43_SPI_RX_BLOCK_LENGTH_SHIFT			0
+
+/* CS42L43_TRAN_CONFIG8 */
+#define CS42L43_SPI_RX_DONE_MASK				0x00000010
+#define CS42L43_SPI_RX_DONE_SHIFT				4
+#define CS42L43_SPI_TX_DONE_MASK				0x00000001
+#define CS42L43_SPI_TX_DONE_SHIFT				0
+
+/* CS42L43_TRAN_STATUS1 */
+#define CS42L43_SPI_BUSY_STS_MASK				0x00000100
+#define CS42L43_SPI_BUSY_STS_SHIFT				8
+#define CS42L43_SPI_RX_REQUEST_MASK				0x00000010
+#define CS42L43_SPI_RX_REQUEST_SHIFT				4
+#define CS42L43_SPI_TX_REQUEST_MASK				0x00000001
+#define CS42L43_SPI_TX_REQUEST_SHIFT				0
+
+/* CS42L43_TRAN_STATUS2 */
+#define CS42L43_SPI_TX_BYTE_COUNT_MASK				0x0000FFFF
+#define CS42L43_SPI_TX_BYTE_COUNT_SHIFT				0
+
+/* CS42L43_TRAN_STATUS3 */
+#define CS42L43_SPI_RX_BYTE_COUNT_MASK				0x0000FFFF
+#define CS42L43_SPI_RX_BYTE_COUNT_SHIFT				0
+
+/* CS42L43_TX_DATA */
+#define CS42L43_SPI_TX_DATA_MASK				0xFFFFFFFF
+#define CS42L43_SPI_TX_DATA_SHIFT				0
+
+/* CS42L43_RX_DATA */
+#define CS42L43_SPI_RX_DATA_MASK				0xFFFFFFFF
+#define CS42L43_SPI_RX_DATA_SHIFT				0
+
+/* CS42L43_DACCNFG1 */
+#define CS42L43_HP_MSTR_VOL_CTRL_EN_MASK			0x00000008
+#define CS42L43_HP_MSTR_VOL_CTRL_EN_SHIFT			3
+#define CS42L43_AMP4_INV_MASK					0x00000002
+#define CS42L43_AMP4_INV_SHIFT					1
+#define CS42L43_AMP3_INV_MASK					0x00000001
+#define CS42L43_AMP3_INV_SHIFT					0
+
+/* CS42L43_DACCNFG2 */
+#define CS42L43_HP_AUTO_CLAMP_DISABLE_MASK			0x00000002
+#define CS42L43_HP_AUTO_CLAMP_DISABLE_SHIFT			1
+#define CS42L43_HP_HPF_EN_MASK					0x00000001
+#define CS42L43_HP_HPF_EN_SHIFT					0
+
+/* CS42L43_HPPATHVOL */
+#define CS42L43_AMP4_PATH_VOL_MASK				0x01FF0000
+#define CS42L43_AMP4_PATH_VOL_SHIFT				16
+#define CS42L43_AMP3_PATH_VOL_MASK				0x000001FF
+#define CS42L43_AMP3_PATH_VOL_SHIFT				0
+
+/* CS42L43_PGAVOL */
+#define CS42L43_HP_PATH_VOL_RAMP_MASK				0x0003C000
+#define CS42L43_HP_PATH_VOL_RAMP_SHIFT				14
+#define CS42L43_HP_PATH_VOL_ZC_MASK				0x00002000
+#define CS42L43_HP_PATH_VOL_ZC_SHIFT				13
+#define CS42L43_HP_PATH_VOL_SFT_MASK				0x00001000
+#define CS42L43_HP_PATH_VOL_SFT_SHIFT				12
+#define CS42L43_HP_DIG_VOL_RAMP_MASK				0x00000F00
+#define CS42L43_HP_DIG_VOL_RAMP_SHIFT				8
+#define CS42L43_HP_ANA_VOL_RAMP_MASK				0x0000000F
+#define CS42L43_HP_ANA_VOL_RAMP_SHIFT				0
+
+/* CS42L43_LOADDETRESULTS */
+#define CS42L43_AMP3_RES_DET_MASK				0x00000003
+#define CS42L43_AMP3_RES_DET_SHIFT				0
+
+/* CS42L43_LOADDETENA */
+#define CS42L43_HPLOAD_DET_EN_MASK				0x00000001
+#define CS42L43_HPLOAD_DET_EN_SHIFT				0
+
+/* CS42L43_CTRL */
+#define CS42L43_ADPTPWR_MODE_MASK				0x00000007
+#define CS42L43_ADPTPWR_MODE_SHIFT				0
+
+/* CS42L43_COEFF_RD_WR0 */
+#define CS42L43_WRITE_MODE_MASK					0x00000002
+#define CS42L43_WRITE_MODE_SHIFT				1
+
+/* CS42L43_INIT_DONE0 */
+#define CS42L43_INITIALIZE_DONE_MASK				0x00000001
+#define CS42L43_INITIALIZE_DONE_SHIFT				0
+
+/* CS42L43_START_EQZ0 */
+#define CS42L43_START_FILTER_MASK				0x00000001
+#define CS42L43_START_FILTER_SHIFT				0
+
+/* CS42L43_MUTE_EQ_IN0 */
+#define CS42L43_MUTE_EQ_CH2_MASK				0x00000002
+#define CS42L43_MUTE_EQ_CH2_SHIFT				1
+#define CS42L43_MUTE_EQ_CH1_MASK				0x00000001
+#define CS42L43_MUTE_EQ_CH1_SHIFT				0
+
+/* CS42L43_PLL_INT */
+#define CS42L43_PLL_LOST_LOCK_INT_MASK				0x00000002
+#define CS42L43_PLL_LOST_LOCK_INT_SHIFT				1
+#define CS42L43_PLL_READY_INT_MASK				0x00000001
+#define CS42L43_PLL_READY_INT_SHIFT				0
+
+/* CS42L43_SOFT_INT */
+#define CS42L43_CONTROL_APPLIED_INT_MASK			0x00000010
+#define CS42L43_CONTROL_APPLIED_INT_SHIFT			4
+#define CS42L43_CONTROL_WARN_INT_MASK				0x00000008
+#define CS42L43_CONTROL_WARN_INT_SHIFT				3
+#define CS42L43_PATCH_WARN_INT_MASK				0x00000002
+#define CS42L43_PATCH_WARN_INT_SHIFT				1
+#define CS42L43_PATCH_APPLIED_INT_MASK				0x00000001
+#define CS42L43_PATCH_APPLIED_INT_SHIFT				0
+
+/* CS42L43_MSM_INT */
+#define CS42L43_HP_STARTUP_DONE_INT_MASK			0x00000800
+#define CS42L43_HP_STARTUP_DONE_INT_SHIFT			11
+#define CS42L43_HP_SHUTDOWN_DONE_INT_MASK			0x00000400
+#define CS42L43_HP_SHUTDOWN_DONE_INT_SHIFT			10
+#define CS42L43_HSDET_DONE_INT_MASK				0x00000200
+#define CS42L43_HSDET_DONE_INT_SHIFT				9
+#define CS42L43_TIPSENSE_UNPLUG_DB_INT_MASK			0x00000080
+#define CS42L43_TIPSENSE_UNPLUG_DB_INT_SHIFT			7
+#define CS42L43_TIPSENSE_PLUG_DB_INT_MASK			0x00000040
+#define CS42L43_TIPSENSE_PLUG_DB_INT_SHIFT			6
+#define CS42L43_RINGSENSE_UNPLUG_DB_INT_MASK			0x00000020
+#define CS42L43_RINGSENSE_UNPLUG_DB_INT_SHIFT			5
+#define CS42L43_RINGSENSE_PLUG_DB_INT_MASK			0x00000010
+#define CS42L43_RINGSENSE_PLUG_DB_INT_SHIFT			4
+#define CS42L43_TIPSENSE_UNPLUG_PDET_INT_MASK			0x00000008
+#define CS42L43_TIPSENSE_UNPLUG_PDET_INT_SHIFT			3
+#define CS42L43_TIPSENSE_PLUG_PDET_INT_MASK			0x00000004
+#define CS42L43_TIPSENSE_PLUG_PDET_INT_SHIFT			2
+#define CS42L43_RINGSENSE_UNPLUG_PDET_INT_MASK			0x00000002
+#define CS42L43_RINGSENSE_UNPLUG_PDET_INT_SHIFT			1
+#define CS42L43_RINGSENSE_PLUG_PDET_INT_MASK			0x00000001
+#define CS42L43_RINGSENSE_PLUG_PDET_INT_SHIFT			0
+
+/* CS42L43_ACC_DET_INT */
+#define CS42L43_HS2_BIAS_SENSE_INT_MASK				0x00000800
+#define CS42L43_HS2_BIAS_SENSE_INT_SHIFT			11
+#define CS42L43_HS1_BIAS_SENSE_INT_MASK				0x00000400
+#define CS42L43_HS1_BIAS_SENSE_INT_SHIFT			10
+#define CS42L43_DC_DETECT1_FALSE_INT_MASK			0x00000080
+#define CS42L43_DC_DETECT1_FALSE_INT_SHIFT			7
+#define CS42L43_DC_DETECT1_TRUE_INT_MASK			0x00000040
+#define CS42L43_DC_DETECT1_TRUE_INT_SHIFT			6
+#define CS42L43_HSBIAS_CLAMPED_INT_MASK				0x00000008
+#define CS42L43_HSBIAS_CLAMPED_INT_SHIFT			3
+#define CS42L43_HS3_4_BIAS_SENSE_INT_MASK			0x00000001
+#define CS42L43_HS3_4_BIAS_SENSE_INT_SHIFT			0
+
+/* CS42L43_SPI_MSTR_INT */
+#define CS42L43_IRQ_SPI_STALLING_INT_MASK			0x00000004
+#define CS42L43_IRQ_SPI_STALLING_INT_SHIFT			2
+#define CS42L43_IRQ_SPI_STS_INT_MASK				0x00000002
+#define CS42L43_IRQ_SPI_STS_INT_SHIFT				1
+#define CS42L43_IRQ_SPI_BLOCK_INT_MASK				0x00000001
+#define CS42L43_IRQ_SPI_BLOCK_INT_SHIFT				0
+
+/* CS42L43_SW_TO_SPI_BRIDGE_INT */
+#define CS42L43_SW2SPI_BUF_OVF_UDF_INT_MASK			0x00000001
+#define CS42L43_SW2SPI_BUF_OVF_UDF_INT_SHIFT			0
+
+/* CS42L43_CLASS_D_AMP_INT */
+#define CS42L43_AMP2_CLK_STOP_FAULT_INT_MASK			0x00002000
+#define CS42L43_AMP2_CLK_STOP_FAULT_INT_SHIFT			13
+#define CS42L43_AMP1_CLK_STOP_FAULT_INT_MASK			0x00001000
+#define CS42L43_AMP1_CLK_STOP_FAULT_INT_SHIFT			12
+#define CS42L43_AMP2_VDDSPK_FAULT_INT_MASK			0x00000800
+#define CS42L43_AMP2_VDDSPK_FAULT_INT_SHIFT			11
+#define CS42L43_AMP1_VDDSPK_FAULT_INT_MASK			0x00000400
+#define CS42L43_AMP1_VDDSPK_FAULT_INT_SHIFT			10
+#define CS42L43_AMP2_SHUTDOWN_DONE_INT_MASK			0x00000200
+#define CS42L43_AMP2_SHUTDOWN_DONE_INT_SHIFT			9
+#define CS42L43_AMP1_SHUTDOWN_DONE_INT_MASK			0x00000100
+#define CS42L43_AMP1_SHUTDOWN_DONE_INT_SHIFT			8
+#define CS42L43_AMP2_STARTUP_DONE_INT_MASK			0x00000080
+#define CS42L43_AMP2_STARTUP_DONE_INT_SHIFT			7
+#define CS42L43_AMP1_STARTUP_DONE_INT_MASK			0x00000040
+#define CS42L43_AMP1_STARTUP_DONE_INT_SHIFT			6
+#define CS42L43_AMP2_THERM_SHDN_INT_MASK			0x00000020
+#define CS42L43_AMP2_THERM_SHDN_INT_SHIFT			5
+#define CS42L43_AMP1_THERM_SHDN_INT_MASK			0x00000010
+#define CS42L43_AMP1_THERM_SHDN_INT_SHIFT			4
+#define CS42L43_AMP2_THERM_WARN_INT_MASK			0x00000008
+#define CS42L43_AMP2_THERM_WARN_INT_SHIFT			3
+#define CS42L43_AMP1_THERM_WARN_INT_MASK			0x00000004
+#define CS42L43_AMP1_THERM_WARN_INT_SHIFT			2
+#define CS42L43_AMP2_SCDET_INT_MASK				0x00000002
+#define CS42L43_AMP2_SCDET_INT_SHIFT				1
+#define CS42L43_AMP1_SCDET_INT_MASK				0x00000001
+#define CS42L43_AMP1_SCDET_INT_SHIFT				0
+
+/* CS42L43_GPIO_INT */
+#define CS42L43_GPIO3_FALL_INT_MASK				0x00000020
+#define CS42L43_GPIO3_FALL_INT_SHIFT				5
+#define CS42L43_GPIO3_RISE_INT_MASK				0x00000010
+#define CS42L43_GPIO3_RISE_INT_SHIFT				4
+#define CS42L43_GPIO2_FALL_INT_MASK				0x00000008
+#define CS42L43_GPIO2_FALL_INT_SHIFT				3
+#define CS42L43_GPIO2_RISE_INT_MASK				0x00000004
+#define CS42L43_GPIO2_RISE_INT_SHIFT				2
+#define CS42L43_GPIO1_FALL_INT_MASK				0x00000002
+#define CS42L43_GPIO1_FALL_INT_SHIFT				1
+#define CS42L43_GPIO1_RISE_INT_MASK				0x00000001
+#define CS42L43_GPIO1_RISE_INT_SHIFT				0
+
+/* CS42L43_HPOUT_INT */
+#define CS42L43_HP_ILIMIT_INT_MASK				0x00000002
+#define CS42L43_HP_ILIMIT_INT_SHIFT				1
+#define CS42L43_HP_LOADDET_DONE_INT_MASK			0x00000001
+#define CS42L43_HP_LOADDET_DONE_INT_SHIFT			0
+
+/* CS42L43_BOOT_CONTROL */
+#define CS42L43_LOCK_HW_STS_MASK				0x00000002
+#define CS42L43_LOCK_HW_STS_SHIFT				1
+
+/* CS42L43_BLOCK_EN */
+#define CS42L43_MCU_EN_MASK					0x00000001
+#define CS42L43_MCU_EN_SHIFT					0
+
+/* CS42L43_SHUTTER_CONTROL */
+#define CS42L43_STATUS_SPK_SHUTTER_MUTE_MASK			0x00008000
+#define CS42L43_STATUS_SPK_SHUTTER_MUTE_SHIFT			15
+#define CS42L43_SPK_SHUTTER_CFG_MASK				0x00000F00
+#define CS42L43_SPK_SHUTTER_CFG_SHIFT				8
+#define CS42L43_STATUS_MIC_SHUTTER_MUTE_MASK			0x00000080
+#define CS42L43_STATUS_MIC_SHUTTER_MUTE_SHIFT			7
+#define CS42L43_MIC_SHUTTER_CFG_MASK				0x0000000F
+#define CS42L43_MIC_SHUTTER_CFG_SHIFT				0
+
+/* CS42L43_MCU_SW_REV */
+#define CS42L43_BIOS_SUBMINOR_REV_MASK				0xFF000000
+#define CS42L43_BIOS_SUBMINOR_REV_SHIFT				24
+#define CS42L43_BIOS_MINOR_REV_MASK				0x00F00000
+#define CS42L43_BIOS_MINOR_REV_SHIFT				20
+#define CS42L43_BIOS_MAJOR_REV_MASK				0x000F0000
+#define CS42L43_BIOS_MAJOR_REV_SHIFT				16
+#define CS42L43_FW_SUBMINOR_REV_MASK				0x0000FF00
+#define CS42L43_FW_SUBMINOR_REV_SHIFT				8
+#define CS42L43_FW_MINOR_REV_MASK				0x000000F0
+#define CS42L43_FW_MINOR_REV_SHIFT				4
+#define CS42L43_FW_MAJOR_REV_MASK				0x0000000F
+#define CS42L43_FW_MAJOR_REV_SHIFT				0
+
+/* CS42L43_NEED_CONFIGS */
+#define CS42L43_FW_PATCH_NEED_CFG_MASK				0x80000000
+#define CS42L43_FW_PATCH_NEED_CFG_SHIFT				31
+
+/* CS42L43_FW_MISSION_CTRL_MM_CTRL_SELECTION */
+#define CS42L43_FW_MM_CTRL_MCU_SEL_MASK				0x00000001
+#define CS42L43_FW_MM_CTRL_MCU_SEL_SHIFT			0
+
+/* CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_REG */
+#define CS42L43_FW_MISSION_CTRL_MM_MCU_CFG_DISABLE_VAL		0xF05AA50F
+
+#endif /* CS42L43_CORE_REGS_H */
diff --git a/include/linux/mfd/cs42l43.h b/include/linux/mfd/cs42l43.h
new file mode 100644
index 000000000000..cf8263aab41b
--- /dev/null
+++ b/include/linux/mfd/cs42l43.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CS42L43 core driver external data
+ *
+ * Copyright (C) 2022-2023 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ */
+
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/mutex.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/soundwire/sdw.h>
+#include <linux/workqueue.h>
+
+#ifndef CS42L43_CORE_EXT_H
+#define CS42L43_CORE_EXT_H
+
+#define CS42L43_N_SUPPLIES		3
+
+enum cs42l43_irq_numbers {
+	CS42L43_PLL_LOST_LOCK,
+	CS42L43_PLL_READY,
+
+	CS42L43_HP_STARTUP_DONE,
+	CS42L43_HP_SHUTDOWN_DONE,
+	CS42L43_HSDET_DONE,
+	CS42L43_TIPSENSE_UNPLUG_DB,
+	CS42L43_TIPSENSE_PLUG_DB,
+	CS42L43_RINGSENSE_UNPLUG_DB,
+	CS42L43_RINGSENSE_PLUG_DB,
+	CS42L43_TIPSENSE_UNPLUG_PDET,
+	CS42L43_TIPSENSE_PLUG_PDET,
+	CS42L43_RINGSENSE_UNPLUG_PDET,
+	CS42L43_RINGSENSE_PLUG_PDET,
+
+	CS42L43_HS2_BIAS_SENSE,
+	CS42L43_HS1_BIAS_SENSE,
+	CS42L43_DC_DETECT1_FALSE,
+	CS42L43_DC_DETECT1_TRUE,
+	CS42L43_HSBIAS_CLAMPED,
+	CS42L43_HS3_4_BIAS_SENSE,
+
+	CS42L43_AMP2_CLK_STOP_FAULT,
+	CS42L43_AMP1_CLK_STOP_FAULT,
+	CS42L43_AMP2_VDDSPK_FAULT,
+	CS42L43_AMP1_VDDSPK_FAULT,
+	CS42L43_AMP2_SHUTDOWN_DONE,
+	CS42L43_AMP1_SHUTDOWN_DONE,
+	CS42L43_AMP2_STARTUP_DONE,
+	CS42L43_AMP1_STARTUP_DONE,
+	CS42L43_AMP2_THERM_SHDN,
+	CS42L43_AMP1_THERM_SHDN,
+	CS42L43_AMP2_THERM_WARN,
+	CS42L43_AMP1_THERM_WARN,
+	CS42L43_AMP2_SCDET,
+	CS42L43_AMP1_SCDET,
+
+	CS42L43_GPIO3_FALL,
+	CS42L43_GPIO3_RISE,
+	CS42L43_GPIO2_FALL,
+	CS42L43_GPIO2_RISE,
+	CS42L43_GPIO1_FALL,
+	CS42L43_GPIO1_RISE,
+
+	CS42L43_HP_ILIMIT,
+	CS42L43_HP_LOADDET_DONE,
+};
+
+struct cs42l43 {
+	struct device *dev;
+	struct regmap *regmap;
+	struct sdw_slave *sdw;
+
+	struct regulator *vdd_p;
+	struct regulator *vdd_d;
+	struct regulator_bulk_data core_supplies[CS42L43_N_SUPPLIES];
+
+	struct gpio_desc *reset;
+
+	int irq;
+	struct regmap_irq_chip irq_chip;
+	struct regmap_irq_chip_data *irq_data;
+
+	struct work_struct boot_work;
+	struct completion device_attach;
+	struct completion device_detach;
+	struct completion firmware_download;
+	int firmware_error;
+
+	unsigned int sdw_freq;
+	/* Lock to gate control of the PLL and its sources. */
+	struct mutex pll_lock;
+
+	bool sdw_pll_active;
+	bool attached;
+	bool hw_lock;
+};
+
+#endif /* CS42L43_CORE_EXT_H */

From d5282a53929791071b17dde3eed52e40f76b101c Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Fri, 4 Aug 2023 11:46:00 +0100
Subject: [PATCH 1261/2047] pinctrl: cs42l43: Add support for the cs42l43

The CS42L43 is an audio CODEC with integrated MIPI SoundWire interface
(Version 1.2.1 compliant), I2C, SPI, and I2S/TDM interfaces designed
for portable applications. It provides a high dynamic range, stereo
DAC for headphone output, two integrated Class D amplifiers for
loudspeakers, and two ADCs for wired headset microphone input or
stereo line input. PDM inputs are provided for digital microphones.

Add a basic pinctrl driver which supports driver strength for the
various pins, gpios, and pinmux for the 2 multi-function pins.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20230804104602.395892-5-ckeepax@opensource.cirrus.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 MAINTAINERS                              |   1 +
 drivers/pinctrl/cirrus/Kconfig           |  11 +
 drivers/pinctrl/cirrus/Makefile          |   2 +
 drivers/pinctrl/cirrus/pinctrl-cs42l43.c | 609 +++++++++++++++++++++++
 4 files changed, 623 insertions(+)
 create mode 100644 drivers/pinctrl/cirrus/pinctrl-cs42l43.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 45aa18c45d20..4e35a4880cf8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4880,6 +4880,7 @@ L:	patches@opensource.cirrus.com
 S:	Maintained
 F:	Documentation/devicetree/bindings/sound/cirrus,cs*
 F:	drivers/mfd/cs42l43*
+F:	drivers/pinctrl/cirrus/pinctrl-cs42l43*
 F:	include/dt-bindings/sound/cs*
 F:	include/linux/mfd/cs42l43*
 F:	include/sound/cs*
diff --git a/drivers/pinctrl/cirrus/Kconfig b/drivers/pinctrl/cirrus/Kconfig
index 530426a74f75..d6318cb57aff 100644
--- a/drivers/pinctrl/cirrus/Kconfig
+++ b/drivers/pinctrl/cirrus/Kconfig
@@ -1,4 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0-only
+config PINCTRL_CS42L43
+	tristate "Cirrus Logic CS42L43 Pinctrl Driver"
+	depends on MFD_CS42L43
+	select GPIOLIB
+	select PINMUX
+	select PINCONF
+	select GENERIC_PINCONF
+	help
+	  Select this to support the GPIO/Pinctrl functions of the Cirrus
+	  Logic CS42L43 PC CODEC.
+
 config PINCTRL_LOCHNAGAR
 	tristate "Cirrus Logic Lochnagar pinctrl driver"
 	depends on MFD_LOCHNAGAR
diff --git a/drivers/pinctrl/cirrus/Makefile b/drivers/pinctrl/cirrus/Makefile
index a484518c840e..9b618d766907 100644
--- a/drivers/pinctrl/cirrus/Makefile
+++ b/drivers/pinctrl/cirrus/Makefile
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 # Cirrus Logic pinctrl drivers
+obj-$(CONFIG_PINCTRL_CS42L43)	+= pinctrl-cs42l43.o
+
 obj-$(CONFIG_PINCTRL_LOCHNAGAR)	+= pinctrl-lochnagar.o
 
 pinctrl-madera-objs		:= pinctrl-madera-core.o
diff --git a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c
new file mode 100644
index 000000000000..c09646318419
--- /dev/null
+++ b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c
@@ -0,0 +1,609 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// CS42L43 Pinctrl and GPIO driver
+//
+// Copyright (c) 2023 Cirrus Logic, Inc. and
+//                    Cirrus Logic International Semiconductor Ltd.
+
+#include <linux/bits.h>
+#include <linux/build_bug.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/gpio/driver.h>
+#include <linux/mfd/cs42l43.h>
+#include <linux/mfd/cs42l43-regs.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/string_helpers.h>
+
+#include <linux/pinctrl/consumer.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/pinctrl/pinmux.h>
+
+#include "../pinctrl-utils.h"
+
+#define CS42L43_NUM_GPIOS 3
+
+struct cs42l43_pin {
+	struct gpio_chip gpio_chip;
+
+	struct device *dev;
+	struct regmap *regmap;
+	bool shutters_locked;
+};
+
+struct cs42l43_pin_data {
+	unsigned int reg;
+	unsigned int shift;
+	unsigned int mask;
+};
+
+#define CS42L43_PIN(_number, _name, _reg, _field) { \
+	.number = _number, .name = _name, \
+	.drv_data = &((struct cs42l43_pin_data){ \
+		.reg = CS42L43_##_reg, \
+		.shift = CS42L43_##_field##_DRV_SHIFT, \
+		.mask = CS42L43_##_field##_DRV_MASK, \
+	}), \
+}
+
+static const struct pinctrl_pin_desc cs42l43_pin_pins[] = {
+	CS42L43_PIN(0,	"gpio1",	DRV_CTRL4,	GPIO1),
+	CS42L43_PIN(1,	"gpio2",	DRV_CTRL4,	GPIO2),
+	CS42L43_PIN(2,	"gpio3",	DRV_CTRL4,	GPIO3),
+	CS42L43_PIN(3,	"asp_dout",	DRV_CTRL1,	ASP_DOUT),
+	CS42L43_PIN(4,	"asp_fsync",	DRV_CTRL1,	ASP_FSYNC),
+	CS42L43_PIN(5,	"asp_bclk",	DRV_CTRL1,	ASP_BCLK),
+	CS42L43_PIN(6,	"pdmout2_clk",	DRV_CTRL3,	PDMOUT2_CLK),
+	CS42L43_PIN(7,	"pdmout2_data",	DRV_CTRL3,	PDMOUT2_DATA),
+	CS42L43_PIN(8,	"pdmout1_clk",	DRV_CTRL3,	PDMOUT1_CLK),
+	CS42L43_PIN(9,	"pdmout1_data",	DRV_CTRL3,	PDMOUT1_DATA),
+	CS42L43_PIN(10,	"i2c_sda",	DRV_CTRL3,	I2C_SDA),
+	CS42L43_PIN(11,	"i2c_scl",	DRV_CTRL_5,	I2C_SCL),
+	CS42L43_PIN(12,	"spi_miso",	DRV_CTRL3,	SPI_MISO),
+	CS42L43_PIN(13,	"spi_sck",	DRV_CTRL_5,	SPI_SCK),
+	CS42L43_PIN(14,	"spi_ssb",	DRV_CTRL_5,	SPI_SSB),
+};
+
+static const unsigned int cs42l43_pin_gpio1_pins[] = { 0 };
+static const unsigned int cs42l43_pin_gpio2_pins[] = { 1 };
+static const unsigned int cs42l43_pin_gpio3_pins[] = { 2 };
+static const unsigned int cs42l43_pin_asp_pins[] = { 3, 4, 5 };
+static const unsigned int cs42l43_pin_pdmout2_pins[] = { 6, 7 };
+static const unsigned int cs42l43_pin_pdmout1_pins[] = { 8, 9 };
+static const unsigned int cs42l43_pin_i2c_pins[] = { 10, 11 };
+static const unsigned int cs42l43_pin_spi_pins[] = { 12, 13, 14 };
+
+#define CS42L43_PINGROUP(_name) \
+	PINCTRL_PINGROUP(#_name, cs42l43_pin_##_name##_pins, \
+			 ARRAY_SIZE(cs42l43_pin_##_name##_pins))
+
+static const struct pingroup cs42l43_pin_groups[] = {
+	CS42L43_PINGROUP(gpio1),
+	CS42L43_PINGROUP(gpio2),
+	CS42L43_PINGROUP(gpio3),
+	CS42L43_PINGROUP(asp),
+	CS42L43_PINGROUP(pdmout2),
+	CS42L43_PINGROUP(pdmout1),
+	CS42L43_PINGROUP(i2c),
+	CS42L43_PINGROUP(spi),
+};
+
+static int cs42l43_pin_get_groups_count(struct pinctrl_dev *pctldev)
+{
+	return ARRAY_SIZE(cs42l43_pin_groups);
+}
+
+static const char *cs42l43_pin_get_group_name(struct pinctrl_dev *pctldev,
+					      unsigned int group_idx)
+{
+	return cs42l43_pin_groups[group_idx].name;
+}
+
+static int cs42l43_pin_get_group_pins(struct pinctrl_dev *pctldev,
+				      unsigned int group_idx,
+				      const unsigned int **pins,
+				      unsigned int *num_pins)
+{
+	*pins = cs42l43_pin_groups[group_idx].pins;
+	*num_pins = cs42l43_pin_groups[group_idx].npins;
+
+	return 0;
+}
+
+static const struct pinctrl_ops cs42l43_pin_group_ops = {
+	.get_groups_count = cs42l43_pin_get_groups_count,
+	.get_group_name = cs42l43_pin_get_group_name,
+	.get_group_pins = cs42l43_pin_get_group_pins,
+#if IS_ENABLED(CONFIG_OF)
+	.dt_node_to_map = pinconf_generic_dt_node_to_map_all,
+	.dt_free_map = pinconf_generic_dt_free_map,
+#endif
+};
+
+enum cs42l43_pin_funcs {
+	CS42L43_FUNC_GPIO,
+	CS42L43_FUNC_SPDIF,
+	CS42L43_FUNC_IRQ,
+	CS42L43_FUNC_MIC_SHT,
+	CS42L43_FUNC_SPK_SHT,
+	CS42L43_FUNC_MAX
+};
+
+static const char * const cs42l43_pin_funcs[] = {
+	"gpio", "spdif", "irq", "mic-shutter", "spk-shutter",
+};
+
+static const char * const cs42l43_pin_gpio_groups[] = { "gpio1", "gpio3" };
+static const char * const cs42l43_pin_spdif_groups[] = { "gpio3" };
+static const char * const cs42l43_pin_irq_groups[] = { "gpio1" };
+static const char * const cs42l43_pin_shutter_groups[] = { "gpio1", "gpio2", "gpio3" };
+
+static const struct pinfunction cs42l43_pin_func_groups[] = {
+	PINCTRL_PINFUNCTION("gpio", cs42l43_pin_gpio_groups,
+			    ARRAY_SIZE(cs42l43_pin_gpio_groups)),
+	PINCTRL_PINFUNCTION("spdif", cs42l43_pin_spdif_groups,
+			    ARRAY_SIZE(cs42l43_pin_spdif_groups)),
+	PINCTRL_PINFUNCTION("irq",  cs42l43_pin_irq_groups,
+			    ARRAY_SIZE(cs42l43_pin_irq_groups)),
+	PINCTRL_PINFUNCTION("mic-shutter", cs42l43_pin_shutter_groups,
+			    ARRAY_SIZE(cs42l43_pin_shutter_groups)),
+	PINCTRL_PINFUNCTION("spk-shutter", cs42l43_pin_shutter_groups,
+			    ARRAY_SIZE(cs42l43_pin_shutter_groups)),
+};
+
+static_assert(ARRAY_SIZE(cs42l43_pin_funcs) == CS42L43_FUNC_MAX);
+static_assert(ARRAY_SIZE(cs42l43_pin_func_groups) == CS42L43_FUNC_MAX);
+
+static int cs42l43_pin_get_func_count(struct pinctrl_dev *pctldev)
+{
+	return ARRAY_SIZE(cs42l43_pin_funcs);
+}
+
+static const char *cs42l43_pin_get_func_name(struct pinctrl_dev *pctldev,
+					     unsigned int func_idx)
+{
+	return cs42l43_pin_funcs[func_idx];
+}
+
+static int cs42l43_pin_get_func_groups(struct pinctrl_dev *pctldev,
+				       unsigned int func_idx,
+				       const char * const **groups,
+				       unsigned int * const num_groups)
+{
+	*groups = cs42l43_pin_func_groups[func_idx].groups;
+	*num_groups = cs42l43_pin_func_groups[func_idx].ngroups;
+
+	return 0;
+}
+
+static int cs42l43_pin_set_mux(struct pinctrl_dev *pctldev,
+			       unsigned int func_idx, unsigned int group_idx)
+{
+	struct cs42l43_pin *priv = pinctrl_dev_get_drvdata(pctldev);
+	unsigned int reg, mask, val;
+
+	dev_dbg(priv->dev, "Setting %s to %s\n",
+		cs42l43_pin_groups[group_idx].name, cs42l43_pin_funcs[func_idx]);
+
+	switch (func_idx) {
+	case CS42L43_FUNC_MIC_SHT:
+		reg = CS42L43_SHUTTER_CONTROL;
+		mask = CS42L43_MIC_SHUTTER_CFG_MASK;
+		val = 0x2 << (group_idx + CS42L43_MIC_SHUTTER_CFG_SHIFT);
+		break;
+	case CS42L43_FUNC_SPK_SHT:
+		reg = CS42L43_SHUTTER_CONTROL;
+		mask = CS42L43_SPK_SHUTTER_CFG_MASK;
+		val = 0x2 << (group_idx + CS42L43_SPK_SHUTTER_CFG_SHIFT);
+		break;
+	default:
+		reg = CS42L43_GPIO_FN_SEL;
+		mask = BIT(group_idx + CS42L43_GPIO1_FN_SEL_SHIFT);
+		val = (func_idx == CS42L43_FUNC_GPIO) ?
+				(0x1 << (group_idx + CS42L43_GPIO1_FN_SEL_SHIFT)) : 0;
+		break;
+	}
+
+	if (priv->shutters_locked && reg == CS42L43_SHUTTER_CONTROL) {
+		dev_err(priv->dev, "Shutter configuration not available\n");
+		return -EPERM;
+	}
+
+	return regmap_update_bits(priv->regmap, reg, mask, val);
+}
+
+static int cs42l43_gpio_set_direction(struct pinctrl_dev *pctldev,
+				      struct pinctrl_gpio_range *range,
+				      unsigned int offset, bool input)
+{
+	struct cs42l43_pin *priv = pinctrl_dev_get_drvdata(pctldev);
+	unsigned int shift = offset + CS42L43_GPIO1_DIR_SHIFT;
+	int ret;
+
+	dev_dbg(priv->dev, "Setting gpio%d to %s\n",
+		offset + 1, input ? "input" : "output");
+
+	ret = pm_runtime_resume_and_get(priv->dev);
+	if (ret) {
+		dev_err(priv->dev, "Failed to resume for direction: %d\n", ret);
+		return ret;
+	}
+
+	ret = regmap_update_bits(priv->regmap, CS42L43_GPIO_CTRL1,
+				 BIT(shift), !!input << shift);
+	if (ret)
+		dev_err(priv->dev, "Failed to set gpio%d direction: %d\n",
+			offset + 1, ret);
+
+	pm_runtime_put(priv->dev);
+
+	return ret;
+}
+
+static int cs42l43_gpio_request_enable(struct pinctrl_dev *pctldev,
+				       struct pinctrl_gpio_range *range,
+				       unsigned int offset)
+{
+	return cs42l43_pin_set_mux(pctldev, 0, offset);
+}
+
+static void cs42l43_gpio_disable_free(struct pinctrl_dev *pctldev,
+				      struct pinctrl_gpio_range *range,
+				      unsigned int offset)
+{
+	cs42l43_gpio_set_direction(pctldev, range, offset, true);
+}
+
+static const struct pinmux_ops cs42l43_pin_mux_ops = {
+	.get_functions_count	= cs42l43_pin_get_func_count,
+	.get_function_name	= cs42l43_pin_get_func_name,
+	.get_function_groups	= cs42l43_pin_get_func_groups,
+
+	.set_mux		= cs42l43_pin_set_mux,
+
+	.gpio_request_enable	= cs42l43_gpio_request_enable,
+	.gpio_disable_free	= cs42l43_gpio_disable_free,
+	.gpio_set_direction	= cs42l43_gpio_set_direction,
+
+	.strict			= true,
+};
+
+static const unsigned int cs42l43_pin_drv_str_ma[] = { 1, 2, 4, 8, 9, 10, 12, 16 };
+
+static inline int cs42l43_pin_get_drv_str(struct cs42l43_pin *priv, unsigned int pin)
+{
+	const struct cs42l43_pin_data *pdat = cs42l43_pin_pins[pin].drv_data;
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(priv->regmap, pdat->reg, &val);
+	if (ret)
+		return ret;
+
+	return cs42l43_pin_drv_str_ma[(val & pdat->mask) >> pdat->shift];
+}
+
+static inline int cs42l43_pin_set_drv_str(struct cs42l43_pin *priv, unsigned int pin,
+					  unsigned int ma)
+{
+	const struct cs42l43_pin_data *pdat = cs42l43_pin_pins[pin].drv_data;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cs42l43_pin_drv_str_ma); i++) {
+		if (ma == cs42l43_pin_drv_str_ma[i]) {
+			if ((i << pdat->shift) > pdat->mask)
+				goto err;
+
+			dev_dbg(priv->dev, "Set drive strength for %s to %d mA\n",
+				cs42l43_pin_pins[pin].name, ma);
+
+			return regmap_update_bits(priv->regmap, pdat->reg,
+						  pdat->mask, i << pdat->shift);
+		}
+	}
+
+err:
+	dev_err(priv->dev, "Invalid drive strength for %s: %d mA\n",
+		cs42l43_pin_pins[pin].name, ma);
+	return -EINVAL;
+}
+
+static inline int cs42l43_pin_get_db(struct cs42l43_pin *priv, unsigned int pin)
+{
+	unsigned int val;
+	int ret;
+
+	if (pin >= CS42L43_NUM_GPIOS)
+		return -ENOTSUPP;
+
+	ret = regmap_read(priv->regmap, CS42L43_GPIO_CTRL2, &val);
+	if (ret)
+		return ret;
+
+	if (val & (CS42L43_GPIO1_DEGLITCH_BYP_MASK << pin))
+		return 0;
+
+	return 85; // Debounce is roughly 85uS
+}
+
+static inline int cs42l43_pin_set_db(struct cs42l43_pin *priv, unsigned int pin,
+				     unsigned int us)
+{
+	if (pin >= CS42L43_NUM_GPIOS)
+		return -ENOTSUPP;
+
+	dev_dbg(priv->dev, "Set debounce %s for %s\n",
+		str_on_off(us), cs42l43_pin_pins[pin].name);
+
+	return regmap_update_bits(priv->regmap, CS42L43_GPIO_CTRL2,
+				  CS42L43_GPIO1_DEGLITCH_BYP_MASK << pin,
+				  !!us << pin);
+}
+
+static int cs42l43_pin_config_get(struct pinctrl_dev *pctldev,
+				  unsigned int pin, unsigned long *config)
+{
+	struct cs42l43_pin *priv = pinctrl_dev_get_drvdata(pctldev);
+	unsigned int param = pinconf_to_config_param(*config);
+	int ret;
+
+	switch (param) {
+	case PIN_CONFIG_DRIVE_STRENGTH:
+		ret = cs42l43_pin_get_drv_str(priv, pin);
+		if (ret < 0)
+			return ret;
+		break;
+	case PIN_CONFIG_INPUT_DEBOUNCE:
+		ret = cs42l43_pin_get_db(priv, pin);
+		if (ret < 0)
+			return ret;
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	*config = pinconf_to_config_packed(param, ret);
+
+	return 0;
+}
+
+static int cs42l43_pin_config_set(struct pinctrl_dev *pctldev, unsigned int pin,
+				  unsigned long *configs, unsigned int num_configs)
+{
+	struct cs42l43_pin *priv = pinctrl_dev_get_drvdata(pctldev);
+	unsigned int val;
+	int ret;
+
+	while (num_configs) {
+		val = pinconf_to_config_argument(*configs);
+
+		switch (pinconf_to_config_param(*configs)) {
+		case PIN_CONFIG_DRIVE_STRENGTH:
+			ret = cs42l43_pin_set_drv_str(priv, pin, val);
+			if (ret)
+				return ret;
+			break;
+		case PIN_CONFIG_INPUT_DEBOUNCE:
+			ret = cs42l43_pin_set_db(priv, pin, val);
+			if (ret)
+				return ret;
+			break;
+		default:
+			return -ENOTSUPP;
+		}
+
+		configs++;
+		num_configs--;
+	}
+
+	return 0;
+}
+
+static int cs42l43_pin_config_group_get(struct pinctrl_dev *pctldev,
+					unsigned int selector, unsigned long *config)
+{
+	int i, ret;
+
+	for (i = 0; i < cs42l43_pin_groups[selector].npins; ++i) {
+		ret = cs42l43_pin_config_get(pctldev,
+					     cs42l43_pin_groups[selector].pins[i],
+					     config);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int cs42l43_pin_config_group_set(struct pinctrl_dev *pctldev,
+					unsigned int selector,
+					unsigned long *configs,
+					unsigned int num_configs)
+{
+	int i, ret;
+
+	for (i = 0; i < cs42l43_pin_groups[selector].npins; ++i) {
+		ret = cs42l43_pin_config_set(pctldev,
+					     cs42l43_pin_groups[selector].pins[i],
+					     configs, num_configs);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static const struct pinconf_ops cs42l43_pin_conf_ops = {
+	.is_generic		= true,
+
+	.pin_config_get		= cs42l43_pin_config_get,
+	.pin_config_set		= cs42l43_pin_config_set,
+	.pin_config_group_get	= cs42l43_pin_config_group_get,
+	.pin_config_group_set	= cs42l43_pin_config_group_set,
+};
+
+static struct pinctrl_desc cs42l43_pin_desc = {
+	.name		= "cs42l43-pinctrl",
+	.owner		= THIS_MODULE,
+
+	.pins		= cs42l43_pin_pins,
+	.npins		= ARRAY_SIZE(cs42l43_pin_pins),
+
+	.pctlops	= &cs42l43_pin_group_ops,
+	.pmxops		= &cs42l43_pin_mux_ops,
+	.confops	= &cs42l43_pin_conf_ops,
+};
+
+static int cs42l43_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+	struct cs42l43_pin *priv = gpiochip_get_data(chip);
+	unsigned int val;
+	int ret;
+
+	ret = pm_runtime_resume_and_get(priv->dev);
+	if (ret) {
+		dev_err(priv->dev, "Failed to resume for get: %d\n", ret);
+		return ret;
+	}
+
+	ret = regmap_read(priv->regmap, CS42L43_GPIO_STS, &val);
+	if (ret)
+		dev_err(priv->dev, "Failed to get gpio%d: %d\n", offset + 1, ret);
+	else
+		ret = !!(val & BIT(offset + CS42L43_GPIO1_STS_SHIFT));
+
+	pm_runtime_put(priv->dev);
+
+	return ret;
+}
+
+static void cs42l43_gpio_set(struct gpio_chip *chip, unsigned int offset, int value)
+{
+	struct cs42l43_pin *priv = gpiochip_get_data(chip);
+	unsigned int shift = offset + CS42L43_GPIO1_LVL_SHIFT;
+	int ret;
+
+	dev_dbg(priv->dev, "Setting gpio%d to %s\n",
+		offset + 1, value ? "high" : "low");
+
+	ret = pm_runtime_resume_and_get(priv->dev);
+	if (ret) {
+		dev_err(priv->dev, "Failed to resume for set: %d\n", ret);
+		return;
+	}
+
+	ret = regmap_update_bits(priv->regmap, CS42L43_GPIO_CTRL1,
+				 BIT(shift), value << shift);
+	if (ret)
+		dev_err(priv->dev, "Failed to set gpio%d: %d\n", offset + 1, ret);
+
+	pm_runtime_put(priv->dev);
+}
+
+static int cs42l43_gpio_direction_in(struct gpio_chip *chip, unsigned int offset)
+{
+	return pinctrl_gpio_direction_input(chip->base + offset);
+}
+
+static int cs42l43_gpio_direction_out(struct gpio_chip *chip,
+				      unsigned int offset, int value)
+{
+	cs42l43_gpio_set(chip, offset, value);
+
+	return pinctrl_gpio_direction_output(chip->base + offset);
+}
+
+static int cs42l43_gpio_add_pin_ranges(struct gpio_chip *chip)
+{
+	struct cs42l43_pin *priv = gpiochip_get_data(chip);
+	int ret;
+
+	ret = gpiochip_add_pin_range(&priv->gpio_chip, priv->gpio_chip.label,
+				     0, 0, CS42L43_NUM_GPIOS);
+	if (ret)
+		dev_err(priv->dev, "Failed to add GPIO pin range: %d\n", ret);
+
+	return ret;
+}
+
+static int cs42l43_pin_probe(struct platform_device *pdev)
+{
+	struct cs42l43 *cs42l43 = dev_get_drvdata(pdev->dev.parent);
+	struct cs42l43_pin *priv;
+	struct pinctrl_dev *pctldev;
+	struct fwnode_handle *fwnode = dev_fwnode(cs42l43->dev);
+	int ret;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->dev = &pdev->dev;
+	priv->regmap = cs42l43->regmap;
+
+	priv->shutters_locked = cs42l43->hw_lock;
+
+	priv->gpio_chip.request = gpiochip_generic_request;
+	priv->gpio_chip.free = gpiochip_generic_free;
+	priv->gpio_chip.direction_input = cs42l43_gpio_direction_in;
+	priv->gpio_chip.direction_output = cs42l43_gpio_direction_out;
+	priv->gpio_chip.add_pin_ranges = cs42l43_gpio_add_pin_ranges;
+	priv->gpio_chip.get = cs42l43_gpio_get;
+	priv->gpio_chip.set = cs42l43_gpio_set;
+	priv->gpio_chip.label = dev_name(priv->dev);
+	priv->gpio_chip.parent = priv->dev;
+	priv->gpio_chip.can_sleep = true;
+	priv->gpio_chip.base = -1;
+	priv->gpio_chip.ngpio = CS42L43_NUM_GPIOS;
+
+	if (is_of_node(fwnode)) {
+		fwnode = fwnode_get_named_child_node(fwnode, "pinctrl");
+
+		if (fwnode && !fwnode->dev)
+			fwnode->dev = priv->dev;
+	}
+
+	priv->gpio_chip.fwnode = fwnode;
+
+	device_set_node(priv->dev, fwnode);
+
+	devm_pm_runtime_enable(priv->dev);
+	pm_runtime_idle(priv->dev);
+
+	pctldev = devm_pinctrl_register(priv->dev, &cs42l43_pin_desc, priv);
+	if (IS_ERR(pctldev))
+		return dev_err_probe(priv->dev, PTR_ERR(pctldev),
+				     "Failed to register pinctrl\n");
+
+	ret = devm_gpiochip_add_data(priv->dev, &priv->gpio_chip, priv);
+	if (ret)
+		return dev_err_probe(priv->dev, ret,
+				     "Failed to register gpiochip\n");
+
+	return 0;
+}
+
+static const struct platform_device_id cs42l43_pin_id_table[] = {
+	{ "cs42l43-pinctrl", },
+	{}
+};
+MODULE_DEVICE_TABLE(platform, cs42l43_pin_id_table);
+
+static struct platform_driver cs42l43_pin_driver = {
+	.driver = {
+		.name	= "cs42l43-pinctrl",
+	},
+	.probe		= cs42l43_pin_probe,
+	.id_table	= cs42l43_pin_id_table,
+};
+module_platform_driver(cs42l43_pin_driver);
+
+MODULE_DESCRIPTION("CS42L43 Pinctrl Driver");
+MODULE_AUTHOR("Charles Keepax <ckeepax@opensource.cirrus.com>");
+MODULE_LICENSE("GPL");

From 897a6b5a030e62c21566551c870d81740f82ca13 Mon Sep 17 00:00:00 2001
From: Maciej Strozek <mstrozek@opensource.cirrus.com>
Date: Thu, 17 Aug 2023 12:27:11 +0100
Subject: [PATCH 1262/2047] ASoC: cs35l56: Read firmware uuid from a device
 property instead of _SUB

Use a device property "cirrus,firmware-uid" to get the unique firmware
identifier instead of using ACPI _SUB. There aren't any products that use
_SUB.

There will not usually be a _SUB in Soundwire nodes. The ACPI can use a
_DSD section for custom properties.

There is also a need to support instantiating this driver using software
nodes. This is for systems where the CS35L56 is a back-end device and the
ACPI refers only to the front-end audio device - there will not be any ACPI
references to CS35L56.

Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56")
Signed-off-by: Maciej Strozek <mstrozek@opensource.cirrus.com>
Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20230817112712.16637-2-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/cs35l56.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c
index c03f9d3c9a13..fd06b9f9d496 100644
--- a/sound/soc/codecs/cs35l56.c
+++ b/sound/soc/codecs/cs35l56.c
@@ -5,7 +5,6 @@
 // Copyright (C) 2023 Cirrus Logic, Inc. and
 //                    Cirrus Logic International Semiconductor Ltd.
 
-#include <linux/acpi.h>
 #include <linux/completion.h>
 #include <linux/debugfs.h>
 #include <linux/delay.h>
@@ -1354,26 +1353,22 @@ static int cs35l56_dsp_init(struct cs35l56_private *cs35l56)
 	return 0;
 }
 
-static int cs35l56_acpi_get_name(struct cs35l56_private *cs35l56)
+static int cs35l56_get_firmware_uid(struct cs35l56_private *cs35l56)
 {
-	acpi_handle handle = ACPI_HANDLE(cs35l56->dev);
-	const char *sub;
+	struct device *dev = cs35l56->dev;
+	const char *prop;
+	int ret;
 
-	/* If there is no ACPI_HANDLE, there is no ACPI for this system, return 0 */
-	if (!handle)
+	ret = device_property_read_string(dev, "cirrus,firmware-uid", &prop);
+	/* If bad sw node property, return 0 and fallback to legacy firmware path */
+	if (ret < 0)
 		return 0;
 
-	sub = acpi_get_subsystem_id(handle);
-	if (IS_ERR(sub)) {
-		/* If bad ACPI, return 0 and fallback to legacy firmware path, otherwise fail */
-		if (PTR_ERR(sub) == -ENODATA)
-			return 0;
-		else
-			return PTR_ERR(sub);
-	}
+	cs35l56->dsp.system_name = devm_kstrdup(dev, prop, GFP_KERNEL);
+	if (cs35l56->dsp.system_name == NULL)
+		return -ENOMEM;
 
-	cs35l56->dsp.system_name = sub;
-	dev_dbg(cs35l56->dev, "Subsystem ID: %s\n", cs35l56->dsp.system_name);
+	dev_dbg(dev, "Firmware UID: %s\n", cs35l56->dsp.system_name);
 
 	return 0;
 }
@@ -1417,7 +1412,7 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56)
 		gpiod_set_value_cansleep(cs35l56->reset_gpio, 1);
 	}
 
-	ret = cs35l56_acpi_get_name(cs35l56);
+	ret = cs35l56_get_firmware_uid(cs35l56);
 	if (ret != 0)
 		goto err;
 
@@ -1604,8 +1599,6 @@ void cs35l56_remove(struct cs35l56_private *cs35l56)
 
 	regcache_cache_only(cs35l56->regmap, true);
 
-	kfree(cs35l56->dsp.system_name);
-
 	gpiod_set_value_cansleep(cs35l56->reset_gpio, 0);
 	regulator_bulk_disable(ARRAY_SIZE(cs35l56->supplies), cs35l56->supplies);
 }

From e8500a70270334b9abad72fea504ef38a2952274 Mon Sep 17 00:00:00 2001
From: Simon Trimmer <simont@opensource.cirrus.com>
Date: Thu, 17 Aug 2023 12:27:12 +0100
Subject: [PATCH 1263/2047] ASoC: cs35l56: Add an ACPI match table

An ACPI ID has been allocated for CS35L56 ASoC devices so that they can
be instantiated from ACPI Device entries.

Signed-off-by: Simon Trimmer <simont@opensource.cirrus.com>
Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20230817112712.16637-3-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/cs35l56-i2c.c | 9 +++++++++
 sound/soc/codecs/cs35l56-spi.c | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/sound/soc/codecs/cs35l56-i2c.c b/sound/soc/codecs/cs35l56-i2c.c
index ed2a41943d97..40666e6698ba 100644
--- a/sound/soc/codecs/cs35l56-i2c.c
+++ b/sound/soc/codecs/cs35l56-i2c.c
@@ -62,10 +62,19 @@ static const struct i2c_device_id cs35l56_id_i2c[] = {
 };
 MODULE_DEVICE_TABLE(i2c, cs35l56_id_i2c);
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id cs35l56_asoc_acpi_match[] = {
+	{ "CSC355C", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_asoc_acpi_match);
+#endif
+
 static struct i2c_driver cs35l56_i2c_driver = {
 	.driver = {
 		.name		= "cs35l56",
 		.pm = &cs35l56_pm_ops_i2c_spi,
+		.acpi_match_table = ACPI_PTR(cs35l56_asoc_acpi_match),
 	},
 	.id_table	= cs35l56_id_i2c,
 	.probe		= cs35l56_i2c_probe,
diff --git a/sound/soc/codecs/cs35l56-spi.c b/sound/soc/codecs/cs35l56-spi.c
index 996aab10500e..302f9c47407a 100644
--- a/sound/soc/codecs/cs35l56-spi.c
+++ b/sound/soc/codecs/cs35l56-spi.c
@@ -59,10 +59,19 @@ static const struct spi_device_id cs35l56_id_spi[] = {
 };
 MODULE_DEVICE_TABLE(spi, cs35l56_id_spi);
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id cs35l56_asoc_acpi_match[] = {
+	{ "CSC355C", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_asoc_acpi_match);
+#endif
+
 static struct spi_driver cs35l56_spi_driver = {
 	.driver = {
 		.name		= "cs35l56",
 		.pm = &cs35l56_pm_ops_i2c_spi,
+		.acpi_match_table = ACPI_PTR(cs35l56_asoc_acpi_match),
 	},
 	.id_table	= cs35l56_id_spi,
 	.probe		= cs35l56_spi_probe,

From 3d521f9f3663ba7a22e56d339c6632f0ca787371 Mon Sep 17 00:00:00 2001
From: Shenghao Ding <shenghao-ding@ti.com>
Date: Thu, 17 Aug 2023 17:32:56 +0800
Subject: [PATCH 1264/2047] ASoC: tas2781: fixed register access error when
 switching to other chips

fixed register access error when switching to other tas2781 -- refresh the page
inside regmap on the switched tas2781

Signed-off-by: Shenghao Ding <shenghao-ding@ti.com>
Link: https://lore.kernel.org/r/20230817093257.951-1-shenghao-ding@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/tas2781-comlib.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/sound/soc/codecs/tas2781-comlib.c b/sound/soc/codecs/tas2781-comlib.c
index a88c6c28a394..ffb26e4a7e2f 100644
--- a/sound/soc/codecs/tas2781-comlib.c
+++ b/sound/soc/codecs/tas2781-comlib.c
@@ -57,16 +57,17 @@ static int tasdevice_change_chn_book(struct tasdevice_priv *tas_priv,
 
 		if (client->addr != tasdev->dev_addr) {
 			client->addr = tasdev->dev_addr;
-			if (tasdev->cur_book == book) {
-				ret = regmap_write(map,
-					TASDEVICE_PAGE_SELECT, 0);
-				if (ret < 0) {
-					dev_err(tas_priv->dev, "%s, E=%d\n",
-						__func__, ret);
-					goto out;
-				}
+			/* All tas2781s share the same regmap, clear the page
+			 * inside regmap once switching to another tas2781.
+			 * Register 0 at any pages and any books inside tas2781
+			 * is the same one for page-switching.
+			 */
+			ret = regmap_write(map, TASDEVICE_PAGE_SELECT, 0);
+			if (ret < 0) {
+				dev_err(tas_priv->dev, "%s, E=%d\n",
+					__func__, ret);
+				goto out;
 			}
-			goto out;
 		}
 
 		if (tasdev->cur_book != book) {

From 37a08f010b7c423b5e4c9ed3b187d21166553007 Mon Sep 17 00:00:00 2001
From: Holger Dengler <dengler@linux.ibm.com>
Date: Wed, 26 Jul 2023 11:33:45 +0200
Subject: [PATCH 1265/2047] s390/pkey: fix/harmonize internal keyblob headers

Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC
private keys")' introduced PKEY_TYPE_EP11_AES as a supplement to
PKEY_TYPE_EP11. All pkeys have an internal header/payload structure,
which is opaque to the userspace. The header structures for
PKEY_TYPE_EP11 and PKEY_TYPE_EP11_AES are nearly identical and there
is no reason, why different structures are used. In preparation to fix
the keyversion handling in the broken PKEY IOCTLs, the same header
structure is used for PKEY_TYPE_EP11 and PKEY_TYPE_EP11_AES. This
reduces the number of different code paths and increases the
readability.

Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")
Signed-off-by: Holger Dengler <dengler@linux.ibm.com>
Reviewed-by: Ingo Franzki <ifranzki@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/pkey_api.c        | 2 +-
 drivers/s390/crypto/zcrypt_ep11misc.c | 4 ++--
 drivers/s390/crypto/zcrypt_ep11misc.h | 9 +--------
 3 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index e58bfd225323..ba8581e0809c 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -895,7 +895,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
 		if (ktype)
 			*ktype = PKEY_TYPE_EP11;
 		if (ksize)
-			*ksize = kb->head.keybitlen;
+			*ksize = kb->head.bitlen;
 
 		rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain,
 				    ZCRYPT_CEX7, EP11_API_V, kb->wkvp);
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c
index 958f5ee47f1b..d7ecd6ce5b7a 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.c
+++ b/drivers/s390/crypto/zcrypt_ep11misc.c
@@ -787,7 +787,7 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
 	kb->head.type = TOKTYPE_NON_CCA;
 	kb->head.len = rep_pl->data_len;
 	kb->head.version = TOKVER_EP11_AES;
-	kb->head.keybitlen = keybitsize;
+	kb->head.bitlen = keybitsize;
 
 out:
 	kfree(req);
@@ -1055,7 +1055,7 @@ static int ep11_unwrapkey(u16 card, u16 domain,
 	kb->head.type = TOKTYPE_NON_CCA;
 	kb->head.len = rep_pl->data_len;
 	kb->head.version = TOKVER_EP11_AES;
-	kb->head.keybitlen = keybitsize;
+	kb->head.bitlen = keybitsize;
 
 out:
 	kfree(req);
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h
index a3eddf51242d..67cc80d71ba3 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.h
+++ b/drivers/s390/crypto/zcrypt_ep11misc.h
@@ -29,14 +29,7 @@ struct ep11keyblob {
 	union {
 		u8 session[32];
 		/* only used for PKEY_TYPE_EP11: */
-		struct {
-			u8  type;      /* 0x00 (TOKTYPE_NON_CCA) */
-			u8  res0;      /* unused */
-			u16 len;       /* total length in bytes of this blob */
-			u8  version;   /* 0x03 (TOKVER_EP11_AES) */
-			u8  res1;      /* unused */
-			u16 keybitlen; /* clear key bit len, 0 for unknown */
-		} head;
+		struct ep11kblob_header head;
 	};
 	u8  wkvp[16];  /* wrapping key verification pattern */
 	u64 attr;      /* boolean key attributes */

From fb249ce7f7bfd8621a38e4ad401ba74b680786d4 Mon Sep 17 00:00:00 2001
From: Holger Dengler <dengler@linux.ibm.com>
Date: Tue, 25 Jul 2023 09:49:55 +0200
Subject: [PATCH 1266/2047] s390/pkey: fix PKEY_TYPE_EP11_AES handling in
 PKEY_GENSECK2 IOCTL

Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC
private keys")' introduced PKEY_TYPE_EP11_AES for the PKEY_GENSECK2
IOCTL, to enable userspace to generate securekey blobs of this
type. Unfortunately, all PKEY_GENSECK2 IOCTL requests for
PKEY_TYPE_EP11_AES return with an error (-EINVAL). Fix the handling
for PKEY_TYPE_EP11_AES in PKEY_GENSECK2 IOCTL, so that userspace can
generate securekey blobs of this type.

The start of the header and the keyblob, as well as the length need
special handling, depending on the internal keyversion. Add a helper
function that splits an uninitialized buffer into start and size of
the header as well as start and size of the payload, depending on the
requested keyversion.

Do the header-related calculations and the raw genkey request handling
in separate functions. Use the raw genkey request function for
internal purposes.

Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")
Signed-off-by: Holger Dengler <dengler@linux.ibm.com>
Reviewed-by: Ingo Franzki <ifranzki@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/pkey_api.c        |  18 +++--
 drivers/s390/crypto/zcrypt_ep11misc.c | 103 ++++++++++++++++++++++----
 drivers/s390/crypto/zcrypt_ep11misc.h |   2 +-
 3 files changed, 102 insertions(+), 21 deletions(-)

diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index ba8581e0809c..2661d6a9ea13 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -713,6 +713,11 @@ static int pkey_genseckey2(const struct pkey_apqn *apqns, size_t nr_apqns,
 		if (*keybufsize < MINEP11AESKEYBLOBSIZE)
 			return -EINVAL;
 		break;
+	case PKEY_TYPE_EP11_AES:
+		if (*keybufsize < (sizeof(struct ep11kblob_header) +
+				   MINEP11AESKEYBLOBSIZE))
+			return -EINVAL;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -729,9 +734,10 @@ static int pkey_genseckey2(const struct pkey_apqn *apqns, size_t nr_apqns,
 	for (i = 0, rc = -ENODEV; i < nr_apqns; i++) {
 		card = apqns[i].card;
 		dom = apqns[i].domain;
-		if (ktype == PKEY_TYPE_EP11) {
+		if (ktype == PKEY_TYPE_EP11 ||
+		    ktype == PKEY_TYPE_EP11_AES) {
 			rc = ep11_genaeskey(card, dom, ksize, kflags,
-					    keybuf, keybufsize);
+					    keybuf, keybufsize, ktype);
 		} else if (ktype == PKEY_TYPE_CCA_DATA) {
 			rc = cca_genseckey(card, dom, ksize, keybuf);
 			*keybufsize = (rc ? 0 : SECKEYBLOBSIZE);
@@ -1466,7 +1472,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
 		apqns = _copy_apqns_from_user(kgs.apqns, kgs.apqn_entries);
 		if (IS_ERR(apqns))
 			return PTR_ERR(apqns);
-		kkey = kmalloc(klen, GFP_KERNEL);
+		kkey = kzalloc(klen, GFP_KERNEL);
 		if (!kkey) {
 			kfree(apqns);
 			return -ENOMEM;
@@ -2130,7 +2136,8 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits,
 	for (i = 0, rc = -ENODEV; i < nr_apqns; i++) {
 		card = apqns[i] >> 16;
 		dom = apqns[i] & 0xFFFF;
-		rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize);
+		rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize,
+				    PKEY_TYPE_EP11);
 		if (rc == 0)
 			break;
 	}
@@ -2140,7 +2147,8 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits,
 	if (is_xts) {
 		keysize = MAXEP11AESKEYBLOBSIZE;
 		buf += MAXEP11AESKEYBLOBSIZE;
-		rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize);
+		rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize,
+				    PKEY_TYPE_EP11);
 		if (rc == 0)
 			return 2 * MAXEP11AESKEYBLOBSIZE;
 	}
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c
index d7ecd6ce5b7a..51f6753e01c5 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.c
+++ b/drivers/s390/crypto/zcrypt_ep11misc.c
@@ -113,6 +113,50 @@ static void __exit card_cache_free(void)
 	spin_unlock_bh(&card_list_lock);
 }
 
+static int ep11_kb_split(const u8 *kb, size_t kblen, u32 kbver,
+			 struct ep11kblob_header **kbhdr, size_t *kbhdrsize,
+			 u8 **kbpl, size_t *kbplsize)
+{
+	struct ep11kblob_header *hdr = NULL;
+	size_t hdrsize, plsize = 0;
+	int rc = -EINVAL;
+	u8 *pl = NULL;
+
+	if (kblen < sizeof(struct ep11kblob_header))
+		goto out;
+	hdr = (struct ep11kblob_header *)kb;
+
+	switch (kbver) {
+	case TOKVER_EP11_AES:
+		/* header overlays the payload */
+		hdrsize = 0;
+		break;
+	case TOKVER_EP11_ECC_WITH_HEADER:
+	case TOKVER_EP11_AES_WITH_HEADER:
+		/* payload starts after the header */
+		hdrsize = sizeof(struct ep11kblob_header);
+		break;
+	default:
+		goto out;
+	}
+
+	plsize = kblen - hdrsize;
+	pl = (u8 *)kb + hdrsize;
+
+	if (kbhdr)
+		*kbhdr = hdr;
+	if (kbhdrsize)
+		*kbhdrsize = hdrsize;
+	if (kbpl)
+		*kbpl = pl;
+	if (kbplsize)
+		*kbplsize = plsize;
+
+	rc = 0;
+out:
+	return rc;
+}
+
 /*
  * Simple check if the key blob is a valid EP11 AES key blob with header.
  */
@@ -664,8 +708,9 @@ EXPORT_SYMBOL(ep11_get_domain_info);
  */
 #define KEY_ATTR_DEFAULTS 0x00200c00
 
-int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
-		   u8 *keybuf, size_t *keybufsize)
+static int _ep11_genaeskey(u16 card, u16 domain,
+			   u32 keybitsize, u32 keygenflags,
+			   u8 *keybuf, size_t *keybufsize)
 {
 	struct keygen_req_pl {
 		struct pl_head head;
@@ -701,7 +746,6 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
 	struct ep11_cprb *req = NULL, *rep = NULL;
 	struct ep11_target_dev target;
 	struct ep11_urb *urb = NULL;
-	struct ep11keyblob *kb;
 	int api, rc = -ENOMEM;
 
 	switch (keybitsize) {
@@ -780,14 +824,9 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
 		goto out;
 	}
 
-	/* copy key blob and set header values */
+	/* copy key blob */
 	memcpy(keybuf, rep_pl->data, rep_pl->data_len);
 	*keybufsize = rep_pl->data_len;
-	kb = (struct ep11keyblob *)keybuf;
-	kb->head.type = TOKTYPE_NON_CCA;
-	kb->head.len = rep_pl->data_len;
-	kb->head.version = TOKVER_EP11_AES;
-	kb->head.bitlen = keybitsize;
 
 out:
 	kfree(req);
@@ -795,6 +834,43 @@ out:
 	kfree(urb);
 	return rc;
 }
+
+int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
+		   u8 *keybuf, size_t *keybufsize, u32 keybufver)
+{
+	struct ep11kblob_header *hdr;
+	size_t hdr_size, pl_size;
+	u8 *pl;
+	int rc;
+
+	switch (keybufver) {
+	case TOKVER_EP11_AES:
+	case TOKVER_EP11_AES_WITH_HEADER:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	rc = ep11_kb_split(keybuf, *keybufsize, keybufver,
+			   &hdr, &hdr_size, &pl, &pl_size);
+	if (rc)
+		return rc;
+
+	rc = _ep11_genaeskey(card, domain, keybitsize, keygenflags,
+			     pl, &pl_size);
+	if (rc)
+		return rc;
+
+	*keybufsize = hdr_size + pl_size;
+
+	/* update header information */
+	hdr->type = TOKTYPE_NON_CCA;
+	hdr->len = *keybufsize;
+	hdr->version = keybufver;
+	hdr->bitlen = keybitsize;
+
+	return 0;
+}
 EXPORT_SYMBOL(ep11_genaeskey);
 
 static int ep11_cryptsingle(u16 card, u16 domain,
@@ -1201,7 +1277,6 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
 		     const u8 *clrkey, u8 *keybuf, size_t *keybufsize)
 {
 	int rc;
-	struct ep11keyblob *kb;
 	u8 encbuf[64], *kek = NULL;
 	size_t clrkeylen, keklen, encbuflen = sizeof(encbuf);
 
@@ -1223,17 +1298,15 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
 	}
 
 	/* Step 1: generate AES 256 bit random kek key */
-	rc = ep11_genaeskey(card, domain, 256,
-			    0x00006c00, /* EN/DECRYPT, WRAP/UNWRAP */
-			    kek, &keklen);
+	rc = _ep11_genaeskey(card, domain, 256,
+			     0x00006c00, /* EN/DECRYPT, WRAP/UNWRAP */
+			     kek, &keklen);
 	if (rc) {
 		DEBUG_ERR(
 			"%s generate kek key failed, rc=%d\n",
 			__func__, rc);
 		goto out;
 	}
-	kb = (struct ep11keyblob *)kek;
-	memset(&kb->head, 0, sizeof(kb->head));
 
 	/* Step 2: encrypt clear key value with the kek key */
 	rc = ep11_cryptsingle(card, domain, 0, 0, def_iv, kek, keklen,
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h
index 67cc80d71ba3..2eecbd7be6e5 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.h
+++ b/drivers/s390/crypto/zcrypt_ep11misc.h
@@ -107,7 +107,7 @@ int ep11_get_domain_info(u16 card, u16 domain, struct ep11_domain_info *info);
  * Generate (random) EP11 AES secure key.
  */
 int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
-		   u8 *keybuf, size_t *keybufsize);
+		   u8 *keybuf, size_t *keybufsize, u32 keybufver);
 
 /*
  * Generate EP11 AES secure key with given clear key value.

From da2863f15945de100b95c72d5656541d30956c5d Mon Sep 17 00:00:00 2001
From: Holger Dengler <dengler@linux.ibm.com>
Date: Tue, 25 Jul 2023 11:24:47 +0200
Subject: [PATCH 1267/2047] s390/pkey: fix PKEY_TYPE_EP11_AES handling in
 PKEY_CLR2SECK2 IOCTL

Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC
private keys")' introduced PKEY_TYPE_EP11_AES for the PKEY_CLR2SECK2
IOCTL to convert an AES clearkey into a securekey of this type.
Unfortunately, all PKEY_CLR2SECK2 IOCTL requests with type
PKEY_TYPE_EP11_AES return with an error (-EINVAL). Fix the handling
for PKEY_TYPE_EP11_AES in PKEY_CLR2SECK2 IOCTL, so that userspace can
convert clearkey blobs into PKEY_TYPE_EP11_AES securekey blobs.

Cc: stable@vger.kernel.org # v5.10+
Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")
Signed-off-by: Holger Dengler <dengler@linux.ibm.com>
Reviewed-by: Ingo Franzki <ifranzki@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/pkey_api.c        | 16 +++++--
 drivers/s390/crypto/zcrypt_ep11misc.c | 61 ++++++++++++++++++++-------
 drivers/s390/crypto/zcrypt_ep11misc.h |  3 +-
 3 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 2661d6a9ea13..7543757c82e2 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -272,7 +272,8 @@ static int pkey_clr2ep11key(const u8 *clrkey, size_t clrkeylen,
 		card = apqns[i] >> 16;
 		dom = apqns[i] & 0xFFFF;
 		rc = ep11_clr2keyblob(card, dom, clrkeylen * 8,
-				      0, clrkey, keybuf, keybuflen);
+				      0, clrkey, keybuf, keybuflen,
+				      PKEY_TYPE_EP11);
 		if (rc == 0)
 			break;
 	}
@@ -775,6 +776,11 @@ static int pkey_clr2seckey2(const struct pkey_apqn *apqns, size_t nr_apqns,
 		if (*keybufsize < MINEP11AESKEYBLOBSIZE)
 			return -EINVAL;
 		break;
+	case PKEY_TYPE_EP11_AES:
+		if (*keybufsize < (sizeof(struct ep11kblob_header) +
+				   MINEP11AESKEYBLOBSIZE))
+			return -EINVAL;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -793,9 +799,11 @@ static int pkey_clr2seckey2(const struct pkey_apqn *apqns, size_t nr_apqns,
 	for (i = 0, rc = -ENODEV; i < nr_apqns; i++) {
 		card = apqns[i].card;
 		dom = apqns[i].domain;
-		if (ktype == PKEY_TYPE_EP11) {
+		if (ktype == PKEY_TYPE_EP11 ||
+		    ktype == PKEY_TYPE_EP11_AES) {
 			rc = ep11_clr2keyblob(card, dom, ksize, kflags,
-					      clrkey, keybuf, keybufsize);
+					      clrkey, keybuf, keybufsize,
+					      ktype);
 		} else if (ktype == PKEY_TYPE_CCA_DATA) {
 			rc = cca_clr2seckey(card, dom, ksize,
 					    clrkey, keybuf);
@@ -1514,7 +1522,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
 		apqns = _copy_apqns_from_user(kcs.apqns, kcs.apqn_entries);
 		if (IS_ERR(apqns))
 			return PTR_ERR(apqns);
-		kkey = kmalloc(klen, GFP_KERNEL);
+		kkey = kzalloc(klen, GFP_KERNEL);
 		if (!kkey) {
 			kfree(apqns);
 			return -ENOMEM;
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c
index 51f6753e01c5..355d30bc0aac 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.c
+++ b/drivers/s390/crypto/zcrypt_ep11misc.c
@@ -1000,12 +1000,12 @@ out:
 	return rc;
 }
 
-static int ep11_unwrapkey(u16 card, u16 domain,
-			  const u8 *kek, size_t keksize,
-			  const u8 *enckey, size_t enckeysize,
-			  u32 mech, const u8 *iv,
-			  u32 keybitsize, u32 keygenflags,
-			  u8 *keybuf, size_t *keybufsize)
+static int _ep11_unwrapkey(u16 card, u16 domain,
+			   const u8 *kek, size_t keksize,
+			   const u8 *enckey, size_t enckeysize,
+			   u32 mech, const u8 *iv,
+			   u32 keybitsize, u32 keygenflags,
+			   u8 *keybuf, size_t *keybufsize)
 {
 	struct uw_req_pl {
 		struct pl_head head;
@@ -1042,7 +1042,6 @@ static int ep11_unwrapkey(u16 card, u16 domain,
 	struct ep11_cprb *req = NULL, *rep = NULL;
 	struct ep11_target_dev target;
 	struct ep11_urb *urb = NULL;
-	struct ep11keyblob *kb;
 	size_t req_pl_size;
 	int api, rc = -ENOMEM;
 	u8 *p;
@@ -1124,14 +1123,9 @@ static int ep11_unwrapkey(u16 card, u16 domain,
 		goto out;
 	}
 
-	/* copy key blob and set header values */
+	/* copy key blob */
 	memcpy(keybuf, rep_pl->data, rep_pl->data_len);
 	*keybufsize = rep_pl->data_len;
-	kb = (struct ep11keyblob *)keybuf;
-	kb->head.type = TOKTYPE_NON_CCA;
-	kb->head.len = rep_pl->data_len;
-	kb->head.version = TOKVER_EP11_AES;
-	kb->head.bitlen = keybitsize;
 
 out:
 	kfree(req);
@@ -1140,6 +1134,42 @@ out:
 	return rc;
 }
 
+static int ep11_unwrapkey(u16 card, u16 domain,
+			  const u8 *kek, size_t keksize,
+			  const u8 *enckey, size_t enckeysize,
+			  u32 mech, const u8 *iv,
+			  u32 keybitsize, u32 keygenflags,
+			  u8 *keybuf, size_t *keybufsize,
+			  u8 keybufver)
+{
+	struct ep11kblob_header *hdr;
+	size_t hdr_size, pl_size;
+	u8 *pl;
+	int rc;
+
+	rc = ep11_kb_split(keybuf, *keybufsize, keybufver,
+			   &hdr, &hdr_size, &pl, &pl_size);
+	if (rc)
+		return rc;
+
+	rc = _ep11_unwrapkey(card, domain, kek, keksize, enckey, enckeysize,
+			     mech, iv, keybitsize, keygenflags,
+			     pl, &pl_size);
+	if (rc)
+		return rc;
+
+	*keybufsize = hdr_size + pl_size;
+
+	/* update header information */
+	hdr = (struct ep11kblob_header *)keybuf;
+	hdr->type = TOKTYPE_NON_CCA;
+	hdr->len = *keybufsize;
+	hdr->version = keybufver;
+	hdr->bitlen = keybitsize;
+
+	return 0;
+}
+
 static int ep11_wrapkey(u16 card, u16 domain,
 			const u8 *key, size_t keysize,
 			u32 mech, const u8 *iv,
@@ -1274,7 +1304,8 @@ out:
 }
 
 int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
-		     const u8 *clrkey, u8 *keybuf, size_t *keybufsize)
+		     const u8 *clrkey, u8 *keybuf, size_t *keybufsize,
+		     u32 keytype)
 {
 	int rc;
 	u8 encbuf[64], *kek = NULL;
@@ -1321,7 +1352,7 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
 	/* Step 3: import the encrypted key value as a new key */
 	rc = ep11_unwrapkey(card, domain, kek, keklen,
 			    encbuf, encbuflen, 0, def_iv,
-			    keybitsize, 0, keybuf, keybufsize);
+			    keybitsize, 0, keybuf, keybufsize, keytype);
 	if (rc) {
 		DEBUG_ERR(
 			"%s importing key value as new key failed,, rc=%d\n",
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h
index 2eecbd7be6e5..b611cf64231d 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.h
+++ b/drivers/s390/crypto/zcrypt_ep11misc.h
@@ -113,7 +113,8 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
  * Generate EP11 AES secure key with given clear key value.
  */
 int ep11_clr2keyblob(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
-		     const u8 *clrkey, u8 *keybuf, size_t *keybufsize);
+		     const u8 *clrkey, u8 *keybuf, size_t *keybufsize,
+		     u32 keytype);
 
 /*
  * Build a list of ep11 apqns meeting the following constrains:

From d1fdfb0b2f339cf882c0b5431084a1950b8b73b9 Mon Sep 17 00:00:00 2001
From: Holger Dengler <dengler@linux.ibm.com>
Date: Wed, 26 Jul 2023 16:22:19 +0200
Subject: [PATCH 1268/2047] s390/pkey: fix PKEY_TYPE_EP11_AES handling in
 PKEY_KBLOB2PROTK[23]

Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC
private keys")' introduced a new PKEY_TYPE_EP11_AES type for the
PKEY_KBLOB2PROTK2 and a new IOCTL, PKEY_KBLOB2PROTK3, which both
allows userspace to convert opaque securekey blobs of this type into
protectedkey blobs. Unfortunately, all PKEY_KBLOB2PROTK2 and
PKEY_KBLOB2PROTK3 IOCTL requests with this keyblobs of this type
return with an error (-EINVAL). Fix PKEY_TYPE_EP11_AES handling in
PKEY_KBLOB2PROTK2 and PKEY_KBLOB2PROTK3 IOCTLs, so that userspace can
convert PKEY_TYPE_EP11_AES keyblobs into protectedkey blobs.

Add a helper function to decode the start and size of the internal
header as well as start and size of the keyblob payload of an existing
keyblob. Also validate the length of header and keyblob, as well as
the keyblob magic.

Introduce another helper function, which handles a raw key wrapping
request and do the keyblob decoding in the calling function. Remove
all other header-related calculations.

Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")
Signed-off-by: Holger Dengler <dengler@linux.ibm.com>
Reviewed-by: Ingo Franzki <ifranzki@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/pkey_api.c        |  33 +++----
 drivers/s390/crypto/zcrypt_ep11misc.c | 123 ++++++++++++++++----------
 drivers/s390/crypto/zcrypt_ep11misc.h |   6 ++
 3 files changed, 100 insertions(+), 62 deletions(-)

diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 7543757c82e2..75d7f0d5f14e 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -288,10 +288,9 @@ out:
 /*
  * Find card and transform EP11 secure key into protected key.
  */
-static int pkey_ep11key2pkey(const u8 *key, u8 *protkey,
-			     u32 *protkeylen, u32 *protkeytype)
+static int pkey_ep11key2pkey(const u8 *key, size_t keylen,
+			     u8 *protkey, u32 *protkeylen, u32 *protkeytype)
 {
-	struct ep11keyblob *kb = (struct ep11keyblob *)key;
 	u32 nr_apqns, *apqns = NULL;
 	u16 card, dom;
 	int i, rc;
@@ -300,7 +299,8 @@ static int pkey_ep11key2pkey(const u8 *key, u8 *protkey,
 
 	/* build a list of apqns suitable for this key */
 	rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF,
-			    ZCRYPT_CEX7, EP11_API_V, kb->wkvp);
+			    ZCRYPT_CEX7, EP11_API_V,
+			    ep11_kb_wkvp(key, keylen));
 	if (rc)
 		goto out;
 
@@ -308,7 +308,7 @@ static int pkey_ep11key2pkey(const u8 *key, u8 *protkey,
 	for (rc = -ENODEV, i = 0; i < nr_apqns; i++) {
 		card = apqns[i] >> 16;
 		dom = apqns[i] & 0xFFFF;
-		rc = ep11_kblob2protkey(card, dom, key, kb->head.len,
+		rc = ep11_kblob2protkey(card, dom, key, keylen,
 					protkey, protkeylen, protkeytype);
 		if (rc == 0)
 			break;
@@ -496,7 +496,7 @@ try_via_ep11:
 			      tmpbuf, &tmpbuflen);
 	if (rc)
 		goto failure;
-	rc = pkey_ep11key2pkey(tmpbuf,
+	rc = pkey_ep11key2pkey(tmpbuf, tmpbuflen,
 			       protkey, protkeylen, protkeytype);
 	if (!rc)
 		goto out;
@@ -612,7 +612,7 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen,
 		rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1);
 		if (rc)
 			goto out;
-		rc = pkey_ep11key2pkey(key,
+		rc = pkey_ep11key2pkey(key, keylen,
 				       protkey, protkeylen, protkeytype);
 		break;
 	}
@@ -621,7 +621,7 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen,
 		rc = ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1);
 		if (rc)
 			goto out;
-		rc = pkey_ep11key2pkey(key + sizeof(struct ep11kblob_header),
+		rc = pkey_ep11key2pkey(key, keylen,
 				       protkey, protkeylen, protkeytype);
 		break;
 	default:
@@ -963,10 +963,12 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns,
 		}
 	} else if (hdr->type == TOKTYPE_NON_CCA) {
 		if (hdr->version == TOKVER_EP11_AES) {
-			if (keylen < sizeof(struct ep11keyblob))
-				return -EINVAL;
 			if (ep11_check_aes_key(debug_info, 3, key, keylen, 1))
 				return -EINVAL;
+		} else if (hdr->version == TOKVER_EP11_AES_WITH_HEADER) {
+			if (ep11_check_aes_key_with_hdr(debug_info, 3,
+							key, keylen, 1))
+				return -EINVAL;
 		} else {
 			return pkey_nonccatok2pkey(key, keylen,
 						   protkey, protkeylen,
@@ -994,10 +996,7 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns,
 						protkey, protkeylen,
 						protkeytype);
 		} else {
-			/* EP11 AES secure key blob */
-			struct ep11keyblob *kb = (struct ep11keyblob *)key;
-
-			rc = ep11_kblob2protkey(card, dom, key, kb->head.len,
+			rc = ep11_kblob2protkey(card, dom, key, keylen,
 						protkey, protkeylen,
 						protkeytype);
 		}
@@ -1257,12 +1256,14 @@ static int pkey_keyblob2pkey3(const struct pkey_apqn *apqns, size_t nr_apqns,
 		     hdr->version == TOKVER_EP11_ECC_WITH_HEADER) &&
 		    is_ep11_keyblob(key + sizeof(struct ep11kblob_header)))
 			rc = ep11_kblob2protkey(card, dom, key, hdr->len,
-						protkey, protkeylen, protkeytype);
+						protkey, protkeylen,
+						protkeytype);
 		else if (hdr->type == TOKTYPE_NON_CCA &&
 			 hdr->version == TOKVER_EP11_AES &&
 			 is_ep11_keyblob(key))
 			rc = ep11_kblob2protkey(card, dom, key, hdr->len,
-						protkey, protkeylen, protkeytype);
+						protkey, protkeylen,
+						protkeytype);
 		else if (hdr->type == TOKTYPE_CCA_INTERNAL &&
 			 hdr->version == TOKVER_CCA_AES)
 			rc = cca_sec2protkey(card, dom, key, protkey,
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c
index 355d30bc0aac..669ad6f5d5b0 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.c
+++ b/drivers/s390/crypto/zcrypt_ep11misc.c
@@ -157,6 +157,65 @@ out:
 	return rc;
 }
 
+static int ep11_kb_decode(const u8 *kb, size_t kblen,
+			  struct ep11kblob_header **kbhdr, size_t *kbhdrsize,
+			  struct ep11keyblob **kbpl, size_t *kbplsize)
+{
+	struct ep11kblob_header *tmph, *hdr = NULL;
+	size_t hdrsize = 0, plsize = 0;
+	struct ep11keyblob *pl = NULL;
+	int rc = -EINVAL;
+	u8 *tmpp;
+
+	if (kblen < sizeof(struct ep11kblob_header))
+		goto out;
+	tmph = (struct ep11kblob_header *)kb;
+
+	if (tmph->type != TOKTYPE_NON_CCA &&
+	    tmph->len > kblen)
+		goto out;
+
+	if (ep11_kb_split(kb, kblen, tmph->version,
+			  &hdr, &hdrsize, &tmpp, &plsize))
+		goto out;
+
+	if (plsize < sizeof(struct ep11keyblob))
+		goto out;
+
+	if (!is_ep11_keyblob(tmpp))
+		goto out;
+
+	pl = (struct ep11keyblob *)tmpp;
+	plsize = hdr->len - hdrsize;
+
+	if (kbhdr)
+		*kbhdr = hdr;
+	if (kbhdrsize)
+		*kbhdrsize = hdrsize;
+	if (kbpl)
+		*kbpl = pl;
+	if (kbplsize)
+		*kbplsize = plsize;
+
+	rc = 0;
+out:
+	return rc;
+}
+
+/*
+ * For valid ep11 keyblobs, returns a reference to the wrappingkey verification
+ * pattern. Otherwise NULL.
+ */
+const u8 *ep11_kb_wkvp(const u8 *keyblob, size_t keybloblen)
+{
+	struct ep11keyblob *kb;
+
+	if (ep11_kb_decode(keyblob, keybloblen, NULL, NULL, &kb, NULL))
+		return NULL;
+	return kb->wkvp;
+}
+EXPORT_SYMBOL(ep11_kb_wkvp);
+
 /*
  * Simple check if the key blob is a valid EP11 AES key blob with header.
  */
@@ -1170,10 +1229,10 @@ static int ep11_unwrapkey(u16 card, u16 domain,
 	return 0;
 }
 
-static int ep11_wrapkey(u16 card, u16 domain,
-			const u8 *key, size_t keysize,
-			u32 mech, const u8 *iv,
-			u8 *databuf, size_t *datasize)
+static int _ep11_wrapkey(u16 card, u16 domain,
+			 const u8 *key, size_t keysize,
+			 u32 mech, const u8 *iv,
+			 u8 *databuf, size_t *datasize)
 {
 	struct wk_req_pl {
 		struct pl_head head;
@@ -1203,20 +1262,10 @@ static int ep11_wrapkey(u16 card, u16 domain,
 	struct ep11_cprb *req = NULL, *rep = NULL;
 	struct ep11_target_dev target;
 	struct ep11_urb *urb = NULL;
-	struct ep11keyblob *kb;
 	size_t req_pl_size;
 	int api, rc = -ENOMEM;
-	bool has_header = false;
 	u8 *p;
 
-	/* maybe the session field holds a header with key info */
-	kb = (struct ep11keyblob *)key;
-	if (kb->head.type == TOKTYPE_NON_CCA &&
-	    kb->head.version == TOKVER_EP11_AES) {
-		has_header = true;
-		keysize = min_t(size_t, kb->head.len, keysize);
-	}
-
 	/* request cprb and payload */
 	req_pl_size = sizeof(struct wk_req_pl) + (iv ? 16 : 0)
 		+ ASN1TAGLEN(keysize) + 4;
@@ -1241,11 +1290,6 @@ static int ep11_wrapkey(u16 card, u16 domain,
 	}
 	/* key blob */
 	p += asn1tag_write(p, 0x04, key, keysize);
-	/* maybe the key argument needs the head data cleaned out */
-	if (has_header) {
-		kb = (struct ep11keyblob *)(p - keysize);
-		memset(&kb->head, 0, sizeof(kb->head));
-	}
 	/* empty kek tag */
 	*p++ = 0x04;
 	*p++ = 0;
@@ -1366,11 +1410,12 @@ out:
 }
 EXPORT_SYMBOL(ep11_clr2keyblob);
 
-int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen,
+int ep11_kblob2protkey(u16 card, u16 dom,
+		       const u8 *keyblob, size_t keybloblen,
 		       u8 *protkey, u32 *protkeylen, u32 *protkeytype)
 {
-	int rc = -EIO;
-	u8 *wkbuf = NULL;
+	struct ep11kblob_header *hdr;
+	struct ep11keyblob *key;
 	size_t wkbuflen, keylen;
 	struct wk_info {
 		u16 version;
@@ -1381,31 +1426,17 @@ int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen,
 		u8  res2[8];
 		u8  pkey[];
 	} __packed * wki;
-	const u8 *key;
-	struct ep11kblob_header *hdr;
+	u8 *wkbuf = NULL;
+	int rc = -EIO;
 
-	/* key with or without header ? */
-	hdr = (struct ep11kblob_header *)keyblob;
-	if (hdr->type == TOKTYPE_NON_CCA &&
-	    (hdr->version == TOKVER_EP11_AES_WITH_HEADER ||
-	     hdr->version == TOKVER_EP11_ECC_WITH_HEADER) &&
-	    is_ep11_keyblob(keyblob + sizeof(struct ep11kblob_header))) {
-		/* EP11 AES or ECC key with header */
-		key = keyblob + sizeof(struct ep11kblob_header);
-		keylen = hdr->len - sizeof(struct ep11kblob_header);
-	} else if (hdr->type == TOKTYPE_NON_CCA &&
-		   hdr->version == TOKVER_EP11_AES &&
-		   is_ep11_keyblob(keyblob)) {
-		/* EP11 AES key (old style) */
-		key = keyblob;
-		keylen = hdr->len;
-	} else if (is_ep11_keyblob(keyblob)) {
-		/* raw EP11 key blob */
-		key = keyblob;
-		keylen = keybloblen;
-	} else {
+	if (ep11_kb_decode((u8 *)keyblob, keybloblen, &hdr, NULL, &key, &keylen))
 		return -EINVAL;
+
+	if (hdr->version == TOKVER_EP11_AES) {
+		/* wipe overlayed header */
+		memset(hdr, 0, sizeof(*hdr));
 	}
+	/* !!! hdr is no longer a valid header !!! */
 
 	/* alloc temp working buffer */
 	wkbuflen = (keylen + AES_BLOCK_SIZE) & (~(AES_BLOCK_SIZE - 1));
@@ -1414,8 +1445,8 @@ int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen,
 		return -ENOMEM;
 
 	/* ep11 secure key -> protected key + info */
-	rc = ep11_wrapkey(card, dom, key, keylen,
-			  0, def_iv, wkbuf, &wkbuflen);
+	rc = _ep11_wrapkey(card, dom, (u8 *)key, keylen,
+			   0, def_iv, wkbuf, &wkbuflen);
 	if (rc) {
 		DEBUG_ERR(
 			"%s rewrapping ep11 key to pkey failed, rc=%d\n",
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h
index b611cf64231d..a0de1cccebbe 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.h
+++ b/drivers/s390/crypto/zcrypt_ep11misc.h
@@ -48,6 +48,12 @@ static inline bool is_ep11_keyblob(const u8 *key)
 	return (kb->version == EP11_STRUCT_MAGIC);
 }
 
+/*
+ * For valid ep11 keyblobs, returns a reference to the wrappingkey verification
+ * pattern. Otherwise NULL.
+ */
+const u8 *ep11_kb_wkvp(const u8 *kblob, size_t kbloblen);
+
 /*
  * Simple check if the key blob is a valid EP11 AES key blob with header.
  * If checkcpacfexport is enabled, the key is also checked for the

From 745742dbca11a1b63684ec7032a81aaedcf51fb0 Mon Sep 17 00:00:00 2001
From: Holger Dengler <dengler@linux.ibm.com>
Date: Tue, 25 Jul 2023 13:05:36 +0200
Subject: [PATCH 1269/2047] s390/pkey: fix PKEY_TYPE_EP11_AES handling in
 PKEY_VERIFYKEY2 IOCTL

Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC
private keys")' introduced a new PKEY_TYPE_EP11_AES type for the
PKEY_VERIFYKEY2 IOCTL to verify keyblobs of this type. Unfortunately,
all PKEY_VERIFYKEY2 IOCTL requests with keyblobs of this type return
with an error (-EINVAL). Fix PKEY_TYPE_EP11_AES handling in
PKEY_VERIFYKEY2 IOCTL, so that userspace can verify keyblobs of this
type.

Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")
Signed-off-by: Holger Dengler <dengler@linux.ibm.com>
Reviewed-by: Ingo Franzki <ifranzki@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/pkey_api.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 75d7f0d5f14e..8d6f35ccc561 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -912,7 +912,8 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
 			*ksize = kb->head.bitlen;
 
 		rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain,
-				    ZCRYPT_CEX7, EP11_API_V, kb->wkvp);
+				    ZCRYPT_CEX7, EP11_API_V,
+				    ep11_kb_wkvp(key, keylen));
 		if (rc)
 			goto out;
 
@@ -922,6 +923,30 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
 		*cardnr = ((struct pkey_apqn *)_apqns)->card;
 		*domain = ((struct pkey_apqn *)_apqns)->domain;
 
+	} else if (hdr->type == TOKTYPE_NON_CCA &&
+		   hdr->version == TOKVER_EP11_AES_WITH_HEADER) {
+		struct ep11kblob_header *kh = (struct ep11kblob_header *)key;
+
+		rc = ep11_check_aes_key_with_hdr(debug_info, 3,
+						 key, keylen, 1);
+		if (rc)
+			goto out;
+		if (ktype)
+			*ktype = PKEY_TYPE_EP11_AES;
+		if (ksize)
+			*ksize = kh->bitlen;
+
+		rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain,
+				    ZCRYPT_CEX7, EP11_API_V,
+				    ep11_kb_wkvp(key, keylen));
+		if (rc)
+			goto out;
+
+		if (flags)
+			*flags = PKEY_FLAGS_MATCH_CUR_MKVP;
+
+		*cardnr = ((struct pkey_apqn *)_apqns)->card;
+		*domain = ((struct pkey_apqn *)_apqns)->domain;
 	} else {
 		rc = -EINVAL;
 	}

From b9352e4b9b9eff949bcc6907b8569b3a1d992f1e Mon Sep 17 00:00:00 2001
From: Holger Dengler <dengler@linux.ibm.com>
Date: Fri, 4 Aug 2023 16:02:58 +0200
Subject: [PATCH 1270/2047] s390/pkey: fix PKEY_TYPE_EP11_AES handling for
 sysfs attributes

Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC
private keys")' introduced a new PKEY_TYPE_EP11_AES securekey type as
a supplement to the existing PKEY_TYPE_EP11 (which won't work in
environments with session-bound keys). The pkey EP11 securekey
attributes use PKEY_TYPE_EP11_AES (instead of PKEY_TYPE_EP11)
keyblobs, to make the generated keyblobs usable also in environments,
where session-bound keys are required.

There should be no negative impacts to userspace because the internal
structure of the keyblobs is opaque. The increased size of the
generated keyblobs is reflected by the changed size of the attributes.

Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")
Signed-off-by: Holger Dengler <dengler@linux.ibm.com>
Reviewed-by: Ingo Franzki <ifranzki@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/uapi/asm/pkey.h | 2 +-
 drivers/s390/crypto/pkey_api.c    | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index 5faf0a1d2c16..5ad76471e73f 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -26,7 +26,7 @@
 #define MAXCLRKEYSIZE	32	   /* a clear key value may be up to 32 bytes */
 #define MAXAESCIPHERKEYSIZE 136  /* our aes cipher keys have always 136 bytes */
 #define MINEP11AESKEYBLOBSIZE 256  /* min EP11 AES key blob size  */
-#define MAXEP11AESKEYBLOBSIZE 320  /* max EP11 AES key blob size */
+#define MAXEP11AESKEYBLOBSIZE 336  /* max EP11 AES key blob size */
 
 /* Minimum size of a key blob */
 #define MINKEYBLOBSIZE	SECKEYBLOBSIZE
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 8d6f35ccc561..396a159afdf5 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -2142,7 +2142,7 @@ static struct attribute_group ccacipher_attr_group = {
  * (i.e. off != 0 or count < key blob size) -EINVAL is returned.
  * This function and the sysfs attributes using it provide EP11 key blobs
  * padded to the upper limit of MAXEP11AESKEYBLOBSIZE which is currently
- * 320 bytes.
+ * 336 bytes.
  */
 static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits,
 				       bool is_xts, char *buf, loff_t off,
@@ -2171,7 +2171,7 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits,
 		card = apqns[i] >> 16;
 		dom = apqns[i] & 0xFFFF;
 		rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize,
-				    PKEY_TYPE_EP11);
+				    PKEY_TYPE_EP11_AES);
 		if (rc == 0)
 			break;
 	}
@@ -2182,7 +2182,7 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits,
 		keysize = MAXEP11AESKEYBLOBSIZE;
 		buf += MAXEP11AESKEYBLOBSIZE;
 		rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize,
-				    PKEY_TYPE_EP11);
+				    PKEY_TYPE_EP11_AES);
 		if (rc == 0)
 			return 2 * MAXEP11AESKEYBLOBSIZE;
 	}

From cba33db3fc4dbf2e54294b0e499d2335a3a00d78 Mon Sep 17 00:00:00 2001
From: Holger Dengler <dengler@linux.ibm.com>
Date: Wed, 9 Aug 2023 14:23:45 +0200
Subject: [PATCH 1271/2047] s390/paes: fix PKEY_TYPE_EP11_AES handling for
 secure keyblobs

Commit 'fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC
private keys")' introduced PKEY_TYPE_EP11_AES securekey blobs as a
supplement to the PKEY_TYPE_EP11 (which won't work in environments
with session-bound keys). This new keyblobs has a different maximum
size, so fix paes crypto module to accept also these larger keyblobs.

Fixes: fa6999e326fe ("s390/pkey: support CCA and EP11 secure ECC private keys")
Signed-off-by: Holger Dengler <dengler@linux.ibm.com>
Reviewed-by: Ingo Franzki <ifranzki@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/crypto/paes_s390.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 38349150c96e..8b541e44151d 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -35,7 +35,7 @@
  * and padding is also possible, the limits need to be generous.
  */
 #define PAES_MIN_KEYSIZE 16
-#define PAES_MAX_KEYSIZE 320
+#define PAES_MAX_KEYSIZE MAXEP11AESKEYBLOBSIZE
 
 static u8 *ctrblk;
 static DEFINE_MUTEX(ctrblk_lock);

From 63304558ba5dcaaff9e052ee43cfdcc7f9c29e85 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 16 Aug 2023 15:40:59 +0200
Subject: [PATCH 1272/2047] sched/eevdf: Curb wakeup-preemption

Mike and others noticed that EEVDF does like to over-schedule quite a
bit -- which does hurt performance of a number of benchmarks /
workloads.

In particular, what seems to cause over-scheduling is that when lag is
of the same order (or larger) than the request / slice then placement
will not only cause the task to be placed left of current, but also
with a smaller deadline than current, which causes immediate
preemption.

[ notably, lag bounds are relative to HZ ]

Mike suggested we stick to picking 'current' for as long as it's
eligible to run, giving it uninterrupted runtime until it reaches
parity with the pack.

Augment Mike's suggestion by only allowing it to exhaust it's initial
request.

One random data point:

echo NO_RUN_TO_PARITY > /debug/sched/features
perf stat -a -e context-switches --repeat 10 -- perf bench sched messaging -g 20 -t -l 5000

	3,723,554        context-switches      ( +-  0.56% )
	9.5136 +- 0.0394 seconds time elapsed  ( +-  0.41% )

echo RUN_TO_PARITY > /debug/sched/features
perf stat -a -e context-switches --repeat 10 -- perf bench sched messaging -g 20 -t -l 5000

	2,556,535        context-switches      ( +-  0.51% )
	9.2427 +- 0.0302 seconds time elapsed  ( +-  0.33% )

Suggested-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230816134059.GC982867@hirez.programming.kicks-ass.net
---
 kernel/sched/fair.c     | 12 ++++++++++++
 kernel/sched/features.h |  1 +
 2 files changed, 13 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f496cef90ce7..0b7445cd5af9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -873,6 +873,13 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
 	if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
 		curr = NULL;
 
+	/*
+	 * Once selected, run a task until it either becomes non-eligible or
+	 * until it gets a new slice. See the HACK in set_next_entity().
+	 */
+	if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
+		return curr;
+
 	while (node) {
 		struct sched_entity *se = __node_2_se(node);
 
@@ -5167,6 +5174,11 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		update_stats_wait_end_fair(cfs_rq, se);
 		__dequeue_entity(cfs_rq, se);
 		update_load_avg(cfs_rq, se, UPDATE_TG);
+		/*
+		 * HACK, stash a copy of deadline at the point of pick in vlag,
+		 * which isn't used until dequeue.
+		 */
+		se->vlag = se->deadline;
 	}
 
 	update_stats_curr_start(cfs_rq, se);
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 61bcbf5e46a4..f770168230ae 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -6,6 +6,7 @@
  */
 SCHED_FEAT(PLACE_LAG, true)
 SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
+SCHED_FEAT(RUN_TO_PARITY, true)
 
 /*
  * Prefer to schedule the task we woke last (assuming it failed

From bff24699b94a34c5fcb8d3283794e7d39adb092c Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Mon, 14 Aug 2023 19:17:49 +0300
Subject: [PATCH 1273/2047] tpm_tis: Revert "tpm_tis: Disable interrupts on
 ThinkPad T490s"

Since for MMIO driver using FIFO registers, also known as tpm_tis, the
default (and tbh recommended) behaviour is now the polling mode, the
"tristate" workaround is no longer for benefit.

If someone wants to explicitly enable IRQs for a TPM chip that should be
without question allowed. It could very well be a piece hardware in the
existing deny list because of e.g. firmware update or something similar.

While at it, document the module parameter, as this was not done in 2006
when it first appeared in the mainline.

Link: https://lore.kernel.org/linux-integrity/20201015214430.17937-1-jsnitsel@redhat.com/
Link: https://lore.kernel.org/all/1145393776.4829.19.camel@localhost.localdomain/
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 .../admin-guide/kernel-parameters.txt         |  7 ++
 drivers/char/tpm/tpm_tis.c                    | 93 +------------------
 2 files changed, 9 insertions(+), 91 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 722b6eca2e93..155101120438 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6340,6 +6340,13 @@
 			This will guarantee that all the other pcrs
 			are saved.
 
+	tpm_tis.interrupts= [HW,TPM]
+			Enable interrupts for the MMIO based physical layer
+			for the FIFO interface. By default it is set to false
+			(0). For more information about TPM hardware interfaces
+			defined by Trusted Computing Group (TCG) see
+			https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/
+
 	tp_printk	[FTRACE]
 			Have the tracepoints sent to printk as well as the
 			tracing ring buffer. This is useful for early boot up
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 7fa3d91042b2..077fdb73740c 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -27,7 +27,6 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/kernel.h>
-#include <linux/dmi.h>
 #include "tpm.h"
 #include "tpm_tis_core.h"
 
@@ -89,8 +88,8 @@ static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
 	tpm_tis_flush(iobase);
 }
 
-static int interrupts;
-module_param(interrupts, int, 0444);
+static bool interrupts;
+module_param(interrupts, bool, 0444);
 MODULE_PARM_DESC(interrupts, "Enable interrupts");
 
 static bool itpm;
@@ -103,92 +102,6 @@ module_param(force, bool, 0444);
 MODULE_PARM_DESC(force, "Force device probe rather than using ACPI entry");
 #endif
 
-static int tpm_tis_disable_irq(const struct dmi_system_id *d)
-{
-	if (interrupts == -1) {
-		pr_notice("tpm_tis: %s detected: disabling interrupts.\n", d->ident);
-		interrupts = 0;
-	}
-
-	return 0;
-}
-
-static const struct dmi_system_id tpm_tis_dmi_table[] = {
-	{
-		.callback = tpm_tis_disable_irq,
-		.ident = "Framework Laptop (12th Gen Intel Core)",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "Laptop (12th Gen Intel Core)"),
-		},
-	},
-	{
-		.callback = tpm_tis_disable_irq,
-		.ident = "Framework Laptop (13th Gen Intel Core)",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "Laptop (13th Gen Intel Core)"),
-		},
-	},
-	{
-		.callback = tpm_tis_disable_irq,
-		.ident = "ThinkPad T490s",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-			DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T490s"),
-		},
-	},
-	{
-		.callback = tpm_tis_disable_irq,
-		.ident = "ThinkStation P360 Tiny",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-			DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkStation P360 Tiny"),
-		},
-	},
-	{
-		.callback = tpm_tis_disable_irq,
-		.ident = "ThinkPad L490",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-			DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L490"),
-		},
-	},
-	{
-		.callback = tpm_tis_disable_irq,
-		.ident = "ThinkPad L590",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-			DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L590"),
-		},
-	},
-	{
-		.callback = tpm_tis_disable_irq,
-		.ident = "ThinkStation P620",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-			DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkStation P620"),
-		},
-	},
-	{
-		.callback = tpm_tis_disable_irq,
-		.ident = "TUXEDO InfinityBook S 15/17 Gen7",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "TUXEDO InfinityBook S 15/17 Gen7"),
-		},
-	},
-	{
-		.callback = tpm_tis_disable_irq,
-		.ident = "UPX-TGL",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "AAEON"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "UPX-TGL01"),
-		},
-	},
-	{}
-};
-
 #if defined(CONFIG_PNP) && defined(CONFIG_ACPI)
 static int has_hid(struct acpi_device *dev, const char *hid)
 {
@@ -312,8 +225,6 @@ static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info)
 	int irq = -1;
 	int rc;
 
-	dmi_check_system(tpm_tis_dmi_table);
-
 	rc = check_acpi_tpm2(dev);
 	if (rc)
 		return rc;

From 7f6fd06d34f4a9bd62bc30e9232934cfb89ae4d8 Mon Sep 17 00:00:00 2001
From: Wentong Wu <wentong.wu@intel.com>
Date: Sat, 29 Jul 2023 19:52:55 +0800
Subject: [PATCH 1274/2047] ACPI: scan: Defer enumeration of devices with a
 _DEP pointing to IVSC device

Inside IVSC, switching ownership requires an interface with two
different hardware modules, ACE and CSI. The software interface
to these modules is based on Intel MEI framework. Usually mei
client devices are dynamically created, so the info of consumers
depending on mei client devices is not present in the firmware
tables.

This causes problems with the probe ordering with respect to
drivers for consumers of these MEI client devices. But on these
camera sensor devices, the ACPI nodes describing the sensors all
have a _DEP dependency on the matching MEI bus ACPI device, so
adding IVSC MEI bus ACPI device to acpi_honor_dep_ids allows
solving the probe-ordering problem by deferring the enumeration of
ACPI-devices which have a _DEP dependency on an IVSC mei bus ACPI
device.

On TGL platform, the HID of IVSC MEI bus ACPI device is INTC1059,
and on ADL platform, the HID is INTC1095. So add both of them to
acpi_honor_dep_ids.

Signed-off-by: Wentong Wu <wentong.wu@intel.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/scan.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 87e385542576..c799b03d0369 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -795,6 +795,9 @@ static const char * const acpi_ignore_dep_ids[] = {
 /* List of HIDs for which we honor deps of matching ACPI devs, when checking _DEP lists. */
 static const char * const acpi_honor_dep_ids[] = {
 	"INT3472", /* Camera sensor PMIC / clk and regulator info */
+	"INTC1059", /* IVSC (TGL) driver must be loaded to allow i2c access to camera sensors */
+	"INTC1095", /* IVSC (ADL) driver must be loaded to allow i2c access to camera sensors */
+	"INTC100A", /* IVSC (RPL) driver must be loaded to allow i2c access to camera sensors */
 	NULL
 };
 

From f6fcf03ce8a9b8c135cf47f4978617bdf2829711 Mon Sep 17 00:00:00 2001
From: Bibo Mao <maobibo@loongson.cn>
Date: Mon, 17 Jul 2023 10:22:58 +0800
Subject: [PATCH 1275/2047] ACPI: processor: LoongArch: Get physical ID from
 MADT

With ACPI Spec 6.5 chapter 5.2.12.20, each processor in LoongArch
systems has a Core Programmable Interrupt Controller in MADT. The
value of its type is 0x11 in the spec and defined as enum variable
ACPI_MADT_TYPE_CORE_PIC in the Linux kernel.

Physical IDs can be retrieved from MADT for LoongArch systems during
initialization and they can be retrieved from the _MAT output for
hotplug CPUs.

Add physical CPU ID enumeration for LoongArch systems.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/processor_core.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index d6606a9f2da6..7dd6dbaa98c3 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -132,6 +132,30 @@ static int map_rintc_hartid(struct acpi_subtable_header *entry,
 	return -EINVAL;
 }
 
+/*
+ * Retrieve LoongArch CPU physical id
+ */
+static int map_core_pic_id(struct acpi_subtable_header *entry,
+		int device_declaration, u32 acpi_id, phys_cpuid_t *phys_id)
+{
+	struct acpi_madt_core_pic *core_pic =
+		container_of(entry, struct acpi_madt_core_pic, header);
+
+	if (!(core_pic->flags & ACPI_MADT_ENABLED))
+		return -ENODEV;
+
+	/* device_declaration means Device object in DSDT, in LoongArch
+	 * system, logical processor acpi_id is required in _UID property
+	 * of DSDT table, so we should check device_declaration here
+	 */
+	if (device_declaration && (core_pic->processor_id == acpi_id)) {
+		*phys_id = core_pic->core_id;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
 static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
 				   int type, u32 acpi_id)
 {
@@ -165,6 +189,9 @@ static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
 		} else if (header->type == ACPI_MADT_TYPE_RINTC) {
 			if (!map_rintc_hartid(header, type, acpi_id, &phys_id))
 				break;
+		} else if (header->type == ACPI_MADT_TYPE_CORE_PIC) {
+			if (!map_core_pic_id(header, type, acpi_id, &phys_id))
+				break;
 		}
 		entry += header->length;
 	}
@@ -216,6 +243,8 @@ static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
 		map_x2apic_id(header, type, acpi_id, &phys_id);
 	else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT)
 		map_gicc_mpidr(header, type, acpi_id, &phys_id);
+	else if (header->type == ACPI_MADT_TYPE_CORE_PIC)
+		map_core_pic_id(header, type, acpi_id, &phys_id);
 
 exit:
 	kfree(buffer.pointer);

From 89c290ea758911e660878e26270e084d862c03b0 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 21 Jul 2023 23:21:43 +0800
Subject: [PATCH 1276/2047] ACPI: video: Put ACPI video and its child devices
 into D0 on boot

Screen brightness can only be changed once on HP ZBook Fury 16 G10.

The vendor reports that the issue is related to the fact that Linux doesn't
invoke _PS0 at boot for all ACPI devices, as expected by the platform firmware:

    Scope (\_SB.PC00.GFX0)
    {
        Scope (DD1F)
        {
            Method (_PS0, 0, Serialized)  // _PS0: Power State 0
            {
                If (CondRefOf (\_SB.PC00.LPCB.EC0.SSBC))
                {
                    \_SB.PC00.LPCB.EC0.SSBC ()
                }
            }
	    ...
	}
	...
    }

The \_SB.PC00.GFX0.DD1F is the panel device, and its _PS0 needs to be
executed at the initialization time to make the brightness control work
properly.

_PS0 is not evaluated for this device, because _PSC is missing,
which violates the ACPI specification (ACPI 6.5, section 7.3.6).

Commit b3785492268f ("ACPI / PM: Do not power manage devices in unknown
initial states") tried to work around missing _PSC on platforms with
defective firmware, but got reverted due to a regression.

So the safest approach is to use acpi_device_fix_up_power_extended() to
put ACPI video and its child devices to D0 to address the issue at hand.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=217683
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_video.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 62f4364e4460..1732780a672b 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -2027,6 +2027,12 @@ static int acpi_video_bus_add(struct acpi_device *device)
 	if (error)
 		goto err_put_video;
 
+	/*
+	 * HP ZBook Fury 16 G10 requires ACPI video's child devices have _PS0
+	 * evaluated to have functional panel brightness control.
+	 */
+	acpi_device_fix_up_power_extended(device);
+
 	pr_info("%s [%s] (multi-head: %s  rom: %s  post: %s)\n",
 	       ACPI_VIDEO_DEVICE_NAME, acpi_device_bid(device),
 	       video->flags.multihead ? "yes" : "no",

From 8cf04bb321f036dd2e523e993897e0789bd5265c Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 7 Aug 2023 11:44:08 +0200
Subject: [PATCH 1277/2047] ACPI: video: Add backlight=native DMI quirk for
 Apple iMac12,1 and iMac12,2

Linux defaults to picking the non-working ACPI video backlight interface
on the Apple iMac12,1 and iMac12,2.

Add a DMI quirk to pick the working native radeon_bl0 interface instead.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1838
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2753
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/video_detect.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 0c376edd64fe..442396f6ed1f 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -495,6 +495,24 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "iMac11,3"),
 		},
 	},
+	{
+	 /* https://gitlab.freedesktop.org/drm/amd/-/issues/1838 */
+	 .callback = video_detect_force_native,
+	 /* Apple iMac12,1 */
+	 .matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+		DMI_MATCH(DMI_PRODUCT_NAME, "iMac12,1"),
+		},
+	},
+	{
+	 /* https://gitlab.freedesktop.org/drm/amd/-/issues/2753 */
+	 .callback = video_detect_force_native,
+	 /* Apple iMac12,2 */
+	 .matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+		DMI_MATCH(DMI_PRODUCT_NAME, "iMac12,2"),
+		},
+	},
 	{
 	 /* https://bugzilla.redhat.com/show_bug.cgi?id=1217249 */
 	 .callback = video_detect_force_native,

From 588b51ddc7dc96c4876e71bc155bd38a6bd59a14 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sat, 22 Jul 2023 10:55:05 +0800
Subject: [PATCH 1278/2047] ACPI: Remove unused extern declaration
 acpi_paddr_to_node()

This is never used since commit 1e3590e2e4a3 ("[PATCH] pgdat allocation
for new node add (get node id by acpi)").

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 641dc4843987..58a0fdf68ca2 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -477,8 +477,6 @@ static inline int acpi_get_node(acpi_handle handle)
 	return 0;
 }
 #endif
-extern int acpi_paddr_to_node(u64 start_addr, u64 size);
-
 extern int pnpacpi_disabled;
 
 #define PXM_INVAL	(-1)

From 99c31bff185610304532c4acf00cda341572e2d4 Mon Sep 17 00:00:00 2001
From: Sunil V L <sunilvl@ventanamicro.com>
Date: Tue, 25 Jul 2023 10:59:25 +0530
Subject: [PATCH 1279/2047] PNP: ACPI: Fix string truncation warning

LKP reports below warning when building for RISC-V.

drivers/pnp/pnpacpi/core.c:253:17:
warning: 'strncpy' specified bound 50 equals destination
size [-Wstringop-truncation]

This appears to be a valid issue since the destination string may not be
null-terminated.

To fix this, append the NUL explicitly after the strncpy().

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202307241942.Rff2Nri5-lkp@intel.com/
Signed-off-by: Sunil V L <sunilvl@ventanamicro.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pnp/pnpacpi/core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index 38928ff7472b..6ab272c84b7b 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -254,6 +254,9 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
 	else
 		strncpy(dev->name, acpi_device_bid(device), sizeof(dev->name));
 
+	/* Handle possible string truncation */
+	dev->name[sizeof(dev->name) - 1] = '\0';
+
 	if (dev->active)
 		pnpacpi_parse_allocated_resource(dev);
 

From ae769fbd143d9f9349387f27bb4f36d2ba5b45df Mon Sep 17 00:00:00 2001
From: Xiaochun Lee <lixc17@lenovo.com>
Date: Wed, 2 Aug 2023 21:47:46 +0800
Subject: [PATCH 1280/2047] ACPI: extlog: Fix finding the generic error data
 for v3 structure

Fix by using acpi_hest_get_payload() to find out the correct
generic error data for v3 structure.

The revision v300 generic error data is different from the old one, so
for compatibility with old and new version, change to a new interface to
locate the right memory error section that was defined in CPER.

Signed-off-by: Xiaochun Lee <lixc17@lenovo.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_extlog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index e648158368a7..e120a96e1eae 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -172,7 +172,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 			fru_text = "";
 		sec_type = (guid_t *)gdata->section_type;
 		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
-			struct cper_sec_mem_err *mem = (void *)(gdata + 1);
+			struct cper_sec_mem_err *mem = acpi_hest_get_payload(gdata);
 
 			if (gdata->error_data_length >= sizeof(*mem))
 				trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,

From eecb91b9f98d6427d4af5fdb8f108f52572a39e7 Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Thu, 17 Aug 2023 20:55:39 +0800
Subject: [PATCH 1281/2047] tracing: Fix memleak due to race between
 current_tracer and trace

Kmemleak report a leak in graph_trace_open():

  unreferenced object 0xffff0040b95f4a00 (size 128):
    comm "cat", pid 204981, jiffies 4301155872 (age 99771.964s)
    hex dump (first 32 bytes):
      e0 05 e7 b4 ab 7d 00 00 0b 00 01 00 00 00 00 00 .....}..........
      f4 00 01 10 00 a0 ff ff 00 00 00 00 65 00 10 00 ............e...
    backtrace:
      [<000000005db27c8b>] kmem_cache_alloc_trace+0x348/0x5f0
      [<000000007df90faa>] graph_trace_open+0xb0/0x344
      [<00000000737524cd>] __tracing_open+0x450/0xb10
      [<0000000098043327>] tracing_open+0x1a0/0x2a0
      [<00000000291c3876>] do_dentry_open+0x3c0/0xdc0
      [<000000004015bcd6>] vfs_open+0x98/0xd0
      [<000000002b5f60c9>] do_open+0x520/0x8d0
      [<00000000376c7820>] path_openat+0x1c0/0x3e0
      [<00000000336a54b5>] do_filp_open+0x14c/0x324
      [<000000002802df13>] do_sys_openat2+0x2c4/0x530
      [<0000000094eea458>] __arm64_sys_openat+0x130/0x1c4
      [<00000000a71d7881>] el0_svc_common.constprop.0+0xfc/0x394
      [<00000000313647bf>] do_el0_svc+0xac/0xec
      [<000000002ef1c651>] el0_svc+0x20/0x30
      [<000000002fd4692a>] el0_sync_handler+0xb0/0xb4
      [<000000000c309c35>] el0_sync+0x160/0x180

The root cause is descripted as follows:

  __tracing_open() {  // 1. File 'trace' is being opened;
    ...
    *iter->trace = *tr->current_trace;  // 2. Tracer 'function_graph' is
                                        //    currently set;
    ...
    iter->trace->open(iter);  // 3. Call graph_trace_open() here,
                              //    and memory are allocated in it;
    ...
  }

  s_start() {  // 4. The opened file is being read;
    ...
    *iter->trace = *tr->current_trace;  // 5. If tracer is switched to
                                        //    'nop' or others, then memory
                                        //    in step 3 are leaked!!!
    ...
  }

To fix it, in s_start(), close tracer before switching then reopen the
new tracer after switching. And some tracers like 'wakeup' may not update
'iter->private' in some cases when reopen, then it should be cleared
to avoid being mistakenly closed again.

Link: https://lore.kernel.org/linux-trace-kernel/20230817125539.1646321-1-zhengyejian1@huawei.com

Fixes: d7350c3f4569 ("tracing/core: make the read callbacks reentrants")
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c              | 9 ++++++++-
 kernel/trace/trace_irqsoff.c      | 3 ++-
 kernel/trace/trace_sched_wakeup.c | 2 ++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b0e8eb6ea8ac..29a2e4d7886d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4213,8 +4213,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 	 * will point to the same string as current_trace->name.
 	 */
 	mutex_lock(&trace_types_lock);
-	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
+	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
+		/* Close iter->trace before switching to the new current tracer */
+		if (iter->trace->close)
+			iter->trace->close(iter);
 		*iter->trace = *tr->current_trace;
+		/* Reopen the new current tracer */
+		if (iter->trace->open)
+			iter->trace->open(iter);
+	}
 	mutex_unlock(&trace_types_lock);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 590b3d51afae..ba37f768e2f2 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -231,7 +231,8 @@ static void irqsoff_trace_open(struct trace_iterator *iter)
 {
 	if (is_graph(iter->tr))
 		graph_trace_open(iter);
-
+	else
+		iter->private = NULL;
 }
 
 static void irqsoff_trace_close(struct trace_iterator *iter)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 330aee1c1a49..0469a04a355f 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -168,6 +168,8 @@ static void wakeup_trace_open(struct trace_iterator *iter)
 {
 	if (is_graph(iter->tr))
 		graph_trace_open(iter);
+	else
+		iter->private = NULL;
 }
 
 static void wakeup_trace_close(struct trace_iterator *iter)

From 638f139fda4f4fcb1b244b4ac755a4c4f0a6c4a2 Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Mon, 14 Aug 2023 21:56:23 +0800
Subject: [PATCH 1282/2047] ACPI: Remove assorted unused declarations of
 functions

acpi_create_dir()/acpi_remove_dir() are never implemented since
the beginning of git history.

Commit f8d31489629c ("ACPICA: Debugger: Convert some mechanisms to OSPM specific")
declared but never implemented acpi_run_debugger().

Commit 781d737c7466 ("ACPI: Drop power resources driver")
removed acpi_power_init() but not its declaration.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
[ rjw: Subject edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/internal.h | 1 -
 include/acpi/acpi_bus.h | 2 --
 include/acpi/acpixf.h   | 2 --
 3 files changed, 5 deletions(-)

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 21ec31b78216..956aed5afded 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -123,7 +123,6 @@ int __acpi_device_uevent_modalias(const struct acpi_device *adev,
 /* --------------------------------------------------------------------------
                                   Power Resource
    -------------------------------------------------------------------------- */
-int acpi_power_init(void);
 void acpi_power_resources_list_free(struct list_head *list);
 int acpi_extract_power_resources(union acpi_object *package, unsigned int start,
 				 struct list_head *list);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index c941d99162c0..449ec8a015bf 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -563,8 +563,6 @@ int acpi_match_device_ids(struct acpi_device *device,
 			  const struct acpi_device_id *ids);
 void acpi_set_modalias(struct acpi_device *adev, const char *default_id,
 		       char *modalias, size_t len);
-int acpi_create_dir(struct acpi_device *);
-void acpi_remove_dir(struct acpi_device *);
 
 static inline bool acpi_device_enumerated(struct acpi_device *adev)
 {
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 9ffdc0425bc2..cb64010cc5d8 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -970,8 +970,6 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 					       void **data,
 					       void (*callback)(void *)))
 
-void acpi_run_debugger(char *batch_buffer);
-
 void acpi_set_debugger_thread_id(acpi_thread_id thread_id);
 
 #endif				/* __ACXFACE_H__ */

From 596ca52a56da1b9370d358c38acf2ba5251687d9 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Sat, 29 Jul 2023 12:20:50 +0800
Subject: [PATCH 1283/2047] ACPI: TAD: Install SystemCMOS address space handler
 for ACPI000E

Currently, the SystemCMOS address space handler is installed for the
ACPI RTC devices (PNP0B00/PNP0B01/PNP0B02) only. But there are platforms
with SystemCMOS Operetion Region defined under the ACPI Time and Alarm
Device (ACPI000E), which is used by the ACPI pre-defined control methods
like _GRT (Get the Real time) and _SRT (Set the Real time).

When accessing these control methods via the acpi_tad sysfs interface,
missing SystemCMOS address space handler causes errors like below
[  478.255453] ACPI Error: No handler for Region [RTCM] (00000000a8d2dd39) [SystemCMOS] (20230331/evregion-130)
[  478.255458] ACPI Error: Region SystemCMOS (ID=5) has no handler (20230331/exfldio-261)
[  478.255461] Initialized Local Variables for Method [_GRT]:
[  478.255461]   Local1: 00000000f182542c <Obj>           Integer 0000000000000000
[  478.255464] No Arguments are initialized for method [_GRT]
[  478.255465] ACPI Error: Aborting method \_SB.AWAC._GRT due to previous error (AE_NOT_EXIST) (20230331/psparse-529)

Export two APIs for SystemCMOS address space handler from acpi_cmos_rtc
scan handler and install the handler for the ACPI Time and Alarm Device
from the ACPI TAD driver.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=217714
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
[ rjw: Subject and changelog edits, whitespace adjustment ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_cmos_rtc.c | 25 ++++++++++++++++++-------
 drivers/acpi/acpi_tad.c      | 27 ++++++++++++++++++++++-----
 include/acpi/acpi_bus.h      |  9 +++++++++
 3 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/drivers/acpi/acpi_cmos_rtc.c b/drivers/acpi/acpi_cmos_rtc.c
index 4cf4aef7ce0c..9b55d1593d16 100644
--- a/drivers/acpi/acpi_cmos_rtc.c
+++ b/drivers/acpi/acpi_cmos_rtc.c
@@ -51,12 +51,11 @@ acpi_cmos_rtc_space_handler(u32 function, acpi_physical_address address,
 	return AE_OK;
 }
 
-static int acpi_install_cmos_rtc_space_handler(struct acpi_device *adev,
-		const struct acpi_device_id *id)
+int acpi_install_cmos_rtc_space_handler(acpi_handle handle)
 {
 	acpi_status status;
 
-	status = acpi_install_address_space_handler(adev->handle,
+	status = acpi_install_address_space_handler(handle,
 			ACPI_ADR_SPACE_CMOS,
 			&acpi_cmos_rtc_space_handler,
 			NULL, NULL);
@@ -67,18 +66,30 @@ static int acpi_install_cmos_rtc_space_handler(struct acpi_device *adev,
 
 	return 1;
 }
+EXPORT_SYMBOL_GPL(acpi_install_cmos_rtc_space_handler);
 
-static void acpi_remove_cmos_rtc_space_handler(struct acpi_device *adev)
+void acpi_remove_cmos_rtc_space_handler(acpi_handle handle)
 {
-	if (ACPI_FAILURE(acpi_remove_address_space_handler(adev->handle,
+	if (ACPI_FAILURE(acpi_remove_address_space_handler(handle,
 			ACPI_ADR_SPACE_CMOS, &acpi_cmos_rtc_space_handler)))
 		pr_err("Error removing CMOS-RTC region handler\n");
 }
+EXPORT_SYMBOL_GPL(acpi_remove_cmos_rtc_space_handler);
+
+static int acpi_cmos_rtc_attach_handler(struct acpi_device *adev, const struct acpi_device_id *id)
+{
+	return acpi_install_cmos_rtc_space_handler(adev->handle);
+}
+
+static void acpi_cmos_rtc_detach_handler(struct acpi_device *adev)
+{
+	acpi_remove_cmos_rtc_space_handler(adev->handle);
+}
 
 static struct acpi_scan_handler cmos_rtc_handler = {
 	.ids = acpi_cmos_rtc_ids,
-	.attach = acpi_install_cmos_rtc_space_handler,
-	.detach = acpi_remove_cmos_rtc_space_handler,
+	.attach = acpi_cmos_rtc_attach_handler,
+	.detach = acpi_cmos_rtc_detach_handler,
 };
 
 void __init acpi_cmos_rtc_init(void)
diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c
index e9b8e8305e23..33c3b16af556 100644
--- a/drivers/acpi/acpi_tad.c
+++ b/drivers/acpi/acpi_tad.c
@@ -557,6 +557,7 @@ static int acpi_tad_disable_timer(struct device *dev, u32 timer_id)
 static int acpi_tad_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
+	acpi_handle handle = ACPI_HANDLE(dev);
 	struct acpi_tad_driver_data *dd = dev_get_drvdata(dev);
 
 	device_init_wakeup(dev, false);
@@ -577,6 +578,7 @@ static int acpi_tad_remove(struct platform_device *pdev)
 
 	pm_runtime_put_sync(dev);
 	pm_runtime_disable(dev);
+	acpi_remove_cmos_rtc_space_handler(handle);
 	return 0;
 }
 
@@ -589,6 +591,11 @@ static int acpi_tad_probe(struct platform_device *pdev)
 	unsigned long long caps;
 	int ret;
 
+	ret = acpi_install_cmos_rtc_space_handler(handle);
+	if (ret < 0) {
+		dev_info(dev, "Unable to install space handler\n");
+		return -ENODEV;
+	}
 	/*
 	 * Initialization failure messages are mostly about firmware issues, so
 	 * print them at the "info" level.
@@ -596,22 +603,27 @@ static int acpi_tad_probe(struct platform_device *pdev)
 	status = acpi_evaluate_integer(handle, "_GCP", NULL, &caps);
 	if (ACPI_FAILURE(status)) {
 		dev_info(dev, "Unable to get capabilities\n");
-		return -ENODEV;
+		ret = -ENODEV;
+		goto remove_handler;
 	}
 
 	if (!(caps & ACPI_TAD_AC_WAKE)) {
 		dev_info(dev, "Unsupported capabilities\n");
-		return -ENODEV;
+		ret = -ENODEV;
+		goto remove_handler;
 	}
 
 	if (!acpi_has_method(handle, "_PRW")) {
 		dev_info(dev, "Missing _PRW\n");
-		return -ENODEV;
+		ret = -ENODEV;
+		goto remove_handler;
 	}
 
 	dd = devm_kzalloc(dev, sizeof(*dd), GFP_KERNEL);
-	if (!dd)
-		return -ENOMEM;
+	if (!dd) {
+		ret = -ENOMEM;
+		goto remove_handler;
+	}
 
 	dd->capabilities = caps;
 	dev_set_drvdata(dev, dd);
@@ -653,6 +665,11 @@ static int acpi_tad_probe(struct platform_device *pdev)
 
 fail:
 	acpi_tad_remove(pdev);
+	/* Don't fallthrough because cmos rtc space handler is removed in acpi_tad_remove() */
+	return ret;
+
+remove_handler:
+	acpi_remove_cmos_rtc_space_handler(handle);
 	return ret;
 }
 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index c941d99162c0..fb95fff23e5b 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -645,6 +645,8 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev);
 #ifdef CONFIG_X86
 bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status);
 bool acpi_quirk_skip_acpi_ac_and_battery(void);
+int acpi_install_cmos_rtc_space_handler(acpi_handle handle);
+void acpi_remove_cmos_rtc_space_handler(acpi_handle handle);
 #else
 static inline bool acpi_device_override_status(struct acpi_device *adev,
 					       unsigned long long *status)
@@ -655,6 +657,13 @@ static inline bool acpi_quirk_skip_acpi_ac_and_battery(void)
 {
 	return false;
 }
+static inline int acpi_install_cmos_rtc_space_handler(acpi_handle handle)
+{
+	return 1;
+}
+static inline void acpi_remove_cmos_rtc_space_handler(acpi_handle handle)
+{
+}
 #endif
 
 #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS)

From 46822860a5a9a5a558475d323a55c8aab0b54012 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Aug 2023 12:32:15 -0700
Subject: [PATCH 1284/2047] seccomp: Add missing kerndoc notations

The kerndoc for some struct member and function arguments were missing.
Add them.

Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Will Drewry <wad@chromium.org>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202308171742.AncabIG1-lkp@intel.com/
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/seccomp.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d3fdc0086168..255999ba9190 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -110,11 +110,13 @@ struct seccomp_knotif {
  * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
  *         is allowed.
  * @ioctl_flags: The flags used for the seccomp_addfd ioctl.
+ * @setfd: whether or not SECCOMP_ADDFD_FLAG_SETFD was set during notify_addfd
  * @ret: The return value of the installing process. It is set to the fd num
  *       upon success (>= 0).
  * @completion: Indicates that the installing process has completed fd
  *              installation, or gone away (either due to successful
  *              reply, or signal)
+ * @list: list_head for chaining seccomp_kaddfd together.
  *
  */
 struct seccomp_kaddfd {
@@ -138,12 +140,12 @@ struct seccomp_kaddfd {
  * structure is fairly large, we store the notification-specific stuff in a
  * separate structure.
  *
- * @request: A semaphore that users of this notification can wait on for
- *           changes. Actual reads and writes are still controlled with
- *           filter->notify_lock.
+ * @requests: A semaphore that users of this notification can wait on for
+ *            changes. Actual reads and writes are still controlled with
+ *            filter->notify_lock.
+ * @flags: A set of SECCOMP_USER_NOTIF_FD_* flags.
  * @next_id: The id of the next request.
  * @notifications: A list of struct seccomp_knotif elements.
- * @flags: A set of SECCOMP_USER_NOTIF_FD_* flags.
  */
 
 struct notification {
@@ -558,6 +560,8 @@ static void __seccomp_filter_release(struct seccomp_filter *orig)
  *			    drop its reference count, and notify
  *			    about unused filters
  *
+ * @tsk: task the filter should be released from.
+ *
  * This function should only be called when the task is exiting as
  * it detaches it from its filter tree. As such, READ_ONCE() and
  * barriers are not needed here, as would normally be needed.
@@ -577,6 +581,8 @@ void seccomp_filter_release(struct task_struct *tsk)
 /**
  * seccomp_sync_threads: sets all threads to use current's filter
  *
+ * @flags: SECCOMP_FILTER_FLAG_* flags to set during sync.
+ *
  * Expects sighand and cred_guard_mutex locks to be held, and for
  * seccomp_can_sync_threads() to have returned success already
  * without dropping the locks.

From 4cfb908054456ad8b6b8cd5108bbdf80faade8cd Mon Sep 17 00:00:00 2001
From: Eric Snowberg <eric.snowberg@oracle.com>
Date: Mon, 22 May 2023 19:09:42 -0400
Subject: [PATCH 1285/2047] KEYS: DigitalSignature link restriction

Add a new link restriction.  Restrict the addition of keys in a keyring
based on the key having digitalSignature usage set. Additionally, verify
the new certificate against the ones in the system keyrings.  Add two
additional functions to use the new restriction within either the builtin
or secondary keyrings.

[jarkko@kernel.org: Fix checkpatch.pl --strict issues]
Signed-off-by: Eric Snowberg <eric.snowberg@oracle.com>
Reviewed-and-tested-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 certs/system_keyring.c            | 49 +++++++++++++++++++++++++++++++
 crypto/asymmetric_keys/restrict.c | 44 +++++++++++++++++++++++++++
 include/crypto/public_key.h       | 12 ++++++++
 include/keys/system_keyring.h     | 10 +++++++
 4 files changed, 115 insertions(+)

diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index a7a49b17ceb1..b348e0898d34 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -51,6 +51,26 @@ int restrict_link_by_builtin_trusted(struct key *dest_keyring,
 					  builtin_trusted_keys);
 }
 
+/**
+ * restrict_link_by_digsig_builtin - Restrict digitalSignature key additions by the built-in keyring
+ * @dest_keyring: Keyring being linked to.
+ * @type: The type of key being added.
+ * @payload: The payload of the new key.
+ * @restriction_key: A ring of keys that can be used to vouch for the new cert.
+ *
+ * Restrict the addition of keys into a keyring based on the key-to-be-added
+ * being vouched for by a key in the built in system keyring. The new key
+ * must have the digitalSignature usage field set.
+ */
+int restrict_link_by_digsig_builtin(struct key *dest_keyring,
+				    const struct key_type *type,
+				    const union key_payload *payload,
+				    struct key *restriction_key)
+{
+	return restrict_link_by_digsig(dest_keyring, type, payload,
+				       builtin_trusted_keys);
+}
+
 #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
 /**
  * restrict_link_by_builtin_and_secondary_trusted - Restrict keyring
@@ -83,6 +103,35 @@ int restrict_link_by_builtin_and_secondary_trusted(
 					  secondary_trusted_keys);
 }
 
+/**
+ * restrict_link_by_digsig_builtin_and_secondary - Restrict by digitalSignature.
+ * @dest_keyring: Keyring being linked to.
+ * @type: The type of key being added.
+ * @payload: The payload of the new key.
+ * @restrict_key: A ring of keys that can be used to vouch for the new cert.
+ *
+ * Restrict the addition of keys into a keyring based on the key-to-be-added
+ * being vouched for by a key in either the built-in or the secondary system
+ * keyrings. The new key must have the digitalSignature usage field set.
+ */
+int restrict_link_by_digsig_builtin_and_secondary(struct key *dest_keyring,
+						  const struct key_type *type,
+						  const union key_payload *payload,
+						  struct key *restrict_key)
+{
+	/* If we have a secondary trusted keyring, then that contains a link
+	 * through to the builtin keyring and the search will follow that link.
+	 */
+	if (type == &key_type_keyring &&
+	    dest_keyring == secondary_trusted_keys &&
+	    payload == &builtin_trusted_keys->payload)
+		/* Allow the builtin keyring to be added to the secondary */
+		return 0;
+
+	return restrict_link_by_digsig(dest_keyring, type, payload,
+				       secondary_trusted_keys);
+}
+
 /*
  * Allocate a struct key_restriction for the "builtin and secondary trust"
  * keyring. Only for use in system_trusted_keyring_init().
diff --git a/crypto/asymmetric_keys/restrict.c b/crypto/asymmetric_keys/restrict.c
index 276bdb627498..6b69ea40da23 100644
--- a/crypto/asymmetric_keys/restrict.c
+++ b/crypto/asymmetric_keys/restrict.c
@@ -148,6 +148,50 @@ int restrict_link_by_ca(struct key *dest_keyring,
 	return 0;
 }
 
+/**
+ * restrict_link_by_digsig - Restrict additions to a ring of digsig keys
+ * @dest_keyring: Keyring being linked to.
+ * @type: The type of key being added.
+ * @payload: The payload of the new key.
+ * @trust_keyring: A ring of keys that can be used to vouch for the new cert.
+ *
+ * Check if the new certificate has digitalSignature usage set. If it is,
+ * then mark the new certificate as being ok to link. Afterwards verify
+ * the new certificate against the ones in the trust_keyring.
+ *
+ * Returns 0 if the new certificate was accepted, -ENOKEY if the
+ * certificate is not a digsig. -ENOPKG if the signature uses unsupported
+ * crypto, or some other error if there is a matching certificate but
+ * the signature check cannot be performed.
+ */
+int restrict_link_by_digsig(struct key *dest_keyring,
+			    const struct key_type *type,
+			    const union key_payload *payload,
+			    struct key *trust_keyring)
+{
+	const struct public_key *pkey;
+
+	if (type != &key_type_asymmetric)
+		return -EOPNOTSUPP;
+
+	pkey = payload->data[asym_crypto];
+
+	if (!pkey)
+		return -ENOPKG;
+
+	if (!test_bit(KEY_EFLAG_DIGITALSIG, &pkey->key_eflags))
+		return -ENOKEY;
+
+	if (test_bit(KEY_EFLAG_CA, &pkey->key_eflags))
+		return -ENOKEY;
+
+	if (test_bit(KEY_EFLAG_KEYCERTSIGN, &pkey->key_eflags))
+		return -ENOKEY;
+
+	return restrict_link_by_signature(dest_keyring, type, payload,
+					  trust_keyring);
+}
+
 static bool match_either_id(const struct asymmetric_key_id **pair,
 			    const struct asymmetric_key_id *single)
 {
diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h
index 8fadd561c50e..462f8a34cdf8 100644
--- a/include/crypto/public_key.h
+++ b/include/crypto/public_key.h
@@ -78,6 +78,10 @@ extern int restrict_link_by_ca(struct key *dest_keyring,
 			       const struct key_type *type,
 			       const union key_payload *payload,
 			       struct key *trust_keyring);
+int restrict_link_by_digsig(struct key *dest_keyring,
+			    const struct key_type *type,
+			    const union key_payload *payload,
+			    struct key *trust_keyring);
 #else
 static inline int restrict_link_by_ca(struct key *dest_keyring,
 				      const struct key_type *type,
@@ -86,6 +90,14 @@ static inline int restrict_link_by_ca(struct key *dest_keyring,
 {
 	return 0;
 }
+
+static inline int restrict_link_by_digsig(struct key *dest_keyring,
+					  const struct key_type *type,
+					  const union key_payload *payload,
+					  struct key *trust_keyring)
+{
+	return 0;
+}
 #endif
 
 extern int query_asymmetric_key(const struct kernel_pkey_params *,
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
index 91e080efb918..7e2583208820 100644
--- a/include/keys/system_keyring.h
+++ b/include/keys/system_keyring.h
@@ -23,10 +23,15 @@ extern int restrict_link_by_builtin_trusted(struct key *keyring,
 					    const struct key_type *type,
 					    const union key_payload *payload,
 					    struct key *restriction_key);
+int restrict_link_by_digsig_builtin(struct key *dest_keyring,
+				    const struct key_type *type,
+				    const union key_payload *payload,
+				    struct key *restriction_key);
 extern __init int load_module_cert(struct key *keyring);
 
 #else
 #define restrict_link_by_builtin_trusted restrict_link_reject
+#define restrict_link_by_digsig_builtin restrict_link_reject
 
 static inline __init int load_module_cert(struct key *keyring)
 {
@@ -41,8 +46,13 @@ extern int restrict_link_by_builtin_and_secondary_trusted(
 	const struct key_type *type,
 	const union key_payload *payload,
 	struct key *restriction_key);
+int restrict_link_by_digsig_builtin_and_secondary(struct key *keyring,
+						  const struct key_type *type,
+						  const union key_payload *payload,
+						  struct key *restriction_key);
 #else
 #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted
+#define restrict_link_by_digsig_builtin_and_secondary restrict_link_by_digsig_builtin
 #endif
 
 #ifdef CONFIG_INTEGRITY_MACHINE_KEYRING

From 90f6f691a706754e33d2d0c6fa2e1dacedb477f6 Mon Sep 17 00:00:00 2001
From: Eric Snowberg <eric.snowberg@oracle.com>
Date: Mon, 22 May 2023 19:09:43 -0400
Subject: [PATCH 1286/2047] integrity: Enforce digitalSignature usage in the
 ima and evm keyrings

After being vouched for by a system keyring, only allow keys into the .ima
and .evm keyrings that have the digitalSignature usage field set.

Link: https://lore.kernel.org/all/41dffdaeb7eb7840f7e38bc691fbda836635c9f9.camel@linux.ibm.com
Suggested-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Eric Snowberg <eric.snowberg@oracle.com>
Acked-and-tested-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/integrity/digsig.c    | 4 ++--
 security/integrity/evm/Kconfig | 3 ++-
 security/integrity/ima/Kconfig | 3 ++-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 6f31ffe23c48..d0704b1597d4 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -34,9 +34,9 @@ static const char * const keyring_name[INTEGRITY_KEYRING_MAX] = {
 };
 
 #ifdef CONFIG_IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
-#define restrict_link_to_ima restrict_link_by_builtin_and_secondary_trusted
+#define restrict_link_to_ima restrict_link_by_digsig_builtin_and_secondary
 #else
-#define restrict_link_to_ima restrict_link_by_builtin_trusted
+#define restrict_link_to_ima restrict_link_by_digsig_builtin
 #endif
 
 static struct key *integrity_keyring_from_id(const unsigned int id)
diff --git a/security/integrity/evm/Kconfig b/security/integrity/evm/Kconfig
index a6e19d23e700..fba9ee359bc9 100644
--- a/security/integrity/evm/Kconfig
+++ b/security/integrity/evm/Kconfig
@@ -64,7 +64,8 @@ config EVM_LOAD_X509
 
 	   This option enables X509 certificate loading from the kernel
 	   onto the '.evm' trusted keyring.  A public key can be used to
-	   verify EVM integrity starting from the 'init' process.
+	   verify EVM integrity starting from the 'init' process. The
+	   key must have digitalSignature usage set.
 
 config EVM_X509_PATH
 	string "EVM X509 certificate path"
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index 60a511c6b583..684425936c53 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -270,7 +270,8 @@ config IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
 	help
 	  Keys may be added to the IMA or IMA blacklist keyrings, if the
 	  key is validly signed by a CA cert in the system built-in or
-	  secondary trusted keyrings.
+	  secondary trusted keyrings. The key must also have the
+	  digitalSignature usage set.
 
 	  Intermediate keys between those the kernel has compiled in and the
 	  IMA keys to be added may be added to the system secondary keyring,

From a3af7188e360aea343611dd385056eec44e10175 Mon Sep 17 00:00:00 2001
From: Nayna Jain <nayna@linux.ibm.com>
Date: Tue, 15 Aug 2023 07:27:17 -0400
Subject: [PATCH 1287/2047] integrity: PowerVM support for loading CA keys on
 machine keyring

Keys that derive their trust from an entity such as a security officer,
administrator, system owner, or machine owner are said to have "imputed
trust". CA keys with imputed trust can be loaded onto the machine keyring.
The mechanism for loading these keys onto the machine keyring is platform
dependent.

Load keys stored in the variable trustedcadb onto the .machine keyring
on PowerVM platform.

Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
Reviewed-and-tested-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Nageswara R Sastry <rnsastry@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 .../integrity/platform_certs/keyring_handler.c  |  8 ++++++++
 .../integrity/platform_certs/keyring_handler.h  |  5 +++++
 .../integrity/platform_certs/load_powerpc.c     | 17 +++++++++++++++++
 3 files changed, 30 insertions(+)

diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c
index 8a1124e4d769..1649d047e3b8 100644
--- a/security/integrity/platform_certs/keyring_handler.c
+++ b/security/integrity/platform_certs/keyring_handler.c
@@ -69,6 +69,14 @@ __init efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type)
 	return NULL;
 }
 
+__init efi_element_handler_t get_handler_for_ca_keys(const efi_guid_t *sig_type)
+{
+	if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0)
+		return add_to_machine_keyring;
+
+	return NULL;
+}
+
 /*
  * Return the appropriate handler for particular signature list types found in
  * the UEFI dbx and MokListXRT tables.
diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h
index 212d894a8c0c..6f15bb4cc8dc 100644
--- a/security/integrity/platform_certs/keyring_handler.h
+++ b/security/integrity/platform_certs/keyring_handler.h
@@ -29,6 +29,11 @@ efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type);
  */
 efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type);
 
+/*
+ * Return the handler for particular signature list types for CA keys.
+ */
+efi_element_handler_t get_handler_for_ca_keys(const efi_guid_t *sig_type);
+
 /*
  * Return the handler for particular signature list types found in the dbx.
  */
diff --git a/security/integrity/platform_certs/load_powerpc.c b/security/integrity/platform_certs/load_powerpc.c
index 170789dc63d2..339053d9726d 100644
--- a/security/integrity/platform_certs/load_powerpc.c
+++ b/security/integrity/platform_certs/load_powerpc.c
@@ -59,6 +59,7 @@ static __init void *get_cert_list(u8 *key, unsigned long keylen, u64 *size)
 static int __init load_powerpc_certs(void)
 {
 	void *db = NULL, *dbx = NULL, *data = NULL;
+	void *trustedca;
 	u64 dsize = 0;
 	u64 offset = 0;
 	int rc = 0;
@@ -120,6 +121,22 @@ static int __init load_powerpc_certs(void)
 		kfree(data);
 	}
 
+	data = get_cert_list("trustedcadb", 12,  &dsize);
+	if (!data) {
+		pr_info("Couldn't get trustedcadb list from firmware\n");
+	} else if (IS_ERR(data)) {
+		rc = PTR_ERR(data);
+		pr_err("Error reading trustedcadb from firmware: %d\n", rc);
+	} else {
+		extract_esl(trustedca, data, dsize, offset);
+
+		rc = parse_efi_signature_list("powerpc:trustedca", trustedca, dsize,
+					      get_handler_for_ca_keys);
+		if (rc)
+			pr_err("Couldn't parse trustedcadb signatures: %d\n", rc);
+		kfree(data);
+	}
+
 	return rc;
 }
 late_initcall(load_powerpc_certs);

From bc02667698b0e828f566e031d5020298b8d0c5de Mon Sep 17 00:00:00 2001
From: Nayna Jain <nayna@linux.ibm.com>
Date: Tue, 15 Aug 2023 07:27:18 -0400
Subject: [PATCH 1288/2047] integrity: ignore keys failing CA restrictions on
 non-UEFI platform

On non-UEFI platforms, handle restrict_link_by_ca failures differently.

Certificates which do not satisfy CA restrictions on non-UEFI platforms
are ignored.

Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
Reviewed-and-tested-by: Mimi Zohar <zohar@linux.ibm.com>
Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Nageswara R Sastry <rnsastry@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/integrity/platform_certs/machine_keyring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/integrity/platform_certs/machine_keyring.c b/security/integrity/platform_certs/machine_keyring.c
index 7aaed7950b6e..389a6e7c9245 100644
--- a/security/integrity/platform_certs/machine_keyring.c
+++ b/security/integrity/platform_certs/machine_keyring.c
@@ -36,7 +36,7 @@ void __init add_to_machine_keyring(const char *source, const void *data, size_t
 	 * If the restriction check does not pass and the platform keyring
 	 * is configured, try to add it into that keyring instead.
 	 */
-	if (rc && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
+	if (rc && efi_enabled(EFI_BOOT) && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
 		rc = integrity_load_cert(INTEGRITY_KEYRING_PLATFORM, source,
 					 data, len, perm);
 

From 7b9de406582d12b72cce72c056b5678e8c0627eb Mon Sep 17 00:00:00 2001
From: Nayna Jain <nayna@linux.ibm.com>
Date: Tue, 15 Aug 2023 07:27:19 -0400
Subject: [PATCH 1289/2047] integrity: remove global variable from
 machine_keyring.c

trust_mok variable is accessed within a single function locally.

Change trust_mok from global to local static variable.

Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
Reviewed-and-tested-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Nageswara R Sastry <rnsastry@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/integrity/platform_certs/machine_keyring.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/integrity/platform_certs/machine_keyring.c b/security/integrity/platform_certs/machine_keyring.c
index 389a6e7c9245..9482e16cb2ca 100644
--- a/security/integrity/platform_certs/machine_keyring.c
+++ b/security/integrity/platform_certs/machine_keyring.c
@@ -8,8 +8,6 @@
 #include <linux/efi.h>
 #include "../integrity.h"
 
-static bool trust_mok;
-
 static __init int machine_keyring_init(void)
 {
 	int rc;
@@ -65,9 +63,11 @@ static __init bool uefi_check_trust_mok_keys(void)
 bool __init trust_moklist(void)
 {
 	static bool initialized;
+	static bool trust_mok;
 
 	if (!initialized) {
 		initialized = true;
+		trust_mok = false;
 
 		if (uefi_check_trust_mok_keys())
 			trust_mok = true;

From 4cb1ed94f18047d0863f976bc95aa7c0584cc51c Mon Sep 17 00:00:00 2001
From: Nayna Jain <nayna@linux.ibm.com>
Date: Tue, 15 Aug 2023 07:27:20 -0400
Subject: [PATCH 1290/2047] integrity: check whether imputed trust is enabled

trust_moklist() is specific to UEFI enabled systems. Other platforms
rely only on the Kconfig.

Define a generic wrapper named imputed_trust_enabled().

Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>
Tested-by: Nageswara R Sastry <rnsastry@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/integrity/digsig.c                    |  2 +-
 security/integrity/integrity.h                 |  5 +++--
 .../integrity/platform_certs/keyring_handler.c |  3 ++-
 .../integrity/platform_certs/machine_keyring.c | 18 ++++++++++++++++--
 4 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index d0704b1597d4..df387de29bfa 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -113,7 +113,7 @@ static int __init __integrity_init_keyring(const unsigned int id,
 	} else {
 		if (id == INTEGRITY_KEYRING_PLATFORM)
 			set_platform_trusted_keys(keyring[id]);
-		if (id == INTEGRITY_KEYRING_MACHINE && trust_moklist())
+		if (id == INTEGRITY_KEYRING_MACHINE && imputed_trust_enabled())
 			set_machine_trusted_keys(keyring[id]);
 		if (id == INTEGRITY_KEYRING_IMA)
 			load_module_cert(keyring[id]);
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 7167a6e99bdc..d7553c93f5c0 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -320,13 +320,14 @@ static inline void __init add_to_platform_keyring(const char *source,
 
 #ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
 void __init add_to_machine_keyring(const char *source, const void *data, size_t len);
-bool __init trust_moklist(void);
+bool __init imputed_trust_enabled(void);
 #else
 static inline void __init add_to_machine_keyring(const char *source,
 						  const void *data, size_t len)
 {
 }
-static inline bool __init trust_moklist(void)
+
+static inline bool __init imputed_trust_enabled(void)
 {
 	return false;
 }
diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c
index 1649d047e3b8..586027b9a3f5 100644
--- a/security/integrity/platform_certs/keyring_handler.c
+++ b/security/integrity/platform_certs/keyring_handler.c
@@ -61,7 +61,8 @@ __init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type)
 __init efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type)
 {
 	if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) {
-		if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING) && trust_moklist())
+		if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING) &&
+		    imputed_trust_enabled())
 			return add_to_machine_keyring;
 		else
 			return add_to_platform_keyring;
diff --git a/security/integrity/platform_certs/machine_keyring.c b/security/integrity/platform_certs/machine_keyring.c
index 9482e16cb2ca..a401640a63cd 100644
--- a/security/integrity/platform_certs/machine_keyring.c
+++ b/security/integrity/platform_certs/machine_keyring.c
@@ -34,7 +34,8 @@ void __init add_to_machine_keyring(const char *source, const void *data, size_t
 	 * If the restriction check does not pass and the platform keyring
 	 * is configured, try to add it into that keyring instead.
 	 */
-	if (rc && efi_enabled(EFI_BOOT) && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
+	if (rc && efi_enabled(EFI_BOOT) &&
+	    IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
 		rc = integrity_load_cert(INTEGRITY_KEYRING_PLATFORM, source,
 					 data, len, perm);
 
@@ -60,7 +61,7 @@ static __init bool uefi_check_trust_mok_keys(void)
 	return false;
 }
 
-bool __init trust_moklist(void)
+static bool __init trust_moklist(void)
 {
 	static bool initialized;
 	static bool trust_mok;
@@ -75,3 +76,16 @@ bool __init trust_moklist(void)
 
 	return trust_mok;
 }
+
+/*
+ * Provides platform specific check for trusting imputed keys before loading
+ * on .machine keyring. UEFI systems enable this trust based on a variable,
+ * and for other platforms, it is always enabled.
+ */
+bool __init imputed_trust_enabled(void)
+{
+	if (efi_enabled(EFI_BOOT))
+		return trust_moklist();
+
+	return true;
+}

From d7d91c4743c4ef0f60b7556d2794b6dd27cda373 Mon Sep 17 00:00:00 2001
From: Nayna Jain <nayna@linux.ibm.com>
Date: Tue, 15 Aug 2023 07:27:21 -0400
Subject: [PATCH 1291/2047] integrity: PowerVM machine keyring enablement

Update Kconfig to enable machine keyring and limit to CA certificates
on PowerVM. Only key signing CA keys are allowed.

Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
Reviewed-and-tested-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Nageswara R Sastry <rnsastry@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/integrity/Kconfig | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig
index ec6e0d789da1..232191ee09e3 100644
--- a/security/integrity/Kconfig
+++ b/security/integrity/Kconfig
@@ -67,7 +67,9 @@ config INTEGRITY_MACHINE_KEYRING
 	depends on SECONDARY_TRUSTED_KEYRING
 	depends on INTEGRITY_ASYMMETRIC_KEYS
 	depends on SYSTEM_BLACKLIST_KEYRING
-	depends on LOAD_UEFI_KEYS
+	depends on LOAD_UEFI_KEYS || LOAD_PPC_KEYS
+	select INTEGRITY_CA_MACHINE_KEYRING if LOAD_PPC_KEYS
+	select INTEGRITY_CA_MACHINE_KEYRING_MAX if LOAD_PPC_KEYS
 	help
 	 If set, provide a keyring to which Machine Owner Keys (MOK) may
 	 be added. This keyring shall contain just MOK keys.  Unlike keys

From 44e69ea53892f18e8753943a4376de20b076c3fe Mon Sep 17 00:00:00 2001
From: Nayna Jain <nayna@linux.ibm.com>
Date: Tue, 15 Aug 2023 07:27:22 -0400
Subject: [PATCH 1292/2047] integrity: PowerVM support for loading third party
 code signing keys

On secure boot enabled PowerVM LPAR, third party code signing keys are
needed during early boot to verify signed third party modules. These
third party keys are stored in moduledb object in the Platform
KeyStore (PKS).

Load third party code signing keys onto .secondary_trusted_keys keyring.

Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
Reviewed-and-tested-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Nageswara R Sastry <rnsastry@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 certs/system_keyring.c                        | 30 +++++++++++++++++++
 include/keys/system_keyring.h                 |  4 +++
 .../platform_certs/keyring_handler.c          |  8 +++++
 .../platform_certs/keyring_handler.h          |  5 ++++
 .../integrity/platform_certs/load_powerpc.c   | 17 +++++++++++
 5 files changed, 64 insertions(+)

diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index b348e0898d34..33841c91f12c 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -152,6 +152,36 @@ static __init struct key_restriction *get_builtin_and_secondary_restriction(void
 
 	return restriction;
 }
+
+/**
+ * add_to_secondary_keyring - Add to secondary keyring.
+ * @source: Source of key
+ * @data: The blob holding the key
+ * @len: The length of the data blob
+ *
+ * Add a key to the secondary keyring. The key must be vouched for by a key in the builtin,
+ * machine or secondary keyring itself.
+ */
+void __init add_to_secondary_keyring(const char *source, const void *data, size_t len)
+{
+	key_ref_t key;
+	key_perm_t perm;
+
+	perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW;
+
+	key = key_create_or_update(make_key_ref(secondary_trusted_keys, 1),
+				   "asymmetric",
+				   NULL, data, len, perm,
+				   KEY_ALLOC_NOT_IN_QUOTA);
+	if (IS_ERR(key)) {
+		pr_err("Problem loading X.509 certificate from %s to secondary keyring %ld\n",
+		       source, PTR_ERR(key));
+		return;
+	}
+
+	pr_notice("Loaded X.509 cert '%s'\n", key_ref_to_ptr(key)->description);
+	key_ref_put(key);
+}
 #endif
 #ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
 void __init set_machine_trusted_keys(struct key *keyring)
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
index 7e2583208820..8365adf842ef 100644
--- a/include/keys/system_keyring.h
+++ b/include/keys/system_keyring.h
@@ -50,9 +50,13 @@ int restrict_link_by_digsig_builtin_and_secondary(struct key *keyring,
 						  const struct key_type *type,
 						  const union key_payload *payload,
 						  struct key *restriction_key);
+void __init add_to_secondary_keyring(const char *source, const void *data, size_t len);
 #else
 #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted
 #define restrict_link_by_digsig_builtin_and_secondary restrict_link_by_digsig_builtin
+static inline void __init add_to_secondary_keyring(const char *source, const void *data, size_t len)
+{
+}
 #endif
 
 #ifdef CONFIG_INTEGRITY_MACHINE_KEYRING
diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c
index 586027b9a3f5..13ea17207902 100644
--- a/security/integrity/platform_certs/keyring_handler.c
+++ b/security/integrity/platform_certs/keyring_handler.c
@@ -78,6 +78,14 @@ __init efi_element_handler_t get_handler_for_ca_keys(const efi_guid_t *sig_type)
 	return NULL;
 }
 
+__init efi_element_handler_t get_handler_for_code_signing_keys(const efi_guid_t *sig_type)
+{
+	if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0)
+		return add_to_secondary_keyring;
+
+	return NULL;
+}
+
 /*
  * Return the appropriate handler for particular signature list types found in
  * the UEFI dbx and MokListXRT tables.
diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h
index 6f15bb4cc8dc..f92895cc50f6 100644
--- a/security/integrity/platform_certs/keyring_handler.h
+++ b/security/integrity/platform_certs/keyring_handler.h
@@ -34,6 +34,11 @@ efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type);
  */
 efi_element_handler_t get_handler_for_ca_keys(const efi_guid_t *sig_type);
 
+/*
+ * Return the handler for particular signature list types for code signing keys.
+ */
+efi_element_handler_t get_handler_for_code_signing_keys(const efi_guid_t *sig_type);
+
 /*
  * Return the handler for particular signature list types found in the dbx.
  */
diff --git a/security/integrity/platform_certs/load_powerpc.c b/security/integrity/platform_certs/load_powerpc.c
index 339053d9726d..c85febca3343 100644
--- a/security/integrity/platform_certs/load_powerpc.c
+++ b/security/integrity/platform_certs/load_powerpc.c
@@ -60,6 +60,7 @@ static int __init load_powerpc_certs(void)
 {
 	void *db = NULL, *dbx = NULL, *data = NULL;
 	void *trustedca;
+	void *moduledb;
 	u64 dsize = 0;
 	u64 offset = 0;
 	int rc = 0;
@@ -137,6 +138,22 @@ static int __init load_powerpc_certs(void)
 		kfree(data);
 	}
 
+	data = get_cert_list("moduledb", 9,  &dsize);
+	if (!data) {
+		pr_info("Couldn't get moduledb list from firmware\n");
+	} else if (IS_ERR(data)) {
+		rc = PTR_ERR(data);
+		pr_err("Error reading moduledb from firmware: %d\n", rc);
+	} else {
+		extract_esl(moduledb, data, dsize, offset);
+
+		rc = parse_efi_signature_list("powerpc:moduledb", moduledb, dsize,
+					      get_handler_for_code_signing_keys);
+		if (rc)
+			pr_err("Couldn't parse moduledb signatures: %d\n", rc);
+		kfree(data);
+	}
+
 	return rc;
 }
 late_initcall(load_powerpc_certs);

From 604b8e7558857814bf7c109bc651697129543383 Mon Sep 17 00:00:00 2001
From: Azeem Shaikh <azeemshaikh38@gmail.com>
Date: Thu, 18 May 2023 04:15:13 +0000
Subject: [PATCH 1293/2047] KEYS: Replace all non-returning strlcpy with
 strscpy

strlcpy() reads the entire source buffer first.
This read may exceed the destination size limit.
This is both inefficient and can lead to linear read
overflows if a source string is not NUL-terminated [1].
In an effort to remove strlcpy() completely [2], replace
strlcpy() here with strscpy().
No return values were used, so direct replacement is safe.

[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy
[2] https://github.com/KSPP/linux/issues/89

Signed-off-by: Azeem Shaikh <azeemshaikh38@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/keys/request_key_auth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 41e9735006d0..8f33cd170e42 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -178,7 +178,7 @@ struct key *request_key_auth_new(struct key *target, const char *op,
 	if (!rka->callout_info)
 		goto error_free_rka;
 	rka->callout_len = callout_len;
-	strlcpy(rka->op, op, sizeof(rka->op));
+	strscpy(rka->op, op, sizeof(rka->op));
 
 	/* see if the calling process is already servicing the key request of
 	 * another process */

From a86a42ac2bd652fdc7836a9d880c306a2485c142 Mon Sep 17 00:00:00 2001
From: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Date: Fri, 21 Apr 2023 14:43:08 +0530
Subject: [PATCH 1294/2047] tpm_tis_spi: Add hardware wait polling

TPM devices may insert wait state on last clock cycle of ADDR phase.
For SPI controllers that support full-duplex transfers, this can be
detected using software by reading the MISO line. For SPI controllers
that only support half-duplex transfers, such as the Tegra QSPI, it is
not possible to detect the wait signal from software. The QSPI
controller in Tegra234 and Tegra241 implement hardware detection of the
wait signal which can be enabled in the controller for TPM devices.

The current TPM TIS driver only supports software detection of the wait
signal. To support SPI controllers that use hardware to detect the wait
signal, add the function tpm_tis_spi_transfer_half() and move the
existing code for software based detection into a function called
tpm_tis_spi_transfer_full(). SPI controllers that only support
half-duplex transfers will always call tpm_tis_spi_transfer_half()
because they cannot support software based detection. The bit
SPI_TPM_HW_FLOW is set to indicate to the SPI controller that hardware
detection is required and it is the responsibility of the SPI controller
driver to determine if this is supported or not.

For hardware flow control, CMD-ADDR-DATA messages are combined into a
single message where as for software flow control exiting method of
CMD-ADDR in a message and DATA in another is followed.

[jarkko: Fixed the function names to match the code change, and the tag
in the short summary.]
Signed-off-by: Krishna Yarlagadda <kyarlagadda@nvidia.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_spi_main.c | 91 ++++++++++++++++++++++++++++-
 1 file changed, 89 insertions(+), 2 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c
index 9bfaba092a06..a62f5c7f38d3 100644
--- a/drivers/char/tpm/tpm_tis_spi_main.c
+++ b/drivers/char/tpm/tpm_tis_spi_main.c
@@ -71,8 +71,74 @@ static int tpm_tis_spi_flow_control(struct tpm_tis_spi_phy *phy,
 	return 0;
 }
 
-int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
-			 u8 *in, const u8 *out)
+/*
+ * Half duplex controller with support for TPM wait state detection like
+ * Tegra QSPI need CMD, ADDR & DATA sent in single message to manage HW flow
+ * control. Each phase sent in different transfer for controller to idenity
+ * phase.
+ */
+static int tpm_tis_spi_transfer_half(struct tpm_tis_data *data,	u32 addr,
+				     u16 len, u8 *in, const u8 *out)
+{
+	struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data);
+	struct spi_transfer spi_xfer[3];
+	struct spi_message m;
+	u8 transfer_len;
+	int ret;
+
+	while (len) {
+		transfer_len = min_t(u16, len, MAX_SPI_FRAMESIZE);
+
+		spi_message_init(&m);
+		phy->iobuf[0] = (in ? 0x80 : 0) | (transfer_len - 1);
+		phy->iobuf[1] = 0xd4;
+		phy->iobuf[2] = addr >> 8;
+		phy->iobuf[3] = addr;
+
+		memset(&spi_xfer, 0, sizeof(spi_xfer));
+
+		spi_xfer[0].tx_buf = phy->iobuf;
+		spi_xfer[0].len = 1;
+		spi_message_add_tail(&spi_xfer[0], &m);
+
+		spi_xfer[1].tx_buf = phy->iobuf + 1;
+		spi_xfer[1].len = 3;
+		spi_message_add_tail(&spi_xfer[1], &m);
+
+		if (out) {
+			spi_xfer[2].tx_buf = &phy->iobuf[4];
+			spi_xfer[2].rx_buf = NULL;
+			memcpy(&phy->iobuf[4], out, transfer_len);
+			out += transfer_len;
+		}
+
+		if (in) {
+			spi_xfer[2].tx_buf = NULL;
+			spi_xfer[2].rx_buf = &phy->iobuf[4];
+		}
+
+		spi_xfer[2].len = transfer_len;
+		spi_message_add_tail(&spi_xfer[2], &m);
+
+		reinit_completion(&phy->ready);
+
+		ret = spi_sync(phy->spi_device, &m);
+		if (ret < 0)
+			return ret;
+
+		if (in) {
+			memcpy(in, &phy->iobuf[4], transfer_len);
+			in += transfer_len;
+		}
+
+		len -= transfer_len;
+	}
+
+	return ret;
+}
+
+static int tpm_tis_spi_transfer_full(struct tpm_tis_data *data, u32 addr,
+				     u16 len, u8 *in, const u8 *out)
 {
 	struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data);
 	int ret = 0;
@@ -148,6 +214,24 @@ exit:
 	return ret;
 }
 
+int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
+			 u8 *in, const u8 *out)
+{
+	struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data);
+	struct spi_controller *ctlr = phy->spi_device->controller;
+
+	/*
+	 * TPM flow control over SPI requires full duplex support.
+	 * Send entire message to a half duplex controller to handle
+	 * wait polling in controller.
+	 * Set TPM HW flow control flag..
+	 */
+	if (ctlr->flags & SPI_CONTROLLER_HALF_DUPLEX)
+		return tpm_tis_spi_transfer_half(data, addr, len, in, out);
+	else
+		return tpm_tis_spi_transfer_full(data, addr, len, in, out);
+}
+
 static int tpm_tis_spi_read_bytes(struct tpm_tis_data *data, u32 addr,
 				  u16 len, u8 *result, enum tpm_tis_io_mode io_mode)
 {
@@ -189,6 +273,9 @@ static int tpm_tis_spi_probe(struct spi_device *dev)
 
 	phy->flow_control = tpm_tis_spi_flow_control;
 
+	if (dev->controller->flags & SPI_CONTROLLER_HALF_DUPLEX)
+		dev->mode |= SPI_TPM_HW_FLOW;
+
 	/* If the SPI device has an IRQ then use that */
 	if (dev->irq > 0)
 		irq = dev->irq;

From 32a0c860ff48543299772f7a98f32dd3ee9281b7 Mon Sep 17 00:00:00 2001
From: Alexander Steffen <Alexander.Steffen@infineon.com>
Date: Tue, 13 Jun 2023 20:02:57 +0200
Subject: [PATCH 1295/2047] tpm_tis: Move CRC check to generic send routine

The CRC functionality is initialized before tpm_tis_core, so it can be used
on all code paths within the module. Therefore, move the CRC check to the
generic send routine, that also contains all other checks for successful
command transmission, so that all those checks are in one place.

Also, this ensures that tpm_tis_ready is called when a CRC failure is
detected, to clear the invalid data from the TPM, which did not happen
previously.

Signed-off-by: Alexander Steffen <Alexander.Steffen@infineon.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_core.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index b95963095729..469e4cd7fb4a 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -469,6 +469,12 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
 		goto out_err;
 	}
 
+	rc = tpm_tis_verify_crc(priv, len, buf);
+	if (rc < 0) {
+		dev_err(&chip->dev, "CRC mismatch for command.\n");
+		goto out_err;
+	}
+
 	return 0;
 
 out_err:
@@ -517,12 +523,6 @@ static int tpm_tis_send_main(struct tpm_chip *chip, const u8 *buf, size_t len)
 	if (rc < 0)
 		return rc;
 
-	rc = tpm_tis_verify_crc(priv, len, buf);
-	if (rc < 0) {
-		dev_err(&chip->dev, "CRC mismatch for command.\n");
-		return rc;
-	}
-
 	/* go and do it */
 	rc = tpm_tis_write8(priv, TPM_STS(priv->locality), TPM_STS_GO);
 	if (rc < 0)

From b400f9d33fc21726bef465c49e71cd7364d0b8ef Mon Sep 17 00:00:00 2001
From: Alexander Steffen <Alexander.Steffen@infineon.com>
Date: Tue, 13 Jun 2023 20:02:58 +0200
Subject: [PATCH 1296/2047] tpm_tis: Use responseRetry to recover from data
 transfer errors

TPM responses may become damaged during transmission, for example due to
bit flips on the wire. Instead of aborting when detecting such issues, the
responseRetry functionality can be used to make the TPM retransmit its
response and receive it again without errors.

Signed-off-by: Alexander Steffen <Alexander.Steffen@infineon.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_core.c | 37 ++++++++++++++++++++++++++-------
 drivers/char/tpm/tpm_tis_core.h |  1 +
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 469e4cd7fb4a..52e2dc7a9f81 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -340,7 +340,7 @@ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count)
 	return size;
 }
 
-static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+static int tpm_tis_try_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 {
 	struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
 	int size = 0;
@@ -348,11 +348,6 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 	u32 expected;
 	int rc;
 
-	if (count < TPM_HEADER_SIZE) {
-		size = -EIO;
-		goto out;
-	}
-
 	size = recv_data(chip, buf, TPM_HEADER_SIZE);
 	/* read first 10 bytes, including tag, paramsize, and result */
 	if (size < TPM_HEADER_SIZE) {
@@ -385,7 +380,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 		goto out;
 	}
 	status = tpm_tis_status(chip);
-	if (status & TPM_STS_DATA_AVAIL) {	/* retry? */
+	if (status & TPM_STS_DATA_AVAIL) {
 		dev_err(&chip->dev, "Error left over data\n");
 		size = -EIO;
 		goto out;
@@ -399,10 +394,36 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 	}
 
 out:
-	tpm_tis_ready(chip);
 	return size;
 }
 
+static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+	struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+	unsigned int try;
+	int rc = 0;
+
+	if (count < TPM_HEADER_SIZE)
+		return -EIO;
+
+	for (try = 0; try < TPM_RETRY; try++) {
+		rc = tpm_tis_try_recv(chip, buf, count);
+
+		if (rc == -EIO)
+			/* Data transfer errors, indicated by EIO, can be
+			 * recovered by rereading the response.
+			 */
+			tpm_tis_write8(priv, TPM_STS(priv->locality),
+				       TPM_STS_RESPONSE_RETRY);
+		else
+			break;
+	}
+
+	tpm_tis_ready(chip);
+
+	return rc;
+}
+
 /*
  * If interrupts are used (signaled by an irq set in the vendor structure)
  * tpm.c can skip polling for the data to be available as the interrupt is
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index b1a169d7d1ca..13e99cf65efe 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -34,6 +34,7 @@ enum tis_status {
 	TPM_STS_GO = 0x20,
 	TPM_STS_DATA_AVAIL = 0x10,
 	TPM_STS_DATA_EXPECT = 0x08,
+	TPM_STS_RESPONSE_RETRY = 0x02,
 	TPM_STS_READ_ZERO = 0x23, /* bits that must be zero on read */
 };
 

From 280db21e153d8810ce3b93640c63ae922bcb9e8e Mon Sep 17 00:00:00 2001
From: Alexander Steffen <Alexander.Steffen@infineon.com>
Date: Tue, 13 Jun 2023 20:02:59 +0200
Subject: [PATCH 1297/2047] tpm_tis: Resend command to recover from data
 transfer errors

Similar to the transmission of TPM responses, also the transmission of TPM
commands may become corrupted. Instead of aborting when detecting such
issues, try resending the command again.

Signed-off-by: Alexander Steffen <Alexander.Steffen@infineon.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_core.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 52e2dc7a9f81..1b350412d8a6 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -539,10 +539,17 @@ static int tpm_tis_send_main(struct tpm_chip *chip, const u8 *buf, size_t len)
 	int rc;
 	u32 ordinal;
 	unsigned long dur;
+	unsigned int try;
 
-	rc = tpm_tis_send_data(chip, buf, len);
-	if (rc < 0)
-		return rc;
+	for (try = 0; try < TPM_RETRY; try++) {
+		rc = tpm_tis_send_data(chip, buf, len);
+		if (rc >= 0)
+			/* Data transfer done successfully */
+			break;
+		else if (rc != -EIO)
+			/* Data transfer failed, not recoverable */
+			return rc;
+	}
 
 	/* go and do it */
 	rc = tpm_tis_write8(priv, TPM_STS(priv->locality), TPM_STS_GO);

From df333d058a60a8dc282009a483124983669b4203 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 16 Jun 2023 18:07:30 +0100
Subject: [PATCH 1298/2047] tpm: remove redundant variable len

Variable len is being accumulated but the value is never read. It is
redundant and can be removed. Cleans up clang scan build warning:

drivers/char/tpm/eventlog/tpm1.c:276:2: warning: Value stored to 'len' is never read [deadcode.DeadStores]

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/eventlog/tpm1.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/tpm/eventlog/tpm1.c b/drivers/char/tpm/eventlog/tpm1.c
index 8aa9057601d6..12ee42a31c71 100644
--- a/drivers/char/tpm/eventlog/tpm1.c
+++ b/drivers/char/tpm/eventlog/tpm1.c
@@ -251,7 +251,6 @@ static int tpm1_binary_bios_measurements_show(struct seq_file *m, void *v)
 
 static int tpm1_ascii_bios_measurements_show(struct seq_file *m, void *v)
 {
-	int len = 0;
 	char *eventname;
 	struct tcpa_event *event = v;
 	unsigned char *event_entry =
@@ -273,7 +272,7 @@ static int tpm1_ascii_bios_measurements_show(struct seq_file *m, void *v)
 	/* 3rd: event type identifier */
 	seq_printf(m, " %02x", do_endian_conversion(event->event_type));
 
-	len += get_event_name(eventname, event, event_entry);
+	get_event_name(eventname, event, event_entry);
 
 	/* 4th: eventname <= max + \'0' delimiter */
 	seq_printf(m, " %s\n", eventname);

From 2ccf8c76b4fb38deb25af3830aa9489b6a374621 Mon Sep 17 00:00:00 2001
From: Li Zetao <lizetao1@huawei.com>
Date: Tue, 15 Aug 2023 16:06:37 +0800
Subject: [PATCH 1299/2047] tpm/tpm_tis_synquacer: Use module_platform_driver
 macro to simplify the code

Use the module_platform_driver macro to simplify the code, which is the
same as declaring with module_init() and module_exit().

Signed-off-by: Li Zetao <lizetao1@huawei.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_synquacer.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_synquacer.c b/drivers/char/tpm/tpm_tis_synquacer.c
index 49278746b0e2..f7d5e76b505e 100644
--- a/drivers/char/tpm/tpm_tis_synquacer.c
+++ b/drivers/char/tpm/tpm_tis_synquacer.c
@@ -162,23 +162,7 @@ static struct platform_driver tis_synquacer_drv = {
 	},
 };
 
-static int __init tpm_tis_synquacer_module_init(void)
-{
-	int rc;
+module_platform_driver(tis_synquacer_drv);
 
-	rc = platform_driver_register(&tis_synquacer_drv);
-	if (rc)
-		return rc;
-
-	return 0;
-}
-
-static void __exit tpm_tis_synquacer_module_exit(void)
-{
-	platform_driver_unregister(&tis_synquacer_drv);
-}
-
-module_init(tpm_tis_synquacer_module_init);
-module_exit(tpm_tis_synquacer_module_exit);
 MODULE_DESCRIPTION("TPM MMIO Driver for Socionext SynQuacer platform");
 MODULE_LICENSE("GPL");

From 218a2680624cba1611e3dfc7d9b646d240e5f855 Mon Sep 17 00:00:00 2001
From: Eric Snowberg <eric.snowberg@oracle.com>
Date: Thu, 17 Aug 2023 10:04:07 -0400
Subject: [PATCH 1300/2047] certs: Reference revocation list for all keyrings

Systems booted with shim have a Forbidden Signature Database called mokx.
During boot, hashes and certs contained within the mokx are loaded into the
blacklist keyring.  When calling verify_pkcs7_message_sig the contents of
the blacklist keyring (or revocation list) are referenced when validating
keys on the platform keyring. Currently, when validating against the
secondary or builtin keyrings, the revocation list is not referenced.  Move
up the check to allow the revocation list to be used with all keyrings,
including the secondary and builtin, allowing the system owner to take
corrective action should a vulnerability be found within keys contained
within either keyring.

Signed-off-by: Eric Snowberg <eric.snowberg@oracle.com>
Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 certs/system_keyring.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 33841c91f12c..9de610bf1f4b 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -330,6 +330,12 @@ int verify_pkcs7_message_sig(const void *data, size_t len,
 	if (ret < 0)
 		goto error;
 
+	ret = is_key_on_revocation_list(pkcs7);
+	if (ret != -ENOKEY) {
+		pr_devel("PKCS#7 key is on revocation list\n");
+		goto error;
+	}
+
 	if (!trusted_keys) {
 		trusted_keys = builtin_trusted_keys;
 	} else if (trusted_keys == VERIFY_USE_SECONDARY_KEYRING) {
@@ -349,12 +355,6 @@ int verify_pkcs7_message_sig(const void *data, size_t len,
 			pr_devel("PKCS#7 platform keyring is not available\n");
 			goto error;
 		}
-
-		ret = is_key_on_revocation_list(pkcs7);
-		if (ret != -ENOKEY) {
-			pr_devel("PKCS#7 platform key is on revocation list\n");
-			goto error;
-		}
 	}
 	ret = pkcs7_validate_trust(pkcs7, trusted_keys);
 	if (ret < 0) {

From 104fd0b5e948157f8e8ac88a20b46ba8641d4e95 Mon Sep 17 00:00:00 2001
From: Yuxiao Zhang <yuxiaozhang@google.com>
Date: Tue, 27 Jun 2023 13:25:41 -0700
Subject: [PATCH 1301/2047] pstore: Support record sizes larger than kmalloc()
 limit

Currently pstore record buffers are allocated using kmalloc() which has
a maximum size based on page size. If a large "pmsg-size" module
parameter is specified, pmsg will fail to copy the contents since
memdup_user() is limited to kmalloc() allocation sizes.

Since we don't need physically contiguous memory for any of the pstore
record buffers, use kvzalloc() to avoid such limitations in the core of
pstore and in the ram backend, and explicitly read from userspace using
vmemdup_user(). This also means that any other backends that want to
(or do already) support larger record sizes will Just Work now.

Signed-off-by: Yuxiao Zhang <yuxiaozhang@google.com>
Link: https://lore.kernel.org/r/20230627202540.881909-2-yuxiaozhang@google.com
Co-developed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/inode.c    |  2 +-
 fs/pstore/platform.c | 27 ++++++++++++++-------------
 fs/pstore/ram.c      | 11 ++++++-----
 fs/pstore/ram_core.c |  5 +++--
 4 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ffbadb8b3032..df7fb2ad4599 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -54,7 +54,7 @@ static void free_pstore_private(struct pstore_private *private)
 	if (!private)
 		return;
 	if (private->record) {
-		kfree(private->record->buf);
+		kvfree(private->record->buf);
 		kfree(private->record->priv);
 		kfree(private->record);
 	}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index aaa5e4e66db4..62356d542ef6 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/kmsg_dump.h>
 #include <linux/console.h>
+#include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/pstore.h>
 #include <linux/string.h>
@@ -215,7 +216,7 @@ static void allocate_buf_for_compression(void)
 	 * uncompressed record size, since any record that would be expanded by
 	 * compression is just stored uncompressed.
 	 */
-	buf = kmalloc(psinfo->bufsize, GFP_KERNEL);
+	buf = kvzalloc(psinfo->bufsize, GFP_KERNEL);
 	if (!buf) {
 		pr_err("Failed %zu byte compression buffer allocation for: %s\n",
 		       psinfo->bufsize, compress);
@@ -226,7 +227,7 @@ static void allocate_buf_for_compression(void)
 		vmalloc(zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL));
 	if (!compress_workspace) {
 		pr_err("Failed to allocate zlib deflate workspace\n");
-		kfree(buf);
+		kvfree(buf);
 		return;
 	}
 
@@ -243,7 +244,7 @@ static void free_buf_for_compression(void)
 		compress_workspace = NULL;
 	}
 
-	kfree(big_oops_buf);
+	kvfree(big_oops_buf);
 	big_oops_buf = NULL;
 }
 
@@ -421,7 +422,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
 	if (record->buf)
 		return -EINVAL;
 
-	record->buf = memdup_user(buf, record->size);
+	record->buf = vmemdup_user(buf, record->size);
 	if (IS_ERR(record->buf)) {
 		ret = PTR_ERR(record->buf);
 		goto out;
@@ -429,7 +430,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
 
 	ret = record->psi->write(record);
 
-	kfree(record->buf);
+	kvfree(record->buf);
 out:
 	record->buf = NULL;
 
@@ -582,8 +583,8 @@ static void decompress_record(struct pstore_record *record,
 	}
 
 	/* Allocate enough space to hold max decompression and ECC. */
-	workspace = kmalloc(psinfo->bufsize + record->ecc_notice_size,
-			    GFP_KERNEL);
+	workspace = kvzalloc(psinfo->bufsize + record->ecc_notice_size,
+			     GFP_KERNEL);
 	if (!workspace)
 		return;
 
@@ -595,7 +596,7 @@ static void decompress_record(struct pstore_record *record,
 	ret = zlib_inflate(zstream, Z_FINISH);
 	if (ret != Z_STREAM_END) {
 		pr_err("zlib_inflate() failed, ret = %d!\n", ret);
-		kfree(workspace);
+		kvfree(workspace);
 		return;
 	}
 
@@ -606,14 +607,14 @@ static void decompress_record(struct pstore_record *record,
 	       record->ecc_notice_size);
 
 	/* Copy decompressed contents into an minimum-sized allocation. */
-	unzipped = kmemdup(workspace, unzipped_len + record->ecc_notice_size,
-			   GFP_KERNEL);
-	kfree(workspace);
+	unzipped = kvmemdup(workspace, unzipped_len + record->ecc_notice_size,
+			    GFP_KERNEL);
+	kvfree(workspace);
 	if (!unzipped)
 		return;
 
 	/* Swap out compressed contents with decompressed contents. */
-	kfree(record->buf);
+	kvfree(record->buf);
 	record->buf = unzipped;
 	record->size = unzipped_len;
 	record->compressed = false;
@@ -673,7 +674,7 @@ void pstore_get_backend_records(struct pstore_info *psi,
 		rc = pstore_mkfile(root, record);
 		if (rc) {
 			/* pstore_mkfile() did not take record, so free it. */
-			kfree(record->buf);
+			kvfree(record->buf);
 			kfree(record->priv);
 			kfree(record);
 			if (rc != -EEXIST || !quiet)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 2f625e1fa8d8..d36702c7ab3c 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -20,6 +20,7 @@
 #include <linux/compiler.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/mm.h>
 
 #include "internal.h"
 #include "ram_internal.h"
@@ -268,7 +269,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
 	/* ECC correction notice */
 	record->ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);
 
-	record->buf = kmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
+	record->buf = kvzalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
 	if (record->buf == NULL) {
 		size = -ENOMEM;
 		goto out;
@@ -282,7 +283,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
 
 out:
 	if (free_prz) {
-		kfree(prz->old_log);
+		kvfree(prz->old_log);
 		kfree(prz);
 	}
 
@@ -833,7 +834,7 @@ static int ramoops_probe(struct platform_device *pdev)
 	 */
 	if (cxt->pstore.flags & PSTORE_FLAGS_DMESG) {
 		cxt->pstore.bufsize = cxt->dprzs[0]->buffer_size;
-		cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL);
+		cxt->pstore.buf = kvzalloc(cxt->pstore.bufsize, GFP_KERNEL);
 		if (!cxt->pstore.buf) {
 			pr_err("cannot allocate pstore crash dump buffer\n");
 			err = -ENOMEM;
@@ -866,7 +867,7 @@ static int ramoops_probe(struct platform_device *pdev)
 	return 0;
 
 fail_buf:
-	kfree(cxt->pstore.buf);
+	kvfree(cxt->pstore.buf);
 fail_clear:
 	cxt->pstore.bufsize = 0;
 fail_init:
@@ -881,7 +882,7 @@ static void ramoops_remove(struct platform_device *pdev)
 
 	pstore_unregister(&cxt->pstore);
 
-	kfree(cxt->pstore.buf);
+	kvfree(cxt->pstore.buf);
 	cxt->pstore.bufsize = 0;
 
 	ramoops_free_przs(cxt);
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index eb6df190d752..0eb780b90281 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include <asm/page.h>
 
 #include "ram_internal.h"
@@ -301,7 +302,7 @@ void persistent_ram_save_old(struct persistent_ram_zone *prz)
 
 	if (!prz->old_log) {
 		persistent_ram_ecc_old(prz);
-		prz->old_log = kmalloc(size, GFP_KERNEL);
+		prz->old_log = kvzalloc(size, GFP_KERNEL);
 	}
 	if (!prz->old_log) {
 		pr_err("failed to allocate buffer\n");
@@ -385,7 +386,7 @@ void *persistent_ram_old(struct persistent_ram_zone *prz)
 
 void persistent_ram_free_old(struct persistent_ram_zone *prz)
 {
-	kfree(prz->old_log);
+	kvfree(prz->old_log);
 	prz->old_log = NULL;
 	prz->old_log_size = 0;
 }

From c8248faf3ca276ebdf60f003b3e04bf764daba91 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Aug 2023 13:06:03 -0700
Subject: [PATCH 1302/2047] Compiler Attributes: counted_by: Adjust name and
 identifier expansion

GCC and Clang's current RFCs name this attribute "counted_by", and have
moved away from using a string for the member name. Update the kernel's
macros to match. Additionally provide a UAPI no-op macro for UAPI structs
that will gain annotations.

Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Fixes: dd06e72e68bc ("Compiler Attributes: Add __counted_by macro")
Acked-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20230817200558.never.077-kees@kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/compiler_attributes.h | 26 +++++++++++++-------------
 include/uapi/linux/stddef.h         |  4 ++++
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 00efa35c350f..28566624f008 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -94,6 +94,19 @@
 # define __copy(symbol)
 #endif
 
+/*
+ * Optional: only supported since gcc >= 14
+ * Optional: only supported since clang >= 18
+ *
+ *   gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896
+ * clang: https://reviews.llvm.org/D148381
+ */
+#if __has_attribute(__counted_by__)
+# define __counted_by(member)		__attribute__((__counted_by__(member)))
+#else
+# define __counted_by(member)
+#endif
+
 /*
  * Optional: not supported by gcc
  * Optional: only supported since clang >= 14.0
@@ -129,19 +142,6 @@
 # define __designated_init
 #endif
 
-/*
- * Optional: only supported since gcc >= 14
- * Optional: only supported since clang >= 17
- *
- *   gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896
- * clang: https://reviews.llvm.org/D148381
- */
-#if __has_attribute(__element_count__)
-# define __counted_by(member)		__attribute__((__element_count__(#member)))
-#else
-# define __counted_by(member)
-#endif
-
 /*
  * Optional: only supported since clang >= 14.0
  *
diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index 7837ba4fe728..7c3fc3980881 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -45,3 +45,7 @@
 		TYPE NAME[]; \
 	}
 #endif
+
+#ifndef __counted_by
+#define __counted_by(m)
+#endif

From 5d207e83ca41206e75c2cd414d40b451ef04c259 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Aug 2023 21:27:35 -0700
Subject: [PATCH 1303/2047] lkdtm: Add FAM_BOUNDS test for __counted_by

Add new CONFIG_UBSAN_BOUNDS test for __counted_by attribute.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/misc/lkdtm/bugs.c | 47 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index 963b4dee6a7d..c66cc05a68c4 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -273,8 +273,8 @@ static void lkdtm_HUNG_TASK(void)
 	schedule();
 }
 
-volatile unsigned int huge = INT_MAX - 2;
-volatile unsigned int ignored;
+static volatile unsigned int huge = INT_MAX - 2;
+static volatile unsigned int ignored;
 
 static void lkdtm_OVERFLOW_SIGNED(void)
 {
@@ -305,7 +305,7 @@ static void lkdtm_OVERFLOW_UNSIGNED(void)
 	ignored = value;
 }
 
-/* Intentionally using old-style flex array definition of 1 byte. */
+/* Intentionally using unannotated flex array definition. */
 struct array_bounds_flex_array {
 	int one;
 	int two;
@@ -357,6 +357,46 @@ static void lkdtm_ARRAY_BOUNDS(void)
 		pr_expected_config(CONFIG_UBSAN_BOUNDS);
 }
 
+struct lkdtm_annotated {
+	unsigned long flags;
+	int count;
+	int array[] __counted_by(count);
+};
+
+static volatile int fam_count = 4;
+
+static void lkdtm_FAM_BOUNDS(void)
+{
+	struct lkdtm_annotated *inst;
+
+	inst = kzalloc(struct_size(inst, array, fam_count + 1), GFP_KERNEL);
+	if (!inst) {
+		pr_err("FAIL: could not allocate test struct!\n");
+		return;
+	}
+
+	inst->count = fam_count;
+	pr_info("Array access within bounds ...\n");
+	inst->array[1] = fam_count;
+	ignored = inst->array[1];
+
+	pr_info("Array access beyond bounds ...\n");
+	inst->array[fam_count] = fam_count;
+	ignored = inst->array[fam_count];
+
+	kfree(inst);
+
+	pr_err("FAIL: survived access of invalid flexible array member index!\n");
+
+	if (!__has_attribute(__counted_by__))
+		pr_warn("This is expected since this %s was built a compiler supporting __counted_by\n",
+			lkdtm_kernel_info);
+	else if (IS_ENABLED(CONFIG_UBSAN_BOUNDS))
+		pr_expected_config(CONFIG_UBSAN_TRAP);
+	else
+		pr_expected_config(CONFIG_UBSAN_BOUNDS);
+}
+
 static void lkdtm_CORRUPT_LIST_ADD(void)
 {
 	/*
@@ -616,6 +656,7 @@ static struct crashtype crashtypes[] = {
 	CRASHTYPE(OVERFLOW_SIGNED),
 	CRASHTYPE(OVERFLOW_UNSIGNED),
 	CRASHTYPE(ARRAY_BOUNDS),
+	CRASHTYPE(FAM_BOUNDS),
 	CRASHTYPE(CORRUPT_LIST_ADD),
 	CRASHTYPE(CORRUPT_LIST_DEL),
 	CRASHTYPE(STACK_GUARD_PAGE_LEADING),

From a5a297918abba9e35b4b1683b14542d6b7f31ade Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 18 Aug 2023 10:55:49 +0530
Subject: [PATCH 1304/2047] OPP: Fix argument name in doc comment

The name of the argument is "opp_table" and not "table", fix the
comment.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202308172310.FzcidE4c-lkp@intel.com/
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/cpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/opp/cpu.c b/drivers/opp/cpu.c
index 3c3506021501..12c429b407ca 100644
--- a/drivers/opp/cpu.c
+++ b/drivers/opp/cpu.c
@@ -24,7 +24,7 @@
 /**
  * dev_pm_opp_init_cpufreq_table() - create a cpufreq table for a device
  * @dev:	device for which we do this operation
- * @table:	Cpufreq table returned back to caller
+ * @opp_table:	Cpufreq table returned back to caller
  *
  * Generate a cpufreq table for a provided device- this assumes that the
  * opp table is already initialized and ready for usage.
@@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_init_cpufreq_table);
 /**
  * dev_pm_opp_free_cpufreq_table() - free the cpufreq table
  * @dev:	device for which we do this operation
- * @table:	table to free
+ * @opp_table:	table to free
  *
  * Free up the table allocated by dev_pm_opp_init_cpufreq_table
  */

From 3faf89f27aab1ef484e088c3b126126a3199615c Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Sat, 12 Aug 2023 20:36:35 +0200
Subject: [PATCH 1305/2047] gpio: sim: simplify code with cleanup helpers

Use macros defined in linux/cleanup.h to automate resource lifetime
control in gpio-sim.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-sim.c | 254 ++++++++++++++--------------------------
 1 file changed, 85 insertions(+), 169 deletions(-)

diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index b7161c83c104..141249f2bf40 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -8,6 +8,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/bitmap.h>
+#include <linux/cleanup.h>
 #include <linux/completion.h>
 #include <linux/configfs.h>
 #include <linux/device.h>
@@ -68,7 +69,7 @@ static int gpio_sim_apply_pull(struct gpio_sim_chip *chip,
 	gc = &chip->gc;
 	desc = &gc->gpiodev->descs[offset];
 
-	mutex_lock(&chip->lock);
+	guard(mutex)(&chip->lock);
 
 	if (test_bit(FLAG_REQUESTED, &desc->flags) &&
 	    !test_bit(FLAG_IS_OUT, &desc->flags)) {
@@ -104,29 +105,24 @@ set_value:
 
 set_pull:
 	__assign_bit(offset, chip->pull_map, value);
-	mutex_unlock(&chip->lock);
 	return 0;
 }
 
 static int gpio_sim_get(struct gpio_chip *gc, unsigned int offset)
 {
 	struct gpio_sim_chip *chip = gpiochip_get_data(gc);
-	int ret;
 
-	mutex_lock(&chip->lock);
-	ret = !!test_bit(offset, chip->value_map);
-	mutex_unlock(&chip->lock);
+	guard(mutex)(&chip->lock);
 
-	return ret;
+	return !!test_bit(offset, chip->value_map);
 }
 
 static void gpio_sim_set(struct gpio_chip *gc, unsigned int offset, int value)
 {
 	struct gpio_sim_chip *chip = gpiochip_get_data(gc);
 
-	mutex_lock(&chip->lock);
-	__assign_bit(offset, chip->value_map, value);
-	mutex_unlock(&chip->lock);
+	scoped_guard(mutex, &chip->lock)
+		__assign_bit(offset, chip->value_map, value);
 }
 
 static int gpio_sim_get_multiple(struct gpio_chip *gc,
@@ -134,9 +130,8 @@ static int gpio_sim_get_multiple(struct gpio_chip *gc,
 {
 	struct gpio_sim_chip *chip = gpiochip_get_data(gc);
 
-	mutex_lock(&chip->lock);
-	bitmap_replace(bits, bits, chip->value_map, mask, gc->ngpio);
-	mutex_unlock(&chip->lock);
+	scoped_guard(mutex, &chip->lock)
+		bitmap_replace(bits, bits, chip->value_map, mask, gc->ngpio);
 
 	return 0;
 }
@@ -146,9 +141,9 @@ static void gpio_sim_set_multiple(struct gpio_chip *gc,
 {
 	struct gpio_sim_chip *chip = gpiochip_get_data(gc);
 
-	mutex_lock(&chip->lock);
-	bitmap_replace(chip->value_map, chip->value_map, bits, mask, gc->ngpio);
-	mutex_unlock(&chip->lock);
+	scoped_guard(mutex, &chip->lock)
+		bitmap_replace(chip->value_map, chip->value_map, bits, mask,
+			       gc->ngpio);
 }
 
 static int gpio_sim_direction_output(struct gpio_chip *gc,
@@ -156,10 +151,10 @@ static int gpio_sim_direction_output(struct gpio_chip *gc,
 {
 	struct gpio_sim_chip *chip = gpiochip_get_data(gc);
 
-	mutex_lock(&chip->lock);
-	__clear_bit(offset, chip->direction_map);
-	__assign_bit(offset, chip->value_map, value);
-	mutex_unlock(&chip->lock);
+	scoped_guard(mutex, &chip->lock) {
+		__clear_bit(offset, chip->direction_map);
+		__assign_bit(offset, chip->value_map, value);
+	}
 
 	return 0;
 }
@@ -168,9 +163,8 @@ static int gpio_sim_direction_input(struct gpio_chip *gc, unsigned int offset)
 {
 	struct gpio_sim_chip *chip = gpiochip_get_data(gc);
 
-	mutex_lock(&chip->lock);
-	__set_bit(offset, chip->direction_map);
-	mutex_unlock(&chip->lock);
+	scoped_guard(mutex, &chip->lock)
+		__set_bit(offset, chip->direction_map);
 
 	return 0;
 }
@@ -180,9 +174,8 @@ static int gpio_sim_get_direction(struct gpio_chip *gc, unsigned int offset)
 	struct gpio_sim_chip *chip = gpiochip_get_data(gc);
 	int direction;
 
-	mutex_lock(&chip->lock);
-	direction = !!test_bit(offset, chip->direction_map);
-	mutex_unlock(&chip->lock);
+	scoped_guard(mutex, &chip->lock)
+		direction = !!test_bit(offset, chip->direction_map);
 
 	return direction ? GPIO_LINE_DIRECTION_IN : GPIO_LINE_DIRECTION_OUT;
 }
@@ -215,9 +208,9 @@ static void gpio_sim_free(struct gpio_chip *gc, unsigned int offset)
 {
 	struct gpio_sim_chip *chip = gpiochip_get_data(gc);
 
-	mutex_lock(&chip->lock);
-	__assign_bit(offset, chip->value_map, !!test_bit(offset, chip->pull_map));
-	mutex_unlock(&chip->lock);
+	scoped_guard(mutex, &chip->lock)
+		__assign_bit(offset, chip->value_map,
+			     !!test_bit(offset, chip->pull_map));
 }
 
 static ssize_t gpio_sim_sysfs_val_show(struct device *dev,
@@ -227,9 +220,8 @@ static ssize_t gpio_sim_sysfs_val_show(struct device *dev,
 	struct gpio_sim_chip *chip = dev_get_drvdata(dev);
 	int val;
 
-	mutex_lock(&chip->lock);
-	val = !!test_bit(line_attr->offset, chip->value_map);
-	mutex_unlock(&chip->lock);
+	scoped_guard(mutex, &chip->lock)
+		val = !!test_bit(line_attr->offset, chip->value_map);
 
 	return sysfs_emit(buf, "%d\n", val);
 }
@@ -258,9 +250,8 @@ static ssize_t gpio_sim_sysfs_pull_show(struct device *dev,
 	struct gpio_sim_chip *chip = dev_get_drvdata(dev);
 	int pull;
 
-	mutex_lock(&chip->lock);
-	pull = !!test_bit(line_attr->offset, chip->pull_map);
-	mutex_unlock(&chip->lock);
+	scoped_guard(mutex, &chip->lock)
+		pull = !!test_bit(line_attr->offset, chip->pull_map);
 
 	return sysfs_emit(buf, "%s\n", gpio_sim_sysfs_pull_strings[pull]);
 }
@@ -656,17 +647,14 @@ static ssize_t gpio_sim_device_config_dev_name_show(struct config_item *item,
 {
 	struct gpio_sim_device *dev = to_gpio_sim_device(item);
 	struct platform_device *pdev;
-	int ret;
 
-	mutex_lock(&dev->lock);
+	guard(mutex)(&dev->lock);
+
 	pdev = dev->pdev;
 	if (pdev)
-		ret = sprintf(page, "%s\n", dev_name(&pdev->dev));
-	else
-		ret = sprintf(page, "gpio-sim.%d\n", dev->id);
-	mutex_unlock(&dev->lock);
+		return sprintf(page, "%s\n", dev_name(&pdev->dev));
 
-	return ret;
+	return sprintf(page, "gpio-sim.%d\n", dev->id);
 }
 
 CONFIGFS_ATTR_RO(gpio_sim_device_config_, dev_name);
@@ -677,9 +665,8 @@ gpio_sim_device_config_live_show(struct config_item *item, char *page)
 	struct gpio_sim_device *dev = to_gpio_sim_device(item);
 	bool live;
 
-	mutex_lock(&dev->lock);
-	live = gpio_sim_device_is_live_unlocked(dev);
-	mutex_unlock(&dev->lock);
+	scoped_guard(mutex, &dev->lock)
+		live = gpio_sim_device_is_live_unlocked(dev);
 
 	return sprintf(page, "%c\n", live ? '1' : '0');
 }
@@ -834,8 +821,7 @@ gpio_sim_make_bank_swnode(struct gpio_sim_bank *bank,
 {
 	struct property_entry properties[GPIO_SIM_PROP_MAX];
 	unsigned int prop_idx = 0, line_names_size = 0;
-	struct fwnode_handle *swnode;
-	char **line_names;
+	char **line_names __free(kfree) = NULL;
 
 	memset(properties, 0, sizeof(properties));
 
@@ -854,9 +840,7 @@ gpio_sim_make_bank_swnode(struct gpio_sim_bank *bank,
 						"gpio-line-names",
 						line_names, line_names_size);
 
-	swnode = fwnode_create_software_node(properties, parent);
-	kfree(line_names);
-	return swnode;
+	return fwnode_create_software_node(properties, parent);
 }
 
 static void gpio_sim_remove_swnode_recursive(struct fwnode_handle *swnode)
@@ -981,7 +965,7 @@ gpio_sim_device_config_live_store(struct config_item *item,
 	if (ret)
 		return ret;
 
-	mutex_lock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
 	if (live == gpio_sim_device_is_live_unlocked(dev))
 		ret = -EPERM;
@@ -990,8 +974,6 @@ gpio_sim_device_config_live_store(struct config_item *item,
 	else
 		gpio_sim_device_deactivate_unlocked(dev);
 
-	mutex_unlock(&dev->lock);
-
 	return ret ?: count;
 }
 
@@ -1028,17 +1010,14 @@ static ssize_t gpio_sim_bank_config_chip_name_show(struct config_item *item,
 	struct gpio_sim_bank *bank = to_gpio_sim_bank(item);
 	struct gpio_sim_device *dev = gpio_sim_bank_get_device(bank);
 	struct gpio_sim_chip_name_ctx ctx = { bank->swnode, page };
-	int ret;
 
-	mutex_lock(&dev->lock);
+	guard(mutex)(&dev->lock);
+
 	if (gpio_sim_device_is_live_unlocked(dev))
-		ret = device_for_each_child(&dev->pdev->dev, &ctx,
-					    gpio_sim_emit_chip_name);
-	else
-		ret = sprintf(page, "none\n");
-	mutex_unlock(&dev->lock);
+		return device_for_each_child(&dev->pdev->dev, &ctx,
+					     gpio_sim_emit_chip_name);
 
-	return ret;
+	return sprintf(page, "none\n");
 }
 
 CONFIGFS_ATTR_RO(gpio_sim_bank_config_, chip_name);
@@ -1048,13 +1027,10 @@ gpio_sim_bank_config_label_show(struct config_item *item, char *page)
 {
 	struct gpio_sim_bank *bank = to_gpio_sim_bank(item);
 	struct gpio_sim_device *dev = gpio_sim_bank_get_device(bank);
-	int ret;
 
-	mutex_lock(&dev->lock);
-	ret = sprintf(page, "%s\n", bank->label ?: "");
-	mutex_unlock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
-	return ret;
+	return sprintf(page, "%s\n", bank->label ?: "");
 }
 
 static ssize_t gpio_sim_bank_config_label_store(struct config_item *item,
@@ -1064,23 +1040,18 @@ static ssize_t gpio_sim_bank_config_label_store(struct config_item *item,
 	struct gpio_sim_device *dev = gpio_sim_bank_get_device(bank);
 	char *trimmed;
 
-	mutex_lock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
-	if (gpio_sim_device_is_live_unlocked(dev)) {
-		mutex_unlock(&dev->lock);
+	if (gpio_sim_device_is_live_unlocked(dev))
 		return -EBUSY;
-	}
 
 	trimmed = gpio_sim_strdup_trimmed(page, count);
-	if (!trimmed) {
-		mutex_unlock(&dev->lock);
+	if (!trimmed)
 		return -ENOMEM;
-	}
 
 	kfree(bank->label);
 	bank->label = trimmed;
 
-	mutex_unlock(&dev->lock);
 	return count;
 }
 
@@ -1091,13 +1062,10 @@ gpio_sim_bank_config_num_lines_show(struct config_item *item, char *page)
 {
 	struct gpio_sim_bank *bank = to_gpio_sim_bank(item);
 	struct gpio_sim_device *dev = gpio_sim_bank_get_device(bank);
-	int ret;
 
-	mutex_lock(&dev->lock);
-	ret = sprintf(page, "%u\n", bank->num_lines);
-	mutex_unlock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
-	return ret;
+	return sprintf(page, "%u\n", bank->num_lines);
 }
 
 static ssize_t
@@ -1116,16 +1084,13 @@ gpio_sim_bank_config_num_lines_store(struct config_item *item,
 	if (num_lines == 0)
 		return -EINVAL;
 
-	mutex_lock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
-	if (gpio_sim_device_is_live_unlocked(dev)) {
-		mutex_unlock(&dev->lock);
+	if (gpio_sim_device_is_live_unlocked(dev))
 		return -EBUSY;
-	}
 
 	bank->num_lines = num_lines;
 
-	mutex_unlock(&dev->lock);
 	return count;
 }
 
@@ -1143,13 +1108,10 @@ gpio_sim_line_config_name_show(struct config_item *item, char *page)
 {
 	struct gpio_sim_line *line = to_gpio_sim_line(item);
 	struct gpio_sim_device *dev = gpio_sim_line_get_device(line);
-	int ret;
 
-	mutex_lock(&dev->lock);
-	ret = sprintf(page, "%s\n", line->name ?: "");
-	mutex_unlock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
-	return ret;
+	return sprintf(page, "%s\n", line->name ?: "");
 }
 
 static ssize_t gpio_sim_line_config_name_store(struct config_item *item,
@@ -1159,24 +1121,18 @@ static ssize_t gpio_sim_line_config_name_store(struct config_item *item,
 	struct gpio_sim_device *dev = gpio_sim_line_get_device(line);
 	char *trimmed;
 
-	mutex_lock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
-	if (gpio_sim_device_is_live_unlocked(dev)) {
-		mutex_unlock(&dev->lock);
+	if (gpio_sim_device_is_live_unlocked(dev))
 		return -EBUSY;
-	}
 
 	trimmed = gpio_sim_strdup_trimmed(page, count);
-	if (!trimmed) {
-		mutex_unlock(&dev->lock);
+	if (!trimmed)
 		return -ENOMEM;
-	}
 
 	kfree(line->name);
 	line->name = trimmed;
 
-	mutex_unlock(&dev->lock);
-
 	return count;
 }
 
@@ -1192,13 +1148,10 @@ static ssize_t gpio_sim_hog_config_name_show(struct config_item *item,
 {
 	struct gpio_sim_hog *hog = to_gpio_sim_hog(item);
 	struct gpio_sim_device *dev = gpio_sim_hog_get_device(hog);
-	int ret;
 
-	mutex_lock(&dev->lock);
-	ret = sprintf(page, "%s\n", hog->name ?: "");
-	mutex_unlock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
-	return ret;
+	return sprintf(page, "%s\n", hog->name ?: "");
 }
 
 static ssize_t gpio_sim_hog_config_name_store(struct config_item *item,
@@ -1208,24 +1161,18 @@ static ssize_t gpio_sim_hog_config_name_store(struct config_item *item,
 	struct gpio_sim_device *dev = gpio_sim_hog_get_device(hog);
 	char *trimmed;
 
-	mutex_lock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
-	if (gpio_sim_device_is_live_unlocked(dev)) {
-		mutex_unlock(&dev->lock);
+	if (gpio_sim_device_is_live_unlocked(dev))
 		return -EBUSY;
-	}
 
 	trimmed = gpio_sim_strdup_trimmed(page, count);
-	if (!trimmed) {
-		mutex_unlock(&dev->lock);
+	if (!trimmed)
 		return -ENOMEM;
-	}
 
 	kfree(hog->name);
 	hog->name = trimmed;
 
-	mutex_unlock(&dev->lock);
-
 	return count;
 }
 
@@ -1239,9 +1186,8 @@ static ssize_t gpio_sim_hog_config_direction_show(struct config_item *item,
 	char *repr;
 	int dir;
 
-	mutex_lock(&dev->lock);
-	dir = hog->dir;
-	mutex_unlock(&dev->lock);
+	scoped_guard(mutex, &dev->lock)
+		dir = hog->dir;
 
 	switch (dir) {
 	case GPIOD_IN:
@@ -1270,12 +1216,10 @@ gpio_sim_hog_config_direction_store(struct config_item *item,
 	struct gpio_sim_device *dev = gpio_sim_hog_get_device(hog);
 	int dir;
 
-	mutex_lock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
-	if (gpio_sim_device_is_live_unlocked(dev)) {
-		mutex_unlock(&dev->lock);
+	if (gpio_sim_device_is_live_unlocked(dev))
 		return -EBUSY;
-	}
 
 	if (sysfs_streq(page, "input"))
 		dir = GPIOD_IN;
@@ -1284,17 +1228,10 @@ gpio_sim_hog_config_direction_store(struct config_item *item,
 	else if (sysfs_streq(page, "output-low"))
 		dir = GPIOD_OUT_LOW;
 	else
-		dir = -EINVAL;
-
-	if (dir < 0) {
-		mutex_unlock(&dev->lock);
-		return dir;
-	}
+		return -EINVAL;
 
 	hog->dir = dir;
 
-	mutex_unlock(&dev->lock);
-
 	return count;
 }
 
@@ -1312,9 +1249,8 @@ static void gpio_sim_hog_config_item_release(struct config_item *item)
 	struct gpio_sim_line *line = hog->parent;
 	struct gpio_sim_device *dev = gpio_sim_hog_get_device(hog);
 
-	mutex_lock(&dev->lock);
-	line->hog = NULL;
-	mutex_unlock(&dev->lock);
+	scoped_guard(mutex, &dev->lock)
+		line->hog = NULL;
 
 	kfree(hog->name);
 	kfree(hog);
@@ -1340,13 +1276,11 @@ gpio_sim_line_config_make_hog_item(struct config_group *group, const char *name)
 	if (strcmp(name, "hog") != 0)
 		return ERR_PTR(-EINVAL);
 
-	mutex_lock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
 	hog = kzalloc(sizeof(*hog), GFP_KERNEL);
-	if (!hog) {
-		mutex_unlock(&dev->lock);
+	if (!hog)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	config_item_init_type_name(&hog->item, name,
 				   &gpio_sim_hog_config_type);
@@ -1356,8 +1290,6 @@ gpio_sim_line_config_make_hog_item(struct config_group *group, const char *name)
 	hog->parent = line;
 	line->hog = hog;
 
-	mutex_unlock(&dev->lock);
-
 	return &hog->item;
 }
 
@@ -1366,9 +1298,8 @@ static void gpio_sim_line_config_group_release(struct config_item *item)
 	struct gpio_sim_line *line = to_gpio_sim_line(item);
 	struct gpio_sim_device *dev = gpio_sim_line_get_device(line);
 
-	mutex_lock(&dev->lock);
-	list_del(&line->siblings);
-	mutex_unlock(&dev->lock);
+	scoped_guard(mutex, &dev->lock)
+		list_del(&line->siblings);
 
 	kfree(line->name);
 	kfree(line);
@@ -1403,18 +1334,14 @@ gpio_sim_bank_config_make_line_group(struct config_group *group,
 	if (ret != 1 || nchar != strlen(name))
 		return ERR_PTR(-EINVAL);
 
-	mutex_lock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
-	if (gpio_sim_device_is_live_unlocked(dev)) {
-		mutex_unlock(&dev->lock);
+	if (gpio_sim_device_is_live_unlocked(dev))
 		return ERR_PTR(-EBUSY);
-	}
 
 	line = kzalloc(sizeof(*line), GFP_KERNEL);
-	if (!line) {
-		mutex_unlock(&dev->lock);
+	if (!line)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	config_group_init_type_name(&line->group, name,
 				    &gpio_sim_line_config_type);
@@ -1423,8 +1350,6 @@ gpio_sim_bank_config_make_line_group(struct config_group *group,
 	line->offset = offset;
 	list_add_tail(&line->siblings, &bank->line_list);
 
-	mutex_unlock(&dev->lock);
-
 	return &line->group;
 }
 
@@ -1433,9 +1358,8 @@ static void gpio_sim_bank_config_group_release(struct config_item *item)
 	struct gpio_sim_bank *bank = to_gpio_sim_bank(item);
 	struct gpio_sim_device *dev = gpio_sim_bank_get_device(bank);
 
-	mutex_lock(&dev->lock);
-	list_del(&bank->siblings);
-	mutex_unlock(&dev->lock);
+	scoped_guard(mutex, &dev->lock)
+		list_del(&bank->siblings);
 
 	kfree(bank->label);
 	kfree(bank);
@@ -1463,18 +1387,14 @@ gpio_sim_device_config_make_bank_group(struct config_group *group,
 	struct gpio_sim_device *dev = to_gpio_sim_device(&group->cg_item);
 	struct gpio_sim_bank *bank;
 
-	mutex_lock(&dev->lock);
+	guard(mutex)(&dev->lock);
 
-	if (gpio_sim_device_is_live_unlocked(dev)) {
-		mutex_unlock(&dev->lock);
+	if (gpio_sim_device_is_live_unlocked(dev))
 		return ERR_PTR(-EBUSY);
-	}
 
 	bank = kzalloc(sizeof(*bank), GFP_KERNEL);
-	if (!bank) {
-		mutex_unlock(&dev->lock);
+	if (!bank)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	config_group_init_type_name(&bank->group, name,
 				    &gpio_sim_bank_config_group_type);
@@ -1483,8 +1403,6 @@ gpio_sim_device_config_make_bank_group(struct config_group *group,
 	INIT_LIST_HEAD(&bank->line_list);
 	list_add_tail(&bank->siblings, &dev->bank_list);
 
-	mutex_unlock(&dev->lock);
-
 	return &bank->group;
 }
 
@@ -1492,10 +1410,10 @@ static void gpio_sim_device_config_group_release(struct config_item *item)
 {
 	struct gpio_sim_device *dev = to_gpio_sim_device(item);
 
-	mutex_lock(&dev->lock);
-	if (gpio_sim_device_is_live_unlocked(dev))
-		gpio_sim_device_deactivate_unlocked(dev);
-	mutex_unlock(&dev->lock);
+	scoped_guard(mutex, &dev->lock) {
+		if (gpio_sim_device_is_live_unlocked(dev))
+			gpio_sim_device_deactivate_unlocked(dev);
+	}
 
 	mutex_destroy(&dev->lock);
 	ida_free(&gpio_sim_ida, dev->id);
@@ -1520,7 +1438,7 @@ static const struct config_item_type gpio_sim_device_config_group_type = {
 static struct config_group *
 gpio_sim_config_make_device_group(struct config_group *group, const char *name)
 {
-	struct gpio_sim_device *dev;
+	struct gpio_sim_device *dev __free(kfree) = NULL;
 	int id;
 
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -1528,10 +1446,8 @@ gpio_sim_config_make_device_group(struct config_group *group, const char *name)
 		return ERR_PTR(-ENOMEM);
 
 	id = ida_alloc(&gpio_sim_ida, GFP_KERNEL);
-	if (id < 0) {
-		kfree(dev);
+	if (id < 0)
 		return ERR_PTR(id);
-	}
 
 	config_group_init_type_name(&dev->group, name,
 				    &gpio_sim_device_config_group_type);
@@ -1542,7 +1458,7 @@ gpio_sim_config_make_device_group(struct config_group *group, const char *name)
 	dev->bus_notifier.notifier_call = gpio_sim_bus_notifier_call;
 	init_completion(&dev->probe_completion);
 
-	return &dev->group;
+	return &no_free_ptr(dev)->group;
 }
 
 static struct configfs_group_operations gpio_sim_config_group_ops = {

From 20c827683de05a6c7e7ae7fae586899690693251 Mon Sep 17 00:00:00 2001
From: Frieder Schrempf <frieder.schrempf@kontron.de>
Date: Mon, 24 Jul 2023 17:16:32 +0200
Subject: [PATCH 1306/2047] drm: bridge: samsung-dsim: Fix init during host
 transfer

In case the downstream bridge or panel uses DSI transfers before the
DSI host was actually initialized through samsung_dsim_atomic_enable()
which clears the stop state (LP11) mode, all transfers will fail.

This happens with downstream bridges that are controlled by DSI
commands such as the tc358762.

As documented in [1] DSI hosts are expected to allow transfers
outside the normal bridge enable/disable flow.

To fix this make sure that stop state is cleared in
samsung_dsim_host_transfer() which restores the previous
behavior.

We also factor out the common code to enable/disable stop state
to samsung_dsim_set_stop_state().

[1] https://docs.kernel.org/gpu/drm-kms-helpers.html#mipi-dsi-bridge-operation

Fixes: 0c14d3130654 ("drm: bridge: samsung-dsim: Fix i.MX8M enable flow to meet spec")
Reported-by: Tim Harvey <tharvey@gateworks.com>
Signed-off-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Tested-by: Tim Harvey <tharvey@gateworks.com>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230724151640.555490-1-frieder@fris.de
---
 drivers/gpu/drm/bridge/samsung-dsim.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c
index 043b8109e64a..73ec60757dbc 100644
--- a/drivers/gpu/drm/bridge/samsung-dsim.c
+++ b/drivers/gpu/drm/bridge/samsung-dsim.c
@@ -1386,6 +1386,18 @@ static void samsung_dsim_disable_irq(struct samsung_dsim *dsi)
 	disable_irq(dsi->irq);
 }
 
+static void samsung_dsim_set_stop_state(struct samsung_dsim *dsi, bool enable)
+{
+	u32 reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
+
+	if (enable)
+		reg |= DSIM_FORCE_STOP_STATE;
+	else
+		reg &= ~DSIM_FORCE_STOP_STATE;
+
+	samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
+}
+
 static int samsung_dsim_init(struct samsung_dsim *dsi)
 {
 	const struct samsung_dsim_driver_data *driver_data = dsi->driver_data;
@@ -1445,15 +1457,12 @@ static void samsung_dsim_atomic_enable(struct drm_bridge *bridge,
 				       struct drm_bridge_state *old_bridge_state)
 {
 	struct samsung_dsim *dsi = bridge_to_dsi(bridge);
-	u32 reg;
 
 	if (samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) {
 		samsung_dsim_set_display_mode(dsi);
 		samsung_dsim_set_display_enable(dsi, true);
 	} else {
-		reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
-		reg &= ~DSIM_FORCE_STOP_STATE;
-		samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
+		samsung_dsim_set_stop_state(dsi, false);
 	}
 
 	dsi->state |= DSIM_STATE_VIDOUT_AVAILABLE;
@@ -1463,16 +1472,12 @@ static void samsung_dsim_atomic_disable(struct drm_bridge *bridge,
 					struct drm_bridge_state *old_bridge_state)
 {
 	struct samsung_dsim *dsi = bridge_to_dsi(bridge);
-	u32 reg;
 
 	if (!(dsi->state & DSIM_STATE_ENABLED))
 		return;
 
-	if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) {
-		reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
-		reg |= DSIM_FORCE_STOP_STATE;
-		samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
-	}
+	if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type))
+		samsung_dsim_set_stop_state(dsi, true);
 
 	dsi->state &= ~DSIM_STATE_VIDOUT_AVAILABLE;
 }
@@ -1775,6 +1780,8 @@ static ssize_t samsung_dsim_host_transfer(struct mipi_dsi_host *host,
 	if (ret)
 		return ret;
 
+	samsung_dsim_set_stop_state(dsi, false);
+
 	ret = mipi_dsi_create_packet(&xfer.packet, msg);
 	if (ret < 0)
 		return ret;

From b52c8c72dd7c2c5695bc86a31d48dd8da4127d94 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 7 Aug 2023 13:16:43 +0200
Subject: [PATCH 1307/2047] crypto: caam - fix PM operations definition

The newly added PM operations use the deprecated SIMPLE_DEV_PM_OPS() macro,
causing a warning in some configurations:

drivers/crypto/caam/ctrl.c:828:12: error: 'caam_ctrl_resume' defined but not used [-Werror=unused-function]
  828 | static int caam_ctrl_resume(struct device *dev)
      |            ^~~~~~~~~~~~~~~~
drivers/crypto/caam/ctrl.c:818:12: error: 'caam_ctrl_suspend' defined but not used [-Werror=unused-function]
  818 | static int caam_ctrl_suspend(struct device *dev)
      |            ^~~~~~~~~~~~~~~~~
drivers/crypto/caam/jr.c:732:12: error: 'caam_jr_resume' defined but not used [-Werror=unused-function]
  732 | static int caam_jr_resume(struct device *dev)
      |            ^~~~~~~~~~~~~~
drivers/crypto/caam/jr.c:687:12: error: 'caam_jr_suspend' defined but not used [-Werror=unused-function]
  687 | static int caam_jr_suspend(struct device *dev)
      |            ^~~~~~~~~~~~~~~

Use the normal DEFINE_SIMPLE_DEV_PM_OPS() variant now, and use pm_ptr() to
completely eliminate the structure in configs without CONFIG_PM.

Fixes: 322d74752c28a ("crypto: caam - add power management support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Meenakshi Aggarwal <meenakshi.aggarwal@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/ctrl.c | 4 ++--
 drivers/crypto/caam/jr.c   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index a7a4583107f4..2a228a36fa15 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -841,7 +841,7 @@ static int caam_ctrl_resume(struct device *dev)
 	return ret;
 }
 
-static SIMPLE_DEV_PM_OPS(caam_ctrl_pm_ops, caam_ctrl_suspend, caam_ctrl_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(caam_ctrl_pm_ops, caam_ctrl_suspend, caam_ctrl_resume);
 
 /* Probe routine for CAAM top (controller) level */
 static int caam_probe(struct platform_device *pdev)
@@ -1138,7 +1138,7 @@ static struct platform_driver caam_driver = {
 	.driver = {
 		.name = "caam",
 		.of_match_table = caam_match,
-		.pm = &caam_ctrl_pm_ops,
+		.pm = pm_ptr(&caam_ctrl_pm_ops),
 	},
 	.probe       = caam_probe,
 };
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 316180d26f8a..767fbf052536 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -794,7 +794,7 @@ add_jr:
 	return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(caam_jr_pm_ops, caam_jr_suspend, caam_jr_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(caam_jr_pm_ops, caam_jr_suspend, caam_jr_resume);
 
 static const struct of_device_id caam_jr_match[] = {
 	{
@@ -811,7 +811,7 @@ static struct platform_driver caam_jr_driver = {
 	.driver = {
 		.name = "caam_jr",
 		.of_match_table = caam_jr_match,
-		.pm = &caam_jr_pm_ops,
+		.pm = pm_ptr(&caam_jr_pm_ops),
 	},
 	.probe       = caam_jr_probe,
 	.remove      = caam_jr_remove,

From e30685204711a6be40dec2622606950ccd37dafe Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gaurav.jain@nxp.com>
Date: Tue, 8 Aug 2023 12:55:25 +0200
Subject: [PATCH 1308/2047] crypto: caam - fix unchecked return value error

error:
Unchecked return value (CHECKED_RETURN)
check_return: Calling sg_miter_next without checking return value

fix:
added check if(!sg_miter_next)

Fixes: 8a2a0dd35f2e ("crypto: caam - strip input zeros from RSA input buffer")
Signed-off-by: Gaurav Jain <gaurav.jain@nxp.com>
Signed-off-by: Meenakshi Aggarwal <meenakshi.aggarwal@nxp.com>
Reviewed-by: Gaurav Jain <gaurav.jain@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caampkc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 72afc249d42f..7e08af751e4e 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -225,7 +225,9 @@ static int caam_rsa_count_leading_zeros(struct scatterlist *sgl,
 		if (len && *buff)
 			break;
 
-		sg_miter_next(&miter);
+		if (!sg_miter_next(&miter))
+			break;
+
 		buff = miter.addr;
 		len = miter.length;
 

From e47e6d2aaacd30e93b97f20d4a20556cefd18406 Mon Sep 17 00:00:00 2001
From: Iuliana Prodan <iuliana.prodan@nxp.com>
Date: Tue, 8 Aug 2023 12:55:26 +0200
Subject: [PATCH 1309/2047] crypto: caam - increase the domain of write memory
 barrier to full system

In caam_jr_enqueue, under heavy DDR load, smp_wmb() or dma_wmb()
fail to make the input ring be updated before the CAAM starts
reading it. So, CAAM will process, again, an old descriptor address
and will put it in the output ring. This will make caam_jr_dequeue()
to fail, since this old descriptor is not in the software ring.
To fix this, use wmb() which works on the full system instead of
inner/outer shareable domains.

Signed-off-by: Iuliana Prodan <iuliana.prodan@nxp.com>
Signed-off-by: Meenakshi Aggarwal <meenakshi.aggarwal@nxp.com>
Reviewed-by: Gaurav Jain <gaurav.jain@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/jr.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 767fbf052536..5507d5d34a4c 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -464,8 +464,16 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
 	 * Guarantee that the descriptor's DMA address has been written to
 	 * the next slot in the ring before the write index is updated, since
 	 * other cores may update this index independently.
+	 *
+	 * Under heavy DDR load, smp_wmb() or dma_wmb() fail to make the input
+	 * ring be updated before the CAAM starts reading it. So, CAAM will
+	 * process, again, an old descriptor address and will put it in the
+	 * output ring. This will make caam_jr_dequeue() to fail, since this
+	 * old descriptor is not in the software ring.
+	 * To fix this, use wmb() which works on the full system instead of
+	 * inner/outer shareable domains.
 	 */
-	smp_wmb();
+	wmb();
 
 	jrp->head = (head + 1) & (JOBR_DEPTH - 1);
 

From 23d422a4f127901b2e58d925f130d4ce534a662a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Tue, 8 Aug 2023 12:55:27 +0200
Subject: [PATCH 1310/2047] crypto: caam/jr - fix shared IRQ line handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are cases when the interrupt status register (JRINTR) is non-zero,
even though:
1. An interrupt was generated, but it was masked OR
2. There was no interrupt generated at all
for the corresponding job ring.

1. The case when interrupt is masked (JRCFGR_LS[IMSK]=1b'1)
while other events have happened and are being accounted for, e.g.
-JRINTR[HALT]=2b'10 - input job ring underwent a flush of all on-going
jobs and processing of still-existing jobs (sitting in the ring) has been
halted
-JRINTR[HALT]=2b'01 - input job ring is currently undergoing a flush
-JRINTR[ENTER_FAIL]=1b'1 - SecMon / SNVS transitioned to FAIL MODE
It doesn't matter whether these events would assert the interrupt signal
or not, interrupt is anyhow masked.

2. The case when interrupt is not masked (JRCFGR_LS[IMSK]=1b'0), however
the events accounted for in JRINTR do not generate interrupts, e.g.:
-JRINTR[HALT]=2b'01
-JRINTR[ENTER_FAIL]=1b'1 and JRCFGR_MS[FAIL_MODE]=1b'0

Currently in these cases, when the JR interrupt handler is invoked (as a
consequence of JR sharing the interrupt line with other devices - e.g.
the two JRs on i.MX7ULP) it continues execution instead of returning
IRQ_NONE.
This could lead to situations like interrupt handler clearing JRINTR (and
thus also the JRINTR[HALT] field) while corresponding job ring is
suspended and then that job ring failing on resume path, due to expecting
JRINTR[HALT]=b'10 and reading instead JRINTR[HALT]=b'00.

Fix this by checking status of JRINTR[JRI] in the JR interrupt handler.
If JRINTR[JRI]=1b'0, there was no interrupt generated for this JR and
handler must return IRQ_NONE.

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Meenakshi Aggarwal <meenakshi.aggarwal@nxp.com>
Reviewed-by: Gaurav Jain <gaurav.jain@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/jr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 5507d5d34a4c..07ec2f27cc68 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -232,7 +232,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
 	 * tasklet if jobs done.
 	 */
 	irqstate = rd_reg32(&jrp->rregs->jrintstatus);
-	if (!irqstate)
+	if (!(irqstate & JRINT_JR_INT))
 		return IRQ_NONE;
 
 	/*

From 64dd341e66f45cdde95f81788d0b13636847fcc3 Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Wed, 9 Aug 2023 11:14:43 +0800
Subject: [PATCH 1311/2047] crypto: allwinner - Remove unused function
 declarations

Commit 06f751b61329 ("crypto: allwinner - Add sun8i-ce Crypto Engine")
declared but never implemented sun8i_ce_enqueue().
Commit 56f6d5aee88d ("crypto: sun8i-ce - support hash algorithms")
declared but never implemented sun8i_ce_hash().
Commit f08fcced6d00 ("crypto: allwinner - Add sun8i-ss cryptographic offloader")
declared but never implemented sun8i_ss_enqueue().

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h | 3 ---
 drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h | 2 --
 2 files changed, 5 deletions(-)

diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
index 27029fb77e29..4742b48ef52e 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
@@ -347,8 +347,6 @@ struct sun8i_ce_alg_template {
 	char fbname[CRYPTO_MAX_ALG_NAME];
 };
 
-int sun8i_ce_enqueue(struct crypto_async_request *areq, u32 type);
-
 int sun8i_ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			unsigned int keylen);
 int sun8i_ce_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
@@ -367,7 +365,6 @@ void sun8i_ce_hash_craexit(struct crypto_tfm *tfm);
 int sun8i_ce_hash_init(struct ahash_request *areq);
 int sun8i_ce_hash_export(struct ahash_request *areq, void *out);
 int sun8i_ce_hash_import(struct ahash_request *areq, const void *in);
-int sun8i_ce_hash(struct ahash_request *areq);
 int sun8i_ce_hash_final(struct ahash_request *areq);
 int sun8i_ce_hash_update(struct ahash_request *areq);
 int sun8i_ce_hash_finup(struct ahash_request *areq);
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
index df6f08f6092f..bfe305261e9a 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
@@ -293,8 +293,6 @@ struct sun8i_ss_alg_template {
 	char fbname[CRYPTO_MAX_ALG_NAME];
 };
 
-int sun8i_ss_enqueue(struct crypto_async_request *areq, u32 type);
-
 int sun8i_ss_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			unsigned int keylen);
 int sun8i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,

From d94e0f25deefda953ac348a244189cf358b8bd3e Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Wed, 9 Aug 2023 11:16:14 +0800
Subject: [PATCH 1312/2047] crypto: qat - Remove unused function declarations

Commit d8cba25d2c68 ("crypto: qat - Intel(R) QAT driver framework")
declared but never implemented these functions.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Acked-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/intel/qat/qat_common/adf_common_drv.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
index 799a2193d3e5..673b5044c62a 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
@@ -58,12 +58,6 @@ int adf_dev_restart(struct adf_accel_dev *accel_dev);
 
 void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data);
 void adf_clean_vf_map(bool);
-
-int adf_ctl_dev_register(void);
-void adf_ctl_dev_unregister(void);
-int adf_processes_dev_register(void);
-void adf_processes_dev_unregister(void);
-
 int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev,
 		       struct adf_accel_dev *pf);
 void adf_devmgr_rm_dev(struct adf_accel_dev *accel_dev,
@@ -182,8 +176,6 @@ int qat_hal_init_rd_xfer(struct icp_qat_fw_loader_handle *handle,
 int qat_hal_init_nn(struct icp_qat_fw_loader_handle *handle,
 		    unsigned char ae, unsigned long ctx_mask,
 		    unsigned short reg_num, unsigned int regdata);
-int qat_hal_wr_lm(struct icp_qat_fw_loader_handle *handle,
-		  unsigned char ae, unsigned short lm_addr, unsigned int value);
 void qat_hal_set_ae_tindex_mode(struct icp_qat_fw_loader_handle *handle,
 				unsigned char ae, unsigned char mode);
 int qat_uclo_wr_all_uimage(struct icp_qat_fw_loader_handle *handle);

From d1c02e876f2e63520fd4fe393ed6b68882c9f848 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 12:00:23 +0200
Subject: [PATCH 1313/2047] crypto: exynos - fix Wvoid-pointer-to-enum-cast
 warning

'type' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  exynos-rng.c:280:14: error: cast to smaller integer type 'enum exynos_prng_type' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/exynos-rng.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/exynos-rng.c b/drivers/crypto/exynos-rng.c
index cbd8ca6e52ee..b1df66be9adc 100644
--- a/drivers/crypto/exynos-rng.c
+++ b/drivers/crypto/exynos-rng.c
@@ -277,7 +277,7 @@ static int exynos_rng_probe(struct platform_device *pdev)
 	if (!rng)
 		return -ENOMEM;
 
-	rng->type = (enum exynos_prng_type)of_device_get_match_data(&pdev->dev);
+	rng->type = (uintptr_t)of_device_get_match_data(&pdev->dev);
 
 	mutex_init(&rng->lock);
 

From d88bdbd96d258ab7973e67b94701c55324e9885f Mon Sep 17 00:00:00 2001
From: GUO Zihua <guozihua@huawei.com>
Date: Thu, 10 Aug 2023 21:00:43 +0800
Subject: [PATCH 1314/2047] hwrng: core - Remove duplicated include

Remove duplicated include of linux/random.h. Resolves checkincludes
message. And adjust includes in alphabetical order.

Signed-off-by: GUO Zihua <guozihua@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index f34d356fe2c0..e3598ec9cfca 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -15,14 +15,13 @@
 #include <linux/err.h>
 #include <linux/fs.h>
 #include <linux/hw_random.h>
-#include <linux/random.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
-#include <linux/sched/signal.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 

From 8e03dd62e5be811efbf0cbeba47e79e793519105 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian.fainelli@broadcom.com>
Date: Thu, 10 Aug 2023 12:22:08 -0700
Subject: [PATCH 1315/2047] hwrng: iproc-rng200 - Implement suspend and resume
 calls

Chips such as BCM7278 support system wide suspend/resume which will
cause the HWRNG block to lose its state and reset to its power on reset
register values. We need to cleanup and re-initialize the HWRNG for it
to be functional coming out of a system suspend cycle.

Fixes: c3577f6100ca ("hwrng: iproc-rng200 - Add support for BCM7278")
Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/iproc-rng200.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/char/hw_random/iproc-rng200.c b/drivers/char/hw_random/iproc-rng200.c
index 34df3f0d3e45..440fe28bddc0 100644
--- a/drivers/char/hw_random/iproc-rng200.c
+++ b/drivers/char/hw_random/iproc-rng200.c
@@ -181,6 +181,8 @@ static int iproc_rng200_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->base);
 	}
 
+	dev_set_drvdata(dev, priv);
+
 	priv->rng.name = "iproc-rng200";
 	priv->rng.read = iproc_rng200_read;
 	priv->rng.init = iproc_rng200_init;
@@ -198,6 +200,28 @@ static int iproc_rng200_probe(struct platform_device *pdev)
 	return 0;
 }
 
+static int __maybe_unused iproc_rng200_suspend(struct device *dev)
+{
+	struct iproc_rng200_dev *priv = dev_get_drvdata(dev);
+
+	iproc_rng200_cleanup(&priv->rng);
+
+	return 0;
+}
+
+static int __maybe_unused iproc_rng200_resume(struct device *dev)
+{
+	struct iproc_rng200_dev *priv =  dev_get_drvdata(dev);
+
+	iproc_rng200_init(&priv->rng);
+
+	return 0;
+}
+
+static const struct dev_pm_ops iproc_rng200_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(iproc_rng200_suspend, iproc_rng200_resume)
+};
+
 static const struct of_device_id iproc_rng200_of_match[] = {
 	{ .compatible = "brcm,bcm2711-rng200", },
 	{ .compatible = "brcm,bcm7211-rng200", },
@@ -211,6 +235,7 @@ static struct platform_driver iproc_rng200_driver = {
 	.driver = {
 		.name		= "iproc-rng200",
 		.of_match_table = iproc_rng200_of_match,
+		.pm		= &iproc_rng200_pm_ops,
 	},
 	.probe		= iproc_rng200_probe,
 };

From f9fc1ec28baeeef6259127b3cbeb34986834574f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 11 Aug 2023 15:46:33 +0200
Subject: [PATCH 1316/2047] crypto: drivers - avoid memcpy size warning

Some configurations with gcc-12 or gcc-13 produce a warning for the source
and destination of a memcpy() in atmel_sha_hmac_compute_ipad_hash() potentially
overlapping:

In file included from include/linux/string.h:254,
                 from drivers/crypto/atmel-sha.c:15:
drivers/crypto/atmel-sha.c: In function 'atmel_sha_hmac_compute_ipad_hash':
include/linux/fortify-string.h:57:33: error: '__builtin_memcpy' accessing 129 or more bytes at offsets 408 and 280 overlaps 1 or more bytes at offset 408 [-Werror=restrict]
   57 | #define __underlying_memcpy     __builtin_memcpy
      |                                 ^
include/linux/fortify-string.h:648:9: note: in expansion of macro '__underlying_memcpy'
  648 |         __underlying_##op(p, q, __fortify_size);                        \
      |         ^~~~~~~~~~~~~
include/linux/fortify-string.h:693:26: note: in expansion of macro '__fortify_memcpy_chk'
  693 | #define memcpy(p, q, s)  __fortify_memcpy_chk(p, q, s,                  \
      |                          ^~~~~~~~~~~~~~~~~~~~
drivers/crypto/atmel-sha.c:1773:9: note: in expansion of macro 'memcpy'
 1773 |         memcpy(hmac->opad, hmac->ipad, bs);
      |         ^~~~~~

The same thing happens in two more drivers that have the same logic:

drivers/crypto/chelsio/chcr_algo.c: In function 'chcr_ahash_setkey':
include/linux/fortify-string.h:57:33: error: '__builtin_memcpy' accessing 129 or more bytes at offsets 260 and 132 overlaps 1 or more bytes at offset 260 [-Werror=restrict]
drivers/crypto/bcm/cipher.c: In function 'ahash_hmac_setkey':
include/linux/fortify-string.h:57:33: error: '__builtin_memcpy' accessing between 129 and 4294967295 bytes at offsets 840 and 712 overlaps between 1 and 4294967167 bytes at offset 840 [-Werror=restrict]

I don't think it can actually happen because the size is strictly bounded
to the available block sizes, at most 128 bytes, though inlining decisions
could lead gcc to not see that.

Use the unsafe_memcpy() helper instead of memcpy(), with the only difference
being that this skips the hardening checks that produce the warning.

Suggested-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-sha.c         | 3 ++-
 drivers/crypto/bcm/cipher.c        | 3 ++-
 drivers/crypto/chelsio/chcr_algo.c | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 54fec72dfba2..99a9ff8e743f 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -1770,7 +1770,8 @@ static int atmel_sha_hmac_compute_ipad_hash(struct atmel_sha_dev *dd)
 	size_t bs = ctx->block_size;
 	size_t i, num_words = bs / sizeof(u32);
 
-	memcpy(hmac->opad, hmac->ipad, bs);
+	unsafe_memcpy(hmac->opad, hmac->ipad, bs,
+		      "fortified memcpy causes -Wrestrict warning");
 	for (i = 0; i < num_words; ++i) {
 		hmac->ipad[i] ^= 0x36363636;
 		hmac->opad[i] ^= 0x5c5c5c5c;
diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index 70b911baab26..4c46357e2570 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -2397,7 +2397,8 @@ static int ahash_hmac_setkey(struct crypto_ahash *ahash, const u8 *key,
 		memset(ctx->ipad + ctx->authkeylen, 0,
 		       blocksize - ctx->authkeylen);
 		ctx->authkeylen = 0;
-		memcpy(ctx->opad, ctx->ipad, blocksize);
+		unsafe_memcpy(ctx->opad, ctx->ipad, blocksize,
+			      "fortified memcpy causes -Wrestrict warning");
 
 		for (index = 0; index < blocksize; index++) {
 			ctx->ipad[index] ^= HMAC_IPAD_VALUE;
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 0eade4fa6695..16298ae4a00b 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -2216,7 +2216,8 @@ static int chcr_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
 		memcpy(hmacctx->ipad, key, keylen);
 	}
 	memset(hmacctx->ipad + keylen, 0, bs - keylen);
-	memcpy(hmacctx->opad, hmacctx->ipad, bs);
+	unsafe_memcpy(hmacctx->opad, hmacctx->ipad, bs,
+		      "fortified memcpy causes -Wrestrict warning");
 
 	for (i = 0; i < bs / sizeof(int); i++) {
 		*((unsigned int *)(&hmacctx->ipad) + i) ^= IPAD_DATA;

From b9296bb41275ef2173d2c8b144b808f5e6d18bbe Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Fri, 11 Aug 2023 22:50:56 +0200
Subject: [PATCH 1317/2047] dt-bindings: crypto: qcom,prng: Add SM8450

SM8450's PRNG does not require a core clock reference. Add a new
compatible with a qcom,prng-ee fallback and handle that.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../devicetree/bindings/crypto/qcom,prng.yaml | 24 +++++++++++++++----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/crypto/qcom,prng.yaml b/Documentation/devicetree/bindings/crypto/qcom,prng.yaml
index bb42f4588b40..36b0ebd9a44b 100644
--- a/Documentation/devicetree/bindings/crypto/qcom,prng.yaml
+++ b/Documentation/devicetree/bindings/crypto/qcom,prng.yaml
@@ -11,9 +11,13 @@ maintainers:
 
 properties:
   compatible:
-    enum:
-      - qcom,prng  # 8916 etc.
-      - qcom,prng-ee  # 8996 and later using EE
+    oneOf:
+      - enum:
+          - qcom,prng  # 8916 etc.
+          - qcom,prng-ee  # 8996 and later using EE
+      - items:
+          - const: qcom,sm8450-prng-ee
+          - const: qcom,prng-ee
 
   reg:
     maxItems: 1
@@ -28,8 +32,18 @@ properties:
 required:
   - compatible
   - reg
-  - clocks
-  - clock-names
+
+allOf:
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              const: qcom,sm8450-prng-ee
+    then:
+      required:
+        - clocks
+        - clock-names
 
 additionalProperties: false
 

From cdb8b7e1692a81f26030050ec30aadd2e7538fef Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Fri, 11 Aug 2023 22:50:57 +0200
Subject: [PATCH 1318/2047] crypto: qcom-rng: Make the core clock optional
 regardless of ACPI presence

Some newer SoCs (like SM8450) do not require a clock vote for the PRNG
to function. Make it entirely optional and rely on the bindings checker
to ensure platforms that need it, consume one.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Bjorn Andersson <andersson@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qcom-rng.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c
index 72dd1a4ebac4..825a729f205e 100644
--- a/drivers/crypto/qcom-rng.c
+++ b/drivers/crypto/qcom-rng.c
@@ -173,13 +173,9 @@ static int qcom_rng_probe(struct platform_device *pdev)
 	if (IS_ERR(rng->base))
 		return PTR_ERR(rng->base);
 
-	/* ACPI systems have clk already on, so skip clk_get */
-	if (!has_acpi_companion(&pdev->dev)) {
-		rng->clk = devm_clk_get(&pdev->dev, "core");
-		if (IS_ERR(rng->clk))
-			return PTR_ERR(rng->clk);
-	}
-
+	rng->clk = devm_clk_get_optional(&pdev->dev, "core");
+	if (IS_ERR(rng->clk))
+		return PTR_ERR(rng->clk);
 
 	rng->skip_init = (unsigned long)device_get_match_data(&pdev->dev);
 

From 4136212ab18eb3dce6efb6e18108765c36708f71 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:07 +0800
Subject: [PATCH 1319/2047] crypto: sun8i-ce - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../allwinner/sun8i-ce/sun8i-ce-cipher.c      | 20 +++++++++++++------
 .../crypto/allwinner/sun8i-ce/sun8i-ce-hash.c |  2 --
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
index c13550090785..573a08fa7afa 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
@@ -294,7 +294,7 @@ theend:
 	return err;
 }
 
-static int sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq)
+static void sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq)
 {
 	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(breq);
@@ -308,10 +308,10 @@ static int sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq)
 	local_bh_disable();
 	crypto_finalize_skcipher_request(engine, breq, err);
 	local_bh_enable();
-	return 0;
 }
 
-static int sun8i_ce_cipher_unprepare(struct crypto_engine *engine, void *async_req)
+static void sun8i_ce_cipher_unprepare(struct crypto_engine *engine,
+				      void *async_req)
 {
 	struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
@@ -353,7 +353,17 @@ static int sun8i_ce_cipher_unprepare(struct crypto_engine *engine, void *async_r
 	}
 
 	dma_unmap_single(ce->dev, rctx->addr_key, op->keylen, DMA_TO_DEVICE);
+}
 
+static int sun8i_ce_cipher_do_one(struct crypto_engine *engine, void *areq)
+{
+	int err = sun8i_ce_cipher_prepare(engine, areq);
+
+	if (err)
+		return err;
+
+	sun8i_ce_cipher_run(engine, areq);
+	sun8i_ce_cipher_unprepare(engine, areq);
 	return 0;
 }
 
@@ -423,9 +433,7 @@ int sun8i_ce_cipher_init(struct crypto_tfm *tfm)
 	       crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)),
 	       CRYPTO_MAX_ALG_NAME);
 
-	op->enginectx.op.do_one_request = sun8i_ce_cipher_run;
-	op->enginectx.op.prepare_request = sun8i_ce_cipher_prepare;
-	op->enginectx.op.unprepare_request = sun8i_ce_cipher_unprepare;
+	op->enginectx.op.do_one_request = sun8i_ce_cipher_do_one;
 
 	err = pm_runtime_get_sync(op->ce->dev);
 	if (err < 0)
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
index 930ad157004c..04d7d890de58 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
@@ -32,8 +32,6 @@ int sun8i_ce_hash_crainit(struct crypto_tfm *tfm)
 	op->ce = algt->ce;
 
 	op->enginectx.op.do_one_request = sun8i_ce_hash_run;
-	op->enginectx.op.prepare_request = NULL;
-	op->enginectx.op.unprepare_request = NULL;
 
 	/* FALLBACK */
 	op->fallback_tfm = crypto_alloc_ahash(crypto_tfm_alg_name(tfm), 0,

From ff0800af25011c0dc0563f9cc72bb2a9158d12c2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:09 +0800
Subject: [PATCH 1320/2047] crypto: sun8i-ss - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c | 2 --
 drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c   | 2 --
 2 files changed, 4 deletions(-)

diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
index 381a90fbeaff..2758dadf74eb 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -409,8 +409,6 @@ int sun8i_ss_cipher_init(struct crypto_tfm *tfm)
 	       CRYPTO_MAX_ALG_NAME);
 
 	op->enginectx.op.do_one_request = sun8i_ss_handle_cipher_request;
-	op->enginectx.op.prepare_request = NULL;
-	op->enginectx.op.unprepare_request = NULL;
 
 	err = pm_runtime_resume_and_get(op->ss->dev);
 	if (err < 0) {
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
index a4b67d130d11..e6865b49b1df 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
@@ -120,8 +120,6 @@ int sun8i_ss_hash_crainit(struct crypto_tfm *tfm)
 	op->ss = algt->ss;
 
 	op->enginectx.op.do_one_request = sun8i_ss_hash_run;
-	op->enginectx.op.prepare_request = NULL;
-	op->enginectx.op.unprepare_request = NULL;
 
 	/* FALLBACK */
 	op->fallback_tfm = crypto_alloc_ahash(crypto_tfm_alg_name(tfm), 0,

From db9f49e802d9f439cceaaf66ede841b32346e10e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:11 +0800
Subject: [PATCH 1321/2047] crypto: amlogic - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/amlogic/amlogic-gxl-cipher.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/crypto/amlogic/amlogic-gxl-cipher.c b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
index af017a087ebf..03b6586b71ef 100644
--- a/drivers/crypto/amlogic/amlogic-gxl-cipher.c
+++ b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
@@ -332,8 +332,6 @@ int meson_cipher_init(struct crypto_tfm *tfm)
 			 crypto_skcipher_reqsize(op->fallback_tfm);
 
 	op->enginectx.op.do_one_request = meson_handle_cipher_request;
-	op->enginectx.op.prepare_request = NULL;
-	op->enginectx.op.unprepare_request = NULL;
 
 	return 0;
 }

From 13bba5b505695eec89fde5bd68a12b83f71adc33 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:13 +0800
Subject: [PATCH 1322/2047] crypto: aspeed - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/aspeed/aspeed-acry.c        |  2 --
 drivers/crypto/aspeed/aspeed-hace-crypto.c |  2 --
 drivers/crypto/aspeed/aspeed-hace-hash.c   | 14 ++++++++------
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/aspeed/aspeed-acry.c b/drivers/crypto/aspeed/aspeed-acry.c
index 470122c87fea..5ae529ce6806 100644
--- a/drivers/crypto/aspeed/aspeed-acry.c
+++ b/drivers/crypto/aspeed/aspeed-acry.c
@@ -590,8 +590,6 @@ static int aspeed_acry_rsa_init_tfm(struct crypto_akcipher *tfm)
 	}
 
 	ctx->enginectx.op.do_one_request = aspeed_acry_do_request;
-	ctx->enginectx.op.prepare_request = NULL;
-	ctx->enginectx.op.unprepare_request = NULL;
 
 	return 0;
 }
diff --git a/drivers/crypto/aspeed/aspeed-hace-crypto.c b/drivers/crypto/aspeed/aspeed-hace-crypto.c
index ef73b0028b4d..8d6d9ecb3a28 100644
--- a/drivers/crypto/aspeed/aspeed-hace-crypto.c
+++ b/drivers/crypto/aspeed/aspeed-hace-crypto.c
@@ -714,8 +714,6 @@ static int aspeed_crypto_cra_init(struct crypto_skcipher *tfm)
 			 crypto_skcipher_reqsize(ctx->fallback_tfm));
 
 	ctx->enginectx.op.do_one_request = aspeed_crypto_do_request;
-	ctx->enginectx.op.prepare_request = NULL;
-	ctx->enginectx.op.unprepare_request = NULL;
 
 	return 0;
 }
diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c
index 935135229ebd..f8c96568142e 100644
--- a/drivers/crypto/aspeed/aspeed-hace-hash.c
+++ b/drivers/crypto/aspeed/aspeed-hace-hash.c
@@ -565,8 +565,8 @@ static int aspeed_ahash_do_request(struct crypto_engine *engine, void *areq)
 	return 0;
 }
 
-static int aspeed_ahash_prepare_request(struct crypto_engine *engine,
-					void *areq)
+static void aspeed_ahash_prepare_request(struct crypto_engine *engine,
+					 void *areq)
 {
 	struct ahash_request *req = ahash_request_cast(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
@@ -581,8 +581,12 @@ static int aspeed_ahash_prepare_request(struct crypto_engine *engine,
 		hash_engine->dma_prepare = aspeed_ahash_dma_prepare_sg;
 	else
 		hash_engine->dma_prepare = aspeed_ahash_dma_prepare;
+}
 
-	return 0;
+static int aspeed_ahash_do_one(struct crypto_engine *engine, void *areq)
+{
+	aspeed_ahash_prepare_request(engine, areq);
+	return aspeed_ahash_do_request(engine, areq);
 }
 
 static int aspeed_sham_update(struct ahash_request *req)
@@ -876,9 +880,7 @@ static int aspeed_sham_cra_init(struct crypto_tfm *tfm)
 		}
 	}
 
-	tctx->enginectx.op.do_one_request = aspeed_ahash_do_request;
-	tctx->enginectx.op.prepare_request = aspeed_ahash_prepare_request;
-	tctx->enginectx.op.unprepare_request = NULL;
+	tctx->enginectx.op.do_one_request = aspeed_ahash_do_one;
 
 	return 0;
 }

From 0a3fa126578eb69eda563ac4f95955a8bb1715ed Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:15 +0800
Subject: [PATCH 1323/2047] crypto: sl3516 - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/gemini/sl3516-ce-cipher.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c
index 14d0d83d388d..0232f847785a 100644
--- a/drivers/crypto/gemini/sl3516-ce-cipher.c
+++ b/drivers/crypto/gemini/sl3516-ce-cipher.c
@@ -336,8 +336,6 @@ int sl3516_ce_cipher_init(struct crypto_tfm *tfm)
 		 crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)));
 
 	op->enginectx.op.do_one_request = sl3516_ce_handle_cipher_request;
-	op->enginectx.op.prepare_request = NULL;
-	op->enginectx.op.unprepare_request = NULL;
 
 	err = pm_runtime_get_sync(op->ce->dev);
 	if (err < 0)

From 08d81da7a848a7aedb971d5d7d1ee9e373d3ecec Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:17 +0800
Subject: [PATCH 1324/2047] crypto: keembay - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/intel/keembay/keembay-ocs-aes-core.c | 4 ----
 drivers/crypto/intel/keembay/keembay-ocs-ecc.c      | 2 --
 2 files changed, 6 deletions(-)

diff --git a/drivers/crypto/intel/keembay/keembay-ocs-aes-core.c b/drivers/crypto/intel/keembay/keembay-ocs-aes-core.c
index ae31be00357a..f94f48289683 100644
--- a/drivers/crypto/intel/keembay/keembay-ocs-aes-core.c
+++ b/drivers/crypto/intel/keembay/keembay-ocs-aes-core.c
@@ -1150,9 +1150,7 @@ static int kmb_ocs_sm4_ccm_decrypt(struct aead_request *req)
 
 static inline int ocs_common_init(struct ocs_aes_tctx *tctx)
 {
-	tctx->engine_ctx.op.prepare_request = NULL;
 	tctx->engine_ctx.op.do_one_request = kmb_ocs_aes_sk_do_one_request;
-	tctx->engine_ctx.op.unprepare_request = NULL;
 
 	return 0;
 }
@@ -1208,9 +1206,7 @@ static void ocs_exit_tfm(struct crypto_skcipher *tfm)
 
 static inline int ocs_common_aead_init(struct ocs_aes_tctx *tctx)
 {
-	tctx->engine_ctx.op.prepare_request = NULL;
 	tctx->engine_ctx.op.do_one_request = kmb_ocs_aes_aead_do_one_request;
-	tctx->engine_ctx.op.unprepare_request = NULL;
 
 	return 0;
 }
diff --git a/drivers/crypto/intel/keembay/keembay-ocs-ecc.c b/drivers/crypto/intel/keembay/keembay-ocs-ecc.c
index 2269df17514c..e91e570b7ae0 100644
--- a/drivers/crypto/intel/keembay/keembay-ocs-ecc.c
+++ b/drivers/crypto/intel/keembay/keembay-ocs-ecc.c
@@ -794,9 +794,7 @@ static int kmb_ecc_tctx_init(struct ocs_ecc_ctx *tctx, unsigned int curve_id)
 	if (!tctx->curve)
 		return -EOPNOTSUPP;
 
-	tctx->engine_ctx.op.prepare_request = NULL;
 	tctx->engine_ctx.op.do_one_request = kmb_ocs_ecc_do_one_request;
-	tctx->engine_ctx.op.unprepare_request = NULL;
 
 	return 0;
 }

From c752c01389673b76c479d53cdd8f37558a0e406b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:19 +0800
Subject: [PATCH 1325/2047] crypto: omap - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/omap-aes-gcm.c | 14 +++++++-------
 drivers/crypto/omap-aes.c     | 19 ++++---------------
 drivers/crypto/omap-des.c     | 19 ++++---------------
 drivers/crypto/omap-sham.c    |  6 ++++--
 4 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/drivers/crypto/omap-aes-gcm.c b/drivers/crypto/omap-aes-gcm.c
index 9f937bdc53a7..d02363e976e7 100644
--- a/drivers/crypto/omap-aes-gcm.c
+++ b/drivers/crypto/omap-aes-gcm.c
@@ -212,12 +212,10 @@ static int omap_aes_gcm_handle_queue(struct omap_aes_dev *dd,
 	return 0;
 }
 
-static int omap_aes_gcm_prepare_req(struct crypto_engine *engine, void *areq)
+static int omap_aes_gcm_prepare_req(struct aead_request *req,
+				    struct omap_aes_dev *dd)
 {
-	struct aead_request *req = container_of(areq, struct aead_request,
-						base);
 	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
-	struct omap_aes_dev *dd = rctx->dd;
 	struct omap_aes_gcm_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
 	int err;
 
@@ -362,11 +360,15 @@ static int omap_aes_gcm_crypt_req(struct crypto_engine *engine, void *areq)
 						base);
 	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
 	struct omap_aes_dev *dd = rctx->dd;
-	int ret = 0;
+	int ret;
 
 	if (!dd)
 		return -ENODEV;
 
+	ret = omap_aes_gcm_prepare_req(req, dd);
+	if (ret)
+		return ret;
+
 	if (dd->in_sg_len)
 		ret = omap_aes_crypt_dma_start(dd);
 	else
@@ -379,8 +381,6 @@ int omap_aes_gcm_cra_init(struct crypto_aead *tfm)
 {
 	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
 
-	ctx->enginectx.op.prepare_request = omap_aes_gcm_prepare_req;
-	ctx->enginectx.op.unprepare_request = NULL;
 	ctx->enginectx.op.do_one_request = omap_aes_gcm_crypt_req;
 
 	crypto_aead_set_reqsize(tfm, sizeof(struct omap_aes_reqctx));
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 67a99c760bc4..d6fb8676f6cc 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -426,20 +426,15 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd,
 	return 0;
 }
 
-static int omap_aes_prepare_req(struct crypto_engine *engine,
-				void *areq)
+static int omap_aes_prepare_req(struct skcipher_request *req,
+				struct omap_aes_dev *dd)
 {
-	struct skcipher_request *req = container_of(areq, struct skcipher_request, base);
 	struct omap_aes_ctx *ctx = crypto_skcipher_ctx(
 			crypto_skcipher_reqtfm(req));
 	struct omap_aes_reqctx *rctx = skcipher_request_ctx(req);
-	struct omap_aes_dev *dd = rctx->dd;
 	int ret;
 	u16 flags;
 
-	if (!dd)
-		return -ENODEV;
-
 	/* assign new request to device */
 	dd->req = req;
 	dd->total = req->cryptlen;
@@ -491,7 +486,8 @@ static int omap_aes_crypt_req(struct crypto_engine *engine,
 	if (!dd)
 		return -ENODEV;
 
-	return omap_aes_crypt_dma_start(dd);
+	return omap_aes_prepare_req(req, dd) ?:
+	       omap_aes_crypt_dma_start(dd);
 }
 
 static void omap_aes_copy_ivout(struct omap_aes_dev *dd, u8 *ivbuf)
@@ -629,11 +625,6 @@ static int omap_aes_ctr_decrypt(struct skcipher_request *req)
 	return omap_aes_crypt(req, FLAGS_CTR);
 }
 
-static int omap_aes_prepare_req(struct crypto_engine *engine,
-				void *req);
-static int omap_aes_crypt_req(struct crypto_engine *engine,
-			      void *req);
-
 static int omap_aes_init_tfm(struct crypto_skcipher *tfm)
 {
 	const char *name = crypto_tfm_alg_name(&tfm->base);
@@ -649,8 +640,6 @@ static int omap_aes_init_tfm(struct crypto_skcipher *tfm)
 	crypto_skcipher_set_reqsize(tfm, sizeof(struct omap_aes_reqctx) +
 					 crypto_skcipher_reqsize(blk));
 
-	ctx->enginectx.op.prepare_request = omap_aes_prepare_req;
-	ctx->enginectx.op.unprepare_request = NULL;
 	ctx->enginectx.op.do_one_request = omap_aes_crypt_req;
 
 	return 0;
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index 371a51094e34..29a3b4c9edaf 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -522,20 +522,15 @@ static int omap_des_handle_queue(struct omap_des_dev *dd,
 	return 0;
 }
 
-static int omap_des_prepare_req(struct crypto_engine *engine,
-				void *areq)
+static int omap_des_prepare_req(struct skcipher_request *req,
+				struct omap_des_dev *dd)
 {
-	struct skcipher_request *req = container_of(areq, struct skcipher_request, base);
 	struct omap_des_ctx *ctx = crypto_skcipher_ctx(
 			crypto_skcipher_reqtfm(req));
-	struct omap_des_dev *dd = omap_des_find_dev(ctx);
 	struct omap_des_reqctx *rctx;
 	int ret;
 	u16 flags;
 
-	if (!dd)
-		return -ENODEV;
-
 	/* assign new request to device */
 	dd->req = req;
 	dd->total = req->cryptlen;
@@ -590,7 +585,8 @@ static int omap_des_crypt_req(struct crypto_engine *engine,
 	if (!dd)
 		return -ENODEV;
 
-	return omap_des_crypt_dma_start(dd);
+	return omap_des_prepare_req(req, dd) ?:
+	       omap_des_crypt_dma_start(dd);
 }
 
 static void omap_des_done_task(unsigned long data)
@@ -709,11 +705,6 @@ static int omap_des_cbc_decrypt(struct skcipher_request *req)
 	return omap_des_crypt(req, FLAGS_CBC);
 }
 
-static int omap_des_prepare_req(struct crypto_engine *engine,
-				void *areq);
-static int omap_des_crypt_req(struct crypto_engine *engine,
-			      void *areq);
-
 static int omap_des_init_tfm(struct crypto_skcipher *tfm)
 {
 	struct omap_des_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -722,8 +713,6 @@ static int omap_des_init_tfm(struct crypto_skcipher *tfm)
 
 	crypto_skcipher_set_reqsize(tfm, sizeof(struct omap_des_reqctx));
 
-	ctx->enginectx.op.prepare_request = omap_des_prepare_req;
-	ctx->enginectx.op.unprepare_request = NULL;
 	ctx->enginectx.op.do_one_request = omap_des_crypt_req;
 
 	return 0;
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index cbeda59c6b19..2ef92301969f 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -1074,6 +1074,10 @@ static int omap_sham_hash_one_req(struct crypto_engine *engine, void *areq)
 	dev_dbg(dd->dev, "hash-one: op: %u, total: %u, digcnt: %zd, final: %d",
 		ctx->op, ctx->total, ctx->digcnt, final);
 
+	err = omap_sham_prepare_request(engine, areq);
+	if (err)
+		return err;
+
 	err = pm_runtime_resume_and_get(dd->dev);
 	if (err < 0) {
 		dev_err(dd->dev, "failed to get sync: %d\n", err);
@@ -1350,8 +1354,6 @@ static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
 	}
 
 	tctx->enginectx.op.do_one_request = omap_sham_hash_one_req;
-	tctx->enginectx.op.prepare_request = omap_sham_prepare_request;
-	tctx->enginectx.op.unprepare_request = NULL;
 
 	return 0;
 }

From c66c17a0f69b0e017bbc01d999a28ed96ee84826 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:22 +0800
Subject: [PATCH 1326/2047] crypto: rk3288 - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index a78ff3dcd0b1..1519aa0a0f7c 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -240,14 +240,13 @@ static int rk_hash_prepare(struct crypto_engine *engine, void *breq)
 	return 0;
 }
 
-static int rk_hash_unprepare(struct crypto_engine *engine, void *breq)
+static void rk_hash_unprepare(struct crypto_engine *engine, void *breq)
 {
 	struct ahash_request *areq = container_of(breq, struct ahash_request, base);
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
 	struct rk_crypto_info *rkc = rctx->dev;
 
 	dma_unmap_sg(rkc->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE);
-	return 0;
 }
 
 static int rk_hash_run(struct crypto_engine *engine, void *breq)
@@ -259,7 +258,7 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
 	struct scatterlist *sg = areq->src;
 	struct rk_crypto_info *rkc = rctx->dev;
-	int err = 0;
+	int err;
 	int i;
 	u32 v;
 
@@ -267,6 +266,10 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 	if (err)
 		return err;
 
+	err = rk_hash_prepare(engine, breq);
+	if (err)
+		goto theend;
+
 	rctx->mode = 0;
 
 	algt->stat_req++;
@@ -327,6 +330,8 @@ theend:
 	crypto_finalize_hash_request(engine, breq, err);
 	local_bh_enable();
 
+	rk_hash_unprepare(engine, breq);
+
 	return 0;
 }
 
@@ -350,8 +355,6 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm)
 				 crypto_ahash_reqsize(tctx->fallback_tfm));
 
 	tctx->enginectx.op.do_one_request = rk_hash_run;
-	tctx->enginectx.op.prepare_request = rk_hash_prepare;
-	tctx->enginectx.op.unprepare_request = rk_hash_unprepare;
 
 	return 0;
 }

From 50c546d70af866ea5cbeb39f5007df96ea27c41e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:24 +0800
Subject: [PATCH 1327/2047] crypto: jh1100 - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/starfive/jh7110-aes.c  | 54 ++++++---------------------
 drivers/crypto/starfive/jh7110-hash.c |  7 ----
 2 files changed, 11 insertions(+), 50 deletions(-)

diff --git a/drivers/crypto/starfive/jh7110-aes.c b/drivers/crypto/starfive/jh7110-aes.c
index 278dfa4aa743..777656cbb7ce 100644
--- a/drivers/crypto/starfive/jh7110-aes.c
+++ b/drivers/crypto/starfive/jh7110-aes.c
@@ -518,8 +518,13 @@ static int starfive_aes_do_one_req(struct crypto_engine *engine, void *areq)
 	struct starfive_cryp_dev *cryp = ctx->cryp;
 	u32 block[AES_BLOCK_32];
 	u32 stat;
+	int err;
 	int i;
 
+	err = starfive_aes_prepare_req(req, NULL);
+	if (err)
+		return err;
+
 	/*
 	 * Write first plain/ciphertext block to start the module
 	 * then let irq tasklet handle the rest of the data blocks.
@@ -538,15 +543,6 @@ static int starfive_aes_do_one_req(struct crypto_engine *engine, void *areq)
 	return 0;
 }
 
-static int starfive_aes_skcipher_prepare_req(struct crypto_engine *engine,
-					     void *areq)
-{
-	struct skcipher_request *req =
-		container_of(areq, struct skcipher_request, base);
-
-	return starfive_aes_prepare_req(req, NULL);
-}
-
 static int starfive_aes_init_tfm(struct crypto_skcipher *tfm)
 {
 	struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -559,21 +555,10 @@ static int starfive_aes_init_tfm(struct crypto_skcipher *tfm)
 				    sizeof(struct skcipher_request));
 
 	ctx->enginectx.op.do_one_request = starfive_aes_do_one_req;
-	ctx->enginectx.op.prepare_request = starfive_aes_skcipher_prepare_req;
-	ctx->enginectx.op.unprepare_request = NULL;
 
 	return 0;
 }
 
-static void starfive_aes_exit_tfm(struct crypto_skcipher *tfm)
-{
-	struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
-
-	ctx->enginectx.op.do_one_request = NULL;
-	ctx->enginectx.op.prepare_request = NULL;
-	ctx->enginectx.op.unprepare_request = NULL;
-}
-
 static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq)
 {
 	struct aead_request *req =
@@ -584,8 +569,13 @@ static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq
 	struct starfive_cryp_request_ctx *rctx = ctx->rctx;
 	u32 block[AES_BLOCK_32];
 	u32 stat;
+	int err;
 	int i;
 
+	err = starfive_aes_prepare_req(NULL, req);
+	if (err)
+		return err;
+
 	if (!cryp->assoclen)
 		goto write_text;
 
@@ -625,14 +615,6 @@ finish_req:
 	return 0;
 }
 
-static int starfive_aes_aead_prepare_req(struct crypto_engine *engine, void *areq)
-{
-	struct aead_request *req =
-		container_of(areq, struct aead_request, base);
-
-	return starfive_aes_prepare_req(NULL, req);
-}
-
 static int starfive_aes_aead_init_tfm(struct crypto_aead *tfm)
 {
 	struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm);
@@ -657,8 +639,6 @@ static int starfive_aes_aead_init_tfm(struct crypto_aead *tfm)
 				sizeof(struct aead_request));
 
 	ctx->enginectx.op.do_one_request = starfive_aes_aead_do_one_req;
-	ctx->enginectx.op.prepare_request = starfive_aes_aead_prepare_req;
-	ctx->enginectx.op.unprepare_request = NULL;
 
 	return 0;
 }
@@ -667,14 +647,7 @@ static void starfive_aes_aead_exit_tfm(struct crypto_aead *tfm)
 {
 	struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm);
 
-	if (ctx->aead_fbk) {
-		crypto_free_aead(ctx->aead_fbk);
-		ctx->aead_fbk = NULL;
-	}
-
-	ctx->enginectx.op.do_one_request = NULL;
-	ctx->enginectx.op.prepare_request = NULL;
-	ctx->enginectx.op.unprepare_request = NULL;
+	crypto_free_aead(ctx->aead_fbk);
 }
 
 static int starfive_aes_crypt(struct skcipher_request *req, unsigned long flags)
@@ -874,7 +847,6 @@ static int starfive_aes_ccm_decrypt(struct aead_request *req)
 static struct skcipher_alg skcipher_algs[] = {
 {
 	.init				= starfive_aes_init_tfm,
-	.exit				= starfive_aes_exit_tfm,
 	.setkey				= starfive_aes_setkey,
 	.encrypt			= starfive_aes_ecb_encrypt,
 	.decrypt			= starfive_aes_ecb_decrypt,
@@ -892,7 +864,6 @@ static struct skcipher_alg skcipher_algs[] = {
 	},
 }, {
 	.init				= starfive_aes_init_tfm,
-	.exit				= starfive_aes_exit_tfm,
 	.setkey				= starfive_aes_setkey,
 	.encrypt			= starfive_aes_cbc_encrypt,
 	.decrypt			= starfive_aes_cbc_decrypt,
@@ -911,7 +882,6 @@ static struct skcipher_alg skcipher_algs[] = {
 	},
 }, {
 	.init				= starfive_aes_init_tfm,
-	.exit				= starfive_aes_exit_tfm,
 	.setkey				= starfive_aes_setkey,
 	.encrypt			= starfive_aes_ctr_encrypt,
 	.decrypt			= starfive_aes_ctr_decrypt,
@@ -930,7 +900,6 @@ static struct skcipher_alg skcipher_algs[] = {
 	},
 }, {
 	.init				= starfive_aes_init_tfm,
-	.exit				= starfive_aes_exit_tfm,
 	.setkey				= starfive_aes_setkey,
 	.encrypt			= starfive_aes_cfb_encrypt,
 	.decrypt			= starfive_aes_cfb_decrypt,
@@ -949,7 +918,6 @@ static struct skcipher_alg skcipher_algs[] = {
 	},
 }, {
 	.init				= starfive_aes_init_tfm,
-	.exit				= starfive_aes_exit_tfm,
 	.setkey				= starfive_aes_setkey,
 	.encrypt			= starfive_aes_ofb_encrypt,
 	.decrypt			= starfive_aes_ofb_decrypt,
diff --git a/drivers/crypto/starfive/jh7110-hash.c b/drivers/crypto/starfive/jh7110-hash.c
index 5064150b8a1c..7fe89cd13336 100644
--- a/drivers/crypto/starfive/jh7110-hash.c
+++ b/drivers/crypto/starfive/jh7110-hash.c
@@ -434,8 +434,6 @@ static int starfive_hash_init_tfm(struct crypto_ahash *hash,
 	ctx->hash_mode = mode;
 
 	ctx->enginectx.op.do_one_request = starfive_hash_one_request;
-	ctx->enginectx.op.prepare_request = NULL;
-	ctx->enginectx.op.unprepare_request = NULL;
 
 	return 0;
 }
@@ -445,11 +443,6 @@ static void starfive_hash_exit_tfm(struct crypto_ahash *hash)
 	struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash);
 
 	crypto_free_ahash(ctx->ahash_fbk);
-
-	ctx->ahash_fbk = NULL;
-	ctx->enginectx.op.do_one_request = NULL;
-	ctx->enginectx.op.prepare_request = NULL;
-	ctx->enginectx.op.unprepare_request = NULL;
 }
 
 static int starfive_hash_long_setkey(struct starfive_cryp_ctx *ctx,

From 6912b79da85a445b85d4da37a9e5e49821b63e18 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:26 +0800
Subject: [PATCH 1328/2047] crypto: stm32 - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32-cryp.c | 37 ++++++-------------------------
 1 file changed, 7 insertions(+), 30 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c
index 6b8d731092a4..07e32b8dbe29 100644
--- a/drivers/crypto/stm32/stm32-cryp.c
+++ b/drivers/crypto/stm32/stm32-cryp.c
@@ -825,8 +825,6 @@ static int stm32_cryp_cpu_start(struct stm32_cryp *cryp)
 }
 
 static int stm32_cryp_cipher_one_req(struct crypto_engine *engine, void *areq);
-static int stm32_cryp_prepare_cipher_req(struct crypto_engine *engine,
-					 void *areq);
 
 static int stm32_cryp_init_tfm(struct crypto_skcipher *tfm)
 {
@@ -835,14 +833,10 @@ static int stm32_cryp_init_tfm(struct crypto_skcipher *tfm)
 	crypto_skcipher_set_reqsize(tfm, sizeof(struct stm32_cryp_reqctx));
 
 	ctx->enginectx.op.do_one_request = stm32_cryp_cipher_one_req;
-	ctx->enginectx.op.prepare_request = stm32_cryp_prepare_cipher_req;
-	ctx->enginectx.op.unprepare_request = NULL;
 	return 0;
 }
 
 static int stm32_cryp_aead_one_req(struct crypto_engine *engine, void *areq);
-static int stm32_cryp_prepare_aead_req(struct crypto_engine *engine,
-				       void *areq);
 
 static int stm32_cryp_aes_aead_init(struct crypto_aead *tfm)
 {
@@ -851,8 +845,6 @@ static int stm32_cryp_aes_aead_init(struct crypto_aead *tfm)
 	tfm->reqsize = sizeof(struct stm32_cryp_reqctx);
 
 	ctx->enginectx.op.do_one_request = stm32_cryp_aead_one_req;
-	ctx->enginectx.op.prepare_request = stm32_cryp_prepare_aead_req;
-	ctx->enginectx.op.unprepare_request = NULL;
 
 	return 0;
 }
@@ -1180,9 +1172,6 @@ static int stm32_cryp_prepare_req(struct skcipher_request *req,
 
 	cryp = ctx->cryp;
 
-	if (!cryp)
-		return -ENODEV;
-
 	rctx = req ? skcipher_request_ctx(req) : aead_request_ctx(areq);
 	rctx->mode &= FLG_MODE_MASK;
 
@@ -1248,16 +1237,6 @@ static int stm32_cryp_prepare_req(struct skcipher_request *req,
 	return ret;
 }
 
-static int stm32_cryp_prepare_cipher_req(struct crypto_engine *engine,
-					 void *areq)
-{
-	struct skcipher_request *req = container_of(areq,
-						      struct skcipher_request,
-						      base);
-
-	return stm32_cryp_prepare_req(req, NULL);
-}
-
 static int stm32_cryp_cipher_one_req(struct crypto_engine *engine, void *areq)
 {
 	struct skcipher_request *req = container_of(areq,
@@ -1270,15 +1249,8 @@ static int stm32_cryp_cipher_one_req(struct crypto_engine *engine, void *areq)
 	if (!cryp)
 		return -ENODEV;
 
-	return stm32_cryp_cpu_start(cryp);
-}
-
-static int stm32_cryp_prepare_aead_req(struct crypto_engine *engine, void *areq)
-{
-	struct aead_request *req = container_of(areq, struct aead_request,
-						base);
-
-	return stm32_cryp_prepare_req(NULL, req);
+	return stm32_cryp_prepare_req(req, NULL) ?:
+	       stm32_cryp_cpu_start(cryp);
 }
 
 static int stm32_cryp_aead_one_req(struct crypto_engine *engine, void *areq)
@@ -1287,10 +1259,15 @@ static int stm32_cryp_aead_one_req(struct crypto_engine *engine, void *areq)
 						base);
 	struct stm32_cryp_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
 	struct stm32_cryp *cryp = ctx->cryp;
+	int err;
 
 	if (!cryp)
 		return -ENODEV;
 
+	err = stm32_cryp_prepare_req(NULL, req);
+	if (err)
+		return err;
+
 	if (unlikely(!cryp->payload_in && !cryp->header_in)) {
 		/* No input data to process: get tag and finish */
 		stm32_cryp_finish_req(cryp, 0);

From fc0bdcb8303b70bc0c1b5fbf236a4882f254cf94 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:28 +0800
Subject: [PATCH 1329/2047] crypto: virtio - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/virtio/virtio_crypto_akcipher_algs.c | 2 --
 drivers/crypto/virtio/virtio_crypto_skcipher_algs.c | 2 --
 2 files changed, 4 deletions(-)

diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
index 6963344f6a3a..ff3369ca319f 100644
--- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
@@ -476,8 +476,6 @@ static int virtio_crypto_rsa_init_tfm(struct crypto_akcipher *tfm)
 
 	ctx->tfm = tfm;
 	ctx->enginectx.op.do_one_request = virtio_crypto_rsa_do_req;
-	ctx->enginectx.op.prepare_request = NULL;
-	ctx->enginectx.op.unprepare_request = NULL;
 
 	akcipher_set_reqsize(tfm,
 			     sizeof(struct virtio_crypto_akcipher_request));
diff --git a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
index e5876286828b..71b8751ab5ab 100644
--- a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
@@ -524,8 +524,6 @@ static int virtio_crypto_skcipher_init(struct crypto_skcipher *tfm)
 	ctx->tfm = tfm;
 
 	ctx->enginectx.op.do_one_request = virtio_crypto_skcipher_crypt_req;
-	ctx->enginectx.op.prepare_request = NULL;
-	ctx->enginectx.op.unprepare_request = NULL;
 	return 0;
 }
 

From be8b8a950f0ae601c8ff841b9dcc899f83054025 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:30 +0800
Subject: [PATCH 1330/2047] crypto: zynqmp - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/xilinx/zynqmp-aes-gcm.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c
index bf1f421e05f2..ae7dd82abea1 100644
--- a/drivers/crypto/xilinx/zynqmp-aes-gcm.c
+++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c
@@ -316,8 +316,6 @@ static int zynqmp_aes_aead_init(struct crypto_aead *aead)
 	tfm_ctx->dev = drv_ctx->dev;
 
 	tfm_ctx->engine_ctx.op.do_one_request = zynqmp_handle_aes_req;
-	tfm_ctx->engine_ctx.op.prepare_request = NULL;
-	tfm_ctx->engine_ctx.op.unprepare_request = NULL;
 
 	tfm_ctx->fbk_cipher = crypto_alloc_aead(drv_ctx->alg.aead.base.cra_name,
 						0,

From bcd6e41d983621954dfc3f1f64249a55838b3e6a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:32 +0800
Subject: [PATCH 1331/2047] crypto: engine - Remove prepare/unprepare request

The callbacks for prepare and unprepare request in crypto_engine
is superfluous.  They can be done directly from do_one_request.

Move the code into do_one_request and remove the unused callbacks.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/crypto_engine.c  | 42 +----------------------------------------
 include/crypto/engine.h |  6 ------
 2 files changed, 1 insertion(+), 47 deletions(-)

diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index 74fcc0897041..17f7955500a0 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -26,9 +26,6 @@ static void crypto_finalize_request(struct crypto_engine *engine,
 				    struct crypto_async_request *req, int err)
 {
 	unsigned long flags;
-	bool finalize_req = false;
-	int ret;
-	struct crypto_engine_ctx *enginectx;
 
 	/*
 	 * If hardware cannot enqueue more requests
@@ -38,21 +35,11 @@ static void crypto_finalize_request(struct crypto_engine *engine,
 	if (!engine->retry_support) {
 		spin_lock_irqsave(&engine->queue_lock, flags);
 		if (engine->cur_req == req) {
-			finalize_req = true;
 			engine->cur_req = NULL;
 		}
 		spin_unlock_irqrestore(&engine->queue_lock, flags);
 	}
 
-	if (finalize_req || engine->retry_support) {
-		enginectx = crypto_tfm_ctx(req->tfm);
-		if (enginectx->op.prepare_request &&
-		    enginectx->op.unprepare_request) {
-			ret = enginectx->op.unprepare_request(engine, req);
-			if (ret)
-				dev_err(engine->dev, "failed to unprepare request\n");
-		}
-	}
 	lockdep_assert_in_softirq();
 	crypto_request_complete(req, err);
 
@@ -141,20 +128,12 @@ start_request:
 		ret = engine->prepare_crypt_hardware(engine);
 		if (ret) {
 			dev_err(engine->dev, "failed to prepare crypt hardware\n");
-			goto req_err_2;
+			goto req_err_1;
 		}
 	}
 
 	enginectx = crypto_tfm_ctx(async_req->tfm);
 
-	if (enginectx->op.prepare_request) {
-		ret = enginectx->op.prepare_request(engine, async_req);
-		if (ret) {
-			dev_err(engine->dev, "failed to prepare request: %d\n",
-				ret);
-			goto req_err_2;
-		}
-	}
 	if (!enginectx->op.do_one_request) {
 		dev_err(engine->dev, "failed to do request\n");
 		ret = -EINVAL;
@@ -177,18 +156,6 @@ start_request:
 				ret);
 			goto req_err_1;
 		}
-		/*
-		 * If retry mechanism is supported,
-		 * unprepare current request and
-		 * enqueue it back into crypto-engine queue.
-		 */
-		if (enginectx->op.unprepare_request) {
-			ret = enginectx->op.unprepare_request(engine,
-							      async_req);
-			if (ret)
-				dev_err(engine->dev,
-					"failed to unprepare request\n");
-		}
 		spin_lock_irqsave(&engine->queue_lock, flags);
 		/*
 		 * If hardware was unable to execute request, enqueue it
@@ -204,13 +171,6 @@ start_request:
 	goto retry;
 
 req_err_1:
-	if (enginectx->op.unprepare_request) {
-		ret = enginectx->op.unprepare_request(engine, async_req);
-		if (ret)
-			dev_err(engine->dev, "failed to unprepare request\n");
-	}
-
-req_err_2:
 	crypto_request_complete(async_req, ret);
 
 retry:
diff --git a/include/crypto/engine.h b/include/crypto/engine.h
index 2038764b30c2..1b02f69e0a79 100644
--- a/include/crypto/engine.h
+++ b/include/crypto/engine.h
@@ -78,15 +78,9 @@ struct crypto_engine {
 
 /*
  * struct crypto_engine_op - crypto hardware engine operations
- * @prepare_request: do some preparation if needed before handling the current request
- * @unprepare_request: undo any work done by prepare_request()
  * @do_one_request: do encryption for current request
  */
 struct crypto_engine_op {
-	int (*prepare_request)(struct crypto_engine *engine,
-			       void *areq);
-	int (*unprepare_request)(struct crypto_engine *engine,
-				 void *areq);
 	int (*do_one_request)(struct crypto_engine *engine,
 			      void *areq);
 };

From 1c27c0ca220b42fb8b727d68b58693b77cbd095d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:34 +0800
Subject: [PATCH 1332/2047] crypto: jh7110 - Include crypto/hash.h in header
 file

The header file jh7110-cryp uses ahash_request without including
crypto/hash.h.  Fix that by adding the inclusion.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/starfive/jh7110-cryp.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/starfive/jh7110-cryp.h b/drivers/crypto/starfive/jh7110-cryp.h
index b6d809e8fe45..4462d1db9544 100644
--- a/drivers/crypto/starfive/jh7110-cryp.h
+++ b/drivers/crypto/starfive/jh7110-cryp.h
@@ -2,16 +2,16 @@
 #ifndef __STARFIVE_STR_H__
 #define __STARFIVE_STR_H__
 
+#include <crypto/aes.h>
+#include <crypto/engine.h>
+#include <crypto/hash.h>
+#include <crypto/sha2.h>
+#include <crypto/sm3.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/interrupt.h>
 
-#include <crypto/aes.h>
-#include <crypto/engine.h>
-#include <crypto/sha2.h>
-#include <crypto/sm3.h>
-
 #define STARFIVE_ALG_CR_OFFSET			0x0
 #define STARFIVE_ALG_FIFO_OFFSET		0x4
 #define STARFIVE_IE_MASK_OFFSET			0x8

From 68021dee251e72d87ebbf052acf69b3217c11383 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:36 +0800
Subject: [PATCH 1333/2047] crypto: engine - Move crypto inclusions out of
 header file

The engine file does not need the actual crypto type definitions
so move those header inclusions to where they are actually used.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/crypto_engine.c  |  7 ++++++-
 include/crypto/engine.h | 17 +++++++----------
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index 17f7955500a0..ba43dfba2fa9 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -7,10 +7,15 @@
  * Author: Baolin Wang <baolin.wang@linaro.org>
  */
 
+#include <crypto/aead.h>
+#include <crypto/akcipher.h>
+#include <crypto/engine.h>
+#include <crypto/hash.h>
+#include <crypto/kpp.h>
+#include <crypto/skcipher.h>
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <linux/device.h>
-#include <crypto/engine.h>
 #include <uapi/linux/sched/types.h>
 #include "internal.h"
 
diff --git a/include/crypto/engine.h b/include/crypto/engine.h
index 1b02f69e0a79..643639c3227c 100644
--- a/include/crypto/engine.h
+++ b/include/crypto/engine.h
@@ -7,20 +7,17 @@
 #ifndef _CRYPTO_ENGINE_H
 #define _CRYPTO_ENGINE_H
 
-#include <linux/crypto.h>
-#include <linux/list.h>
+#include <crypto/algapi.h>
 #include <linux/kthread.h>
-#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
 #include <linux/types.h>
 
-#include <crypto/algapi.h>
-#include <crypto/aead.h>
-#include <crypto/akcipher.h>
-#include <crypto/hash.h>
-#include <crypto/skcipher.h>
-#include <crypto/kpp.h>
-
+struct aead_request;
+struct ahash_request;
+struct akcipher_request;
 struct device;
+struct kpp_request;
+struct skcipher_request;
 
 #define ENGINE_NAME_LEN	30
 /*

From 2d6a79cc2ade0b3e67e66393a6183284e87b87f6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:38 +0800
Subject: [PATCH 1334/2047] crypto: jh7110 - Include scatterwalk.h for struct
 scatter_walk

Include crypto/scatterwalk.h explicitly instead of getting it
through random header files.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/starfive/jh7110-cryp.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/starfive/jh7110-cryp.h b/drivers/crypto/starfive/jh7110-cryp.h
index 4462d1db9544..345a8d878761 100644
--- a/drivers/crypto/starfive/jh7110-cryp.h
+++ b/drivers/crypto/starfive/jh7110-cryp.h
@@ -5,6 +5,7 @@
 #include <crypto/aes.h>
 #include <crypto/engine.h>
 #include <crypto/hash.h>
+#include <crypto/scatterwalk.h>
 #include <crypto/sha2.h>
 #include <crypto/sm3.h>
 #include <linux/delay.h>

From 45c461c503a7a12f4c5efaff289be17a442aeefe Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:41 +0800
Subject: [PATCH 1335/2047] crypto: engine - Create internal/engine.h

Create crypto/internal/engine.h to house details that should not
be used by drivers.  It is empty for the time being.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/crypto_engine.c           |  2 +-
 include/crypto/internal/engine.h | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 include/crypto/internal/engine.h

diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index ba43dfba2fa9..a75162bc1bf4 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -9,8 +9,8 @@
 
 #include <crypto/aead.h>
 #include <crypto/akcipher.h>
-#include <crypto/engine.h>
 #include <crypto/hash.h>
+#include <crypto/internal/engine.h>
 #include <crypto/kpp.h>
 #include <crypto/skcipher.h>
 #include <linux/err.h>
diff --git a/include/crypto/internal/engine.h b/include/crypto/internal/engine.h
new file mode 100644
index 000000000000..ffa1bb39d5e4
--- /dev/null
+++ b/include/crypto/internal/engine.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Crypto engine API
+ *
+ * Copyright (c) 2016 Baolin Wang <baolin.wang@linaro.org>
+ * Copyright (c) 2023 Herbert Xu <herbert@gondor.apana.org.au>
+ */
+#ifndef _CRYPTO_INTERNAL_ENGINE_H
+#define _CRYPTO_INTERNAL_ENGINE_H
+
+#include <crypto/engine.h>
+
+#endif

From b7b23ccbda5d956a8f605da35685ff5f53c0f3b9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:43 +0800
Subject: [PATCH 1336/2047] crypto: omap - Include internal/engine.h

Inlucde internal/engine.h because this driver uses directly
accesses attributes inside struct crypto_engine.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/omap-aes.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index d6fb8676f6cc..ad0d8db086db 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -13,28 +13,26 @@
 #define prn(num) pr_debug(#num "=%d\n", num)
 #define prx(num) pr_debug(#num "=%x\n", num)
 
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/scatterlist.h>
+#include <crypto/aes.h>
+#include <crypto/gcm.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/engine.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
-#include <linux/pm_runtime.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_address.h>
-#include <linux/io.h>
-#include <linux/crypto.h>
-#include <linux/interrupt.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/aes.h>
-#include <crypto/gcm.h>
-#include <crypto/engine.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/internal/aead.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
 
 #include "omap-crypto.h"
 #include "omap-aes.h"

From 4ac1a2d88d8ff2b7d6decb40f712bf9ffddcd8c9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:45 +0800
Subject: [PATCH 1337/2047] crypto: caam - Include internal/engine.h

Inlucde internal/engine.h because this driver uses directly
accesses attributes inside struct crypto_engine.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamalg.c  | 4 ++--
 drivers/crypto/caam/caamhash.c | 2 +-
 drivers/crypto/caam/caampkc.c  | 1 +
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index feb86013dbf6..da8182ee86fe 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -56,9 +56,9 @@
 #include "sg_sw_sec4.h"
 #include "key_gen.h"
 #include "caamalg_desc.h"
-#include <crypto/engine.h>
-#include <crypto/xts.h>
 #include <asm/unaligned.h>
+#include <crypto/internal/engine.h>
+#include <crypto/xts.h>
 #include <linux/dma-mapping.h>
 #include <linux/device.h>
 #include <linux/err.h>
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 641c3afd59ca..9ef25387f6b6 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -65,7 +65,7 @@
 #include "sg_sw_sec4.h"
 #include "key_gen.h"
 #include "caamhash_desc.h"
-#include <crypto/engine.h>
+#include <crypto/internal/engine.h>
 #include <linux/dma-mapping.h>
 #include <linux/kernel.h>
 
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 7e08af751e4e..1809c97d04be 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -16,6 +16,7 @@
 #include "desc_constr.h"
 #include "sg_sw_sec4.h"
 #include "caampkc.h"
+#include <crypto/internal/engine.h>
 #include <linux/dma-mapping.h>
 #include <linux/kernel.h>
 

From c1091e2baef65b24a39db6a61de316605d3505c5 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:47 +0800
Subject: [PATCH 1338/2047] crypto: engine - Move struct crypto_engine into
 internal/engine.h

Most drivers should not access the internal details of struct
crypto_engine.  Move it into the internal header file.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/engine.h          | 58 +-----------------------------
 include/crypto/internal/engine.h | 61 ++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 57 deletions(-)

diff --git a/include/crypto/engine.h b/include/crypto/engine.h
index 643639c3227c..cc9c7fd1c4d9 100644
--- a/include/crypto/engine.h
+++ b/include/crypto/engine.h
@@ -7,72 +7,16 @@
 #ifndef _CRYPTO_ENGINE_H
 #define _CRYPTO_ENGINE_H
 
-#include <crypto/algapi.h>
-#include <linux/kthread.h>
-#include <linux/spinlock_types.h>
 #include <linux/types.h>
 
 struct aead_request;
 struct ahash_request;
 struct akcipher_request;
+struct crypto_engine;
 struct device;
 struct kpp_request;
 struct skcipher_request;
 
-#define ENGINE_NAME_LEN	30
-/*
- * struct crypto_engine - crypto hardware engine
- * @name: the engine name
- * @idling: the engine is entering idle state
- * @busy: request pump is busy
- * @running: the engine is on working
- * @retry_support: indication that the hardware allows re-execution
- * of a failed backlog request
- * crypto-engine, in head position to keep order
- * @list: link with the global crypto engine list
- * @queue_lock: spinlock to synchronise access to request queue
- * @queue: the crypto queue of the engine
- * @rt: whether this queue is set to run as a realtime task
- * @prepare_crypt_hardware: a request will soon arrive from the queue
- * so the subsystem requests the driver to prepare the hardware
- * by issuing this call
- * @unprepare_crypt_hardware: there are currently no more requests on the
- * queue so the subsystem notifies the driver that it may relax the
- * hardware by issuing this call
- * @do_batch_requests: execute a batch of requests. Depends on multiple
- * requests support.
- * @kworker: kthread worker struct for request pump
- * @pump_requests: work struct for scheduling work to the request pump
- * @priv_data: the engine private data
- * @cur_req: the current request which is on processing
- */
-struct crypto_engine {
-	char			name[ENGINE_NAME_LEN];
-	bool			idling;
-	bool			busy;
-	bool			running;
-
-	bool			retry_support;
-
-	struct list_head	list;
-	spinlock_t		queue_lock;
-	struct crypto_queue	queue;
-	struct device		*dev;
-
-	bool			rt;
-
-	int (*prepare_crypt_hardware)(struct crypto_engine *engine);
-	int (*unprepare_crypt_hardware)(struct crypto_engine *engine);
-	int (*do_batch_requests)(struct crypto_engine *engine);
-
-
-	struct kthread_worker           *kworker;
-	struct kthread_work             pump_requests;
-
-	void				*priv_data;
-	struct crypto_async_request	*cur_req;
-};
-
 /*
  * struct crypto_engine_op - crypto hardware engine operations
  * @do_one_request: do encryption for current request
diff --git a/include/crypto/internal/engine.h b/include/crypto/internal/engine.h
index ffa1bb39d5e4..fbf4be56cf12 100644
--- a/include/crypto/internal/engine.h
+++ b/include/crypto/internal/engine.h
@@ -8,6 +8,67 @@
 #ifndef _CRYPTO_INTERNAL_ENGINE_H
 #define _CRYPTO_INTERNAL_ENGINE_H
 
+#include <crypto/algapi.h>
 #include <crypto/engine.h>
+#include <linux/kthread.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+
+#define ENGINE_NAME_LEN	30
+
+struct device;
+
+/*
+ * struct crypto_engine - crypto hardware engine
+ * @name: the engine name
+ * @idling: the engine is entering idle state
+ * @busy: request pump is busy
+ * @running: the engine is on working
+ * @retry_support: indication that the hardware allows re-execution
+ * of a failed backlog request
+ * crypto-engine, in head position to keep order
+ * @list: link with the global crypto engine list
+ * @queue_lock: spinlock to synchronise access to request queue
+ * @queue: the crypto queue of the engine
+ * @rt: whether this queue is set to run as a realtime task
+ * @prepare_crypt_hardware: a request will soon arrive from the queue
+ * so the subsystem requests the driver to prepare the hardware
+ * by issuing this call
+ * @unprepare_crypt_hardware: there are currently no more requests on the
+ * queue so the subsystem notifies the driver that it may relax the
+ * hardware by issuing this call
+ * @do_batch_requests: execute a batch of requests. Depends on multiple
+ * requests support.
+ * @kworker: kthread worker struct for request pump
+ * @pump_requests: work struct for scheduling work to the request pump
+ * @priv_data: the engine private data
+ * @cur_req: the current request which is on processing
+ */
+struct crypto_engine {
+	char			name[ENGINE_NAME_LEN];
+	bool			idling;
+	bool			busy;
+	bool			running;
+
+	bool			retry_support;
+
+	struct list_head	list;
+	spinlock_t		queue_lock;
+	struct crypto_queue	queue;
+	struct device		*dev;
+
+	bool			rt;
+
+	int (*prepare_crypt_hardware)(struct crypto_engine *engine);
+	int (*unprepare_crypt_hardware)(struct crypto_engine *engine);
+	int (*do_batch_requests)(struct crypto_engine *engine);
+
+
+	struct kthread_worker           *kworker;
+	struct kthread_work             pump_requests;
+
+	void				*priv_data;
+	struct crypto_async_request	*cur_req;
+};
 
 #endif

From e5e7eb023f24653b07329162b6359283b3a03a20 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:49 +0800
Subject: [PATCH 1339/2047] crypto: engine - Move crypto_engine_ops from
 request into crypto_alg

Rather than having the callback in the request, move it into the
crypto_alg object.  This avoids having crypto_engine look into the
request context is private to the driver.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/crypto_engine.c  | 213 +++++++++++++++++++++++++++++++++++++---
 include/crypto/engine.h |  59 ++++++++++-
 2 files changed, 256 insertions(+), 16 deletions(-)

diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index a75162bc1bf4..abfb1e6bfa48 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -7,20 +7,30 @@
  * Author: Baolin Wang <baolin.wang@linaro.org>
  */
 
-#include <crypto/aead.h>
-#include <crypto/akcipher.h>
-#include <crypto/hash.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/akcipher.h>
 #include <crypto/internal/engine.h>
-#include <crypto/kpp.h>
-#include <crypto/skcipher.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <uapi/linux/sched/types.h>
 #include "internal.h"
 
 #define CRYPTO_ENGINE_MAX_QLEN 10
 
+/* Temporary algorithm flag used to indicate an updated driver. */
+#define CRYPTO_ALG_ENGINE 0x200
+
+struct crypto_engine_alg {
+	struct crypto_alg base;
+	struct crypto_engine_op op;
+};
+
 /**
  * crypto_finalize_request - finalize one request if the request is done
  * @engine: the hardware engine
@@ -64,6 +74,8 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 				 bool in_kthread)
 {
 	struct crypto_async_request *async_req, *backlog;
+	struct crypto_engine_alg *alg;
+	struct crypto_engine_op *op;
 	unsigned long flags;
 	bool was_busy = false;
 	int ret;
@@ -137,15 +149,22 @@ start_request:
 		}
 	}
 
-	enginectx = crypto_tfm_ctx(async_req->tfm);
+	if (async_req->tfm->__crt_alg->cra_flags & CRYPTO_ALG_ENGINE) {
+		alg = container_of(async_req->tfm->__crt_alg,
+				   struct crypto_engine_alg, base);
+		op = &alg->op;
+	} else {
+		enginectx = crypto_tfm_ctx(async_req->tfm);
+		op = &enginectx->op;
 
-	if (!enginectx->op.do_one_request) {
-		dev_err(engine->dev, "failed to do request\n");
-		ret = -EINVAL;
-		goto req_err_1;
+		if (!op->do_one_request) {
+			dev_err(engine->dev, "failed to do request\n");
+			ret = -EINVAL;
+			goto req_err_1;
+		}
 	}
 
-	ret = enginectx->op.do_one_request(engine, async_req);
+	ret = op->do_one_request(engine, async_req);
 
 	/* Request unsuccessfully executed by hardware */
 	if (ret < 0) {
@@ -556,5 +575,177 @@ int crypto_engine_exit(struct crypto_engine *engine)
 }
 EXPORT_SYMBOL_GPL(crypto_engine_exit);
 
+int crypto_engine_register_aead(struct aead_engine_alg *alg)
+{
+	if (!alg->op.do_one_request)
+		return -EINVAL;
+
+	alg->base.base.cra_flags |= CRYPTO_ALG_ENGINE;
+
+	return crypto_register_aead(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_register_aead);
+
+void crypto_engine_unregister_aead(struct aead_engine_alg *alg)
+{
+	crypto_unregister_aead(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_unregister_aead);
+
+int crypto_engine_register_aeads(struct aead_engine_alg *algs, int count)
+{
+	int i, ret;
+
+	for (i = 0; i < count; i++) {
+		ret = crypto_engine_register_aead(&algs[i]);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	crypto_engine_unregister_aeads(algs, i);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_engine_register_aeads);
+
+void crypto_engine_unregister_aeads(struct aead_engine_alg *algs, int count)
+{
+	int i;
+
+	for (i = count - 1; i >= 0; --i)
+		crypto_engine_unregister_aead(&algs[i]);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_unregister_aeads);
+
+int crypto_engine_register_ahash(struct ahash_engine_alg *alg)
+{
+	if (!alg->op.do_one_request)
+		return -EINVAL;
+
+	alg->base.halg.base.cra_flags |= CRYPTO_ALG_ENGINE;
+
+	return crypto_register_ahash(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_register_ahash);
+
+void crypto_engine_unregister_ahash(struct ahash_engine_alg *alg)
+{
+	crypto_unregister_ahash(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_unregister_ahash);
+
+int crypto_engine_register_ahashes(struct ahash_engine_alg *algs, int count)
+{
+	int i, ret;
+
+	for (i = 0; i < count; i++) {
+		ret = crypto_engine_register_ahash(&algs[i]);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	crypto_engine_unregister_ahashes(algs, i);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_engine_register_ahashes);
+
+void crypto_engine_unregister_ahashes(struct ahash_engine_alg *algs,
+				      int count)
+{
+	int i;
+
+	for (i = count - 1; i >= 0; --i)
+		crypto_engine_unregister_ahash(&algs[i]);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_unregister_ahashes);
+
+int crypto_engine_register_akcipher(struct akcipher_engine_alg *alg)
+{
+	if (!alg->op.do_one_request)
+		return -EINVAL;
+
+	alg->base.base.cra_flags |= CRYPTO_ALG_ENGINE;
+
+	return crypto_register_akcipher(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_register_akcipher);
+
+void crypto_engine_unregister_akcipher(struct akcipher_engine_alg *alg)
+{
+	crypto_unregister_akcipher(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_unregister_akcipher);
+
+int crypto_engine_register_kpp(struct kpp_engine_alg *alg)
+{
+	if (!alg->op.do_one_request)
+		return -EINVAL;
+
+	alg->base.base.cra_flags |= CRYPTO_ALG_ENGINE;
+
+	return crypto_register_kpp(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_register_kpp);
+
+void crypto_engine_unregister_kpp(struct kpp_engine_alg *alg)
+{
+	crypto_unregister_kpp(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_unregister_kpp);
+
+int crypto_engine_register_skcipher(struct skcipher_engine_alg *alg)
+{
+	if (!alg->op.do_one_request)
+		return -EINVAL;
+
+	alg->base.base.cra_flags |= CRYPTO_ALG_ENGINE;
+
+	return crypto_register_skcipher(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_register_skcipher);
+
+void crypto_engine_unregister_skcipher(struct skcipher_engine_alg *alg)
+{
+	return crypto_unregister_skcipher(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_unregister_skcipher);
+
+int crypto_engine_register_skciphers(struct skcipher_engine_alg *algs,
+				     int count)
+{
+	int i, ret;
+
+	for (i = 0; i < count; i++) {
+		ret = crypto_engine_register_skcipher(&algs[i]);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	crypto_engine_unregister_skciphers(algs, i);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_engine_register_skciphers);
+
+void crypto_engine_unregister_skciphers(struct skcipher_engine_alg *algs,
+					int count)
+{
+	int i;
+
+	for (i = count - 1; i >= 0; --i)
+		crypto_engine_unregister_skcipher(&algs[i]);
+}
+EXPORT_SYMBOL_GPL(crypto_engine_unregister_skciphers);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Crypto hardware engine framework");
diff --git a/include/crypto/engine.h b/include/crypto/engine.h
index cc9c7fd1c4d9..cf57e732566b 100644
--- a/include/crypto/engine.h
+++ b/include/crypto/engine.h
@@ -7,15 +7,15 @@
 #ifndef _CRYPTO_ENGINE_H
 #define _CRYPTO_ENGINE_H
 
+#include <crypto/aead.h>
+#include <crypto/akcipher.h>
+#include <crypto/hash.h>
+#include <crypto/kpp.h>
+#include <crypto/skcipher.h>
 #include <linux/types.h>
 
-struct aead_request;
-struct ahash_request;
-struct akcipher_request;
 struct crypto_engine;
 struct device;
-struct kpp_request;
-struct skcipher_request;
 
 /*
  * struct crypto_engine_op - crypto hardware engine operations
@@ -30,6 +30,31 @@ struct crypto_engine_ctx {
 	struct crypto_engine_op op;
 };
 
+struct aead_engine_alg {
+	struct aead_alg base;
+	struct crypto_engine_op op;
+};
+
+struct ahash_engine_alg {
+	struct ahash_alg base;
+	struct crypto_engine_op op;
+};
+
+struct akcipher_engine_alg {
+	struct akcipher_alg base;
+	struct crypto_engine_op op;
+};
+
+struct kpp_engine_alg {
+	struct kpp_alg base;
+	struct crypto_engine_op op;
+};
+
+struct skcipher_engine_alg {
+	struct skcipher_alg base;
+	struct crypto_engine_op op;
+};
+
 int crypto_transfer_aead_request_to_engine(struct crypto_engine *engine,
 					   struct aead_request *req);
 int crypto_transfer_akcipher_request_to_engine(struct crypto_engine *engine,
@@ -59,4 +84,28 @@ struct crypto_engine *crypto_engine_alloc_init_and_set(struct device *dev,
 						       bool rt, int qlen);
 int crypto_engine_exit(struct crypto_engine *engine);
 
+int crypto_engine_register_aead(struct aead_engine_alg *alg);
+void crypto_engine_unregister_aead(struct aead_engine_alg *alg);
+int crypto_engine_register_aeads(struct aead_engine_alg *algs, int count);
+void crypto_engine_unregister_aeads(struct aead_engine_alg *algs, int count);
+
+int crypto_engine_register_ahash(struct ahash_engine_alg *alg);
+void crypto_engine_unregister_ahash(struct ahash_engine_alg *alg);
+int crypto_engine_register_ahashes(struct ahash_engine_alg *algs, int count);
+void crypto_engine_unregister_ahashes(struct ahash_engine_alg *algs,
+				      int count);
+
+int crypto_engine_register_akcipher(struct akcipher_engine_alg *alg);
+void crypto_engine_unregister_akcipher(struct akcipher_engine_alg *alg);
+
+int crypto_engine_register_kpp(struct kpp_engine_alg *alg);
+void crypto_engine_unregister_kpp(struct kpp_engine_alg *alg);
+
+int crypto_engine_register_skcipher(struct skcipher_engine_alg *alg);
+void crypto_engine_unregister_skcipher(struct skcipher_engine_alg *alg);
+int crypto_engine_register_skciphers(struct skcipher_engine_alg *algs,
+				     int count);
+void crypto_engine_unregister_skciphers(struct skcipher_engine_alg *algs,
+					int count);
+
 #endif /* _CRYPTO_ENGINE_H */

From 07e34cd39282af37160c978abf18308d5bb287e3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:51 +0800
Subject: [PATCH 1340/2047] crypto: sun8i-ce - Use new crypto_engine_op
 interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../allwinner/sun8i-ce/sun8i-ce-cipher.c      |  25 +--
 .../crypto/allwinner/sun8i-ce/sun8i-ce-core.c | 171 +++++++++++-------
 .../crypto/allwinner/sun8i-ce/sun8i-ce-hash.c |  98 +++++-----
 drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h  |  13 +-
 4 files changed, 181 insertions(+), 126 deletions(-)

diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
index 573a08fa7afa..8d4c42863a62 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
@@ -29,7 +29,7 @@ static int sun8i_ce_cipher_need_fallback(struct skcipher_request *areq)
 	struct sun8i_ce_alg_template *algt;
 	unsigned int todo, len;
 
-	algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
+	algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher.base);
 
 	if (sg_nents_for_len(areq->src, areq->cryptlen) > MAX_SG ||
 	    sg_nents_for_len(areq->dst, areq->cryptlen) > MAX_SG) {
@@ -92,13 +92,18 @@ static int sun8i_ce_cipher_fallback(struct skcipher_request *areq)
 	struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
 	struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 	int err;
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
-	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
-	struct sun8i_ce_alg_template *algt;
 
-	algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
-	algt->stat_fb++;
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG)) {
+		struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+		struct sun8i_ce_alg_template *algt __maybe_unused;
+
+		algt = container_of(alg, struct sun8i_ce_alg_template,
+				    alg.skcipher.base);
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
+		algt->stat_fb++;
 #endif
+	}
 
 	skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
 	skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags,
@@ -133,7 +138,7 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req
 	int ns = sg_nents_for_len(areq->src, areq->cryptlen);
 	int nd = sg_nents_for_len(areq->dst, areq->cryptlen);
 
-	algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
+	algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher.base);
 
 	dev_dbg(ce->dev, "%s %s %u %x IV(%p %u) key=%u\n", __func__,
 		crypto_tfm_alg_name(areq->base.tfm),
@@ -355,7 +360,7 @@ static void sun8i_ce_cipher_unprepare(struct crypto_engine *engine,
 	dma_unmap_single(ce->dev, rctx->addr_key, op->keylen, DMA_TO_DEVICE);
 }
 
-static int sun8i_ce_cipher_do_one(struct crypto_engine *engine, void *areq)
+int sun8i_ce_cipher_do_one(struct crypto_engine *engine, void *areq)
 {
 	int err = sun8i_ce_cipher_prepare(engine, areq);
 
@@ -416,7 +421,7 @@ int sun8i_ce_cipher_init(struct crypto_tfm *tfm)
 
 	memset(op, 0, sizeof(struct sun8i_cipher_tfm_ctx));
 
-	algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher);
+	algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher.base);
 	op->ce = algt->ce;
 
 	op->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
@@ -433,8 +438,6 @@ int sun8i_ce_cipher_init(struct crypto_tfm *tfm)
 	       crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)),
 	       CRYPTO_MAX_ALG_NAME);
 
-	op->enginectx.op.do_one_request = sun8i_ce_cipher_do_one;
-
 	err = pm_runtime_get_sync(op->ce->dev);
 	if (err < 0)
 		goto error_pm;
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
index 07ea0cc82b16..1c2c32d0568e 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
@@ -9,21 +9,25 @@
  *
  * You could find a link for the datasheet in Documentation/arch/arm/sunxi.rst
  */
+
+#include <crypto/engine.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/rng.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/clk.h>
-#include <linux/crypto.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
+#include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
-#include <crypto/internal/rng.h>
-#include <crypto/internal/skcipher.h>
 
 #include "sun8i-ce.h"
 
@@ -277,7 +281,7 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.ce_algo_id = CE_ID_CIPHER_AES,
 	.ce_blockmode = CE_ID_OP_CBC,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base = {
 			.cra_name = "cbc(aes)",
 			.cra_driver_name = "cbc-aes-sun8i-ce",
@@ -298,13 +302,16 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 		.setkey		= sun8i_ce_aes_setkey,
 		.encrypt	= sun8i_ce_skencrypt,
 		.decrypt	= sun8i_ce_skdecrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = sun8i_ce_cipher_do_one,
+	},
 },
 {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.ce_algo_id = CE_ID_CIPHER_AES,
 	.ce_blockmode = CE_ID_OP_ECB,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base = {
 			.cra_name = "ecb(aes)",
 			.cra_driver_name = "ecb-aes-sun8i-ce",
@@ -324,13 +331,16 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 		.setkey		= sun8i_ce_aes_setkey,
 		.encrypt	= sun8i_ce_skencrypt,
 		.decrypt	= sun8i_ce_skdecrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = sun8i_ce_cipher_do_one,
+	},
 },
 {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.ce_algo_id = CE_ID_CIPHER_DES3,
 	.ce_blockmode = CE_ID_OP_CBC,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base = {
 			.cra_name = "cbc(des3_ede)",
 			.cra_driver_name = "cbc-des3-sun8i-ce",
@@ -351,13 +361,16 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 		.setkey		= sun8i_ce_des3_setkey,
 		.encrypt	= sun8i_ce_skencrypt,
 		.decrypt	= sun8i_ce_skdecrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = sun8i_ce_cipher_do_one,
+	},
 },
 {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.ce_algo_id = CE_ID_CIPHER_DES3,
 	.ce_blockmode = CE_ID_OP_ECB,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base = {
 			.cra_name = "ecb(des3_ede)",
 			.cra_driver_name = "ecb-des3-sun8i-ce",
@@ -377,12 +390,15 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 		.setkey		= sun8i_ce_des3_setkey,
 		.encrypt	= sun8i_ce_skencrypt,
 		.decrypt	= sun8i_ce_skdecrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = sun8i_ce_cipher_do_one,
+	},
 },
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_HASH
 {	.type = CRYPTO_ALG_TYPE_AHASH,
 	.ce_algo_id = CE_ID_HASH_MD5,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = sun8i_ce_hash_init,
 		.update = sun8i_ce_hash_update,
 		.final = sun8i_ce_hash_final,
@@ -390,6 +406,8 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 		.digest = sun8i_ce_hash_digest,
 		.export = sun8i_ce_hash_export,
 		.import = sun8i_ce_hash_import,
+		.init_tfm = sun8i_ce_hash_init_tfm,
+		.exit_tfm = sun8i_ce_hash_exit_tfm,
 		.halg = {
 			.digestsize = MD5_DIGEST_SIZE,
 			.statesize = sizeof(struct md5_state),
@@ -404,15 +422,17 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 				.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun8i_ce_hash_tfm_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun8i_ce_hash_crainit,
-				.cra_exit = sun8i_ce_hash_craexit,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = sun8i_ce_hash_run,
+	},
+
 },
 {	.type = CRYPTO_ALG_TYPE_AHASH,
 	.ce_algo_id = CE_ID_HASH_SHA1,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = sun8i_ce_hash_init,
 		.update = sun8i_ce_hash_update,
 		.final = sun8i_ce_hash_final,
@@ -420,6 +440,8 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 		.digest = sun8i_ce_hash_digest,
 		.export = sun8i_ce_hash_export,
 		.import = sun8i_ce_hash_import,
+		.init_tfm = sun8i_ce_hash_init_tfm,
+		.exit_tfm = sun8i_ce_hash_exit_tfm,
 		.halg = {
 			.digestsize = SHA1_DIGEST_SIZE,
 			.statesize = sizeof(struct sha1_state),
@@ -434,15 +456,16 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 				.cra_blocksize = SHA1_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun8i_ce_hash_tfm_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun8i_ce_hash_crainit,
-				.cra_exit = sun8i_ce_hash_craexit,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = sun8i_ce_hash_run,
+	},
 },
 {	.type = CRYPTO_ALG_TYPE_AHASH,
 	.ce_algo_id = CE_ID_HASH_SHA224,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = sun8i_ce_hash_init,
 		.update = sun8i_ce_hash_update,
 		.final = sun8i_ce_hash_final,
@@ -450,6 +473,8 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 		.digest = sun8i_ce_hash_digest,
 		.export = sun8i_ce_hash_export,
 		.import = sun8i_ce_hash_import,
+		.init_tfm = sun8i_ce_hash_init_tfm,
+		.exit_tfm = sun8i_ce_hash_exit_tfm,
 		.halg = {
 			.digestsize = SHA224_DIGEST_SIZE,
 			.statesize = sizeof(struct sha256_state),
@@ -464,15 +489,16 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 				.cra_blocksize = SHA224_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun8i_ce_hash_tfm_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun8i_ce_hash_crainit,
-				.cra_exit = sun8i_ce_hash_craexit,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = sun8i_ce_hash_run,
+	},
 },
 {	.type = CRYPTO_ALG_TYPE_AHASH,
 	.ce_algo_id = CE_ID_HASH_SHA256,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = sun8i_ce_hash_init,
 		.update = sun8i_ce_hash_update,
 		.final = sun8i_ce_hash_final,
@@ -480,6 +506,8 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 		.digest = sun8i_ce_hash_digest,
 		.export = sun8i_ce_hash_export,
 		.import = sun8i_ce_hash_import,
+		.init_tfm = sun8i_ce_hash_init_tfm,
+		.exit_tfm = sun8i_ce_hash_exit_tfm,
 		.halg = {
 			.digestsize = SHA256_DIGEST_SIZE,
 			.statesize = sizeof(struct sha256_state),
@@ -494,15 +522,16 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 				.cra_blocksize = SHA256_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun8i_ce_hash_tfm_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun8i_ce_hash_crainit,
-				.cra_exit = sun8i_ce_hash_craexit,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = sun8i_ce_hash_run,
+	},
 },
 {	.type = CRYPTO_ALG_TYPE_AHASH,
 	.ce_algo_id = CE_ID_HASH_SHA384,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = sun8i_ce_hash_init,
 		.update = sun8i_ce_hash_update,
 		.final = sun8i_ce_hash_final,
@@ -510,6 +539,8 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 		.digest = sun8i_ce_hash_digest,
 		.export = sun8i_ce_hash_export,
 		.import = sun8i_ce_hash_import,
+		.init_tfm = sun8i_ce_hash_init_tfm,
+		.exit_tfm = sun8i_ce_hash_exit_tfm,
 		.halg = {
 			.digestsize = SHA384_DIGEST_SIZE,
 			.statesize = sizeof(struct sha512_state),
@@ -524,15 +555,16 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 				.cra_blocksize = SHA384_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun8i_ce_hash_tfm_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun8i_ce_hash_crainit,
-				.cra_exit = sun8i_ce_hash_craexit,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = sun8i_ce_hash_run,
+	},
 },
 {	.type = CRYPTO_ALG_TYPE_AHASH,
 	.ce_algo_id = CE_ID_HASH_SHA512,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = sun8i_ce_hash_init,
 		.update = sun8i_ce_hash_update,
 		.final = sun8i_ce_hash_final,
@@ -540,6 +572,8 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 		.digest = sun8i_ce_hash_digest,
 		.export = sun8i_ce_hash_export,
 		.import = sun8i_ce_hash_import,
+		.init_tfm = sun8i_ce_hash_init_tfm,
+		.exit_tfm = sun8i_ce_hash_exit_tfm,
 		.halg = {
 			.digestsize = SHA512_DIGEST_SIZE,
 			.statesize = sizeof(struct sha512_state),
@@ -554,11 +588,12 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 				.cra_blocksize = SHA512_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun8i_ce_hash_tfm_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun8i_ce_hash_crainit,
-				.cra_exit = sun8i_ce_hash_craexit,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = sun8i_ce_hash_run,
+	},
 },
 #endif
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_PRNG
@@ -582,14 +617,18 @@ static struct sun8i_ce_alg_template ce_algs[] = {
 #endif
 };
 
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
 static int sun8i_ce_debugfs_show(struct seq_file *seq, void *v)
 {
-	struct sun8i_ce_dev *ce = seq->private;
+	struct sun8i_ce_dev *ce __maybe_unused = seq->private;
 	unsigned int i;
 
 	for (i = 0; i < MAXFLOW; i++)
-		seq_printf(seq, "Channel %d: nreq %lu\n", i, ce->chanlist[i].stat_req);
+		seq_printf(seq, "Channel %d: nreq %lu\n", i,
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
+			   ce->chanlist[i].stat_req);
+#else
+			   0ul);
+#endif
 
 	for (i = 0; i < ARRAY_SIZE(ce_algs); i++) {
 		if (!ce_algs[i].ce)
@@ -597,8 +636,8 @@ static int sun8i_ce_debugfs_show(struct seq_file *seq, void *v)
 		switch (ce_algs[i].type) {
 		case CRYPTO_ALG_TYPE_SKCIPHER:
 			seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
-				   ce_algs[i].alg.skcipher.base.cra_driver_name,
-				   ce_algs[i].alg.skcipher.base.cra_name,
+				   ce_algs[i].alg.skcipher.base.base.cra_driver_name,
+				   ce_algs[i].alg.skcipher.base.base.cra_name,
 				   ce_algs[i].stat_req, ce_algs[i].stat_fb);
 			seq_printf(seq, "\tLast fallback is: %s\n",
 				   ce_algs[i].fbname);
@@ -621,8 +660,8 @@ static int sun8i_ce_debugfs_show(struct seq_file *seq, void *v)
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
 			seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
-				   ce_algs[i].alg.hash.halg.base.cra_driver_name,
-				   ce_algs[i].alg.hash.halg.base.cra_name,
+				   ce_algs[i].alg.hash.base.halg.base.cra_driver_name,
+				   ce_algs[i].alg.hash.base.halg.base.cra_name,
 				   ce_algs[i].stat_req, ce_algs[i].stat_fb);
 			seq_printf(seq, "\tLast fallback is: %s\n",
 				   ce_algs[i].fbname);
@@ -643,7 +682,8 @@ static int sun8i_ce_debugfs_show(struct seq_file *seq, void *v)
 			break;
 		}
 	}
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_TRNG
+#if defined(CONFIG_CRYPTO_DEV_SUN8I_CE_TRNG) && \
+    defined(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG)
 	seq_printf(seq, "HWRNG %lu %lu\n",
 		   ce->hwrng_stat_req, ce->hwrng_stat_bytes);
 #endif
@@ -651,7 +691,6 @@ static int sun8i_ce_debugfs_show(struct seq_file *seq, void *v)
 }
 
 DEFINE_SHOW_ATTRIBUTE(sun8i_ce_debugfs);
-#endif
 
 static void sun8i_ce_free_chanlist(struct sun8i_ce_dev *ce, int i)
 {
@@ -839,7 +878,7 @@ static int sun8i_ce_register_algs(struct sun8i_ce_dev *ce)
 			if (ce_method == CE_ID_NOTSUPP) {
 				dev_dbg(ce->dev,
 					"DEBUG: Algo of %s not supported\n",
-					ce_algs[i].alg.skcipher.base.cra_name);
+					ce_algs[i].alg.skcipher.base.base.cra_name);
 				ce_algs[i].ce = NULL;
 				break;
 			}
@@ -847,16 +886,16 @@ static int sun8i_ce_register_algs(struct sun8i_ce_dev *ce)
 			ce_method = ce->variant->op_mode[id];
 			if (ce_method == CE_ID_NOTSUPP) {
 				dev_dbg(ce->dev, "DEBUG: Blockmode of %s not supported\n",
-					ce_algs[i].alg.skcipher.base.cra_name);
+					ce_algs[i].alg.skcipher.base.base.cra_name);
 				ce_algs[i].ce = NULL;
 				break;
 			}
 			dev_info(ce->dev, "Register %s\n",
-				 ce_algs[i].alg.skcipher.base.cra_name);
-			err = crypto_register_skcipher(&ce_algs[i].alg.skcipher);
+				 ce_algs[i].alg.skcipher.base.base.cra_name);
+			err = crypto_engine_register_skcipher(&ce_algs[i].alg.skcipher);
 			if (err) {
 				dev_err(ce->dev, "ERROR: Fail to register %s\n",
-					ce_algs[i].alg.skcipher.base.cra_name);
+					ce_algs[i].alg.skcipher.base.base.cra_name);
 				ce_algs[i].ce = NULL;
 				return err;
 			}
@@ -867,16 +906,16 @@ static int sun8i_ce_register_algs(struct sun8i_ce_dev *ce)
 			if (ce_method == CE_ID_NOTSUPP) {
 				dev_info(ce->dev,
 					 "DEBUG: Algo of %s not supported\n",
-					 ce_algs[i].alg.hash.halg.base.cra_name);
+					 ce_algs[i].alg.hash.base.halg.base.cra_name);
 				ce_algs[i].ce = NULL;
 				break;
 			}
 			dev_info(ce->dev, "Register %s\n",
-				 ce_algs[i].alg.hash.halg.base.cra_name);
-			err = crypto_register_ahash(&ce_algs[i].alg.hash);
+				 ce_algs[i].alg.hash.base.halg.base.cra_name);
+			err = crypto_engine_register_ahash(&ce_algs[i].alg.hash);
 			if (err) {
 				dev_err(ce->dev, "ERROR: Fail to register %s\n",
-					ce_algs[i].alg.hash.halg.base.cra_name);
+					ce_algs[i].alg.hash.base.halg.base.cra_name);
 				ce_algs[i].ce = NULL;
 				return err;
 			}
@@ -916,13 +955,13 @@ static void sun8i_ce_unregister_algs(struct sun8i_ce_dev *ce)
 		switch (ce_algs[i].type) {
 		case CRYPTO_ALG_TYPE_SKCIPHER:
 			dev_info(ce->dev, "Unregister %d %s\n", i,
-				 ce_algs[i].alg.skcipher.base.cra_name);
-			crypto_unregister_skcipher(&ce_algs[i].alg.skcipher);
+				 ce_algs[i].alg.skcipher.base.base.cra_name);
+			crypto_engine_unregister_skcipher(&ce_algs[i].alg.skcipher);
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
 			dev_info(ce->dev, "Unregister %d %s\n", i,
-				 ce_algs[i].alg.hash.halg.base.cra_name);
-			crypto_unregister_ahash(&ce_algs[i].alg.hash);
+				 ce_algs[i].alg.hash.base.halg.base.cra_name);
+			crypto_engine_unregister_ahash(&ce_algs[i].alg.hash);
 			break;
 		case CRYPTO_ALG_TYPE_RNG:
 			dev_info(ce->dev, "Unregister %d %s\n", i,
@@ -1007,13 +1046,21 @@ static int sun8i_ce_probe(struct platform_device *pdev)
 
 	pm_runtime_put_sync(ce->dev);
 
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG)) {
+		struct dentry *dbgfs_dir __maybe_unused;
+		struct dentry *dbgfs_stats __maybe_unused;
+
+		/* Ignore error of debugfs */
+		dbgfs_dir = debugfs_create_dir("sun8i-ce", NULL);
+		dbgfs_stats = debugfs_create_file("stats", 0444,
+						  dbgfs_dir, ce,
+						  &sun8i_ce_debugfs_fops);
+
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
-	/* Ignore error of debugfs */
-	ce->dbgfs_dir = debugfs_create_dir("sun8i-ce", NULL);
-	ce->dbgfs_stats = debugfs_create_file("stats", 0444,
-					      ce->dbgfs_dir, ce,
-					      &sun8i_ce_debugfs_fops);
+		ce->dbgfs_dir = dbgfs_dir;
+		ce->dbgfs_stats = dbgfs_stats;
 #endif
+	}
 
 	return 0;
 error_alg:
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
index 04d7d890de58..d358334e5981 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
@@ -9,46 +9,46 @@
  *
  * You could find the datasheet in Documentation/arch/arm/sunxi.rst
  */
-#include <linux/bottom_half.h>
-#include <linux/dma-mapping.h>
-#include <linux/pm_runtime.h>
-#include <linux/scatterlist.h>
+
 #include <crypto/internal/hash.h>
+#include <crypto/md5.h>
 #include <crypto/sha1.h>
 #include <crypto/sha2.h>
-#include <crypto/md5.h>
+#include <linux/bottom_half.h>
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 #include "sun8i-ce.h"
 
-int sun8i_ce_hash_crainit(struct crypto_tfm *tfm)
+int sun8i_ce_hash_init_tfm(struct crypto_ahash *tfm)
 {
-	struct sun8i_ce_hash_tfm_ctx *op = crypto_tfm_ctx(tfm);
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
+	struct sun8i_ce_hash_tfm_ctx *op = crypto_ahash_ctx(tfm);
+	struct ahash_alg *alg = crypto_ahash_alg(tfm);
 	struct sun8i_ce_alg_template *algt;
 	int err;
 
-	memset(op, 0, sizeof(struct sun8i_ce_hash_tfm_ctx));
-
-	algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash);
+	algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash.base);
 	op->ce = algt->ce;
 
-	op->enginectx.op.do_one_request = sun8i_ce_hash_run;
-
 	/* FALLBACK */
-	op->fallback_tfm = crypto_alloc_ahash(crypto_tfm_alg_name(tfm), 0,
+	op->fallback_tfm = crypto_alloc_ahash(crypto_ahash_alg_name(tfm), 0,
 					      CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(op->fallback_tfm)) {
 		dev_err(algt->ce->dev, "Fallback driver could no be loaded\n");
 		return PTR_ERR(op->fallback_tfm);
 	}
 
-	if (algt->alg.hash.halg.statesize < crypto_ahash_statesize(op->fallback_tfm))
-		algt->alg.hash.halg.statesize = crypto_ahash_statesize(op->fallback_tfm);
+	crypto_ahash_set_statesize(tfm,
+				   crypto_ahash_statesize(op->fallback_tfm));
 
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+	crypto_ahash_set_reqsize(tfm,
 				 sizeof(struct sun8i_ce_hash_reqctx) +
 				 crypto_ahash_reqsize(op->fallback_tfm));
 
-	memcpy(algt->fbname, crypto_tfm_alg_driver_name(&op->fallback_tfm->base),
+	memcpy(algt->fbname, crypto_ahash_driver_name(op->fallback_tfm),
 	       CRYPTO_MAX_ALG_NAME);
 
 	err = pm_runtime_get_sync(op->ce->dev);
@@ -61,9 +61,9 @@ error_pm:
 	return err;
 }
 
-void sun8i_ce_hash_craexit(struct crypto_tfm *tfm)
+void sun8i_ce_hash_exit_tfm(struct crypto_ahash *tfm)
 {
-	struct sun8i_ce_hash_tfm_ctx *tfmctx = crypto_tfm_ctx(tfm);
+	struct sun8i_ce_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
 
 	crypto_free_ahash(tfmctx->fallback_tfm);
 	pm_runtime_put_sync_suspend(tfmctx->ce->dev);
@@ -112,20 +112,22 @@ int sun8i_ce_hash_final(struct ahash_request *areq)
 	struct sun8i_ce_hash_reqctx *rctx = ahash_request_ctx(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct sun8i_ce_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
-	struct sun8i_ce_alg_template *algt;
-#endif
 
 	ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
 	rctx->fallback_req.base.flags = areq->base.flags &
 					CRYPTO_TFM_REQ_MAY_SLEEP;
 	rctx->fallback_req.result = areq->result;
 
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG)) {
+		struct sun8i_ce_alg_template *algt __maybe_unused;
+		struct ahash_alg *alg = crypto_ahash_alg(tfm);
+
+		algt = container_of(alg, struct sun8i_ce_alg_template,
+				    alg.hash.base);
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
-	algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash);
-	algt->stat_fb++;
+		algt->stat_fb++;
 #endif
+	}
 
 	return crypto_ahash_final(&rctx->fallback_req);
 }
@@ -150,10 +152,6 @@ int sun8i_ce_hash_finup(struct ahash_request *areq)
 	struct sun8i_ce_hash_reqctx *rctx = ahash_request_ctx(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct sun8i_ce_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
-	struct sun8i_ce_alg_template *algt;
-#endif
 
 	ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
 	rctx->fallback_req.base.flags = areq->base.flags &
@@ -162,10 +160,17 @@ int sun8i_ce_hash_finup(struct ahash_request *areq)
 	rctx->fallback_req.nbytes = areq->nbytes;
 	rctx->fallback_req.src = areq->src;
 	rctx->fallback_req.result = areq->result;
+
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG)) {
+		struct sun8i_ce_alg_template *algt __maybe_unused;
+		struct ahash_alg *alg = crypto_ahash_alg(tfm);
+
+		algt = container_of(alg, struct sun8i_ce_alg_template,
+				    alg.hash.base);
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
-	algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash);
-	algt->stat_fb++;
+		algt->stat_fb++;
 #endif
+	}
 
 	return crypto_ahash_finup(&rctx->fallback_req);
 }
@@ -175,10 +180,6 @@ static int sun8i_ce_hash_digest_fb(struct ahash_request *areq)
 	struct sun8i_ce_hash_reqctx *rctx = ahash_request_ctx(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct sun8i_ce_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
-	struct sun8i_ce_alg_template *algt;
-#endif
 
 	ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
 	rctx->fallback_req.base.flags = areq->base.flags &
@@ -187,10 +188,17 @@ static int sun8i_ce_hash_digest_fb(struct ahash_request *areq)
 	rctx->fallback_req.nbytes = areq->nbytes;
 	rctx->fallback_req.src = areq->src;
 	rctx->fallback_req.result = areq->result;
+
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG)) {
+		struct sun8i_ce_alg_template *algt __maybe_unused;
+		struct ahash_alg *alg = crypto_ahash_alg(tfm);
+
+		algt = container_of(alg, struct sun8i_ce_alg_template,
+				    alg.hash.base);
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG
-	algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash);
-	algt->stat_fb++;
+		algt->stat_fb++;
 #endif
+	}
 
 	return crypto_ahash_digest(&rctx->fallback_req);
 }
@@ -202,7 +210,7 @@ static bool sun8i_ce_hash_need_fallback(struct ahash_request *areq)
 	struct sun8i_ce_alg_template *algt;
 	struct scatterlist *sg;
 
-	algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash);
+	algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash.base);
 
 	if (areq->nbytes == 0) {
 		algt->stat_fb_len0++;
@@ -251,7 +259,7 @@ int sun8i_ce_hash_digest(struct ahash_request *areq)
 			return sun8i_ce_hash_digest_fb(areq);
 	}
 
-	algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash);
+	algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash.base);
 	ce = algt->ce;
 
 	e = sun8i_ce_get_engine_number(ce);
@@ -343,11 +351,11 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
 	dma_addr_t addr_res, addr_pad;
 	int ns = sg_nents_for_len(areq->src, areq->nbytes);
 
-	algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash);
+	algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash.base);
 	ce = algt->ce;
 
-	bs = algt->alg.hash.halg.base.cra_blocksize;
-	digestsize = algt->alg.hash.halg.digestsize;
+	bs = algt->alg.hash.base.halg.base.cra_blocksize;
+	digestsize = algt->alg.hash.base.halg.digestsize;
 	if (digestsize == SHA224_DIGEST_SIZE)
 		digestsize = SHA256_DIGEST_SIZE;
 	if (digestsize == SHA384_DIGEST_SIZE)
@@ -452,14 +460,14 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
 
 	chan->timeout = areq->nbytes;
 
-	err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(areq->base.tfm));
+	err = sun8i_ce_run_task(ce, flow, crypto_ahash_alg_name(tfm));
 
 	dma_unmap_single(ce->dev, addr_pad, j * 4, DMA_TO_DEVICE);
 	dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE);
 	dma_unmap_single(ce->dev, addr_res, digestsize, DMA_FROM_DEVICE);
 
 
-	memcpy(areq->result, result, algt->alg.hash.halg.digestsize);
+	memcpy(areq->result, result, algt->alg.hash.base.halg.digestsize);
 theend:
 	kfree(buf);
 	kfree(result);
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
index 4742b48ef52e..93d4985def87 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
@@ -265,14 +265,12 @@ struct sun8i_cipher_req_ctx {
 
 /*
  * struct sun8i_cipher_tfm_ctx - context for a skcipher TFM
- * @enginectx:		crypto_engine used by this TFM
  * @key:		pointer to key data
  * @keylen:		len of the key
  * @ce:			pointer to the private data of driver handling this TFM
  * @fallback_tfm:	pointer to the fallback TFM
  */
 struct sun8i_cipher_tfm_ctx {
-	struct crypto_engine_ctx enginectx;
 	u32 *key;
 	u32 keylen;
 	struct sun8i_ce_dev *ce;
@@ -281,12 +279,10 @@ struct sun8i_cipher_tfm_ctx {
 
 /*
  * struct sun8i_ce_hash_tfm_ctx - context for an ahash TFM
- * @enginectx:		crypto_engine used by this TFM
  * @ce:			pointer to the private data of driver handling this TFM
  * @fallback_tfm:	pointer to the fallback TFM
  */
 struct sun8i_ce_hash_tfm_ctx {
-	struct crypto_engine_ctx enginectx;
 	struct sun8i_ce_dev *ce;
 	struct crypto_ahash *fallback_tfm;
 };
@@ -329,8 +325,8 @@ struct sun8i_ce_alg_template {
 	u32 ce_blockmode;
 	struct sun8i_ce_dev *ce;
 	union {
-		struct skcipher_alg skcipher;
-		struct ahash_alg hash;
+		struct skcipher_engine_alg skcipher;
+		struct ahash_engine_alg hash;
 		struct rng_alg rng;
 	} alg;
 	unsigned long stat_req;
@@ -353,6 +349,7 @@ int sun8i_ce_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			 unsigned int keylen);
 int sun8i_ce_cipher_init(struct crypto_tfm *tfm);
 void sun8i_ce_cipher_exit(struct crypto_tfm *tfm);
+int sun8i_ce_cipher_do_one(struct crypto_engine *engine, void *areq);
 int sun8i_ce_skdecrypt(struct skcipher_request *areq);
 int sun8i_ce_skencrypt(struct skcipher_request *areq);
 
@@ -360,8 +357,8 @@ int sun8i_ce_get_engine_number(struct sun8i_ce_dev *ce);
 
 int sun8i_ce_run_task(struct sun8i_ce_dev *ce, int flow, const char *name);
 
-int sun8i_ce_hash_crainit(struct crypto_tfm *tfm);
-void sun8i_ce_hash_craexit(struct crypto_tfm *tfm);
+int sun8i_ce_hash_init_tfm(struct crypto_ahash *tfm);
+void sun8i_ce_hash_exit_tfm(struct crypto_ahash *tfm);
 int sun8i_ce_hash_init(struct ahash_request *areq);
 int sun8i_ce_hash_export(struct ahash_request *areq, void *out);
 int sun8i_ce_hash_import(struct ahash_request *areq, const void *in);

From 4c19e8fb5e9ce0b2b535274150605c78a4617bf9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:53 +0800
Subject: [PATCH 1341/2047] crypto: sun8i-ss - Use new crypto_engine_op
 interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../allwinner/sun8i-ss/sun8i-ss-cipher.c      |  25 +--
 .../crypto/allwinner/sun8i-ss/sun8i-ss-core.c | 154 +++++++++++-------
 .../crypto/allwinner/sun8i-ss/sun8i-ss-hash.c | 112 +++++++------
 drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h  |  17 +-
 4 files changed, 177 insertions(+), 131 deletions(-)

diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
index 2758dadf74eb..7fa359725ec7 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -24,7 +24,7 @@ static bool sun8i_ss_need_fallback(struct skcipher_request *areq)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
-	struct sun8i_ss_alg_template *algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
+	struct sun8i_ss_alg_template *algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher.base);
 	struct scatterlist *in_sg = areq->src;
 	struct scatterlist *out_sg = areq->dst;
 	struct scatterlist *sg;
@@ -93,13 +93,18 @@ static int sun8i_ss_cipher_fallback(struct skcipher_request *areq)
 	struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
 	int err;
 
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
-	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
-	struct sun8i_ss_alg_template *algt;
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG)) {
+		struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+		struct sun8i_ss_alg_template *algt __maybe_unused;
 
-	algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
-	algt->stat_fb++;
+		algt = container_of(alg, struct sun8i_ss_alg_template,
+				    alg.skcipher.base);
+
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
+		algt->stat_fb++;
 #endif
+	}
+
 	skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
 	skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags,
 				      areq->base.complete, areq->base.data);
@@ -193,7 +198,7 @@ static int sun8i_ss_cipher(struct skcipher_request *areq)
 	int nsgd = sg_nents_for_len(areq->dst, areq->cryptlen);
 	int i;
 
-	algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
+	algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher.base);
 
 	dev_dbg(ss->dev, "%s %s %u %x IV(%p %u) key=%u\n", __func__,
 		crypto_tfm_alg_name(areq->base.tfm),
@@ -324,7 +329,7 @@ theend:
 	return err;
 }
 
-static int sun8i_ss_handle_cipher_request(struct crypto_engine *engine, void *areq)
+int sun8i_ss_handle_cipher_request(struct crypto_engine *engine, void *areq)
 {
 	int err;
 	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
@@ -390,7 +395,7 @@ int sun8i_ss_cipher_init(struct crypto_tfm *tfm)
 
 	memset(op, 0, sizeof(struct sun8i_cipher_tfm_ctx));
 
-	algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher);
+	algt = container_of(alg, struct sun8i_ss_alg_template, alg.skcipher.base);
 	op->ss = algt->ss;
 
 	op->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
@@ -408,8 +413,6 @@ int sun8i_ss_cipher_init(struct crypto_tfm *tfm)
 	       crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)),
 	       CRYPTO_MAX_ALG_NAME);
 
-	op->enginectx.op.do_one_request = sun8i_ss_handle_cipher_request;
-
 	err = pm_runtime_resume_and_get(op->ss->dev);
 	if (err < 0) {
 		dev_err(op->ss->dev, "pm error %d\n", err);
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
index 3dd844b40ff7..62c001c9efc2 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
@@ -9,10 +9,14 @@
  *
  * You could find a link for the datasheet in Documentation/arch/arm/sunxi.rst
  */
+
+#include <crypto/engine.h>
+#include <crypto/internal/rng.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/clk.h>
-#include <linux/crypto.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
+#include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
@@ -23,8 +27,6 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
-#include <crypto/internal/rng.h>
-#include <crypto/internal/skcipher.h>
 
 #include "sun8i-ss.h"
 
@@ -168,7 +170,7 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.ss_algo_id = SS_ID_CIPHER_AES,
 	.ss_blockmode = SS_ID_OP_CBC,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base = {
 			.cra_name = "cbc(aes)",
 			.cra_driver_name = "cbc-aes-sun8i-ss",
@@ -189,13 +191,16 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 		.setkey		= sun8i_ss_aes_setkey,
 		.encrypt	= sun8i_ss_skencrypt,
 		.decrypt	= sun8i_ss_skdecrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = sun8i_ss_handle_cipher_request,
+	},
 },
 {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.ss_algo_id = SS_ID_CIPHER_AES,
 	.ss_blockmode = SS_ID_OP_ECB,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base = {
 			.cra_name = "ecb(aes)",
 			.cra_driver_name = "ecb-aes-sun8i-ss",
@@ -215,13 +220,16 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 		.setkey		= sun8i_ss_aes_setkey,
 		.encrypt	= sun8i_ss_skencrypt,
 		.decrypt	= sun8i_ss_skdecrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = sun8i_ss_handle_cipher_request,
+	},
 },
 {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.ss_algo_id = SS_ID_CIPHER_DES3,
 	.ss_blockmode = SS_ID_OP_CBC,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base = {
 			.cra_name = "cbc(des3_ede)",
 			.cra_driver_name = "cbc-des3-sun8i-ss",
@@ -242,13 +250,16 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 		.setkey		= sun8i_ss_des3_setkey,
 		.encrypt	= sun8i_ss_skencrypt,
 		.decrypt	= sun8i_ss_skdecrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = sun8i_ss_handle_cipher_request,
+	},
 },
 {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.ss_algo_id = SS_ID_CIPHER_DES3,
 	.ss_blockmode = SS_ID_OP_ECB,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base = {
 			.cra_name = "ecb(des3_ede)",
 			.cra_driver_name = "ecb-des3-sun8i-ss",
@@ -268,7 +279,10 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 		.setkey		= sun8i_ss_des3_setkey,
 		.encrypt	= sun8i_ss_skencrypt,
 		.decrypt	= sun8i_ss_skdecrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = sun8i_ss_handle_cipher_request,
+	},
 },
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_PRNG
 {
@@ -292,7 +306,7 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_HASH
 {	.type = CRYPTO_ALG_TYPE_AHASH,
 	.ss_algo_id = SS_ID_HASH_MD5,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = sun8i_ss_hash_init,
 		.update = sun8i_ss_hash_update,
 		.final = sun8i_ss_hash_final,
@@ -300,6 +314,8 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 		.digest = sun8i_ss_hash_digest,
 		.export = sun8i_ss_hash_export,
 		.import = sun8i_ss_hash_import,
+		.init_tfm = sun8i_ss_hash_init_tfm,
+		.exit_tfm = sun8i_ss_hash_exit_tfm,
 		.halg = {
 			.digestsize = MD5_DIGEST_SIZE,
 			.statesize = sizeof(struct md5_state),
@@ -314,15 +330,16 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 				.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun8i_ss_hash_tfm_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun8i_ss_hash_crainit,
-				.cra_exit = sun8i_ss_hash_craexit,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = sun8i_ss_hash_run,
+	},
 },
 {	.type = CRYPTO_ALG_TYPE_AHASH,
 	.ss_algo_id = SS_ID_HASH_SHA1,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = sun8i_ss_hash_init,
 		.update = sun8i_ss_hash_update,
 		.final = sun8i_ss_hash_final,
@@ -330,6 +347,8 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 		.digest = sun8i_ss_hash_digest,
 		.export = sun8i_ss_hash_export,
 		.import = sun8i_ss_hash_import,
+		.init_tfm = sun8i_ss_hash_init_tfm,
+		.exit_tfm = sun8i_ss_hash_exit_tfm,
 		.halg = {
 			.digestsize = SHA1_DIGEST_SIZE,
 			.statesize = sizeof(struct sha1_state),
@@ -344,15 +363,16 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 				.cra_blocksize = SHA1_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun8i_ss_hash_tfm_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun8i_ss_hash_crainit,
-				.cra_exit = sun8i_ss_hash_craexit,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = sun8i_ss_hash_run,
+	},
 },
 {	.type = CRYPTO_ALG_TYPE_AHASH,
 	.ss_algo_id = SS_ID_HASH_SHA224,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = sun8i_ss_hash_init,
 		.update = sun8i_ss_hash_update,
 		.final = sun8i_ss_hash_final,
@@ -360,6 +380,8 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 		.digest = sun8i_ss_hash_digest,
 		.export = sun8i_ss_hash_export,
 		.import = sun8i_ss_hash_import,
+		.init_tfm = sun8i_ss_hash_init_tfm,
+		.exit_tfm = sun8i_ss_hash_exit_tfm,
 		.halg = {
 			.digestsize = SHA224_DIGEST_SIZE,
 			.statesize = sizeof(struct sha256_state),
@@ -374,15 +396,16 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 				.cra_blocksize = SHA224_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun8i_ss_hash_tfm_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun8i_ss_hash_crainit,
-				.cra_exit = sun8i_ss_hash_craexit,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = sun8i_ss_hash_run,
+	},
 },
 {	.type = CRYPTO_ALG_TYPE_AHASH,
 	.ss_algo_id = SS_ID_HASH_SHA256,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = sun8i_ss_hash_init,
 		.update = sun8i_ss_hash_update,
 		.final = sun8i_ss_hash_final,
@@ -390,6 +413,8 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 		.digest = sun8i_ss_hash_digest,
 		.export = sun8i_ss_hash_export,
 		.import = sun8i_ss_hash_import,
+		.init_tfm = sun8i_ss_hash_init_tfm,
+		.exit_tfm = sun8i_ss_hash_exit_tfm,
 		.halg = {
 			.digestsize = SHA256_DIGEST_SIZE,
 			.statesize = sizeof(struct sha256_state),
@@ -404,15 +429,16 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 				.cra_blocksize = SHA256_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun8i_ss_hash_tfm_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun8i_ss_hash_crainit,
-				.cra_exit = sun8i_ss_hash_craexit,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = sun8i_ss_hash_run,
+	},
 },
 {	.type = CRYPTO_ALG_TYPE_AHASH,
 	.ss_algo_id = SS_ID_HASH_SHA1,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = sun8i_ss_hash_init,
 		.update = sun8i_ss_hash_update,
 		.final = sun8i_ss_hash_final,
@@ -420,6 +446,8 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 		.digest = sun8i_ss_hash_digest,
 		.export = sun8i_ss_hash_export,
 		.import = sun8i_ss_hash_import,
+		.init_tfm = sun8i_ss_hash_init_tfm,
+		.exit_tfm = sun8i_ss_hash_exit_tfm,
 		.setkey = sun8i_ss_hmac_setkey,
 		.halg = {
 			.digestsize = SHA1_DIGEST_SIZE,
@@ -435,23 +463,28 @@ static struct sun8i_ss_alg_template ss_algs[] = {
 				.cra_blocksize = SHA1_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct sun8i_ss_hash_tfm_ctx),
 				.cra_module = THIS_MODULE,
-				.cra_init = sun8i_ss_hash_crainit,
-				.cra_exit = sun8i_ss_hash_craexit,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = sun8i_ss_hash_run,
+	},
 },
 #endif
 };
 
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
 static int sun8i_ss_debugfs_show(struct seq_file *seq, void *v)
 {
-	struct sun8i_ss_dev *ss = seq->private;
+	struct sun8i_ss_dev *ss __maybe_unused = seq->private;
 	unsigned int i;
 
 	for (i = 0; i < MAXFLOW; i++)
-		seq_printf(seq, "Channel %d: nreq %lu\n", i, ss->flows[i].stat_req);
+		seq_printf(seq, "Channel %d: nreq %lu\n", i,
+#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
+			   ss->flows[i].stat_req);
+#else
+			   0ul);
+#endif
 
 	for (i = 0; i < ARRAY_SIZE(ss_algs); i++) {
 		if (!ss_algs[i].ss)
@@ -459,8 +492,8 @@ static int sun8i_ss_debugfs_show(struct seq_file *seq, void *v)
 		switch (ss_algs[i].type) {
 		case CRYPTO_ALG_TYPE_SKCIPHER:
 			seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
-				   ss_algs[i].alg.skcipher.base.cra_driver_name,
-				   ss_algs[i].alg.skcipher.base.cra_name,
+				   ss_algs[i].alg.skcipher.base.base.cra_driver_name,
+				   ss_algs[i].alg.skcipher.base.base.cra_name,
 				   ss_algs[i].stat_req, ss_algs[i].stat_fb);
 
 			seq_printf(seq, "\tLast fallback is: %s\n",
@@ -482,8 +515,8 @@ static int sun8i_ss_debugfs_show(struct seq_file *seq, void *v)
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
 			seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
-				   ss_algs[i].alg.hash.halg.base.cra_driver_name,
-				   ss_algs[i].alg.hash.halg.base.cra_name,
+				   ss_algs[i].alg.hash.base.halg.base.cra_driver_name,
+				   ss_algs[i].alg.hash.base.halg.base.cra_name,
 				   ss_algs[i].stat_req, ss_algs[i].stat_fb);
 			seq_printf(seq, "\tLast fallback is: %s\n",
 				   ss_algs[i].fbname);
@@ -502,7 +535,6 @@ static int sun8i_ss_debugfs_show(struct seq_file *seq, void *v)
 }
 
 DEFINE_SHOW_ATTRIBUTE(sun8i_ss_debugfs);
-#endif
 
 static void sun8i_ss_free_flows(struct sun8i_ss_dev *ss, int i)
 {
@@ -659,7 +691,7 @@ static int sun8i_ss_register_algs(struct sun8i_ss_dev *ss)
 			if (ss_method == SS_ID_NOTSUPP) {
 				dev_info(ss->dev,
 					 "DEBUG: Algo of %s not supported\n",
-					 ss_algs[i].alg.skcipher.base.cra_name);
+					 ss_algs[i].alg.skcipher.base.base.cra_name);
 				ss_algs[i].ss = NULL;
 				break;
 			}
@@ -667,16 +699,16 @@ static int sun8i_ss_register_algs(struct sun8i_ss_dev *ss)
 			ss_method = ss->variant->op_mode[id];
 			if (ss_method == SS_ID_NOTSUPP) {
 				dev_info(ss->dev, "DEBUG: Blockmode of %s not supported\n",
-					 ss_algs[i].alg.skcipher.base.cra_name);
+					 ss_algs[i].alg.skcipher.base.base.cra_name);
 				ss_algs[i].ss = NULL;
 				break;
 			}
 			dev_info(ss->dev, "DEBUG: Register %s\n",
-				 ss_algs[i].alg.skcipher.base.cra_name);
-			err = crypto_register_skcipher(&ss_algs[i].alg.skcipher);
+				 ss_algs[i].alg.skcipher.base.base.cra_name);
+			err = crypto_engine_register_skcipher(&ss_algs[i].alg.skcipher);
 			if (err) {
 				dev_err(ss->dev, "Fail to register %s\n",
-					ss_algs[i].alg.skcipher.base.cra_name);
+					ss_algs[i].alg.skcipher.base.base.cra_name);
 				ss_algs[i].ss = NULL;
 				return err;
 			}
@@ -695,16 +727,16 @@ static int sun8i_ss_register_algs(struct sun8i_ss_dev *ss)
 			if (ss_method == SS_ID_NOTSUPP) {
 				dev_info(ss->dev,
 					"DEBUG: Algo of %s not supported\n",
-					ss_algs[i].alg.hash.halg.base.cra_name);
+					ss_algs[i].alg.hash.base.halg.base.cra_name);
 				ss_algs[i].ss = NULL;
 				break;
 			}
 			dev_info(ss->dev, "Register %s\n",
-				 ss_algs[i].alg.hash.halg.base.cra_name);
-			err = crypto_register_ahash(&ss_algs[i].alg.hash);
+				 ss_algs[i].alg.hash.base.halg.base.cra_name);
+			err = crypto_engine_register_ahash(&ss_algs[i].alg.hash);
 			if (err) {
 				dev_err(ss->dev, "ERROR: Fail to register %s\n",
-					ss_algs[i].alg.hash.halg.base.cra_name);
+					ss_algs[i].alg.hash.base.halg.base.cra_name);
 				ss_algs[i].ss = NULL;
 				return err;
 			}
@@ -727,8 +759,8 @@ static void sun8i_ss_unregister_algs(struct sun8i_ss_dev *ss)
 		switch (ss_algs[i].type) {
 		case CRYPTO_ALG_TYPE_SKCIPHER:
 			dev_info(ss->dev, "Unregister %d %s\n", i,
-				 ss_algs[i].alg.skcipher.base.cra_name);
-			crypto_unregister_skcipher(&ss_algs[i].alg.skcipher);
+				 ss_algs[i].alg.skcipher.base.base.cra_name);
+			crypto_engine_unregister_skcipher(&ss_algs[i].alg.skcipher);
 			break;
 		case CRYPTO_ALG_TYPE_RNG:
 			dev_info(ss->dev, "Unregister %d %s\n", i,
@@ -737,8 +769,8 @@ static void sun8i_ss_unregister_algs(struct sun8i_ss_dev *ss)
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
 			dev_info(ss->dev, "Unregister %d %s\n", i,
-				 ss_algs[i].alg.hash.halg.base.cra_name);
-			crypto_unregister_ahash(&ss_algs[i].alg.hash);
+				 ss_algs[i].alg.hash.base.halg.base.cra_name);
+			crypto_engine_unregister_ahash(&ss_algs[i].alg.hash);
 			break;
 		}
 	}
@@ -851,13 +883,21 @@ static int sun8i_ss_probe(struct platform_device *pdev)
 
 	pm_runtime_put_sync(ss->dev);
 
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG)) {
+		struct dentry *dbgfs_dir __maybe_unused;
+		struct dentry *dbgfs_stats __maybe_unused;
+
+		/* Ignore error of debugfs */
+		dbgfs_dir = debugfs_create_dir("sun8i-ss", NULL);
+		dbgfs_stats = debugfs_create_file("stats", 0444,
+						   dbgfs_dir, ss,
+						   &sun8i_ss_debugfs_fops);
+
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
-	/* Ignore error of debugfs */
-	ss->dbgfs_dir = debugfs_create_dir("sun8i-ss", NULL);
-	ss->dbgfs_stats = debugfs_create_file("stats", 0444,
-					      ss->dbgfs_dir, ss,
-					      &sun8i_ss_debugfs_fops);
+		ss->dbgfs_dir = dbgfs_dir;
+		ss->dbgfs_stats = dbgfs_stats;
 #endif
+	}
 
 	return 0;
 error_alg:
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
index e6865b49b1df..d70b105dcfa1 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
@@ -9,16 +9,21 @@
  *
  * You could find the datasheet in Documentation/arch/arm/sunxi.rst
  */
-#include <linux/bottom_half.h>
-#include <linux/dma-mapping.h>
-#include <linux/pm_runtime.h>
-#include <linux/scatterlist.h>
-#include <crypto/internal/hash.h>
+
 #include <crypto/hmac.h>
+#include <crypto/internal/hash.h>
+#include <crypto/md5.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/sha1.h>
 #include <crypto/sha2.h>
-#include <crypto/md5.h>
+#include <linux/bottom_half.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 #include "sun8i-ss.h"
 
 static int sun8i_ss_hashkey(struct sun8i_ss_hash_tfm_ctx *tfmctx, const u8 *key,
@@ -60,14 +65,11 @@ int sun8i_ss_hmac_setkey(struct crypto_ahash *ahash, const u8 *key,
 			 unsigned int keylen)
 {
 	struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(ahash);
-	struct ahash_alg *alg = __crypto_ahash_alg(ahash->base.__crt_alg);
-	struct sun8i_ss_alg_template *algt;
 	int digestsize, i;
 	int bs = crypto_ahash_blocksize(ahash);
 	int ret;
 
-	algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
-	digestsize = algt->alg.hash.halg.digestsize;
+	digestsize = crypto_ahash_digestsize(ahash);
 
 	if (keylen > bs) {
 		ret = sun8i_ss_hashkey(tfmctx, key, keylen);
@@ -107,36 +109,33 @@ err_opad:
 	return ret;
 }
 
-int sun8i_ss_hash_crainit(struct crypto_tfm *tfm)
+int sun8i_ss_hash_init_tfm(struct crypto_ahash *tfm)
 {
-	struct sun8i_ss_hash_tfm_ctx *op = crypto_tfm_ctx(tfm);
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
+	struct sun8i_ss_hash_tfm_ctx *op = crypto_ahash_ctx(tfm);
+	struct ahash_alg *alg = crypto_ahash_alg(tfm);
 	struct sun8i_ss_alg_template *algt;
 	int err;
 
-	memset(op, 0, sizeof(struct sun8i_ss_hash_tfm_ctx));
-
-	algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
+	algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash.base);
 	op->ss = algt->ss;
 
-	op->enginectx.op.do_one_request = sun8i_ss_hash_run;
-
 	/* FALLBACK */
-	op->fallback_tfm = crypto_alloc_ahash(crypto_tfm_alg_name(tfm), 0,
+	op->fallback_tfm = crypto_alloc_ahash(crypto_ahash_alg_name(tfm), 0,
 					      CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(op->fallback_tfm)) {
 		dev_err(algt->ss->dev, "Fallback driver could no be loaded\n");
 		return PTR_ERR(op->fallback_tfm);
 	}
 
-	if (algt->alg.hash.halg.statesize < crypto_ahash_statesize(op->fallback_tfm))
-		algt->alg.hash.halg.statesize = crypto_ahash_statesize(op->fallback_tfm);
+	crypto_ahash_set_statesize(tfm,
+				   crypto_ahash_statesize(op->fallback_tfm));
 
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+	crypto_ahash_set_reqsize(tfm,
 				 sizeof(struct sun8i_ss_hash_reqctx) +
 				 crypto_ahash_reqsize(op->fallback_tfm));
 
-	memcpy(algt->fbname, crypto_tfm_alg_driver_name(&op->fallback_tfm->base), CRYPTO_MAX_ALG_NAME);
+	memcpy(algt->fbname, crypto_ahash_driver_name(op->fallback_tfm),
+	       CRYPTO_MAX_ALG_NAME);
 
 	err = pm_runtime_get_sync(op->ss->dev);
 	if (err < 0)
@@ -148,9 +147,9 @@ error_pm:
 	return err;
 }
 
-void sun8i_ss_hash_craexit(struct crypto_tfm *tfm)
+void sun8i_ss_hash_exit_tfm(struct crypto_ahash *tfm)
 {
-	struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_tfm_ctx(tfm);
+	struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
 
 	kfree_sensitive(tfmctx->ipad);
 	kfree_sensitive(tfmctx->opad);
@@ -202,20 +201,23 @@ int sun8i_ss_hash_final(struct ahash_request *areq)
 	struct sun8i_ss_hash_reqctx *rctx = ahash_request_ctx(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
-	struct sun8i_ss_alg_template *algt;
-#endif
 
 	ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
 	rctx->fallback_req.base.flags = areq->base.flags &
 					CRYPTO_TFM_REQ_MAY_SLEEP;
 	rctx->fallback_req.result = areq->result;
 
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG)) {
+		struct ahash_alg *alg = crypto_ahash_alg(tfm);
+		struct sun8i_ss_alg_template *algt __maybe_unused;
+
+		algt = container_of(alg, struct sun8i_ss_alg_template,
+				    alg.hash.base);
+
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
-	algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
-	algt->stat_fb++;
+		algt->stat_fb++;
 #endif
+	}
 
 	return crypto_ahash_final(&rctx->fallback_req);
 }
@@ -240,10 +242,6 @@ int sun8i_ss_hash_finup(struct ahash_request *areq)
 	struct sun8i_ss_hash_reqctx *rctx = ahash_request_ctx(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
-	struct sun8i_ss_alg_template *algt;
-#endif
 
 	ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
 	rctx->fallback_req.base.flags = areq->base.flags &
@@ -252,10 +250,18 @@ int sun8i_ss_hash_finup(struct ahash_request *areq)
 	rctx->fallback_req.nbytes = areq->nbytes;
 	rctx->fallback_req.src = areq->src;
 	rctx->fallback_req.result = areq->result;
+
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG)) {
+		struct ahash_alg *alg = crypto_ahash_alg(tfm);
+		struct sun8i_ss_alg_template *algt __maybe_unused;
+
+		algt = container_of(alg, struct sun8i_ss_alg_template,
+				    alg.hash.base);
+
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
-	algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
-	algt->stat_fb++;
+		algt->stat_fb++;
 #endif
+	}
 
 	return crypto_ahash_finup(&rctx->fallback_req);
 }
@@ -265,10 +271,6 @@ static int sun8i_ss_hash_digest_fb(struct ahash_request *areq)
 	struct sun8i_ss_hash_reqctx *rctx = ahash_request_ctx(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
-#ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
-	struct sun8i_ss_alg_template *algt;
-#endif
 
 	ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
 	rctx->fallback_req.base.flags = areq->base.flags &
@@ -277,10 +279,18 @@ static int sun8i_ss_hash_digest_fb(struct ahash_request *areq)
 	rctx->fallback_req.nbytes = areq->nbytes;
 	rctx->fallback_req.src = areq->src;
 	rctx->fallback_req.result = areq->result;
+
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG)) {
+		struct ahash_alg *alg = crypto_ahash_alg(tfm);
+		struct sun8i_ss_alg_template *algt __maybe_unused;
+
+		algt = container_of(alg, struct sun8i_ss_alg_template,
+				    alg.hash.base);
+
 #ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG
-	algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
-	algt->stat_fb++;
+		algt->stat_fb++;
 #endif
+	}
 
 	return crypto_ahash_digest(&rctx->fallback_req);
 }
@@ -347,11 +357,11 @@ static int sun8i_ss_run_hash_task(struct sun8i_ss_dev *ss,
 static bool sun8i_ss_hash_need_fallback(struct ahash_request *areq)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
+	struct ahash_alg *alg = crypto_ahash_alg(tfm);
 	struct sun8i_ss_alg_template *algt;
 	struct scatterlist *sg;
 
-	algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
+	algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash.base);
 
 	if (areq->nbytes == 0) {
 		algt->stat_fb_len++;
@@ -396,8 +406,8 @@ static bool sun8i_ss_hash_need_fallback(struct ahash_request *areq)
 int sun8i_ss_hash_digest(struct ahash_request *areq)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
 	struct sun8i_ss_hash_reqctx *rctx = ahash_request_ctx(areq);
+	struct ahash_alg *alg = crypto_ahash_alg(tfm);
 	struct sun8i_ss_alg_template *algt;
 	struct sun8i_ss_dev *ss;
 	struct crypto_engine *engine;
@@ -406,7 +416,7 @@ int sun8i_ss_hash_digest(struct ahash_request *areq)
 	if (sun8i_ss_hash_need_fallback(areq))
 		return sun8i_ss_hash_digest_fb(areq);
 
-	algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
+	algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash.base);
 	ss = algt->ss;
 
 	e = sun8i_ss_get_engine_number(ss);
@@ -482,8 +492,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
 	struct ahash_request *areq = container_of(breq, struct ahash_request, base);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct sun8i_ss_hash_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
 	struct sun8i_ss_hash_reqctx *rctx = ahash_request_ctx(areq);
+	struct ahash_alg *alg = crypto_ahash_alg(tfm);
 	struct sun8i_ss_alg_template *algt;
 	struct sun8i_ss_dev *ss;
 	struct scatterlist *sg;
@@ -502,10 +512,10 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
 	 */
 	int hmac = 0;
 
-	algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash);
+	algt = container_of(alg, struct sun8i_ss_alg_template, alg.hash.base);
 	ss = algt->ss;
 
-	digestsize = algt->alg.hash.halg.digestsize;
+	digestsize = crypto_ahash_digestsize(tfm);
 	if (digestsize == SHA224_DIGEST_SIZE)
 		digestsize = SHA256_DIGEST_SIZE;
 
@@ -698,7 +708,7 @@ err_dma_result:
 	}
 
 	if (!err)
-		memcpy(areq->result, result, algt->alg.hash.halg.digestsize);
+		memcpy(areq->result, result, crypto_ahash_digestsize(tfm));
 theend:
 	local_bh_disable();
 	crypto_finalize_hash_request(engine, breq, err);
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
index bfe305261e9a..ae66eb45fb24 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h
@@ -201,16 +201,12 @@ struct sun8i_cipher_req_ctx {
 
 /*
  * struct sun8i_cipher_tfm_ctx - context for a skcipher TFM
- * @enginectx:		crypto_engine used by this TFM
  * @key:		pointer to key data
  * @keylen:		len of the key
  * @ss:			pointer to the private data of driver handling this TFM
  * @fallback_tfm:	pointer to the fallback TFM
- *
- * enginectx must be the first element
  */
 struct sun8i_cipher_tfm_ctx {
-	struct crypto_engine_ctx enginectx;
 	u32 *key;
 	u32 keylen;
 	struct sun8i_ss_dev *ss;
@@ -229,14 +225,10 @@ struct sun8i_ss_rng_tfm_ctx {
 
 /*
  * struct sun8i_ss_hash_tfm_ctx - context for an ahash TFM
- * @enginectx:		crypto_engine used by this TFM
  * @fallback_tfm:	pointer to the fallback TFM
  * @ss:			pointer to the private data of driver handling this TFM
- *
- * enginectx must be the first element
  */
 struct sun8i_ss_hash_tfm_ctx {
-	struct crypto_engine_ctx enginectx;
 	struct crypto_ahash *fallback_tfm;
 	struct sun8i_ss_dev *ss;
 	u8 *ipad;
@@ -279,9 +271,9 @@ struct sun8i_ss_alg_template {
 	u32 ss_blockmode;
 	struct sun8i_ss_dev *ss;
 	union {
-		struct skcipher_alg skcipher;
+		struct skcipher_engine_alg skcipher;
 		struct rng_alg rng;
-		struct ahash_alg hash;
+		struct ahash_engine_alg hash;
 	} alg;
 	unsigned long stat_req;
 	unsigned long stat_fb;
@@ -299,6 +291,7 @@ int sun8i_ss_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			 unsigned int keylen);
 int sun8i_ss_cipher_init(struct crypto_tfm *tfm);
 void sun8i_ss_cipher_exit(struct crypto_tfm *tfm);
+int sun8i_ss_handle_cipher_request(struct crypto_engine *engine, void *areq);
 int sun8i_ss_skdecrypt(struct skcipher_request *areq);
 int sun8i_ss_skencrypt(struct skcipher_request *areq);
 
@@ -311,8 +304,8 @@ int sun8i_ss_prng_seed(struct crypto_rng *tfm, const u8 *seed, unsigned int slen
 int sun8i_ss_prng_init(struct crypto_tfm *tfm);
 void sun8i_ss_prng_exit(struct crypto_tfm *tfm);
 
-int sun8i_ss_hash_crainit(struct crypto_tfm *tfm);
-void sun8i_ss_hash_craexit(struct crypto_tfm *tfm);
+int sun8i_ss_hash_init_tfm(struct crypto_ahash *tfm);
+void sun8i_ss_hash_exit_tfm(struct crypto_ahash *tfm);
 int sun8i_ss_hash_init(struct ahash_request *areq);
 int sun8i_ss_hash_export(struct ahash_request *areq, void *out);
 int sun8i_ss_hash_import(struct ahash_request *areq, const void *in);

From 4dd4d5e486ebdeb48dbc558237d4ba8aab8917d5 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:55 +0800
Subject: [PATCH 1342/2047] crypto: amlogic - Use new crypto_engine_op
 interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/amlogic/amlogic-gxl-cipher.c | 11 ++--
 drivers/crypto/amlogic/amlogic-gxl-core.c   | 60 ++++++++++++++-------
 drivers/crypto/amlogic/amlogic-gxl.h        |  5 +-
 3 files changed, 47 insertions(+), 29 deletions(-)

diff --git a/drivers/crypto/amlogic/amlogic-gxl-cipher.c b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
index 03b6586b71ef..3308406612fc 100644
--- a/drivers/crypto/amlogic/amlogic-gxl-cipher.c
+++ b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
@@ -65,7 +65,7 @@ static int meson_cipher_do_fallback(struct skcipher_request *areq)
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	struct meson_alg_template *algt;
 
-	algt = container_of(alg, struct meson_alg_template, alg.skcipher);
+	algt = container_of(alg, struct meson_alg_template, alg.skcipher.base);
 	algt->stat_fb++;
 #endif
 	skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
@@ -101,7 +101,7 @@ static int meson_cipher(struct skcipher_request *areq)
 	void *backup_iv = NULL, *bkeyiv;
 	u32 v;
 
-	algt = container_of(alg, struct meson_alg_template, alg.skcipher);
+	algt = container_of(alg, struct meson_alg_template, alg.skcipher.base);
 
 	dev_dbg(mc->dev, "%s %s %u %x IV(%u) key=%u flow=%d\n", __func__,
 		crypto_tfm_alg_name(areq->base.tfm),
@@ -258,8 +258,7 @@ theend:
 	return err;
 }
 
-static int meson_handle_cipher_request(struct crypto_engine *engine,
-				       void *areq)
+int meson_handle_cipher_request(struct crypto_engine *engine, void *areq)
 {
 	int err;
 	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
@@ -318,7 +317,7 @@ int meson_cipher_init(struct crypto_tfm *tfm)
 
 	memset(op, 0, sizeof(struct meson_cipher_tfm_ctx));
 
-	algt = container_of(alg, struct meson_alg_template, alg.skcipher);
+	algt = container_of(alg, struct meson_alg_template, alg.skcipher.base);
 	op->mc = algt->mc;
 
 	op->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
@@ -331,8 +330,6 @@ int meson_cipher_init(struct crypto_tfm *tfm)
 	sktfm->reqsize = sizeof(struct meson_cipher_req_ctx) +
 			 crypto_skcipher_reqsize(op->fallback_tfm);
 
-	op->enginectx.op.do_one_request = meson_handle_cipher_request;
-
 	return 0;
 }
 
diff --git a/drivers/crypto/amlogic/amlogic-gxl-core.c b/drivers/crypto/amlogic/amlogic-gxl-core.c
index 937187027ad5..9846d791c956 100644
--- a/drivers/crypto/amlogic/amlogic-gxl-core.c
+++ b/drivers/crypto/amlogic/amlogic-gxl-core.c
@@ -6,17 +6,20 @@
  *
  * Core file which registers crypto algorithms supported by the hardware.
  */
+
+#include <crypto/engine.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/clk.h>
-#include <linux/crypto.h>
-#include <linux/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/dma-mapping.h>
 
 #include "amlogic-gxl.h"
 
@@ -47,7 +50,7 @@ static struct meson_alg_template mc_algs[] = {
 {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.blockmode = MESON_OPMODE_CBC,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base = {
 			.cra_name = "cbc(aes)",
 			.cra_driver_name = "cbc-aes-gxl",
@@ -68,12 +71,15 @@ static struct meson_alg_template mc_algs[] = {
 		.setkey		= meson_aes_setkey,
 		.encrypt	= meson_skencrypt,
 		.decrypt	= meson_skdecrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = meson_handle_cipher_request,
+	},
 },
 {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.blockmode = MESON_OPMODE_ECB,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base = {
 			.cra_name = "ecb(aes)",
 			.cra_driver_name = "ecb-aes-gxl",
@@ -93,33 +99,43 @@ static struct meson_alg_template mc_algs[] = {
 		.setkey		= meson_aes_setkey,
 		.encrypt	= meson_skencrypt,
 		.decrypt	= meson_skdecrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = meson_handle_cipher_request,
+	},
 },
 };
 
-#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
 static int meson_debugfs_show(struct seq_file *seq, void *v)
 {
-	struct meson_dev *mc = seq->private;
+	struct meson_dev *mc __maybe_unused = seq->private;
 	int i;
 
 	for (i = 0; i < MAXFLOW; i++)
-		seq_printf(seq, "Channel %d: nreq %lu\n", i, mc->chanlist[i].stat_req);
+		seq_printf(seq, "Channel %d: nreq %lu\n", i,
+#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+			   mc->chanlist[i].stat_req);
+#else
+			   0ul);
+#endif
 
 	for (i = 0; i < ARRAY_SIZE(mc_algs); i++) {
 		switch (mc_algs[i].type) {
 		case CRYPTO_ALG_TYPE_SKCIPHER:
 			seq_printf(seq, "%s %s %lu %lu\n",
-				   mc_algs[i].alg.skcipher.base.cra_driver_name,
-				   mc_algs[i].alg.skcipher.base.cra_name,
+				   mc_algs[i].alg.skcipher.base.base.cra_driver_name,
+				   mc_algs[i].alg.skcipher.base.base.cra_name,
+#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
 				   mc_algs[i].stat_req, mc_algs[i].stat_fb);
+#else
+				   0ul, 0ul);
+#endif
 			break;
 		}
 	}
 	return 0;
 }
 DEFINE_SHOW_ATTRIBUTE(meson_debugfs);
-#endif
 
 static void meson_free_chanlist(struct meson_dev *mc, int i)
 {
@@ -183,10 +199,10 @@ static int meson_register_algs(struct meson_dev *mc)
 		mc_algs[i].mc = mc;
 		switch (mc_algs[i].type) {
 		case CRYPTO_ALG_TYPE_SKCIPHER:
-			err = crypto_register_skcipher(&mc_algs[i].alg.skcipher);
+			err = crypto_engine_register_skcipher(&mc_algs[i].alg.skcipher);
 			if (err) {
 				dev_err(mc->dev, "Fail to register %s\n",
-					mc_algs[i].alg.skcipher.base.cra_name);
+					mc_algs[i].alg.skcipher.base.base.cra_name);
 				mc_algs[i].mc = NULL;
 				return err;
 			}
@@ -206,7 +222,7 @@ static void meson_unregister_algs(struct meson_dev *mc)
 			continue;
 		switch (mc_algs[i].type) {
 		case CRYPTO_ALG_TYPE_SKCIPHER:
-			crypto_unregister_skcipher(&mc_algs[i].alg.skcipher);
+			crypto_engine_unregister_skcipher(&mc_algs[i].alg.skcipher);
 			break;
 		}
 	}
@@ -264,10 +280,16 @@ static int meson_crypto_probe(struct platform_device *pdev)
 	if (err)
 		goto error_alg;
 
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG)) {
+		struct dentry *dbgfs_dir;
+
+		dbgfs_dir = debugfs_create_dir("gxl-crypto", NULL);
+		debugfs_create_file("stats", 0444, dbgfs_dir, mc, &meson_debugfs_fops);
+
 #ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
-	mc->dbgfs_dir = debugfs_create_dir("gxl-crypto", NULL);
-	debugfs_create_file("stats", 0444, mc->dbgfs_dir, mc, &meson_debugfs_fops);
+		mc->dbgfs_dir = dbgfs_dir;
 #endif
+	}
 
 	return 0;
 error_alg:
diff --git a/drivers/crypto/amlogic/amlogic-gxl.h b/drivers/crypto/amlogic/amlogic-gxl.h
index 8c0746a1d6d4..1013a666c932 100644
--- a/drivers/crypto/amlogic/amlogic-gxl.h
+++ b/drivers/crypto/amlogic/amlogic-gxl.h
@@ -114,7 +114,6 @@ struct meson_cipher_req_ctx {
 
 /*
  * struct meson_cipher_tfm_ctx - context for a skcipher TFM
- * @enginectx:		crypto_engine used by this TFM
  * @key:		pointer to key data
  * @keylen:		len of the key
  * @keymode:		The keymode(type and size of key) associated with this TFM
@@ -122,7 +121,6 @@ struct meson_cipher_req_ctx {
  * @fallback_tfm:	pointer to the fallback TFM
  */
 struct meson_cipher_tfm_ctx {
-	struct crypto_engine_ctx enginectx;
 	u32 *key;
 	u32 keylen;
 	u32 keymode;
@@ -143,7 +141,7 @@ struct meson_alg_template {
 	u32 type;
 	u32 blockmode;
 	union {
-		struct skcipher_alg skcipher;
+		struct skcipher_engine_alg skcipher;
 	} alg;
 	struct meson_dev *mc;
 #ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
@@ -160,3 +158,4 @@ int meson_cipher_init(struct crypto_tfm *tfm);
 void meson_cipher_exit(struct crypto_tfm *tfm);
 int meson_skdecrypt(struct skcipher_request *areq);
 int meson_skencrypt(struct skcipher_request *areq);
+int meson_handle_cipher_request(struct crypto_engine *engine, void *areq);

From 304506f299b324f188b624dca2d4f29722746432 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:57 +0800
Subject: [PATCH 1343/2047] crypto: aspeed - Use new crypto_engine_op interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/aspeed/aspeed-acry.c        |  39 +++---
 drivers/crypto/aspeed/aspeed-hace-crypto.c | 132 +++++++++++++++------
 drivers/crypto/aspeed/aspeed-hace-hash.c   |  97 +++++++++++----
 drivers/crypto/aspeed/aspeed-hace.c        |   9 +-
 drivers/crypto/aspeed/aspeed-hace.h        |  30 ++---
 5 files changed, 202 insertions(+), 105 deletions(-)

diff --git a/drivers/crypto/aspeed/aspeed-acry.c b/drivers/crypto/aspeed/aspeed-acry.c
index 5ae529ce6806..2a970367e686 100644
--- a/drivers/crypto/aspeed/aspeed-acry.c
+++ b/drivers/crypto/aspeed/aspeed-acry.c
@@ -2,25 +2,26 @@
 /*
  * Copyright 2021 Aspeed Technology Inc.
  */
-#include <crypto/akcipher.h>
-#include <crypto/algapi.h>
 #include <crypto/engine.h>
 #include <crypto/internal/akcipher.h>
 #include <crypto/internal/rsa.h>
 #include <crypto/scatterwalk.h>
 #include <linux/clk.h>
-#include <linux/platform_device.h>
+#include <linux/count_zeros.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/mfd/syscon.h>
-#include <linux/interrupt.h>
-#include <linux/count_zeros.h>
-#include <linux/err.h>
-#include <linux/dma-mapping.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 
 #ifdef CONFIG_CRYPTO_DEV_ASPEED_DEBUG
 #define ACRY_DBG(d, fmt, ...)	\
@@ -112,7 +113,6 @@ struct aspeed_acry_dev {
 };
 
 struct aspeed_acry_ctx {
-	struct crypto_engine_ctx	enginectx;
 	struct aspeed_acry_dev		*acry_dev;
 
 	struct rsa_key			key;
@@ -131,7 +131,7 @@ struct aspeed_acry_ctx {
 
 struct aspeed_acry_alg {
 	struct aspeed_acry_dev		*acry_dev;
-	struct akcipher_alg		akcipher;
+	struct akcipher_engine_alg	akcipher;
 };
 
 enum aspeed_rsa_key_mode {
@@ -577,7 +577,7 @@ static int aspeed_acry_rsa_init_tfm(struct crypto_akcipher *tfm)
 	const char *name = crypto_tfm_alg_name(&tfm->base);
 	struct aspeed_acry_alg *acry_alg;
 
-	acry_alg = container_of(alg, struct aspeed_acry_alg, akcipher);
+	acry_alg = container_of(alg, struct aspeed_acry_alg, akcipher.base);
 
 	ctx->acry_dev = acry_alg->acry_dev;
 
@@ -589,8 +589,6 @@ static int aspeed_acry_rsa_init_tfm(struct crypto_akcipher *tfm)
 		return PTR_ERR(ctx->fallback_tfm);
 	}
 
-	ctx->enginectx.op.do_one_request = aspeed_acry_do_request;
-
 	return 0;
 }
 
@@ -603,7 +601,7 @@ static void aspeed_acry_rsa_exit_tfm(struct crypto_akcipher *tfm)
 
 static struct aspeed_acry_alg aspeed_acry_akcipher_algs[] = {
 	{
-		.akcipher = {
+		.akcipher.base = {
 			.encrypt = aspeed_acry_rsa_enc,
 			.decrypt = aspeed_acry_rsa_dec,
 			.sign = aspeed_acry_rsa_dec,
@@ -625,6 +623,9 @@ static struct aspeed_acry_alg aspeed_acry_akcipher_algs[] = {
 				.cra_ctxsize = sizeof(struct aspeed_acry_ctx),
 			},
 		},
+		.akcipher.op = {
+			.do_one_request = aspeed_acry_do_request,
+		},
 	},
 };
 
@@ -634,10 +635,10 @@ static void aspeed_acry_register(struct aspeed_acry_dev *acry_dev)
 
 	for (i = 0; i < ARRAY_SIZE(aspeed_acry_akcipher_algs); i++) {
 		aspeed_acry_akcipher_algs[i].acry_dev = acry_dev;
-		rc = crypto_register_akcipher(&aspeed_acry_akcipher_algs[i].akcipher);
+		rc = crypto_engine_register_akcipher(&aspeed_acry_akcipher_algs[i].akcipher);
 		if (rc) {
 			ACRY_DBG(acry_dev, "Failed to register %s\n",
-				 aspeed_acry_akcipher_algs[i].akcipher.base.cra_name);
+				 aspeed_acry_akcipher_algs[i].akcipher.base.base.cra_name);
 		}
 	}
 }
@@ -647,7 +648,7 @@ static void aspeed_acry_unregister(struct aspeed_acry_dev *acry_dev)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(aspeed_acry_akcipher_algs); i++)
-		crypto_unregister_akcipher(&aspeed_acry_akcipher_algs[i].akcipher);
+		crypto_engine_unregister_akcipher(&aspeed_acry_akcipher_algs[i].akcipher);
 }
 
 /* ACRY interrupt service routine. */
diff --git a/drivers/crypto/aspeed/aspeed-hace-crypto.c b/drivers/crypto/aspeed/aspeed-hace-crypto.c
index 8d6d9ecb3a28..f0eddb7854e5 100644
--- a/drivers/crypto/aspeed/aspeed-hace-crypto.c
+++ b/drivers/crypto/aspeed/aspeed-hace-crypto.c
@@ -4,6 +4,17 @@
  */
 
 #include "aspeed-hace.h"
+#include <crypto/des.h>
+#include <crypto/engine.h>
+#include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/string.h>
 
 #ifdef CONFIG_CRYPTO_DEV_ASPEED_HACE_CRYPTO_DEBUG
 #define CIPHER_DBG(h, fmt, ...)	\
@@ -696,7 +707,7 @@ static int aspeed_crypto_cra_init(struct crypto_skcipher *tfm)
 	struct aspeed_hace_alg *crypto_alg;
 
 
-	crypto_alg = container_of(alg, struct aspeed_hace_alg, alg.skcipher);
+	crypto_alg = container_of(alg, struct aspeed_hace_alg, alg.skcipher.base);
 	ctx->hace_dev = crypto_alg->hace_dev;
 	ctx->start = aspeed_hace_skcipher_trigger;
 
@@ -713,8 +724,6 @@ static int aspeed_crypto_cra_init(struct crypto_skcipher *tfm)
 	crypto_skcipher_set_reqsize(tfm, sizeof(struct aspeed_cipher_reqctx) +
 			 crypto_skcipher_reqsize(ctx->fallback_tfm));
 
-	ctx->enginectx.op.do_one_request = aspeed_crypto_do_request;
-
 	return 0;
 }
 
@@ -729,7 +738,7 @@ static void aspeed_crypto_cra_exit(struct crypto_skcipher *tfm)
 
 static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.min_keysize	= AES_MIN_KEY_SIZE,
 			.max_keysize	= AES_MAX_KEY_SIZE,
 			.setkey		= aspeed_aes_setkey,
@@ -749,10 +758,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.ivsize		= AES_BLOCK_SIZE,
 			.min_keysize	= AES_MIN_KEY_SIZE,
 			.max_keysize	= AES_MAX_KEY_SIZE,
@@ -773,10 +785,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.ivsize		= AES_BLOCK_SIZE,
 			.min_keysize	= AES_MIN_KEY_SIZE,
 			.max_keysize	= AES_MAX_KEY_SIZE,
@@ -797,10 +812,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.ivsize		= AES_BLOCK_SIZE,
 			.min_keysize	= AES_MIN_KEY_SIZE,
 			.max_keysize	= AES_MAX_KEY_SIZE,
@@ -821,10 +839,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.min_keysize	= DES_KEY_SIZE,
 			.max_keysize	= DES_KEY_SIZE,
 			.setkey		= aspeed_des_setkey,
@@ -844,10 +865,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.ivsize		= DES_BLOCK_SIZE,
 			.min_keysize	= DES_KEY_SIZE,
 			.max_keysize	= DES_KEY_SIZE,
@@ -868,10 +892,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.ivsize		= DES_BLOCK_SIZE,
 			.min_keysize	= DES_KEY_SIZE,
 			.max_keysize	= DES_KEY_SIZE,
@@ -892,10 +919,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.ivsize		= DES_BLOCK_SIZE,
 			.min_keysize	= DES_KEY_SIZE,
 			.max_keysize	= DES_KEY_SIZE,
@@ -916,10 +946,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.min_keysize	= DES3_EDE_KEY_SIZE,
 			.max_keysize	= DES3_EDE_KEY_SIZE,
 			.setkey		= aspeed_des_setkey,
@@ -939,10 +972,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.ivsize		= DES_BLOCK_SIZE,
 			.min_keysize	= DES3_EDE_KEY_SIZE,
 			.max_keysize	= DES3_EDE_KEY_SIZE,
@@ -963,10 +999,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.ivsize		= DES_BLOCK_SIZE,
 			.min_keysize	= DES3_EDE_KEY_SIZE,
 			.max_keysize	= DES3_EDE_KEY_SIZE,
@@ -987,10 +1026,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.ivsize		= DES_BLOCK_SIZE,
 			.min_keysize	= DES3_EDE_KEY_SIZE,
 			.max_keysize	= DES3_EDE_KEY_SIZE,
@@ -1011,13 +1053,16 @@ static struct aspeed_hace_alg aspeed_crypto_algs[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 };
 
 static struct aspeed_hace_alg aspeed_crypto_algs_g6[] = {
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.ivsize		= AES_BLOCK_SIZE,
 			.min_keysize	= AES_MIN_KEY_SIZE,
 			.max_keysize	= AES_MAX_KEY_SIZE,
@@ -1037,10 +1082,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs_g6[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.ivsize		= DES_BLOCK_SIZE,
 			.min_keysize	= DES_KEY_SIZE,
 			.max_keysize	= DES_KEY_SIZE,
@@ -1060,10 +1108,13 @@ static struct aspeed_hace_alg aspeed_crypto_algs_g6[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 	{
-		.alg.skcipher = {
+		.alg.skcipher.base = {
 			.ivsize		= DES_BLOCK_SIZE,
 			.min_keysize	= DES3_EDE_KEY_SIZE,
 			.max_keysize	= DES3_EDE_KEY_SIZE,
@@ -1083,7 +1134,10 @@ static struct aspeed_hace_alg aspeed_crypto_algs_g6[] = {
 				.cra_alignmask		= 0x0f,
 				.cra_module		= THIS_MODULE,
 			}
-		}
+		},
+		.alg.skcipher.op = {
+			.do_one_request = aspeed_crypto_do_request,
+		},
 	},
 
 };
@@ -1093,13 +1147,13 @@ void aspeed_unregister_hace_crypto_algs(struct aspeed_hace_dev *hace_dev)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(aspeed_crypto_algs); i++)
-		crypto_unregister_skcipher(&aspeed_crypto_algs[i].alg.skcipher);
+		crypto_engine_unregister_skcipher(&aspeed_crypto_algs[i].alg.skcipher);
 
 	if (hace_dev->version != AST2600_VERSION)
 		return;
 
 	for (i = 0; i < ARRAY_SIZE(aspeed_crypto_algs_g6); i++)
-		crypto_unregister_skcipher(&aspeed_crypto_algs_g6[i].alg.skcipher);
+		crypto_engine_unregister_skcipher(&aspeed_crypto_algs_g6[i].alg.skcipher);
 }
 
 void aspeed_register_hace_crypto_algs(struct aspeed_hace_dev *hace_dev)
@@ -1110,10 +1164,10 @@ void aspeed_register_hace_crypto_algs(struct aspeed_hace_dev *hace_dev)
 
 	for (i = 0; i < ARRAY_SIZE(aspeed_crypto_algs); i++) {
 		aspeed_crypto_algs[i].hace_dev = hace_dev;
-		rc = crypto_register_skcipher(&aspeed_crypto_algs[i].alg.skcipher);
+		rc = crypto_engine_register_skcipher(&aspeed_crypto_algs[i].alg.skcipher);
 		if (rc) {
 			CIPHER_DBG(hace_dev, "Failed to register %s\n",
-				   aspeed_crypto_algs[i].alg.skcipher.base.cra_name);
+				   aspeed_crypto_algs[i].alg.skcipher.base.base.cra_name);
 		}
 	}
 
@@ -1122,10 +1176,10 @@ void aspeed_register_hace_crypto_algs(struct aspeed_hace_dev *hace_dev)
 
 	for (i = 0; i < ARRAY_SIZE(aspeed_crypto_algs_g6); i++) {
 		aspeed_crypto_algs_g6[i].hace_dev = hace_dev;
-		rc = crypto_register_skcipher(&aspeed_crypto_algs_g6[i].alg.skcipher);
+		rc = crypto_engine_register_skcipher(&aspeed_crypto_algs_g6[i].alg.skcipher);
 		if (rc) {
 			CIPHER_DBG(hace_dev, "Failed to register %s\n",
-				   aspeed_crypto_algs_g6[i].alg.skcipher.base.cra_name);
+				   aspeed_crypto_algs_g6[i].alg.skcipher.base.base.cra_name);
 		}
 	}
 }
diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c
index f8c96568142e..abc459af2ac8 100644
--- a/drivers/crypto/aspeed/aspeed-hace-hash.c
+++ b/drivers/crypto/aspeed/aspeed-hace-hash.c
@@ -4,6 +4,17 @@
  */
 
 #include "aspeed-hace.h"
+#include <crypto/engine.h>
+#include <crypto/hmac.h>
+#include <crypto/internal/hash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
 
 #ifdef CONFIG_CRYPTO_DEV_ASPEED_DEBUG
 #define AHASH_DBG(h, fmt, ...)	\
@@ -858,7 +869,7 @@ static int aspeed_sham_cra_init(struct crypto_tfm *tfm)
 	struct aspeed_sham_ctx *tctx = crypto_tfm_ctx(tfm);
 	struct aspeed_hace_alg *ast_alg;
 
-	ast_alg = container_of(alg, struct aspeed_hace_alg, alg.ahash);
+	ast_alg = container_of(alg, struct aspeed_hace_alg, alg.ahash.base);
 	tctx->hace_dev = ast_alg->hace_dev;
 	tctx->flags = 0;
 
@@ -880,8 +891,6 @@ static int aspeed_sham_cra_init(struct crypto_tfm *tfm)
 		}
 	}
 
-	tctx->enginectx.op.do_one_request = aspeed_ahash_do_one;
-
 	return 0;
 }
 
@@ -919,7 +928,7 @@ static int aspeed_sham_import(struct ahash_request *req, const void *in)
 
 static struct aspeed_hace_alg aspeed_ahash_algs[] = {
 	{
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sham_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -946,9 +955,12 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 	{
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sham_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -975,9 +987,12 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 	{
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sham_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -1004,10 +1019,13 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 	{
 		.alg_base = "sha1",
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sham_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -1036,10 +1054,13 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 	{
 		.alg_base = "sha224",
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sham_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -1068,10 +1089,13 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 	{
 		.alg_base = "sha256",
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sham_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -1100,12 +1124,15 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 };
 
 static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = {
 	{
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sham_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -1132,9 +1159,12 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 	{
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sham_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -1161,9 +1191,12 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 	{
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sha512s_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -1190,9 +1223,12 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 	{
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sha512s_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -1219,10 +1255,13 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 	{
 		.alg_base = "sha384",
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sham_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -1251,10 +1290,13 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 	{
 		.alg_base = "sha512",
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sham_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -1283,10 +1325,13 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 	{
 		.alg_base = "sha512_224",
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sha512s_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -1315,10 +1360,13 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 	{
 		.alg_base = "sha512_256",
-		.alg.ahash = {
+		.alg.ahash.base = {
 			.init	= aspeed_sha512s_init,
 			.update	= aspeed_sham_update,
 			.final	= aspeed_sham_final,
@@ -1347,6 +1395,9 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = {
 				}
 			}
 		},
+		.alg.ahash.op = {
+			.do_one_request = aspeed_ahash_do_one,
+		},
 	},
 };
 
@@ -1355,13 +1406,13 @@ void aspeed_unregister_hace_hash_algs(struct aspeed_hace_dev *hace_dev)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(aspeed_ahash_algs); i++)
-		crypto_unregister_ahash(&aspeed_ahash_algs[i].alg.ahash);
+		crypto_engine_unregister_ahash(&aspeed_ahash_algs[i].alg.ahash);
 
 	if (hace_dev->version != AST2600_VERSION)
 		return;
 
 	for (i = 0; i < ARRAY_SIZE(aspeed_ahash_algs_g6); i++)
-		crypto_unregister_ahash(&aspeed_ahash_algs_g6[i].alg.ahash);
+		crypto_engine_unregister_ahash(&aspeed_ahash_algs_g6[i].alg.ahash);
 }
 
 void aspeed_register_hace_hash_algs(struct aspeed_hace_dev *hace_dev)
@@ -1372,10 +1423,10 @@ void aspeed_register_hace_hash_algs(struct aspeed_hace_dev *hace_dev)
 
 	for (i = 0; i < ARRAY_SIZE(aspeed_ahash_algs); i++) {
 		aspeed_ahash_algs[i].hace_dev = hace_dev;
-		rc = crypto_register_ahash(&aspeed_ahash_algs[i].alg.ahash);
+		rc = crypto_engine_register_ahash(&aspeed_ahash_algs[i].alg.ahash);
 		if (rc) {
 			AHASH_DBG(hace_dev, "Failed to register %s\n",
-				  aspeed_ahash_algs[i].alg.ahash.halg.base.cra_name);
+				  aspeed_ahash_algs[i].alg.ahash.base.halg.base.cra_name);
 		}
 	}
 
@@ -1384,10 +1435,10 @@ void aspeed_register_hace_hash_algs(struct aspeed_hace_dev *hace_dev)
 
 	for (i = 0; i < ARRAY_SIZE(aspeed_ahash_algs_g6); i++) {
 		aspeed_ahash_algs_g6[i].hace_dev = hace_dev;
-		rc = crypto_register_ahash(&aspeed_ahash_algs_g6[i].alg.ahash);
+		rc = crypto_engine_register_ahash(&aspeed_ahash_algs_g6[i].alg.ahash);
 		if (rc) {
 			AHASH_DBG(hace_dev, "Failed to register %s\n",
-				  aspeed_ahash_algs_g6[i].alg.ahash.halg.base.cra_name);
+				  aspeed_ahash_algs_g6[i].alg.ahash.base.halg.base.cra_name);
 		}
 	}
 }
diff --git a/drivers/crypto/aspeed/aspeed-hace.c b/drivers/crypto/aspeed/aspeed-hace.c
index d2871e1de9c2..8f7aab82e1d8 100644
--- a/drivers/crypto/aspeed/aspeed-hace.c
+++ b/drivers/crypto/aspeed/aspeed-hace.c
@@ -3,7 +3,14 @@
  * Copyright (c) 2021 Aspeed Technology Inc.
  */
 
+#include "aspeed-hace.h"
+#include <crypto/engine.h>
 #include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
@@ -11,8 +18,6 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 
-#include "aspeed-hace.h"
-
 #ifdef CONFIG_CRYPTO_DEV_ASPEED_DEBUG
 #define HACE_DBG(d, fmt, ...)	\
 	dev_info((d)->dev, "%s() " fmt, __func__, ##__VA_ARGS__)
diff --git a/drivers/crypto/aspeed/aspeed-hace.h b/drivers/crypto/aspeed/aspeed-hace.h
index 05d0a15d546d..68f70e01fccb 100644
--- a/drivers/crypto/aspeed/aspeed-hace.h
+++ b/drivers/crypto/aspeed/aspeed-hace.h
@@ -2,25 +2,14 @@
 #ifndef __ASPEED_HACE_H__
 #define __ASPEED_HACE_H__
 
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/fips.h>
-#include <linux/dma-mapping.h>
 #include <crypto/aes.h>
-#include <crypto/des.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/internal/aead.h>
-#include <crypto/internal/akcipher.h>
-#include <crypto/internal/des.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/kpp.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/algapi.h>
 #include <crypto/engine.h>
-#include <crypto/hmac.h>
-#include <crypto/sha1.h>
+#include <crypto/hash.h>
 #include <crypto/sha2.h>
+#include <linux/bits.h>
+#include <linux/compiler_attributes.h>
+#include <linux/interrupt.h>
+#include <linux/types.h>
 
 /*****************************
  *                           *
@@ -144,6 +133,7 @@
 					 HACE_CMD_OFB | HACE_CMD_CTR)
 
 struct aspeed_hace_dev;
+struct scatterlist;
 
 typedef int (*aspeed_hace_fn_t)(struct aspeed_hace_dev *);
 
@@ -178,8 +168,6 @@ struct aspeed_sha_hmac_ctx {
 };
 
 struct aspeed_sham_ctx {
-	struct crypto_engine_ctx	enginectx;
-
 	struct aspeed_hace_dev		*hace_dev;
 	unsigned long			flags;	/* hmac flag */
 
@@ -235,8 +223,6 @@ struct aspeed_engine_crypto {
 };
 
 struct aspeed_cipher_ctx {
-	struct crypto_engine_ctx	enginectx;
-
 	struct aspeed_hace_dev		*hace_dev;
 	int				key_len;
 	u8				key[AES_MAX_KEYLENGTH];
@@ -275,8 +261,8 @@ struct aspeed_hace_alg {
 	const char			*alg_base;
 
 	union {
-		struct skcipher_alg	skcipher;
-		struct ahash_alg	ahash;
+		struct skcipher_engine_alg skcipher;
+		struct ahash_engine_alg ahash;
 	} alg;
 };
 

From d33a6a3f5a68b01acf0d79b626c9b857cc9719cc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:54:59 +0800
Subject: [PATCH 1344/2047] crypto: aspeed - Remove non-standard sha512
 algorithms

Algorithms must never be added to a driver unless there is a generic
implementation.  These truncated versions of sha512 slipped through.
Remove them as they are useless.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/aspeed/aspeed-hace-hash.c | 212 -----------------------
 1 file changed, 212 deletions(-)

diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c
index abc459af2ac8..0b6e49c06eff 100644
--- a/drivers/crypto/aspeed/aspeed-hace-hash.c
+++ b/drivers/crypto/aspeed/aspeed-hace-hash.c
@@ -59,28 +59,6 @@ static const __be64 sha512_iv[8] = {
 	cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7)
 };
 
-static const __be32 sha512_224_iv[16] = {
-	cpu_to_be32(0xC8373D8CUL), cpu_to_be32(0xA24D5419UL),
-	cpu_to_be32(0x6699E173UL), cpu_to_be32(0xD6D4DC89UL),
-	cpu_to_be32(0xAEB7FA1DUL), cpu_to_be32(0x829CFF32UL),
-	cpu_to_be32(0x14D59D67UL), cpu_to_be32(0xCF9F2F58UL),
-	cpu_to_be32(0x692B6D0FUL), cpu_to_be32(0xA84DD47BUL),
-	cpu_to_be32(0x736FE377UL), cpu_to_be32(0x4289C404UL),
-	cpu_to_be32(0xA8859D3FUL), cpu_to_be32(0xC8361D6AUL),
-	cpu_to_be32(0xADE61211UL), cpu_to_be32(0xA192D691UL)
-};
-
-static const __be32 sha512_256_iv[16] = {
-	cpu_to_be32(0x94213122UL), cpu_to_be32(0x2CF72BFCUL),
-	cpu_to_be32(0xA35F559FUL), cpu_to_be32(0xC2644CC8UL),
-	cpu_to_be32(0x6BB89323UL), cpu_to_be32(0x51B1536FUL),
-	cpu_to_be32(0x19773896UL), cpu_to_be32(0xBDEA4059UL),
-	cpu_to_be32(0xE23E2896UL), cpu_to_be32(0xE3FF8EA8UL),
-	cpu_to_be32(0x251E5EBEUL), cpu_to_be32(0x92398653UL),
-	cpu_to_be32(0xFC99012BUL), cpu_to_be32(0xAAB8852CUL),
-	cpu_to_be32(0xDC2DB70EUL), cpu_to_be32(0xA22CC581UL)
-};
-
 /* The purpose of this padding is to ensure that the padded message is a
  * multiple of 512 bits (SHA1/SHA224/SHA256) or 1024 bits (SHA384/SHA512).
  * The bit "1" is appended at the end of the message followed by
@@ -765,62 +743,6 @@ static int aspeed_sham_init(struct ahash_request *req)
 	return 0;
 }
 
-static int aspeed_sha512s_init(struct ahash_request *req)
-{
-	struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct aspeed_sham_ctx *tctx = crypto_ahash_ctx(tfm);
-	struct aspeed_hace_dev *hace_dev = tctx->hace_dev;
-	struct aspeed_sha_hmac_ctx *bctx = tctx->base;
-
-	AHASH_DBG(hace_dev, "digest size: %d\n", crypto_ahash_digestsize(tfm));
-
-	rctx->cmd = HASH_CMD_ACC_MODE;
-	rctx->flags = 0;
-
-	switch (crypto_ahash_digestsize(tfm)) {
-	case SHA224_DIGEST_SIZE:
-		rctx->cmd |= HASH_CMD_SHA512_SER | HASH_CMD_SHA512_224 |
-			     HASH_CMD_SHA_SWAP;
-		rctx->flags |= SHA_FLAGS_SHA512_224;
-		rctx->digsize = SHA224_DIGEST_SIZE;
-		rctx->block_size = SHA512_BLOCK_SIZE;
-		rctx->sha_iv = sha512_224_iv;
-		rctx->ivsize = 64;
-		memcpy(rctx->digest, sha512_224_iv, rctx->ivsize);
-		break;
-	case SHA256_DIGEST_SIZE:
-		rctx->cmd |= HASH_CMD_SHA512_SER | HASH_CMD_SHA512_256 |
-			     HASH_CMD_SHA_SWAP;
-		rctx->flags |= SHA_FLAGS_SHA512_256;
-		rctx->digsize = SHA256_DIGEST_SIZE;
-		rctx->block_size = SHA512_BLOCK_SIZE;
-		rctx->sha_iv = sha512_256_iv;
-		rctx->ivsize = 64;
-		memcpy(rctx->digest, sha512_256_iv, rctx->ivsize);
-		break;
-	default:
-		dev_warn(tctx->hace_dev->dev, "digest size %d not support\n",
-			 crypto_ahash_digestsize(tfm));
-		return -EINVAL;
-	}
-
-	rctx->bufcnt = 0;
-	rctx->total = 0;
-	rctx->digcnt[0] = 0;
-	rctx->digcnt[1] = 0;
-
-	/* HMAC init */
-	if (tctx->flags & SHA_FLAGS_HMAC) {
-		rctx->digcnt[0] = rctx->block_size;
-		rctx->bufcnt = rctx->block_size;
-		memcpy(rctx->buffer, bctx->ipad, rctx->block_size);
-		rctx->flags |= SHA_FLAGS_HMAC;
-	}
-
-	return 0;
-}
-
 static int aspeed_sham_digest(struct ahash_request *req)
 {
 	return aspeed_sham_init(req) ? : aspeed_sham_finup(req);
@@ -1195,70 +1117,6 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = {
 			.do_one_request = aspeed_ahash_do_one,
 		},
 	},
-	{
-		.alg.ahash.base = {
-			.init	= aspeed_sha512s_init,
-			.update	= aspeed_sham_update,
-			.final	= aspeed_sham_final,
-			.finup	= aspeed_sham_finup,
-			.digest	= aspeed_sham_digest,
-			.export	= aspeed_sham_export,
-			.import	= aspeed_sham_import,
-			.halg = {
-				.digestsize = SHA224_DIGEST_SIZE,
-				.statesize = sizeof(struct aspeed_sham_reqctx),
-				.base = {
-					.cra_name		= "sha512_224",
-					.cra_driver_name	= "aspeed-sha512_224",
-					.cra_priority		= 300,
-					.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
-								  CRYPTO_ALG_ASYNC |
-								  CRYPTO_ALG_KERN_DRIVER_ONLY,
-					.cra_blocksize		= SHA512_BLOCK_SIZE,
-					.cra_ctxsize		= sizeof(struct aspeed_sham_ctx),
-					.cra_alignmask		= 0,
-					.cra_module		= THIS_MODULE,
-					.cra_init		= aspeed_sham_cra_init,
-					.cra_exit		= aspeed_sham_cra_exit,
-				}
-			}
-		},
-		.alg.ahash.op = {
-			.do_one_request = aspeed_ahash_do_one,
-		},
-	},
-	{
-		.alg.ahash.base = {
-			.init	= aspeed_sha512s_init,
-			.update	= aspeed_sham_update,
-			.final	= aspeed_sham_final,
-			.finup	= aspeed_sham_finup,
-			.digest	= aspeed_sham_digest,
-			.export	= aspeed_sham_export,
-			.import	= aspeed_sham_import,
-			.halg = {
-				.digestsize = SHA256_DIGEST_SIZE,
-				.statesize = sizeof(struct aspeed_sham_reqctx),
-				.base = {
-					.cra_name		= "sha512_256",
-					.cra_driver_name	= "aspeed-sha512_256",
-					.cra_priority		= 300,
-					.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
-								  CRYPTO_ALG_ASYNC |
-								  CRYPTO_ALG_KERN_DRIVER_ONLY,
-					.cra_blocksize		= SHA512_BLOCK_SIZE,
-					.cra_ctxsize		= sizeof(struct aspeed_sham_ctx),
-					.cra_alignmask		= 0,
-					.cra_module		= THIS_MODULE,
-					.cra_init		= aspeed_sham_cra_init,
-					.cra_exit		= aspeed_sham_cra_exit,
-				}
-			}
-		},
-		.alg.ahash.op = {
-			.do_one_request = aspeed_ahash_do_one,
-		},
-	},
 	{
 		.alg_base = "sha384",
 		.alg.ahash.base = {
@@ -1329,76 +1187,6 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = {
 			.do_one_request = aspeed_ahash_do_one,
 		},
 	},
-	{
-		.alg_base = "sha512_224",
-		.alg.ahash.base = {
-			.init	= aspeed_sha512s_init,
-			.update	= aspeed_sham_update,
-			.final	= aspeed_sham_final,
-			.finup	= aspeed_sham_finup,
-			.digest	= aspeed_sham_digest,
-			.setkey	= aspeed_sham_setkey,
-			.export	= aspeed_sham_export,
-			.import	= aspeed_sham_import,
-			.halg = {
-				.digestsize = SHA224_DIGEST_SIZE,
-				.statesize = sizeof(struct aspeed_sham_reqctx),
-				.base = {
-					.cra_name		= "hmac(sha512_224)",
-					.cra_driver_name	= "aspeed-hmac-sha512_224",
-					.cra_priority		= 300,
-					.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
-								  CRYPTO_ALG_ASYNC |
-								  CRYPTO_ALG_KERN_DRIVER_ONLY,
-					.cra_blocksize		= SHA512_BLOCK_SIZE,
-					.cra_ctxsize		= sizeof(struct aspeed_sham_ctx) +
-								sizeof(struct aspeed_sha_hmac_ctx),
-					.cra_alignmask		= 0,
-					.cra_module		= THIS_MODULE,
-					.cra_init		= aspeed_sham_cra_init,
-					.cra_exit		= aspeed_sham_cra_exit,
-				}
-			}
-		},
-		.alg.ahash.op = {
-			.do_one_request = aspeed_ahash_do_one,
-		},
-	},
-	{
-		.alg_base = "sha512_256",
-		.alg.ahash.base = {
-			.init	= aspeed_sha512s_init,
-			.update	= aspeed_sham_update,
-			.final	= aspeed_sham_final,
-			.finup	= aspeed_sham_finup,
-			.digest	= aspeed_sham_digest,
-			.setkey	= aspeed_sham_setkey,
-			.export	= aspeed_sham_export,
-			.import	= aspeed_sham_import,
-			.halg = {
-				.digestsize = SHA256_DIGEST_SIZE,
-				.statesize = sizeof(struct aspeed_sham_reqctx),
-				.base = {
-					.cra_name		= "hmac(sha512_256)",
-					.cra_driver_name	= "aspeed-hmac-sha512_256",
-					.cra_priority		= 300,
-					.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
-								  CRYPTO_ALG_ASYNC |
-								  CRYPTO_ALG_KERN_DRIVER_ONLY,
-					.cra_blocksize		= SHA512_BLOCK_SIZE,
-					.cra_ctxsize		= sizeof(struct aspeed_sham_ctx) +
-								sizeof(struct aspeed_sha_hmac_ctx),
-					.cra_alignmask		= 0,
-					.cra_module		= THIS_MODULE,
-					.cra_init		= aspeed_sham_cra_init,
-					.cra_exit		= aspeed_sham_cra_exit,
-				}
-			}
-		},
-		.alg.ahash.op = {
-			.do_one_request = aspeed_ahash_do_one,
-		},
-	},
 };
 
 void aspeed_unregister_hace_hash_algs(struct aspeed_hace_dev *hace_dev)

From 623814c0408771399c4209db73f60685f7cf1d14 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:55:01 +0800
Subject: [PATCH 1345/2047] crypto: caam - Use new crypto_engine_op interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamalg.c  | 382 +++++++++++++++++++++++++--------
 drivers/crypto/caam/caamhash.c |  28 +--
 drivers/crypto/caam/caampkc.c  |  20 +-
 drivers/crypto/caam/caampkc.h  |   3 -
 4 files changed, 320 insertions(+), 113 deletions(-)

diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index da8182ee86fe..eba2d750c3b0 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -57,11 +57,14 @@
 #include "key_gen.h"
 #include "caamalg_desc.h"
 #include <asm/unaligned.h>
+#include <crypto/internal/aead.h>
 #include <crypto/internal/engine.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/xts.h>
 #include <linux/dma-mapping.h>
 #include <linux/device.h>
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/string.h>
@@ -95,13 +98,13 @@ struct caam_alg_entry {
 };
 
 struct caam_aead_alg {
-	struct aead_alg aead;
+	struct aead_engine_alg aead;
 	struct caam_alg_entry caam;
 	bool registered;
 };
 
 struct caam_skcipher_alg {
-	struct skcipher_alg skcipher;
+	struct skcipher_engine_alg skcipher;
 	struct caam_alg_entry caam;
 	bool registered;
 };
@@ -110,7 +113,6 @@ struct caam_skcipher_alg {
  * per-session context
  */
 struct caam_ctx {
-	struct crypto_engine_ctx enginectx;
 	u32 sh_desc_enc[DESC_MAX_USED_LEN];
 	u32 sh_desc_dec[DESC_MAX_USED_LEN];
 	u8 key[CAAM_MAX_KEY_SIZE];
@@ -188,7 +190,8 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 static int aead_set_sh_desc(struct crypto_aead *aead)
 {
 	struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead),
-						 struct caam_aead_alg, aead);
+						 struct caam_aead_alg,
+						 aead.base);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
@@ -738,7 +741,7 @@ static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct caam_skcipher_alg *alg =
 		container_of(crypto_skcipher_alg(skcipher), typeof(*alg),
-			     skcipher);
+			     skcipher.base);
 	struct device *jrdev = ctx->jrdev;
 	unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
 	u32 *desc;
@@ -1195,7 +1198,8 @@ static void init_authenc_job(struct aead_request *req,
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead),
-						 struct caam_aead_alg, aead);
+						 struct caam_aead_alg,
+						 aead.base);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent);
@@ -1881,7 +1885,7 @@ static int skcipher_decrypt(struct skcipher_request *req)
 
 static struct caam_skcipher_alg driver_algs[] = {
 	{
-		.skcipher = {
+		.skcipher.base = {
 			.base = {
 				.cra_name = "cbc(aes)",
 				.cra_driver_name = "cbc-aes-caam",
@@ -1894,10 +1898,13 @@ static struct caam_skcipher_alg driver_algs[] = {
 			.max_keysize = AES_MAX_KEY_SIZE,
 			.ivsize = AES_BLOCK_SIZE,
 		},
+		.skcipher.op = {
+			.do_one_request = skcipher_do_one_req,
+		},
 		.caam.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 	},
 	{
-		.skcipher = {
+		.skcipher.base = {
 			.base = {
 				.cra_name = "cbc(des3_ede)",
 				.cra_driver_name = "cbc-3des-caam",
@@ -1910,10 +1917,13 @@ static struct caam_skcipher_alg driver_algs[] = {
 			.max_keysize = DES3_EDE_KEY_SIZE,
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 		},
+		.skcipher.op = {
+			.do_one_request = skcipher_do_one_req,
+		},
 		.caam.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 	},
 	{
-		.skcipher = {
+		.skcipher.base = {
 			.base = {
 				.cra_name = "cbc(des)",
 				.cra_driver_name = "cbc-des-caam",
@@ -1926,10 +1936,13 @@ static struct caam_skcipher_alg driver_algs[] = {
 			.max_keysize = DES_KEY_SIZE,
 			.ivsize = DES_BLOCK_SIZE,
 		},
+		.skcipher.op = {
+			.do_one_request = skcipher_do_one_req,
+		},
 		.caam.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 	},
 	{
-		.skcipher = {
+		.skcipher.base = {
 			.base = {
 				.cra_name = "ctr(aes)",
 				.cra_driver_name = "ctr-aes-caam",
@@ -1943,11 +1956,14 @@ static struct caam_skcipher_alg driver_algs[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.chunksize = AES_BLOCK_SIZE,
 		},
+		.skcipher.op = {
+			.do_one_request = skcipher_do_one_req,
+		},
 		.caam.class1_alg_type = OP_ALG_ALGSEL_AES |
 					OP_ALG_AAI_CTR_MOD128,
 	},
 	{
-		.skcipher = {
+		.skcipher.base = {
 			.base = {
 				.cra_name = "rfc3686(ctr(aes))",
 				.cra_driver_name = "rfc3686-ctr-aes-caam",
@@ -1963,6 +1979,9 @@ static struct caam_skcipher_alg driver_algs[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.chunksize = AES_BLOCK_SIZE,
 		},
+		.skcipher.op = {
+			.do_one_request = skcipher_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -1970,7 +1989,7 @@ static struct caam_skcipher_alg driver_algs[] = {
 		},
 	},
 	{
-		.skcipher = {
+		.skcipher.base = {
 			.base = {
 				.cra_name = "xts(aes)",
 				.cra_driver_name = "xts-aes-caam",
@@ -1984,10 +2003,13 @@ static struct caam_skcipher_alg driver_algs[] = {
 			.max_keysize = 2 * AES_MAX_KEY_SIZE,
 			.ivsize = AES_BLOCK_SIZE,
 		},
+		.skcipher.op = {
+			.do_one_request = skcipher_do_one_req,
+		},
 		.caam.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_XTS,
 	},
 	{
-		.skcipher = {
+		.skcipher.base = {
 			.base = {
 				.cra_name = "ecb(des)",
 				.cra_driver_name = "ecb-des-caam",
@@ -1999,10 +2021,13 @@ static struct caam_skcipher_alg driver_algs[] = {
 			.min_keysize = DES_KEY_SIZE,
 			.max_keysize = DES_KEY_SIZE,
 		},
+		.skcipher.op = {
+			.do_one_request = skcipher_do_one_req,
+		},
 		.caam.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_ECB,
 	},
 	{
-		.skcipher = {
+		.skcipher.base = {
 			.base = {
 				.cra_name = "ecb(aes)",
 				.cra_driver_name = "ecb-aes-caam",
@@ -2014,10 +2039,13 @@ static struct caam_skcipher_alg driver_algs[] = {
 			.min_keysize = AES_MIN_KEY_SIZE,
 			.max_keysize = AES_MAX_KEY_SIZE,
 		},
+		.skcipher.op = {
+			.do_one_request = skcipher_do_one_req,
+		},
 		.caam.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_ECB,
 	},
 	{
-		.skcipher = {
+		.skcipher.base = {
 			.base = {
 				.cra_name = "ecb(des3_ede)",
 				.cra_driver_name = "ecb-des3-caam",
@@ -2029,13 +2057,16 @@ static struct caam_skcipher_alg driver_algs[] = {
 			.min_keysize = DES3_EDE_KEY_SIZE,
 			.max_keysize = DES3_EDE_KEY_SIZE,
 		},
+		.skcipher.op = {
+			.do_one_request = skcipher_do_one_req,
+		},
 		.caam.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_ECB,
 	},
 };
 
 static struct caam_aead_alg driver_aeads[] = {
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "rfc4106(gcm(aes))",
 				.cra_driver_name = "rfc4106-gcm-aes-caam",
@@ -2048,13 +2079,16 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = GCM_RFC4106_IV_SIZE,
 			.maxauthsize = AES_BLOCK_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
 			.nodkp = true,
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "rfc4543(gcm(aes))",
 				.cra_driver_name = "rfc4543-gcm-aes-caam",
@@ -2067,6 +2101,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = GCM_RFC4543_IV_SIZE,
 			.maxauthsize = AES_BLOCK_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
 			.nodkp = true,
@@ -2074,7 +2111,7 @@ static struct caam_aead_alg driver_aeads[] = {
 	},
 	/* Galois Counter Mode */
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "gcm(aes)",
 				.cra_driver_name = "gcm-aes-caam",
@@ -2087,6 +2124,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = GCM_AES_IV_SIZE,
 			.maxauthsize = AES_BLOCK_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
 			.nodkp = true,
@@ -2094,7 +2134,7 @@ static struct caam_aead_alg driver_aeads[] = {
 	},
 	/* single-pass ipsec_esp descriptor */
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(md5),"
 					    "ecb(cipher_null))",
@@ -2109,13 +2149,16 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = NULL_IV_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
 					   OP_ALG_AAI_HMAC_PRECOMP,
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha1),"
 					    "ecb(cipher_null))",
@@ -2130,13 +2173,16 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = NULL_IV_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
 					   OP_ALG_AAI_HMAC_PRECOMP,
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha224),"
 					    "ecb(cipher_null))",
@@ -2151,13 +2197,16 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = NULL_IV_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
 					   OP_ALG_AAI_HMAC_PRECOMP,
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha256),"
 					    "ecb(cipher_null))",
@@ -2172,13 +2221,16 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = NULL_IV_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
 					   OP_ALG_AAI_HMAC_PRECOMP,
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha384),"
 					    "ecb(cipher_null))",
@@ -2193,13 +2245,16 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = NULL_IV_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
 					   OP_ALG_AAI_HMAC_PRECOMP,
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha512),"
 					    "ecb(cipher_null))",
@@ -2214,13 +2269,16 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = NULL_IV_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
 					   OP_ALG_AAI_HMAC_PRECOMP,
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(md5),cbc(aes))",
 				.cra_driver_name = "authenc-hmac-md5-"
@@ -2234,6 +2292,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
@@ -2241,7 +2302,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(md5),"
 					    "cbc(aes)))",
@@ -2256,6 +2317,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
@@ -2264,7 +2328,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha1),cbc(aes))",
 				.cra_driver_name = "authenc-hmac-sha1-"
@@ -2278,6 +2342,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
@@ -2285,7 +2352,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha1),"
 					    "cbc(aes)))",
@@ -2300,6 +2367,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
@@ -2308,7 +2378,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha224),cbc(aes))",
 				.cra_driver_name = "authenc-hmac-sha224-"
@@ -2322,6 +2392,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
@@ -2329,7 +2402,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha224),"
 					    "cbc(aes)))",
@@ -2344,6 +2417,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
@@ -2352,7 +2428,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha256),cbc(aes))",
 				.cra_driver_name = "authenc-hmac-sha256-"
@@ -2366,6 +2442,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
@@ -2373,7 +2452,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha256),"
 					    "cbc(aes)))",
@@ -2388,6 +2467,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
@@ -2396,7 +2478,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha384),cbc(aes))",
 				.cra_driver_name = "authenc-hmac-sha384-"
@@ -2410,6 +2492,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
@@ -2417,7 +2502,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha384),"
 					    "cbc(aes)))",
@@ -2432,6 +2517,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
@@ -2440,7 +2528,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha512),cbc(aes))",
 				.cra_driver_name = "authenc-hmac-sha512-"
@@ -2454,6 +2542,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
@@ -2461,7 +2552,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha512),"
 					    "cbc(aes)))",
@@ -2476,6 +2567,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
@@ -2484,7 +2578,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(md5),cbc(des3_ede))",
 				.cra_driver_name = "authenc-hmac-md5-"
@@ -2498,6 +2592,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
@@ -2505,7 +2602,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		}
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(md5),"
 					    "cbc(des3_ede)))",
@@ -2520,6 +2617,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
@@ -2528,7 +2628,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		}
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha1),"
 					    "cbc(des3_ede))",
@@ -2543,6 +2643,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
@@ -2550,7 +2653,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha1),"
 					    "cbc(des3_ede)))",
@@ -2566,6 +2669,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
@@ -2574,7 +2680,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha224),"
 					    "cbc(des3_ede))",
@@ -2589,6 +2695,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
@@ -2596,7 +2705,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha224),"
 					    "cbc(des3_ede)))",
@@ -2612,6 +2721,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
@@ -2620,7 +2732,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha256),"
 					    "cbc(des3_ede))",
@@ -2635,6 +2747,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
@@ -2642,7 +2757,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha256),"
 					    "cbc(des3_ede)))",
@@ -2658,6 +2773,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
@@ -2666,7 +2784,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha384),"
 					    "cbc(des3_ede))",
@@ -2681,6 +2799,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
@@ -2688,7 +2809,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha384),"
 					    "cbc(des3_ede)))",
@@ -2704,6 +2825,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
@@ -2712,7 +2836,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha512),"
 					    "cbc(des3_ede))",
@@ -2727,6 +2851,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
@@ -2734,7 +2861,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha512),"
 					    "cbc(des3_ede)))",
@@ -2750,6 +2877,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
@@ -2758,7 +2888,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(md5),cbc(des))",
 				.cra_driver_name = "authenc-hmac-md5-"
@@ -2772,6 +2902,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
@@ -2779,7 +2912,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(md5),"
 					    "cbc(des)))",
@@ -2794,6 +2927,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_MD5 |
@@ -2802,7 +2938,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha1),cbc(des))",
 				.cra_driver_name = "authenc-hmac-sha1-"
@@ -2816,6 +2952,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
@@ -2823,7 +2962,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha1),"
 					    "cbc(des)))",
@@ -2838,6 +2977,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA1 |
@@ -2846,7 +2988,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha224),cbc(des))",
 				.cra_driver_name = "authenc-hmac-sha224-"
@@ -2860,6 +3002,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
@@ -2867,7 +3012,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha224),"
 					    "cbc(des)))",
@@ -2882,6 +3027,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
@@ -2890,7 +3038,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha256),cbc(des))",
 				.cra_driver_name = "authenc-hmac-sha256-"
@@ -2904,6 +3052,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
@@ -2911,7 +3062,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha256),"
 					    "cbc(des)))",
@@ -2926,6 +3077,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
@@ -2934,7 +3088,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha384),cbc(des))",
 				.cra_driver_name = "authenc-hmac-sha384-"
@@ -2948,6 +3102,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
@@ -2955,7 +3112,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha384),"
 					    "cbc(des)))",
@@ -2970,6 +3127,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
@@ -2978,7 +3138,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha512),cbc(des))",
 				.cra_driver_name = "authenc-hmac-sha512-"
@@ -2992,6 +3152,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
@@ -2999,7 +3162,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "echainiv(authenc(hmac(sha512),"
 					    "cbc(des)))",
@@ -3014,6 +3177,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
 			.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
@@ -3022,7 +3188,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(md5),"
 					    "rfc3686(ctr(aes)))",
@@ -3037,6 +3203,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -3046,7 +3215,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "seqiv(authenc("
 					    "hmac(md5),rfc3686(ctr(aes))))",
@@ -3061,6 +3230,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -3071,7 +3243,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha1),"
 					    "rfc3686(ctr(aes)))",
@@ -3086,6 +3258,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -3095,7 +3270,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "seqiv(authenc("
 					    "hmac(sha1),rfc3686(ctr(aes))))",
@@ -3110,6 +3285,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -3120,7 +3298,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha224),"
 					    "rfc3686(ctr(aes)))",
@@ -3135,6 +3313,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -3144,7 +3325,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "seqiv(authenc("
 					    "hmac(sha224),rfc3686(ctr(aes))))",
@@ -3159,6 +3340,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -3169,7 +3353,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha256),"
 					    "rfc3686(ctr(aes)))",
@@ -3184,6 +3368,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -3193,7 +3380,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "seqiv(authenc(hmac(sha256),"
 					    "rfc3686(ctr(aes))))",
@@ -3208,6 +3395,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -3218,7 +3408,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha384),"
 					    "rfc3686(ctr(aes)))",
@@ -3233,6 +3423,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -3242,7 +3435,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "seqiv(authenc(hmac(sha384),"
 					    "rfc3686(ctr(aes))))",
@@ -3257,6 +3450,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -3267,7 +3463,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "authenc(hmac(sha512),"
 					    "rfc3686(ctr(aes)))",
@@ -3282,6 +3478,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -3291,7 +3490,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "seqiv(authenc(hmac(sha512),"
 					    "rfc3686(ctr(aes))))",
@@ -3306,6 +3505,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_AES |
 					   OP_ALG_AAI_CTR_MOD128,
@@ -3316,7 +3518,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "rfc7539(chacha20,poly1305)",
 				.cra_driver_name = "rfc7539-chacha20-poly1305-"
@@ -3330,6 +3532,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = CHACHAPOLY_IV_SIZE,
 			.maxauthsize = POLY1305_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_CHACHA20 |
 					   OP_ALG_AAI_AEAD,
@@ -3339,7 +3544,7 @@ static struct caam_aead_alg driver_aeads[] = {
 		},
 	},
 	{
-		.aead = {
+		.aead.base = {
 			.base = {
 				.cra_name = "rfc7539esp(chacha20,poly1305)",
 				.cra_driver_name = "rfc7539esp-chacha20-"
@@ -3353,6 +3558,9 @@ static struct caam_aead_alg driver_aeads[] = {
 			.ivsize = 8,
 			.maxauthsize = POLY1305_DIGEST_SIZE,
 		},
+		.aead.op = {
+			.do_one_request = aead_do_one_req,
+		},
 		.caam = {
 			.class1_alg_type = OP_ALG_ALGSEL_CHACHA20 |
 					   OP_ALG_AAI_AEAD,
@@ -3412,13 +3620,11 @@ static int caam_cra_init(struct crypto_skcipher *tfm)
 {
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	struct caam_skcipher_alg *caam_alg =
-		container_of(alg, typeof(*caam_alg), skcipher);
+		container_of(alg, typeof(*caam_alg), skcipher.base);
 	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 	u32 alg_aai = caam_alg->caam.class1_alg_type & OP_ALG_AAI_MASK;
 	int ret = 0;
 
-	ctx->enginectx.op.do_one_request = skcipher_do_one_req;
-
 	if (alg_aai == OP_ALG_AAI_XTS) {
 		const char *tfm_name = crypto_tfm_alg_name(&tfm->base);
 		struct crypto_skcipher *fallback;
@@ -3449,13 +3655,11 @@ static int caam_aead_init(struct crypto_aead *tfm)
 {
 	struct aead_alg *alg = crypto_aead_alg(tfm);
 	struct caam_aead_alg *caam_alg =
-		 container_of(alg, struct caam_aead_alg, aead);
+		 container_of(alg, struct caam_aead_alg, aead.base);
 	struct caam_ctx *ctx = crypto_aead_ctx_dma(tfm);
 
 	crypto_aead_set_reqsize(tfm, sizeof(struct caam_aead_req_ctx));
 
-	ctx->enginectx.op.do_one_request = aead_do_one_req;
-
 	return caam_init_common(ctx, &caam_alg->caam, !caam_alg->caam.nodkp);
 }
 
@@ -3490,20 +3694,20 @@ void caam_algapi_exit(void)
 		struct caam_aead_alg *t_alg = driver_aeads + i;
 
 		if (t_alg->registered)
-			crypto_unregister_aead(&t_alg->aead);
+			crypto_engine_unregister_aead(&t_alg->aead);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
 		struct caam_skcipher_alg *t_alg = driver_algs + i;
 
 		if (t_alg->registered)
-			crypto_unregister_skcipher(&t_alg->skcipher);
+			crypto_engine_unregister_skcipher(&t_alg->skcipher);
 	}
 }
 
 static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg)
 {
-	struct skcipher_alg *alg = &t_alg->skcipher;
+	struct skcipher_alg *alg = &t_alg->skcipher.base;
 
 	alg->base.cra_module = THIS_MODULE;
 	alg->base.cra_priority = CAAM_CRA_PRIORITY;
@@ -3517,7 +3721,7 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg)
 
 static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
 {
-	struct aead_alg *alg = &t_alg->aead;
+	struct aead_alg *alg = &t_alg->aead.base;
 
 	alg->base.cra_module = THIS_MODULE;
 	alg->base.cra_priority = CAAM_CRA_PRIORITY;
@@ -3607,10 +3811,10 @@ int caam_algapi_init(struct device *ctrldev)
 
 		caam_skcipher_alg_init(t_alg);
 
-		err = crypto_register_skcipher(&t_alg->skcipher);
+		err = crypto_engine_register_skcipher(&t_alg->skcipher);
 		if (err) {
 			pr_warn("%s alg registration failed\n",
-				t_alg->skcipher.base.cra_driver_name);
+				t_alg->skcipher.base.base.cra_driver_name);
 			continue;
 		}
 
@@ -3654,15 +3858,15 @@ int caam_algapi_init(struct device *ctrldev)
 		 * if MD or MD size is not supported by device.
 		 */
 		if (is_mdha(c2_alg_sel) &&
-		    (!md_inst || t_alg->aead.maxauthsize > md_limit))
+		    (!md_inst || t_alg->aead.base.maxauthsize > md_limit))
 			continue;
 
 		caam_aead_alg_init(t_alg);
 
-		err = crypto_register_aead(&t_alg->aead);
+		err = crypto_engine_register_aead(&t_alg->aead);
 		if (err) {
 			pr_warn("%s alg registration failed\n",
-				t_alg->aead.base.cra_driver_name);
+				t_alg->aead.base.base.cra_driver_name);
 			continue;
 		}
 
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 9ef25387f6b6..290c8500c247 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -66,8 +66,12 @@
 #include "key_gen.h"
 #include "caamhash_desc.h"
 #include <crypto/internal/engine.h>
+#include <crypto/internal/hash.h>
 #include <linux/dma-mapping.h>
+#include <linux/err.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 
 #define CAAM_CRA_PRIORITY		3000
 
@@ -89,7 +93,6 @@ static struct list_head hash_list;
 
 /* ahash per-session context */
 struct caam_hash_ctx {
-	struct crypto_engine_ctx enginectx;
 	u32 sh_desc_update[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
 	u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
 	u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
@@ -1750,7 +1753,7 @@ static struct caam_hash_template driver_hash[] = {
 struct caam_hash_alg {
 	struct list_head entry;
 	int alg_type;
-	struct ahash_alg ahash_alg;
+	struct ahash_engine_alg ahash_alg;
 };
 
 static int caam_hash_cra_init(struct crypto_tfm *tfm)
@@ -1762,7 +1765,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 	struct ahash_alg *alg =
 		 container_of(halg, struct ahash_alg, halg);
 	struct caam_hash_alg *caam_hash =
-		 container_of(alg, struct caam_hash_alg, ahash_alg);
+		 container_of(alg, struct caam_hash_alg, ahash_alg.base);
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	/* Sizes for MDHA running digests: MD5, SHA1, 224, 256, 384, 512 */
 	static const u8 runninglen[] = { HASH_MSG_LEN + MD5_DIGEST_SIZE,
@@ -1853,8 +1856,6 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 						      sh_desc_digest) -
 					sh_desc_update_offset;
 
-	ctx->enginectx.op.do_one_request = ahash_do_one_req;
-
 	crypto_ahash_set_reqsize_dma(ahash, sizeof(struct caam_hash_state));
 
 	/*
@@ -1887,7 +1888,7 @@ void caam_algapi_hash_exit(void)
 		return;
 
 	list_for_each_entry_safe(t_alg, n, &hash_list, entry) {
-		crypto_unregister_ahash(&t_alg->ahash_alg);
+		crypto_engine_unregister_ahash(&t_alg->ahash_alg);
 		list_del(&t_alg->entry);
 		kfree(t_alg);
 	}
@@ -1905,8 +1906,8 @@ caam_hash_alloc(struct caam_hash_template *template,
 	if (!t_alg)
 		return ERR_PTR(-ENOMEM);
 
-	t_alg->ahash_alg = template->template_ahash;
-	halg = &t_alg->ahash_alg;
+	t_alg->ahash_alg.base = template->template_ahash;
+	halg = &t_alg->ahash_alg.base;
 	alg = &halg->halg.base;
 
 	if (keyed) {
@@ -1919,7 +1920,7 @@ caam_hash_alloc(struct caam_hash_template *template,
 			 template->name);
 		snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
 			 template->driver_name);
-		t_alg->ahash_alg.setkey = NULL;
+		halg->setkey = NULL;
 	}
 	alg->cra_module = THIS_MODULE;
 	alg->cra_init = caam_hash_cra_init;
@@ -1931,6 +1932,7 @@ caam_hash_alloc(struct caam_hash_template *template,
 	alg->cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY;
 
 	t_alg->alg_type = template->alg_type;
+	t_alg->ahash_alg.op.do_one_request = ahash_do_one_req;
 
 	return t_alg;
 }
@@ -1992,10 +1994,10 @@ int caam_algapi_hash_init(struct device *ctrldev)
 			continue;
 		}
 
-		err = crypto_register_ahash(&t_alg->ahash_alg);
+		err = crypto_engine_register_ahash(&t_alg->ahash_alg);
 		if (err) {
 			pr_warn("%s alg registration failed: %d\n",
-				t_alg->ahash_alg.halg.base.cra_driver_name,
+				t_alg->ahash_alg.base.halg.base.cra_driver_name,
 				err);
 			kfree(t_alg);
 		} else
@@ -2012,10 +2014,10 @@ int caam_algapi_hash_init(struct device *ctrldev)
 			continue;
 		}
 
-		err = crypto_register_ahash(&t_alg->ahash_alg);
+		err = crypto_engine_register_ahash(&t_alg->ahash_alg);
 		if (err) {
 			pr_warn("%s alg registration failed: %d\n",
-				t_alg->ahash_alg.halg.base.cra_driver_name,
+				t_alg->ahash_alg.base.halg.base.cra_driver_name,
 				err);
 			kfree(t_alg);
 		} else
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 1809c97d04be..887a5f2fb927 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -18,7 +18,10 @@
 #include "caampkc.h"
 #include <crypto/internal/engine.h>
 #include <linux/dma-mapping.h>
+#include <linux/err.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 
 #define DESC_RSA_PUB_LEN	(2 * CAAM_CMD_SZ + SIZEOF_RSA_PUB_PDB)
 #define DESC_RSA_PRIV_F1_LEN	(2 * CAAM_CMD_SZ + \
@@ -39,7 +42,7 @@ static u8 *zero_buffer;
 static bool init_done;
 
 struct caam_akcipher_alg {
-	struct akcipher_alg akcipher;
+	struct akcipher_engine_alg akcipher;
 	bool registered;
 };
 
@@ -1124,8 +1127,6 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm)
 		return -ENOMEM;
 	}
 
-	ctx->enginectx.op.do_one_request = akcipher_do_one_req;
-
 	return 0;
 }
 
@@ -1142,7 +1143,7 @@ static void caam_rsa_exit_tfm(struct crypto_akcipher *tfm)
 }
 
 static struct caam_akcipher_alg caam_rsa = {
-	.akcipher = {
+	.akcipher.base = {
 		.encrypt = caam_rsa_enc,
 		.decrypt = caam_rsa_dec,
 		.set_pub_key = caam_rsa_set_pub_key,
@@ -1158,7 +1159,10 @@ static struct caam_akcipher_alg caam_rsa = {
 			.cra_ctxsize = sizeof(struct caam_rsa_ctx) +
 				       CRYPTO_DMA_PADDING,
 		},
-	}
+	},
+	.akcipher.op = {
+		.do_one_request = akcipher_do_one_req,
+	},
 };
 
 /* Public Key Cryptography module initialization handler */
@@ -1196,12 +1200,12 @@ int caam_pkc_init(struct device *ctrldev)
 	if (!zero_buffer)
 		return -ENOMEM;
 
-	err = crypto_register_akcipher(&caam_rsa.akcipher);
+	err = crypto_engine_register_akcipher(&caam_rsa.akcipher);
 
 	if (err) {
 		kfree(zero_buffer);
 		dev_warn(ctrldev, "%s alg registration failed\n",
-			 caam_rsa.akcipher.base.cra_driver_name);
+			 caam_rsa.akcipher.base.base.cra_driver_name);
 	} else {
 		init_done = true;
 		caam_rsa.registered = true;
@@ -1217,7 +1221,7 @@ void caam_pkc_exit(void)
 		return;
 
 	if (caam_rsa.registered)
-		crypto_unregister_akcipher(&caam_rsa.akcipher);
+		crypto_engine_unregister_akcipher(&caam_rsa.akcipher);
 
 	kfree(zero_buffer);
 }
diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h
index cc889a525e2f..96d03704c9be 100644
--- a/drivers/crypto/caam/caampkc.h
+++ b/drivers/crypto/caam/caampkc.h
@@ -12,7 +12,6 @@
 #define _PKC_DESC_H_
 #include "compat.h"
 #include "pdb.h"
-#include <crypto/engine.h>
 
 /**
  * caam_priv_key_form - CAAM RSA private key representation
@@ -88,13 +87,11 @@ struct caam_rsa_key {
 
 /**
  * caam_rsa_ctx - per session context.
- * @enginectx   : crypto engine context
  * @key         : RSA key in DMA zone
  * @dev         : device structure
  * @padding_dma : dma address of padding, for adding it to the input
  */
 struct caam_rsa_ctx {
-	struct crypto_engine_ctx enginectx;
 	struct caam_rsa_key key;
 	struct device *dev;
 	dma_addr_t padding_dma;

From 67b7702c5b03b89ca196c5bd137407509b397049 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:55:04 +0800
Subject: [PATCH 1346/2047] crypto: sl3516 - Use new crypto_engine_op interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/gemini/sl3516-ce-cipher.c | 20 +++++-----
 drivers/crypto/gemini/sl3516-ce-core.c   | 49 +++++++++++++++---------
 drivers/crypto/gemini/sl3516-ce.h        |  8 +---
 3 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c
index 0232f847785a..49dce9e0a834 100644
--- a/drivers/crypto/gemini/sl3516-ce-cipher.c
+++ b/drivers/crypto/gemini/sl3516-ce-cipher.c
@@ -8,13 +8,17 @@
  * ECB mode.
  */
 
-#include <linux/crypto.h>
+#include <crypto/engine.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
+#include <linux/err.h>
 #include <linux/io.h>
+#include <linux/kernel.h>
 #include <linux/pm_runtime.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/internal/skcipher.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 #include "sl3516-ce.h"
 
 /* sl3516_ce_need_fallback - check if a request can be handled by the CE */
@@ -105,7 +109,7 @@ static int sl3516_ce_cipher_fallback(struct skcipher_request *areq)
 	struct sl3516_ce_alg_template *algt;
 	int err;
 
-	algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher);
+	algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher.base);
 	algt->stat_fb++;
 
 	skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
@@ -136,7 +140,7 @@ static int sl3516_ce_cipher(struct skcipher_request *areq)
 	int err = 0;
 	int i;
 
-	algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher);
+	algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher.base);
 
 	dev_dbg(ce->dev, "%s %s %u %x IV(%p %u) key=%u\n", __func__,
 		crypto_tfm_alg_name(areq->base.tfm),
@@ -258,7 +262,7 @@ theend:
 	return err;
 }
 
-static int sl3516_ce_handle_cipher_request(struct crypto_engine *engine, void *areq)
+int sl3516_ce_handle_cipher_request(struct crypto_engine *engine, void *areq)
 {
 	int err;
 	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
@@ -318,7 +322,7 @@ int sl3516_ce_cipher_init(struct crypto_tfm *tfm)
 
 	memset(op, 0, sizeof(struct sl3516_ce_cipher_tfm_ctx));
 
-	algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher);
+	algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher.base);
 	op->ce = algt->ce;
 
 	op->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
@@ -335,8 +339,6 @@ int sl3516_ce_cipher_init(struct crypto_tfm *tfm)
 		 crypto_tfm_alg_driver_name(&sktfm->base),
 		 crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)));
 
-	op->enginectx.op.do_one_request = sl3516_ce_handle_cipher_request;
-
 	err = pm_runtime_get_sync(op->ce->dev);
 	if (err < 0)
 		goto error_pm;
diff --git a/drivers/crypto/gemini/sl3516-ce-core.c b/drivers/crypto/gemini/sl3516-ce-core.c
index b7524b649068..0fd47ab9df5c 100644
--- a/drivers/crypto/gemini/sl3516-ce-core.c
+++ b/drivers/crypto/gemini/sl3516-ce-core.c
@@ -6,22 +6,25 @@
  *
  * Core file which registers crypto algorithms supported by the CryptoEngine
  */
+
+#include <crypto/engine.h>
+#include <crypto/internal/rng.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/clk.h>
-#include <linux/crypto.h>
 #include <linux/debugfs.h>
 #include <linux/dev_printk.h>
 #include <linux/dma-mapping.h>
+#include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
-#include <crypto/internal/rng.h>
-#include <crypto/internal/skcipher.h>
 
 #include "sl3516-ce.h"
 
@@ -217,7 +220,7 @@ static struct sl3516_ce_alg_template ce_algs[] = {
 {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.mode = ECB_AES,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base = {
 			.cra_name = "ecb(aes)",
 			.cra_driver_name = "ecb-aes-sl3516",
@@ -236,11 +239,13 @@ static struct sl3516_ce_alg_template ce_algs[] = {
 		.setkey		= sl3516_ce_aes_setkey,
 		.encrypt	= sl3516_ce_skencrypt,
 		.decrypt	= sl3516_ce_skdecrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = sl3516_ce_handle_cipher_request,
+	},
 },
 };
 
-#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG
 static int sl3516_ce_debugfs_show(struct seq_file *seq, void *v)
 {
 	struct sl3516_ce_dev *ce = seq->private;
@@ -264,8 +269,8 @@ static int sl3516_ce_debugfs_show(struct seq_file *seq, void *v)
 		switch (ce_algs[i].type) {
 		case CRYPTO_ALG_TYPE_SKCIPHER:
 			seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
-				   ce_algs[i].alg.skcipher.base.cra_driver_name,
-				   ce_algs[i].alg.skcipher.base.cra_name,
+				   ce_algs[i].alg.skcipher.base.base.cra_driver_name,
+				   ce_algs[i].alg.skcipher.base.base.cra_name,
 				   ce_algs[i].stat_req, ce_algs[i].stat_fb);
 			break;
 		}
@@ -274,7 +279,6 @@ static int sl3516_ce_debugfs_show(struct seq_file *seq, void *v)
 }
 
 DEFINE_SHOW_ATTRIBUTE(sl3516_ce_debugfs);
-#endif
 
 static int sl3516_ce_register_algs(struct sl3516_ce_dev *ce)
 {
@@ -286,11 +290,11 @@ static int sl3516_ce_register_algs(struct sl3516_ce_dev *ce)
 		switch (ce_algs[i].type) {
 		case CRYPTO_ALG_TYPE_SKCIPHER:
 			dev_info(ce->dev, "DEBUG: Register %s\n",
-				 ce_algs[i].alg.skcipher.base.cra_name);
-			err = crypto_register_skcipher(&ce_algs[i].alg.skcipher);
+				 ce_algs[i].alg.skcipher.base.base.cra_name);
+			err = crypto_engine_register_skcipher(&ce_algs[i].alg.skcipher);
 			if (err) {
 				dev_err(ce->dev, "Fail to register %s\n",
-					ce_algs[i].alg.skcipher.base.cra_name);
+					ce_algs[i].alg.skcipher.base.base.cra_name);
 				ce_algs[i].ce = NULL;
 				return err;
 			}
@@ -313,8 +317,8 @@ static void sl3516_ce_unregister_algs(struct sl3516_ce_dev *ce)
 		switch (ce_algs[i].type) {
 		case CRYPTO_ALG_TYPE_SKCIPHER:
 			dev_info(ce->dev, "Unregister %d %s\n", i,
-				 ce_algs[i].alg.skcipher.base.cra_name);
-			crypto_unregister_skcipher(&ce_algs[i].alg.skcipher);
+				 ce_algs[i].alg.skcipher.base.base.cra_name);
+			crypto_engine_unregister_skcipher(&ce_algs[i].alg.skcipher);
 			break;
 		}
 	}
@@ -473,13 +477,20 @@ static int sl3516_ce_probe(struct platform_device *pdev)
 
 	pm_runtime_put_sync(ce->dev);
 
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_SL3516_DEBUG)) {
+		struct dentry *dbgfs_dir __maybe_unused;
+		struct dentry *dbgfs_stats __maybe_unused;
+
+		/* Ignore error of debugfs */
+		dbgfs_dir = debugfs_create_dir("sl3516", NULL);
+		dbgfs_stats = debugfs_create_file("stats", 0444,
+						  dbgfs_dir, ce,
+						  &sl3516_ce_debugfs_fops);
 #ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG
-	/* Ignore error of debugfs */
-	ce->dbgfs_dir = debugfs_create_dir("sl3516", NULL);
-	ce->dbgfs_stats = debugfs_create_file("stats", 0444,
-					      ce->dbgfs_dir, ce,
-					      &sl3516_ce_debugfs_fops);
+		ce->dbgfs_dir = dbgfs_dir;
+		ce->dbgfs_stats = dbgfs_stats;
 #endif
+	}
 
 	return 0;
 error_pmuse:
diff --git a/drivers/crypto/gemini/sl3516-ce.h b/drivers/crypto/gemini/sl3516-ce.h
index 4c0ec6c920d1..9e1a7e7f8961 100644
--- a/drivers/crypto/gemini/sl3516-ce.h
+++ b/drivers/crypto/gemini/sl3516-ce.h
@@ -17,7 +17,6 @@
 #include <crypto/engine.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/skcipher.h>
-#include <linux/crypto.h>
 #include <linux/debugfs.h>
 #include <linux/hw_random.h>
 
@@ -292,16 +291,12 @@ struct sl3516_ce_cipher_req_ctx {
 
 /*
  * struct sl3516_ce_cipher_tfm_ctx - context for a skcipher TFM
- * @enginectx:		crypto_engine used by this TFM
  * @key:		pointer to key data
  * @keylen:		len of the key
  * @ce:			pointer to the private data of driver handling this TFM
  * @fallback_tfm:	pointer to the fallback TFM
- *
- * enginectx must be the first element
  */
 struct sl3516_ce_cipher_tfm_ctx {
-	struct crypto_engine_ctx enginectx;
 	u32 *key;
 	u32 keylen;
 	struct sl3516_ce_dev *ce;
@@ -324,7 +319,7 @@ struct sl3516_ce_alg_template {
 	u32 mode;
 	struct sl3516_ce_dev *ce;
 	union {
-		struct skcipher_alg skcipher;
+		struct skcipher_engine_alg skcipher;
 	} alg;
 	unsigned long stat_req;
 	unsigned long stat_fb;
@@ -345,3 +340,4 @@ int sl3516_ce_run_task(struct sl3516_ce_dev *ce,
 
 int sl3516_ce_rng_register(struct sl3516_ce_dev *ce);
 void sl3516_ce_rng_unregister(struct sl3516_ce_dev *ce);
+int sl3516_ce_handle_cipher_request(struct crypto_engine *engine, void *areq);

From 530d7b009d8cd66a009f13090a7fff1764b3936c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:55:06 +0800
Subject: [PATCH 1347/2047] crypto: keembay - Use new crypto_engine_op
 interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../intel/keembay/keembay-ocs-aes-core.c      | 427 +++++++++---------
 .../crypto/intel/keembay/keembay-ocs-ecc.c    |  71 ++-
 .../intel/keembay/keembay-ocs-hcu-core.c      | 232 +++++-----
 3 files changed, 360 insertions(+), 370 deletions(-)

diff --git a/drivers/crypto/intel/keembay/keembay-ocs-aes-core.c b/drivers/crypto/intel/keembay/keembay-ocs-aes-core.c
index f94f48289683..1e2fd9a754ec 100644
--- a/drivers/crypto/intel/keembay/keembay-ocs-aes-core.c
+++ b/drivers/crypto/intel/keembay/keembay-ocs-aes-core.c
@@ -5,24 +5,23 @@
  * Copyright (C) 2018-2020 Intel Corporation
  */
 
-#include <linux/clk.h>
-#include <linux/completion.h>
-#include <linux/crypto.h>
-#include <linux/dma-mapping.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/types.h>
-
 #include <crypto/aes.h>
 #include <crypto/engine.h>
 #include <crypto/gcm.h>
-#include <crypto/scatterwalk.h>
-
 #include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
 
 #include "ocs-aes.h"
 
@@ -38,7 +37,6 @@
 
 /**
  * struct ocs_aes_tctx - OCS AES Transform context
- * @engine_ctx:		Engine context.
  * @aes_dev:		The OCS AES device.
  * @key:		AES/SM4 key.
  * @key_len:		The length (in bytes) of @key.
@@ -47,7 +45,6 @@
  * @use_fallback:	Whether or not fallback cipher should be used.
  */
 struct ocs_aes_tctx {
-	struct crypto_engine_ctx engine_ctx;
 	struct ocs_aes_dev *aes_dev;
 	u8 key[OCS_AES_KEYSIZE_256];
 	unsigned int key_len;
@@ -1148,13 +1145,6 @@ static int kmb_ocs_sm4_ccm_decrypt(struct aead_request *req)
 	return kmb_ocs_aead_common(req, OCS_SM4, OCS_DECRYPT, OCS_MODE_CCM);
 }
 
-static inline int ocs_common_init(struct ocs_aes_tctx *tctx)
-{
-	tctx->engine_ctx.op.do_one_request = kmb_ocs_aes_sk_do_one_request;
-
-	return 0;
-}
-
 static int ocs_aes_init_tfm(struct crypto_skcipher *tfm)
 {
 	const char *alg_name = crypto_tfm_alg_name(&tfm->base);
@@ -1170,16 +1160,14 @@ static int ocs_aes_init_tfm(struct crypto_skcipher *tfm)
 
 	crypto_skcipher_set_reqsize(tfm, sizeof(struct ocs_aes_rctx));
 
-	return ocs_common_init(tctx);
+	return 0;
 }
 
 static int ocs_sm4_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct ocs_aes_tctx *tctx = crypto_skcipher_ctx(tfm);
-
 	crypto_skcipher_set_reqsize(tfm, sizeof(struct ocs_aes_rctx));
 
-	return ocs_common_init(tctx);
+	return 0;
 }
 
 static inline void clear_key(struct ocs_aes_tctx *tctx)
@@ -1204,13 +1192,6 @@ static void ocs_exit_tfm(struct crypto_skcipher *tfm)
 	}
 }
 
-static inline int ocs_common_aead_init(struct ocs_aes_tctx *tctx)
-{
-	tctx->engine_ctx.op.do_one_request = kmb_ocs_aes_aead_do_one_request;
-
-	return 0;
-}
-
 static int ocs_aes_aead_cra_init(struct crypto_aead *tfm)
 {
 	const char *alg_name = crypto_tfm_alg_name(&tfm->base);
@@ -1229,7 +1210,7 @@ static int ocs_aes_aead_cra_init(struct crypto_aead *tfm)
 				    (sizeof(struct aead_request) +
 				     crypto_aead_reqsize(tctx->sw_cipher.aead))));
 
-	return ocs_common_aead_init(tctx);
+	return 0;
 }
 
 static int kmb_ocs_aead_ccm_setauthsize(struct crypto_aead *tfm,
@@ -1257,11 +1238,9 @@ static int kmb_ocs_aead_gcm_setauthsize(struct crypto_aead *tfm,
 
 static int ocs_sm4_aead_cra_init(struct crypto_aead *tfm)
 {
-	struct ocs_aes_tctx *tctx = crypto_aead_ctx(tfm);
-
 	crypto_aead_set_reqsize(tfm, sizeof(struct ocs_aes_rctx));
 
-	return ocs_common_aead_init(tctx);
+	return 0;
 }
 
 static void ocs_aead_cra_exit(struct crypto_aead *tfm)
@@ -1276,182 +1255,190 @@ static void ocs_aead_cra_exit(struct crypto_aead *tfm)
 	}
 }
 
-static struct skcipher_alg algs[] = {
+static struct skcipher_engine_alg algs[] = {
 #ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB
 	{
-		.base.cra_name = "ecb(aes)",
-		.base.cra_driver_name = "ecb-aes-keembay-ocs",
-		.base.cra_priority = KMB_OCS_PRIORITY,
-		.base.cra_flags = CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_NEED_FALLBACK,
-		.base.cra_blocksize = AES_BLOCK_SIZE,
-		.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
-		.base.cra_module = THIS_MODULE,
-		.base.cra_alignmask = 0,
+		.base.base.cra_name = "ecb(aes)",
+		.base.base.cra_driver_name = "ecb-aes-keembay-ocs",
+		.base.base.cra_priority = KMB_OCS_PRIORITY,
+		.base.base.cra_flags = CRYPTO_ALG_ASYNC |
+				       CRYPTO_ALG_KERN_DRIVER_ONLY |
+				       CRYPTO_ALG_NEED_FALLBACK,
+		.base.base.cra_blocksize = AES_BLOCK_SIZE,
+		.base.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
+		.base.base.cra_module = THIS_MODULE,
+		.base.base.cra_alignmask = 0,
 
-		.min_keysize = OCS_AES_MIN_KEY_SIZE,
-		.max_keysize = OCS_AES_MAX_KEY_SIZE,
-		.setkey = kmb_ocs_aes_set_key,
-		.encrypt = kmb_ocs_aes_ecb_encrypt,
-		.decrypt = kmb_ocs_aes_ecb_decrypt,
-		.init = ocs_aes_init_tfm,
-		.exit = ocs_exit_tfm,
+		.base.min_keysize = OCS_AES_MIN_KEY_SIZE,
+		.base.max_keysize = OCS_AES_MAX_KEY_SIZE,
+		.base.setkey = kmb_ocs_aes_set_key,
+		.base.encrypt = kmb_ocs_aes_ecb_encrypt,
+		.base.decrypt = kmb_ocs_aes_ecb_decrypt,
+		.base.init = ocs_aes_init_tfm,
+		.base.exit = ocs_exit_tfm,
+		.op.do_one_request = kmb_ocs_aes_sk_do_one_request,
 	},
 #endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB */
 	{
-		.base.cra_name = "cbc(aes)",
-		.base.cra_driver_name = "cbc-aes-keembay-ocs",
-		.base.cra_priority = KMB_OCS_PRIORITY,
-		.base.cra_flags = CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_NEED_FALLBACK,
-		.base.cra_blocksize = AES_BLOCK_SIZE,
-		.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
-		.base.cra_module = THIS_MODULE,
-		.base.cra_alignmask = 0,
+		.base.base.cra_name = "cbc(aes)",
+		.base.base.cra_driver_name = "cbc-aes-keembay-ocs",
+		.base.base.cra_priority = KMB_OCS_PRIORITY,
+		.base.base.cra_flags = CRYPTO_ALG_ASYNC |
+				       CRYPTO_ALG_KERN_DRIVER_ONLY |
+				       CRYPTO_ALG_NEED_FALLBACK,
+		.base.base.cra_blocksize = AES_BLOCK_SIZE,
+		.base.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
+		.base.base.cra_module = THIS_MODULE,
+		.base.base.cra_alignmask = 0,
 
-		.min_keysize = OCS_AES_MIN_KEY_SIZE,
-		.max_keysize = OCS_AES_MAX_KEY_SIZE,
-		.ivsize = AES_BLOCK_SIZE,
-		.setkey = kmb_ocs_aes_set_key,
-		.encrypt = kmb_ocs_aes_cbc_encrypt,
-		.decrypt = kmb_ocs_aes_cbc_decrypt,
-		.init = ocs_aes_init_tfm,
-		.exit = ocs_exit_tfm,
+		.base.min_keysize = OCS_AES_MIN_KEY_SIZE,
+		.base.max_keysize = OCS_AES_MAX_KEY_SIZE,
+		.base.ivsize = AES_BLOCK_SIZE,
+		.base.setkey = kmb_ocs_aes_set_key,
+		.base.encrypt = kmb_ocs_aes_cbc_encrypt,
+		.base.decrypt = kmb_ocs_aes_cbc_decrypt,
+		.base.init = ocs_aes_init_tfm,
+		.base.exit = ocs_exit_tfm,
+		.op.do_one_request = kmb_ocs_aes_sk_do_one_request,
 	},
 	{
-		.base.cra_name = "ctr(aes)",
-		.base.cra_driver_name = "ctr-aes-keembay-ocs",
-		.base.cra_priority = KMB_OCS_PRIORITY,
-		.base.cra_flags = CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_NEED_FALLBACK,
-		.base.cra_blocksize = 1,
-		.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
-		.base.cra_module = THIS_MODULE,
-		.base.cra_alignmask = 0,
+		.base.base.cra_name = "ctr(aes)",
+		.base.base.cra_driver_name = "ctr-aes-keembay-ocs",
+		.base.base.cra_priority = KMB_OCS_PRIORITY,
+		.base.base.cra_flags = CRYPTO_ALG_ASYNC |
+				       CRYPTO_ALG_KERN_DRIVER_ONLY |
+				       CRYPTO_ALG_NEED_FALLBACK,
+		.base.base.cra_blocksize = 1,
+		.base.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
+		.base.base.cra_module = THIS_MODULE,
+		.base.base.cra_alignmask = 0,
 
-		.min_keysize = OCS_AES_MIN_KEY_SIZE,
-		.max_keysize = OCS_AES_MAX_KEY_SIZE,
-		.ivsize = AES_BLOCK_SIZE,
-		.setkey = kmb_ocs_aes_set_key,
-		.encrypt = kmb_ocs_aes_ctr_encrypt,
-		.decrypt = kmb_ocs_aes_ctr_decrypt,
-		.init = ocs_aes_init_tfm,
-		.exit = ocs_exit_tfm,
+		.base.min_keysize = OCS_AES_MIN_KEY_SIZE,
+		.base.max_keysize = OCS_AES_MAX_KEY_SIZE,
+		.base.ivsize = AES_BLOCK_SIZE,
+		.base.setkey = kmb_ocs_aes_set_key,
+		.base.encrypt = kmb_ocs_aes_ctr_encrypt,
+		.base.decrypt = kmb_ocs_aes_ctr_decrypt,
+		.base.init = ocs_aes_init_tfm,
+		.base.exit = ocs_exit_tfm,
+		.op.do_one_request = kmb_ocs_aes_sk_do_one_request,
 	},
 #ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS
 	{
-		.base.cra_name = "cts(cbc(aes))",
-		.base.cra_driver_name = "cts-aes-keembay-ocs",
-		.base.cra_priority = KMB_OCS_PRIORITY,
-		.base.cra_flags = CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_NEED_FALLBACK,
-		.base.cra_blocksize = AES_BLOCK_SIZE,
-		.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
-		.base.cra_module = THIS_MODULE,
-		.base.cra_alignmask = 0,
+		.base.base.cra_name = "cts(cbc(aes))",
+		.base.base.cra_driver_name = "cts-aes-keembay-ocs",
+		.base.base.cra_priority = KMB_OCS_PRIORITY,
+		.base.base.cra_flags = CRYPTO_ALG_ASYNC |
+				       CRYPTO_ALG_KERN_DRIVER_ONLY |
+				       CRYPTO_ALG_NEED_FALLBACK,
+		.base.base.cra_blocksize = AES_BLOCK_SIZE,
+		.base.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
+		.base.base.cra_module = THIS_MODULE,
+		.base.base.cra_alignmask = 0,
 
-		.min_keysize = OCS_AES_MIN_KEY_SIZE,
-		.max_keysize = OCS_AES_MAX_KEY_SIZE,
-		.ivsize = AES_BLOCK_SIZE,
-		.setkey = kmb_ocs_aes_set_key,
-		.encrypt = kmb_ocs_aes_cts_encrypt,
-		.decrypt = kmb_ocs_aes_cts_decrypt,
-		.init = ocs_aes_init_tfm,
-		.exit = ocs_exit_tfm,
+		.base.min_keysize = OCS_AES_MIN_KEY_SIZE,
+		.base.max_keysize = OCS_AES_MAX_KEY_SIZE,
+		.base.ivsize = AES_BLOCK_SIZE,
+		.base.setkey = kmb_ocs_aes_set_key,
+		.base.encrypt = kmb_ocs_aes_cts_encrypt,
+		.base.decrypt = kmb_ocs_aes_cts_decrypt,
+		.base.init = ocs_aes_init_tfm,
+		.base.exit = ocs_exit_tfm,
+		.op.do_one_request = kmb_ocs_aes_sk_do_one_request,
 	},
 #endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS */
 #ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB
 	{
-		.base.cra_name = "ecb(sm4)",
-		.base.cra_driver_name = "ecb-sm4-keembay-ocs",
-		.base.cra_priority = KMB_OCS_PRIORITY,
-		.base.cra_flags = CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.base.cra_blocksize = AES_BLOCK_SIZE,
-		.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
-		.base.cra_module = THIS_MODULE,
-		.base.cra_alignmask = 0,
+		.base.base.cra_name = "ecb(sm4)",
+		.base.base.cra_driver_name = "ecb-sm4-keembay-ocs",
+		.base.base.cra_priority = KMB_OCS_PRIORITY,
+		.base.base.cra_flags = CRYPTO_ALG_ASYNC |
+				       CRYPTO_ALG_KERN_DRIVER_ONLY,
+		.base.base.cra_blocksize = AES_BLOCK_SIZE,
+		.base.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
+		.base.base.cra_module = THIS_MODULE,
+		.base.base.cra_alignmask = 0,
 
-		.min_keysize = OCS_SM4_KEY_SIZE,
-		.max_keysize = OCS_SM4_KEY_SIZE,
-		.setkey = kmb_ocs_sm4_set_key,
-		.encrypt = kmb_ocs_sm4_ecb_encrypt,
-		.decrypt = kmb_ocs_sm4_ecb_decrypt,
-		.init = ocs_sm4_init_tfm,
-		.exit = ocs_exit_tfm,
+		.base.min_keysize = OCS_SM4_KEY_SIZE,
+		.base.max_keysize = OCS_SM4_KEY_SIZE,
+		.base.setkey = kmb_ocs_sm4_set_key,
+		.base.encrypt = kmb_ocs_sm4_ecb_encrypt,
+		.base.decrypt = kmb_ocs_sm4_ecb_decrypt,
+		.base.init = ocs_sm4_init_tfm,
+		.base.exit = ocs_exit_tfm,
+		.op.do_one_request = kmb_ocs_aes_sk_do_one_request,
 	},
 #endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_ECB */
 	{
-		.base.cra_name = "cbc(sm4)",
-		.base.cra_driver_name = "cbc-sm4-keembay-ocs",
-		.base.cra_priority = KMB_OCS_PRIORITY,
-		.base.cra_flags = CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.base.cra_blocksize = AES_BLOCK_SIZE,
-		.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
-		.base.cra_module = THIS_MODULE,
-		.base.cra_alignmask = 0,
+		.base.base.cra_name = "cbc(sm4)",
+		.base.base.cra_driver_name = "cbc-sm4-keembay-ocs",
+		.base.base.cra_priority = KMB_OCS_PRIORITY,
+		.base.base.cra_flags = CRYPTO_ALG_ASYNC |
+				       CRYPTO_ALG_KERN_DRIVER_ONLY,
+		.base.base.cra_blocksize = AES_BLOCK_SIZE,
+		.base.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
+		.base.base.cra_module = THIS_MODULE,
+		.base.base.cra_alignmask = 0,
 
-		.min_keysize = OCS_SM4_KEY_SIZE,
-		.max_keysize = OCS_SM4_KEY_SIZE,
-		.ivsize = AES_BLOCK_SIZE,
-		.setkey = kmb_ocs_sm4_set_key,
-		.encrypt = kmb_ocs_sm4_cbc_encrypt,
-		.decrypt = kmb_ocs_sm4_cbc_decrypt,
-		.init = ocs_sm4_init_tfm,
-		.exit = ocs_exit_tfm,
+		.base.min_keysize = OCS_SM4_KEY_SIZE,
+		.base.max_keysize = OCS_SM4_KEY_SIZE,
+		.base.ivsize = AES_BLOCK_SIZE,
+		.base.setkey = kmb_ocs_sm4_set_key,
+		.base.encrypt = kmb_ocs_sm4_cbc_encrypt,
+		.base.decrypt = kmb_ocs_sm4_cbc_decrypt,
+		.base.init = ocs_sm4_init_tfm,
+		.base.exit = ocs_exit_tfm,
+		.op.do_one_request = kmb_ocs_aes_sk_do_one_request,
 	},
 	{
-		.base.cra_name = "ctr(sm4)",
-		.base.cra_driver_name = "ctr-sm4-keembay-ocs",
-		.base.cra_priority = KMB_OCS_PRIORITY,
-		.base.cra_flags = CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.base.cra_blocksize = 1,
-		.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
-		.base.cra_module = THIS_MODULE,
-		.base.cra_alignmask = 0,
+		.base.base.cra_name = "ctr(sm4)",
+		.base.base.cra_driver_name = "ctr-sm4-keembay-ocs",
+		.base.base.cra_priority = KMB_OCS_PRIORITY,
+		.base.base.cra_flags = CRYPTO_ALG_ASYNC |
+				       CRYPTO_ALG_KERN_DRIVER_ONLY,
+		.base.base.cra_blocksize = 1,
+		.base.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
+		.base.base.cra_module = THIS_MODULE,
+		.base.base.cra_alignmask = 0,
 
-		.min_keysize = OCS_SM4_KEY_SIZE,
-		.max_keysize = OCS_SM4_KEY_SIZE,
-		.ivsize = AES_BLOCK_SIZE,
-		.setkey = kmb_ocs_sm4_set_key,
-		.encrypt = kmb_ocs_sm4_ctr_encrypt,
-		.decrypt = kmb_ocs_sm4_ctr_decrypt,
-		.init = ocs_sm4_init_tfm,
-		.exit = ocs_exit_tfm,
+		.base.min_keysize = OCS_SM4_KEY_SIZE,
+		.base.max_keysize = OCS_SM4_KEY_SIZE,
+		.base.ivsize = AES_BLOCK_SIZE,
+		.base.setkey = kmb_ocs_sm4_set_key,
+		.base.encrypt = kmb_ocs_sm4_ctr_encrypt,
+		.base.decrypt = kmb_ocs_sm4_ctr_decrypt,
+		.base.init = ocs_sm4_init_tfm,
+		.base.exit = ocs_exit_tfm,
+		.op.do_one_request = kmb_ocs_aes_sk_do_one_request,
 	},
 #ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS
 	{
-		.base.cra_name = "cts(cbc(sm4))",
-		.base.cra_driver_name = "cts-sm4-keembay-ocs",
-		.base.cra_priority = KMB_OCS_PRIORITY,
-		.base.cra_flags = CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.base.cra_blocksize = AES_BLOCK_SIZE,
-		.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
-		.base.cra_module = THIS_MODULE,
-		.base.cra_alignmask = 0,
+		.base.base.cra_name = "cts(cbc(sm4))",
+		.base.base.cra_driver_name = "cts-sm4-keembay-ocs",
+		.base.base.cra_priority = KMB_OCS_PRIORITY,
+		.base.base.cra_flags = CRYPTO_ALG_ASYNC |
+				       CRYPTO_ALG_KERN_DRIVER_ONLY,
+		.base.base.cra_blocksize = AES_BLOCK_SIZE,
+		.base.base.cra_ctxsize = sizeof(struct ocs_aes_tctx),
+		.base.base.cra_module = THIS_MODULE,
+		.base.base.cra_alignmask = 0,
 
-		.min_keysize = OCS_SM4_KEY_SIZE,
-		.max_keysize = OCS_SM4_KEY_SIZE,
-		.ivsize = AES_BLOCK_SIZE,
-		.setkey = kmb_ocs_sm4_set_key,
-		.encrypt = kmb_ocs_sm4_cts_encrypt,
-		.decrypt = kmb_ocs_sm4_cts_decrypt,
-		.init = ocs_sm4_init_tfm,
-		.exit = ocs_exit_tfm,
+		.base.min_keysize = OCS_SM4_KEY_SIZE,
+		.base.max_keysize = OCS_SM4_KEY_SIZE,
+		.base.ivsize = AES_BLOCK_SIZE,
+		.base.setkey = kmb_ocs_sm4_set_key,
+		.base.encrypt = kmb_ocs_sm4_cts_encrypt,
+		.base.decrypt = kmb_ocs_sm4_cts_decrypt,
+		.base.init = ocs_sm4_init_tfm,
+		.base.exit = ocs_exit_tfm,
+		.op.do_one_request = kmb_ocs_aes_sk_do_one_request,
 	}
 #endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_AES_SM4_CTS */
 };
 
-static struct aead_alg algs_aead[] = {
+static struct aead_engine_alg algs_aead[] = {
 	{
-		.base = {
+		.base.base = {
 			.cra_name = "gcm(aes)",
 			.cra_driver_name = "gcm-aes-keembay-ocs",
 			.cra_priority = KMB_OCS_PRIORITY,
@@ -1463,17 +1450,18 @@ static struct aead_alg algs_aead[] = {
 			.cra_alignmask = 0,
 			.cra_module = THIS_MODULE,
 		},
-		.init = ocs_aes_aead_cra_init,
-		.exit = ocs_aead_cra_exit,
-		.ivsize = GCM_AES_IV_SIZE,
-		.maxauthsize = AES_BLOCK_SIZE,
-		.setauthsize = kmb_ocs_aead_gcm_setauthsize,
-		.setkey = kmb_ocs_aes_aead_set_key,
-		.encrypt = kmb_ocs_aes_gcm_encrypt,
-		.decrypt = kmb_ocs_aes_gcm_decrypt,
+		.base.init = ocs_aes_aead_cra_init,
+		.base.exit = ocs_aead_cra_exit,
+		.base.ivsize = GCM_AES_IV_SIZE,
+		.base.maxauthsize = AES_BLOCK_SIZE,
+		.base.setauthsize = kmb_ocs_aead_gcm_setauthsize,
+		.base.setkey = kmb_ocs_aes_aead_set_key,
+		.base.encrypt = kmb_ocs_aes_gcm_encrypt,
+		.base.decrypt = kmb_ocs_aes_gcm_decrypt,
+		.op.do_one_request = kmb_ocs_aes_aead_do_one_request,
 	},
 	{
-		.base = {
+		.base.base = {
 			.cra_name = "ccm(aes)",
 			.cra_driver_name = "ccm-aes-keembay-ocs",
 			.cra_priority = KMB_OCS_PRIORITY,
@@ -1485,17 +1473,18 @@ static struct aead_alg algs_aead[] = {
 			.cra_alignmask = 0,
 			.cra_module = THIS_MODULE,
 		},
-		.init = ocs_aes_aead_cra_init,
-		.exit = ocs_aead_cra_exit,
-		.ivsize = AES_BLOCK_SIZE,
-		.maxauthsize = AES_BLOCK_SIZE,
-		.setauthsize = kmb_ocs_aead_ccm_setauthsize,
-		.setkey = kmb_ocs_aes_aead_set_key,
-		.encrypt = kmb_ocs_aes_ccm_encrypt,
-		.decrypt = kmb_ocs_aes_ccm_decrypt,
+		.base.init = ocs_aes_aead_cra_init,
+		.base.exit = ocs_aead_cra_exit,
+		.base.ivsize = AES_BLOCK_SIZE,
+		.base.maxauthsize = AES_BLOCK_SIZE,
+		.base.setauthsize = kmb_ocs_aead_ccm_setauthsize,
+		.base.setkey = kmb_ocs_aes_aead_set_key,
+		.base.encrypt = kmb_ocs_aes_ccm_encrypt,
+		.base.decrypt = kmb_ocs_aes_ccm_decrypt,
+		.op.do_one_request = kmb_ocs_aes_aead_do_one_request,
 	},
 	{
-		.base = {
+		.base.base = {
 			.cra_name = "gcm(sm4)",
 			.cra_driver_name = "gcm-sm4-keembay-ocs",
 			.cra_priority = KMB_OCS_PRIORITY,
@@ -1506,17 +1495,18 @@ static struct aead_alg algs_aead[] = {
 			.cra_alignmask = 0,
 			.cra_module = THIS_MODULE,
 		},
-		.init = ocs_sm4_aead_cra_init,
-		.exit = ocs_aead_cra_exit,
-		.ivsize = GCM_AES_IV_SIZE,
-		.maxauthsize = AES_BLOCK_SIZE,
-		.setauthsize = kmb_ocs_aead_gcm_setauthsize,
-		.setkey = kmb_ocs_sm4_aead_set_key,
-		.encrypt = kmb_ocs_sm4_gcm_encrypt,
-		.decrypt = kmb_ocs_sm4_gcm_decrypt,
+		.base.init = ocs_sm4_aead_cra_init,
+		.base.exit = ocs_aead_cra_exit,
+		.base.ivsize = GCM_AES_IV_SIZE,
+		.base.maxauthsize = AES_BLOCK_SIZE,
+		.base.setauthsize = kmb_ocs_aead_gcm_setauthsize,
+		.base.setkey = kmb_ocs_sm4_aead_set_key,
+		.base.encrypt = kmb_ocs_sm4_gcm_encrypt,
+		.base.decrypt = kmb_ocs_sm4_gcm_decrypt,
+		.op.do_one_request = kmb_ocs_aes_aead_do_one_request,
 	},
 	{
-		.base = {
+		.base.base = {
 			.cra_name = "ccm(sm4)",
 			.cra_driver_name = "ccm-sm4-keembay-ocs",
 			.cra_priority = KMB_OCS_PRIORITY,
@@ -1527,21 +1517,22 @@ static struct aead_alg algs_aead[] = {
 			.cra_alignmask = 0,
 			.cra_module = THIS_MODULE,
 		},
-		.init = ocs_sm4_aead_cra_init,
-		.exit = ocs_aead_cra_exit,
-		.ivsize = AES_BLOCK_SIZE,
-		.maxauthsize = AES_BLOCK_SIZE,
-		.setauthsize = kmb_ocs_aead_ccm_setauthsize,
-		.setkey = kmb_ocs_sm4_aead_set_key,
-		.encrypt = kmb_ocs_sm4_ccm_encrypt,
-		.decrypt = kmb_ocs_sm4_ccm_decrypt,
+		.base.init = ocs_sm4_aead_cra_init,
+		.base.exit = ocs_aead_cra_exit,
+		.base.ivsize = AES_BLOCK_SIZE,
+		.base.maxauthsize = AES_BLOCK_SIZE,
+		.base.setauthsize = kmb_ocs_aead_ccm_setauthsize,
+		.base.setkey = kmb_ocs_sm4_aead_set_key,
+		.base.encrypt = kmb_ocs_sm4_ccm_encrypt,
+		.base.decrypt = kmb_ocs_sm4_ccm_decrypt,
+		.op.do_one_request = kmb_ocs_aes_aead_do_one_request,
 	}
 };
 
 static void unregister_aes_algs(struct ocs_aes_dev *aes_dev)
 {
-	crypto_unregister_aeads(algs_aead, ARRAY_SIZE(algs_aead));
-	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+	crypto_engine_unregister_aeads(algs_aead, ARRAY_SIZE(algs_aead));
+	crypto_engine_unregister_skciphers(algs, ARRAY_SIZE(algs));
 }
 
 static int register_aes_algs(struct ocs_aes_dev *aes_dev)
@@ -1552,13 +1543,13 @@ static int register_aes_algs(struct ocs_aes_dev *aes_dev)
 	 * If any algorithm fails to register, all preceding algorithms that
 	 * were successfully registered will be automatically unregistered.
 	 */
-	ret = crypto_register_aeads(algs_aead, ARRAY_SIZE(algs_aead));
+	ret = crypto_engine_register_aeads(algs_aead, ARRAY_SIZE(algs_aead));
 	if (ret)
 		return ret;
 
-	ret = crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+	ret = crypto_engine_register_skciphers(algs, ARRAY_SIZE(algs));
 	if (ret)
-		crypto_unregister_aeads(algs_aead, ARRAY_SIZE(algs));
+		crypto_engine_unregister_aeads(algs_aead, ARRAY_SIZE(algs));
 
 	return ret;
 }
diff --git a/drivers/crypto/intel/keembay/keembay-ocs-ecc.c b/drivers/crypto/intel/keembay/keembay-ocs-ecc.c
index e91e570b7ae0..fb95deed9057 100644
--- a/drivers/crypto/intel/keembay/keembay-ocs-ecc.c
+++ b/drivers/crypto/intel/keembay/keembay-ocs-ecc.c
@@ -7,30 +7,27 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <crypto/ecc_curve.h>
+#include <crypto/ecdh.h>
+#include <crypto/engine.h>
+#include <crypto/internal/ecc.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/kpp.h>
+#include <crypto/rng.h>
 #include <linux/clk.h>
 #include <linux/completion.h>
-#include <linux/crypto.h>
-#include <linux/delay.h>
+#include <linux/err.h>
 #include <linux/fips.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/irq.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-
-#include <crypto/ecc_curve.h>
-#include <crypto/ecdh.h>
-#include <crypto/engine.h>
-#include <crypto/kpp.h>
-#include <crypto/rng.h>
-
-#include <crypto/internal/ecc.h>
-#include <crypto/internal/kpp.h>
+#include <linux/string.h>
 
 #define DRV_NAME			"keembay-ocs-ecc"
 
@@ -95,13 +92,11 @@ struct ocs_ecc_dev {
 
 /**
  * struct ocs_ecc_ctx - Transformation context.
- * @engine_ctx:	 Crypto engine ctx.
  * @ecc_dev:	 The ECC driver associated with this context.
  * @curve:	 The elliptic curve used by this transformation.
  * @private_key: The private key.
  */
 struct ocs_ecc_ctx {
-	struct crypto_engine_ctx engine_ctx;
 	struct ocs_ecc_dev *ecc_dev;
 	const struct ecc_curve *curve;
 	u64 private_key[KMB_ECC_VLI_MAX_DIGITS];
@@ -794,8 +789,6 @@ static int kmb_ecc_tctx_init(struct ocs_ecc_ctx *tctx, unsigned int curve_id)
 	if (!tctx->curve)
 		return -EOPNOTSUPP;
 
-	tctx->engine_ctx.op.do_one_request = kmb_ocs_ecc_do_one_request;
-
 	return 0;
 }
 
@@ -828,36 +821,38 @@ static unsigned int kmb_ocs_ecdh_max_size(struct crypto_kpp *tfm)
 	return digits_to_bytes(tctx->curve->g.ndigits) * 2;
 }
 
-static struct kpp_alg ocs_ecdh_p256 = {
-	.set_secret = kmb_ocs_ecdh_set_secret,
-	.generate_public_key = kmb_ocs_ecdh_generate_public_key,
-	.compute_shared_secret = kmb_ocs_ecdh_compute_shared_secret,
-	.init = kmb_ocs_ecdh_nist_p256_init_tfm,
-	.exit = kmb_ocs_ecdh_exit_tfm,
-	.max_size = kmb_ocs_ecdh_max_size,
-	.base = {
+static struct kpp_engine_alg ocs_ecdh_p256 = {
+	.base.set_secret = kmb_ocs_ecdh_set_secret,
+	.base.generate_public_key = kmb_ocs_ecdh_generate_public_key,
+	.base.compute_shared_secret = kmb_ocs_ecdh_compute_shared_secret,
+	.base.init = kmb_ocs_ecdh_nist_p256_init_tfm,
+	.base.exit = kmb_ocs_ecdh_exit_tfm,
+	.base.max_size = kmb_ocs_ecdh_max_size,
+	.base.base = {
 		.cra_name = "ecdh-nist-p256",
 		.cra_driver_name = "ecdh-nist-p256-keembay-ocs",
 		.cra_priority = KMB_OCS_ECC_PRIORITY,
 		.cra_module = THIS_MODULE,
 		.cra_ctxsize = sizeof(struct ocs_ecc_ctx),
 	},
+	.op.do_one_request = kmb_ocs_ecc_do_one_request,
 };
 
-static struct kpp_alg ocs_ecdh_p384 = {
-	.set_secret = kmb_ocs_ecdh_set_secret,
-	.generate_public_key = kmb_ocs_ecdh_generate_public_key,
-	.compute_shared_secret = kmb_ocs_ecdh_compute_shared_secret,
-	.init = kmb_ocs_ecdh_nist_p384_init_tfm,
-	.exit = kmb_ocs_ecdh_exit_tfm,
-	.max_size = kmb_ocs_ecdh_max_size,
-	.base = {
+static struct kpp_engine_alg ocs_ecdh_p384 = {
+	.base.set_secret = kmb_ocs_ecdh_set_secret,
+	.base.generate_public_key = kmb_ocs_ecdh_generate_public_key,
+	.base.compute_shared_secret = kmb_ocs_ecdh_compute_shared_secret,
+	.base.init = kmb_ocs_ecdh_nist_p384_init_tfm,
+	.base.exit = kmb_ocs_ecdh_exit_tfm,
+	.base.max_size = kmb_ocs_ecdh_max_size,
+	.base.base = {
 		.cra_name = "ecdh-nist-p384",
 		.cra_driver_name = "ecdh-nist-p384-keembay-ocs",
 		.cra_priority = KMB_OCS_ECC_PRIORITY,
 		.cra_module = THIS_MODULE,
 		.cra_ctxsize = sizeof(struct ocs_ecc_ctx),
 	},
+	.op.do_one_request = kmb_ocs_ecc_do_one_request,
 };
 
 static irqreturn_t ocs_ecc_irq_handler(int irq, void *dev_id)
@@ -939,14 +934,14 @@ static int kmb_ocs_ecc_probe(struct platform_device *pdev)
 	}
 
 	/* Register the KPP algo. */
-	rc = crypto_register_kpp(&ocs_ecdh_p256);
+	rc = crypto_engine_register_kpp(&ocs_ecdh_p256);
 	if (rc) {
 		dev_err(dev,
 			"Could not register OCS algorithms with Crypto API\n");
 		goto cleanup;
 	}
 
-	rc = crypto_register_kpp(&ocs_ecdh_p384);
+	rc = crypto_engine_register_kpp(&ocs_ecdh_p384);
 	if (rc) {
 		dev_err(dev,
 			"Could not register OCS algorithms with Crypto API\n");
@@ -956,7 +951,7 @@ static int kmb_ocs_ecc_probe(struct platform_device *pdev)
 	return 0;
 
 ocs_ecdh_p384_error:
-	crypto_unregister_kpp(&ocs_ecdh_p256);
+	crypto_engine_unregister_kpp(&ocs_ecdh_p256);
 
 cleanup:
 	crypto_engine_exit(ecc_dev->engine);
@@ -975,8 +970,8 @@ static int kmb_ocs_ecc_remove(struct platform_device *pdev)
 
 	ecc_dev = platform_get_drvdata(pdev);
 
-	crypto_unregister_kpp(&ocs_ecdh_p384);
-	crypto_unregister_kpp(&ocs_ecdh_p256);
+	crypto_engine_unregister_kpp(&ocs_ecdh_p384);
+	crypto_engine_unregister_kpp(&ocs_ecdh_p256);
 
 	spin_lock(&ocs_ecc.lock);
 	list_del(&ecc_dev->list);
diff --git a/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c b/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c
index 51a6de6294cb..57a20281ead8 100644
--- a/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c
+++ b/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c
@@ -5,19 +5,20 @@
  * Copyright (C) 2018-2020 Intel Corporation
  */
 
-#include <linux/completion.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/of_device.h>
-
 #include <crypto/engine.h>
+#include <crypto/hmac.h>
+#include <crypto/internal/hash.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/sha2.h>
 #include <crypto/sm3.h>
-#include <crypto/hmac.h>
-#include <crypto/internal/hash.h>
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/string.h>
 
 #include "ocs-hcu.h"
 
@@ -34,7 +35,6 @@
 
 /**
  * struct ocs_hcu_ctx: OCS HCU Transform context.
- * @engine_ctx:	 Crypto Engine context.
  * @hcu_dev:	 The OCS HCU device used by the transformation.
  * @key:	 The key (used only for HMAC transformations).
  * @key_len:	 The length of the key.
@@ -42,7 +42,6 @@
  * @is_hmac_tfm: Whether or not this is a HMAC transformation.
  */
 struct ocs_hcu_ctx {
-	struct crypto_engine_ctx engine_ctx;
 	struct ocs_hcu_dev *hcu_dev;
 	u8 key[SHA512_BLOCK_SIZE];
 	size_t key_len;
@@ -824,11 +823,6 @@ static void __cra_init(struct crypto_tfm *tfm, struct ocs_hcu_ctx *ctx)
 {
 	crypto_ahash_set_reqsize_dma(__crypto_ahash_cast(tfm),
 				     sizeof(struct ocs_hcu_rctx));
-
-	/* Init context to 0. */
-	memzero_explicit(ctx, sizeof(*ctx));
-	/* Set engine ops. */
-	ctx->engine_ctx.op.do_one_request = kmb_ocs_hcu_do_one_request;
 }
 
 static int kmb_ocs_hcu_sha_cra_init(struct crypto_tfm *tfm)
@@ -883,17 +877,17 @@ static void kmb_ocs_hcu_hmac_cra_exit(struct crypto_tfm *tfm)
 	memzero_explicit(ctx->key, sizeof(ctx->key));
 }
 
-static struct ahash_alg ocs_hcu_algs[] = {
+static struct ahash_engine_alg ocs_hcu_algs[] = {
 #ifdef CONFIG_CRYPTO_DEV_KEEMBAY_OCS_HCU_HMAC_SHA224
 {
-	.init		= kmb_ocs_hcu_init,
-	.update		= kmb_ocs_hcu_update,
-	.final		= kmb_ocs_hcu_final,
-	.finup		= kmb_ocs_hcu_finup,
-	.digest		= kmb_ocs_hcu_digest,
-	.export		= kmb_ocs_hcu_export,
-	.import		= kmb_ocs_hcu_import,
-	.halg = {
+	.base.init		= kmb_ocs_hcu_init,
+	.base.update		= kmb_ocs_hcu_update,
+	.base.final		= kmb_ocs_hcu_final,
+	.base.finup		= kmb_ocs_hcu_finup,
+	.base.digest		= kmb_ocs_hcu_digest,
+	.base.export		= kmb_ocs_hcu_export,
+	.base.import		= kmb_ocs_hcu_import,
+	.base.halg = {
 		.digestsize	= SHA224_DIGEST_SIZE,
 		.statesize	= sizeof(struct ocs_hcu_rctx),
 		.base	= {
@@ -907,18 +901,19 @@ static struct ahash_alg ocs_hcu_algs[] = {
 			.cra_module		= THIS_MODULE,
 			.cra_init		= kmb_ocs_hcu_sha_cra_init,
 		}
-	}
+	},
+	.op.do_one_request = kmb_ocs_hcu_do_one_request,
 },
 {
-	.init		= kmb_ocs_hcu_init,
-	.update		= kmb_ocs_hcu_update,
-	.final		= kmb_ocs_hcu_final,
-	.finup		= kmb_ocs_hcu_finup,
-	.digest		= kmb_ocs_hcu_digest,
-	.export		= kmb_ocs_hcu_export,
-	.import		= kmb_ocs_hcu_import,
-	.setkey		= kmb_ocs_hcu_setkey,
-	.halg = {
+	.base.init		= kmb_ocs_hcu_init,
+	.base.update		= kmb_ocs_hcu_update,
+	.base.final		= kmb_ocs_hcu_final,
+	.base.finup		= kmb_ocs_hcu_finup,
+	.base.digest		= kmb_ocs_hcu_digest,
+	.base.export		= kmb_ocs_hcu_export,
+	.base.import		= kmb_ocs_hcu_import,
+	.base.setkey		= kmb_ocs_hcu_setkey,
+	.base.halg = {
 		.digestsize	= SHA224_DIGEST_SIZE,
 		.statesize	= sizeof(struct ocs_hcu_rctx),
 		.base	= {
@@ -933,18 +928,19 @@ static struct ahash_alg ocs_hcu_algs[] = {
 			.cra_init		= kmb_ocs_hcu_hmac_cra_init,
 			.cra_exit		= kmb_ocs_hcu_hmac_cra_exit,
 		}
-	}
+	},
+	.op.do_one_request = kmb_ocs_hcu_do_one_request,
 },
 #endif /* CONFIG_CRYPTO_DEV_KEEMBAY_OCS_HCU_HMAC_SHA224 */
 {
-	.init		= kmb_ocs_hcu_init,
-	.update		= kmb_ocs_hcu_update,
-	.final		= kmb_ocs_hcu_final,
-	.finup		= kmb_ocs_hcu_finup,
-	.digest		= kmb_ocs_hcu_digest,
-	.export		= kmb_ocs_hcu_export,
-	.import		= kmb_ocs_hcu_import,
-	.halg = {
+	.base.init		= kmb_ocs_hcu_init,
+	.base.update		= kmb_ocs_hcu_update,
+	.base.final		= kmb_ocs_hcu_final,
+	.base.finup		= kmb_ocs_hcu_finup,
+	.base.digest		= kmb_ocs_hcu_digest,
+	.base.export		= kmb_ocs_hcu_export,
+	.base.import		= kmb_ocs_hcu_import,
+	.base.halg = {
 		.digestsize	= SHA256_DIGEST_SIZE,
 		.statesize	= sizeof(struct ocs_hcu_rctx),
 		.base	= {
@@ -958,18 +954,19 @@ static struct ahash_alg ocs_hcu_algs[] = {
 			.cra_module		= THIS_MODULE,
 			.cra_init		= kmb_ocs_hcu_sha_cra_init,
 		}
-	}
+	},
+	.op.do_one_request = kmb_ocs_hcu_do_one_request,
 },
 {
-	.init		= kmb_ocs_hcu_init,
-	.update		= kmb_ocs_hcu_update,
-	.final		= kmb_ocs_hcu_final,
-	.finup		= kmb_ocs_hcu_finup,
-	.digest		= kmb_ocs_hcu_digest,
-	.export		= kmb_ocs_hcu_export,
-	.import		= kmb_ocs_hcu_import,
-	.setkey		= kmb_ocs_hcu_setkey,
-	.halg = {
+	.base.init		= kmb_ocs_hcu_init,
+	.base.update		= kmb_ocs_hcu_update,
+	.base.final		= kmb_ocs_hcu_final,
+	.base.finup		= kmb_ocs_hcu_finup,
+	.base.digest		= kmb_ocs_hcu_digest,
+	.base.export		= kmb_ocs_hcu_export,
+	.base.import		= kmb_ocs_hcu_import,
+	.base.setkey		= kmb_ocs_hcu_setkey,
+	.base.halg = {
 		.digestsize	= SHA256_DIGEST_SIZE,
 		.statesize	= sizeof(struct ocs_hcu_rctx),
 		.base	= {
@@ -984,17 +981,18 @@ static struct ahash_alg ocs_hcu_algs[] = {
 			.cra_init		= kmb_ocs_hcu_hmac_cra_init,
 			.cra_exit		= kmb_ocs_hcu_hmac_cra_exit,
 		}
-	}
+	},
+	.op.do_one_request = kmb_ocs_hcu_do_one_request,
 },
 {
-	.init		= kmb_ocs_hcu_init,
-	.update		= kmb_ocs_hcu_update,
-	.final		= kmb_ocs_hcu_final,
-	.finup		= kmb_ocs_hcu_finup,
-	.digest		= kmb_ocs_hcu_digest,
-	.export		= kmb_ocs_hcu_export,
-	.import		= kmb_ocs_hcu_import,
-	.halg = {
+	.base.init		= kmb_ocs_hcu_init,
+	.base.update		= kmb_ocs_hcu_update,
+	.base.final		= kmb_ocs_hcu_final,
+	.base.finup		= kmb_ocs_hcu_finup,
+	.base.digest		= kmb_ocs_hcu_digest,
+	.base.export		= kmb_ocs_hcu_export,
+	.base.import		= kmb_ocs_hcu_import,
+	.base.halg = {
 		.digestsize	= SM3_DIGEST_SIZE,
 		.statesize	= sizeof(struct ocs_hcu_rctx),
 		.base	= {
@@ -1008,18 +1006,19 @@ static struct ahash_alg ocs_hcu_algs[] = {
 			.cra_module		= THIS_MODULE,
 			.cra_init		= kmb_ocs_hcu_sm3_cra_init,
 		}
-	}
+	},
+	.op.do_one_request = kmb_ocs_hcu_do_one_request,
 },
 {
-	.init		= kmb_ocs_hcu_init,
-	.update		= kmb_ocs_hcu_update,
-	.final		= kmb_ocs_hcu_final,
-	.finup		= kmb_ocs_hcu_finup,
-	.digest		= kmb_ocs_hcu_digest,
-	.export		= kmb_ocs_hcu_export,
-	.import		= kmb_ocs_hcu_import,
-	.setkey		= kmb_ocs_hcu_setkey,
-	.halg = {
+	.base.init		= kmb_ocs_hcu_init,
+	.base.update		= kmb_ocs_hcu_update,
+	.base.final		= kmb_ocs_hcu_final,
+	.base.finup		= kmb_ocs_hcu_finup,
+	.base.digest		= kmb_ocs_hcu_digest,
+	.base.export		= kmb_ocs_hcu_export,
+	.base.import		= kmb_ocs_hcu_import,
+	.base.setkey		= kmb_ocs_hcu_setkey,
+	.base.halg = {
 		.digestsize	= SM3_DIGEST_SIZE,
 		.statesize	= sizeof(struct ocs_hcu_rctx),
 		.base	= {
@@ -1034,17 +1033,18 @@ static struct ahash_alg ocs_hcu_algs[] = {
 			.cra_init		= kmb_ocs_hcu_hmac_sm3_cra_init,
 			.cra_exit		= kmb_ocs_hcu_hmac_cra_exit,
 		}
-	}
+	},
+	.op.do_one_request = kmb_ocs_hcu_do_one_request,
 },
 {
-	.init		= kmb_ocs_hcu_init,
-	.update		= kmb_ocs_hcu_update,
-	.final		= kmb_ocs_hcu_final,
-	.finup		= kmb_ocs_hcu_finup,
-	.digest		= kmb_ocs_hcu_digest,
-	.export		= kmb_ocs_hcu_export,
-	.import		= kmb_ocs_hcu_import,
-	.halg = {
+	.base.init		= kmb_ocs_hcu_init,
+	.base.update		= kmb_ocs_hcu_update,
+	.base.final		= kmb_ocs_hcu_final,
+	.base.finup		= kmb_ocs_hcu_finup,
+	.base.digest		= kmb_ocs_hcu_digest,
+	.base.export		= kmb_ocs_hcu_export,
+	.base.import		= kmb_ocs_hcu_import,
+	.base.halg = {
 		.digestsize	= SHA384_DIGEST_SIZE,
 		.statesize	= sizeof(struct ocs_hcu_rctx),
 		.base	= {
@@ -1058,18 +1058,19 @@ static struct ahash_alg ocs_hcu_algs[] = {
 			.cra_module		= THIS_MODULE,
 			.cra_init		= kmb_ocs_hcu_sha_cra_init,
 		}
-	}
+	},
+	.op.do_one_request = kmb_ocs_hcu_do_one_request,
 },
 {
-	.init		= kmb_ocs_hcu_init,
-	.update		= kmb_ocs_hcu_update,
-	.final		= kmb_ocs_hcu_final,
-	.finup		= kmb_ocs_hcu_finup,
-	.digest		= kmb_ocs_hcu_digest,
-	.export		= kmb_ocs_hcu_export,
-	.import		= kmb_ocs_hcu_import,
-	.setkey		= kmb_ocs_hcu_setkey,
-	.halg = {
+	.base.init		= kmb_ocs_hcu_init,
+	.base.update		= kmb_ocs_hcu_update,
+	.base.final		= kmb_ocs_hcu_final,
+	.base.finup		= kmb_ocs_hcu_finup,
+	.base.digest		= kmb_ocs_hcu_digest,
+	.base.export		= kmb_ocs_hcu_export,
+	.base.import		= kmb_ocs_hcu_import,
+	.base.setkey		= kmb_ocs_hcu_setkey,
+	.base.halg = {
 		.digestsize	= SHA384_DIGEST_SIZE,
 		.statesize	= sizeof(struct ocs_hcu_rctx),
 		.base	= {
@@ -1084,17 +1085,18 @@ static struct ahash_alg ocs_hcu_algs[] = {
 			.cra_init		= kmb_ocs_hcu_hmac_cra_init,
 			.cra_exit		= kmb_ocs_hcu_hmac_cra_exit,
 		}
-	}
+	},
+	.op.do_one_request = kmb_ocs_hcu_do_one_request,
 },
 {
-	.init		= kmb_ocs_hcu_init,
-	.update		= kmb_ocs_hcu_update,
-	.final		= kmb_ocs_hcu_final,
-	.finup		= kmb_ocs_hcu_finup,
-	.digest		= kmb_ocs_hcu_digest,
-	.export		= kmb_ocs_hcu_export,
-	.import		= kmb_ocs_hcu_import,
-	.halg = {
+	.base.init		= kmb_ocs_hcu_init,
+	.base.update		= kmb_ocs_hcu_update,
+	.base.final		= kmb_ocs_hcu_final,
+	.base.finup		= kmb_ocs_hcu_finup,
+	.base.digest		= kmb_ocs_hcu_digest,
+	.base.export		= kmb_ocs_hcu_export,
+	.base.import		= kmb_ocs_hcu_import,
+	.base.halg = {
 		.digestsize	= SHA512_DIGEST_SIZE,
 		.statesize	= sizeof(struct ocs_hcu_rctx),
 		.base	= {
@@ -1108,18 +1110,19 @@ static struct ahash_alg ocs_hcu_algs[] = {
 			.cra_module		= THIS_MODULE,
 			.cra_init		= kmb_ocs_hcu_sha_cra_init,
 		}
-	}
+	},
+	.op.do_one_request = kmb_ocs_hcu_do_one_request,
 },
 {
-	.init		= kmb_ocs_hcu_init,
-	.update		= kmb_ocs_hcu_update,
-	.final		= kmb_ocs_hcu_final,
-	.finup		= kmb_ocs_hcu_finup,
-	.digest		= kmb_ocs_hcu_digest,
-	.export		= kmb_ocs_hcu_export,
-	.import		= kmb_ocs_hcu_import,
-	.setkey		= kmb_ocs_hcu_setkey,
-	.halg = {
+	.base.init		= kmb_ocs_hcu_init,
+	.base.update		= kmb_ocs_hcu_update,
+	.base.final		= kmb_ocs_hcu_final,
+	.base.finup		= kmb_ocs_hcu_finup,
+	.base.digest		= kmb_ocs_hcu_digest,
+	.base.export		= kmb_ocs_hcu_export,
+	.base.import		= kmb_ocs_hcu_import,
+	.base.setkey		= kmb_ocs_hcu_setkey,
+	.base.halg = {
 		.digestsize	= SHA512_DIGEST_SIZE,
 		.statesize	= sizeof(struct ocs_hcu_rctx),
 		.base	= {
@@ -1134,7 +1137,8 @@ static struct ahash_alg ocs_hcu_algs[] = {
 			.cra_init		= kmb_ocs_hcu_hmac_cra_init,
 			.cra_exit		= kmb_ocs_hcu_hmac_cra_exit,
 		}
-	}
+	},
+	.op.do_one_request = kmb_ocs_hcu_do_one_request,
 },
 };
 
@@ -1155,7 +1159,7 @@ static int kmb_ocs_hcu_remove(struct platform_device *pdev)
 	if (!hcu_dev)
 		return -ENODEV;
 
-	crypto_unregister_ahashes(ocs_hcu_algs, ARRAY_SIZE(ocs_hcu_algs));
+	crypto_engine_unregister_ahashes(ocs_hcu_algs, ARRAY_SIZE(ocs_hcu_algs));
 
 	rc = crypto_engine_exit(hcu_dev->engine);
 
@@ -1223,7 +1227,7 @@ static int kmb_ocs_hcu_probe(struct platform_device *pdev)
 
 	/* Security infrastructure guarantees OCS clock is enabled. */
 
-	rc = crypto_register_ahashes(ocs_hcu_algs, ARRAY_SIZE(ocs_hcu_algs));
+	rc = crypto_engine_register_ahashes(ocs_hcu_algs, ARRAY_SIZE(ocs_hcu_algs));
 	if (rc) {
 		dev_err(dev, "Could not register algorithms.\n");
 		goto cleanup;

From 03906fba750ed468b8ea3484afc650454ce45bcc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:55:08 +0800
Subject: [PATCH 1348/2047] crypto: omap - Use new crypto_engine_op interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/omap-aes-gcm.c |  25 ++--
 drivers/crypto/omap-aes.c     | 210 ++++++++++++++------------
 drivers/crypto/omap-aes.h     |  15 +-
 drivers/crypto/omap-des.c     | 181 +++++++++++-----------
 drivers/crypto/omap-sham.c    | 274 +++++++++++++++++-----------------
 5 files changed, 365 insertions(+), 340 deletions(-)

diff --git a/drivers/crypto/omap-aes-gcm.c b/drivers/crypto/omap-aes-gcm.c
index d02363e976e7..c498950402e8 100644
--- a/drivers/crypto/omap-aes-gcm.c
+++ b/drivers/crypto/omap-aes-gcm.c
@@ -7,18 +7,21 @@
  * Copyright (c) 2016 Texas Instruments Incorporated
  */
 
-#include <linux/errno.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-#include <linux/dmaengine.h>
-#include <linux/omap-dma.h>
-#include <linux/interrupt.h>
-#include <linux/pm_runtime.h>
 #include <crypto/aes.h>
+#include <crypto/engine.h>
 #include <crypto/gcm.h>
+#include <crypto/internal/aead.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/skcipher.h>
-#include <crypto/internal/aead.h>
+#include <linux/errno.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/omap-dma.h>
+#include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
+#include <linux/string.h>
 
 #include "omap-crypto.h"
 #include "omap-aes.h"
@@ -354,7 +357,7 @@ int omap_aes_4106gcm_setauthsize(struct crypto_aead *parent,
 	return crypto_rfc4106_check_authsize(authsize);
 }
 
-static int omap_aes_gcm_crypt_req(struct crypto_engine *engine, void *areq)
+int omap_aes_gcm_crypt_req(struct crypto_engine *engine, void *areq)
 {
 	struct aead_request *req = container_of(areq, struct aead_request,
 						base);
@@ -379,10 +382,6 @@ static int omap_aes_gcm_crypt_req(struct crypto_engine *engine, void *areq)
 
 int omap_aes_gcm_cra_init(struct crypto_aead *tfm)
 {
-	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
-
-	ctx->enginectx.op.do_one_request = omap_aes_gcm_crypt_req;
-
 	crypto_aead_set_reqsize(tfm, sizeof(struct omap_aes_reqctx));
 
 	return 0;
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index ad0d8db086db..ea1331218105 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -33,6 +33,7 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/scatterlist.h>
+#include <linux/string.h>
 
 #include "omap-crypto.h"
 #include "omap-aes.h"
@@ -638,8 +639,6 @@ static int omap_aes_init_tfm(struct crypto_skcipher *tfm)
 	crypto_skcipher_set_reqsize(tfm, sizeof(struct omap_aes_reqctx) +
 					 crypto_skcipher_reqsize(blk));
 
-	ctx->enginectx.op.do_one_request = omap_aes_crypt_req;
-
 	return 0;
 }
 
@@ -655,68 +654,77 @@ static void omap_aes_exit_tfm(struct crypto_skcipher *tfm)
 
 /* ********************** ALGS ************************************ */
 
-static struct skcipher_alg algs_ecb_cbc[] = {
+static struct skcipher_engine_alg algs_ecb_cbc[] = {
 {
-	.base.cra_name		= "ecb(aes)",
-	.base.cra_driver_name	= "ecb-aes-omap",
-	.base.cra_priority	= 300,
-	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_NEED_FALLBACK,
-	.base.cra_blocksize	= AES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct omap_aes_ctx),
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "ecb(aes)",
+		.base.cra_driver_name	= "ecb-aes-omap",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+					  CRYPTO_ALG_ASYNC |
+					  CRYPTO_ALG_NEED_FALLBACK,
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct omap_aes_ctx),
+		.base.cra_module	= THIS_MODULE,
 
-	.min_keysize		= AES_MIN_KEY_SIZE,
-	.max_keysize		= AES_MAX_KEY_SIZE,
-	.setkey			= omap_aes_setkey,
-	.encrypt		= omap_aes_ecb_encrypt,
-	.decrypt		= omap_aes_ecb_decrypt,
-	.init			= omap_aes_init_tfm,
-	.exit			= omap_aes_exit_tfm,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.setkey			= omap_aes_setkey,
+		.encrypt		= omap_aes_ecb_encrypt,
+		.decrypt		= omap_aes_ecb_decrypt,
+		.init			= omap_aes_init_tfm,
+		.exit			= omap_aes_exit_tfm,
+	},
+	.op.do_one_request = omap_aes_crypt_req,
 },
 {
-	.base.cra_name		= "cbc(aes)",
-	.base.cra_driver_name	= "cbc-aes-omap",
-	.base.cra_priority	= 300,
-	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_NEED_FALLBACK,
-	.base.cra_blocksize	= AES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct omap_aes_ctx),
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "cbc(aes)",
+		.base.cra_driver_name	= "cbc-aes-omap",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+					  CRYPTO_ALG_ASYNC |
+					  CRYPTO_ALG_NEED_FALLBACK,
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct omap_aes_ctx),
+		.base.cra_module	= THIS_MODULE,
 
-	.min_keysize		= AES_MIN_KEY_SIZE,
-	.max_keysize		= AES_MAX_KEY_SIZE,
-	.ivsize			= AES_BLOCK_SIZE,
-	.setkey			= omap_aes_setkey,
-	.encrypt		= omap_aes_cbc_encrypt,
-	.decrypt		= omap_aes_cbc_decrypt,
-	.init			= omap_aes_init_tfm,
-	.exit			= omap_aes_exit_tfm,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
+		.setkey			= omap_aes_setkey,
+		.encrypt		= omap_aes_cbc_encrypt,
+		.decrypt		= omap_aes_cbc_decrypt,
+		.init			= omap_aes_init_tfm,
+		.exit			= omap_aes_exit_tfm,
+	},
+	.op.do_one_request = omap_aes_crypt_req,
 }
 };
 
-static struct skcipher_alg algs_ctr[] = {
+static struct skcipher_engine_alg algs_ctr[] = {
 {
-	.base.cra_name		= "ctr(aes)",
-	.base.cra_driver_name	= "ctr-aes-omap",
-	.base.cra_priority	= 300,
-	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_NEED_FALLBACK,
-	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct omap_aes_ctx),
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "ctr(aes)",
+		.base.cra_driver_name	= "ctr-aes-omap",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+					  CRYPTO_ALG_ASYNC |
+					  CRYPTO_ALG_NEED_FALLBACK,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct omap_aes_ctx),
+		.base.cra_module	= THIS_MODULE,
 
-	.min_keysize		= AES_MIN_KEY_SIZE,
-	.max_keysize		= AES_MAX_KEY_SIZE,
-	.ivsize			= AES_BLOCK_SIZE,
-	.setkey			= omap_aes_setkey,
-	.encrypt		= omap_aes_ctr_encrypt,
-	.decrypt		= omap_aes_ctr_decrypt,
-	.init			= omap_aes_init_tfm,
-	.exit			= omap_aes_exit_tfm,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
+		.setkey			= omap_aes_setkey,
+		.encrypt		= omap_aes_ctr_encrypt,
+		.decrypt		= omap_aes_ctr_decrypt,
+		.init			= omap_aes_init_tfm,
+		.exit			= omap_aes_exit_tfm,
+	},
+	.op.do_one_request = omap_aes_crypt_req,
 }
 };
 
@@ -727,46 +735,52 @@ static struct omap_aes_algs_info omap_aes_algs_info_ecb_cbc[] = {
 	},
 };
 
-static struct aead_alg algs_aead_gcm[] = {
+static struct aead_engine_alg algs_aead_gcm[] = {
 {
 	.base = {
-		.cra_name		= "gcm(aes)",
-		.cra_driver_name	= "gcm-aes-omap",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_ASYNC |
-					  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.cra_blocksize		= 1,
-		.cra_ctxsize		= sizeof(struct omap_aes_gcm_ctx),
-		.cra_alignmask		= 0xf,
-		.cra_module		= THIS_MODULE,
+		.base = {
+			.cra_name		= "gcm(aes)",
+			.cra_driver_name	= "gcm-aes-omap",
+			.cra_priority		= 300,
+			.cra_flags		= CRYPTO_ALG_ASYNC |
+						  CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize		= 1,
+			.cra_ctxsize		= sizeof(struct omap_aes_gcm_ctx),
+			.cra_alignmask		= 0xf,
+			.cra_module		= THIS_MODULE,
+		},
+		.init		= omap_aes_gcm_cra_init,
+		.ivsize		= GCM_AES_IV_SIZE,
+		.maxauthsize	= AES_BLOCK_SIZE,
+		.setkey		= omap_aes_gcm_setkey,
+		.setauthsize	= omap_aes_gcm_setauthsize,
+		.encrypt	= omap_aes_gcm_encrypt,
+		.decrypt	= omap_aes_gcm_decrypt,
 	},
-	.init		= omap_aes_gcm_cra_init,
-	.ivsize		= GCM_AES_IV_SIZE,
-	.maxauthsize	= AES_BLOCK_SIZE,
-	.setkey		= omap_aes_gcm_setkey,
-	.setauthsize	= omap_aes_gcm_setauthsize,
-	.encrypt	= omap_aes_gcm_encrypt,
-	.decrypt	= omap_aes_gcm_decrypt,
+	.op.do_one_request = omap_aes_gcm_crypt_req,
 },
 {
 	.base = {
-		.cra_name		= "rfc4106(gcm(aes))",
-		.cra_driver_name	= "rfc4106-gcm-aes-omap",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_ASYNC |
-					  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.cra_blocksize		= 1,
-		.cra_ctxsize		= sizeof(struct omap_aes_gcm_ctx),
-		.cra_alignmask		= 0xf,
-		.cra_module		= THIS_MODULE,
+		.base = {
+			.cra_name		= "rfc4106(gcm(aes))",
+			.cra_driver_name	= "rfc4106-gcm-aes-omap",
+			.cra_priority		= 300,
+			.cra_flags		= CRYPTO_ALG_ASYNC |
+						  CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize		= 1,
+			.cra_ctxsize		= sizeof(struct omap_aes_gcm_ctx),
+			.cra_alignmask		= 0xf,
+			.cra_module		= THIS_MODULE,
+		},
+		.init		= omap_aes_gcm_cra_init,
+		.maxauthsize	= AES_BLOCK_SIZE,
+		.ivsize		= GCM_RFC4106_IV_SIZE,
+		.setkey		= omap_aes_4106gcm_setkey,
+		.setauthsize	= omap_aes_4106gcm_setauthsize,
+		.encrypt	= omap_aes_4106gcm_encrypt,
+		.decrypt	= omap_aes_4106gcm_decrypt,
 	},
-	.init		= omap_aes_gcm_cra_init,
-	.maxauthsize	= AES_BLOCK_SIZE,
-	.ivsize		= GCM_RFC4106_IV_SIZE,
-	.setkey		= omap_aes_4106gcm_setkey,
-	.setauthsize	= omap_aes_4106gcm_setauthsize,
-	.encrypt	= omap_aes_4106gcm_encrypt,
-	.decrypt	= omap_aes_4106gcm_decrypt,
+	.op.do_one_request = omap_aes_gcm_crypt_req,
 },
 };
 
@@ -1088,8 +1102,8 @@ static int omap_aes_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct omap_aes_dev *dd;
-	struct skcipher_alg *algp;
-	struct aead_alg *aalg;
+	struct skcipher_engine_alg *algp;
+	struct aead_engine_alg *aalg;
 	struct resource res;
 	int err = -ENOMEM, i, j, irq = -1;
 	u32 reg;
@@ -1182,9 +1196,9 @@ static int omap_aes_probe(struct platform_device *pdev)
 			for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
 				algp = &dd->pdata->algs_info[i].algs_list[j];
 
-				pr_debug("reg alg: %s\n", algp->base.cra_name);
+				pr_debug("reg alg: %s\n", algp->base.base.cra_name);
 
-				err = crypto_register_skcipher(algp);
+				err = crypto_engine_register_skcipher(algp);
 				if (err)
 					goto err_algs;
 
@@ -1198,9 +1212,9 @@ static int omap_aes_probe(struct platform_device *pdev)
 		for (i = 0; i < dd->pdata->aead_algs_info->size; i++) {
 			aalg = &dd->pdata->aead_algs_info->algs_list[i];
 
-			pr_debug("reg alg: %s\n", aalg->base.cra_name);
+			pr_debug("reg alg: %s\n", aalg->base.base.cra_name);
 
-			err = crypto_register_aead(aalg);
+			err = crypto_engine_register_aead(aalg);
 			if (err)
 				goto err_aead_algs;
 
@@ -1218,12 +1232,12 @@ static int omap_aes_probe(struct platform_device *pdev)
 err_aead_algs:
 	for (i = dd->pdata->aead_algs_info->registered - 1; i >= 0; i--) {
 		aalg = &dd->pdata->aead_algs_info->algs_list[i];
-		crypto_unregister_aead(aalg);
+		crypto_engine_unregister_aead(aalg);
 	}
 err_algs:
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
-			crypto_unregister_skcipher(
+			crypto_engine_unregister_skcipher(
 					&dd->pdata->algs_info[i].algs_list[j]);
 
 err_engine:
@@ -1245,7 +1259,7 @@ err_data:
 static int omap_aes_remove(struct platform_device *pdev)
 {
 	struct omap_aes_dev *dd = platform_get_drvdata(pdev);
-	struct aead_alg *aalg;
+	struct aead_engine_alg *aalg;
 	int i, j;
 
 	spin_lock_bh(&list_lock);
@@ -1254,14 +1268,14 @@ static int omap_aes_remove(struct platform_device *pdev)
 
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) {
-			crypto_unregister_skcipher(
+			crypto_engine_unregister_skcipher(
 					&dd->pdata->algs_info[i].algs_list[j]);
 			dd->pdata->algs_info[i].registered--;
 		}
 
 	for (i = dd->pdata->aead_algs_info->registered - 1; i >= 0; i--) {
 		aalg = &dd->pdata->aead_algs_info->algs_list[i];
-		crypto_unregister_aead(aalg);
+		crypto_engine_unregister_aead(aalg);
 		dd->pdata->aead_algs_info->registered--;
 	}
 
diff --git a/drivers/crypto/omap-aes.h b/drivers/crypto/omap-aes.h
index 23d073e87bb8..0f35c9164764 100644
--- a/drivers/crypto/omap-aes.h
+++ b/drivers/crypto/omap-aes.h
@@ -10,7 +10,6 @@
 #define __OMAP_AES_H__
 
 #include <crypto/aes.h>
-#include <crypto/engine.h>
 
 #define DST_MAXBURST			4
 #define DMA_MIN				(DST_MAXBURST * sizeof(u32))
@@ -93,7 +92,6 @@ struct omap_aes_gcm_result {
 };
 
 struct omap_aes_ctx {
-	struct crypto_engine_ctx enginectx;
 	int		keylen;
 	u32		key[AES_KEYSIZE_256 / sizeof(u32)];
 	u8		nonce[4];
@@ -117,15 +115,15 @@ struct omap_aes_reqctx {
 #define OMAP_AES_CACHE_SIZE	0
 
 struct omap_aes_algs_info {
-	struct skcipher_alg	*algs_list;
-	unsigned int		size;
-	unsigned int		registered;
+	struct skcipher_engine_alg	*algs_list;
+	unsigned int			size;
+	unsigned int			registered;
 };
 
 struct omap_aes_aead_algs {
-	struct aead_alg	*algs_list;
-	unsigned int	size;
-	unsigned int	registered;
+	struct aead_engine_alg		*algs_list;
+	unsigned int			size;
+	unsigned int			registered;
 };
 
 struct omap_aes_pdata {
@@ -218,5 +216,6 @@ int omap_aes_crypt_dma_start(struct omap_aes_dev *dd);
 int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd);
 void omap_aes_gcm_dma_out_callback(void *data);
 void omap_aes_clear_copy_flags(struct omap_aes_dev *dd);
+int omap_aes_gcm_crypt_req(struct crypto_engine *engine, void *areq);
 
 #endif
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index 29a3b4c9edaf..ae9e9e4eb94c 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -16,27 +16,25 @@
 #define prx(num)  do { } while (0)
 #endif
 
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/scatterlist.h>
+#include <crypto/engine.h>
+#include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
-#include <linux/pm_runtime.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_address.h>
-#include <linux/io.h>
-#include <linux/crypto.h>
-#include <linux/interrupt.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/internal/des.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/algapi.h>
-#include <crypto/engine.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
+#include <linux/string.h>
 
 #include "omap-crypto.h"
 
@@ -83,7 +81,6 @@
 #define FLAGS_OUT_DATA_ST_SHIFT	10
 
 struct omap_des_ctx {
-	struct crypto_engine_ctx enginectx;
 	struct omap_des_dev *dd;
 
 	int		keylen;
@@ -99,9 +96,9 @@ struct omap_des_reqctx {
 #define OMAP_DES_CACHE_SIZE	0
 
 struct omap_des_algs_info {
-	struct skcipher_alg	*algs_list;
-	unsigned int		size;
-	unsigned int		registered;
+	struct skcipher_engine_alg	*algs_list;
+	unsigned int			size;
+	unsigned int			registered;
 };
 
 struct omap_des_pdata {
@@ -707,89 +704,97 @@ static int omap_des_cbc_decrypt(struct skcipher_request *req)
 
 static int omap_des_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct omap_des_ctx *ctx = crypto_skcipher_ctx(tfm);
-
 	pr_debug("enter\n");
 
 	crypto_skcipher_set_reqsize(tfm, sizeof(struct omap_des_reqctx));
 
-	ctx->enginectx.op.do_one_request = omap_des_crypt_req;
-
 	return 0;
 }
 
 /* ********************** ALGS ************************************ */
 
-static struct skcipher_alg algs_ecb_cbc[] = {
+static struct skcipher_engine_alg algs_ecb_cbc[] = {
 {
-	.base.cra_name		= "ecb(des)",
-	.base.cra_driver_name	= "ecb-des-omap",
-	.base.cra_priority	= 300,
-	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC,
-	.base.cra_blocksize	= DES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct omap_des_ctx),
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "ecb(des)",
+		.base.cra_driver_name	= "ecb-des-omap",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+					  CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct omap_des_ctx),
+		.base.cra_module	= THIS_MODULE,
 
-	.min_keysize		= DES_KEY_SIZE,
-	.max_keysize		= DES_KEY_SIZE,
-	.setkey			= omap_des_setkey,
-	.encrypt		= omap_des_ecb_encrypt,
-	.decrypt		= omap_des_ecb_decrypt,
-	.init			= omap_des_init_tfm,
+		.min_keysize		= DES_KEY_SIZE,
+		.max_keysize		= DES_KEY_SIZE,
+		.setkey			= omap_des_setkey,
+		.encrypt		= omap_des_ecb_encrypt,
+		.decrypt		= omap_des_ecb_decrypt,
+		.init			= omap_des_init_tfm,
+	},
+	.op.do_one_request = omap_des_crypt_req,
 },
 {
-	.base.cra_name		= "cbc(des)",
-	.base.cra_driver_name	= "cbc-des-omap",
-	.base.cra_priority	= 300,
-	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC,
-	.base.cra_blocksize	= DES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct omap_des_ctx),
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "cbc(des)",
+		.base.cra_driver_name	= "cbc-des-omap",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+					  CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct omap_des_ctx),
+		.base.cra_module	= THIS_MODULE,
 
-	.min_keysize		= DES_KEY_SIZE,
-	.max_keysize		= DES_KEY_SIZE,
-	.ivsize			= DES_BLOCK_SIZE,
-	.setkey			= omap_des_setkey,
-	.encrypt		= omap_des_cbc_encrypt,
-	.decrypt		= omap_des_cbc_decrypt,
-	.init			= omap_des_init_tfm,
+		.min_keysize		= DES_KEY_SIZE,
+		.max_keysize		= DES_KEY_SIZE,
+		.ivsize			= DES_BLOCK_SIZE,
+		.setkey			= omap_des_setkey,
+		.encrypt		= omap_des_cbc_encrypt,
+		.decrypt		= omap_des_cbc_decrypt,
+		.init			= omap_des_init_tfm,
+	},
+	.op.do_one_request = omap_des_crypt_req,
 },
 {
-	.base.cra_name		= "ecb(des3_ede)",
-	.base.cra_driver_name	= "ecb-des3-omap",
-	.base.cra_priority	= 300,
-	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC,
-	.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct omap_des_ctx),
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "ecb(des3_ede)",
+		.base.cra_driver_name	= "ecb-des3-omap",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+					  CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct omap_des_ctx),
+		.base.cra_module	= THIS_MODULE,
 
-	.min_keysize		= DES3_EDE_KEY_SIZE,
-	.max_keysize		= DES3_EDE_KEY_SIZE,
-	.setkey			= omap_des3_setkey,
-	.encrypt		= omap_des_ecb_encrypt,
-	.decrypt		= omap_des_ecb_decrypt,
-	.init			= omap_des_init_tfm,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.setkey			= omap_des3_setkey,
+		.encrypt		= omap_des_ecb_encrypt,
+		.decrypt		= omap_des_ecb_decrypt,
+		.init			= omap_des_init_tfm,
+	},
+	.op.do_one_request = omap_des_crypt_req,
 },
 {
-	.base.cra_name		= "cbc(des3_ede)",
-	.base.cra_driver_name	= "cbc-des3-omap",
-	.base.cra_priority	= 300,
-	.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC,
-	.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct omap_des_ctx),
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "cbc(des3_ede)",
+		.base.cra_driver_name	= "cbc-des3-omap",
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_ALG_KERN_DRIVER_ONLY |
+					  CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct omap_des_ctx),
+		.base.cra_module	= THIS_MODULE,
 
-	.min_keysize		= DES3_EDE_KEY_SIZE,
-	.max_keysize		= DES3_EDE_KEY_SIZE,
-	.ivsize			= DES3_EDE_BLOCK_SIZE,
-	.setkey			= omap_des3_setkey,
-	.encrypt		= omap_des_cbc_encrypt,
-	.decrypt		= omap_des_cbc_decrypt,
-	.init			= omap_des_init_tfm,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.ivsize			= DES3_EDE_BLOCK_SIZE,
+		.setkey			= omap_des3_setkey,
+		.encrypt		= omap_des_cbc_encrypt,
+		.decrypt		= omap_des_cbc_decrypt,
+		.init			= omap_des_init_tfm,
+	},
+	.op.do_one_request = omap_des_crypt_req,
 }
 };
 
@@ -947,7 +952,7 @@ static int omap_des_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct omap_des_dev *dd;
-	struct skcipher_alg *algp;
+	struct skcipher_engine_alg *algp;
 	struct resource *res;
 	int err = -ENOMEM, i, j, irq = -1;
 	u32 reg;
@@ -1035,9 +1040,9 @@ static int omap_des_probe(struct platform_device *pdev)
 		for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
 			algp = &dd->pdata->algs_info[i].algs_list[j];
 
-			pr_debug("reg alg: %s\n", algp->base.cra_name);
+			pr_debug("reg alg: %s\n", algp->base.base.cra_name);
 
-			err = crypto_register_skcipher(algp);
+			err = crypto_engine_register_skcipher(algp);
 			if (err)
 				goto err_algs;
 
@@ -1050,7 +1055,7 @@ static int omap_des_probe(struct platform_device *pdev)
 err_algs:
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
-			crypto_unregister_skcipher(
+			crypto_engine_unregister_skcipher(
 					&dd->pdata->algs_info[i].algs_list[j]);
 
 err_engine:
@@ -1080,7 +1085,7 @@ static int omap_des_remove(struct platform_device *pdev)
 
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
-			crypto_unregister_skcipher(
+			crypto_engine_unregister_skcipher(
 					&dd->pdata->algs_info[i].algs_list[j]);
 
 	tasklet_kill(&dd->done_task);
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 2ef92301969f..4b79d54fd671 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -13,34 +13,31 @@
 
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
+#include <crypto/engine.h>
+#include <crypto/hmac.h>
+#include <crypto/internal/hash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
 #include <linux/err.h>
 #include <linux/device.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
-#include <linux/pm_runtime.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/delay.h>
-#include <linux/crypto.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/algapi.h>
-#include <crypto/sha1.h>
-#include <crypto/sha2.h>
-#include <crypto/hash.h>
-#include <crypto/hmac.h>
-#include <crypto/internal/hash.h>
-#include <crypto/engine.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 
 #define MD5_DIGEST_SIZE			16
 
@@ -168,7 +165,6 @@ struct omap_sham_hmac_ctx {
 };
 
 struct omap_sham_ctx {
-	struct crypto_engine_ctx	enginectx;
 	unsigned long		flags;
 
 	/* fallback stuff */
@@ -180,7 +176,7 @@ struct omap_sham_ctx {
 #define OMAP_SHAM_QUEUE_LENGTH	10
 
 struct omap_sham_algs_info {
-	struct ahash_alg	*algs_list;
+	struct ahash_engine_alg	*algs_list;
 	unsigned int		size;
 	unsigned int		registered;
 };
@@ -1353,8 +1349,6 @@ static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
 
 	}
 
-	tctx->enginectx.op.do_one_request = omap_sham_hash_one_req;
-
 	return 0;
 }
 
@@ -1425,15 +1419,15 @@ static int omap_sham_import(struct ahash_request *req, const void *in)
 	return 0;
 }
 
-static struct ahash_alg algs_sha1_md5[] = {
+static struct ahash_engine_alg algs_sha1_md5[] = {
 {
-	.init		= omap_sham_init,
-	.update		= omap_sham_update,
-	.final		= omap_sham_final,
-	.finup		= omap_sham_finup,
-	.digest		= omap_sham_digest,
-	.halg.digestsize	= SHA1_DIGEST_SIZE,
-	.halg.base	= {
+	.base.init		= omap_sham_init,
+	.base.update		= omap_sham_update,
+	.base.final		= omap_sham_final,
+	.base.finup		= omap_sham_finup,
+	.base.digest		= omap_sham_digest,
+	.base.halg.digestsize	= SHA1_DIGEST_SIZE,
+	.base.halg.base	= {
 		.cra_name		= "sha1",
 		.cra_driver_name	= "omap-sha1",
 		.cra_priority		= 400,
@@ -1446,16 +1440,17 @@ static struct ahash_alg algs_sha1_md5[] = {
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_init,
 		.cra_exit		= omap_sham_cra_exit,
-	}
+	},
+	.op.do_one_request = omap_sham_hash_one_req,
 },
 {
-	.init		= omap_sham_init,
-	.update		= omap_sham_update,
-	.final		= omap_sham_final,
-	.finup		= omap_sham_finup,
-	.digest		= omap_sham_digest,
-	.halg.digestsize	= MD5_DIGEST_SIZE,
-	.halg.base	= {
+	.base.init		= omap_sham_init,
+	.base.update		= omap_sham_update,
+	.base.final		= omap_sham_final,
+	.base.finup		= omap_sham_finup,
+	.base.digest		= omap_sham_digest,
+	.base.halg.digestsize	= MD5_DIGEST_SIZE,
+	.base.halg.base	= {
 		.cra_name		= "md5",
 		.cra_driver_name	= "omap-md5",
 		.cra_priority		= 400,
@@ -1468,17 +1463,18 @@ static struct ahash_alg algs_sha1_md5[] = {
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_init,
 		.cra_exit		= omap_sham_cra_exit,
-	}
+	},
+	.op.do_one_request = omap_sham_hash_one_req,
 },
 {
-	.init		= omap_sham_init,
-	.update		= omap_sham_update,
-	.final		= omap_sham_final,
-	.finup		= omap_sham_finup,
-	.digest		= omap_sham_digest,
-	.setkey		= omap_sham_setkey,
-	.halg.digestsize	= SHA1_DIGEST_SIZE,
-	.halg.base	= {
+	.base.init		= omap_sham_init,
+	.base.update		= omap_sham_update,
+	.base.final		= omap_sham_final,
+	.base.finup		= omap_sham_finup,
+	.base.digest		= omap_sham_digest,
+	.base.setkey		= omap_sham_setkey,
+	.base.halg.digestsize	= SHA1_DIGEST_SIZE,
+	.base.halg.base	= {
 		.cra_name		= "hmac(sha1)",
 		.cra_driver_name	= "omap-hmac-sha1",
 		.cra_priority		= 400,
@@ -1492,17 +1488,18 @@ static struct ahash_alg algs_sha1_md5[] = {
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_sha1_init,
 		.cra_exit		= omap_sham_cra_exit,
-	}
+	},
+	.op.do_one_request = omap_sham_hash_one_req,
 },
 {
-	.init		= omap_sham_init,
-	.update		= omap_sham_update,
-	.final		= omap_sham_final,
-	.finup		= omap_sham_finup,
-	.digest		= omap_sham_digest,
-	.setkey		= omap_sham_setkey,
-	.halg.digestsize	= MD5_DIGEST_SIZE,
-	.halg.base	= {
+	.base.init		= omap_sham_init,
+	.base.update		= omap_sham_update,
+	.base.final		= omap_sham_final,
+	.base.finup		= omap_sham_finup,
+	.base.digest		= omap_sham_digest,
+	.base.setkey		= omap_sham_setkey,
+	.base.halg.digestsize	= MD5_DIGEST_SIZE,
+	.base.halg.base	= {
 		.cra_name		= "hmac(md5)",
 		.cra_driver_name	= "omap-hmac-md5",
 		.cra_priority		= 400,
@@ -1516,20 +1513,21 @@ static struct ahash_alg algs_sha1_md5[] = {
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_md5_init,
 		.cra_exit		= omap_sham_cra_exit,
-	}
+	},
+	.op.do_one_request = omap_sham_hash_one_req,
 }
 };
 
 /* OMAP4 has some algs in addition to what OMAP2 has */
-static struct ahash_alg algs_sha224_sha256[] = {
+static struct ahash_engine_alg algs_sha224_sha256[] = {
 {
-	.init		= omap_sham_init,
-	.update		= omap_sham_update,
-	.final		= omap_sham_final,
-	.finup		= omap_sham_finup,
-	.digest		= omap_sham_digest,
-	.halg.digestsize	= SHA224_DIGEST_SIZE,
-	.halg.base	= {
+	.base.init		= omap_sham_init,
+	.base.update		= omap_sham_update,
+	.base.final		= omap_sham_final,
+	.base.finup		= omap_sham_finup,
+	.base.digest		= omap_sham_digest,
+	.base.halg.digestsize	= SHA224_DIGEST_SIZE,
+	.base.halg.base	= {
 		.cra_name		= "sha224",
 		.cra_driver_name	= "omap-sha224",
 		.cra_priority		= 400,
@@ -1542,16 +1540,17 @@ static struct ahash_alg algs_sha224_sha256[] = {
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_init,
 		.cra_exit		= omap_sham_cra_exit,
-	}
+	},
+	.op.do_one_request = omap_sham_hash_one_req,
 },
 {
-	.init		= omap_sham_init,
-	.update		= omap_sham_update,
-	.final		= omap_sham_final,
-	.finup		= omap_sham_finup,
-	.digest		= omap_sham_digest,
-	.halg.digestsize	= SHA256_DIGEST_SIZE,
-	.halg.base	= {
+	.base.init		= omap_sham_init,
+	.base.update		= omap_sham_update,
+	.base.final		= omap_sham_final,
+	.base.finup		= omap_sham_finup,
+	.base.digest		= omap_sham_digest,
+	.base.halg.digestsize	= SHA256_DIGEST_SIZE,
+	.base.halg.base	= {
 		.cra_name		= "sha256",
 		.cra_driver_name	= "omap-sha256",
 		.cra_priority		= 400,
@@ -1564,17 +1563,18 @@ static struct ahash_alg algs_sha224_sha256[] = {
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_init,
 		.cra_exit		= omap_sham_cra_exit,
-	}
+	},
+	.op.do_one_request = omap_sham_hash_one_req,
 },
 {
-	.init		= omap_sham_init,
-	.update		= omap_sham_update,
-	.final		= omap_sham_final,
-	.finup		= omap_sham_finup,
-	.digest		= omap_sham_digest,
-	.setkey		= omap_sham_setkey,
-	.halg.digestsize	= SHA224_DIGEST_SIZE,
-	.halg.base	= {
+	.base.init		= omap_sham_init,
+	.base.update		= omap_sham_update,
+	.base.final		= omap_sham_final,
+	.base.finup		= omap_sham_finup,
+	.base.digest		= omap_sham_digest,
+	.base.setkey		= omap_sham_setkey,
+	.base.halg.digestsize	= SHA224_DIGEST_SIZE,
+	.base.halg.base	= {
 		.cra_name		= "hmac(sha224)",
 		.cra_driver_name	= "omap-hmac-sha224",
 		.cra_priority		= 400,
@@ -1588,17 +1588,18 @@ static struct ahash_alg algs_sha224_sha256[] = {
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_sha224_init,
 		.cra_exit		= omap_sham_cra_exit,
-	}
+	},
+	.op.do_one_request = omap_sham_hash_one_req,
 },
 {
-	.init		= omap_sham_init,
-	.update		= omap_sham_update,
-	.final		= omap_sham_final,
-	.finup		= omap_sham_finup,
-	.digest		= omap_sham_digest,
-	.setkey		= omap_sham_setkey,
-	.halg.digestsize	= SHA256_DIGEST_SIZE,
-	.halg.base	= {
+	.base.init		= omap_sham_init,
+	.base.update		= omap_sham_update,
+	.base.final		= omap_sham_final,
+	.base.finup		= omap_sham_finup,
+	.base.digest		= omap_sham_digest,
+	.base.setkey		= omap_sham_setkey,
+	.base.halg.digestsize	= SHA256_DIGEST_SIZE,
+	.base.halg.base	= {
 		.cra_name		= "hmac(sha256)",
 		.cra_driver_name	= "omap-hmac-sha256",
 		.cra_priority		= 400,
@@ -1612,19 +1613,20 @@ static struct ahash_alg algs_sha224_sha256[] = {
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_sha256_init,
 		.cra_exit		= omap_sham_cra_exit,
-	}
+	},
+	.op.do_one_request = omap_sham_hash_one_req,
 },
 };
 
-static struct ahash_alg algs_sha384_sha512[] = {
+static struct ahash_engine_alg algs_sha384_sha512[] = {
 {
-	.init		= omap_sham_init,
-	.update		= omap_sham_update,
-	.final		= omap_sham_final,
-	.finup		= omap_sham_finup,
-	.digest		= omap_sham_digest,
-	.halg.digestsize	= SHA384_DIGEST_SIZE,
-	.halg.base	= {
+	.base.init		= omap_sham_init,
+	.base.update		= omap_sham_update,
+	.base.final		= omap_sham_final,
+	.base.finup		= omap_sham_finup,
+	.base.digest		= omap_sham_digest,
+	.base.halg.digestsize	= SHA384_DIGEST_SIZE,
+	.base.halg.base	= {
 		.cra_name		= "sha384",
 		.cra_driver_name	= "omap-sha384",
 		.cra_priority		= 400,
@@ -1637,16 +1639,17 @@ static struct ahash_alg algs_sha384_sha512[] = {
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_init,
 		.cra_exit		= omap_sham_cra_exit,
-	}
+	},
+	.op.do_one_request = omap_sham_hash_one_req,
 },
 {
-	.init		= omap_sham_init,
-	.update		= omap_sham_update,
-	.final		= omap_sham_final,
-	.finup		= omap_sham_finup,
-	.digest		= omap_sham_digest,
-	.halg.digestsize	= SHA512_DIGEST_SIZE,
-	.halg.base	= {
+	.base.init		= omap_sham_init,
+	.base.update		= omap_sham_update,
+	.base.final		= omap_sham_final,
+	.base.finup		= omap_sham_finup,
+	.base.digest		= omap_sham_digest,
+	.base.halg.digestsize	= SHA512_DIGEST_SIZE,
+	.base.halg.base	= {
 		.cra_name		= "sha512",
 		.cra_driver_name	= "omap-sha512",
 		.cra_priority		= 400,
@@ -1659,17 +1662,18 @@ static struct ahash_alg algs_sha384_sha512[] = {
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_init,
 		.cra_exit		= omap_sham_cra_exit,
-	}
+	},
+	.op.do_one_request = omap_sham_hash_one_req,
 },
 {
-	.init		= omap_sham_init,
-	.update		= omap_sham_update,
-	.final		= omap_sham_final,
-	.finup		= omap_sham_finup,
-	.digest		= omap_sham_digest,
-	.setkey		= omap_sham_setkey,
-	.halg.digestsize	= SHA384_DIGEST_SIZE,
-	.halg.base	= {
+	.base.init		= omap_sham_init,
+	.base.update		= omap_sham_update,
+	.base.final		= omap_sham_final,
+	.base.finup		= omap_sham_finup,
+	.base.digest		= omap_sham_digest,
+	.base.setkey		= omap_sham_setkey,
+	.base.halg.digestsize	= SHA384_DIGEST_SIZE,
+	.base.halg.base	= {
 		.cra_name		= "hmac(sha384)",
 		.cra_driver_name	= "omap-hmac-sha384",
 		.cra_priority		= 400,
@@ -1683,17 +1687,18 @@ static struct ahash_alg algs_sha384_sha512[] = {
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_sha384_init,
 		.cra_exit		= omap_sham_cra_exit,
-	}
+	},
+	.op.do_one_request = omap_sham_hash_one_req,
 },
 {
-	.init		= omap_sham_init,
-	.update		= omap_sham_update,
-	.final		= omap_sham_final,
-	.finup		= omap_sham_finup,
-	.digest		= omap_sham_digest,
-	.setkey		= omap_sham_setkey,
-	.halg.digestsize	= SHA512_DIGEST_SIZE,
-	.halg.base	= {
+	.base.init		= omap_sham_init,
+	.base.update		= omap_sham_update,
+	.base.final		= omap_sham_final,
+	.base.finup		= omap_sham_finup,
+	.base.digest		= omap_sham_digest,
+	.base.setkey		= omap_sham_setkey,
+	.base.halg.digestsize	= SHA512_DIGEST_SIZE,
+	.base.halg.base	= {
 		.cra_name		= "hmac(sha512)",
 		.cra_driver_name	= "omap-hmac-sha512",
 		.cra_priority		= 400,
@@ -1707,7 +1712,8 @@ static struct ahash_alg algs_sha384_sha512[] = {
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_sha512_init,
 		.cra_exit		= omap_sham_cra_exit,
-	}
+	},
+	.op.do_one_request = omap_sham_hash_one_req,
 },
 };
 
@@ -2148,14 +2154,16 @@ static int omap_sham_probe(struct platform_device *pdev)
 			break;
 
 		for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
+			struct ahash_engine_alg *ealg;
 			struct ahash_alg *alg;
 
-			alg = &dd->pdata->algs_info[i].algs_list[j];
+			ealg = &dd->pdata->algs_info[i].algs_list[j];
+			alg = &ealg->base;
 			alg->export = omap_sham_export;
 			alg->import = omap_sham_import;
 			alg->halg.statesize = sizeof(struct omap_sham_reqctx) +
 					      BUFLEN;
-			err = crypto_register_ahash(alg);
+			err = crypto_engine_register_ahash(ealg);
 			if (err)
 				goto err_algs;
 
@@ -2174,7 +2182,7 @@ static int omap_sham_probe(struct platform_device *pdev)
 err_algs:
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
-			crypto_unregister_ahash(
+			crypto_engine_unregister_ahash(
 					&dd->pdata->algs_info[i].algs_list[j]);
 err_engine_start:
 	crypto_engine_exit(dd->engine);
@@ -2205,7 +2213,7 @@ static int omap_sham_remove(struct platform_device *pdev)
 	spin_unlock_bh(&sham.lock);
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) {
-			crypto_unregister_ahash(
+			crypto_engine_unregister_ahash(
 					&dd->pdata->algs_info[i].algs_list[j]);
 			dd->pdata->algs_info[i].registered--;
 		}

From 1a15d26c3131b3210b7299d3864aa75b71687a16 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:55:10 +0800
Subject: [PATCH 1349/2047] crypto: rk3288 - Use new crypto_engine_op interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c       | 58 +++++++++-------
 drivers/crypto/rockchip/rk3288_crypto.h       | 21 +++---
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 67 +++++++++++--------
 .../crypto/rockchip/rk3288_crypto_skcipher.c  | 60 +++++++++++------
 4 files changed, 123 insertions(+), 83 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 9f6ba770a90a..937826c50470 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -10,14 +10,22 @@
  */
 
 #include "rk3288_crypto.h"
+#include <crypto/engine.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/clk.h>
 #include <linux/dma-mapping.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/clk.h>
-#include <linux/crypto.h>
 #include <linux/reset.h>
+#include <linux/spinlock.h>
 
 static struct rockchip_ip rocklist = {
 	.dev_list = LIST_HEAD_INIT(rocklist.dev_list),
@@ -184,7 +192,6 @@ static struct rk_crypto_tmp *rk_cipher_algs[] = {
 	&rk_ahash_md5,
 };
 
-#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
 static int rk_crypto_debugfs_show(struct seq_file *seq, void *v)
 {
 	struct rk_crypto_info *dd;
@@ -204,8 +211,8 @@ static int rk_crypto_debugfs_show(struct seq_file *seq, void *v)
 		switch (rk_cipher_algs[i]->type) {
 		case CRYPTO_ALG_TYPE_SKCIPHER:
 			seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
-				   rk_cipher_algs[i]->alg.skcipher.base.cra_driver_name,
-				   rk_cipher_algs[i]->alg.skcipher.base.cra_name,
+				   rk_cipher_algs[i]->alg.skcipher.base.base.cra_driver_name,
+				   rk_cipher_algs[i]->alg.skcipher.base.base.cra_name,
 				   rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb);
 			seq_printf(seq, "\tfallback due to length: %lu\n",
 				   rk_cipher_algs[i]->stat_fb_len);
@@ -216,8 +223,8 @@ static int rk_crypto_debugfs_show(struct seq_file *seq, void *v)
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
 			seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
-				   rk_cipher_algs[i]->alg.hash.halg.base.cra_driver_name,
-				   rk_cipher_algs[i]->alg.hash.halg.base.cra_name,
+				   rk_cipher_algs[i]->alg.hash.base.halg.base.cra_driver_name,
+				   rk_cipher_algs[i]->alg.hash.base.halg.base.cra_name,
 				   rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb);
 			break;
 		}
@@ -226,17 +233,20 @@ static int rk_crypto_debugfs_show(struct seq_file *seq, void *v)
 }
 
 DEFINE_SHOW_ATTRIBUTE(rk_crypto_debugfs);
-#endif
 
 static void register_debugfs(struct rk_crypto_info *crypto_info)
 {
-#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
+	struct dentry *dbgfs_dir __maybe_unused;
+	struct dentry *dbgfs_stats __maybe_unused;
+
 	/* Ignore error of debugfs */
-	rocklist.dbgfs_dir = debugfs_create_dir("rk3288_crypto", NULL);
-	rocklist.dbgfs_stats = debugfs_create_file("stats", 0444,
-						   rocklist.dbgfs_dir,
-						   &rocklist,
-						   &rk_crypto_debugfs_fops);
+	dbgfs_dir = debugfs_create_dir("rk3288_crypto", NULL);
+	dbgfs_stats = debugfs_create_file("stats", 0444, dbgfs_dir, &rocklist,
+					  &rk_crypto_debugfs_fops);
+
+#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
+	rocklist.dbgfs_dir = dbgfs_dir;
+	rocklist.dbgfs_stats = dbgfs_stats;
 #endif
 }
 
@@ -250,15 +260,15 @@ static int rk_crypto_register(struct rk_crypto_info *crypto_info)
 		switch (rk_cipher_algs[i]->type) {
 		case CRYPTO_ALG_TYPE_SKCIPHER:
 			dev_info(crypto_info->dev, "Register %s as %s\n",
-				 rk_cipher_algs[i]->alg.skcipher.base.cra_name,
-				 rk_cipher_algs[i]->alg.skcipher.base.cra_driver_name);
-			err = crypto_register_skcipher(&rk_cipher_algs[i]->alg.skcipher);
+				 rk_cipher_algs[i]->alg.skcipher.base.base.cra_name,
+				 rk_cipher_algs[i]->alg.skcipher.base.base.cra_driver_name);
+			err = crypto_engine_register_skcipher(&rk_cipher_algs[i]->alg.skcipher);
 			break;
 		case CRYPTO_ALG_TYPE_AHASH:
 			dev_info(crypto_info->dev, "Register %s as %s\n",
-				 rk_cipher_algs[i]->alg.hash.halg.base.cra_name,
-				 rk_cipher_algs[i]->alg.hash.halg.base.cra_driver_name);
-			err = crypto_register_ahash(&rk_cipher_algs[i]->alg.hash);
+				 rk_cipher_algs[i]->alg.hash.base.halg.base.cra_name,
+				 rk_cipher_algs[i]->alg.hash.base.halg.base.cra_driver_name);
+			err = crypto_engine_register_ahash(&rk_cipher_algs[i]->alg.hash);
 			break;
 		default:
 			dev_err(crypto_info->dev, "unknown algorithm\n");
@@ -271,9 +281,9 @@ static int rk_crypto_register(struct rk_crypto_info *crypto_info)
 err_cipher_algs:
 	for (k = 0; k < i; k++) {
 		if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER)
-			crypto_unregister_skcipher(&rk_cipher_algs[k]->alg.skcipher);
+			crypto_engine_unregister_skcipher(&rk_cipher_algs[k]->alg.skcipher);
 		else
-			crypto_unregister_ahash(&rk_cipher_algs[i]->alg.hash);
+			crypto_engine_unregister_ahash(&rk_cipher_algs[i]->alg.hash);
 	}
 	return err;
 }
@@ -284,9 +294,9 @@ static void rk_crypto_unregister(void)
 
 	for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) {
 		if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER)
-			crypto_unregister_skcipher(&rk_cipher_algs[i]->alg.skcipher);
+			crypto_engine_unregister_skcipher(&rk_cipher_algs[i]->alg.skcipher);
 		else
-			crypto_unregister_ahash(&rk_cipher_algs[i]->alg.hash);
+			crypto_engine_unregister_ahash(&rk_cipher_algs[i]->alg.hash);
 	}
 }
 
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index b2695258cade..3aa03cbfb6be 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -3,21 +3,18 @@
 #define __RK3288_CRYPTO_H__
 
 #include <crypto/aes.h>
-#include <crypto/internal/des.h>
-#include <crypto/algapi.h>
-#include <linux/dma-mapping.h>
-#include <linux/interrupt.h>
-#include <linux/debugfs.h>
-#include <linux/delay.h>
-#include <linux/pm_runtime.h>
-#include <linux/scatterlist.h>
 #include <crypto/engine.h>
+#include <crypto/internal/des.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
-
 #include <crypto/md5.h>
 #include <crypto/sha1.h>
 #include <crypto/sha2.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
+#include <linux/types.h>
 
 #define _SBF(v, f)			((v) << (f))
 
@@ -231,7 +228,6 @@ struct rk_crypto_info {
 
 /* the private variable of hash */
 struct rk_ahash_ctx {
-	struct crypto_engine_ctx enginectx;
 	/* for fallback */
 	struct crypto_ahash		*fallback_tfm;
 };
@@ -246,7 +242,6 @@ struct rk_ahash_rctx {
 
 /* the private variable of cipher */
 struct rk_cipher_ctx {
-	struct crypto_engine_ctx enginectx;
 	unsigned int			keylen;
 	u8				key[AES_MAX_KEY_SIZE];
 	u8				iv[AES_BLOCK_SIZE];
@@ -264,8 +259,8 @@ struct rk_crypto_tmp {
 	u32 type;
 	struct rk_crypto_info           *dev;
 	union {
-		struct skcipher_alg	skcipher;
-		struct ahash_alg	hash;
+		struct skcipher_engine_alg skcipher;
+		struct ahash_engine_alg hash;
 	} alg;
 	unsigned long stat_req;
 	unsigned long stat_fb;
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index 1519aa0a0f7c..8c143180645e 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -8,9 +8,15 @@
  *
  * Some ideas are from marvell/cesa.c and s5p-sss.c driver.
  */
-#include <linux/device.h>
+
 #include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <linux/device.h>
+#include <linux/err.h>
 #include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
 #include "rk3288_crypto.h"
 
 /*
@@ -40,8 +46,8 @@ static int rk_ahash_digest_fb(struct ahash_request *areq)
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct rk_ahash_ctx *tfmctx = crypto_ahash_ctx(tfm);
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
-	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
+	struct ahash_alg *alg = crypto_ahash_alg(tfm);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash.base);
 
 	algt->stat_fb++;
 
@@ -254,8 +260,8 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 	struct ahash_request *areq = container_of(breq, struct ahash_request, base);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
-	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
+	struct ahash_alg *alg = crypto_ahash_alg(tfm);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash.base);
 	struct scatterlist *sg = areq->src;
 	struct rk_crypto_info *rkc = rctx->dev;
 	int err;
@@ -335,12 +341,12 @@ theend:
 	return 0;
 }
 
-static int rk_cra_hash_init(struct crypto_tfm *tfm)
+static int rk_hash_init_tfm(struct crypto_ahash *tfm)
 {
-	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm);
-	const char *alg_name = crypto_tfm_alg_name(tfm);
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
-	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
+	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
+	const char *alg_name = crypto_ahash_alg_name(tfm);
+	struct ahash_alg *alg = crypto_ahash_alg(tfm);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash.base);
 
 	/* for fallback */
 	tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0,
@@ -350,25 +356,23 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm)
 		return PTR_ERR(tctx->fallback_tfm);
 	}
 
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+	crypto_ahash_set_reqsize(tfm,
 				 sizeof(struct rk_ahash_rctx) +
 				 crypto_ahash_reqsize(tctx->fallback_tfm));
 
-	tctx->enginectx.op.do_one_request = rk_hash_run;
-
 	return 0;
 }
 
-static void rk_cra_hash_exit(struct crypto_tfm *tfm)
+static void rk_hash_exit_tfm(struct crypto_ahash *tfm)
 {
-	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm);
+	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
 
 	crypto_free_ahash(tctx->fallback_tfm);
 }
 
 struct rk_crypto_tmp rk_ahash_sha1 = {
 	.type = CRYPTO_ALG_TYPE_AHASH,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = rk_ahash_init,
 		.update = rk_ahash_update,
 		.final = rk_ahash_final,
@@ -376,6 +380,8 @@ struct rk_crypto_tmp rk_ahash_sha1 = {
 		.export = rk_ahash_export,
 		.import = rk_ahash_import,
 		.digest = rk_ahash_digest,
+		.init_tfm = rk_hash_init_tfm,
+		.exit_tfm = rk_hash_exit_tfm,
 		.halg = {
 			 .digestsize = SHA1_DIGEST_SIZE,
 			 .statesize = sizeof(struct sha1_state),
@@ -388,17 +394,18 @@ struct rk_crypto_tmp rk_ahash_sha1 = {
 				  .cra_blocksize = SHA1_BLOCK_SIZE,
 				  .cra_ctxsize = sizeof(struct rk_ahash_ctx),
 				  .cra_alignmask = 3,
-				  .cra_init = rk_cra_hash_init,
-				  .cra_exit = rk_cra_hash_exit,
 				  .cra_module = THIS_MODULE,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = rk_hash_run,
+	},
 };
 
 struct rk_crypto_tmp rk_ahash_sha256 = {
 	.type = CRYPTO_ALG_TYPE_AHASH,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = rk_ahash_init,
 		.update = rk_ahash_update,
 		.final = rk_ahash_final,
@@ -406,6 +413,8 @@ struct rk_crypto_tmp rk_ahash_sha256 = {
 		.export = rk_ahash_export,
 		.import = rk_ahash_import,
 		.digest = rk_ahash_digest,
+		.init_tfm = rk_hash_init_tfm,
+		.exit_tfm = rk_hash_exit_tfm,
 		.halg = {
 			 .digestsize = SHA256_DIGEST_SIZE,
 			 .statesize = sizeof(struct sha256_state),
@@ -418,17 +427,18 @@ struct rk_crypto_tmp rk_ahash_sha256 = {
 				  .cra_blocksize = SHA256_BLOCK_SIZE,
 				  .cra_ctxsize = sizeof(struct rk_ahash_ctx),
 				  .cra_alignmask = 3,
-				  .cra_init = rk_cra_hash_init,
-				  .cra_exit = rk_cra_hash_exit,
 				  .cra_module = THIS_MODULE,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = rk_hash_run,
+	},
 };
 
 struct rk_crypto_tmp rk_ahash_md5 = {
 	.type = CRYPTO_ALG_TYPE_AHASH,
-	.alg.hash = {
+	.alg.hash.base = {
 		.init = rk_ahash_init,
 		.update = rk_ahash_update,
 		.final = rk_ahash_final,
@@ -436,6 +446,8 @@ struct rk_crypto_tmp rk_ahash_md5 = {
 		.export = rk_ahash_export,
 		.import = rk_ahash_import,
 		.digest = rk_ahash_digest,
+		.init_tfm = rk_hash_init_tfm,
+		.exit_tfm = rk_hash_exit_tfm,
 		.halg = {
 			 .digestsize = MD5_DIGEST_SIZE,
 			 .statesize = sizeof(struct md5_state),
@@ -448,10 +460,11 @@ struct rk_crypto_tmp rk_ahash_md5 = {
 				  .cra_blocksize = SHA1_BLOCK_SIZE,
 				  .cra_ctxsize = sizeof(struct rk_ahash_ctx),
 				  .cra_alignmask = 3,
-				  .cra_init = rk_cra_hash_init,
-				  .cra_exit = rk_cra_hash_exit,
 				  .cra_module = THIS_MODULE,
 			}
 		}
-	}
+	},
+	.alg.hash.op = {
+		.do_one_request = rk_hash_run,
+	},
 };
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 59069457582b..da95747d973f 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -8,8 +8,14 @@
  *
  * Some ideas are from marvell-cesa.c and s5p-sss.c driver.
  */
-#include <linux/device.h>
+
+#include <crypto/engine.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
 #include "rk3288_crypto.h"
 
 #define RK_CRYPTO_DEC			BIT(0)
@@ -18,7 +24,7 @@ static int rk_cipher_need_fallback(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
-	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher.base);
 	struct scatterlist *sgs, *sgd;
 	unsigned int stodo, dtodo, len;
 	unsigned int bs = crypto_skcipher_blocksize(tfm);
@@ -65,7 +71,7 @@ static int rk_cipher_fallback(struct skcipher_request *areq)
 	struct rk_cipher_ctx *op = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq);
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
-	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher.base);
 	int err;
 
 	algt->stat_fb++;
@@ -305,7 +311,7 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req)
 	unsigned int len = areq->cryptlen;
 	unsigned int todo;
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
-	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher.base);
 	struct rk_crypto_info *rkc = rctx->dev;
 
 	err = pm_runtime_resume_and_get(rkc->dev);
@@ -430,7 +436,7 @@ static int rk_cipher_tfm_init(struct crypto_skcipher *tfm)
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	const char *name = crypto_tfm_alg_name(&tfm->base);
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
-	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher.base);
 
 	ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(ctx->fallback_tfm)) {
@@ -442,8 +448,6 @@ static int rk_cipher_tfm_init(struct crypto_skcipher *tfm)
 	tfm->reqsize = sizeof(struct rk_cipher_rctx) +
 		crypto_skcipher_reqsize(ctx->fallback_tfm);
 
-	ctx->enginectx.op.do_one_request = rk_cipher_run;
-
 	return 0;
 }
 
@@ -457,7 +461,7 @@ static void rk_cipher_tfm_exit(struct crypto_skcipher *tfm)
 
 struct rk_crypto_tmp rk_ecb_aes_alg = {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base.cra_name		= "ecb(aes)",
 		.base.cra_driver_name	= "ecb-aes-rk",
 		.base.cra_priority	= 300,
@@ -474,12 +478,15 @@ struct rk_crypto_tmp rk_ecb_aes_alg = {
 		.setkey			= rk_aes_setkey,
 		.encrypt		= rk_aes_ecb_encrypt,
 		.decrypt		= rk_aes_ecb_decrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = rk_cipher_run,
+	},
 };
 
 struct rk_crypto_tmp rk_cbc_aes_alg = {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base.cra_name		= "cbc(aes)",
 		.base.cra_driver_name	= "cbc-aes-rk",
 		.base.cra_priority	= 300,
@@ -497,12 +504,15 @@ struct rk_crypto_tmp rk_cbc_aes_alg = {
 		.setkey			= rk_aes_setkey,
 		.encrypt		= rk_aes_cbc_encrypt,
 		.decrypt		= rk_aes_cbc_decrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = rk_cipher_run,
+	},
 };
 
 struct rk_crypto_tmp rk_ecb_des_alg = {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base.cra_name		= "ecb(des)",
 		.base.cra_driver_name	= "ecb-des-rk",
 		.base.cra_priority	= 300,
@@ -519,12 +529,15 @@ struct rk_crypto_tmp rk_ecb_des_alg = {
 		.setkey			= rk_des_setkey,
 		.encrypt		= rk_des_ecb_encrypt,
 		.decrypt		= rk_des_ecb_decrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = rk_cipher_run,
+	},
 };
 
 struct rk_crypto_tmp rk_cbc_des_alg = {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base.cra_name		= "cbc(des)",
 		.base.cra_driver_name	= "cbc-des-rk",
 		.base.cra_priority	= 300,
@@ -542,12 +555,15 @@ struct rk_crypto_tmp rk_cbc_des_alg = {
 		.setkey			= rk_des_setkey,
 		.encrypt		= rk_des_cbc_encrypt,
 		.decrypt		= rk_des_cbc_decrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = rk_cipher_run,
+	},
 };
 
 struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base.cra_name		= "ecb(des3_ede)",
 		.base.cra_driver_name	= "ecb-des3-ede-rk",
 		.base.cra_priority	= 300,
@@ -564,12 +580,15 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
 		.setkey			= rk_tdes_setkey,
 		.encrypt		= rk_des3_ede_ecb_encrypt,
 		.decrypt		= rk_des3_ede_ecb_decrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = rk_cipher_run,
+	},
 };
 
 struct rk_crypto_tmp rk_cbc_des3_ede_alg = {
 	.type = CRYPTO_ALG_TYPE_SKCIPHER,
-	.alg.skcipher = {
+	.alg.skcipher.base = {
 		.base.cra_name		= "cbc(des3_ede)",
 		.base.cra_driver_name	= "cbc-des3-ede-rk",
 		.base.cra_priority	= 300,
@@ -587,5 +606,8 @@ struct rk_crypto_tmp rk_cbc_des3_ede_alg = {
 		.setkey			= rk_tdes_setkey,
 		.encrypt		= rk_des3_ede_cbc_encrypt,
 		.decrypt		= rk_des3_ede_cbc_decrypt,
-	}
+	},
+	.alg.skcipher.op = {
+		.do_one_request = rk_cipher_run,
+	},
 };

From 982213e4730484ad080070a20f4a4a8deb0fa688 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:55:12 +0800
Subject: [PATCH 1350/2047] crypto: jh7110 - Use new crypto_engine_op interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/starfive/jh7110-aes.c  | 162 ++++++++-------
 drivers/crypto/starfive/jh7110-cryp.c |   9 +-
 drivers/crypto/starfive/jh7110-cryp.h |   2 -
 drivers/crypto/starfive/jh7110-hash.c | 282 ++++++++++++++------------
 4 files changed, 251 insertions(+), 204 deletions(-)

diff --git a/drivers/crypto/starfive/jh7110-aes.c b/drivers/crypto/starfive/jh7110-aes.c
index 777656cbb7ce..9378e6682f0e 100644
--- a/drivers/crypto/starfive/jh7110-aes.c
+++ b/drivers/crypto/starfive/jh7110-aes.c
@@ -5,12 +5,17 @@
  * Copyright (c) 2022 StarFive Technology
  */
 
-#include <linux/iopoll.h>
+#include <crypto/engine.h>
 #include <crypto/gcm.h>
-#include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include "jh7110-cryp.h"
+#include <linux/err.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 
 #define STARFIVE_AES_REGS_OFFSET	0x100
 #define STARFIVE_AES_AESDIO0R		(STARFIVE_AES_REGS_OFFSET + 0x0)
@@ -554,8 +559,6 @@ static int starfive_aes_init_tfm(struct crypto_skcipher *tfm)
 	crypto_skcipher_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) +
 				    sizeof(struct skcipher_request));
 
-	ctx->enginectx.op.do_one_request = starfive_aes_do_one_req;
-
 	return 0;
 }
 
@@ -638,8 +641,6 @@ static int starfive_aes_aead_init_tfm(struct crypto_aead *tfm)
 	crypto_aead_set_reqsize(tfm, sizeof(struct starfive_cryp_ctx) +
 				sizeof(struct aead_request));
 
-	ctx->enginectx.op.do_one_request = starfive_aes_aead_do_one_req;
-
 	return 0;
 }
 
@@ -844,15 +845,15 @@ static int starfive_aes_ccm_decrypt(struct aead_request *req)
 	return starfive_aes_aead_crypt(req, STARFIVE_AES_MODE_CCM);
 }
 
-static struct skcipher_alg skcipher_algs[] = {
+static struct skcipher_engine_alg skcipher_algs[] = {
 {
-	.init				= starfive_aes_init_tfm,
-	.setkey				= starfive_aes_setkey,
-	.encrypt			= starfive_aes_ecb_encrypt,
-	.decrypt			= starfive_aes_ecb_decrypt,
-	.min_keysize			= AES_MIN_KEY_SIZE,
-	.max_keysize			= AES_MAX_KEY_SIZE,
-	.base = {
+	.base.init			= starfive_aes_init_tfm,
+	.base.setkey			= starfive_aes_setkey,
+	.base.encrypt			= starfive_aes_ecb_encrypt,
+	.base.decrypt			= starfive_aes_ecb_decrypt,
+	.base.min_keysize		= AES_MIN_KEY_SIZE,
+	.base.max_keysize		= AES_MAX_KEY_SIZE,
+	.base.base = {
 		.cra_name		= "ecb(aes)",
 		.cra_driver_name	= "starfive-ecb-aes",
 		.cra_priority		= 200,
@@ -862,15 +863,18 @@ static struct skcipher_alg skcipher_algs[] = {
 		.cra_alignmask		= 0xf,
 		.cra_module		= THIS_MODULE,
 	},
+	.op = {
+		.do_one_request = starfive_aes_do_one_req,
+	},
 }, {
-	.init				= starfive_aes_init_tfm,
-	.setkey				= starfive_aes_setkey,
-	.encrypt			= starfive_aes_cbc_encrypt,
-	.decrypt			= starfive_aes_cbc_decrypt,
-	.min_keysize			= AES_MIN_KEY_SIZE,
-	.max_keysize			= AES_MAX_KEY_SIZE,
-	.ivsize				= AES_BLOCK_SIZE,
-	.base = {
+	.base.init			= starfive_aes_init_tfm,
+	.base.setkey			= starfive_aes_setkey,
+	.base.encrypt			= starfive_aes_cbc_encrypt,
+	.base.decrypt			= starfive_aes_cbc_decrypt,
+	.base.min_keysize		= AES_MIN_KEY_SIZE,
+	.base.max_keysize		= AES_MAX_KEY_SIZE,
+	.base.ivsize			= AES_BLOCK_SIZE,
+	.base.base = {
 		.cra_name		= "cbc(aes)",
 		.cra_driver_name	= "starfive-cbc-aes",
 		.cra_priority		= 200,
@@ -880,15 +884,18 @@ static struct skcipher_alg skcipher_algs[] = {
 		.cra_alignmask		= 0xf,
 		.cra_module		= THIS_MODULE,
 	},
+	.op = {
+		.do_one_request = starfive_aes_do_one_req,
+	},
 }, {
-	.init				= starfive_aes_init_tfm,
-	.setkey				= starfive_aes_setkey,
-	.encrypt			= starfive_aes_ctr_encrypt,
-	.decrypt			= starfive_aes_ctr_decrypt,
-	.min_keysize			= AES_MIN_KEY_SIZE,
-	.max_keysize			= AES_MAX_KEY_SIZE,
-	.ivsize				= AES_BLOCK_SIZE,
-	.base = {
+	.base.init			= starfive_aes_init_tfm,
+	.base.setkey			= starfive_aes_setkey,
+	.base.encrypt			= starfive_aes_ctr_encrypt,
+	.base.decrypt			= starfive_aes_ctr_decrypt,
+	.base.min_keysize		= AES_MIN_KEY_SIZE,
+	.base.max_keysize		= AES_MAX_KEY_SIZE,
+	.base.ivsize			= AES_BLOCK_SIZE,
+	.base.base = {
 		.cra_name		= "ctr(aes)",
 		.cra_driver_name	= "starfive-ctr-aes",
 		.cra_priority		= 200,
@@ -898,15 +905,18 @@ static struct skcipher_alg skcipher_algs[] = {
 		.cra_alignmask		= 0xf,
 		.cra_module		= THIS_MODULE,
 	},
+	.op = {
+		.do_one_request = starfive_aes_do_one_req,
+	},
 }, {
-	.init				= starfive_aes_init_tfm,
-	.setkey				= starfive_aes_setkey,
-	.encrypt			= starfive_aes_cfb_encrypt,
-	.decrypt			= starfive_aes_cfb_decrypt,
-	.min_keysize			= AES_MIN_KEY_SIZE,
-	.max_keysize			= AES_MAX_KEY_SIZE,
-	.ivsize				= AES_BLOCK_SIZE,
-	.base = {
+	.base.init			= starfive_aes_init_tfm,
+	.base.setkey			= starfive_aes_setkey,
+	.base.encrypt			= starfive_aes_cfb_encrypt,
+	.base.decrypt			= starfive_aes_cfb_decrypt,
+	.base.min_keysize		= AES_MIN_KEY_SIZE,
+	.base.max_keysize		= AES_MAX_KEY_SIZE,
+	.base.ivsize			= AES_BLOCK_SIZE,
+	.base.base = {
 		.cra_name		= "cfb(aes)",
 		.cra_driver_name	= "starfive-cfb-aes",
 		.cra_priority		= 200,
@@ -916,15 +926,18 @@ static struct skcipher_alg skcipher_algs[] = {
 		.cra_alignmask		= 0xf,
 		.cra_module		= THIS_MODULE,
 	},
+	.op = {
+		.do_one_request = starfive_aes_do_one_req,
+	},
 }, {
-	.init				= starfive_aes_init_tfm,
-	.setkey				= starfive_aes_setkey,
-	.encrypt			= starfive_aes_ofb_encrypt,
-	.decrypt			= starfive_aes_ofb_decrypt,
-	.min_keysize			= AES_MIN_KEY_SIZE,
-	.max_keysize			= AES_MAX_KEY_SIZE,
-	.ivsize				= AES_BLOCK_SIZE,
-	.base = {
+	.base.init			= starfive_aes_init_tfm,
+	.base.setkey			= starfive_aes_setkey,
+	.base.encrypt			= starfive_aes_ofb_encrypt,
+	.base.decrypt			= starfive_aes_ofb_decrypt,
+	.base.min_keysize		= AES_MIN_KEY_SIZE,
+	.base.max_keysize		= AES_MAX_KEY_SIZE,
+	.base.ivsize			= AES_BLOCK_SIZE,
+	.base.base = {
 		.cra_name		= "ofb(aes)",
 		.cra_driver_name	= "starfive-ofb-aes",
 		.cra_priority		= 200,
@@ -934,20 +947,23 @@ static struct skcipher_alg skcipher_algs[] = {
 		.cra_alignmask		= 0xf,
 		.cra_module		= THIS_MODULE,
 	},
+	.op = {
+		.do_one_request = starfive_aes_do_one_req,
+	},
 },
 };
 
-static struct aead_alg aead_algs[] = {
+static struct aead_engine_alg aead_algs[] = {
 {
-	.setkey                         = starfive_aes_aead_setkey,
-	.setauthsize                    = starfive_aes_gcm_setauthsize,
-	.encrypt                        = starfive_aes_gcm_encrypt,
-	.decrypt                        = starfive_aes_gcm_decrypt,
-	.init                           = starfive_aes_aead_init_tfm,
-	.exit                           = starfive_aes_aead_exit_tfm,
-	.ivsize                         = GCM_AES_IV_SIZE,
-	.maxauthsize                    = AES_BLOCK_SIZE,
-	.base = {
+	.base.setkey			= starfive_aes_aead_setkey,
+	.base.setauthsize		= starfive_aes_gcm_setauthsize,
+	.base.encrypt			= starfive_aes_gcm_encrypt,
+	.base.decrypt			= starfive_aes_gcm_decrypt,
+	.base.init			= starfive_aes_aead_init_tfm,
+	.base.exit			= starfive_aes_aead_exit_tfm,
+	.base.ivsize			= GCM_AES_IV_SIZE,
+	.base.maxauthsize		= AES_BLOCK_SIZE,
+	.base.base = {
 		.cra_name               = "gcm(aes)",
 		.cra_driver_name        = "starfive-gcm-aes",
 		.cra_priority           = 200,
@@ -957,16 +973,19 @@ static struct aead_alg aead_algs[] = {
 		.cra_alignmask          = 0xf,
 		.cra_module             = THIS_MODULE,
 	},
+	.op = {
+		.do_one_request = starfive_aes_aead_do_one_req,
+	},
 }, {
-	.setkey		                = starfive_aes_aead_setkey,
-	.setauthsize	                = starfive_aes_ccm_setauthsize,
-	.encrypt	                = starfive_aes_ccm_encrypt,
-	.decrypt	                = starfive_aes_ccm_decrypt,
-	.init		                = starfive_aes_aead_init_tfm,
-	.exit		                = starfive_aes_aead_exit_tfm,
-	.ivsize		                = AES_BLOCK_SIZE,
-	.maxauthsize	                = AES_BLOCK_SIZE,
-	.base = {
+	.base.setkey			= starfive_aes_aead_setkey,
+	.base.setauthsize		= starfive_aes_ccm_setauthsize,
+	.base.encrypt			= starfive_aes_ccm_encrypt,
+	.base.decrypt			= starfive_aes_ccm_decrypt,
+	.base.init			= starfive_aes_aead_init_tfm,
+	.base.exit			= starfive_aes_aead_exit_tfm,
+	.base.ivsize			= AES_BLOCK_SIZE,
+	.base.maxauthsize		= AES_BLOCK_SIZE,
+	.base.base = {
 		.cra_name		= "ccm(aes)",
 		.cra_driver_name	= "starfive-ccm-aes",
 		.cra_priority		= 200,
@@ -977,6 +996,9 @@ static struct aead_alg aead_algs[] = {
 		.cra_alignmask		= 0xf,
 		.cra_module		= THIS_MODULE,
 	},
+	.op = {
+		.do_one_request = starfive_aes_aead_do_one_req,
+	},
 },
 };
 
@@ -984,19 +1006,19 @@ int starfive_aes_register_algs(void)
 {
 	int ret;
 
-	ret = crypto_register_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
+	ret = crypto_engine_register_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
 	if (ret)
 		return ret;
 
-	ret = crypto_register_aeads(aead_algs, ARRAY_SIZE(aead_algs));
+	ret = crypto_engine_register_aeads(aead_algs, ARRAY_SIZE(aead_algs));
 	if (ret)
-		crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
+		crypto_engine_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
 
 	return ret;
 }
 
 void starfive_aes_unregister_algs(void)
 {
-	crypto_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs));
-	crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
+	crypto_engine_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs));
+	crypto_engine_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
 }
diff --git a/drivers/crypto/starfive/jh7110-cryp.c b/drivers/crypto/starfive/jh7110-cryp.c
index ab37010ceb88..890ad5259329 100644
--- a/drivers/crypto/starfive/jh7110-cryp.c
+++ b/drivers/crypto/starfive/jh7110-cryp.c
@@ -7,17 +7,20 @@
  *
  */
 
+#include <crypto/engine.h>
+#include "jh7110-cryp.h"
 #include <linux/clk.h>
-#include <linux/delay.h>
+#include <linux/completion.h>
+#include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/iopoll.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
-
-#include "jh7110-cryp.h"
+#include <linux/spinlock.h>
 
 #define DRIVER_NAME             "jh7110-crypto"
 
diff --git a/drivers/crypto/starfive/jh7110-cryp.h b/drivers/crypto/starfive/jh7110-cryp.h
index 345a8d878761..fe011d50473d 100644
--- a/drivers/crypto/starfive/jh7110-cryp.h
+++ b/drivers/crypto/starfive/jh7110-cryp.h
@@ -3,7 +3,6 @@
 #define __STARFIVE_STR_H__
 
 #include <crypto/aes.h>
-#include <crypto/engine.h>
 #include <crypto/hash.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/sha2.h>
@@ -151,7 +150,6 @@ union starfive_alg_cr {
 };
 
 struct starfive_cryp_ctx {
-	struct crypto_engine_ctx		enginectx;
 	struct starfive_cryp_dev		*cryp;
 	struct starfive_cryp_request_ctx	*rctx;
 
diff --git a/drivers/crypto/starfive/jh7110-hash.c b/drivers/crypto/starfive/jh7110-hash.c
index 7fe89cd13336..739e944229af 100644
--- a/drivers/crypto/starfive/jh7110-hash.c
+++ b/drivers/crypto/starfive/jh7110-hash.c
@@ -6,11 +6,14 @@
  *
  */
 
+#include <crypto/engine.h>
+#include <crypto/internal/hash.h>
+#include <crypto/scatterwalk.h>
+#include "jh7110-cryp.h"
+#include <linux/amba/pl080.h>
 #include <linux/clk.h>
-#include <linux/crypto.h>
 #include <linux/dma-direct.h>
 #include <linux/interrupt.h>
-#include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -18,13 +21,6 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
-#include <linux/amba/pl080.h>
-
-#include <crypto/hash.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/internal/hash.h>
-
-#include "jh7110-cryp.h"
 
 #define STARFIVE_HASH_REGS_OFFSET	0x300
 #define STARFIVE_HASH_SHACSR		(STARFIVE_HASH_REGS_OFFSET + 0x0)
@@ -433,8 +429,6 @@ static int starfive_hash_init_tfm(struct crypto_ahash *hash,
 	ctx->keylen = 0;
 	ctx->hash_mode = mode;
 
-	ctx->enginectx.op.do_one_request = starfive_hash_one_request;
-
 	return 0;
 }
 
@@ -612,18 +606,18 @@ static int starfive_hmac_sm3_init_tfm(struct crypto_ahash *hash)
 				      STARFIVE_HASH_SM3);
 }
 
-static struct ahash_alg algs_sha2_sm3[] = {
+static struct ahash_engine_alg algs_sha2_sm3[] = {
 {
-	.init     = starfive_hash_init,
-	.update   = starfive_hash_update,
-	.final    = starfive_hash_final,
-	.finup    = starfive_hash_finup,
-	.digest   = starfive_hash_digest,
-	.export   = starfive_hash_export,
-	.import   = starfive_hash_import,
-	.init_tfm = starfive_sha224_init_tfm,
-	.exit_tfm = starfive_hash_exit_tfm,
-	.halg = {
+	.base.init     = starfive_hash_init,
+	.base.update   = starfive_hash_update,
+	.base.final    = starfive_hash_final,
+	.base.finup    = starfive_hash_finup,
+	.base.digest   = starfive_hash_digest,
+	.base.export   = starfive_hash_export,
+	.base.import   = starfive_hash_import,
+	.base.init_tfm = starfive_sha224_init_tfm,
+	.base.exit_tfm = starfive_hash_exit_tfm,
+	.base.halg = {
 		.digestsize = SHA224_DIGEST_SIZE,
 		.statesize  = sizeof(struct sha256_state),
 		.base = {
@@ -638,19 +632,22 @@ static struct ahash_alg algs_sha2_sm3[] = {
 			.cra_alignmask		= 3,
 			.cra_module		= THIS_MODULE,
 		}
-	}
+	},
+	.op = {
+		.do_one_request = starfive_hash_one_request,
+	},
 }, {
-	.init     = starfive_hash_init,
-	.update   = starfive_hash_update,
-	.final    = starfive_hash_final,
-	.finup    = starfive_hash_finup,
-	.digest   = starfive_hash_digest,
-	.export   = starfive_hash_export,
-	.import   = starfive_hash_import,
-	.init_tfm = starfive_hmac_sha224_init_tfm,
-	.exit_tfm = starfive_hash_exit_tfm,
-	.setkey   = starfive_hash_setkey,
-	.halg = {
+	.base.init     = starfive_hash_init,
+	.base.update   = starfive_hash_update,
+	.base.final    = starfive_hash_final,
+	.base.finup    = starfive_hash_finup,
+	.base.digest   = starfive_hash_digest,
+	.base.export   = starfive_hash_export,
+	.base.import   = starfive_hash_import,
+	.base.init_tfm = starfive_hmac_sha224_init_tfm,
+	.base.exit_tfm = starfive_hash_exit_tfm,
+	.base.setkey   = starfive_hash_setkey,
+	.base.halg = {
 		.digestsize = SHA224_DIGEST_SIZE,
 		.statesize  = sizeof(struct sha256_state),
 		.base = {
@@ -665,18 +662,21 @@ static struct ahash_alg algs_sha2_sm3[] = {
 			.cra_alignmask		= 3,
 			.cra_module		= THIS_MODULE,
 		}
-	}
+	},
+	.op = {
+		.do_one_request = starfive_hash_one_request,
+	},
 }, {
-	.init     = starfive_hash_init,
-	.update   = starfive_hash_update,
-	.final    = starfive_hash_final,
-	.finup    = starfive_hash_finup,
-	.digest   = starfive_hash_digest,
-	.export   = starfive_hash_export,
-	.import   = starfive_hash_import,
-	.init_tfm = starfive_sha256_init_tfm,
-	.exit_tfm = starfive_hash_exit_tfm,
-	.halg = {
+	.base.init     = starfive_hash_init,
+	.base.update   = starfive_hash_update,
+	.base.final    = starfive_hash_final,
+	.base.finup    = starfive_hash_finup,
+	.base.digest   = starfive_hash_digest,
+	.base.export   = starfive_hash_export,
+	.base.import   = starfive_hash_import,
+	.base.init_tfm = starfive_sha256_init_tfm,
+	.base.exit_tfm = starfive_hash_exit_tfm,
+	.base.halg = {
 		.digestsize = SHA256_DIGEST_SIZE,
 		.statesize  = sizeof(struct sha256_state),
 		.base = {
@@ -691,19 +691,22 @@ static struct ahash_alg algs_sha2_sm3[] = {
 			.cra_alignmask		= 3,
 			.cra_module		= THIS_MODULE,
 		}
-	}
+	},
+	.op = {
+		.do_one_request = starfive_hash_one_request,
+	},
 }, {
-	.init     = starfive_hash_init,
-	.update   = starfive_hash_update,
-	.final    = starfive_hash_final,
-	.finup    = starfive_hash_finup,
-	.digest   = starfive_hash_digest,
-	.export   = starfive_hash_export,
-	.import   = starfive_hash_import,
-	.init_tfm = starfive_hmac_sha256_init_tfm,
-	.exit_tfm = starfive_hash_exit_tfm,
-	.setkey   = starfive_hash_setkey,
-	.halg = {
+	.base.init     = starfive_hash_init,
+	.base.update   = starfive_hash_update,
+	.base.final    = starfive_hash_final,
+	.base.finup    = starfive_hash_finup,
+	.base.digest   = starfive_hash_digest,
+	.base.export   = starfive_hash_export,
+	.base.import   = starfive_hash_import,
+	.base.init_tfm = starfive_hmac_sha256_init_tfm,
+	.base.exit_tfm = starfive_hash_exit_tfm,
+	.base.setkey   = starfive_hash_setkey,
+	.base.halg = {
 		.digestsize = SHA256_DIGEST_SIZE,
 		.statesize  = sizeof(struct sha256_state),
 		.base = {
@@ -718,18 +721,21 @@ static struct ahash_alg algs_sha2_sm3[] = {
 			.cra_alignmask		= 3,
 			.cra_module		= THIS_MODULE,
 		}
-	}
+	},
+	.op = {
+		.do_one_request = starfive_hash_one_request,
+	},
 }, {
-	.init     = starfive_hash_init,
-	.update   = starfive_hash_update,
-	.final    = starfive_hash_final,
-	.finup    = starfive_hash_finup,
-	.digest   = starfive_hash_digest,
-	.export   = starfive_hash_export,
-	.import   = starfive_hash_import,
-	.init_tfm = starfive_sha384_init_tfm,
-	.exit_tfm = starfive_hash_exit_tfm,
-	.halg = {
+	.base.init     = starfive_hash_init,
+	.base.update   = starfive_hash_update,
+	.base.final    = starfive_hash_final,
+	.base.finup    = starfive_hash_finup,
+	.base.digest   = starfive_hash_digest,
+	.base.export   = starfive_hash_export,
+	.base.import   = starfive_hash_import,
+	.base.init_tfm = starfive_sha384_init_tfm,
+	.base.exit_tfm = starfive_hash_exit_tfm,
+	.base.halg = {
 		.digestsize = SHA384_DIGEST_SIZE,
 		.statesize  = sizeof(struct sha512_state),
 		.base = {
@@ -744,19 +750,22 @@ static struct ahash_alg algs_sha2_sm3[] = {
 			.cra_alignmask		= 3,
 			.cra_module		= THIS_MODULE,
 		}
-	}
+	},
+	.op = {
+		.do_one_request = starfive_hash_one_request,
+	},
 }, {
-	.init     = starfive_hash_init,
-	.update   = starfive_hash_update,
-	.final    = starfive_hash_final,
-	.finup    = starfive_hash_finup,
-	.digest   = starfive_hash_digest,
-	.export   = starfive_hash_export,
-	.import   = starfive_hash_import,
-	.init_tfm = starfive_hmac_sha384_init_tfm,
-	.exit_tfm = starfive_hash_exit_tfm,
-	.setkey   = starfive_hash_setkey,
-	.halg = {
+	.base.init     = starfive_hash_init,
+	.base.update   = starfive_hash_update,
+	.base.final    = starfive_hash_final,
+	.base.finup    = starfive_hash_finup,
+	.base.digest   = starfive_hash_digest,
+	.base.export   = starfive_hash_export,
+	.base.import   = starfive_hash_import,
+	.base.init_tfm = starfive_hmac_sha384_init_tfm,
+	.base.exit_tfm = starfive_hash_exit_tfm,
+	.base.setkey   = starfive_hash_setkey,
+	.base.halg = {
 		.digestsize = SHA384_DIGEST_SIZE,
 		.statesize  = sizeof(struct sha512_state),
 		.base = {
@@ -771,18 +780,21 @@ static struct ahash_alg algs_sha2_sm3[] = {
 			.cra_alignmask		= 3,
 			.cra_module		= THIS_MODULE,
 		}
-	}
+	},
+	.op = {
+		.do_one_request = starfive_hash_one_request,
+	},
 }, {
-	.init     = starfive_hash_init,
-	.update   = starfive_hash_update,
-	.final    = starfive_hash_final,
-	.finup    = starfive_hash_finup,
-	.digest   = starfive_hash_digest,
-	.export   = starfive_hash_export,
-	.import   = starfive_hash_import,
-	.init_tfm = starfive_sha512_init_tfm,
-	.exit_tfm = starfive_hash_exit_tfm,
-	.halg = {
+	.base.init     = starfive_hash_init,
+	.base.update   = starfive_hash_update,
+	.base.final    = starfive_hash_final,
+	.base.finup    = starfive_hash_finup,
+	.base.digest   = starfive_hash_digest,
+	.base.export   = starfive_hash_export,
+	.base.import   = starfive_hash_import,
+	.base.init_tfm = starfive_sha512_init_tfm,
+	.base.exit_tfm = starfive_hash_exit_tfm,
+	.base.halg = {
 		.digestsize = SHA512_DIGEST_SIZE,
 		.statesize  = sizeof(struct sha512_state),
 		.base = {
@@ -797,19 +809,22 @@ static struct ahash_alg algs_sha2_sm3[] = {
 			.cra_alignmask		= 3,
 			.cra_module		= THIS_MODULE,
 		}
-	}
+	},
+	.op = {
+		.do_one_request = starfive_hash_one_request,
+	},
 }, {
-	.init     = starfive_hash_init,
-	.update   = starfive_hash_update,
-	.final    = starfive_hash_final,
-	.finup    = starfive_hash_finup,
-	.digest   = starfive_hash_digest,
-	.export   = starfive_hash_export,
-	.import   = starfive_hash_import,
-	.init_tfm = starfive_hmac_sha512_init_tfm,
-	.exit_tfm = starfive_hash_exit_tfm,
-	.setkey   = starfive_hash_setkey,
-	.halg = {
+	.base.init     = starfive_hash_init,
+	.base.update   = starfive_hash_update,
+	.base.final    = starfive_hash_final,
+	.base.finup    = starfive_hash_finup,
+	.base.digest   = starfive_hash_digest,
+	.base.export   = starfive_hash_export,
+	.base.import   = starfive_hash_import,
+	.base.init_tfm = starfive_hmac_sha512_init_tfm,
+	.base.exit_tfm = starfive_hash_exit_tfm,
+	.base.setkey   = starfive_hash_setkey,
+	.base.halg = {
 		.digestsize = SHA512_DIGEST_SIZE,
 		.statesize  = sizeof(struct sha512_state),
 		.base = {
@@ -824,18 +839,21 @@ static struct ahash_alg algs_sha2_sm3[] = {
 			.cra_alignmask		= 3,
 			.cra_module		= THIS_MODULE,
 		}
-	}
+	},
+	.op = {
+		.do_one_request = starfive_hash_one_request,
+	},
 }, {
-	.init     = starfive_hash_init,
-	.update   = starfive_hash_update,
-	.final    = starfive_hash_final,
-	.finup    = starfive_hash_finup,
-	.digest   = starfive_hash_digest,
-	.export   = starfive_hash_export,
-	.import   = starfive_hash_import,
-	.init_tfm = starfive_sm3_init_tfm,
-	.exit_tfm = starfive_hash_exit_tfm,
-	.halg = {
+	.base.init     = starfive_hash_init,
+	.base.update   = starfive_hash_update,
+	.base.final    = starfive_hash_final,
+	.base.finup    = starfive_hash_finup,
+	.base.digest   = starfive_hash_digest,
+	.base.export   = starfive_hash_export,
+	.base.import   = starfive_hash_import,
+	.base.init_tfm = starfive_sm3_init_tfm,
+	.base.exit_tfm = starfive_hash_exit_tfm,
+	.base.halg = {
 		.digestsize = SM3_DIGEST_SIZE,
 		.statesize  = sizeof(struct sm3_state),
 		.base = {
@@ -850,19 +868,22 @@ static struct ahash_alg algs_sha2_sm3[] = {
 			.cra_alignmask		= 3,
 			.cra_module		= THIS_MODULE,
 		}
-	}
+	},
+	.op = {
+		.do_one_request = starfive_hash_one_request,
+	},
 }, {
-	.init	  = starfive_hash_init,
-	.update	  = starfive_hash_update,
-	.final	  = starfive_hash_final,
-	.finup	  = starfive_hash_finup,
-	.digest	  = starfive_hash_digest,
-	.export	  = starfive_hash_export,
-	.import	  = starfive_hash_import,
-	.init_tfm = starfive_hmac_sm3_init_tfm,
-	.exit_tfm = starfive_hash_exit_tfm,
-	.setkey	  = starfive_hash_setkey,
-	.halg = {
+	.base.init	  = starfive_hash_init,
+	.base.update	  = starfive_hash_update,
+	.base.final	  = starfive_hash_final,
+	.base.finup	  = starfive_hash_finup,
+	.base.digest	  = starfive_hash_digest,
+	.base.export	  = starfive_hash_export,
+	.base.import	  = starfive_hash_import,
+	.base.init_tfm = starfive_hmac_sm3_init_tfm,
+	.base.exit_tfm = starfive_hash_exit_tfm,
+	.base.setkey	  = starfive_hash_setkey,
+	.base.halg = {
 		.digestsize = SM3_DIGEST_SIZE,
 		.statesize  = sizeof(struct sm3_state),
 		.base = {
@@ -877,16 +898,19 @@ static struct ahash_alg algs_sha2_sm3[] = {
 			.cra_alignmask		= 3,
 			.cra_module		= THIS_MODULE,
 		}
-	}
+	},
+	.op = {
+		.do_one_request = starfive_hash_one_request,
+	},
 },
 };
 
 int starfive_hash_register_algs(void)
 {
-	return crypto_register_ahashes(algs_sha2_sm3, ARRAY_SIZE(algs_sha2_sm3));
+	return crypto_engine_register_ahashes(algs_sha2_sm3, ARRAY_SIZE(algs_sha2_sm3));
 }
 
 void starfive_hash_unregister_algs(void)
 {
-	crypto_unregister_ahashes(algs_sha2_sm3, ARRAY_SIZE(algs_sha2_sm3));
+	crypto_engine_unregister_ahashes(algs_sha2_sm3, ARRAY_SIZE(algs_sha2_sm3));
 }

From d5e6b48f94d66ca05ea64341b7ffbb7f981cca64 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:55:14 +0800
Subject: [PATCH 1351/2047] crypto: stm32 - Use new crypto_engine_op interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32-cryp.c | 317 +++++++++++---------
 drivers/crypto/stm32/stm32-hash.c | 483 +++++++++++++++++-------------
 2 files changed, 445 insertions(+), 355 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c
index 07e32b8dbe29..c67239686b1e 100644
--- a/drivers/crypto/stm32/stm32-cryp.c
+++ b/drivers/crypto/stm32/stm32-cryp.c
@@ -5,22 +5,24 @@
  * Ux500 support taken from snippets in the old Ux500 cryp driver
  */
 
+#include <crypto/aes.h>
+#include <crypto/engine.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
-#include <linux/interrupt.h>
+#include <linux/err.h>
 #include <linux/iopoll.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
-
-#include <crypto/aes.h>
-#include <crypto/internal/des.h>
-#include <crypto/engine.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/internal/aead.h>
-#include <crypto/internal/skcipher.h>
+#include <linux/string.h>
 
 #define DRIVER_NAME             "stm32-cryp"
 
@@ -156,7 +158,6 @@ struct stm32_cryp_caps {
 };
 
 struct stm32_cryp_ctx {
-	struct crypto_engine_ctx enginectx;
 	struct stm32_cryp       *cryp;
 	int                     keylen;
 	__be32                  key[AES_KEYSIZE_256 / sizeof(u32)];
@@ -828,11 +829,8 @@ static int stm32_cryp_cipher_one_req(struct crypto_engine *engine, void *areq);
 
 static int stm32_cryp_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct stm32_cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
-
 	crypto_skcipher_set_reqsize(tfm, sizeof(struct stm32_cryp_reqctx));
 
-	ctx->enginectx.op.do_one_request = stm32_cryp_cipher_one_req;
 	return 0;
 }
 
@@ -840,12 +838,8 @@ static int stm32_cryp_aead_one_req(struct crypto_engine *engine, void *areq);
 
 static int stm32_cryp_aes_aead_init(struct crypto_aead *tfm)
 {
-	struct stm32_cryp_ctx *ctx = crypto_aead_ctx(tfm);
-
 	tfm->reqsize = sizeof(struct stm32_cryp_reqctx);
 
-	ctx->enginectx.op.do_one_request = stm32_cryp_aead_one_req;
-
 	return 0;
 }
 
@@ -1686,143 +1680,178 @@ static irqreturn_t stm32_cryp_irq(int irq, void *arg)
 	return IRQ_WAKE_THREAD;
 }
 
-static struct skcipher_alg crypto_algs[] = {
+static struct skcipher_engine_alg crypto_algs[] = {
 {
-	.base.cra_name		= "ecb(aes)",
-	.base.cra_driver_name	= "stm32-ecb-aes",
-	.base.cra_priority	= 200,
-	.base.cra_flags		= CRYPTO_ALG_ASYNC,
-	.base.cra_blocksize	= AES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
-	.base.cra_alignmask	= 0,
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "ecb(aes)",
+		.base.cra_driver_name	= "stm32-ecb-aes",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+		.base.cra_alignmask	= 0,
+		.base.cra_module	= THIS_MODULE,
 
-	.init			= stm32_cryp_init_tfm,
-	.min_keysize		= AES_MIN_KEY_SIZE,
-	.max_keysize		= AES_MAX_KEY_SIZE,
-	.setkey			= stm32_cryp_aes_setkey,
-	.encrypt		= stm32_cryp_aes_ecb_encrypt,
-	.decrypt		= stm32_cryp_aes_ecb_decrypt,
+		.init			= stm32_cryp_init_tfm,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.setkey			= stm32_cryp_aes_setkey,
+		.encrypt		= stm32_cryp_aes_ecb_encrypt,
+		.decrypt		= stm32_cryp_aes_ecb_decrypt,
+	},
+	.op = {
+		.do_one_request = stm32_cryp_cipher_one_req,
+	},
 },
 {
-	.base.cra_name		= "cbc(aes)",
-	.base.cra_driver_name	= "stm32-cbc-aes",
-	.base.cra_priority	= 200,
-	.base.cra_flags		= CRYPTO_ALG_ASYNC,
-	.base.cra_blocksize	= AES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
-	.base.cra_alignmask	= 0,
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "cbc(aes)",
+		.base.cra_driver_name	= "stm32-cbc-aes",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= AES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+		.base.cra_alignmask	= 0,
+		.base.cra_module	= THIS_MODULE,
 
-	.init			= stm32_cryp_init_tfm,
-	.min_keysize		= AES_MIN_KEY_SIZE,
-	.max_keysize		= AES_MAX_KEY_SIZE,
-	.ivsize			= AES_BLOCK_SIZE,
-	.setkey			= stm32_cryp_aes_setkey,
-	.encrypt		= stm32_cryp_aes_cbc_encrypt,
-	.decrypt		= stm32_cryp_aes_cbc_decrypt,
+		.init			= stm32_cryp_init_tfm,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
+		.setkey			= stm32_cryp_aes_setkey,
+		.encrypt		= stm32_cryp_aes_cbc_encrypt,
+		.decrypt		= stm32_cryp_aes_cbc_decrypt,
+	},
+	.op = {
+		.do_one_request = stm32_cryp_cipher_one_req,
+	},
 },
 {
-	.base.cra_name		= "ctr(aes)",
-	.base.cra_driver_name	= "stm32-ctr-aes",
-	.base.cra_priority	= 200,
-	.base.cra_flags		= CRYPTO_ALG_ASYNC,
-	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
-	.base.cra_alignmask	= 0,
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "ctr(aes)",
+		.base.cra_driver_name	= "stm32-ctr-aes",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+		.base.cra_alignmask	= 0,
+		.base.cra_module	= THIS_MODULE,
 
-	.init			= stm32_cryp_init_tfm,
-	.min_keysize		= AES_MIN_KEY_SIZE,
-	.max_keysize		= AES_MAX_KEY_SIZE,
-	.ivsize			= AES_BLOCK_SIZE,
-	.setkey			= stm32_cryp_aes_setkey,
-	.encrypt		= stm32_cryp_aes_ctr_encrypt,
-	.decrypt		= stm32_cryp_aes_ctr_decrypt,
+		.init			= stm32_cryp_init_tfm,
+		.min_keysize		= AES_MIN_KEY_SIZE,
+		.max_keysize		= AES_MAX_KEY_SIZE,
+		.ivsize			= AES_BLOCK_SIZE,
+		.setkey			= stm32_cryp_aes_setkey,
+		.encrypt		= stm32_cryp_aes_ctr_encrypt,
+		.decrypt		= stm32_cryp_aes_ctr_decrypt,
+	},
+	.op = {
+		.do_one_request = stm32_cryp_cipher_one_req,
+	},
 },
 {
-	.base.cra_name		= "ecb(des)",
-	.base.cra_driver_name	= "stm32-ecb-des",
-	.base.cra_priority	= 200,
-	.base.cra_flags		= CRYPTO_ALG_ASYNC,
-	.base.cra_blocksize	= DES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
-	.base.cra_alignmask	= 0,
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "ecb(des)",
+		.base.cra_driver_name	= "stm32-ecb-des",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+		.base.cra_alignmask	= 0,
+		.base.cra_module	= THIS_MODULE,
 
-	.init			= stm32_cryp_init_tfm,
-	.min_keysize		= DES_BLOCK_SIZE,
-	.max_keysize		= DES_BLOCK_SIZE,
-	.setkey			= stm32_cryp_des_setkey,
-	.encrypt		= stm32_cryp_des_ecb_encrypt,
-	.decrypt		= stm32_cryp_des_ecb_decrypt,
+		.init			= stm32_cryp_init_tfm,
+		.min_keysize		= DES_BLOCK_SIZE,
+		.max_keysize		= DES_BLOCK_SIZE,
+		.setkey			= stm32_cryp_des_setkey,
+		.encrypt		= stm32_cryp_des_ecb_encrypt,
+		.decrypt		= stm32_cryp_des_ecb_decrypt,
+	},
+	.op = {
+		.do_one_request = stm32_cryp_cipher_one_req,
+	},
 },
 {
-	.base.cra_name		= "cbc(des)",
-	.base.cra_driver_name	= "stm32-cbc-des",
-	.base.cra_priority	= 200,
-	.base.cra_flags		= CRYPTO_ALG_ASYNC,
-	.base.cra_blocksize	= DES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
-	.base.cra_alignmask	= 0,
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "cbc(des)",
+		.base.cra_driver_name	= "stm32-cbc-des",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+		.base.cra_alignmask	= 0,
+		.base.cra_module	= THIS_MODULE,
 
-	.init			= stm32_cryp_init_tfm,
-	.min_keysize		= DES_BLOCK_SIZE,
-	.max_keysize		= DES_BLOCK_SIZE,
-	.ivsize			= DES_BLOCK_SIZE,
-	.setkey			= stm32_cryp_des_setkey,
-	.encrypt		= stm32_cryp_des_cbc_encrypt,
-	.decrypt		= stm32_cryp_des_cbc_decrypt,
+		.init			= stm32_cryp_init_tfm,
+		.min_keysize		= DES_BLOCK_SIZE,
+		.max_keysize		= DES_BLOCK_SIZE,
+		.ivsize			= DES_BLOCK_SIZE,
+		.setkey			= stm32_cryp_des_setkey,
+		.encrypt		= stm32_cryp_des_cbc_encrypt,
+		.decrypt		= stm32_cryp_des_cbc_decrypt,
+	},
+	.op = {
+		.do_one_request = stm32_cryp_cipher_one_req,
+	},
 },
 {
-	.base.cra_name		= "ecb(des3_ede)",
-	.base.cra_driver_name	= "stm32-ecb-des3",
-	.base.cra_priority	= 200,
-	.base.cra_flags		= CRYPTO_ALG_ASYNC,
-	.base.cra_blocksize	= DES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
-	.base.cra_alignmask	= 0,
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "ecb(des3_ede)",
+		.base.cra_driver_name	= "stm32-ecb-des3",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+		.base.cra_alignmask	= 0,
+		.base.cra_module	= THIS_MODULE,
 
-	.init			= stm32_cryp_init_tfm,
-	.min_keysize		= 3 * DES_BLOCK_SIZE,
-	.max_keysize		= 3 * DES_BLOCK_SIZE,
-	.setkey			= stm32_cryp_tdes_setkey,
-	.encrypt		= stm32_cryp_tdes_ecb_encrypt,
-	.decrypt		= stm32_cryp_tdes_ecb_decrypt,
+		.init			= stm32_cryp_init_tfm,
+		.min_keysize		= 3 * DES_BLOCK_SIZE,
+		.max_keysize		= 3 * DES_BLOCK_SIZE,
+		.setkey			= stm32_cryp_tdes_setkey,
+		.encrypt		= stm32_cryp_tdes_ecb_encrypt,
+		.decrypt		= stm32_cryp_tdes_ecb_decrypt,
+	},
+	.op = {
+		.do_one_request = stm32_cryp_cipher_one_req,
+	},
 },
 {
-	.base.cra_name		= "cbc(des3_ede)",
-	.base.cra_driver_name	= "stm32-cbc-des3",
-	.base.cra_priority	= 200,
-	.base.cra_flags		= CRYPTO_ALG_ASYNC,
-	.base.cra_blocksize	= DES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
-	.base.cra_alignmask	= 0,
-	.base.cra_module	= THIS_MODULE,
+	.base = {
+		.base.cra_name		= "cbc(des3_ede)",
+		.base.cra_driver_name	= "stm32-cbc-des3",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_blocksize	= DES_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
+		.base.cra_alignmask	= 0,
+		.base.cra_module	= THIS_MODULE,
 
-	.init			= stm32_cryp_init_tfm,
-	.min_keysize		= 3 * DES_BLOCK_SIZE,
-	.max_keysize		= 3 * DES_BLOCK_SIZE,
-	.ivsize			= DES_BLOCK_SIZE,
-	.setkey			= stm32_cryp_tdes_setkey,
-	.encrypt		= stm32_cryp_tdes_cbc_encrypt,
-	.decrypt		= stm32_cryp_tdes_cbc_decrypt,
+		.init			= stm32_cryp_init_tfm,
+		.min_keysize		= 3 * DES_BLOCK_SIZE,
+		.max_keysize		= 3 * DES_BLOCK_SIZE,
+		.ivsize			= DES_BLOCK_SIZE,
+		.setkey			= stm32_cryp_tdes_setkey,
+		.encrypt		= stm32_cryp_tdes_cbc_encrypt,
+		.decrypt		= stm32_cryp_tdes_cbc_decrypt,
+	},
+	.op = {
+		.do_one_request = stm32_cryp_cipher_one_req,
+	},
 },
 };
 
-static struct aead_alg aead_algs[] = {
+static struct aead_engine_alg aead_algs[] = {
 {
-	.setkey		= stm32_cryp_aes_aead_setkey,
-	.setauthsize	= stm32_cryp_aes_gcm_setauthsize,
-	.encrypt	= stm32_cryp_aes_gcm_encrypt,
-	.decrypt	= stm32_cryp_aes_gcm_decrypt,
-	.init		= stm32_cryp_aes_aead_init,
-	.ivsize		= 12,
-	.maxauthsize	= AES_BLOCK_SIZE,
+	.base.setkey		= stm32_cryp_aes_aead_setkey,
+	.base.setauthsize	= stm32_cryp_aes_gcm_setauthsize,
+	.base.encrypt		= stm32_cryp_aes_gcm_encrypt,
+	.base.decrypt		= stm32_cryp_aes_gcm_decrypt,
+	.base.init		= stm32_cryp_aes_aead_init,
+	.base.ivsize		= 12,
+	.base.maxauthsize	= AES_BLOCK_SIZE,
 
-	.base = {
+	.base.base = {
 		.cra_name		= "gcm(aes)",
 		.cra_driver_name	= "stm32-gcm-aes",
 		.cra_priority		= 200,
@@ -1832,17 +1861,20 @@ static struct aead_alg aead_algs[] = {
 		.cra_alignmask		= 0,
 		.cra_module		= THIS_MODULE,
 	},
+	.op = {
+		.do_one_request = stm32_cryp_aead_one_req,
+	},
 },
 {
-	.setkey		= stm32_cryp_aes_aead_setkey,
-	.setauthsize	= stm32_cryp_aes_ccm_setauthsize,
-	.encrypt	= stm32_cryp_aes_ccm_encrypt,
-	.decrypt	= stm32_cryp_aes_ccm_decrypt,
-	.init		= stm32_cryp_aes_aead_init,
-	.ivsize		= AES_BLOCK_SIZE,
-	.maxauthsize	= AES_BLOCK_SIZE,
+	.base.setkey		= stm32_cryp_aes_aead_setkey,
+	.base.setauthsize	= stm32_cryp_aes_ccm_setauthsize,
+	.base.encrypt		= stm32_cryp_aes_ccm_encrypt,
+	.base.decrypt		= stm32_cryp_aes_ccm_decrypt,
+	.base.init		= stm32_cryp_aes_aead_init,
+	.base.ivsize		= AES_BLOCK_SIZE,
+	.base.maxauthsize	= AES_BLOCK_SIZE,
 
-	.base = {
+	.base.base = {
 		.cra_name		= "ccm(aes)",
 		.cra_driver_name	= "stm32-ccm-aes",
 		.cra_priority		= 200,
@@ -1852,6 +1884,9 @@ static struct aead_alg aead_algs[] = {
 		.cra_alignmask		= 0,
 		.cra_module		= THIS_MODULE,
 	},
+	.op = {
+		.do_one_request = stm32_cryp_aead_one_req,
+	},
 },
 };
 
@@ -2013,14 +2048,14 @@ static int stm32_cryp_probe(struct platform_device *pdev)
 		goto err_engine2;
 	}
 
-	ret = crypto_register_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs));
+	ret = crypto_engine_register_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs));
 	if (ret) {
 		dev_err(dev, "Could not register algs\n");
 		goto err_algs;
 	}
 
 	if (cryp->caps->aeads_support) {
-		ret = crypto_register_aeads(aead_algs, ARRAY_SIZE(aead_algs));
+		ret = crypto_engine_register_aeads(aead_algs, ARRAY_SIZE(aead_algs));
 		if (ret)
 			goto err_aead_algs;
 	}
@@ -2032,7 +2067,7 @@ static int stm32_cryp_probe(struct platform_device *pdev)
 	return 0;
 
 err_aead_algs:
-	crypto_unregister_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs));
+	crypto_engine_unregister_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs));
 err_algs:
 err_engine2:
 	crypto_engine_exit(cryp->engine);
@@ -2062,8 +2097,8 @@ static int stm32_cryp_remove(struct platform_device *pdev)
 		return ret;
 
 	if (cryp->caps->aeads_support)
-		crypto_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs));
-	crypto_unregister_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs));
+		crypto_engine_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs));
+	crypto_engine_unregister_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs));
 
 	crypto_engine_exit(cryp->engine);
 
diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index 68c52eeaa6b1..53cf922d1a38 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -6,12 +6,18 @@
  * Author(s): Lionel DEBIEVE <lionel.debieve@st.com> for STMicroelectronics.
  */
 
+#include <crypto/engine.h>
+#include <crypto/internal/hash.h>
+#include <crypto/md5.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
+#include <crypto/sha3.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/interrupt.h>
-#include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -19,15 +25,7 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
-
-#include <crypto/engine.h>
-#include <crypto/hash.h>
-#include <crypto/md5.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha1.h>
-#include <crypto/sha2.h>
-#include <crypto/sha3.h>
-#include <crypto/internal/hash.h>
+#include <linux/string.h>
 
 #define HASH_CR				0x00
 #define HASH_DIN			0x04
@@ -133,7 +131,6 @@ enum ux500_hash_algo {
 #define HASH_AUTOSUSPEND_DELAY		50
 
 struct stm32_hash_ctx {
-	struct crypto_engine_ctx enginectx;
 	struct stm32_hash_dev	*hdev;
 	struct crypto_shash	*xtfm;
 	unsigned long		flags;
@@ -177,7 +174,7 @@ struct stm32_hash_request_ctx {
 };
 
 struct stm32_hash_algs_info {
-	struct ahash_alg	*algs_list;
+	struct ahash_engine_alg	*algs_list;
 	size_t			size;
 };
 
@@ -1194,8 +1191,6 @@ static int stm32_hash_cra_init_algs(struct crypto_tfm *tfm, u32 algs_flags)
 	if (algs_flags)
 		ctx->flags |= algs_flags;
 
-	ctx->enginectx.op.do_one_request = stm32_hash_one_request;
-
 	return stm32_hash_init_fallback(tfm);
 }
 
@@ -1268,16 +1263,16 @@ static irqreturn_t stm32_hash_irq_handler(int irq, void *dev_id)
 	return IRQ_NONE;
 }
 
-static struct ahash_alg algs_md5[] = {
+static struct ahash_engine_alg algs_md5[] = {
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.halg = {
 			.digestsize = MD5_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1293,18 +1288,21 @@ static struct ahash_alg algs_md5[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.setkey = stm32_hash_setkey,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.setkey = stm32_hash_setkey,
+		.base.halg = {
 			.digestsize = MD5_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1320,20 +1318,23 @@ static struct ahash_alg algs_md5[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	}
 };
 
-static struct ahash_alg algs_sha1[] = {
+static struct ahash_engine_alg algs_sha1[] = {
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.halg = {
 			.digestsize = SHA1_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1349,18 +1350,21 @@ static struct ahash_alg algs_sha1[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.setkey = stm32_hash_setkey,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.setkey = stm32_hash_setkey,
+		.base.halg = {
 			.digestsize = SHA1_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1376,20 +1380,23 @@ static struct ahash_alg algs_sha1[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 };
 
-static struct ahash_alg algs_sha224[] = {
+static struct ahash_engine_alg algs_sha224[] = {
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.halg = {
 			.digestsize = SHA224_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1405,18 +1412,21 @@ static struct ahash_alg algs_sha224[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.setkey = stm32_hash_setkey,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.setkey = stm32_hash_setkey,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.halg = {
 			.digestsize = SHA224_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1432,20 +1442,23 @@ static struct ahash_alg algs_sha224[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 };
 
-static struct ahash_alg algs_sha256[] = {
+static struct ahash_engine_alg algs_sha256[] = {
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.halg = {
 			.digestsize = SHA256_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1461,18 +1474,21 @@ static struct ahash_alg algs_sha256[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.setkey = stm32_hash_setkey,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.setkey = stm32_hash_setkey,
+		.base.halg = {
 			.digestsize = SHA256_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1488,20 +1504,23 @@ static struct ahash_alg algs_sha256[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 };
 
-static struct ahash_alg algs_sha384_sha512[] = {
+static struct ahash_engine_alg algs_sha384_sha512[] = {
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.halg = {
 			.digestsize = SHA384_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1517,18 +1536,21 @@ static struct ahash_alg algs_sha384_sha512[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.setkey = stm32_hash_setkey,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.setkey = stm32_hash_setkey,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.halg = {
 			.digestsize = SHA384_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1544,17 +1566,20 @@ static struct ahash_alg algs_sha384_sha512[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.halg = {
 			.digestsize = SHA512_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1570,18 +1595,21 @@ static struct ahash_alg algs_sha384_sha512[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.setkey = stm32_hash_setkey,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.setkey = stm32_hash_setkey,
+		.base.halg = {
 			.digestsize = SHA512_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1597,20 +1625,23 @@ static struct ahash_alg algs_sha384_sha512[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 };
 
-static struct ahash_alg algs_sha3[] = {
+static struct ahash_engine_alg algs_sha3[] = {
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.halg = {
 			.digestsize = SHA3_224_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1626,18 +1657,21 @@ static struct ahash_alg algs_sha3[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.setkey = stm32_hash_setkey,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.setkey = stm32_hash_setkey,
+		.base.halg = {
 			.digestsize = SHA3_224_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1653,17 +1687,20 @@ static struct ahash_alg algs_sha3[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
-		{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.halg = {
+	{
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.halg = {
 			.digestsize = SHA3_256_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1679,18 +1716,21 @@ static struct ahash_alg algs_sha3[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.setkey = stm32_hash_setkey,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.setkey = stm32_hash_setkey,
+		.base.halg = {
 			.digestsize = SHA3_256_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1706,17 +1746,20 @@ static struct ahash_alg algs_sha3[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.halg = {
 			.digestsize = SHA3_384_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1732,18 +1775,21 @@ static struct ahash_alg algs_sha3[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.setkey = stm32_hash_setkey,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.setkey = stm32_hash_setkey,
+		.base.halg = {
 			.digestsize = SHA3_384_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1759,17 +1805,20 @@ static struct ahash_alg algs_sha3[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.halg = {
 			.digestsize = SHA3_512_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1785,18 +1834,21 @@ static struct ahash_alg algs_sha3[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	},
 	{
-		.init = stm32_hash_init,
-		.update = stm32_hash_update,
-		.final = stm32_hash_final,
-		.finup = stm32_hash_finup,
-		.digest = stm32_hash_digest,
-		.export = stm32_hash_export,
-		.import = stm32_hash_import,
-		.setkey = stm32_hash_setkey,
-		.halg = {
+		.base.init = stm32_hash_init,
+		.base.update = stm32_hash_update,
+		.base.final = stm32_hash_final,
+		.base.finup = stm32_hash_finup,
+		.base.digest = stm32_hash_digest,
+		.base.export = stm32_hash_export,
+		.base.import = stm32_hash_import,
+		.base.setkey = stm32_hash_setkey,
+		.base.halg = {
 			.digestsize = SHA3_512_DIGEST_SIZE,
 			.statesize = sizeof(struct stm32_hash_state),
 			.base = {
@@ -1812,7 +1864,10 @@ static struct ahash_alg algs_sha3[] = {
 				.cra_exit = stm32_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 			}
-		}
+		},
+		.op = {
+			.do_one_request = stm32_hash_one_request,
+		},
 	}
 };
 
@@ -1823,7 +1878,7 @@ static int stm32_hash_register_algs(struct stm32_hash_dev *hdev)
 
 	for (i = 0; i < hdev->pdata->algs_info_size; i++) {
 		for (j = 0; j < hdev->pdata->algs_info[i].size; j++) {
-			err = crypto_register_ahash(
+			err = crypto_engine_register_ahash(
 				&hdev->pdata->algs_info[i].algs_list[j]);
 			if (err)
 				goto err_algs;
@@ -1835,7 +1890,7 @@ err_algs:
 	dev_err(hdev->dev, "Algo %d : %d failed\n", i, j);
 	for (; i--; ) {
 		for (; j--;)
-			crypto_unregister_ahash(
+			crypto_engine_unregister_ahash(
 				&hdev->pdata->algs_info[i].algs_list[j]);
 	}
 
@@ -1848,7 +1903,7 @@ static int stm32_hash_unregister_algs(struct stm32_hash_dev *hdev)
 
 	for (i = 0; i < hdev->pdata->algs_info_size; i++) {
 		for (j = 0; j < hdev->pdata->algs_info[i].size; j++)
-			crypto_unregister_ahash(
+			crypto_engine_unregister_ahash(
 				&hdev->pdata->algs_info[i].algs_list[j]);
 	}
 

From 7a2673d70ca69bb890dc0fec590297f5aa7da739 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:55:16 +0800
Subject: [PATCH 1352/2047] crypto: virtio - Use new crypto_engine_op interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../virtio/virtio_crypto_akcipher_algs.c      | 33 +++++++++++--------
 .../virtio/virtio_crypto_skcipher_algs.c      | 23 +++++++------
 2 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
index ff3369ca319f..2621ff8a9376 100644
--- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
@@ -7,15 +7,16 @@
   * Copyright 2022 Bytedance CO., LTD.
   */
 
-#include <linux/mpi.h>
-#include <linux/scatterlist.h>
-#include <crypto/algapi.h>
+#include <crypto/engine.h>
 #include <crypto/internal/akcipher.h>
 #include <crypto/internal/rsa.h>
-#include <linux/err.h>
 #include <crypto/scatterwalk.h>
-#include <linux/atomic.h>
-
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/mpi.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 #include <uapi/linux/virtio_crypto.h>
 #include "virtio_crypto_common.h"
 
@@ -24,7 +25,6 @@ struct virtio_crypto_rsa_ctx {
 };
 
 struct virtio_crypto_akcipher_ctx {
-	struct crypto_engine_ctx enginectx;
 	struct virtio_crypto *vcrypto;
 	struct crypto_akcipher *tfm;
 	bool session_valid;
@@ -47,7 +47,7 @@ struct virtio_crypto_akcipher_algo {
 	uint32_t algonum;
 	uint32_t service;
 	unsigned int active_devs;
-	struct akcipher_alg algo;
+	struct akcipher_engine_alg algo;
 };
 
 static DEFINE_MUTEX(algs_lock);
@@ -475,7 +475,6 @@ static int virtio_crypto_rsa_init_tfm(struct crypto_akcipher *tfm)
 	struct virtio_crypto_akcipher_ctx *ctx = akcipher_tfm_ctx(tfm);
 
 	ctx->tfm = tfm;
-	ctx->enginectx.op.do_one_request = virtio_crypto_rsa_do_req;
 
 	akcipher_set_reqsize(tfm,
 			     sizeof(struct virtio_crypto_akcipher_request));
@@ -498,7 +497,7 @@ static struct virtio_crypto_akcipher_algo virtio_crypto_akcipher_algs[] = {
 	{
 		.algonum = VIRTIO_CRYPTO_AKCIPHER_RSA,
 		.service = VIRTIO_CRYPTO_SERVICE_AKCIPHER,
-		.algo = {
+		.algo.base = {
 			.encrypt = virtio_crypto_rsa_encrypt,
 			.decrypt = virtio_crypto_rsa_decrypt,
 			.set_pub_key = virtio_crypto_rsa_raw_set_pub_key,
@@ -514,11 +513,14 @@ static struct virtio_crypto_akcipher_algo virtio_crypto_akcipher_algs[] = {
 				.cra_ctxsize = sizeof(struct virtio_crypto_akcipher_ctx),
 			},
 		},
+		.algo.op = {
+			.do_one_request = virtio_crypto_rsa_do_req,
+		},
 	},
 	{
 		.algonum = VIRTIO_CRYPTO_AKCIPHER_RSA,
 		.service = VIRTIO_CRYPTO_SERVICE_AKCIPHER,
-		.algo = {
+		.algo.base = {
 			.encrypt = virtio_crypto_rsa_encrypt,
 			.decrypt = virtio_crypto_rsa_decrypt,
 			.sign = virtio_crypto_rsa_sign,
@@ -536,6 +538,9 @@ static struct virtio_crypto_akcipher_algo virtio_crypto_akcipher_algs[] = {
 				.cra_ctxsize = sizeof(struct virtio_crypto_akcipher_ctx),
 			},
 		},
+		.algo.op = {
+			.do_one_request = virtio_crypto_rsa_do_req,
+		},
 	},
 };
 
@@ -554,14 +559,14 @@ int virtio_crypto_akcipher_algs_register(struct virtio_crypto *vcrypto)
 			continue;
 
 		if (virtio_crypto_akcipher_algs[i].active_devs == 0) {
-			ret = crypto_register_akcipher(&virtio_crypto_akcipher_algs[i].algo);
+			ret = crypto_engine_register_akcipher(&virtio_crypto_akcipher_algs[i].algo);
 			if (ret)
 				goto unlock;
 		}
 
 		virtio_crypto_akcipher_algs[i].active_devs++;
 		dev_info(&vcrypto->vdev->dev, "Registered akcipher algo %s\n",
-			 virtio_crypto_akcipher_algs[i].algo.base.cra_name);
+			 virtio_crypto_akcipher_algs[i].algo.base.base.cra_name);
 	}
 
 unlock:
@@ -584,7 +589,7 @@ void virtio_crypto_akcipher_algs_unregister(struct virtio_crypto *vcrypto)
 			continue;
 
 		if (virtio_crypto_akcipher_algs[i].active_devs == 1)
-			crypto_unregister_akcipher(&virtio_crypto_akcipher_algs[i].algo);
+			crypto_engine_unregister_akcipher(&virtio_crypto_akcipher_algs[i].algo);
 
 		virtio_crypto_akcipher_algs[i].active_devs--;
 	}
diff --git a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
index 71b8751ab5ab..23c41d87d835 100644
--- a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
@@ -6,19 +6,16 @@
   * Copyright 2016 HUAWEI TECHNOLOGIES CO., LTD.
   */
 
-#include <linux/scatterlist.h>
-#include <crypto/algapi.h>
+#include <crypto/engine.h>
 #include <crypto/internal/skcipher.h>
-#include <linux/err.h>
 #include <crypto/scatterwalk.h>
-#include <linux/atomic.h>
-
+#include <linux/err.h>
+#include <linux/scatterlist.h>
 #include <uapi/linux/virtio_crypto.h>
 #include "virtio_crypto_common.h"
 
 
 struct virtio_crypto_skcipher_ctx {
-	struct crypto_engine_ctx enginectx;
 	struct virtio_crypto *vcrypto;
 	struct crypto_skcipher *tfm;
 
@@ -42,7 +39,7 @@ struct virtio_crypto_algo {
 	uint32_t algonum;
 	uint32_t service;
 	unsigned int active_devs;
-	struct skcipher_alg algo;
+	struct skcipher_engine_alg algo;
 };
 
 /*
@@ -523,7 +520,6 @@ static int virtio_crypto_skcipher_init(struct crypto_skcipher *tfm)
 	crypto_skcipher_set_reqsize(tfm, sizeof(struct virtio_crypto_sym_request));
 	ctx->tfm = tfm;
 
-	ctx->enginectx.op.do_one_request = virtio_crypto_skcipher_crypt_req;
 	return 0;
 }
 
@@ -578,7 +574,7 @@ static void virtio_crypto_skcipher_finalize_req(
 static struct virtio_crypto_algo virtio_crypto_algs[] = { {
 	.algonum = VIRTIO_CRYPTO_CIPHER_AES_CBC,
 	.service = VIRTIO_CRYPTO_SERVICE_CIPHER,
-	.algo = {
+	.algo.base = {
 		.base.cra_name		= "cbc(aes)",
 		.base.cra_driver_name	= "virtio_crypto_aes_cbc",
 		.base.cra_priority	= 150,
@@ -596,6 +592,9 @@ static struct virtio_crypto_algo virtio_crypto_algs[] = { {
 		.max_keysize		= AES_MAX_KEY_SIZE,
 		.ivsize			= AES_BLOCK_SIZE,
 	},
+	.algo.op = {
+		.do_one_request = virtio_crypto_skcipher_crypt_req,
+	},
 } };
 
 int virtio_crypto_skcipher_algs_register(struct virtio_crypto *vcrypto)
@@ -614,14 +613,14 @@ int virtio_crypto_skcipher_algs_register(struct virtio_crypto *vcrypto)
 			continue;
 
 		if (virtio_crypto_algs[i].active_devs == 0) {
-			ret = crypto_register_skcipher(&virtio_crypto_algs[i].algo);
+			ret = crypto_engine_register_skcipher(&virtio_crypto_algs[i].algo);
 			if (ret)
 				goto unlock;
 		}
 
 		virtio_crypto_algs[i].active_devs++;
 		dev_info(&vcrypto->vdev->dev, "Registered algo %s\n",
-			 virtio_crypto_algs[i].algo.base.cra_name);
+			 virtio_crypto_algs[i].algo.base.base.cra_name);
 	}
 
 unlock:
@@ -645,7 +644,7 @@ void virtio_crypto_skcipher_algs_unregister(struct virtio_crypto *vcrypto)
 			continue;
 
 		if (virtio_crypto_algs[i].active_devs == 1)
-			crypto_unregister_skcipher(&virtio_crypto_algs[i].algo);
+			crypto_engine_unregister_skcipher(&virtio_crypto_algs[i].algo);
 
 		virtio_crypto_algs[i].active_devs--;
 	}

From 28f860d377da437695a6db2330dad3f8ca672f8d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:55:18 +0800
Subject: [PATCH 1353/2047] crypto: zynqmp - Use new crypto_engine_op interface

Use the new crypto_engine_op interface where the callback is stored
in the algorithm object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/xilinx/zynqmp-aes-gcm.c | 35 +++++++++++++-------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c
index ae7dd82abea1..42fe2b7afef2 100644
--- a/drivers/crypto/xilinx/zynqmp-aes-gcm.c
+++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c
@@ -9,13 +9,14 @@
 #include <crypto/gcm.h>
 #include <crypto/internal/aead.h>
 #include <crypto/scatterwalk.h>
-
 #include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
-
-#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/string.h>
 
 #define ZYNQMP_DMA_BIT_MASK	32U
 
@@ -43,7 +44,7 @@ enum zynqmp_aead_keysrc {
 
 struct zynqmp_aead_drv_ctx {
 	union {
-		struct aead_alg aead;
+		struct aead_engine_alg aead;
 	} alg;
 	struct device *dev;
 	struct crypto_engine *engine;
@@ -60,7 +61,6 @@ struct zynqmp_aead_hw_req {
 };
 
 struct zynqmp_aead_tfm_ctx {
-	struct crypto_engine_ctx engine_ctx;
 	struct device *dev;
 	u8 key[ZYNQMP_AES_KEY_SIZE];
 	u8 *iv;
@@ -286,7 +286,7 @@ static int zynqmp_aes_aead_encrypt(struct aead_request *req)
 	struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req);
 
 	rq_ctx->op = ZYNQMP_AES_ENCRYPT;
-	drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead);
+	drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead.base);
 
 	return crypto_transfer_aead_request_to_engine(drv_ctx->engine, req);
 }
@@ -299,7 +299,7 @@ static int zynqmp_aes_aead_decrypt(struct aead_request *req)
 	struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req);
 
 	rq_ctx->op = ZYNQMP_AES_DECRYPT;
-	drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead);
+	drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead.base);
 
 	return crypto_transfer_aead_request_to_engine(drv_ctx->engine, req);
 }
@@ -312,18 +312,16 @@ static int zynqmp_aes_aead_init(struct crypto_aead *aead)
 	struct zynqmp_aead_drv_ctx *drv_ctx;
 	struct aead_alg *alg = crypto_aead_alg(aead);
 
-	drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead);
+	drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead.base);
 	tfm_ctx->dev = drv_ctx->dev;
 
-	tfm_ctx->engine_ctx.op.do_one_request = zynqmp_handle_aes_req;
-
-	tfm_ctx->fbk_cipher = crypto_alloc_aead(drv_ctx->alg.aead.base.cra_name,
+	tfm_ctx->fbk_cipher = crypto_alloc_aead(drv_ctx->alg.aead.base.base.cra_name,
 						0,
 						CRYPTO_ALG_NEED_FALLBACK);
 
 	if (IS_ERR(tfm_ctx->fbk_cipher)) {
 		pr_err("%s() Error: failed to allocate fallback for %s\n",
-		       __func__, drv_ctx->alg.aead.base.cra_name);
+		       __func__, drv_ctx->alg.aead.base.base.cra_name);
 		return PTR_ERR(tfm_ctx->fbk_cipher);
 	}
 
@@ -348,7 +346,7 @@ static void zynqmp_aes_aead_exit(struct crypto_aead *aead)
 }
 
 static struct zynqmp_aead_drv_ctx aes_drv_ctx = {
-	.alg.aead = {
+	.alg.aead.base = {
 		.setkey		= zynqmp_aes_aead_setkey,
 		.setauthsize	= zynqmp_aes_aead_setauthsize,
 		.encrypt	= zynqmp_aes_aead_encrypt,
@@ -370,7 +368,10 @@ static struct zynqmp_aead_drv_ctx aes_drv_ctx = {
 		.cra_ctxsize		= sizeof(struct zynqmp_aead_tfm_ctx),
 		.cra_module		= THIS_MODULE,
 		}
-	}
+	},
+	.alg.aead.op = {
+		.do_one_request = zynqmp_handle_aes_req,
+	},
 };
 
 static int zynqmp_aes_aead_probe(struct platform_device *pdev)
@@ -403,7 +404,7 @@ static int zynqmp_aes_aead_probe(struct platform_device *pdev)
 		goto err_engine;
 	}
 
-	err = crypto_register_aead(&aes_drv_ctx.alg.aead);
+	err = crypto_engine_register_aead(&aes_drv_ctx.alg.aead);
 	if (err < 0) {
 		dev_err(dev, "Failed to register AEAD alg.\n");
 		goto err_aead;
@@ -411,7 +412,7 @@ static int zynqmp_aes_aead_probe(struct platform_device *pdev)
 	return 0;
 
 err_aead:
-	crypto_unregister_aead(&aes_drv_ctx.alg.aead);
+	crypto_engine_unregister_aead(&aes_drv_ctx.alg.aead);
 
 err_engine:
 	if (aes_drv_ctx.engine)
@@ -423,7 +424,7 @@ err_engine:
 static int zynqmp_aes_aead_remove(struct platform_device *pdev)
 {
 	crypto_engine_exit(aes_drv_ctx.engine);
-	crypto_unregister_aead(&aes_drv_ctx.alg.aead);
+	crypto_engine_unregister_aead(&aes_drv_ctx.alg.aead);
 
 	return 0;
 }

From 5ce0bc68e0eeab14fe4dd3be9975c5ced7b20617 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2023 14:55:20 +0800
Subject: [PATCH 1354/2047] crypto: engine - Remove crypto_engine_ctx

Remove the obsolete crypto_engine_ctx structure.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/crypto_engine.c  | 12 +++---------
 include/crypto/engine.h |  4 ----
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index abfb1e6bfa48..108d9d55c509 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -79,7 +79,6 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 	unsigned long flags;
 	bool was_busy = false;
 	int ret;
-	struct crypto_engine_ctx *enginectx;
 
 	spin_lock_irqsave(&engine->queue_lock, flags);
 
@@ -154,14 +153,9 @@ start_request:
 				   struct crypto_engine_alg, base);
 		op = &alg->op;
 	} else {
-		enginectx = crypto_tfm_ctx(async_req->tfm);
-		op = &enginectx->op;
-
-		if (!op->do_one_request) {
-			dev_err(engine->dev, "failed to do request\n");
-			ret = -EINVAL;
-			goto req_err_1;
-		}
+		dev_err(engine->dev, "failed to do request\n");
+		ret = -EINVAL;
+		goto req_err_1;
 	}
 
 	ret = op->do_one_request(engine, async_req);
diff --git a/include/crypto/engine.h b/include/crypto/engine.h
index cf57e732566b..2835069c5997 100644
--- a/include/crypto/engine.h
+++ b/include/crypto/engine.h
@@ -26,10 +26,6 @@ struct crypto_engine_op {
 			      void *areq);
 };
 
-struct crypto_engine_ctx {
-	struct crypto_engine_op op;
-};
-
 struct aead_engine_alg {
 	struct aead_alg base;
 	struct crypto_engine_op op;

From 5ae4b0d8875caa44946e579420c7fd5740d58653 Mon Sep 17 00:00:00 2001
From: Giulio Benetti <giulio.benetti@benettiengineering.com>
Date: Fri, 11 Aug 2023 23:48:53 +0200
Subject: [PATCH 1355/2047] mmc: sdhci-esdhc-imx: improve ESDHC_FLAG_ERR010450

Errata ERR010450 only shows up if voltage is 1.8V, but if the device is
supplied by 3v3 the errata can be ignored. So let's check for if quirk
SDHCI_QUIRK2_NO_1_8_V is defined or not before limiting the frequency.

Cc: Jim Reinhart <jimr@tekvox.com>
Cc: James Autry <jautry@tekvox.com>
Cc: Matthew Maron <matthewm@tekvox.com>
Signed-off-by: Giulio Benetti <giulio.benetti@benettiengineering.com>
Acked-by: Haibo Chen <haibo.chen@nxp.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230811214853.8623-1-giulio.benetti@benettiengineering.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index a8bdb3c5ecd5..3b8030f3552a 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -171,8 +171,8 @@
 #define ESDHC_FLAG_HS400		BIT(9)
 /*
  * The IP has errata ERR010450
- * uSDHC: Due to the I/O timing limit, for SDR mode, SD card clock can't
- * exceed 150MHz, for DDR mode, SD card clock can't exceed 45MHz.
+ * uSDHC: At 1.8V due to the I/O timing limit, for SDR mode, SD card
+ * clock can't exceed 150MHz, for DDR mode, SD card clock can't exceed 45MHz.
  */
 #define ESDHC_FLAG_ERR010450		BIT(10)
 /* The IP supports HS400ES mode */
@@ -961,7 +961,8 @@ static inline void esdhc_pltfm_set_clock(struct sdhci_host *host,
 		| ESDHC_CLOCK_MASK);
 	sdhci_writel(host, temp, ESDHC_SYSTEM_CONTROL);
 
-	if (imx_data->socdata->flags & ESDHC_FLAG_ERR010450) {
+	if ((imx_data->socdata->flags & ESDHC_FLAG_ERR010450) &&
+	    (!(host->quirks2 & SDHCI_QUIRK2_NO_1_8_V))) {
 		unsigned int max_clock;
 
 		max_clock = imx_data->is_ddr ? 45000000 : 150000000;

From 661efa2284bbc2338da0424e219603f034072c74 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Tue, 15 Aug 2023 14:15:56 +0100
Subject: [PATCH 1356/2047] pinctrl: renesas: rzg2l: Fix NULL pointer
 dereference in rzg2l_dt_subnode_to_map()

Fix the below random NULL pointer crash during boot by serializing
pinctrl group and function creation/remove calls in
rzg2l_dt_subnode_to_map() with mutex lock.

Crash log:
    pc : __pi_strcmp+0x20/0x140
    lr : pinmux_func_name_to_selector+0x68/0xa4
    Call trace:
    __pi_strcmp+0x20/0x140
    pinmux_generic_add_function+0x34/0xcc
    rzg2l_dt_subnode_to_map+0x314/0x44c
    rzg2l_dt_node_to_map+0x164/0x194
    pinctrl_dt_to_map+0x218/0x37c
    create_pinctrl+0x70/0x3d8

While at it, add comments for bitmap_lock and lock.

Fixes: c4c4637eb57f ("pinctrl: renesas: Add RZ/G2L pin and gpio controller driver")
Tested-by: Chris Paterson <Chris.Paterson2@renesas.com>
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230815131558.33787-2-biju.das.jz@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/pinctrl/renesas/pinctrl-rzg2l.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
index b53d26167da5..6e8a76556e23 100644
--- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c
+++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
@@ -11,6 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/seq_file.h>
@@ -149,10 +150,11 @@ struct rzg2l_pinctrl {
 	struct gpio_chip		gpio_chip;
 	struct pinctrl_gpio_range	gpio_range;
 	DECLARE_BITMAP(tint_slot, RZG2L_TINT_MAX_INTERRUPT);
-	spinlock_t			bitmap_lock;
+	spinlock_t			bitmap_lock; /* protect tint_slot bitmap */
 	unsigned int			hwirq[RZG2L_TINT_MAX_INTERRUPT];
 
-	spinlock_t			lock;
+	spinlock_t			lock; /* lock read/write registers */
+	struct mutex			mutex; /* serialize adding groups and functions */
 };
 
 static const unsigned int iolh_groupa_mA[] = { 2, 4, 8, 12 };
@@ -362,11 +364,13 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
 		name = np->name;
 	}
 
+	mutex_lock(&pctrl->mutex);
+
 	/* Register a single pin group listing all the pins we read from DT */
 	gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL);
 	if (gsel < 0) {
 		ret = gsel;
-		goto done;
+		goto unlock;
 	}
 
 	/*
@@ -380,6 +384,8 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
 		goto remove_group;
 	}
 
+	mutex_unlock(&pctrl->mutex);
+
 	maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
 	maps[idx].data.mux.group = name;
 	maps[idx].data.mux.function = name;
@@ -391,6 +397,8 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
 
 remove_group:
 	pinctrl_generic_remove_group(pctldev, gsel);
+unlock:
+	mutex_unlock(&pctrl->mutex);
 done:
 	*index = idx;
 	kfree(configs);
@@ -1509,6 +1517,7 @@ static int rzg2l_pinctrl_probe(struct platform_device *pdev)
 
 	spin_lock_init(&pctrl->lock);
 	spin_lock_init(&pctrl->bitmap_lock);
+	mutex_init(&pctrl->mutex);
 
 	platform_set_drvdata(pdev, pctrl);
 

From f982b9d57e7f834138fc908804fe66f646f2b108 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Tue, 15 Aug 2023 14:15:57 +0100
Subject: [PATCH 1357/2047] pinctrl: renesas: rzv2m: Fix NULL pointer
 dereference in rzv2m_dt_subnode_to_map()

Fix the below random NULL pointer crash during boot by serializing
pinctrl group and function creation/remove calls in
rzv2m_dt_subnode_to_map() with mutex lock.

Crash logs:
    pc : __pi_strcmp+0x20/0x140
    lr : pinmux_func_name_to_selector+0x68/0xa4
    Call trace:
    __pi_strcmp+0x20/0x140
    pinmux_generic_add_function+0x34/0xcc
    rzv2m_dt_subnode_to_map+0x2e4/0x418
    rzv2m_dt_node_to_map+0x15c/0x18c
    pinctrl_dt_to_map+0x218/0x37c
    create_pinctrl+0x70/0x3d8

While at it, add a comment for lock.

Fixes: 92a9b8252576 ("pinctrl: renesas: Add RZ/V2M pin and gpio controller driver")
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230815131558.33787-3-biju.das.jz@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/pinctrl/renesas/pinctrl-rzv2m.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/pinctrl/renesas/pinctrl-rzv2m.c b/drivers/pinctrl/renesas/pinctrl-rzv2m.c
index 35b23c1a5684..9146101ea9e2 100644
--- a/drivers/pinctrl/renesas/pinctrl-rzv2m.c
+++ b/drivers/pinctrl/renesas/pinctrl-rzv2m.c
@@ -14,6 +14,7 @@
 #include <linux/gpio/driver.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/spinlock.h>
 
@@ -123,7 +124,8 @@ struct rzv2m_pinctrl {
 	struct gpio_chip		gpio_chip;
 	struct pinctrl_gpio_range	gpio_range;
 
-	spinlock_t			lock;
+	spinlock_t			lock; /* lock read/write registers */
+	struct mutex			mutex; /* serialize adding groups and functions */
 };
 
 static const unsigned int drv_1_8V_group2_uA[] = { 1800, 3800, 7800, 11000 };
@@ -322,11 +324,13 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
 		name = np->name;
 	}
 
+	mutex_lock(&pctrl->mutex);
+
 	/* Register a single pin group listing all the pins we read from DT */
 	gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL);
 	if (gsel < 0) {
 		ret = gsel;
-		goto done;
+		goto unlock;
 	}
 
 	/*
@@ -340,6 +344,8 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
 		goto remove_group;
 	}
 
+	mutex_unlock(&pctrl->mutex);
+
 	maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
 	maps[idx].data.mux.group = name;
 	maps[idx].data.mux.function = name;
@@ -351,6 +357,8 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
 
 remove_group:
 	pinctrl_generic_remove_group(pctldev, gsel);
+unlock:
+	mutex_unlock(&pctrl->mutex);
 done:
 	*index = idx;
 	kfree(configs);
@@ -1071,6 +1079,7 @@ static int rzv2m_pinctrl_probe(struct platform_device *pdev)
 	}
 
 	spin_lock_init(&pctrl->lock);
+	mutex_init(&pctrl->mutex);
 
 	platform_set_drvdata(pdev, pctrl);
 

From 8fcc1c40b747069644db6102c1d84c942c9d4d86 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Tue, 15 Aug 2023 14:15:58 +0100
Subject: [PATCH 1358/2047] pinctrl: renesas: rza2: Add lock around
 pinctrl_generic{{add,remove}_group,{add,remove}_function}

The pinctrl group and function creation/remove calls expect
caller to take care of locking. Add lock around these functions.

Fixes: b59d0e782706 ("pinctrl: Add RZ/A2 pin and gpio controller")
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230815131558.33787-4-biju.das.jz@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/pinctrl/renesas/pinctrl-rza2.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/pinctrl/renesas/pinctrl-rza2.c b/drivers/pinctrl/renesas/pinctrl-rza2.c
index 40b1326a1077..5591ddf16fdf 100644
--- a/drivers/pinctrl/renesas/pinctrl-rza2.c
+++ b/drivers/pinctrl/renesas/pinctrl-rza2.c
@@ -14,6 +14,7 @@
 #include <linux/gpio/driver.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/pinctrl/pinmux.h>
 
@@ -46,6 +47,7 @@ struct rza2_pinctrl_priv {
 	struct pinctrl_dev *pctl;
 	struct pinctrl_gpio_range gpio_range;
 	int npins;
+	struct mutex mutex; /* serialize adding groups and functions */
 };
 
 #define RZA2_PDR(port)		(0x0000 + (port) * 2)	/* Direction 16-bit */
@@ -358,10 +360,14 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev,
 		psel_val[i] = MUX_FUNC(value);
 	}
 
+	mutex_lock(&priv->mutex);
+
 	/* Register a single pin group listing all the pins we read from DT */
 	gsel = pinctrl_generic_add_group(pctldev, np->name, pins, npins, NULL);
-	if (gsel < 0)
-		return gsel;
+	if (gsel < 0) {
+		ret = gsel;
+		goto unlock;
+	}
 
 	/*
 	 * Register a single group function where the 'data' is an array PSEL
@@ -390,6 +396,8 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev,
 	(*map)->data.mux.function = np->name;
 	*num_maps = 1;
 
+	mutex_unlock(&priv->mutex);
+
 	return 0;
 
 remove_function:
@@ -398,6 +406,9 @@ remove_function:
 remove_group:
 	pinctrl_generic_remove_group(pctldev, gsel);
 
+unlock:
+	mutex_unlock(&priv->mutex);
+
 	dev_err(priv->dev, "Unable to parse DT node %s\n", np->name);
 
 	return ret;
@@ -473,6 +484,8 @@ static int rza2_pinctrl_probe(struct platform_device *pdev)
 	if (IS_ERR(priv->base))
 		return PTR_ERR(priv->base);
 
+	mutex_init(&priv->mutex);
+
 	platform_set_drvdata(pdev, priv);
 
 	priv->npins = (int)(uintptr_t)of_device_get_match_data(&pdev->dev) *

From 4240e2ebe67941ce2c4f5c866c3af4b5ac7a0c67 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Mon, 14 Aug 2023 15:08:53 -0500
Subject: [PATCH 1359/2047] x86/MCE: Always save CS register on AMD Zen IF
 Poison errors

The Instruction Fetch (IF) units on current AMD Zen-based systems do not
guarantee a synchronous #MC is delivered for poison consumption errors.
Therefore, MCG_STATUS[EIPV|RIPV] will not be set. However, the
microarchitecture does guarantee that the exception is delivered within
the same context. In other words, the exact rIP is not known, but the
context is known to not have changed.

There is no architecturally-defined method to determine this behavior.

The Code Segment (CS) register is always valid on such IF unit poison
errors regardless of the value of MCG_STATUS[EIPV|RIPV].

Add a quirk to save the CS register for poison consumption from the IF
unit banks.

This is needed to properly determine the context of the error.
Otherwise, the severity grading function will assume the context is
IN_KERNEL due to the m->cs value being 0 (the initialized value). This
leads to unnecessary kernel panics on data poison errors due to the
kernel believing the poison consumption occurred in kernel context.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20230814200853.29258-1-yazen.ghannam@amd.com
---
 arch/x86/kernel/cpu/mce/core.c     | 26 ++++++++++++++++++++++++++
 arch/x86/kernel/cpu/mce/internal.h |  5 ++++-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b8ad5a5b4026..6f35f724cc14 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -842,6 +842,26 @@ static noinstr bool quirk_skylake_repmov(void)
 	return false;
 }
 
+/*
+ * Some Zen-based Instruction Fetch Units set EIPV=RIPV=0 on poison consumption
+ * errors. This means mce_gather_info() will not save the "ip" and "cs" registers.
+ *
+ * However, the context is still valid, so save the "cs" register for later use.
+ *
+ * The "ip" register is truly unknown, so don't save it or fixup EIPV/RIPV.
+ *
+ * The Instruction Fetch Unit is at MCA bank 1 for all affected systems.
+ */
+static __always_inline void quirk_zen_ifu(int bank, struct mce *m, struct pt_regs *regs)
+{
+	if (bank != 1)
+		return;
+	if (!(m->status & MCI_STATUS_POISON))
+		return;
+
+	m->cs = regs->cs;
+}
+
 /*
  * Do a quick check if any of the events requires a panic.
  * This decides if we keep the events around or clear them.
@@ -861,6 +881,9 @@ static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned lo
 		if (mce_flags.snb_ifu_quirk)
 			quirk_sandybridge_ifu(i, m, regs);
 
+		if (mce_flags.zen_ifu_quirk)
+			quirk_zen_ifu(i, m, regs);
+
 		m->bank = i;
 		if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) {
 			mce_read_aux(m, i);
@@ -1849,6 +1872,9 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 		if (c->x86 == 0x15 && c->x86_model <= 0xf)
 			mce_flags.overflow_recov = 1;
 
+		if (c->x86 >= 0x17 && c->x86 <= 0x1A)
+			mce_flags.zen_ifu_quirk = 1;
+
 	}
 
 	if (c->x86_vendor == X86_VENDOR_INTEL) {
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index ed4a71c0f093..bcf1b3c66c9c 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -157,6 +157,9 @@ struct mce_vendor_flags {
 	 */
 	smca			: 1,
 
+	/* Zen IFU quirk */
+	zen_ifu_quirk		: 1,
+
 	/* AMD-style error thresholding banks present. */
 	amd_threshold		: 1,
 
@@ -172,7 +175,7 @@ struct mce_vendor_flags {
 	/* Skylake, Cascade Lake, Cooper Lake REP;MOVS* quirk */
 	skx_repmov_quirk	: 1,
 
-	__reserved_0		: 56;
+	__reserved_0		: 55;
 };
 
 extern struct mce_vendor_flags mce_flags;

From 35acbdfaf17c94d64ee336282f21b2981676748a Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Fri, 18 Aug 2023 20:42:27 +0800
Subject: [PATCH 1360/2047] regulator: db8500-prcmu: Remove unused declaration
 power_state_active_is_enabled()

Commit 38e968380b27 ("regulators/db8500: split off shared dbx500 code")
removed this but not its declaration.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Link: https://lore.kernel.org/r/20230818124227.15084-1-yuehaibing@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/db8500-prcmu.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/include/linux/regulator/db8500-prcmu.h b/include/linux/regulator/db8500-prcmu.h
index f90df9ee703e..d58ff273157e 100644
--- a/include/linux/regulator/db8500-prcmu.h
+++ b/include/linux/regulator/db8500-prcmu.h
@@ -35,10 +35,4 @@ enum db8500_regulator_id {
 	DB8500_NUM_REGULATORS
 };
 
-/*
- * Exported interface for CPUIdle only. This function is called with all
- * interrupts turned off.
- */
-int power_state_active_is_enabled(void);
-
 #endif

From 386cb81e4ba7811573765aaaeb91b472639c2bae Mon Sep 17 00:00:00 2001
From: Holger Dengler <dengler@linux.ibm.com>
Date: Fri, 11 Aug 2023 16:56:20 +0200
Subject: [PATCH 1361/2047] s390/zcrypt_ep11misc: support API ordinal 6 with
 empty pin-blob

Secure execution guest environments require an empty pinblob in all
key generation and unwrap requests. Empty pinblobs are only available
in EP11 API ordinal 6 or higher.

Add an empty pinblob to key generation and unwrap requests, if the AP
secure binding facility is available. In all other cases, stay with
the empty pin tag (no pinblob) and the current API ordinals.

The EP11 API ordinal also needs to be considered when the pkey module
tries to figure out the list of eligible cards for key operations
with protected keys in secure execution environment.

These changes are transparent to userspace but required for running
an secure execution guest with handling key generate and key derive
(e.g. secure key to protected key) correct. Especially using EP11
secure keys with the kernel dm-crypt layer requires this patch.

Co-developed-by: Harald Freudenberger <freude@linux.ibm.com>
Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
Signed-off-by: Holger Dengler <dengler@linux.ibm.com>
Reviewed-by: Ingo Franzki <ifranzki@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/ap_bus.c          |  9 ++++
 drivers/s390/crypto/ap_bus.h          |  1 +
 drivers/s390/crypto/pkey_api.c        | 27 ++++++++----
 drivers/s390/crypto/zcrypt_ep11misc.c | 60 ++++++++++++++++++++-------
 drivers/s390/crypto/zcrypt_ep11misc.h |  4 +-
 5 files changed, 76 insertions(+), 25 deletions(-)

diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index b1d2fedea086..339812efe822 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -218,6 +218,15 @@ int ap_sb_available(void)
 	return 0;
 }
 
+/*
+ * ap_is_se_guest(): Check for SE guest with AP pass-through support.
+ */
+bool ap_is_se_guest(void)
+{
+	return is_prot_virt_guest() && ap_sb_available();
+}
+EXPORT_SYMBOL(ap_is_se_guest);
+
 /*
  * ap_fetch_qci_info(): Fetch cryptographic config info
  *
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 47bbe9babc59..be54b070c031 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -274,6 +274,7 @@ void ap_flush_queue(struct ap_queue *aq);
 
 void *ap_airq_ptr(void);
 int ap_sb_available(void);
+bool ap_is_se_guest(void);
 void ap_wait(enum ap_sm_wait wait);
 void ap_request_timeout(struct timer_list *t);
 void ap_bus_force_rescan(void);
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 396a159afdf5..6cfb6b2340c9 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -263,7 +263,9 @@ static int pkey_clr2ep11key(const u8 *clrkey, size_t clrkeylen,
 
 	/* build a list of apqns suitable for ep11 keys with cpacf support */
 	rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF,
-			    ZCRYPT_CEX7, EP11_API_V, NULL);
+			    ZCRYPT_CEX7,
+			    ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4,
+			    NULL);
 	if (rc)
 		goto out;
 
@@ -299,7 +301,8 @@ static int pkey_ep11key2pkey(const u8 *key, size_t keylen,
 
 	/* build a list of apqns suitable for this key */
 	rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF,
-			    ZCRYPT_CEX7, EP11_API_V,
+			    ZCRYPT_CEX7,
+			    ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4,
 			    ep11_kb_wkvp(key, keylen));
 	if (rc)
 		goto out;
@@ -902,6 +905,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
 	} else if (hdr->type == TOKTYPE_NON_CCA &&
 		   hdr->version == TOKVER_EP11_AES) {
 		struct ep11keyblob *kb = (struct ep11keyblob *)key;
+		int api;
 
 		rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1);
 		if (rc)
@@ -911,8 +915,9 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
 		if (ksize)
 			*ksize = kb->head.bitlen;
 
+		api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
 		rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain,
-				    ZCRYPT_CEX7, EP11_API_V,
+				    ZCRYPT_CEX7, api,
 				    ep11_kb_wkvp(key, keylen));
 		if (rc)
 			goto out;
@@ -926,6 +931,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
 	} else if (hdr->type == TOKTYPE_NON_CCA &&
 		   hdr->version == TOKVER_EP11_AES_WITH_HEADER) {
 		struct ep11kblob_header *kh = (struct ep11kblob_header *)key;
+		int api;
 
 		rc = ep11_check_aes_key_with_hdr(debug_info, 3,
 						 key, keylen, 1);
@@ -936,8 +942,9 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
 		if (ksize)
 			*ksize = kh->bitlen;
 
+		api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
 		rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain,
-				    ZCRYPT_CEX7, EP11_API_V,
+				    ZCRYPT_CEX7, api,
 				    ep11_kb_wkvp(key, keylen));
 		if (rc)
 			goto out;
@@ -1056,7 +1063,7 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags,
 			return -EINVAL;
 		if (kb->attr & EP11_BLOB_PKEY_EXTRACTABLE) {
 			minhwtype = ZCRYPT_CEX7;
-			api = EP11_API_V;
+			api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
 		}
 		rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
 				    minhwtype, api, kb->wkvp);
@@ -1072,7 +1079,7 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags,
 			return -EINVAL;
 		if (kb->attr & EP11_BLOB_PKEY_EXTRACTABLE) {
 			minhwtype = ZCRYPT_CEX7;
-			api = EP11_API_V;
+			api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
 		}
 		rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
 				    minhwtype, api, kb->wkvp);
@@ -1182,11 +1189,13 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype,
 		   ktype == PKEY_TYPE_EP11_AES ||
 		   ktype == PKEY_TYPE_EP11_ECC) {
 		u8 *wkvp = NULL;
+		int api;
 
 		if (flags & PKEY_FLAGS_MATCH_CUR_MKVP)
 			wkvp = cur_mkvp;
+		api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
 		rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
-				    ZCRYPT_CEX7, EP11_API_V, wkvp);
+				    ZCRYPT_CEX7, api, wkvp);
 		if (rc)
 			goto out;
 
@@ -2160,7 +2169,9 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits,
 
 	/* build a list of apqns able to generate an cipher key */
 	rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF,
-			    ZCRYPT_CEX7, EP11_API_V, NULL);
+			    ZCRYPT_CEX7,
+			    ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4,
+			    NULL);
 	if (rc)
 		return rc;
 
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c
index 669ad6f5d5b0..0a877f9792c2 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.c
+++ b/drivers/s390/crypto/zcrypt_ep11misc.c
@@ -29,6 +29,8 @@
 #define DEBUG_WARN(...) ZCRYPT_DBF(DBF_WARN, ##__VA_ARGS__)
 #define DEBUG_ERR(...)	ZCRYPT_DBF(DBF_ERR, ##__VA_ARGS__)
 
+#define EP11_PINBLOB_V1_BYTES 56
+
 /* default iv used here */
 static const u8 def_iv[16] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
 			       0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff };
@@ -592,7 +594,7 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
 	struct ep11_cprb *req = NULL, *rep = NULL;
 	struct ep11_target_dev target;
 	struct ep11_urb *urb = NULL;
-	int api = 1, rc = -ENOMEM;
+	int api = EP11_API_V1, rc = -ENOMEM;
 
 	/* request cprb and payload */
 	req = alloc_cprb(sizeof(struct ep11_info_req_pl));
@@ -789,8 +791,7 @@ static int _ep11_genaeskey(u16 card, u16 domain,
 		u32 attr_bool_bits;
 		u32 attr_val_len_type;
 		u32 attr_val_len_value;
-		u8  pin_tag;
-		u8  pin_len;
+		/* followed by empty pin tag or empty pinblob tag */
 	} __packed * req_pl;
 	struct keygen_rep_pl {
 		struct pl_head head;
@@ -803,9 +804,11 @@ static int _ep11_genaeskey(u16 card, u16 domain,
 		u8  data[512];
 	} __packed * rep_pl;
 	struct ep11_cprb *req = NULL, *rep = NULL;
+	size_t req_pl_size, pinblob_size = 0;
 	struct ep11_target_dev target;
 	struct ep11_urb *urb = NULL;
 	int api, rc = -ENOMEM;
+	u8 *p;
 
 	switch (keybitsize) {
 	case 128:
@@ -821,12 +824,22 @@ static int _ep11_genaeskey(u16 card, u16 domain,
 	}
 
 	/* request cprb and payload */
-	req = alloc_cprb(sizeof(struct keygen_req_pl));
+	api = (!keygenflags || keygenflags & 0x00200000) ?
+		EP11_API_V4 : EP11_API_V1;
+	if (ap_is_se_guest()) {
+		/*
+		 * genkey within SE environment requires API ordinal 6
+		 * with empty pinblob
+		 */
+		api = EP11_API_V6;
+		pinblob_size = EP11_PINBLOB_V1_BYTES;
+	}
+	req_pl_size = sizeof(struct keygen_req_pl) + ASN1TAGLEN(pinblob_size);
+	req = alloc_cprb(req_pl_size);
 	if (!req)
 		goto out;
 	req_pl = (struct keygen_req_pl *)(((u8 *)req) + sizeof(*req));
-	api = (!keygenflags || keygenflags & 0x00200000) ? 4 : 1;
-	prep_head(&req_pl->head, sizeof(*req_pl), api, 21); /* GenerateKey */
+	prep_head(&req_pl->head, req_pl_size, api, 21); /* GenerateKey */
 	req_pl->var_tag = 0x04;
 	req_pl->var_len = sizeof(u32);
 	req_pl->keybytes_tag = 0x04;
@@ -842,7 +855,10 @@ static int _ep11_genaeskey(u16 card, u16 domain,
 	req_pl->attr_bool_bits = keygenflags ? keygenflags : KEY_ATTR_DEFAULTS;
 	req_pl->attr_val_len_type = 0x00000161; /* CKA_VALUE_LEN */
 	req_pl->attr_val_len_value = keybitsize / 8;
-	req_pl->pin_tag = 0x04;
+	p = ((u8 *)req_pl) + sizeof(*req_pl);
+	/* pin tag */
+	*p++ = 0x04;
+	*p++ = pinblob_size;
 
 	/* reply cprb and payload */
 	rep = alloc_cprb(sizeof(struct keygen_rep_pl));
@@ -857,7 +873,7 @@ static int _ep11_genaeskey(u16 card, u16 domain,
 	target.ap_id = card;
 	target.dom_id = domain;
 	prep_urb(urb, &target, 1,
-		 req, sizeof(*req) + sizeof(*req_pl),
+		 req, sizeof(*req) + req_pl_size,
 		 rep, sizeof(*rep) + sizeof(*rep_pl));
 
 	rc = zcrypt_send_ep11_cprb(urb);
@@ -965,7 +981,7 @@ static int ep11_cryptsingle(u16 card, u16 domain,
 	struct ep11_target_dev target;
 	struct ep11_urb *urb = NULL;
 	size_t req_pl_size, rep_pl_size;
-	int n, api = 1, rc = -ENOMEM;
+	int n, api = EP11_API_V1, rc = -ENOMEM;
 	u8 *p;
 
 	/* the simple asn1 coding used has length limits */
@@ -1084,7 +1100,7 @@ static int _ep11_unwrapkey(u16 card, u16 domain,
 		 * maybe followed by iv data
 		 * followed by kek tag + kek blob
 		 * followed by empty mac tag
-		 * followed by empty pin tag
+		 * followed by empty pin tag or empty pinblob tag
 		 * followed by encryted key tag + bytes
 		 */
 	} __packed * req_pl;
@@ -1099,20 +1115,30 @@ static int _ep11_unwrapkey(u16 card, u16 domain,
 		u8  data[512];
 	} __packed * rep_pl;
 	struct ep11_cprb *req = NULL, *rep = NULL;
+	size_t req_pl_size, pinblob_size = 0;
 	struct ep11_target_dev target;
 	struct ep11_urb *urb = NULL;
-	size_t req_pl_size;
 	int api, rc = -ENOMEM;
 	u8 *p;
 
 	/* request cprb and payload */
+	api = (!keygenflags || keygenflags & 0x00200000) ?
+		EP11_API_V4 : EP11_API_V1;
+	if (ap_is_se_guest()) {
+		/*
+		 * unwrap within SE environment requires API ordinal 6
+		 * with empty pinblob
+		 */
+		api = EP11_API_V6;
+		pinblob_size = EP11_PINBLOB_V1_BYTES;
+	}
 	req_pl_size = sizeof(struct uw_req_pl) + (iv ? 16 : 0)
-		+ ASN1TAGLEN(keksize) + 4 + ASN1TAGLEN(enckeysize);
+		+ ASN1TAGLEN(keksize) + ASN1TAGLEN(0)
+		+ ASN1TAGLEN(pinblob_size) + ASN1TAGLEN(enckeysize);
 	req = alloc_cprb(req_pl_size);
 	if (!req)
 		goto out;
 	req_pl = (struct uw_req_pl *)(((u8 *)req) + sizeof(*req));
-	api = (!keygenflags || keygenflags & 0x00200000) ? 4 : 1;
 	prep_head(&req_pl->head, req_pl_size, api, 34); /* UnwrapKey */
 	req_pl->attr_tag = 0x04;
 	req_pl->attr_len = 7 * sizeof(u32);
@@ -1137,9 +1163,10 @@ static int _ep11_unwrapkey(u16 card, u16 domain,
 	/* empty mac key tag */
 	*p++ = 0x04;
 	*p++ = 0;
-	/* empty pin tag */
+	/* pin tag */
 	*p++ = 0x04;
-	*p++ = 0;
+	*p++ = pinblob_size;
+	p += pinblob_size;
 	/* encrypted key value tag and bytes */
 	p += asn1tag_write(p, 0x04, enckey, enckeysize);
 
@@ -1275,7 +1302,8 @@ static int _ep11_wrapkey(u16 card, u16 domain,
 	if (!mech || mech == 0x80060001)
 		req->flags |= 0x20; /* CPACF_WRAP needs special bit */
 	req_pl = (struct wk_req_pl *)(((u8 *)req) + sizeof(*req));
-	api = (!mech || mech == 0x80060001) ? 4 : 1; /* CKM_IBM_CPACF_WRAP */
+	api = (!mech || mech == 0x80060001) ? /* CKM_IBM_CPACF_WRAP */
+		EP11_API_V4 : EP11_API_V1;
 	prep_head(&req_pl->head, req_pl_size, api, 33); /* WrapKey */
 	req_pl->var_tag = 0x04;
 	req_pl->var_len = sizeof(u32);
diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h
index a0de1cccebbe..9d17fd5228a7 100644
--- a/drivers/s390/crypto/zcrypt_ep11misc.h
+++ b/drivers/s390/crypto/zcrypt_ep11misc.h
@@ -12,7 +12,9 @@
 #include <asm/zcrypt.h>
 #include <asm/pkey.h>
 
-#define EP11_API_V 4  /* highest known and supported EP11 API version */
+#define EP11_API_V1 1  /* min EP11 API, default if no higher api required */
+#define EP11_API_V4 4  /* supported EP11 API for the ep11misc cprbs */
+#define EP11_API_V6 6  /* min EP11 API for some cprbs in SE environment */
 #define EP11_STRUCT_MAGIC 0x1234
 #define EP11_BLOB_PKEY_EXTRACTABLE 0x00200000
 

From 979fe44af819d76fc02a5a97ad1cb74f7d83578e Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Wed, 16 Aug 2023 15:29:42 +0200
Subject: [PATCH 1362/2047] s390/ipl: fix virtual vs physical address confusion

The value of ipl_cert_list_addr boot variable contains
a physical address, which is used directly. That works
because virtual and physical address spaces are currently
the same, but otherwise it is wrong.

While at it, fix also a comment for the platform keyring.

Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>
Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
Link: https://lore.kernel.org/r/20230816132942.2540411-1-agordeev@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/machine_kexec_file.c             | 4 ++--
 arch/s390/kernel/setup.c                          | 2 +-
 security/integrity/platform_certs/load_ipl_s390.c | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index 2df94d32140c..8d207b82d9fe 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -188,7 +188,7 @@ static int kexec_file_add_ipl_report(struct kimage *image,
 	data->memsz = ALIGN(data->memsz, PAGE_SIZE);
 	buf.mem = data->memsz;
 
-	ptr = (void *)ipl_cert_list_addr;
+	ptr = __va(ipl_cert_list_addr);
 	end = ptr + ipl_cert_list_size;
 	ncerts = 0;
 	while (ptr < end) {
@@ -200,7 +200,7 @@ static int kexec_file_add_ipl_report(struct kimage *image,
 
 	addr = data->memsz + data->report->size;
 	addr += ncerts * sizeof(struct ipl_rb_certificate_entry);
-	ptr = (void *)ipl_cert_list_addr;
+	ptr = __va(ipl_cert_list_addr);
 	while (ptr < end) {
 		len = *(unsigned int *)ptr;
 		ptr += sizeof(len);
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 393dd8385506..c744104e4a9c 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -875,7 +875,7 @@ static void __init log_component_list(void)
 		pr_info("Linux is running with Secure-IPL enabled\n");
 	else
 		pr_info("Linux is running with Secure-IPL disabled\n");
-	ptr = (void *) early_ipl_comp_list_addr;
+	ptr = __va(early_ipl_comp_list_addr);
 	end = (void *) ptr + early_ipl_comp_list_size;
 	pr_info("The IPL report contains the following components:\n");
 	while (ptr < end) {
diff --git a/security/integrity/platform_certs/load_ipl_s390.c b/security/integrity/platform_certs/load_ipl_s390.c
index e769dcb7ea94..c7c381a9ddaa 100644
--- a/security/integrity/platform_certs/load_ipl_s390.c
+++ b/security/integrity/platform_certs/load_ipl_s390.c
@@ -22,8 +22,8 @@ static int __init load_ipl_certs(void)
 
 	if (!ipl_cert_list_addr)
 		return 0;
-	/* Copy the certificates to the system keyring */
-	ptr = (void *) ipl_cert_list_addr;
+	/* Copy the certificates to the platform keyring */
+	ptr = __va(ipl_cert_list_addr);
 	end = ptr + ipl_cert_list_size;
 	while ((void *) ptr < end) {
 		len = *(unsigned int *) ptr;

From cfd012107f11ec4af010f11eca341edc831abf6c Mon Sep 17 00:00:00 2001
From: Justin Stitt <justinstitt@google.com>
Date: Fri, 11 Aug 2023 21:56:15 +0000
Subject: [PATCH 1363/2047] s390/ipl: refactor deprecated strncpy

`strncpy` is deprecated for use on NUL-terminated destination strings [1].

Use `strscpy` which has the same behavior as `strncpy` here with the
extra safeguard of guaranteeing NUL-termination of destination
strings.  In it's current form, this may result in silent truncation
if the src string has the same size as the destination string.

[hca@linux.ibm.com: use strscpy() instead of strscpy_pad()]
Link: www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings[1]
Link: https://github.com/KSPP/linux/issues/90
Cc: linux-hardening@vger.kernel.org
Signed-off-by: Justin Stitt <justinstitt@google.com>
Link: https://lore.kernel.org/r/20230811-arch-s390-kernel-v1-1-7edbeeab3809@google.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/ipl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 7f3a84e414a4..05e51666db03 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -266,7 +266,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
 		struct kobj_attribute *attr,				\
 		const char *buf, size_t len)				\
 {									\
-	strncpy(_value, buf, sizeof(_value) - 1);			\
+	strscpy(_value, buf, sizeof(_value));				\
 	strim(_value);							\
 	return len;							\
 }									\

From 680b7ddd7e2ab7638d431722432f6d02d75dade1 Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.ibm.com>
Date: Tue, 15 Aug 2023 14:43:22 -0400
Subject: [PATCH 1364/2047] s390/vfio-ap: no need to check the 'E' and 'I' bits
 in APQSW after TAPQ

After a ZAPQ is executed to reset a queue, if the queue is not empty or
interrupts are still enabled, the vfio_ap driver will wait for the reset
operation to complete by repeatedly executing the TAPQ instruction and
checking the 'E' and 'I' bits in the APQSW to verify that the queue is
empty and interrupts are disabled. This is unnecessary because it is
sufficient to check only the response code in the APQSW. If the reset is
still in progress, the response code will be 02; however, if the reset has
completed successfully, the response code will be 00.

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Acked-by: Janosch Frank <frankja@linux.ibm.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815184333.6554-2-akrowiak@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index b441745b0418..3fd80533194b 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1608,19 +1608,10 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
 {
 	switch (status->response_code) {
 	case AP_RESPONSE_NORMAL:
-	case AP_RESPONSE_RESET_IN_PROGRESS:
-		if (status->queue_empty && !status->irq_enabled)
-			return 0;
-		return -EBUSY;
 	case AP_RESPONSE_DECONFIGURED:
-		/*
-		 * If the AP queue is deconfigured, any subsequent AP command
-		 * targeting the queue will fail with the same response code. On the
-		 * other hand, when an AP adapter is deconfigured, the associated
-		 * queues are reset, so let's return a value indicating the reset
-		 * for which we're waiting completed successfully.
-		 */
 		return 0;
+	case AP_RESPONSE_RESET_IN_PROGRESS:
+		return -EBUSY;
 	default:
 		WARN(true,
 		     "failed to verify reset of queue %02x.%04x: TAPQ rc=%u\n",

From 7aa7b2a80cb70d528785f06a54d6c8148826006d Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.ibm.com>
Date: Tue, 15 Aug 2023 14:43:23 -0400
Subject: [PATCH 1365/2047] s390/vfio-ap: clean up irq resources if possible

The architecture does not specify whether interrupts are disabled as part
of the asynchronous reset or upon return from the PQAP/ZAPQ instruction.
If, however, PQAP/ZAPQ completes with APQSW response code 0 and the
interrupt bit in the status word is also 0, we know the interrupts are
disabled and we can go ahead and clean up the corresponding resources;
otherwise, we must wait until the asynchronous reset has completed.

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Suggested-by: Halil Pasic <pasic@linux.ibm.com>
Reviewed-by: Jason J. Herne <jjherne@linux.ibm.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Janosch Frank <frankja@linux.ibm.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815184333.6554-3-akrowiak@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 3fd80533194b..be92ba45226d 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1654,9 +1654,13 @@ retry_zapq:
 	switch (status.response_code) {
 	case AP_RESPONSE_NORMAL:
 		ret = 0;
-		/* if the reset has not completed, wait for it to take effect */
-		if (!status.queue_empty || status.irq_enabled)
+		if (!status.irq_enabled)
+			vfio_ap_free_aqic_resources(q);
+		if (!status.queue_empty || status.irq_enabled) {
 			ret = apq_reset_check(q);
+			if (status.irq_enabled && ret == 0)
+				vfio_ap_free_aqic_resources(q);
+		}
 		break;
 	case AP_RESPONSE_RESET_IN_PROGRESS:
 		/*
@@ -1675,6 +1679,7 @@ retry_zapq:
 		 * completed successfully.
 		 */
 		ret = 0;
+		vfio_ap_free_aqic_resources(q);
 		break;
 	default:
 		WARN(true,
@@ -1684,8 +1689,6 @@ retry_zapq:
 		return -EIO;
 	}
 
-	vfio_ap_free_aqic_resources(q);
-
 	return ret;
 }
 

From 411b0109daa52d1cc5be39635631e22a5590c5d8 Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.ibm.com>
Date: Tue, 15 Aug 2023 14:43:24 -0400
Subject: [PATCH 1366/2047] s390/vfio-ap: wait for response code 05 to clear on
 queue reset

Response code 05, AP busy, is a valid response code for a ZAPQ or TAPQ.
Instead of returning error -EIO when a ZAPQ fails with response code 05,
let's wait until the queue is no longer busy and try the ZAPQ again.

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Acked-by: Janosch Frank <frankja@linux.ibm.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815184333.6554-4-akrowiak@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index be92ba45226d..3f67cfb53d0c 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1611,6 +1611,7 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
 	case AP_RESPONSE_DECONFIGURED:
 		return 0;
 	case AP_RESPONSE_RESET_IN_PROGRESS:
+	case AP_RESPONSE_BUSY:
 		return -EBUSY;
 	default:
 		WARN(true,
@@ -1663,6 +1664,7 @@ retry_zapq:
 		}
 		break;
 	case AP_RESPONSE_RESET_IN_PROGRESS:
+	case AP_RESPONSE_BUSY:
 		/*
 		 * There is a reset issued by another process in progress. Let's wait
 		 * for that to complete. Since we have no idea whether it was a RAPQ or

From c51f8c6bb5c8a4878310d55e3a0b91747954b43d Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.ibm.com>
Date: Tue, 15 Aug 2023 14:43:25 -0400
Subject: [PATCH 1367/2047] s390/vfio-ap: allow deconfigured queue to be passed
 through to a guest

When a queue is reset, the status response code returned from the reset
operation is stored in the reset_rc field of the vfio_ap_queue structure
representing the queue being reset. This field is later used to decide
whether the queue should be passed through to a guest. If the reset_rc
field is a non-zero value, the queue will be filtered from the list of
queues passed through.

When an adapter is deconfigured, all queues associated with that adapter
are reset. That being the case, it is not necessary to filter those queues;
so, if the status response code returned from a reset operation indicates
the queue is deconfigured, the reset_rc field of the vfio_ap_queue
structure will be set to zero so it will be passed through (i.e., not
filtered).

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Jason J. Herne <jjherne@linux.ibm.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815184333.6554-5-akrowiak@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 3f67cfb53d0c..a489536c508a 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1677,9 +1677,11 @@ retry_zapq:
 	case AP_RESPONSE_DECONFIGURED:
 		/*
 		 * When an AP adapter is deconfigured, the associated
-		 * queues are reset, so let's return a value indicating the reset
-		 * completed successfully.
+		 * queues are reset, so let's set the status response code to 0
+		 * so the queue may be passed through (i.e., not filtered) and
+		 * return a value indicating the reset completed successfully.
 		 */
+		q->reset_rc = 0;
 		ret = 0;
 		vfio_ap_free_aqic_resources(q);
 		break;

From dd174833e44e7717f88f0925b1f78e9ba1d2626e Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.ibm.com>
Date: Tue, 15 Aug 2023 14:43:26 -0400
Subject: [PATCH 1368/2047] s390/vfio-ap: remove upper limit on wait for queue
 reset to complete

The architecture does not define an upper limit on how long a queue reset
(RAPQ/ZAPQ) can take to complete. In order to ensure both the security
requirements and prevent resource leakage and corruption in the hypervisor,
it is necessary to remove the upper limit (200ms) the vfio_ap driver
currently waits for a reset to complete. This, of course, may result in a
hang which is a less than desirable user experience, but until a firmware
solution is provided, this is a necessary evil.

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Jason J. Herne <jjherne@linux.ibm.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815184333.6554-6-akrowiak@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 64 +++++++++++++++++--------------
 1 file changed, 35 insertions(+), 29 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index a489536c508a..2517868aad56 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -30,7 +30,6 @@
 #define AP_QUEUE_UNASSIGNED "unassigned"
 #define AP_QUEUE_IN_USE "in use"
 
-#define MAX_RESET_CHECK_WAIT	200	/* Sleep max 200ms for reset check	*/
 #define AP_RESET_INTERVAL		20	/* Reset sleep interval (20ms)		*/
 
 static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable);
@@ -1622,58 +1621,66 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
 	}
 }
 
+#define WAIT_MSG "Waited %dms for reset of queue %02x.%04x (%u, %u, %u)"
+
 static int apq_reset_check(struct vfio_ap_queue *q)
 {
-	int ret;
-	int iters = MAX_RESET_CHECK_WAIT / AP_RESET_INTERVAL;
+	int ret = -EBUSY, elapsed = 0;
 	struct ap_queue_status status;
 
-	for (; iters > 0; iters--) {
+	while (true) {
 		msleep(AP_RESET_INTERVAL);
+		elapsed += AP_RESET_INTERVAL;
 		status = ap_tapq(q->apqn, NULL);
 		ret = apq_status_check(q->apqn, &status);
-		if (ret != -EBUSY)
+		if (ret == -EIO)
 			return ret;
+		if (ret == -EBUSY) {
+			pr_notice_ratelimited(WAIT_MSG, elapsed,
+					      AP_QID_CARD(q->apqn),
+					      AP_QID_QUEUE(q->apqn),
+					      status.response_code,
+					      status.queue_empty,
+					      status.irq_enabled);
+		} else {
+			if (q->reset_rc == AP_RESPONSE_RESET_IN_PROGRESS ||
+			    q->reset_rc == AP_RESPONSE_BUSY) {
+				status = ap_zapq(q->apqn, 0);
+				q->reset_rc = status.response_code;
+				continue;
+			}
+			/*
+			 * When an AP adapter is deconfigured, the associated
+			 * queues are reset, so let's set the status response
+			 * code to 0 so the queue may be passed through (i.e.,
+			 * not filtered).
+			 */
+			if (q->reset_rc == AP_RESPONSE_DECONFIGURED)
+				q->reset_rc = 0;
+			if (q->saved_isc != VFIO_AP_ISC_INVALID)
+				vfio_ap_free_aqic_resources(q);
+			break;
+		}
 	}
-	WARN_ONCE(iters <= 0,
-		  "timeout verifying reset of queue %02x.%04x (%u, %u, %u)",
-		  AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
-		  status.queue_empty, status.irq_enabled, status.response_code);
 	return ret;
 }
 
 static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
 {
 	struct ap_queue_status status;
-	int ret;
+	int ret = 0;
 
 	if (!q)
 		return 0;
-retry_zapq:
 	status = ap_zapq(q->apqn, 0);
 	q->reset_rc = status.response_code;
 	switch (status.response_code) {
 	case AP_RESPONSE_NORMAL:
-		ret = 0;
-		if (!status.irq_enabled)
-			vfio_ap_free_aqic_resources(q);
-		if (!status.queue_empty || status.irq_enabled) {
-			ret = apq_reset_check(q);
-			if (status.irq_enabled && ret == 0)
-				vfio_ap_free_aqic_resources(q);
-		}
-		break;
 	case AP_RESPONSE_RESET_IN_PROGRESS:
 	case AP_RESPONSE_BUSY:
-		/*
-		 * There is a reset issued by another process in progress. Let's wait
-		 * for that to complete. Since we have no idea whether it was a RAPQ or
-		 * ZAPQ, then if it completes successfully, let's issue the ZAPQ.
-		 */
+		/* Let's verify whether the ZAPQ completed successfully */
 		ret = apq_reset_check(q);
-		if (ret)
-			break;
-		goto retry_zapq;
+		break;
 	case AP_RESPONSE_DECONFIGURED:
 		/*
 		 * When an AP adapter is deconfigured, the associated
@@ -1682,7 +1689,6 @@ retry_zapq:
 		 * return a value indicating the reset completed successfully.
 		 */
 		q->reset_rc = 0;
-		ret = 0;
 		vfio_ap_free_aqic_resources(q);
 		break;
 	default:

From 62aab082e9993163731656ce270cd3c1d29079af Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.ibm.com>
Date: Tue, 15 Aug 2023 14:43:27 -0400
Subject: [PATCH 1369/2047] s390/vfio-ap: store entire AP queue status word
 with the queue object

Store the entire AP queue status word returned from the ZAPQ command with
the struct vfio_ap_queue object instead of just the response code field.
The other information contained in the status word is need by the
apq_reset_check function to display a proper message to indicate that the
vfio_ap driver is waiting for the ZAPQ to complete because the queue is
not empty or IRQs are still enabled.

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815184333.6554-7-akrowiak@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c     | 27 +++++++++++++++------------
 drivers/s390/crypto/vfio_ap_private.h |  4 ++--
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 2517868aad56..43224f7a40ea 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -674,7 +674,7 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
 			 */
 			apqn = AP_MKQID(apid, apqi);
 			q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
-			if (!q || q->reset_rc) {
+			if (!q || q->reset_status.response_code) {
 				clear_bit_inv(apid,
 					      matrix_mdev->shadow_apcb.apm);
 				break;
@@ -1628,6 +1628,7 @@ static int apq_reset_check(struct vfio_ap_queue *q)
 	int ret = -EBUSY, elapsed = 0;
 	struct ap_queue_status status;
 
+	memcpy(&status, &q->reset_status, sizeof(status));
 	while (true) {
 		msleep(AP_RESET_INTERVAL);
 		elapsed += AP_RESET_INTERVAL;
@@ -1643,20 +1644,20 @@ static int apq_reset_check(struct vfio_ap_queue *q)
 					      status.queue_empty,
 					      status.irq_enabled);
 		} else {
-			if (q->reset_rc == AP_RESPONSE_RESET_IN_PROGRESS ||
-			    q->reset_rc == AP_RESPONSE_BUSY) {
+			if (q->reset_status.response_code == AP_RESPONSE_RESET_IN_PROGRESS ||
+			    q->reset_status.response_code == AP_RESPONSE_BUSY) {
 				status = ap_zapq(q->apqn, 0);
-				q->reset_rc = status.response_code;
+				memcpy(&q->reset_status, &status, sizeof(status));
 				continue;
 			}
 			/*
-			 * When an AP adapter is deconfigured, the associated
-			 * queues are reset, so let's set the status response
-			 * code to 0 so the queue may be passed through (i.e.,
-			 * not filtered).
+			 * When an AP adapter is deconfigured, the
+			 * associated queues are reset, so let's set the
+			 * status response code to 0 so the queue may be
+			 * passed through (i.e., not filtered)
 			 */
-			if (q->reset_rc == AP_RESPONSE_DECONFIGURED)
-				q->reset_rc = 0;
+			if (status.response_code == AP_RESPONSE_DECONFIGURED)
+				q->reset_status.response_code = 0;
 			if (q->saved_isc != VFIO_AP_ISC_INVALID)
 				vfio_ap_free_aqic_resources(q);
 			break;
@@ -1673,7 +1674,7 @@ static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
 	if (!q)
 		return 0;
 	status = ap_zapq(q->apqn, 0);
-	q->reset_rc = status.response_code;
+	memcpy(&q->reset_status, &status, sizeof(status));
 	switch (status.response_code) {
 	case AP_RESPONSE_NORMAL:
 	case AP_RESPONSE_RESET_IN_PROGRESS:
@@ -1688,7 +1689,8 @@ static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
 		 * so the queue may be passed through (i.e., not filtered) and
 		 * return a value indicating the reset completed successfully.
 		 */
-		q->reset_rc = 0;
+		q->reset_status.response_code = 0;
+		ret = 0;
 		vfio_ap_free_aqic_resources(q);
 		break;
 	default:
@@ -2042,6 +2044,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
 
 	q->apqn = to_ap_queue(&apdev->device)->qid;
 	q->saved_isc = VFIO_AP_ISC_INVALID;
+	memset(&q->reset_status, 0, sizeof(q->reset_status));
 	matrix_mdev = get_update_locks_by_apqn(q->apqn);
 
 	if (matrix_mdev) {
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 4642bbdbd1b2..d6eb3527e056 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -133,7 +133,7 @@ struct ap_matrix_mdev {
  * @apqn: the APQN of the AP queue device
  * @saved_isc: the guest ISC registered with the GIB interface
  * @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
- * @reset_rc: the status response code from the last reset of the queue
+ * @reset_status: the status from the last reset of the queue
  */
 struct vfio_ap_queue {
 	struct ap_matrix_mdev *matrix_mdev;
@@ -142,7 +142,7 @@ struct vfio_ap_queue {
 #define VFIO_AP_ISC_INVALID 0xff
 	unsigned char saved_isc;
 	struct hlist_node mdev_qnode;
-	unsigned int reset_rc;
+	struct ap_queue_status reset_status;
 };
 
 int vfio_ap_mdev_register(void);

From 9261f0438835a97254590046e1be83733cca440f Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.ibm.com>
Date: Tue, 15 Aug 2023 14:43:28 -0400
Subject: [PATCH 1370/2047] s390/vfio-ap: use work struct to verify queue reset

Instead of waiting to verify that a queue is reset in the
vfio_ap_mdev_reset_queue function, let's use a wait queue to check the
the state of the reset. This way, when resetting all of the queues assigned
to a matrix mdev, we don't have to wait for each queue to be reset before
initiating a reset on the next queue to be reset.

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Jason J. Herne <jjherne@linux.ibm.com>
Suggested-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Janosch Frank <frankja@linux.ibm.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815184333.6554-8-akrowiak@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c     | 48 +++++++++++++--------------
 drivers/s390/crypto/vfio_ap_private.h |  2 ++
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 43224f7a40ea..3a59f1c5390f 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -35,7 +35,7 @@
 static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable);
 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
 static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
-static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
+static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
 
 /**
  * get_update_locks_for_kvm: Acquire the locks required to dynamically update a
@@ -1623,11 +1623,13 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
 
 #define WAIT_MSG "Waited %dms for reset of queue %02x.%04x (%u, %u, %u)"
 
-static int apq_reset_check(struct vfio_ap_queue *q)
+static void apq_reset_check(struct work_struct *reset_work)
 {
 	int ret = -EBUSY, elapsed = 0;
 	struct ap_queue_status status;
+	struct vfio_ap_queue *q;
 
+	q = container_of(reset_work, struct vfio_ap_queue, reset_work);
 	memcpy(&status, &q->reset_status, sizeof(status));
 	while (true) {
 		msleep(AP_RESET_INTERVAL);
@@ -1635,7 +1637,7 @@ static int apq_reset_check(struct vfio_ap_queue *q)
 		status = ap_tapq(q->apqn, NULL);
 		ret = apq_status_check(q->apqn, &status);
 		if (ret == -EIO)
-			return ret;
+			return;
 		if (ret == -EBUSY) {
 			pr_notice_ratelimited(WAIT_MSG, elapsed,
 					      AP_QID_CARD(q->apqn),
@@ -1663,34 +1665,32 @@ static int apq_reset_check(struct vfio_ap_queue *q)
 			break;
 		}
 	}
-	return ret;
 }
 
-static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
+static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
 {
 	struct ap_queue_status status;
-	int ret = 0;
 
 	if (!q)
-		return 0;
+		return;
 	status = ap_zapq(q->apqn, 0);
 	memcpy(&q->reset_status, &status, sizeof(status));
 	switch (status.response_code) {
 	case AP_RESPONSE_NORMAL:
 	case AP_RESPONSE_RESET_IN_PROGRESS:
 	case AP_RESPONSE_BUSY:
-		/* Let's verify whether the ZAPQ completed successfully */
-		ret = apq_reset_check(q);
+		/*
+		 * Let's verify whether the ZAPQ completed successfully on a work queue.
+		 */
+		queue_work(system_long_wq, &q->reset_work);
 		break;
 	case AP_RESPONSE_DECONFIGURED:
 		/*
 		 * When an AP adapter is deconfigured, the associated
 		 * queues are reset, so let's set the status response code to 0
-		 * so the queue may be passed through (i.e., not filtered) and
-		 * return a value indicating the reset completed successfully.
+		 * so the queue may be passed through (i.e., not filtered).
 		 */
 		q->reset_status.response_code = 0;
-		ret = 0;
 		vfio_ap_free_aqic_resources(q);
 		break;
 	default:
@@ -1698,29 +1698,25 @@ static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
 		     "PQAP/ZAPQ for %02x.%04x failed with invalid rc=%u\n",
 		     AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
 		     status.response_code);
-		return -EIO;
 	}
-
-	return ret;
 }
 
 static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable)
 {
-	int ret, loop_cursor, rc = 0;
+	int ret = 0, loop_cursor;
 	struct vfio_ap_queue *q;
 
+	hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode)
+		vfio_ap_mdev_reset_queue(q);
+
 	hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
-		ret = vfio_ap_mdev_reset_queue(q);
-		/*
-		 * Regardless whether a queue turns out to be busy, or
-		 * is not operational, we need to continue resetting
-		 * the remaining queues.
-		 */
-		if (ret)
-			rc = ret;
+		flush_work(&q->reset_work);
+
+		if (q->reset_status.response_code)
+			ret = -EIO;
 	}
 
-	return rc;
+	return ret;
 }
 
 static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
@@ -2045,6 +2041,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
 	q->apqn = to_ap_queue(&apdev->device)->qid;
 	q->saved_isc = VFIO_AP_ISC_INVALID;
 	memset(&q->reset_status, 0, sizeof(q->reset_status));
+	INIT_WORK(&q->reset_work, apq_reset_check);
 	matrix_mdev = get_update_locks_by_apqn(q->apqn);
 
 	if (matrix_mdev) {
@@ -2094,6 +2091,7 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
 	}
 
 	vfio_ap_mdev_reset_queue(q);
+	flush_work(&q->reset_work);
 	dev_set_drvdata(&apdev->device, NULL);
 	kfree(q);
 	release_update_locks_for_mdev(matrix_mdev);
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index d6eb3527e056..88aff8b81f2f 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -134,6 +134,7 @@ struct ap_matrix_mdev {
  * @saved_isc: the guest ISC registered with the GIB interface
  * @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
  * @reset_status: the status from the last reset of the queue
+ * @reset_work: work to wait for queue reset to complete
  */
 struct vfio_ap_queue {
 	struct ap_matrix_mdev *matrix_mdev;
@@ -143,6 +144,7 @@ struct vfio_ap_queue {
 	unsigned char saved_isc;
 	struct hlist_node mdev_qnode;
 	struct ap_queue_status reset_status;
+	struct work_struct reset_work;
 };
 
 int vfio_ap_mdev_register(void);

From e1f17f8ea93d8fc9d6d0562d38bb0a5fb3e8355e Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.ibm.com>
Date: Tue, 15 Aug 2023 14:43:29 -0400
Subject: [PATCH 1371/2047] s390/vfio-ap: handle queue state change in progress
 on reset

A new APQSW response code (0xA) indicating the designated queue is in the
process of being bound or associated to a configuration may be returned
from the PQAP(ZAPQ) command. This patch introduces code that will verify
when the PQAP(ZAPQ) command can be re-issued after receiving response code
0xA.

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Jason J. Herne <jjherne@linux.ibm.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815184333.6554-9-akrowiak@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 3a59f1c5390f..43dea259fe23 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1647,7 +1647,8 @@ static void apq_reset_check(struct work_struct *reset_work)
 					      status.irq_enabled);
 		} else {
 			if (q->reset_status.response_code == AP_RESPONSE_RESET_IN_PROGRESS ||
-			    q->reset_status.response_code == AP_RESPONSE_BUSY) {
+			    q->reset_status.response_code == AP_RESPONSE_BUSY ||
+			    q->reset_status.response_code == AP_RESPONSE_STATE_CHANGE_IN_PROGRESS) {
 				status = ap_zapq(q->apqn, 0);
 				memcpy(&q->reset_status, &status, sizeof(status));
 				continue;
@@ -1679,6 +1680,7 @@ static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
 	case AP_RESPONSE_NORMAL:
 	case AP_RESPONSE_RESET_IN_PROGRESS:
 	case AP_RESPONSE_BUSY:
+	case AP_RESPONSE_STATE_CHANGE_IN_PROGRESS:
 		/*
 		 * Let's verify whether the ZAPQ completed successfully on a work queue.
 		 */

From 7847a19b5b6265f11e71c8499a3b608edac7f398 Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.ibm.com>
Date: Tue, 15 Aug 2023 14:43:30 -0400
Subject: [PATCH 1372/2047] s390/vfio-ap: check for TAPQ response codes 0x35
 and 0x36

Check for response codes 0x35 and 0x36 which are asynchronous return codes
indicating a failure of the guest to associate a secret with a queue. Since
there can be no interaction with this queue from the guest (i.e., the vcpus
are out of SIE for hot unplug, the guest is being shut down or an emulated
subsystem reset of the guest is taking place), let's go ahead and re-issue
the ZAPQ to reset and zeroize the queue.

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Jason J. Herne <jjherne@linux.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815184333.6554-10-akrowiak@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 43dea259fe23..8bda52c46df0 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1612,6 +1612,16 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
 	case AP_RESPONSE_RESET_IN_PROGRESS:
 	case AP_RESPONSE_BUSY:
 		return -EBUSY;
+	case AP_RESPONSE_ASSOC_SECRET_NOT_UNIQUE:
+	case AP_RESPONSE_ASSOC_FAILED:
+		/*
+		 * These asynchronous response codes indicate a PQAP(AAPQ)
+		 * instruction to associate a secret with the guest failed. All
+		 * subsequent AP instructions will end with the asynchronous
+		 * response code until the AP queue is reset; so, let's return
+		 * a value indicating a reset needs to be performed again.
+		 */
+		return -EAGAIN;
 	default:
 		WARN(true,
 		     "failed to verify reset of queue %02x.%04x: TAPQ rc=%u\n",
@@ -1648,7 +1658,8 @@ static void apq_reset_check(struct work_struct *reset_work)
 		} else {
 			if (q->reset_status.response_code == AP_RESPONSE_RESET_IN_PROGRESS ||
 			    q->reset_status.response_code == AP_RESPONSE_BUSY ||
-			    q->reset_status.response_code == AP_RESPONSE_STATE_CHANGE_IN_PROGRESS) {
+			    q->reset_status.response_code == AP_RESPONSE_STATE_CHANGE_IN_PROGRESS ||
+			    ret == -EAGAIN) {
 				status = ap_zapq(q->apqn, 0);
 				memcpy(&q->reset_status, &status, sizeof(status));
 				continue;

From cf3fa16a6fd49216ae83502e61bea0d8322b51eb Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Tue, 15 Aug 2023 14:43:31 -0400
Subject: [PATCH 1373/2047] s390/uv: export uv_pin_shared for direct usage

Export the uv_pin_shared function so that it can be called from other
modules that carry a GPL-compatible license.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815184333.6554-11-akrowiak@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/uv.h | 6 ++++++
 arch/s390/kernel/uv.c      | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index d6bb2f4f78d1..d2cd42bb2c26 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -463,6 +463,7 @@ static inline int is_prot_virt_host(void)
 	return prot_virt_host;
 }
 
+int uv_pin_shared(unsigned long paddr);
 int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
 int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
 int uv_destroy_owned_page(unsigned long paddr);
@@ -475,6 +476,11 @@ void setup_uv(void);
 #define is_prot_virt_host() 0
 static inline void setup_uv(void) {}
 
+static inline int uv_pin_shared(unsigned long paddr)
+{
+	return 0;
+}
+
 static inline int uv_destroy_owned_page(unsigned long paddr)
 {
 	return 0;
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 66f0eb1c872b..b771f1b4cdd1 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -88,7 +88,7 @@ fail:
  * Requests the Ultravisor to pin the page in the shared state. This will
  * cause an intercept when the guest attempts to unshare the pinned page.
  */
-static int uv_pin_shared(unsigned long paddr)
+int uv_pin_shared(unsigned long paddr)
 {
 	struct uv_cb_cfs uvcb = {
 		.header.cmd = UVC_CMD_PIN_PAGE_SHARED,
@@ -100,6 +100,7 @@ static int uv_pin_shared(unsigned long paddr)
 		return -EINVAL;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(uv_pin_shared);
 
 /*
  * Requests the Ultravisor to destroy a guest page and make it

From fb5040ef7f707525d0681cf6bfe424ccd1aadab7 Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.ibm.com>
Date: Tue, 15 Aug 2023 14:43:32 -0400
Subject: [PATCH 1374/2047] KVM: s390: export kvm_s390_pv*_is_protected
 functions

Export the kvm_s390_pv_is_protected and kvm_s390_pv_cpu_is_protected
functions so that they can be called from other modules that carry a
GPL-compatible license.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815184333.6554-12-akrowiak@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |  3 +++
 arch/s390/kvm/kvm-s390.h         | 12 ------------
 arch/s390/kvm/pv.c               | 14 ++++++++++++++
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 2bbc3d54959d..91bfecb91321 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -1028,6 +1028,9 @@ static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa)
 
 extern char sie_exit;
 
+bool kvm_s390_pv_is_protected(struct kvm *kvm);
+bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
+
 extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
 extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
 
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 0261d42c7d01..a7ea80cfa445 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -270,18 +270,6 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
 	return vcpu->arch.pv.handle;
 }
 
-static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
-{
-	lockdep_assert_held(&kvm->lock);
-	return !!kvm_s390_pv_get_handle(kvm);
-}
-
-static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
-{
-	lockdep_assert_held(&vcpu->mutex);
-	return !!kvm_s390_pv_cpu_get_handle(vcpu);
-}
-
 /* implemented in interrupt.c */
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 2f34c7c3c5ab..856140e9942e 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -18,6 +18,20 @@
 #include <linux/mmu_notifier.h>
 #include "kvm-s390.h"
 
+bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+	lockdep_assert_held(&kvm->lock);
+	return !!kvm_s390_pv_get_handle(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);
+
+bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
+{
+	lockdep_assert_held(&vcpu->mutex);
+	return !!kvm_s390_pv_cpu_get_handle(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
+
 /**
  * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
  * be destroyed

From f88fb1335733029b4630fb93cfaad349a81e57b2 Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.ibm.com>
Date: Tue, 15 Aug 2023 14:43:33 -0400
Subject: [PATCH 1375/2047] s390/vfio-ap: make sure nib is shared

Since the NIB is visible by HW, KVM and the (PV) guest it needs to be
in non-secure or secure but shared storage. Return code 6 is used to
indicate to a PV guest that its NIB would be on secure, unshared
storage and therefore the NIB address is invalid.

Unfortunately we have no easy way to check if a page is unshared after
vfio_pin_pages() since it will automatically export an unshared page
if the UV pin shared call did not succeed due to a page being in
unshared state.

Therefore we use the fact that UV pinning it a second time is a nop
but trying to pin an exported page is an error (0x102). If we
encounter this error, we do a vfio unpin and import the page again,
since vfio_pin_pages() exported it.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Tested-by: Viktor Mihajlovski <mihajlov@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815184333.6554-13-akrowiak@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 8bda52c46df0..0509f80622cd 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -359,6 +359,28 @@ static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib)
 	return 0;
 }
 
+static int ensure_nib_shared(unsigned long addr, struct gmap *gmap)
+{
+	int ret;
+
+	/*
+	 * The nib has to be located in shared storage since guest and
+	 * host access it. vfio_pin_pages() will do a pin shared and
+	 * if that fails (possibly because it's not a shared page) it
+	 * calls export. We try to do a second pin shared here so that
+	 * the UV gives us an error code if we try to pin a non-shared
+	 * page.
+	 *
+	 * If the page is already pinned shared the UV will return a success.
+	 */
+	ret = uv_pin_shared(addr);
+	if (ret) {
+		/* vfio_pin_pages() likely exported the page so let's re-import */
+		gmap_convert_to_secure(gmap, addr);
+	}
+	return ret;
+}
+
 /**
  * vfio_ap_irq_enable - Enable Interruption for a APQN
  *
@@ -422,6 +444,14 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
 	h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK);
 	aqic_gisa.gisc = isc;
 
+	/* NIB in non-shared storage is a rc 6 for PV guests */
+	if (kvm_s390_pv_cpu_is_protected(vcpu) &&
+	    ensure_nib_shared(h_nib & PAGE_MASK, kvm->arch.gmap)) {
+		vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1);
+		status.response_code = AP_RESPONSE_INVALID_ADDRESS;
+		return status;
+	}
+
 	nisc = kvm_s390_gisc_register(kvm, isc);
 	if (nisc < 0) {
 		VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n",

From 5c1212a67e5838aca49707ef96be71612a72ab43 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Fri, 18 Aug 2023 15:18:13 +0100
Subject: [PATCH 1376/2047] regulator: raa215300: Change rate from 32000->32768

Replace the rate 32000->32768 in devm_clk_hw_register_fixed_rate() as the
32kHz frequency mentioned in the hardware manual is actually 32.768kHz.

Reported-by: Pavel Machek <pavel@denx.de>
Closes: https://lore.kernel.org/all/ZN3%2FSjL50ls+3dnD@duo.ucw.cz/
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Link: https://lore.kernel.org/r/20230818141815.314197-2-biju.das.jz@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/raa215300.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/raa215300.c b/drivers/regulator/raa215300.c
index bdbf1e4ce468..0628ed3d0a93 100644
--- a/drivers/regulator/raa215300.c
+++ b/drivers/regulator/raa215300.c
@@ -127,7 +127,7 @@ static int raa215300_i2c_probe(struct i2c_client *client)
 		struct clk_hw *hw;
 		ssize_t size;
 
-		hw = devm_clk_hw_register_fixed_rate(dev, clk_name, NULL, 0, 32000);
+		hw = devm_clk_hw_register_fixed_rate(dev, clk_name, NULL, 0, 32768);
 		if (IS_ERR(hw))
 			return PTR_ERR(hw);
 

From 6673fc98953231f5d85f780d3025ea95c7584683 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Fri, 18 Aug 2023 15:18:14 +0100
Subject: [PATCH 1377/2047] regulator: raa215300: Add missing blank space

Add the missing space in the comment block.

Reported-by: Pavel Machek <pavel@denx.de>
Closes: https://lore.kernel.org/all/ZN3%2FSjL50ls+3dnD@duo.ucw.cz/
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Link: https://lore.kernel.org/r/20230818141815.314197-3-biju.das.jz@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/raa215300.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/raa215300.c b/drivers/regulator/raa215300.c
index 0628ed3d0a93..6982565c8aa4 100644
--- a/drivers/regulator/raa215300.c
+++ b/drivers/regulator/raa215300.c
@@ -86,7 +86,7 @@ static int raa215300_i2c_probe(struct i2c_client *client)
 	val &= RAA215300_REG_BLOCK_EN_RTC_EN;
 	regmap_write(regmap, RAA215300_REG_BLOCK_EN, val);
 
-	/*Clear the latched registers */
+	/* Clear the latched registers */
 	regmap_read(regmap, RAA215300_FAULT_LATCHED_STATUS_1, &val);
 	regmap_write(regmap, RAA215300_FAULT_LATCHED_STATUS_1, val);
 	regmap_read(regmap, RAA215300_FAULT_LATCHED_STATUS_2, &val);

From 8845252f6690e4fceca67f2bb7ee2920939d3ac5 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Fri, 18 Aug 2023 15:18:15 +0100
Subject: [PATCH 1378/2047] regulator: raa215300: Update help description

Add module description to the help section and update the existing
help description.

Reported-by: Pavel Machek <pavel@denx.de>
Closes: https://lore.kernel.org/all/ZN3%2FSjL50ls+3dnD@duo.ucw.cz/
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Link: https://lore.kernel.org/r/20230818141815.314197-4-biju.das.jz@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 086596239356..186b97c5aa16 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1072,7 +1072,11 @@ config REGULATOR_RAA215300
 	depends on COMMON_CLK
 	depends on I2C
 	help
-	  Support for the Renesas RAA215300 PMIC.
+	  If you say yes to this option, support will be included for the
+	  Renesas RAA215300 PMIC.
+
+	  Say M here if you want to include support for Renesas RAA215300 PMIC
+	  as a module. The module will be named "raa215300".
 
 config REGULATOR_RASPBERRYPI_TOUCHSCREEN_ATTINY
 	tristate "Raspberry Pi 7-inch touchscreen panel ATTINY regulator"

From d11a69873d9a7435fe6a48531e165ab80a8b1221 Mon Sep 17 00:00:00 2001
From: Tomislav Novak <tnovak@meta.com>
Date: Mon, 5 Jun 2023 12:19:23 -0700
Subject: [PATCH 1379/2047] hw_breakpoint: fix single-stepping when using
 bpf_overflow_handler

Arm platforms use is_default_overflow_handler() to determine if the
hw_breakpoint code should single-step over the breakpoint trigger or
let the custom handler deal with it.

Since bpf_overflow_handler() currently isn't recognized as a default
handler, attaching a BPF program to a PERF_TYPE_BREAKPOINT event causes
it to keep firing (the instruction triggering the data abort exception
is never skipped). For example:

  # bpftrace -e 'watchpoint:0x10000:4:w { print("hit") }' -c ./test
  Attaching 1 probe...
  hit
  hit
  [...]
  ^C

(./test performs a single 4-byte store to 0x10000)

This patch replaces the check with uses_default_overflow_handler(),
which accounts for the bpf_overflow_handler() case by also testing
if one of the perf_event_output functions gets invoked indirectly,
via orig_default_handler.

Signed-off-by: Tomislav Novak <tnovak@meta.com>
Tested-by: Samuel Gosselin <sgosselin@google.com> # arm64
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/linux-arm-kernel/20220923203644.2731604-1-tnovak@fb.com/
Link: https://lore.kernel.org/r/20230605191923.1219974-1-tnovak@meta.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm/kernel/hw_breakpoint.c   |  8 ++++----
 arch/arm64/kernel/hw_breakpoint.c |  4 ++--
 include/linux/perf_event.h        | 22 +++++++++++++++++++---
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 054e9199f30d..dc0fb7a81371 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -626,7 +626,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
 	hw->address &= ~alignment_mask;
 	hw->ctrl.len <<= offset;
 
-	if (is_default_overflow_handler(bp)) {
+	if (uses_default_overflow_handler(bp)) {
 		/*
 		 * Mismatch breakpoints are required for single-stepping
 		 * breakpoints.
@@ -798,7 +798,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 		 * Otherwise, insert a temporary mismatch breakpoint so that
 		 * we can single-step over the watchpoint trigger.
 		 */
-		if (!is_default_overflow_handler(wp))
+		if (!uses_default_overflow_handler(wp))
 			continue;
 step:
 		enable_single_step(wp, instruction_pointer(regs));
@@ -811,7 +811,7 @@ step:
 		info->trigger = addr;
 		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
 		perf_bp_event(wp, regs);
-		if (is_default_overflow_handler(wp))
+		if (uses_default_overflow_handler(wp))
 			enable_single_step(wp, instruction_pointer(regs));
 	}
 
@@ -886,7 +886,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs)
 			info->trigger = addr;
 			pr_debug("breakpoint fired: address = 0x%x\n", addr);
 			perf_bp_event(bp, regs);
-			if (is_default_overflow_handler(bp))
+			if (uses_default_overflow_handler(bp))
 				enable_single_step(bp, addr);
 			goto unlock;
 		}
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index db2a1861bb97..35225632d70a 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -654,7 +654,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr,
 		perf_bp_event(bp, regs);
 
 		/* Do we need to handle the stepping? */
-		if (is_default_overflow_handler(bp))
+		if (uses_default_overflow_handler(bp))
 			step = 1;
 unlock:
 		rcu_read_unlock();
@@ -733,7 +733,7 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
 static int watchpoint_report(struct perf_event *wp, unsigned long addr,
 			     struct pt_regs *regs)
 {
-	int step = is_default_overflow_handler(wp);
+	int step = uses_default_overflow_handler(wp);
 	struct arch_hw_breakpoint *info = counter_arch_bp(wp);
 
 	info->trigger = addr;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2166a69e3bf2..e657916c9509 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1316,15 +1316,31 @@ extern int perf_event_output(struct perf_event *event,
 			     struct pt_regs *regs);
 
 static inline bool
-is_default_overflow_handler(struct perf_event *event)
+__is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
 {
-	if (likely(event->overflow_handler == perf_event_output_forward))
+	if (likely(overflow_handler == perf_event_output_forward))
 		return true;
-	if (unlikely(event->overflow_handler == perf_event_output_backward))
+	if (unlikely(overflow_handler == perf_event_output_backward))
 		return true;
 	return false;
 }
 
+#define is_default_overflow_handler(event) \
+	__is_default_overflow_handler((event)->overflow_handler)
+
+#ifdef CONFIG_BPF_SYSCALL
+static inline bool uses_default_overflow_handler(struct perf_event *event)
+{
+	if (likely(is_default_overflow_handler(event)))
+		return true;
+
+	return __is_default_overflow_handler(event->orig_overflow_handler);
+}
+#else
+#define uses_default_overflow_handler(event) \
+	is_default_overflow_handler(event)
+#endif
+
 extern void
 perf_event_header__init_id(struct perf_event_header *header,
 			   struct perf_sample_data *data,

From 94f23ac36f8b3c2140b00d78131c8df41cd9ac5d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 16 Aug 2023 19:39:32 +0100
Subject: [PATCH 1380/2047] kselftest/arm64: Fix hwcaps selftest build

The hwcaps selftest currently relies on the assembler being able to
assemble the crc32w instruction but this is not in the base v8.0 so is not
accepted by the standard GCC configurations used by many distributions.
Switch to manually encoding to fix the build.

Fixes: 09d2e95a04ad ("kselftest/arm64: add crc32 feature to hwcap test")
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20230816-arm64-fix-crc32-build-v1-1-40165c1290f2@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/hwcap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index fe844b825a1e..0593192c0753 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -48,7 +48,8 @@ static void atomics_sigill(void)
 
 static void crc32_sigill(void)
 {
-	asm volatile("crc32w w0, w0, w1");
+	/* CRC32W W0, W0, W1 */
+	asm volatile(".inst 0x1ac14800" : : : );
 }
 
 static void cssc_sigill(void)

From ef75e767167a8f30c7690bc4689dba76329ee06e Mon Sep 17 00:00:00 2001
From: Lucas Tanure <tanureal@opensource.cirrus.com>
Date: Fri, 4 Aug 2023 11:46:01 +0100
Subject: [PATCH 1381/2047] spi: cs42l43: Add SPI controller support

The CS42L43 is an audio CODEC with integrated MIPI SoundWire interface
(Version 1.2.1 compliant), I2C, SPI, and I2S/TDM interfaces designed
for portable applications. It provides a high dynamic range, stereo
DAC for headphone output, two integrated Class D amplifiers for
loudspeakers, and two ADCs for wired headset microphone input or
stereo line input. PDM inputs are provided for digital microphones.

The SPI component incorporates a SPI controller interface for
communication with other peripheral components.

Signed-off-by: Lucas Tanure <tanureal@opensource.cirrus.com>
Signed-off-by: Maciej Strozek <mstrozek@opensource.cirrus.com>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20230804104602.395892-6-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 MAINTAINERS               |   1 +
 drivers/spi/Kconfig       |   7 +
 drivers/spi/Makefile      |   1 +
 drivers/spi/spi-cs42l43.c | 284 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 293 insertions(+)
 create mode 100644 drivers/spi/spi-cs42l43.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 0b5ecca0b39b..a0fb42cf4cca 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4888,6 +4888,7 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/sound/cirrus,cs*
 F:	drivers/mfd/cs42l43*
 F:	drivers/pinctrl/cirrus/pinctrl-cs42l43*
+F:	drivers/spi/spi-cs42l43*
 F:	include/dt-bindings/sound/cs*
 F:	include/linux/mfd/cs42l43*
 F:	include/sound/cs*
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 8962b2557615..083f71cf4829 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -281,6 +281,13 @@ config SPI_COLDFIRE_QSPI
 	  This enables support for the Coldfire QSPI controller in master
 	  mode.
 
+config SPI_CS42L43
+	tristate "Cirrus Logic CS42L43 SPI controller"
+	depends on MFD_CS42L43 && PINCTRL_CS42L43
+	help
+	  This enables support for the SPI controller inside the Cirrus Logic
+	  CS42L43 audio codec.
+
 config SPI_DAVINCI
 	tristate "Texas Instruments DaVinci/DA8x/OMAP-L/AM1x SoC SPI controller"
 	depends on ARCH_DAVINCI || ARCH_KEYSTONE || COMPILE_TEST
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 080c2c1b3ec1..c537640bed70 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_SPI_CADENCE_QUADSPI)	+= spi-cadence-quadspi.o
 obj-$(CONFIG_SPI_CADENCE_XSPI)		+= spi-cadence-xspi.o
 obj-$(CONFIG_SPI_CLPS711X)		+= spi-clps711x.o
 obj-$(CONFIG_SPI_COLDFIRE_QSPI)		+= spi-coldfire-qspi.o
+obj-$(CONFIG_SPI_CS42L43)		+= spi-cs42l43.o
 obj-$(CONFIG_SPI_DAVINCI)		+= spi-davinci.o
 obj-$(CONFIG_SPI_DLN2)			+= spi-dln2.o
 obj-$(CONFIG_SPI_DESIGNWARE)		+= spi-dw.o
diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c
new file mode 100644
index 000000000000..453a9b37ce78
--- /dev/null
+++ b/drivers/spi/spi-cs42l43.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// CS42L43 SPI Controller Driver
+//
+// Copyright (C) 2022-2023 Cirrus Logic, Inc. and
+//                         Cirrus Logic International Semiconductor Ltd.
+
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/mfd/cs42l43.h>
+#include <linux/mfd/cs42l43-regs.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/spi/spi.h>
+#include <linux/units.h>
+
+#define CS42L43_FIFO_SIZE		16
+#define CS42L43_SPI_ROOT_HZ		(40 * HZ_PER_MHZ)
+#define CS42L43_SPI_MAX_LENGTH		65532
+
+enum cs42l43_spi_cmd {
+	CS42L43_WRITE,
+	CS42L43_READ
+};
+
+struct cs42l43_spi {
+	struct device *dev;
+	struct regmap *regmap;
+	struct spi_controller *ctlr;
+};
+
+static const unsigned int cs42l43_clock_divs[] = {
+	2, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+};
+
+static int cs42l43_spi_tx(struct regmap *regmap, const u8 *buf, unsigned int len)
+{
+	const u8 *end = buf + len;
+	u32 val = 0;
+	int ret;
+
+	while (buf < end) {
+		const u8 *block = min(buf + CS42L43_FIFO_SIZE, end);
+
+		while (buf < block) {
+			const u8 *word = min(buf + sizeof(u32), block);
+			int pad = (buf + sizeof(u32)) - word;
+
+			while (buf < word) {
+				val >>= BITS_PER_BYTE;
+				val |= FIELD_PREP(GENMASK(31, 24), *buf);
+
+				buf++;
+			}
+
+			val >>= pad * BITS_PER_BYTE;
+
+			regmap_write(regmap, CS42L43_TX_DATA, val);
+		}
+
+		regmap_write(regmap, CS42L43_TRAN_CONFIG8, CS42L43_SPI_TX_DONE_MASK);
+
+		ret = regmap_read_poll_timeout(regmap, CS42L43_TRAN_STATUS1,
+					       val, (val & CS42L43_SPI_TX_REQUEST_MASK),
+					       1000, 5000);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int cs42l43_spi_rx(struct regmap *regmap, u8 *buf, unsigned int len)
+{
+	u8 *end = buf + len;
+	u32 val;
+	int ret;
+
+	while (buf < end) {
+		u8 *block = min(buf + CS42L43_FIFO_SIZE, end);
+
+		ret = regmap_read_poll_timeout(regmap, CS42L43_TRAN_STATUS1,
+					       val, (val & CS42L43_SPI_RX_REQUEST_MASK),
+					       1000, 5000);
+		if (ret)
+			return ret;
+
+		while (buf < block) {
+			u8 *word = min(buf + sizeof(u32), block);
+
+			ret = regmap_read(regmap, CS42L43_RX_DATA, &val);
+			if (ret)
+				return ret;
+
+			while (buf < word) {
+				*buf = FIELD_GET(GENMASK(7, 0), val);
+
+				val >>= BITS_PER_BYTE;
+				buf++;
+			}
+		}
+
+		regmap_write(regmap, CS42L43_TRAN_CONFIG8, CS42L43_SPI_RX_DONE_MASK);
+	}
+
+	return 0;
+}
+
+static int cs42l43_transfer_one(struct spi_controller *ctlr, struct spi_device *spi,
+				struct spi_transfer *tfr)
+{
+	struct cs42l43_spi *priv = spi_controller_get_devdata(spi->controller);
+	int i, ret = -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(cs42l43_clock_divs); i++) {
+		if (CS42L43_SPI_ROOT_HZ / cs42l43_clock_divs[i] <= tfr->speed_hz)
+			break;
+	}
+
+	if (i == ARRAY_SIZE(cs42l43_clock_divs))
+		return -EINVAL;
+
+	regmap_write(priv->regmap, CS42L43_SPI_CLK_CONFIG1, i);
+
+	if (tfr->tx_buf) {
+		regmap_write(priv->regmap, CS42L43_TRAN_CONFIG3, CS42L43_WRITE);
+		regmap_write(priv->regmap, CS42L43_TRAN_CONFIG4, tfr->len - 1);
+	} else if (tfr->rx_buf) {
+		regmap_write(priv->regmap, CS42L43_TRAN_CONFIG3, CS42L43_READ);
+		regmap_write(priv->regmap, CS42L43_TRAN_CONFIG5, tfr->len - 1);
+	}
+
+	regmap_write(priv->regmap, CS42L43_TRAN_CONFIG1, CS42L43_SPI_START_MASK);
+
+	if (tfr->tx_buf)
+		ret = cs42l43_spi_tx(priv->regmap, (const u8 *)tfr->tx_buf, tfr->len);
+	else if (tfr->rx_buf)
+		ret = cs42l43_spi_rx(priv->regmap, (u8 *)tfr->rx_buf, tfr->len);
+
+	return ret;
+}
+
+static void cs42l43_set_cs(struct spi_device *spi, bool is_high)
+{
+	struct cs42l43_spi *priv = spi_controller_get_devdata(spi->controller);
+
+	if (spi_get_chipselect(spi, 0) == 0)
+		regmap_write(priv->regmap, CS42L43_SPI_CONFIG2, !is_high);
+}
+
+static int cs42l43_prepare_message(struct spi_controller *ctlr, struct spi_message *msg)
+{
+	struct cs42l43_spi *priv = spi_controller_get_devdata(ctlr);
+	struct spi_device *spi = msg->spi;
+	unsigned int spi_config1 = 0;
+
+	/* select another internal CS, which doesn't exist, so CS 0 is not used */
+	if (spi_get_csgpiod(spi, 0))
+		spi_config1 |= 1 << CS42L43_SPI_SS_SEL_SHIFT;
+	if (spi->mode & SPI_CPOL)
+		spi_config1 |= CS42L43_SPI_CPOL_MASK;
+	if (spi->mode & SPI_CPHA)
+		spi_config1 |= CS42L43_SPI_CPHA_MASK;
+	if (spi->mode & SPI_3WIRE)
+		spi_config1 |= CS42L43_SPI_THREE_WIRE_MASK;
+
+	regmap_write(priv->regmap, CS42L43_SPI_CONFIG1, spi_config1);
+
+	return 0;
+}
+
+static int cs42l43_prepare_transfer_hardware(struct spi_controller *ctlr)
+{
+	struct cs42l43_spi *priv = spi_controller_get_devdata(ctlr);
+	int ret;
+
+	ret = regmap_write(priv->regmap, CS42L43_BLOCK_EN2, CS42L43_SPI_MSTR_EN_MASK);
+	if (ret)
+		dev_err(priv->dev, "Failed to enable SPI controller: %d\n", ret);
+
+	return ret;
+}
+
+static int cs42l43_unprepare_transfer_hardware(struct spi_controller *ctlr)
+{
+	struct cs42l43_spi *priv = spi_controller_get_devdata(ctlr);
+	int ret;
+
+	ret = regmap_write(priv->regmap, CS42L43_BLOCK_EN2, 0);
+	if (ret)
+		dev_err(priv->dev, "Failed to disable SPI controller: %d\n", ret);
+
+	return ret;
+}
+
+static size_t cs42l43_spi_max_length(struct spi_device *spi)
+{
+	return CS42L43_SPI_MAX_LENGTH;
+}
+
+static int cs42l43_spi_probe(struct platform_device *pdev)
+{
+	struct cs42l43 *cs42l43 = dev_get_drvdata(pdev->dev.parent);
+	struct cs42l43_spi *priv;
+	struct fwnode_handle *fwnode = dev_fwnode(cs42l43->dev);
+	int ret;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*priv->ctlr));
+	if (!priv->ctlr)
+		return -ENOMEM;
+
+	spi_controller_set_devdata(priv->ctlr, priv);
+
+	priv->dev = &pdev->dev;
+	priv->regmap = cs42l43->regmap;
+
+	priv->ctlr->prepare_message = cs42l43_prepare_message;
+	priv->ctlr->prepare_transfer_hardware = cs42l43_prepare_transfer_hardware;
+	priv->ctlr->unprepare_transfer_hardware = cs42l43_unprepare_transfer_hardware;
+	priv->ctlr->transfer_one = cs42l43_transfer_one;
+	priv->ctlr->set_cs = cs42l43_set_cs;
+	priv->ctlr->max_transfer_size = cs42l43_spi_max_length;
+
+	if (is_of_node(fwnode))
+		fwnode = fwnode_get_named_child_node(fwnode, "spi");
+
+	device_set_node(&priv->ctlr->dev, fwnode);
+
+	priv->ctlr->mode_bits = SPI_3WIRE | SPI_MODE_X_MASK;
+	priv->ctlr->flags = SPI_CONTROLLER_HALF_DUPLEX;
+	priv->ctlr->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16) |
+					 SPI_BPW_MASK(32);
+	priv->ctlr->min_speed_hz = CS42L43_SPI_ROOT_HZ /
+				   cs42l43_clock_divs[ARRAY_SIZE(cs42l43_clock_divs) - 1];
+	priv->ctlr->max_speed_hz = CS42L43_SPI_ROOT_HZ / cs42l43_clock_divs[0];
+	priv->ctlr->use_gpio_descriptors = true;
+	priv->ctlr->auto_runtime_pm = true;
+
+	devm_pm_runtime_enable(priv->dev);
+	pm_runtime_idle(priv->dev);
+
+	regmap_write(priv->regmap, CS42L43_TRAN_CONFIG6, CS42L43_FIFO_SIZE - 1);
+	regmap_write(priv->regmap, CS42L43_TRAN_CONFIG7, CS42L43_FIFO_SIZE - 1);
+
+	// Disable Watchdog timer and enable stall
+	regmap_write(priv->regmap, CS42L43_SPI_CONFIG3, 0);
+	regmap_write(priv->regmap, CS42L43_SPI_CONFIG4, CS42L43_SPI_STALL_ENA_MASK);
+
+	ret = devm_spi_register_controller(priv->dev, priv->ctlr);
+	if (ret) {
+		pm_runtime_disable(priv->dev);
+		dev_err(priv->dev, "Failed to register SPI controller: %d\n", ret);
+	}
+
+	return ret;
+}
+
+static const struct platform_device_id cs42l43_spi_id_table[] = {
+	{ "cs42l43-spi", },
+	{}
+};
+MODULE_DEVICE_TABLE(platform, cs42l43_spi_id_table);
+
+static struct platform_driver cs42l43_spi_driver = {
+	.driver = {
+		.name	= "cs42l43-spi",
+	},
+	.probe		= cs42l43_spi_probe,
+	.id_table	= cs42l43_spi_id_table,
+};
+module_platform_driver(cs42l43_spi_driver);
+
+MODULE_DESCRIPTION("CS42L43 SPI Driver");
+MODULE_AUTHOR("Lucas Tanure <tanureal@opensource.cirrus.com>");
+MODULE_AUTHOR("Maciej Strozek <mstrozek@opensource.cirrus.com>");
+MODULE_LICENSE("GPL");

From 81e5ee471609848ee1ebf3beb2a46788113fe0eb Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Thu, 17 Aug 2023 11:24:02 +0530
Subject: [PATCH 1382/2047] arm_pmu: acpi: Refactor
 arm_spe_acpi_register_device()

Sanity checking all the GICC tables for same interrupt number, and ensuring
a homogeneous ACPI based machine, could be used for other platform devices
as well. Hence this refactors arm_spe_acpi_register_device() into a common
helper arm_acpi_register_pmu_device().

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Co-developed-by: Will Deacon <will@kernel.org>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/20230817055405.249630-2-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu_acpi.c | 104 ++++++++++++++++++++++--------------
 1 file changed, 64 insertions(+), 40 deletions(-)

diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 90815ad762eb..48bd62d3993e 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -69,6 +69,62 @@ static void arm_pmu_acpi_unregister_irq(int cpu)
 		acpi_unregister_gsi(gsi);
 }
 
+static int __maybe_unused
+arm_acpi_register_pmu_device(struct platform_device *pdev, u8 len,
+			     u16 (*parse_gsi)(struct acpi_madt_generic_interrupt *))
+{
+	int cpu, this_hetid, hetid, irq, ret;
+	u16 this_gsi = 0, gsi = 0;
+
+	/*
+	 * Ensure that platform device must have IORESOURCE_IRQ
+	 * resource to hold gsi interrupt.
+	 */
+	if (pdev->num_resources != 1)
+		return -ENXIO;
+
+	if (pdev->resource[0].flags != IORESOURCE_IRQ)
+		return -ENXIO;
+
+	/*
+	 * Sanity check all the GICC tables for the same interrupt
+	 * number. For now, only support homogeneous ACPI machines.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct acpi_madt_generic_interrupt *gicc;
+
+		gicc = acpi_cpu_get_madt_gicc(cpu);
+		if (gicc->header.length < len)
+			return gsi ? -ENXIO : 0;
+
+		this_gsi = parse_gsi(gicc);
+		this_hetid = find_acpi_cpu_topology_hetero_id(cpu);
+		if (!gsi) {
+			hetid = this_hetid;
+			gsi = this_gsi;
+		} else if (hetid != this_hetid || gsi != this_gsi) {
+			pr_warn("ACPI: %s: must be homogeneous\n", pdev->name);
+			return -ENXIO;
+		}
+	}
+
+	if (!this_gsi)
+		return 0;
+
+	irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH);
+	if (irq < 0) {
+		pr_warn("ACPI: %s Unable to register interrupt: %d\n", pdev->name, gsi);
+		return -ENXIO;
+	}
+
+	pdev->resource[0].start = irq;
+	ret = platform_device_register(pdev);
+	if (ret)
+		acpi_unregister_gsi(gsi);
+
+	return ret;
+}
+
 #if IS_ENABLED(CONFIG_ARM_SPE_PMU)
 static struct resource spe_resources[] = {
 	{
@@ -84,6 +140,11 @@ static struct platform_device spe_dev = {
 	.num_resources = ARRAY_SIZE(spe_resources)
 };
 
+static u16 arm_spe_parse_gsi(struct acpi_madt_generic_interrupt *gicc)
+{
+	return gicc->spe_interrupt;
+}
+
 /*
  * For lack of a better place, hook the normal PMU MADT walk
  * and create a SPE device if we detect a recent MADT with
@@ -91,47 +152,10 @@ static struct platform_device spe_dev = {
  */
 static void arm_spe_acpi_register_device(void)
 {
-	int cpu, hetid, irq, ret;
-	bool first = true;
-	u16 gsi = 0;
-
-	/*
-	 * Sanity check all the GICC tables for the same interrupt number.
-	 * For now, we only support homogeneous ACPI/SPE machines.
-	 */
-	for_each_possible_cpu(cpu) {
-		struct acpi_madt_generic_interrupt *gicc;
-
-		gicc = acpi_cpu_get_madt_gicc(cpu);
-		if (gicc->header.length < ACPI_MADT_GICC_SPE)
-			return;
-
-		if (first) {
-			gsi = gicc->spe_interrupt;
-			if (!gsi)
-				return;
-			hetid = find_acpi_cpu_topology_hetero_id(cpu);
-			first = false;
-		} else if ((gsi != gicc->spe_interrupt) ||
-			   (hetid != find_acpi_cpu_topology_hetero_id(cpu))) {
-			pr_warn("ACPI: SPE must be homogeneous\n");
-			return;
-		}
-	}
-
-	irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE,
-				ACPI_ACTIVE_HIGH);
-	if (irq < 0) {
-		pr_warn("ACPI: SPE Unable to register interrupt: %d\n", gsi);
-		return;
-	}
-
-	spe_resources[0].start = irq;
-	ret = platform_device_register(&spe_dev);
-	if (ret < 0) {
+	int ret = arm_acpi_register_pmu_device(&spe_dev, ACPI_MADT_GICC_SPE,
+					       arm_spe_parse_gsi);
+	if (ret)
 		pr_warn("ACPI: SPE: Unable to register device\n");
-		acpi_unregister_gsi(gsi);
-	}
 }
 #else
 static inline void arm_spe_acpi_register_device(void)

From 1aa3d0274a4aac338ee45a3dfc3b17c944bcc2bc Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Thu, 17 Aug 2023 11:24:03 +0530
Subject: [PATCH 1383/2047] arm_pmu: acpi: Add a representative platform device
 for TRBE

ACPI TRBE does not have a HID for identification which could create and add
a platform device into the platform bus. Also without a platform device, it
cannot be probed and bound to a platform driver.

This creates a dummy platform device for TRBE after ascertaining that ACPI
provides required interrupts uniformly across all cpus on the system. This
device gets created inside drivers/perf/arm_pmu_acpi.c to accommodate TRBE
being built as a module.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/20230817055405.249630-3-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/acpi.h |  3 +++
 drivers/perf/arm_pmu_acpi.c   | 35 +++++++++++++++++++++++++++++++++++
 include/linux/perf/arm_pmu.h  |  1 +
 3 files changed, 39 insertions(+)

diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index bd68e1b7f29f..4d537d56eb84 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -42,6 +42,9 @@
 #define ACPI_MADT_GICC_SPE  (offsetof(struct acpi_madt_generic_interrupt, \
 	spe_interrupt) + sizeof(u16))
 
+#define ACPI_MADT_GICC_TRBE  (offsetof(struct acpi_madt_generic_interrupt, \
+	trbe_interrupt) + sizeof(u16))
+
 /* Basic configuration for ACPI */
 #ifdef	CONFIG_ACPI
 pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 48bd62d3993e..05dda19c5359 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -163,6 +163,40 @@ static inline void arm_spe_acpi_register_device(void)
 }
 #endif /* CONFIG_ARM_SPE_PMU */
 
+#if IS_ENABLED(CONFIG_CORESIGHT_TRBE)
+static struct resource trbe_resources[] = {
+	{
+		/* irq */
+		.flags          = IORESOURCE_IRQ,
+	}
+};
+
+static struct platform_device trbe_dev = {
+	.name = ARMV8_TRBE_PDEV_NAME,
+	.id = -1,
+	.resource = trbe_resources,
+	.num_resources = ARRAY_SIZE(trbe_resources)
+};
+
+static u16 arm_trbe_parse_gsi(struct acpi_madt_generic_interrupt *gicc)
+{
+	return gicc->trbe_interrupt;
+}
+
+static void arm_trbe_acpi_register_device(void)
+{
+	int ret = arm_acpi_register_pmu_device(&trbe_dev, ACPI_MADT_GICC_TRBE,
+					       arm_trbe_parse_gsi);
+	if (ret)
+		pr_warn("ACPI: TRBE: Unable to register device\n");
+}
+#else
+static inline void arm_trbe_acpi_register_device(void)
+{
+
+}
+#endif /* CONFIG_CORESIGHT_TRBE */
+
 static int arm_pmu_acpi_parse_irqs(void)
 {
 	int irq, cpu, irq_cpu, err;
@@ -398,6 +432,7 @@ static int arm_pmu_acpi_init(void)
 		return 0;
 
 	arm_spe_acpi_register_device();
+	arm_trbe_acpi_register_device();
 
 	return 0;
 }
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index a0801f68762b..143fbc10ecfe 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -187,5 +187,6 @@ void armpmu_free_irq(int irq, int cpu);
 #endif /* CONFIG_ARM_PMU */
 
 #define ARMV8_SPE_PDEV_NAME "arm,spe-v1"
+#define ARMV8_TRBE_PDEV_NAME "arm,trbe"
 
 #endif /* __ARM_PMU_H__ */

From a4b35d4d05b9f2e84c1dd6301d3dca65719334ef Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Aug 2023 14:03:28 -0700
Subject: [PATCH 1384/2047] integrity: Annotate struct ima_rule_opt_list with
 __counted_by

Prepare for the coming implementation by GCC and Clang of the __counted_by
attribute. Flexible array members annotated with __counted_by can have
their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS
(for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family
functions).

As found with Coccinelle[1], add __counted_by for struct ima_rule_opt_list.
Additionally, since the element count member must be set before accessing
the annotated flexible array member, move its initialization earlier.

[1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci

Cc: Mimi Zohar <zohar@linux.ibm.com>
Cc: Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: James Morris <jmorris@namei.org>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: linux-integrity@vger.kernel.org
Cc: linux-security-module@vger.kernel.org
Acked-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
Link: https://lore.kernel.org/r/20230817210327.never.598-kees@kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 security/integrity/ima/ima_policy.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index c9b3bd8f1bb9..7a0420cf1a6a 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -68,7 +68,7 @@ enum policy_rule_list { IMA_DEFAULT_POLICY = 1, IMA_CUSTOM_POLICY };
 
 struct ima_rule_opt_list {
 	size_t count;
-	char *items[];
+	char *items[] __counted_by(count);
 };
 
 /*
@@ -342,6 +342,7 @@ static struct ima_rule_opt_list *ima_alloc_rule_opt_list(const substring_t *src)
 		kfree(src_copy);
 		return ERR_PTR(-ENOMEM);
 	}
+	opt_list->count = count;
 
 	/*
 	 * strsep() has already replaced all instances of '|' with '\0',
@@ -357,7 +358,6 @@ static struct ima_rule_opt_list *ima_alloc_rule_opt_list(const substring_t *src)
 		opt_list->items[i] = cur;
 		cur = strchr(cur, '\0') + 1;
 	}
-	opt_list->count = count;
 
 	return opt_list;
 }

From af58740d8b06a6a97b7594235a1be11bd6aa37fa Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 18 Aug 2023 21:12:53 +0100
Subject: [PATCH 1385/2047] pstore: Fix kernel-doc warning

Fix the warning for the description of struct persistent_ram_buffer and
improve the descriptions of the other struct members while I'm here.

Signed-off-by: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Link: https://lore.kernel.org/r/20230818201253.2729485-1-willy@infradead.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/ram_core.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 0eb780b90281..650e437b55e6 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -25,12 +25,10 @@
 /**
  * struct persistent_ram_buffer - persistent circular RAM buffer
  *
- * @sig:
- *	signature to indicate header (PERSISTENT_RAM_SIG xor PRZ-type value)
- * @start:
- *	offset into @data where the beginning of the stored bytes begin
- * @size:
- *	number of valid bytes stored in @data
+ * @sig: Signature to indicate header (PERSISTENT_RAM_SIG xor PRZ-type value)
+ * @start: First valid byte in the buffer.
+ * @size: Number of valid bytes in the buffer.
+ * @data: The contents of the buffer.
  */
 struct persistent_ram_buffer {
 	uint32_t    sig;

From c01467355f8eb126cab0ef28b66bb506fe6a2e21 Mon Sep 17 00:00:00 2001
From: Andre Werner <andre.werner@systec-electronic.com>
Date: Fri, 18 Aug 2023 10:37:21 +0200
Subject: [PATCH 1386/2047] mfd: tps65086: Read DEVICE ID register 1 from
 device

This commit prepares a following commit for the regulator part of the MFD.
The driver should support different device chips that differ in their
register definitions, for instance to control LDOA1 and SWB2.
So it is necessary to use a dedicated regulator description for a
specific device variant. Thus, the content from DEVICEID Register 1 is
used to choose a dedicated configuration between the different device
variants.

Signed-off-by: Andre Werner <andre.werner@systec-electronic.com>
Link: https://lore.kernel.org/r/20230818083721.29790-2-andre.werner@systec-electronic.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 drivers/mfd/tps65086.c       | 17 ++++++++++++-----
 include/linux/mfd/tps65086.h | 20 ++++++++++++++------
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/drivers/mfd/tps65086.c b/drivers/mfd/tps65086.c
index 6a21000aad4a..9bb7d7d8dcfc 100644
--- a/drivers/mfd/tps65086.c
+++ b/drivers/mfd/tps65086.c
@@ -81,16 +81,23 @@ static int tps65086_probe(struct i2c_client *client)
 		return PTR_ERR(tps->regmap);
 	}
 
-	ret = regmap_read(tps->regmap, TPS65086_DEVICEID, &version);
+	/* Store device ID to load regulator configuration that fit to IC variant */
+	ret = regmap_read(tps->regmap, TPS65086_DEVICEID1, &tps->chip_id);
 	if (ret) {
-		dev_err(tps->dev, "Failed to read revision register\n");
+		dev_err(tps->dev, "Failed to read revision register 1\n");
+		return ret;
+	}
+
+	ret = regmap_read(tps->regmap, TPS65086_DEVICEID2, &version);
+	if (ret) {
+		dev_err(tps->dev, "Failed to read revision register 2\n");
 		return ret;
 	}
 
 	dev_info(tps->dev, "Device: TPS65086%01lX, OTP: %c, Rev: %ld\n",
-		 (version & TPS65086_DEVICEID_PART_MASK),
-		 (char)((version & TPS65086_DEVICEID_OTP_MASK) >> 4) + 'A',
-		 (version & TPS65086_DEVICEID_REV_MASK) >> 6);
+		 (version & TPS65086_DEVICEID2_PART_MASK),
+		 (char)((version & TPS65086_DEVICEID2_OTP_MASK) >> 4) + 'A',
+		 (version & TPS65086_DEVICEID2_REV_MASK) >> 6);
 
 	if (tps->irq > 0) {
 		ret = regmap_add_irq_chip(tps->regmap, tps->irq, IRQF_ONESHOT, 0,
diff --git a/include/linux/mfd/tps65086.h b/include/linux/mfd/tps65086.h
index 16f87cccc003..87e590de6ca5 100644
--- a/include/linux/mfd/tps65086.h
+++ b/include/linux/mfd/tps65086.h
@@ -13,8 +13,9 @@
 #include <linux/regmap.h>
 
 /* List of registers for TPS65086 */
-#define TPS65086_DEVICEID		0x01
-#define TPS65086_IRQ			0x02
+#define TPS65086_DEVICEID1		0x00
+#define TPS65086_DEVICEID2		0x01
+#define TPS65086_IRQ		0x02
 #define TPS65086_IRQ_MASK		0x03
 #define TPS65086_PMICSTAT		0x04
 #define TPS65086_SHUTDNSRC		0x05
@@ -75,10 +76,16 @@
 #define TPS65086_IRQ_SHUTDN_MASK	BIT(3)
 #define TPS65086_IRQ_FAULT_MASK		BIT(7)
 
-/* DEVICEID Register field definitions */
-#define TPS65086_DEVICEID_PART_MASK	GENMASK(3, 0)
-#define TPS65086_DEVICEID_OTP_MASK	GENMASK(5, 4)
-#define TPS65086_DEVICEID_REV_MASK	GENMASK(7, 6)
+/* DEVICEID1 Register field definitions */
+#define TPS6508640_ID			0x00
+#define TPS65086401_ID			0x01
+#define TPS6508641_ID			0x10
+#define TPS65086470_ID			0x70
+
+/* DEVICEID2 Register field definitions */
+#define TPS65086_DEVICEID2_PART_MASK	GENMASK(3, 0)
+#define TPS65086_DEVICEID2_OTP_MASK	GENMASK(5, 4)
+#define TPS65086_DEVICEID2_REV_MASK	GENMASK(7, 6)
 
 /* VID Masks */
 #define BUCK_VID_MASK			GENMASK(7, 1)
@@ -100,6 +107,7 @@ enum tps65086_irqs {
 struct tps65086 {
 	struct device *dev;
 	struct regmap *regmap;
+	unsigned int chip_id;
 
 	/* IRQ Data */
 	int irq;

From 3a5e6e49855661ad39b8fbb1dbd041178af98e00 Mon Sep 17 00:00:00 2001
From: Andre Werner <andre.werner@systec-electronic.com>
Date: Fri, 18 Aug 2023 10:37:24 +0200
Subject: [PATCH 1387/2047] regulator: tps65086: Select dedicated regulator
 config for chip variant

Some configurations differ between chip variants, e,g. the register
to control the on of state of LDOA1 and SWB2. Thus, it is necessary
to choose the correct configuration for a dedicated device.
If the wrong configuration was used, the LDOA1 output that was
disabled by the bootloader was enabled in  Kernel again.

Each chip variant gets its dedicated configuration selected by
the chip ID previously collected from MFD probe function.
The VTT enum value (tps65086_regulators) is shifted because not all
chip variants have a separate SWB2 switch. Sometimes they are merged.
So the configuration possibilities differ, thus the regulator
configuration arrays have a different length.

Signed-off-by: Andre Werner <andre.werner@systec-electronic.com>
Link: https://lore.kernel.org/r/20230818083721.29790-5-andre.werner@systec-electronic.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/tps65086-regulator.c | 188 +++++++++++++++++++++++--
 include/linux/mfd/tps65086.h           |   3 +
 2 files changed, 183 insertions(+), 8 deletions(-)

diff --git a/drivers/regulator/tps65086-regulator.c b/drivers/regulator/tps65086-regulator.c
index 663789198ba5..2d284c64eeb7 100644
--- a/drivers/regulator/tps65086-regulator.c
+++ b/drivers/regulator/tps65086-regulator.c
@@ -15,7 +15,15 @@
 #include <linux/mfd/tps65086.h>
 
 enum tps65086_regulators { BUCK1, BUCK2, BUCK3, BUCK4, BUCK5, BUCK6, LDOA1,
-	LDOA2, LDOA3, SWA1, SWB1, SWB2, VTT };
+	LDOA2, LDOA3, VTT, SWA1, SWB1, SWB2 };
+
+/* Selector for regulator configuration regarding PMIC chip ID. */
+enum tps65086_ids {
+	TPS6508640 = 0,
+	TPS65086401,
+	TPS6508641,
+	TPS65086470,
+};
 
 #define TPS65086_REGULATOR(_name, _of, _id, _nv, _vr, _vm, _er, _em, _lr, _dr, _dm)	\
 	[_id] = {							\
@@ -57,12 +65,24 @@ enum tps65086_regulators { BUCK1, BUCK2, BUCK3, BUCK4, BUCK5, BUCK6, LDOA1,
 		},							\
 	}
 
+
+#define TPS65086_REGULATOR_CONFIG(_chip_id, _config)			\
+	[_chip_id] = {							\
+		.config = _config,					\
+		.num_elems = ARRAY_SIZE(_config),			\
+	}
+
 struct tps65086_regulator {
 	struct regulator_desc desc;
 	unsigned int decay_reg;
 	unsigned int decay_mask;
 };
 
+struct tps65086_regulator_config {
+	struct tps65086_regulator * const config;
+	const unsigned int num_elems;
+};
+
 static const struct linear_range tps65086_10mv_ranges[] = {
 	REGULATOR_LINEAR_RANGE(0, 0x0, 0x0, 0),
 	REGULATOR_LINEAR_RANGE(410000, 0x1, 0x7F, 10000),
@@ -114,7 +134,125 @@ static int tps65086_of_parse_cb(struct device_node *dev,
 				const struct regulator_desc *desc,
 				struct regulator_config *config);
 
-static struct tps65086_regulator regulators[] = {
+static struct tps65086_regulator tps6508640_regulator_config[] = {
+	TPS65086_REGULATOR("BUCK1", "buck1", BUCK1, 0x80, TPS65086_BUCK1CTRL,
+			   BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(0),
+			   tps65086_10mv_ranges, TPS65086_BUCK1CTRL,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK2", "buck2", BUCK2, 0x80, TPS65086_BUCK2CTRL,
+			   BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(1),
+			   tps65086_10mv_ranges, TPS65086_BUCK2CTRL,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK3", "buck3", BUCK3, 0x80, TPS65086_BUCK3VID,
+			   BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(2),
+			   tps65086_10mv_ranges, TPS65086_BUCK3DECAY,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK4", "buck4", BUCK4, 0x80, TPS65086_BUCK4VID,
+			   BUCK_VID_MASK, TPS65086_BUCK4CTRL, BIT(0),
+			   tps65086_10mv_ranges, TPS65086_BUCK4VID,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK5", "buck5", BUCK5, 0x80, TPS65086_BUCK5VID,
+			   BUCK_VID_MASK, TPS65086_BUCK5CTRL, BIT(0),
+			   tps65086_10mv_ranges, TPS65086_BUCK5CTRL,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK6", "buck6", BUCK6, 0x80, TPS65086_BUCK6VID,
+			   BUCK_VID_MASK, TPS65086_BUCK6CTRL, BIT(0),
+			   tps65086_10mv_ranges, TPS65086_BUCK6CTRL,
+			   BIT(0)),
+	TPS65086_REGULATOR("LDOA1", "ldoa1", LDOA1, 0xF, TPS65086_LDOA1CTRL,
+			   VDOA1_VID_MASK, TPS65086_SWVTT_EN, BIT(7),
+			   tps65086_ldoa1_ranges, 0, 0),
+	TPS65086_REGULATOR("LDOA2", "ldoa2", LDOA2, 0x10, TPS65086_LDOA2VID,
+			   VDOA23_VID_MASK, TPS65086_LDOA2CTRL, BIT(0),
+			   tps65086_ldoa23_ranges, 0, 0),
+	TPS65086_REGULATOR("LDOA3", "ldoa3", LDOA3, 0x10, TPS65086_LDOA3VID,
+			   VDOA23_VID_MASK, TPS65086_LDOA3CTRL, BIT(0),
+			   tps65086_ldoa23_ranges, 0, 0),
+	TPS65086_SWITCH("VTT", "vtt", VTT, TPS65086_SWVTT_EN, BIT(4)),
+	TPS65086_SWITCH("SWA1", "swa1", SWA1, TPS65086_SWVTT_EN, BIT(5)),
+	TPS65086_SWITCH("SWB1", "swb1", SWB1, TPS65086_SWVTT_EN, BIT(6)),
+	TPS65086_SWITCH("SWB2", "swb2", SWB2, TPS65086_LDOA1CTRL, BIT(0)),
+};
+
+static struct tps65086_regulator tps65086401_regulator_config[] = {
+	TPS65086_REGULATOR("BUCK1", "buck1", BUCK1, 0x80, TPS65086_BUCK1CTRL,
+			   BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(0),
+			   tps65086_10mv_ranges, TPS65086_BUCK1CTRL,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK2", "buck2", BUCK2, 0x80, TPS65086_BUCK2CTRL,
+			   BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(1),
+			   tps65086_10mv_ranges, TPS65086_BUCK2CTRL,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK3", "buck3", BUCK3, 0x80, TPS65086_BUCK3VID,
+			   BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(2),
+			   tps65086_10mv_ranges, TPS65086_BUCK3DECAY,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK4", "buck4", BUCK4, 0x80, TPS65086_BUCK4VID,
+			   BUCK_VID_MASK, TPS65086_BUCK4CTRL, BIT(0),
+			   tps65086_10mv_ranges, TPS65086_BUCK4VID,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK5", "buck5", BUCK5, 0x80, TPS65086_BUCK5VID,
+			   BUCK_VID_MASK, TPS65086_BUCK5CTRL, BIT(0),
+			   tps65086_10mv_ranges, TPS65086_BUCK5CTRL,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK6", "buck6", BUCK6, 0x80, TPS65086_BUCK6VID,
+			   BUCK_VID_MASK, TPS65086_BUCK6CTRL, BIT(0),
+			   tps65086_10mv_ranges, TPS65086_BUCK6CTRL,
+			   BIT(0)),
+	TPS65086_REGULATOR("LDOA1", "ldoa1", LDOA1, 0xF, TPS65086_LDOA1CTRL,
+			   VDOA1_VID_MASK, TPS65086_SWVTT_EN, BIT(7),
+			   tps65086_ldoa1_ranges, 0, 0),
+	TPS65086_REGULATOR("LDOA2", "ldoa2", LDOA2, 0x10, TPS65086_LDOA2VID,
+			   VDOA23_VID_MASK, TPS65086_LDOA2CTRL, BIT(0),
+			   tps65086_ldoa23_ranges, 0, 0),
+	TPS65086_REGULATOR("LDOA3", "ldoa3", LDOA3, 0x10, TPS65086_LDOA3VID,
+			   VDOA23_VID_MASK, TPS65086_LDOA3CTRL, BIT(0),
+			   tps65086_ldoa23_ranges, 0, 0),
+	TPS65086_SWITCH("VTT", "vtt", VTT, TPS65086_SWVTT_EN, BIT(4)),
+	TPS65086_SWITCH("SWA1", "swa1", SWA1, TPS65086_SWVTT_EN, BIT(5)),
+	TPS65086_SWITCH("SWB1", "swb1", SWB1, TPS65086_SWVTT_EN, BIT(6)),
+};
+
+static struct tps65086_regulator tps6508641_regulator_config[] = {
+	TPS65086_REGULATOR("BUCK1", "buck1", BUCK1, 0x80, TPS65086_BUCK1CTRL,
+			   BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(0),
+			   tps65086_10mv_ranges, TPS65086_BUCK1CTRL,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK2", "buck2", BUCK2, 0x80, TPS65086_BUCK2CTRL,
+			   BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(1),
+			   tps65086_10mv_ranges, TPS65086_BUCK2CTRL,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK3", "buck3", BUCK3, 0x80, TPS65086_BUCK3VID,
+			   BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(2),
+			   tps65086_10mv_ranges, TPS65086_BUCK3DECAY,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK4", "buck4", BUCK4, 0x80, TPS65086_BUCK4VID,
+			   BUCK_VID_MASK, TPS65086_BUCK4CTRL, BIT(0),
+			   tps65086_10mv_ranges, TPS65086_BUCK4VID,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK5", "buck5", BUCK5, 0x80, TPS65086_BUCK5VID,
+			   BUCK_VID_MASK, TPS65086_BUCK5CTRL, BIT(0),
+			   tps65086_10mv_ranges, TPS65086_BUCK5CTRL,
+			   BIT(0)),
+	TPS65086_REGULATOR("BUCK6", "buck6", BUCK6, 0x80, TPS65086_BUCK6VID,
+			   BUCK_VID_MASK, TPS65086_BUCK6CTRL, BIT(0),
+			   tps65086_10mv_ranges, TPS65086_BUCK6CTRL,
+			   BIT(0)),
+	TPS65086_REGULATOR("LDOA1", "ldoa1", LDOA1, 0xF, TPS65086_LDOA1CTRL,
+			   VDOA1_VID_MASK, TPS65086_SWVTT_EN, BIT(7),
+			   tps65086_ldoa1_ranges, 0, 0),
+	TPS65086_REGULATOR("LDOA2", "ldoa2", LDOA2, 0x10, TPS65086_LDOA2VID,
+			   VDOA23_VID_MASK, TPS65086_LDOA2CTRL, BIT(0),
+			   tps65086_ldoa23_ranges, 0, 0),
+	TPS65086_REGULATOR("LDOA3", "ldoa3", LDOA3, 0x10, TPS65086_LDOA3VID,
+			   VDOA23_VID_MASK, TPS65086_LDOA3CTRL, BIT(0),
+			   tps65086_ldoa23_ranges, 0, 0),
+	TPS65086_SWITCH("VTT", "vtt", VTT, TPS65086_SWVTT_EN, BIT(4)),
+	TPS65086_SWITCH("SWA1", "swa1", SWA1, TPS65086_SWVTT_EN, BIT(5)),
+	TPS65086_SWITCH("SWB1", "swb1", SWB1, TPS65086_SWVTT_EN, BIT(6)),
+};
+
+static struct tps65086_regulator tps65086470_regulator_config[] = {
 	TPS65086_REGULATOR("BUCK1", "buck1", BUCK1, 0x80, TPS65086_BUCK1CTRL,
 			   BUCK_VID_MASK, TPS65086_BUCK123CTRL, BIT(0),
 			   tps65086_10mv_ranges, TPS65086_BUCK1CTRL,
@@ -148,16 +286,25 @@ static struct tps65086_regulator regulators[] = {
 	TPS65086_REGULATOR("LDOA3", "ldoa3", LDOA3, 0x10, TPS65086_LDOA3VID,
 			   VDOA23_VID_MASK, TPS65086_LDOA3CTRL, BIT(0),
 			   tps65086_ldoa23_ranges, 0, 0),
+	TPS65086_SWITCH("VTT", "vtt", VTT, TPS65086_SWVTT_EN, BIT(4)),
 	TPS65086_SWITCH("SWA1", "swa1", SWA1, TPS65086_SWVTT_EN, BIT(5)),
 	TPS65086_SWITCH("SWB1", "swb1", SWB1, TPS65086_SWVTT_EN, BIT(6)),
 	TPS65086_SWITCH("SWB2", "swb2", SWB2, TPS65086_SWVTT_EN, BIT(7)),
-	TPS65086_SWITCH("VTT", "vtt", VTT, TPS65086_SWVTT_EN, BIT(4)),
+};
+
+static const struct tps65086_regulator_config regulator_configs[] = {
+	TPS65086_REGULATOR_CONFIG(TPS6508640, tps6508640_regulator_config),
+	TPS65086_REGULATOR_CONFIG(TPS65086401, tps65086401_regulator_config),
+	TPS65086_REGULATOR_CONFIG(TPS6508641, tps6508641_regulator_config),
+	TPS65086_REGULATOR_CONFIG(TPS65086470, tps65086470_regulator_config)
 };
 
 static int tps65086_of_parse_cb(struct device_node *node,
 				const struct regulator_desc *desc,
 				struct regulator_config *config)
 {
+	struct tps65086 * const tps = dev_get_drvdata(config->dev);
+	struct tps65086_regulator *regulators = tps->reg_config->config;
 	int ret;
 
 	/* Check for 25mV step mode */
@@ -203,9 +350,30 @@ static int tps65086_regulator_probe(struct platform_device *pdev)
 {
 	struct tps65086 *tps = dev_get_drvdata(pdev->dev.parent);
 	struct regulator_config config = { };
+	unsigned int selector_reg_config;
 	struct regulator_dev *rdev;
 	int i;
 
+	/* Select regulator configuration for used PMIC device */
+	switch (tps->chip_id) {
+	case TPS6508640_ID:
+		selector_reg_config = TPS6508640;
+		break;
+	case TPS65086401_ID:
+		selector_reg_config = TPS65086401;
+		break;
+	case TPS6508641_ID:
+		selector_reg_config = TPS6508641;
+		break;
+	case TPS65086470_ID:
+		selector_reg_config = TPS65086470;
+		break;
+	default:
+		dev_err(tps->dev, "Unknown device ID. Cannot determine regulator config.\n");
+		return -ENODEV;
+	}
+	tps->reg_config = &regulator_configs[selector_reg_config];
+
 	platform_set_drvdata(pdev, tps);
 
 	config.dev = &pdev->dev;
@@ -213,12 +381,16 @@ static int tps65086_regulator_probe(struct platform_device *pdev)
 	config.driver_data = tps;
 	config.regmap = tps->regmap;
 
-	for (i = 0; i < ARRAY_SIZE(regulators); i++) {
-		rdev = devm_regulator_register(&pdev->dev, &regulators[i].desc,
-					       &config);
+	for (i = 0; i < tps->reg_config->num_elems; ++i) {
+		struct regulator_desc * const desc_ptr = &tps->reg_config->config[i].desc;
+
+		dev_dbg(tps->dev, "Index: %u; Regulator name: \"%s\"; Regulator ID: %d\n",
+			i, desc_ptr->name, desc_ptr->id);
+
+		rdev = devm_regulator_register(&pdev->dev, desc_ptr, &config);
 		if (IS_ERR(rdev)) {
-			dev_err(tps->dev, "failed to register %s regulator\n",
-				pdev->name);
+			dev_err(tps->dev, "failed to register %d \"%s\" regulator\n",
+				i, desc_ptr->name);
 			return PTR_ERR(rdev);
 		}
 	}
diff --git a/include/linux/mfd/tps65086.h b/include/linux/mfd/tps65086.h
index 87e590de6ca5..9185b5cd8371 100644
--- a/include/linux/mfd/tps65086.h
+++ b/include/linux/mfd/tps65086.h
@@ -99,6 +99,8 @@ enum tps65086_irqs {
 	TPS65086_IRQ_FAULT,
 };
 
+struct tps65086_regulator_config;
+
 /**
  * struct tps65086 - state holder for the tps65086 driver
  *
@@ -108,6 +110,7 @@ struct tps65086 {
 	struct device *dev;
 	struct regmap *regmap;
 	unsigned int chip_id;
+	const struct tps65086_regulator_config *reg_config;
 
 	/* IRQ Data */
 	int irq;

From 3b1f08833c45d0167741e4097b0150e7cf086102 Mon Sep 17 00:00:00 2001
From: BrenoRCBrito <brenorcbrito@gmail.com>
Date: Fri, 18 Aug 2023 18:14:16 -0300
Subject: [PATCH 1388/2047] ASoC: amd: yc: Add VivoBook Pro 15 to quirks list
 for acp6x

VivoBook Pro 15 Ryzen Edition uses Ryzen 6800H processor, and adding to
 quirks list for acp6x will enable internal mic.

Signed-off-by: BrenoRCBrito <brenorcbrito@gmail.com>
Link: https://lore.kernel.org/r/20230818211417.32167-1-brenorcbrito@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/amd/yc/acp6x-mach.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index a2fe3bd4f9a1..a2661dd637a5 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -248,6 +248,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "M3402RA"),
 		}
 	},
+	{
+		.driver_data = &acp6x_card,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "M6500RC"),
+		}
+	},
 	{
 		.driver_data = &acp6x_card,
 		.matches = {

From ec05b12634ad007f7a37161ecb7cd243c3b1970c Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 18 Aug 2023 21:10:03 +0100
Subject: [PATCH 1389/2047] devpts: Fix kernel-doc warnings

This documentation has bit-rotted over time.

Signed-off-by: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Message-Id: <20230818201003.2720257-1-willy@infradead.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/devpts/inode.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index fe3db0eda8e4..b31dfd79e474 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -534,12 +534,12 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx)
 
 /**
  * devpts_pty_new -- create a new inode in /dev/pts/
- * @ptmx_inode: inode of the master
- * @device: major+minor of the node to be created
+ * @fsi: Filesystem info for this instance.
  * @index: used as a name of the node
  * @priv: what's given back by devpts_get_priv
  *
- * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill.
+ * The dentry for the created inode is returned.
+ * Remove it from /dev/pts/ with devpts_pty_kill().
  */
 struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
 {
@@ -580,7 +580,7 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
 
 /**
  * devpts_get_priv -- get private data for a slave
- * @pts_inode: inode of the slave
+ * @dentry: dentry of the slave
  *
  * Returns whatever was passed as priv in devpts_pty_new for a given inode.
  */
@@ -593,7 +593,7 @@ void *devpts_get_priv(struct dentry *dentry)
 
 /**
  * devpts_pty_kill -- remove inode form /dev/pts/
- * @inode: inode of the slave to be removed
+ * @dentry: dentry of the slave to be removed
  *
  * This is an inverse operation of devpts_pty_new.
  */

From 35931eb3945b8d38c31f8e956aee3cf31c52121b Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 18 Aug 2023 21:08:24 +0100
Subject: [PATCH 1390/2047] fs: Fix kernel-doc warnings

These have a variety of causes and a corresponding variety of solutions.

Signed-off-by: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Message-Id: <20230818200824.2720007-1-willy@infradead.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/file.c             |  3 ++-
 fs/fs_context.c       | 12 +++++++++---
 fs/ioctl.c            | 10 +++++++---
 fs/kernel_read_file.c | 12 ++++++------
 fs/namei.c            |  3 +++
 fs/open.c             |  4 ++--
 6 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 7bb2cddbd82b..1ddf645ca691 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -668,7 +668,7 @@ EXPORT_SYMBOL(close_fd); /* for ksys_close() */
 
 /**
  * last_fd - return last valid index into fd table
- * @cur_fds: files struct
+ * @fdt: File descriptor table.
  *
  * Context: Either rcu read lock or files_lock must be held.
  *
@@ -724,6 +724,7 @@ static inline void __range_close(struct files_struct *files, unsigned int fd,
  *
  * @fd:     starting file descriptor to close
  * @max_fd: last file descriptor to close
+ * @flags:  CLOSE_RANGE flags.
  *
  * This closes a range of file descriptors. All file descriptors
  * from @fd up to and including @max_fd are closed.
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 375023e40161..a48a69caddce 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -162,6 +162,10 @@ EXPORT_SYMBOL(vfs_parse_fs_param);
 
 /**
  * vfs_parse_fs_string - Convenience function to just parse a string.
+ * @fc: Filesystem context.
+ * @key: Parameter name.
+ * @value: Default value.
+ * @v_size: Maximum number of bytes in the value.
  */
 int vfs_parse_fs_string(struct fs_context *fc, const char *key,
 			const char *value, size_t v_size)
@@ -189,7 +193,7 @@ EXPORT_SYMBOL(vfs_parse_fs_string);
 
 /**
  * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data
- * @ctx: The superblock configuration to fill in.
+ * @fc: The superblock configuration to fill in.
  * @data: The data to parse
  *
  * Parse a blob of data that's in key[=val][,key[=val]]* form.  This can be
@@ -354,7 +358,7 @@ void fc_drop_locked(struct fs_context *fc)
 static void legacy_fs_context_free(struct fs_context *fc);
 
 /**
- * vfs_dup_fc_config: Duplicate a filesystem context.
+ * vfs_dup_fs_context - Duplicate a filesystem context.
  * @src_fc: The context to copy.
  */
 struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
@@ -400,7 +404,9 @@ EXPORT_SYMBOL(vfs_dup_fs_context);
 
 /**
  * logfc - Log a message to a filesystem context
- * @fc: The filesystem context to log to.
+ * @log: The filesystem context to log to, or NULL to use printk.
+ * @prefix: A string to prefix the output with, or NULL.
+ * @level: 'w' for a warning, 'e' for an error.  Anything else is a notice.
  * @fmt: The format of the buffer.
  */
 void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...)
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5b2481cd4750..d413e0b8f6c2 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -109,9 +109,6 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
  * Returns 0 on success, -errno on error, 1 if this was the last
  * extent that will fit in user array.
  */
-#define SET_UNKNOWN_FLAGS	(FIEMAP_EXTENT_DELALLOC)
-#define SET_NO_UNMOUNTED_IO_FLAGS	(FIEMAP_EXTENT_DATA_ENCRYPTED)
-#define SET_NOT_ALIGNED_FLAGS	(FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
 int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
 			    u64 phys, u64 len, u32 flags)
 {
@@ -127,6 +124,10 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
 	if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
 		return 1;
 
+#define SET_UNKNOWN_FLAGS	(FIEMAP_EXTENT_DELALLOC)
+#define SET_NO_UNMOUNTED_IO_FLAGS	(FIEMAP_EXTENT_DATA_ENCRYPTED)
+#define SET_NOT_ALIGNED_FLAGS	(FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
+
 	if (flags & SET_UNKNOWN_FLAGS)
 		flags |= FIEMAP_EXTENT_UNKNOWN;
 	if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
@@ -877,6 +878,9 @@ out:
 #ifdef CONFIG_COMPAT
 /**
  * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation
+ * @file: The file to operate on.
+ * @cmd: The ioctl command number.
+ * @arg: The argument to the ioctl.
  *
  * This is not normally called as a function, but instead set in struct
  * file_operations as
diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c
index 5d826274570c..c429c42a6867 100644
--- a/fs/kernel_read_file.c
+++ b/fs/kernel_read_file.c
@@ -8,16 +8,16 @@
 /**
  * kernel_read_file() - read file contents into a kernel buffer
  *
- * @file	file to read from
- * @offset	where to start reading from (see below).
- * @buf		pointer to a "void *" buffer for reading into (if
+ * @file:	file to read from
+ * @offset:	where to start reading from (see below).
+ * @buf:	pointer to a "void *" buffer for reading into (if
  *		*@buf is NULL, a buffer will be allocated, and
  *		@buf_size will be ignored)
- * @buf_size	size of buf, if already allocated. If @buf not
+ * @buf_size:	size of buf, if already allocated. If @buf not
  *		allocated, this is the largest size to allocate.
- * @file_size	if non-NULL, the full size of @file will be
+ * @file_size:	if non-NULL, the full size of @file will be
  *		written here.
- * @id		the kernel_read_file_id identifying the type of
+ * @id:		the kernel_read_file_id identifying the type of
  *		file contents being read (for LSMs to examine)
  *
  * @offset must be 0 unless both @buf and @file_size are non-NULL
diff --git a/fs/namei.c b/fs/namei.c
index 2bae29ea52ff..567ee547492b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -643,6 +643,8 @@ static bool nd_alloc_stack(struct nameidata *nd)
 
 /**
  * path_connected - Verify that a dentry is below mnt.mnt_root
+ * @mnt: The mountpoint to check.
+ * @dentry: The dentry to check.
  *
  * Rename can sometimes move a file or directory outside of a bind
  * mount, path_connected allows those cases to be detected.
@@ -1083,6 +1085,7 @@ fs_initcall(init_fs_namei_sysctls);
 /**
  * may_follow_link - Check symlink following for unsafe situations
  * @nd: nameidata pathwalk data
+ * @inode: Used for idmapping.
  *
  * In the case of the sysctl_protected_symlinks sysctl being enabled,
  * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
diff --git a/fs/open.c b/fs/open.c
index 455fdddbe9aa..24c1ee83a4b6 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1150,7 +1150,7 @@ EXPORT_SYMBOL_GPL(kernel_file_open);
  * backing_file_open - open a backing file for kernel internal use
  * @path:	path of the file to open
  * @flags:	open flags
- * @path:	path of the backing file
+ * @real_path:	path of the backing file
  * @cred:	credentials for open
  *
  * Open a backing file for a stackable filesystem (e.g., overlayfs).
@@ -1567,7 +1567,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
 }
 
 /**
- * close_range() - Close all file descriptors in a given range.
+ * sys_close_range() - Close all file descriptors in a given range.
  *
  * @fd:     starting file descriptor to close
  * @max_fd: last file descriptor to close

From fbaa530e28afd203b0dc6605c7ecc584952bd606 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 18 Aug 2023 15:45:56 +0100
Subject: [PATCH 1391/2047] fs/pipe: remove redundant initialization of pointer
 buf

The pointer buf is being initializated with a value that is never read,
it is being re-assigned later on at the pointer where it is being used.
The initialization is redundant and can be removed. Cleans up clang scan
build warning:

fs/pipe.c:492:24: warning: Value stored to 'buf' during its
initialization is never read [deadcode.DeadStores]

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Message-Id: <20230818144556.1208082-1-colin.i.king@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/pipe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index a388612f0f86..34b7bf4d7adb 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -489,7 +489,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 		head = pipe->head;
 		if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
 			unsigned int mask = pipe->ring_size - 1;
-			struct pipe_buffer *buf = &pipe->bufs[head & mask];
+			struct pipe_buffer *buf;
 			struct page *page = pipe->tmp_page;
 			int copied;
 

From 8c8e7dba1032c6054540ab6f59ce3f3b9f3c6105 Mon Sep 17 00:00:00 2001
From: Anh Tuan Phan <tuananhlfc@gmail.com>
Date: Thu, 17 Aug 2023 23:31:42 +0700
Subject: [PATCH 1392/2047] fs/dcache: Replace printk and WARN_ON by WARN

Use WARN instead of printk + WARN_ON as reported from coccinelle:
./fs/dcache.c:1667:1-7: SUGGESTION: printk + WARN_ON can be just WARN

Signed-off-by: Anh Tuan Phan <tuananhlfc@gmail.com>
Message-Id: <20230817163142.117706-1-tuananhlfc@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/dcache.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 3aaf2c7ba110..25ac74d30bff 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1664,7 +1664,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
 	if (dentry == _data && dentry->d_lockref.count == 1)
 		return D_WALK_CONTINUE;
 
-	printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} "
+	WARN(1, "BUG: Dentry %p{i=%lx,n=%pd} "
 			" still in use (%d) [unmount of %s %s]\n",
 		       dentry,
 		       dentry->d_inode ?
@@ -1673,7 +1673,6 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
 		       dentry->d_lockref.count,
 		       dentry->d_sb->s_type->name,
 		       dentry->d_sb->s_id);
-	WARN_ON(1);
 	return D_WALK_CONTINUE;
 }
 

From 35b464e32c8bccef435e415db955787ead4ab44c Mon Sep 17 00:00:00 2001
From: Hugo Villeneuve <hvilleneuve@dimonoff.com>
Date: Sat, 22 Jul 2023 14:09:25 -0400
Subject: [PATCH 1393/2047] auxdisplay: hd44780: move cursor home after clear
 display command

The DISPLAY_CLEAR command on the NewHaven NHD-0220DZW-AG5 display
does NOT change the DDRAM address to 00h (home position) like the
standard Hitachi HD44780 controller. As a consequence, the starting
position of the initial string LCD_INIT_TEXT is not guaranteed to be
at 0,0 depending on where the cursor was before the DISPLAY_CLEAR
command.

Extract of DISPLAY_CLEAR command from datasheets of:

    Hitachi HD44780:
        ... It then sets DDRAM address 0 into the address counter...

    NewHaven NHD-0220DZW-AG5 datasheet:
	... This instruction does not change the DDRAM Address

Move the cursor home after sending DISPLAY_CLEAR command to support
non-standard LCDs.

Signed-off-by: Hugo Villeneuve <hvilleneuve@dimonoff.com>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Tested-by: David Reaver <me@davidreaver.com>
Link: https://lore.kernel.org/r/20230722180925.1408885-1-hugo@hugovil.com
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 drivers/auxdisplay/hd44780_common.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/auxdisplay/hd44780_common.c b/drivers/auxdisplay/hd44780_common.c
index 3934c2eebf33..7cbf375b0fa5 100644
--- a/drivers/auxdisplay/hd44780_common.c
+++ b/drivers/auxdisplay/hd44780_common.c
@@ -82,7 +82,15 @@ int hd44780_common_clear_display(struct charlcd *lcd)
 	hdc->write_cmd(hdc, LCD_CMD_DISPLAY_CLEAR);
 	/* datasheet says to wait 1,64 milliseconds */
 	long_sleep(2);
-	return 0;
+
+	/*
+	 * The Hitachi HD44780 controller (and compatible ones) reset the DDRAM
+	 * address when executing the DISPLAY_CLEAR command, thus the
+	 * following call is not required. However, other controllers do not
+	 * (e.g. NewHaven NHD-0220DZW-AG5), thus move the cursor to home
+	 * unconditionally to support both.
+	 */
+	return hd44780_common_home(lcd);
 }
 EXPORT_SYMBOL_GPL(hd44780_common_clear_display);
 

From 919dc320956ea353a7fb2d84265195ad5ef525ac Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 1 Aug 2023 21:03:53 -0700
Subject: [PATCH 1394/2047] fsverity: skip PKCS#7 parser when keyring is empty

If an fsverity builtin signature is given for a file but the
".fs-verity" keyring is empty, there's no real reason to run the PKCS#7
parser.  Skip this to avoid the PKCS#7 attack surface when builtin
signature support is configured into the kernel but is not being used.

This is a hardening improvement, not a fix per se, but I've added
Fixes and Cc stable to get it out to more users.

Fixes: 432434c9f8e1 ("fs-verity: support builtin file signatures")
Cc: stable@vger.kernel.org
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Link: https://lore.kernel.org/r/20230820173237.2579-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/verity/signature.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/fs/verity/signature.c b/fs/verity/signature.c
index b95acae64eac..90c07573dd77 100644
--- a/fs/verity/signature.c
+++ b/fs/verity/signature.c
@@ -62,6 +62,22 @@ int fsverity_verify_signature(const struct fsverity_info *vi,
 		return 0;
 	}
 
+	if (fsverity_keyring->keys.nr_leaves_on_tree == 0) {
+		/*
+		 * The ".fs-verity" keyring is empty, due to builtin signatures
+		 * being supported by the kernel but not actually being used.
+		 * In this case, verify_pkcs7_signature() would always return an
+		 * error, usually ENOKEY.  It could also be EBADMSG if the
+		 * PKCS#7 is malformed, but that isn't very important to
+		 * distinguish.  So, just skip to ENOKEY to avoid the attack
+		 * surface of the PKCS#7 parser, which would otherwise be
+		 * reachable by any task able to execute FS_IOC_ENABLE_VERITY.
+		 */
+		fsverity_err(inode,
+			     "fs-verity keyring is empty, rejecting signed file!");
+		return -ENOKEY;
+	}
+
 	d = kzalloc(sizeof(*d) + hash_alg->digest_size, GFP_KERNEL);
 	if (!d)
 		return -ENOMEM;

From 4f353e0d1282dfe6b8082290fe8e606c5739a954 Mon Sep 17 00:00:00 2001
From: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Date: Fri, 4 Aug 2023 14:14:39 -0300
Subject: [PATCH 1395/2047] scripts: generate_rust_analyzer: provide `cfg`s for
 `core` and `alloc`

Both `core` and `alloc` have their `cfgs` (such as `no_rc`) missing
in `rust-project.json`.

To remedy this, pass the flags to `generate_rust_analyzer.py` for
them to be added to a dictionary where each key corresponds to
a crate and each value to a list of `cfg`s. The dictionary is then
used to pass the `cfg`s to each crate in the generated file (for
`core` and `alloc` only).

Signed-off-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Link: https://lore.kernel.org/r/20230804171448.54976-1-yakoyoku@gmail.com
[ Removed `Suggested-by` as discussed in mailing list. ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/Makefile                     |  1 +
 scripts/generate_rust_analyzer.py | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/rust/Makefile b/rust/Makefile
index c3dfb3f6071b..31a5fb01d56b 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -374,6 +374,7 @@ quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L
 
 rust-analyzer:
 	$(Q)$(srctree)/scripts/generate_rust_analyzer.py \
+		--cfgs='core=$(core-cfgs)' --cfgs='alloc=$(alloc-cfgs)' \
 		$(abs_srctree) $(abs_objtree) \
 		$(RUST_LIB_SRC) $(KBUILD_EXTMOD) > \
 		$(if $(KBUILD_EXTMOD),$(extmod_prefix),$(objtree))/rust-project.json
diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py
index 848fa1ad92ba..fc52bc41d3e7 100755
--- a/scripts/generate_rust_analyzer.py
+++ b/scripts/generate_rust_analyzer.py
@@ -10,7 +10,15 @@ import os
 import pathlib
 import sys
 
-def generate_crates(srctree, objtree, sysroot_src, external_src):
+def args_crates_cfgs(cfgs):
+    crates_cfgs = {}
+    for cfg in cfgs:
+        crate, vals = cfg.split("=", 1)
+        crates_cfgs[crate] = vals.replace("--cfg", "").split()
+
+    return crates_cfgs
+
+def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs):
     # Generate the configuration list.
     cfg = []
     with open(objtree / "include" / "generated" / "rustc_cfg") as fd:
@@ -24,6 +32,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src):
     # Avoid O(n^2) iterations by keeping a map of indexes.
     crates = []
     crates_indexes = {}
+    crates_cfgs = args_crates_cfgs(cfgs)
 
     def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=True, is_proc_macro=False):
         crates_indexes[display_name] = len(crates)
@@ -45,6 +54,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src):
         "core",
         sysroot_src / "core" / "src" / "lib.rs",
         [],
+        cfg=crates_cfgs.get("core", []),
         is_workspace_member=False,
     )
 
@@ -58,6 +68,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src):
         "alloc",
         srctree / "rust" / "alloc" / "lib.rs",
         ["core", "compiler_builtins"],
+        cfg=crates_cfgs.get("alloc", []),
     )
 
     append_crate(
@@ -131,6 +142,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src):
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('--verbose', '-v', action='store_true')
+    parser.add_argument('--cfgs', action='append', default=[])
     parser.add_argument("srctree", type=pathlib.Path)
     parser.add_argument("objtree", type=pathlib.Path)
     parser.add_argument("sysroot_src", type=pathlib.Path)
@@ -143,7 +155,7 @@ def main():
     )
 
     rust_project = {
-        "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree),
+        "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs),
         "sysroot_src": str(args.sysroot_src),
     }
 

From b603c6cc405a1c25e84d9621ec05115ed63ff8b8 Mon Sep 17 00:00:00 2001
From: Guillaume Plourde <gplourde@protonmail.com>
Date: Mon, 14 Aug 2023 15:07:22 +0000
Subject: [PATCH 1396/2047] docs: rust: add command line to rust-analyzer
 section

Add command line to rust-analyzer section for convenience purposes.

Signed-off-by: Guillaume Plourde <gplourde@protonmail.com>
Link: https://lore.kernel.org/r/y4jBalhfESeCZDShmVaGwrdlIRoIHroqNVUUYLck6qGNwB5e7wbIJO5DoiLBTPpTNYtdneWRODjhXwlIl9VzokqxffdNU7y__1wIa7BBl94=@protonmail.com
[ Fixed indentation to tab and reworded title. ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 Documentation/rust/quick-start.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst
index 2cdf75eeae1d..1274cd74f767 100644
--- a/Documentation/rust/quick-start.rst
+++ b/Documentation/rust/quick-start.rst
@@ -196,7 +196,9 @@ be used with many editors to enable syntax highlighting, completion, go to
 definition, and other features.
 
 ``rust-analyzer`` needs a configuration file, ``rust-project.json``, which
-can be generated by the ``rust-analyzer`` Make target.
+can be generated by the ``rust-analyzer`` Make target::
+
+	make LLVM=1 rust-analyzer
 
 
 Configuration

From 8cb40124cf923d4627d2e29b84dbff72e41fa0ef Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 3 Aug 2023 02:04:36 -0400
Subject: [PATCH 1397/2047] docs: rust: update instructions for obtaining
 'core' source

The source for Rust's 'core' library is needed to build the kernel with
Rust support. This sometimes needs to be obtained by hand when using a
standalone version of 'rustc' not managed by 'rustup'. Currently, the
documentation suggests cloning the 'rust' repository to obtain these
sources, but this is quite slow (on the order of a multiple minutes).

Change this documentation to suggest using the source tarball instead.
The tarball includes only needed files (<5M) and is significantly faster
to download; this is more in line with what 'rustup' does.

Also simplify wording of the relevant section.

Link: https://github.com/Rust-for-Linux/linux/pull/1024
Signed-off-by: Trevor Gross <tmgross@umich.edu>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Link: https://lore.kernel.org/r/20230803060437.12157-2-tmgross@umich.edu
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 Documentation/rust/quick-start.rst | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst
index 1274cd74f767..a0a47a6f216b 100644
--- a/Documentation/rust/quick-start.rst
+++ b/Documentation/rust/quick-start.rst
@@ -56,16 +56,17 @@ If ``rustup`` is being used, run::
 The components are installed per toolchain, thus upgrading the Rust compiler
 version later on requires re-adding the component.
 
-Otherwise, if a standalone installer is used, the Rust repository may be cloned
-into the installation folder of the toolchain::
+Otherwise, if a standalone installer is used, the Rust source tree may be
+downloaded into the toolchain's installation folder::
 
-	git clone --recurse-submodules \
-		--branch $(scripts/min-tool-version.sh rustc) \
-		https://github.com/rust-lang/rust \
-		$(rustc --print sysroot)/lib/rustlib/src/rust
+	curl -L "https://static.rust-lang.org/dist/rust-src-$(scripts/min-tool-version.sh rustc).tar.gz" |
+		tar -xzf - -C "$(rustc --print sysroot)/lib" \
+		"rust-src-$(scripts/min-tool-version.sh rustc)/rust-src/lib/" \
+		--strip-components=3
 
 In this case, upgrading the Rust compiler version later on requires manually
-updating this clone.
+updating the source tree (this can be done by removing ``$(rustc --print
+sysroot)/lib/rustlib/src/rust`` then rerunning the above command).
 
 
 libclang

From 2285eb2f2429daadf9d57f08137b472139470aa9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 3 Aug 2023 02:04:37 -0400
Subject: [PATCH 1398/2047] docs: rust: clarify what 'rustup override' does

The behavior of 'rustup override' is not very well known. Add a small
note about what it does, so users have a better understanding of how it
affects their system toolchain (i.e., it does not affect system
toolchain and only sets a directory-specific override).

Signed-off-by: Trevor Gross <tmgross@umich.edu>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Link: https://lore.kernel.org/r/20230803060437.12157-3-tmgross@umich.edu
[ Undid the `:` to `::` change. ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 Documentation/rust/quick-start.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst
index a0a47a6f216b..f382914f4191 100644
--- a/Documentation/rust/quick-start.rst
+++ b/Documentation/rust/quick-start.rst
@@ -38,7 +38,9 @@ and run::
 
 	rustup override set $(scripts/min-tool-version.sh rustc)
 
-Otherwise, fetch a standalone installer from:
+This will configure your working directory to use the correct version of
+``rustc`` without affecting your default toolchain. If you are not using
+``rustup``, fetch a standalone installer from:
 
 	https://forge.rust-lang.org/infra/other-installation-methods.html#standalone
 

From bbc2bf13886846cfaf8b13aac6e43afdbafa189c Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Fri, 18 Aug 2023 07:45:03 -0500
Subject: [PATCH 1399/2047] dt-bindings: opp: Convert ti-omap5-opp-supply to
 json schema

Rename ti-omap5-opp-supply to be bit more generic omap-opp-supply and
convert the free text binding to json-schema.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Dhruva Gole <d-gole@ti.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 .../bindings/opp/ti,omap-opp-supply.yaml      | 101 ++++++++++++++++++
 .../bindings/opp/ti-omap5-opp-supply.txt      |  63 -----------
 2 files changed, 101 insertions(+), 63 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/opp/ti,omap-opp-supply.yaml
 delete mode 100644 Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt

diff --git a/Documentation/devicetree/bindings/opp/ti,omap-opp-supply.yaml b/Documentation/devicetree/bindings/opp/ti,omap-opp-supply.yaml
new file mode 100644
index 000000000000..693f22539606
--- /dev/null
+++ b/Documentation/devicetree/bindings/opp/ti,omap-opp-supply.yaml
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/opp/ti,omap-opp-supply.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments OMAP compatible OPP supply
+
+description:
+  OMAP5, DRA7, and AM57 families of SoCs have Class 0 AVS eFuse
+  registers, which contain OPP-specific voltage information tailored
+  for the specific device. This binding provides the information
+  needed to describe such a hardware values and relate them to program
+  the primary regulator during an OPP transition.
+
+  Also, some supplies may have an associated vbb-supply, an Adaptive
+  Body Bias regulator, which must transition in a specific sequence
+  w.r.t the vdd-supply and clk when making an OPP transition. By
+  supplying two regulators to the device that will undergo OPP
+  transitions, we can use the multi-regulator support implemented by
+  the OPP core to describe both regulators the platform needs. The
+  OPP core binding Documentation/devicetree/bindings/opp/opp-v2.yaml
+  provides further information (refer to Example 4 Handling multiple
+  regulators).
+
+maintainers:
+  - Nishanth Menon <nm@ti.com>
+
+properties:
+  $nodename:
+    pattern: '^opp-supply(@[0-9a-f]+)?$'
+
+  compatible:
+    oneOf:
+      - description: Basic OPP supply controlling VDD and VBB
+        const: ti,omap-opp-supply
+      - description: OMAP5+ optimized voltages in efuse(Class 0) VDD along with
+          VBB.
+        const: ti,omap5-opp-supply
+      - description: OMAP5+ optimized voltages in efuse(class0) VDD but no VBB
+        const: ti,omap5-core-opp-supply
+
+  reg:
+    maxItems: 1
+
+  ti,absolute-max-voltage-uv:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Absolute maximum voltage for the OPP supply in micro-volts.
+    minimum: 750000
+    maximum: 1500000
+
+  ti,efuse-settings:
+    description: An array of u32 tuple items providing information about
+      optimized efuse configuration.
+    minItems: 1
+    $ref: /schemas/types.yaml#/definitions/uint32-matrix
+    items:
+      items:
+        - description: Reference voltage in micro-volts (OPP Voltage)
+          minimum: 750000
+          maximum: 1500000
+          multipleOf: 10000
+        - description: efuse offset where the optimized voltage is located
+          multipleOf: 4
+          maximum: 256
+
+required:
+  - compatible
+  - ti,absolute-max-voltage-uv
+
+allOf:
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              const: ti,omap-opp-supply
+    then:
+      required:
+        - reg
+        - ti,efuse-settings
+
+additionalProperties: false
+
+examples:
+  - |
+    opp-supply {
+        compatible = "ti,omap-opp-supply";
+        ti,absolute-max-voltage-uv = <1375000>;
+    };
+  - |
+    opp-supply@4a003b20 {
+        compatible = "ti,omap5-opp-supply";
+        reg = <0x4a003b20 0x8>;
+        ti,efuse-settings =
+            /* uV   offset */
+            <1060000 0x0>,
+            <1160000 0x4>,
+            <1210000 0x8>;
+        ti,absolute-max-voltage-uv = <1500000>;
+    };
diff --git a/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt b/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt
deleted file mode 100644
index b70d326117cd..000000000000
--- a/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt
+++ /dev/null
@@ -1,63 +0,0 @@
-Texas Instruments OMAP compatible OPP supply description
-
-OMAP5, DRA7, and AM57 family of SoCs have Class0 AVS eFuse registers which
-contain data that can be used to adjust voltages programmed for some of their
-supplies for more efficient operation. This binding provides the information
-needed to read these values and use them to program the main regulator during
-an OPP transitions.
-
-Also, some supplies may have an associated vbb-supply which is an Adaptive Body
-Bias regulator which much be transitioned in a specific sequence with regards
-to the vdd-supply and clk when making an OPP transition. By supplying two
-regulators to the device that will undergo OPP transitions we can make use
-of the multi regulator binding that is part of the OPP core described here [1]
-to describe both regulators needed by the platform.
-
-[1] Documentation/devicetree/bindings/opp/opp-v2.yaml
-
-Required Properties for Device Node:
-- vdd-supply: phandle to regulator controlling VDD supply
-- vbb-supply: phandle to regulator controlling Body Bias supply
-	      (Usually Adaptive Body Bias regulator)
-
-Required Properties for opp-supply node:
-- compatible: Should be one of:
-	"ti,omap-opp-supply" - basic OPP supply controlling VDD and VBB
-	"ti,omap5-opp-supply" - OMAP5+ optimized voltages in efuse(class0)VDD
-			    along with VBB
-	"ti,omap5-core-opp-supply" - OMAP5+ optimized voltages in efuse(class0) VDD
-			    but no VBB.
-- reg: Address and length of the efuse register set for the device (mandatory
-	only for "ti,omap5-opp-supply")
-- ti,efuse-settings: An array of u32 tuple items providing information about
-	optimized efuse configuration. Each item consists of the following:
-	volt: voltage in uV - reference voltage (OPP voltage)
-	efuse_offseet: efuse offset from reg where the optimized voltage is stored.
-- ti,absolute-max-voltage-uv: absolute maximum voltage for the OPP supply.
-
-Example:
-
-/* Device Node (CPU)  */
-cpus {
-	cpu0: cpu@0 {
-		device_type = "cpu";
-
-		...
-
-		vdd-supply = <&vcc>;
-		vbb-supply = <&abb_mpu>;
-	};
-};
-
-/* OMAP OPP Supply with Class0 registers */
-opp_supply_mpu: opp_supply@4a003b20 {
-	compatible = "ti,omap5-opp-supply";
-	reg = <0x4a003b20 0x8>;
-	ti,efuse-settings = <
-	/* uV   offset */
-	1060000 0x0
-	1160000 0x4
-	1210000 0x8
-	>;
-	ti,absolute-max-voltage-uv = <1500000>;
-};

From e576a9a8603f7c6f8fed5159e2fe33f6d67a49e7 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Fri, 18 Aug 2023 07:45:04 -0500
Subject: [PATCH 1400/2047] dt-bindings: cpufreq: Convert ti-cpufreq to json
 schema

Move the ti-cpufreq binding over to opp and convert the free text
binding to json-schema.

Reviewed-by: Dhruva Gole <d-gole@ti.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 .../bindings/cpufreq/ti-cpufreq.txt           | 132 ------------------
 .../opp/operating-points-v2-ti-cpu.yaml       |  92 ++++++++++++
 2 files changed, 92 insertions(+), 132 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
 create mode 100644 Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml

diff --git a/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt b/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
deleted file mode 100644
index 1758051798fe..000000000000
--- a/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-TI CPUFreq and OPP bindings
-================================
-
-Certain TI SoCs, like those in the am335x, am437x, am57xx, and dra7xx
-families support different OPPs depending on the silicon variant in use.
-The ti-cpufreq driver can use revision and an efuse value from the SoC to
-provide the OPP framework with supported hardware information. This is
-used to determine which OPPs from the operating-points-v2 table get enabled
-when it is parsed by the OPP framework.
-
-Required properties:
---------------------
-In 'cpus' nodes:
-- operating-points-v2: Phandle to the operating-points-v2 table to use.
-
-In 'operating-points-v2' table:
-- compatible: Should be
-	- 'operating-points-v2-ti-cpu' for am335x, am43xx, and dra7xx/am57xx,
-	  omap34xx, omap36xx and am3517 SoCs
-- syscon: A phandle pointing to a syscon node representing the control module
-	  register space of the SoC.
-
-Optional properties:
---------------------
-- "vdd-supply", "vbb-supply": to define two regulators for dra7xx
-- "cpu0-supply", "vbb-supply": to define two regulators for omap36xx
-
-For each opp entry in 'operating-points-v2' table:
-- opp-supported-hw: Two bitfields indicating:
-	1. Which revision of the SoC the OPP is supported by
-	2. Which eFuse bits indicate this OPP is available
-
-	A bitwise AND is performed against these values and if any bit
-	matches, the OPP gets enabled.
-
-Example:
---------
-
-/* From arch/arm/boot/dts/am33xx.dtsi */
-cpus {
-	#address-cells = <1>;
-	#size-cells = <0>;
-	cpu@0 {
-		compatible = "arm,cortex-a8";
-		device_type = "cpu";
-		reg = <0>;
-
-		operating-points-v2 = <&cpu0_opp_table>;
-
-		clocks = <&dpll_mpu_ck>;
-		clock-names = "cpu";
-
-		clock-latency = <300000>; /* From omap-cpufreq driver */
-	};
-};
-
-/*
- * cpu0 has different OPPs depending on SoC revision and some on revisions
- * 0x2 and 0x4 have eFuse bits that indicate if they are available or not
- */
-cpu0_opp_table: opp-table {
-	compatible = "operating-points-v2-ti-cpu";
-	syscon = <&scm_conf>;
-
-	/*
-	 * The three following nodes are marked with opp-suspend
-	 * because they can not be enabled simultaneously on a
-	 * single SoC.
-	 */
-	opp50-300000000 {
-		opp-hz = /bits/ 64 <300000000>;
-		opp-microvolt = <950000 931000 969000>;
-		opp-supported-hw = <0x06 0x0010>;
-		opp-suspend;
-	};
-
-	opp100-275000000 {
-		opp-hz = /bits/ 64 <275000000>;
-		opp-microvolt = <1100000 1078000 1122000>;
-		opp-supported-hw = <0x01 0x00FF>;
-		opp-suspend;
-	};
-
-	opp100-300000000 {
-		opp-hz = /bits/ 64 <300000000>;
-		opp-microvolt = <1100000 1078000 1122000>;
-		opp-supported-hw = <0x06 0x0020>;
-		opp-suspend;
-	};
-
-	opp100-500000000 {
-		opp-hz = /bits/ 64 <500000000>;
-		opp-microvolt = <1100000 1078000 1122000>;
-		opp-supported-hw = <0x01 0xFFFF>;
-	};
-
-	opp100-600000000 {
-		opp-hz = /bits/ 64 <600000000>;
-		opp-microvolt = <1100000 1078000 1122000>;
-		opp-supported-hw = <0x06 0x0040>;
-	};
-
-	opp120-600000000 {
-		opp-hz = /bits/ 64 <600000000>;
-		opp-microvolt = <1200000 1176000 1224000>;
-		opp-supported-hw = <0x01 0xFFFF>;
-	};
-
-	opp120-720000000 {
-		opp-hz = /bits/ 64 <720000000>;
-		opp-microvolt = <1200000 1176000 1224000>;
-		opp-supported-hw = <0x06 0x0080>;
-	};
-
-	oppturbo-720000000 {
-		opp-hz = /bits/ 64 <720000000>;
-		opp-microvolt = <1260000 1234800 1285200>;
-		opp-supported-hw = <0x01 0xFFFF>;
-	};
-
-	oppturbo-800000000 {
-		opp-hz = /bits/ 64 <800000000>;
-		opp-microvolt = <1260000 1234800 1285200>;
-		opp-supported-hw = <0x06 0x0100>;
-	};
-
-	oppnitro-1000000000 {
-		opp-hz = /bits/ 64 <1000000000>;
-		opp-microvolt = <1325000 1298500 1351500>;
-		opp-supported-hw = <0x04 0x0200>;
-	};
-};
diff --git a/Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml b/Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml
new file mode 100644
index 000000000000..02d1d2c17129
--- /dev/null
+++ b/Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/opp/operating-points-v2-ti-cpu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI CPU OPP (Operating Performance Points)
+
+description:
+  TI SoCs, like those in the AM335x, AM437x, AM57xx, AM62x, and DRA7xx
+  families, the CPU frequencies subset and the voltage value of each
+  OPP vary based on the silicon variant used. The data sheet sections
+  corresponding to "Operating Performance Points" describe the frequency
+  and voltage values based on device type and speed bin information
+  blown in corresponding eFuse bits as referred to by the Technical
+  Reference Manual.
+
+  This document extends the operating-points-v2 binding by providing
+  the hardware description for the scheme mentioned above.
+
+maintainers:
+  - Nishanth Menon <nm@ti.com>
+
+allOf:
+  - $ref: opp-v2-base.yaml#
+
+properties:
+  compatible:
+    const: operating-points-v2-ti-cpu
+
+  syscon:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description: |
+      points to syscon node representing the control module
+      register space of the SoC.
+
+  opp-shared: true
+
+patternProperties:
+  '^opp(-?[0-9]+)*$':
+    type: object
+    additionalProperties: false
+
+    properties:
+      clock-latency-ns: true
+      opp-hz: true
+      opp-microvolt: true
+      opp-supported-hw: true
+      opp-suspend: true
+      turbo-mode: true
+
+    required:
+      - opp-hz
+      - opp-supported-hw
+
+required:
+  - compatible
+  - syscon
+
+additionalProperties: false
+
+examples:
+  - |
+    opp-table {
+        compatible = "operating-points-v2-ti-cpu";
+        syscon = <&scm_conf>;
+
+        opp-300000000 {
+            opp-hz = /bits/ 64 <300000000>;
+            opp-microvolt = <1100000 1078000 1122000>;
+            opp-supported-hw = <0x06 0x0020>;
+            opp-suspend;
+        };
+
+        opp-500000000 {
+            opp-hz = /bits/ 64 <500000000>;
+            opp-microvolt = <1100000 1078000 1122000>;
+            opp-supported-hw = <0x01 0xFFFF>;
+        };
+
+        opp-600000000 {
+            opp-hz = /bits/ 64 <600000000>;
+            opp-microvolt = <1100000 1078000 1122000>;
+            opp-supported-hw = <0x06 0x0040>;
+        };
+
+        opp-1000000000 {
+            opp-hz = /bits/ 64 <1000000000>;
+            opp-microvolt = <1325000 1298500 1351500>;
+            opp-supported-hw = <0x04 0x0200>;
+        };
+    };

From 6aca480b76eacb5ae938131c37d06523962904c9 Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Tue, 1 Aug 2023 22:54:13 +0800
Subject: [PATCH 1401/2047] xen/evtchn: Remove unused function declaration
 xen_set_affinity_evtchn()

Commit 67473b8194bc ("xen/events: Remove disfunct affinity spreading")
leave this unused declaration.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Rahul Singh <rahul.singh@arm.com>
Link: https://lore.kernel.org/r/20230801145413.40684-1-yuehaibing@huawei.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 include/xen/events.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/xen/events.h b/include/xen/events.h
index 95970a2f7695..95d5e28de324 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -75,7 +75,6 @@ void evtchn_put(evtchn_port_t evtchn);
 
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);
 void rebind_evtchn_irq(evtchn_port_t evtchn, int irq);
-int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu);
 
 static inline void notify_remote_via_evtchn(evtchn_port_t port)
 {

From d826c9e61c99120f8996f8fed6417167e32eb922 Mon Sep 17 00:00:00 2001
From: Petr Tesarik <petr.tesarik.ext@huawei.com>
Date: Wed, 2 Aug 2023 18:31:51 +0200
Subject: [PATCH 1402/2047] xen: remove a confusing comment on auto-translated
 guest I/O

After removing the conditional return from xen_create_contiguous_region(),
the accompanying comment was left in place, but it now precedes an
unrelated conditional and confuses readers.

Fixes: 989513a735f5 ("xen: cleanup pvh leftovers from pv-only sources")
Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/20230802163151.1486-1-petrtesarik@huaweicloud.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/mmu_pv.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index e0a975165de7..804a5441324c 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2310,12 +2310,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 	int            success;
 	unsigned long vstart = (unsigned long)phys_to_virt(pstart);
 
-	/*
-	 * Currently an auto-translated guest will not perform I/O, nor will
-	 * it require PAE page directories below 4GB. Therefore any calls to
-	 * this function are redundant and can be ignored.
-	 */
-
 	if (unlikely(order > MAX_CONTIG_ORDER))
 		return -ENOMEM;
 

From 067e4f174f6d4ecf8872ad93d859d57bab44f7b6 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 10 Aug 2023 09:27:04 +0200
Subject: [PATCH 1403/2047] x86/xen: Make virt_to_pfn() a static inline

Making virt_to_pfn() a static inline taking a strongly typed
(const void *) makes the contract of a passing a pointer of that
type to the function explicit and exposes any misuse of the
macro virt_to_pfn() acting polymorphic and accepting many types
such as (void *), (unitptr_t) or (unsigned long) as arguments
without warnings.

Also fix all offending call sites to pass a (void *) rather
than an unsigned long. Since virt_to_mfn() is wrapping
virt_to_pfn() this function has become polymorphic as well
so the usage need to be fixed up.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20230810-virt-to-phys-x86-xen-v1-1-9e966d333e7a@linaro.org
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/include/asm/xen/page.h |  5 ++++-
 arch/x86/xen/enlighten_pv.c     |  2 +-
 arch/x86/xen/mmu_pv.c           | 12 ++++++------
 arch/x86/xen/setup.c            |  4 ++--
 4 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index fa9ec20783fa..85e63d58c074 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -295,7 +295,10 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn)
 
 /* VIRT <-> MACHINE conversion */
 #define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
-#define virt_to_pfn(v)          (PFN_DOWN(__pa(v)))
+static inline unsigned long virt_to_pfn(const void *v)
+{
+	return PFN_DOWN(__pa(v));
+}
 #define virt_to_mfn(v)		(pfn_to_mfn(virt_to_pfn(v)))
 #define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 93b658248d01..7178976c5dcf 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -523,7 +523,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
 	BUG_ON(size > PAGE_SIZE);
 	BUG_ON(va & ~PAGE_MASK);
 
-	pfn = virt_to_pfn(va);
+	pfn = virt_to_pfn((void *)va);
 	mfn = pfn_to_mfn(pfn);
 
 	pte = pfn_pte(pfn, PAGE_KERNEL_RO);
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 804a5441324c..eb3bac0b22a1 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2202,13 +2202,13 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
 		mcs = __xen_mc_entry(0);
 
 		if (in_frames)
-			in_frames[i] = virt_to_mfn(vaddr);
+			in_frames[i] = virt_to_mfn((void *)vaddr);
 
 		MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
-		__set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+		__set_phys_to_machine(virt_to_pfn((void *)vaddr), INVALID_P2M_ENTRY);
 
 		if (out_frames)
-			out_frames[i] = virt_to_pfn(vaddr);
+			out_frames[i] = virt_to_pfn((void *)vaddr);
 	}
 	xen_mc_issue(0);
 }
@@ -2250,7 +2250,7 @@ static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
 		MULTI_update_va_mapping(mcs.mc, vaddr,
 				mfn_pte(mfn, PAGE_KERNEL), flags);
 
-		set_phys_to_machine(virt_to_pfn(vaddr), mfn);
+		set_phys_to_machine(virt_to_pfn((void *)vaddr), mfn);
 	}
 
 	xen_mc_issue(0);
@@ -2321,7 +2321,7 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 	xen_zap_pfn_range(vstart, order, in_frames, NULL);
 
 	/* 2. Get a new contiguous memory extent. */
-	out_frame = virt_to_pfn(vstart);
+	out_frame = virt_to_pfn((void *)vstart);
 	success = xen_exchange_memory(1UL << order, 0, in_frames,
 				      1, order, &out_frame,
 				      address_bits);
@@ -2354,7 +2354,7 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 	spin_lock_irqsave(&xen_reservation_lock, flags);
 
 	/* 1. Find start MFN of contiguous extent. */
-	in_frame = virt_to_mfn(vstart);
+	in_frame = virt_to_mfn((void *)vstart);
 
 	/* 2. Zap current PTEs. */
 	xen_zap_pfn_range(vstart, order, NULL, out_frames);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8b5cf7bb1f47..50c998b844fb 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -340,7 +340,7 @@ static void __init xen_do_set_identity_and_remap_chunk(
 
 	WARN_ON(size == 0);
 
-	mfn_save = virt_to_mfn(buf);
+	mfn_save = virt_to_mfn((void *)buf);
 
 	for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
 	     ident_pfn_iter < ident_end_pfn;
@@ -503,7 +503,7 @@ void __init xen_remap_memory(void)
 	unsigned long pfn_s = ~0UL;
 	unsigned long len = 0;
 
-	mfn_save = virt_to_mfn(buf);
+	mfn_save = virt_to_mfn((void *)buf);
 
 	while (xen_remap_mfn != INVALID_P2M_ENTRY) {
 		/* Map the remap information */

From 3e0d473dcb688905dc443f3cfe986fa44b5fe8c4 Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Tue, 8 Aug 2023 23:09:12 +0800
Subject: [PATCH 1404/2047] xen-pciback: Remove unused function declarations

Commit a92336a1176b ("xen/pciback: Drop two backends, squash and cleanup some code.")
declared but never implemented these functions.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20230808150912.43416-1-yuehaibing@huawei.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xen-pciback/conf_space_quirks.h | 2 --
 drivers/xen/xen-pciback/pciback.h           | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/drivers/xen/xen-pciback/conf_space_quirks.h b/drivers/xen/xen-pciback/conf_space_quirks.h
index d873abe35bf6..fc1557dfef49 100644
--- a/drivers/xen/xen-pciback/conf_space_quirks.h
+++ b/drivers/xen/xen-pciback/conf_space_quirks.h
@@ -21,8 +21,6 @@ struct xen_pcibk_config_quirk {
 int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
 				    *field);
 
-int xen_pcibk_config_quirks_remove_field(struct pci_dev *dev, int reg);
-
 int xen_pcibk_config_quirks_init(struct pci_dev *dev);
 
 void xen_pcibk_config_field_free(struct config_field *field);
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index 9a64196e831d..f9599ed2f2e2 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -201,6 +201,3 @@ static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev,
 int xen_pcibk_xenbus_register(void);
 void xen_pcibk_xenbus_unregister(void);
 #endif
-
-/* Handles shared IRQs that can to device domain and control domain. */
-void xen_pcibk_irq_handler(struct pci_dev *dev, int reset);

From 71281ec9c82608c42d6841a75ced97aecd4274be Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Tue, 15 Aug 2023 17:24:34 +0800
Subject: [PATCH 1405/2047] xen: Switch to use kmemdup() helper

Use kmemdup() helper instead of open-coding to
simplify the code.

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Chen Jiahao <chenjiahao16@huawei.com>
Link: https://lore.kernel.org/r/20230815092434.1206386-1-ruanjinjie@huawei.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xen-acpi-processor.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index 9cb61db67efd..296703939846 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -473,11 +473,8 @@ static int xen_upload_processor_pm_data(void)
 		if (!_pr)
 			continue;
 
-		if (!pr_backup) {
-			pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
-			if (pr_backup)
-				memcpy(pr_backup, _pr, sizeof(struct acpi_processor));
-		}
+		if (!pr_backup)
+			pr_backup = kmemdup(_pr, sizeof(*_pr), GFP_KERNEL);
 		(void)upload_pm_data(_pr);
 	}
 

From 035a69586f329e3e73750299695401e0c2b76ee2 Mon Sep 17 00:00:00 2001
From: Li Zetao <lizetao1@huawei.com>
Date: Thu, 17 Aug 2023 09:47:36 +0800
Subject: [PATCH 1406/2047] xen: xenbus: Use helper function IS_ERR_OR_NULL()

Use IS_ERR_OR_NULL() to detect an error pointer or a null pointer
open-coding to simplify the code.

Signed-off-by: Li Zetao <lizetao1@huawei.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20230817014736.3094289-1-lizetao1@huawei.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xenbus/xenbus_probe_frontend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 3f3836cb7279..fcb335bb7b18 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -429,7 +429,7 @@ static void xenbus_check_frontend(char *class, char *dev)
 		printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
 				frontend, xenbus_strstate(fe_state));
 		backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
-		if (!backend || IS_ERR(backend))
+		if (IS_ERR_OR_NULL(backend))
 			goto out;
 		err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
 		if (err == 1)

From 82dbbfdf8f7ecc7381ce2adb22778418d520d84c Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Thu, 17 Aug 2023 21:44:49 +0200
Subject: [PATCH 1407/2047] gpio: pca9570: fix kerneldoc

While renaming one of the fields in the driver data struct, the kerneldoc
was not updated which apparently angers the test robot now.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202308171538.nKKUOtbg-lkp@intel.com/
Fixes: a3f7c1d6ddcb ("gpio: pca9570: rename platform_data to chip_data")
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-pca9570.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-pca9570.c b/drivers/gpio/gpio-pca9570.c
index d8db80ef1293..d37ba4049368 100644
--- a/drivers/gpio/gpio-pca9570.c
+++ b/drivers/gpio/gpio-pca9570.c
@@ -30,7 +30,7 @@ struct pca9570_chip_data {
 /**
  * struct pca9570 - GPIO driver data
  * @chip: GPIO controller chip
- * @p_data: GPIO controller platform data
+ * @chip_data: GPIO controller platform data
  * @lock: Protects write sequences
  * @out: Buffer for device register
  */

From 6bc3462a0f5ecaa376a0b3d76dafc55796799e17 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 18 Aug 2023 09:48:50 -0500
Subject: [PATCH 1408/2047] pinctrl: amd: Mask wake bits on probe again

Shubhra reports that their laptop is heating up over s2idle. Even though
it's getting into the deepest state, it appears to be having spurious
wakeup events.

While debugging a tangential issue with the RTC Carsten reports that recent
6.1.y based kernel face a similar problem.

Looking at acpidump and GPIO register comparisons these spurious wakeup
events are from the GPIO associated with the I2C touchpad on both laptops
and occur even when the touchpad is not marked as a wake source by the
kernel.

This means that the boot firmware has programmed these bits and because
Linux didn't touch them lead to spurious wakeup events from that GPIO.

To fix this issue, restore most of the code that previously would clear all
the bits associated with wakeup sources. This will allow the kernel to only
program the wake up sources that are necessary.

This is similar to what was done previously; but only the wake bits are
cleared by default instead of interrupts and wake bits.  If any other
problems are reported then it may make sense to clear interrupts again too.

Cc: Sachi King <nakato@nakato.io>
Cc: stable@vger.kernel.org
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Fixes: 65f6c7c91cb2 ("pinctrl: amd: Revert "pinctrl: amd: disable and mask interrupts on probe"")
Reported-by: Shubhra Prakash Nandi <email2shubhra@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217754
Reported-by: Carsten Hatger <xmb8dsv4@gmail.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217626#c28
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://lore.kernel.org/r/20230818144850.1439-1-mario.limonciello@amd.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinctrl-amd.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 4a8c1b57a90d..4dff656af3ad 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -862,6 +862,33 @@ static const struct pinconf_ops amd_pinconf_ops = {
 	.pin_config_group_set = amd_pinconf_group_set,
 };
 
+static void amd_gpio_irq_init(struct amd_gpio *gpio_dev)
+{
+	struct pinctrl_desc *desc = gpio_dev->pctrl->desc;
+	unsigned long flags;
+	u32 pin_reg, mask;
+	int i;
+
+	mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) |
+		BIT(WAKE_CNTRL_OFF_S4);
+
+	for (i = 0; i < desc->npins; i++) {
+		int pin = desc->pins[i].number;
+		const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin);
+
+		if (!pd)
+			continue;
+
+		raw_spin_lock_irqsave(&gpio_dev->lock, flags);
+
+		pin_reg = readl(gpio_dev->base + pin * 4);
+		pin_reg &= ~mask;
+		writel(pin_reg, gpio_dev->base + pin * 4);
+
+		raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+	}
+}
+
 #ifdef CONFIG_PM_SLEEP
 static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin)
 {
@@ -1099,6 +1126,9 @@ static int amd_gpio_probe(struct platform_device *pdev)
 		return PTR_ERR(gpio_dev->pctrl);
 	}
 
+	/* Disable and mask interrupts */
+	amd_gpio_irq_init(gpio_dev);
+
 	girq = &gpio_dev->gc.irq;
 	gpio_irq_chip_set_chip(girq, &amd_gpio_irqchip);
 	/* This will let us handle the parent IRQ in the driver */

From e346f9dd4cc24761c2939f46416719487480dd5c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 10 Jul 2023 13:18:57 +0200
Subject: [PATCH 1409/2047] m68k: defconfig: Update defconfigs for v6.5-rc1

  - Drop CONFIG_CRYPTO_SHA3=m (auto-enabled since commit
    bb897c55042e9330 ("crypto: jitter - replace LFSR with SHA3-256")),
  - Enable modular build of the Maple Tree test, which can now be
    enabled when debugging is disabled.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/274736e1cf7ab863fe84400611c300cb5a25b8c8.1688985387.git.geert@linux-m68k.org
---
 arch/m68k/configs/amiga_defconfig    | 2 +-
 arch/m68k/configs/apollo_defconfig   | 2 +-
 arch/m68k/configs/atari_defconfig    | 2 +-
 arch/m68k/configs/bvme6000_defconfig | 2 +-
 arch/m68k/configs/hp300_defconfig    | 2 +-
 arch/m68k/configs/mac_defconfig      | 2 +-
 arch/m68k/configs/multi_defconfig    | 2 +-
 arch/m68k/configs/mvme147_defconfig  | 2 +-
 arch/m68k/configs/mvme16x_defconfig  | 2 +-
 arch/m68k/configs/q40_defconfig      | 2 +-
 arch/m68k/configs/sun3_defconfig     | 2 +-
 arch/m68k/configs/sun3x_defconfig    | 2 +-
 12 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 4383ed851063..6deb8faa564b 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -591,7 +591,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -635,6 +634,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index ec0f9c9f9562..802c161827f4 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -548,7 +548,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -591,6 +590,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 8656ae1f239e..2cb3d755873b 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -568,7 +568,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -612,6 +611,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 496fb6a415ea..b13552caa6b3 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -540,7 +540,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -583,6 +582,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 4add7ab9973b..f88356c45440 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -550,7 +550,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -593,6 +592,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 5845f1f71fd1..7c2ebb616fba 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -614,6 +613,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index bbb251bab81a..d3b272910b38 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -656,7 +656,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -700,6 +699,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 4f9cfc70c66d..4529bc4b843c 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -539,7 +539,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -582,6 +581,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 67c42b4822f0..30824032e4d5 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -540,7 +540,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -583,6 +582,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 85f19515200b..3911211410ed 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -557,7 +557,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -601,6 +600,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index b1b15acb5d5f..991730c50957 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -538,7 +538,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -580,6 +579,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 91d66c0f5ab6..e80d7509ab1d 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -538,7 +538,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -581,6 +580,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m

From 7547cbe20ee80d24f3e182b4dcd3e05c473fb10b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 8 Aug 2023 00:36:53 +0900
Subject: [PATCH 1410/2047] m68k: Replace #include <asm/export.h> with #include
 <linux/export.h>

Commit ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost")
deprecated <asm/export.h>, which is now a wrapper of <linux/export.h>.

Replace #include <asm/export.h> with #include <linux/export.h>.

After all the <asm/export.h> lines are converted, <asm/export.h> and
<asm-generic/export.h> will be removed.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20230807153654.997091-1-masahiroy@kernel.org
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/lib/divsi3.S  | 2 +-
 arch/m68k/lib/modsi3.S  | 2 +-
 arch/m68k/lib/mulsi3.S  | 2 +-
 arch/m68k/lib/udivsi3.S | 2 +-
 arch/m68k/lib/umodsi3.S | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/m68k/lib/divsi3.S b/arch/m68k/lib/divsi3.S
index 3a2143f51631..62787b4333e7 100644
--- a/arch/m68k/lib/divsi3.S
+++ b/arch/m68k/lib/divsi3.S
@@ -33,7 +33,7 @@ General Public License for more details. */
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* These are predefined by new versions of GNU cpp.  */
 
diff --git a/arch/m68k/lib/modsi3.S b/arch/m68k/lib/modsi3.S
index 1c967649a4e0..1bcb742d0b76 100644
--- a/arch/m68k/lib/modsi3.S
+++ b/arch/m68k/lib/modsi3.S
@@ -33,7 +33,7 @@ General Public License for more details. */
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* These are predefined by new versions of GNU cpp.  */
 
diff --git a/arch/m68k/lib/mulsi3.S b/arch/m68k/lib/mulsi3.S
index 855675e69a8a..c2853248249e 100644
--- a/arch/m68k/lib/mulsi3.S
+++ b/arch/m68k/lib/mulsi3.S
@@ -32,7 +32,7 @@ General Public License for more details. */
    Some of this code comes from MINIX, via the folks at ericsson.
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
-#include <asm/export.h>
+#include <linux/export.h>
 /* These are predefined by new versions of GNU cpp.  */
 
 #ifndef __USER_LABEL_PREFIX__
diff --git a/arch/m68k/lib/udivsi3.S b/arch/m68k/lib/udivsi3.S
index 78440ae513bf..39ad70596293 100644
--- a/arch/m68k/lib/udivsi3.S
+++ b/arch/m68k/lib/udivsi3.S
@@ -32,7 +32,7 @@ General Public License for more details. */
    Some of this code comes from MINIX, via the folks at ericsson.
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
-#include <asm/export.h>
+#include <linux/export.h>
 /* These are predefined by new versions of GNU cpp.  */
 
 #ifndef __USER_LABEL_PREFIX__
diff --git a/arch/m68k/lib/umodsi3.S b/arch/m68k/lib/umodsi3.S
index b6fd11f58948..6640eaa9eb03 100644
--- a/arch/m68k/lib/umodsi3.S
+++ b/arch/m68k/lib/umodsi3.S
@@ -32,7 +32,7 @@ General Public License for more details. */
    Some of this code comes from MINIX, via the folks at ericsson.
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
-#include <asm/export.h>
+#include <linux/export.h>
 /* These are predefined by new versions of GNU cpp.  */
 
 #ifndef __USER_LABEL_PREFIX__

From 6831a8a3bd76adb7f7c91006a8ce8b3b67a666cc Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 8 Aug 2023 00:36:54 +0900
Subject: [PATCH 1411/2047] m68k: Remove <asm/export.h>

All *.S files under arch/m68k/ have been converted to include
<linux/export.h> instead of <asm/export.h>.

Remove <asm/export.h>.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20230807153654.997091-2-masahiroy@kernel.org
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/Kbuild | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 1b720299deb1..0dbf9c5c6fae 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
-generic-y += export.h
 generic-y += extable.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h

From 668a920208fffaf5308aabd304f7568d6815169b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 9 Aug 2023 23:10:49 +0200
Subject: [PATCH 1412/2047] m68k: Define __div64_32() to avoid a warning

The generic __div64_32() definition is built on all architectures that
don't provide this as a macro.  m68k however neither defines nor
requires __div64_32, so it should define an empty one to avoid the
warning:

    lib/math/div64.c:31:32: error: no previous prototype for '__div64_32' [-Werror=missing-prototypes]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20230809211057.60514-1-arnd@kernel.org
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/div64.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/m68k/include/asm/div64.h b/arch/m68k/include/asm/div64.h
index 365f39f5e256..df1f6b450cc5 100644
--- a/arch/m68k/include/asm/div64.h
+++ b/arch/m68k/include/asm/div64.h
@@ -31,6 +31,9 @@
 	__rem;							\
 })
 
+/* defining this stops the unused helper function from being built */
+#define __div64_32 __div64_32
+
 #endif /* CONFIG_CPU_HAS_NO_MULDIV64 */
 
 #endif /* _M68K_DIV64_H */

From 1a48e2ccd4385caa9ad9659ac8f9d75affacc9da Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 9 Aug 2023 23:10:50 +0200
Subject: [PATCH 1413/2047] m68k: Add memcmp() declaration

There is a global definition of memcmp() that gets built on m68k but is
never used and causes a warning because of the missing prototype:

    lib/string.c:671:15: error: no previous prototype for 'memcmp' [-Werror=missing-prototypes]

Add the corresponding declaration to avoid the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20230809211057.60514-2-arnd@kernel.org
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/string.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/m68k/include/asm/string.h b/arch/m68k/include/asm/string.h
index f0f5021d6327..760cc13acdf4 100644
--- a/arch/m68k/include/asm/string.h
+++ b/arch/m68k/include/asm/string.h
@@ -41,6 +41,7 @@ static inline char *strncpy(char *dest, const char *src, size_t n)
 #define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *, const void *, __kernel_size_t);
 
+extern int memcmp(const void *, const void *, __kernel_size_t);
 #define memcmp(d, s, n) __builtin_memcmp(d, s, n)
 
 #define __HAVE_ARCH_MEMSET

From eb1e6ccdcf785e633863c5c7cec32d6c1e7b09dd Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 10 Aug 2023 16:19:28 +0200
Subject: [PATCH 1414/2047] zorro: Include zorro.h in names.c

The zorro_name_device() function is defined in drivers/zorro/names.c,
but the declaration is not visible there:

    drivers/zorro/names.c:58:13: error: no previous prototype for 'zorro_name_device' [-Werror=missing-prototypes]
       58 | void __init zorro_name_device(struct zorro_dev *dev)

Include the header to avoid the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20230810141947.1236730-11-arnd@kernel.org
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 drivers/zorro/names.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/zorro/names.c b/drivers/zorro/names.c
index fa3c83dbe843..077114ccc840 100644
--- a/drivers/zorro/names.c
+++ b/drivers/zorro/names.c
@@ -15,6 +15,7 @@
 #include <linux/types.h>
 #include <linux/zorro.h>
 
+#include "zorro.h"
 
 struct zorro_prod_info {
 	__u16 prod;

From 715dc9a1f0c8ae0bbd4e5b37f9a165ea39e47fe9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Leonard=20G=C3=B6hrs?= <l.goehrs@pengutronix.de>
Date: Wed, 16 Aug 2023 16:52:37 +0200
Subject: [PATCH 1415/2047] spi: stm32: fix accidential revert to byte-sized
 transfer splitting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 6f486556abe35 ("spi: stm32: renaming of spi_master into
spi_controller") included an accidential reverted of a change added in
commit 1e4929112507f ("spi: stm32: split large transfers based on word
size instead of bytes").

This breaks large SPI transfers with word sizes > 8 bits, which are
e.g. common when driving MIPI DBI displays.

Fix this by using `spi_split_transfers_maxwords()` instead of
`spi_split_transfers_maxsize()`.

Fixes: 6f486556abe35 ("spi: stm32: renaming of spi_master into spi_controller")
Signed-off-by: Leonard Göhrs <l.goehrs@pengutronix.de>
Link: https://lore.kernel.org/r/20230816145237.3159817-1-l.goehrs@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-stm32.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 6d10fa4ab783..7ddf9db776b0 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -1001,9 +1001,9 @@ static int stm32_spi_prepare_msg(struct spi_controller *ctrl,
 	if (spi->cfg->set_number_of_data) {
 		int ret;
 
-		ret = spi_split_transfers_maxsize(ctrl, msg,
-						  STM32H7_SPI_TSIZE_MAX,
-						  GFP_KERNEL | GFP_DMA);
+		ret = spi_split_transfers_maxwords(ctrl, msg,
+						   STM32H7_SPI_TSIZE_MAX,
+						   GFP_KERNEL | GFP_DMA);
 		if (ret)
 			return ret;
 	}

From 627d05a41ca1fbb9d390f9513af262f001f261f7 Mon Sep 17 00:00:00 2001
From: Srinivas Goud <srinivas.goud@amd.com>
Date: Mon, 21 Aug 2023 15:00:16 +0530
Subject: [PATCH 1416/2047] spi: spi-cadence: Fix data corruption issues in
 slave mode

Remove 10us delay in cdns_spi_process_fifo() (called from cdns_spi_irq())
to fix data corruption issue on Master side when this driver
configured in Slave mode, as Slave is failed to prepare the date
on time due to above delay.

Add 10us delay before processing the RX FIFO as TX empty doesn't
guarantee valid data in RX FIFO.

Signed-off-by: Srinivas Goud <srinivas.goud@amd.com>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Tested-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/1692610216-217644-1-git-send-email-srinivas.goud@amd.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cadence.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index de8fe3c5becb..9b021390263e 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -317,12 +317,6 @@ static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx)
 	xspi->rx_bytes -= nrx;
 
 	while (ntx || nrx) {
-		/* When xspi in busy condition, bytes may send failed,
-		 * then spi control did't work thoroughly, add one byte delay
-		 */
-		if (cdns_spi_read(xspi, CDNS_SPI_ISR) & CDNS_SPI_IXR_TXFULL)
-			udelay(10);
-
 		if (ntx) {
 			if (xspi->txbuf)
 				cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++);
@@ -392,6 +386,11 @@ static irqreturn_t cdns_spi_irq(int irq, void *dev_id)
 		if (xspi->tx_bytes) {
 			cdns_spi_process_fifo(xspi, trans_cnt, trans_cnt);
 		} else {
+			/* Fixed delay due to controller limitation with
+			 * RX_NEMPTY incorrect status
+			 * Xilinx AR:65885 contains more details
+			 */
+			udelay(10);
 			cdns_spi_process_fifo(xspi, 0, trans_cnt);
 			cdns_spi_write(xspi, CDNS_SPI_IDR,
 				       CDNS_SPI_IXR_DEFAULT);
@@ -439,12 +438,18 @@ static int cdns_transfer_one(struct spi_controller *ctlr,
 		cdns_spi_setup_transfer(spi, transfer);
 	} else {
 		/* Set TX empty threshold to half of FIFO depth
-		 * only if TX bytes are more than half FIFO depth.
+		 * only if TX bytes are more than FIFO depth.
 		 */
 		if (xspi->tx_bytes > xspi->tx_fifo_depth)
 			cdns_spi_write(xspi, CDNS_SPI_THLD, xspi->tx_fifo_depth >> 1);
 	}
 
+	/* When xspi in busy condition, bytes may send failed,
+	 * then spi control didn't work thoroughly, add one byte delay
+	 */
+	if (cdns_spi_read(xspi, CDNS_SPI_ISR) & CDNS_SPI_IXR_TXFULL)
+		udelay(10);
+
 	cdns_spi_process_fifo(xspi, xspi->tx_fifo_depth, 0);
 	spi_transfer_delay_exec(transfer);
 

From 2796a01cdf2c639e605088c53a1ac36923ade93c Mon Sep 17 00:00:00 2001
From: Alec Li <like@awinic.com>
Date: Mon, 21 Aug 2023 03:53:54 +0000
Subject: [PATCH 1417/2047] regulator: aw37503: add regulator driver for Awinic
 AW37503

Add regulator driver for the device Awinic AW37503 which is single
inductor - dual output power supply device. AW37503 device is
designed to support general positive/negative driven applications
like TFT display panels.

AW37503 regulator driver supports to enable/disable and set voltage
on its output.

Signed-off-by: Alec Li <like@awinic.com>
Link: https://lore.kernel.org/r/20230821035355.1269976-2-like@awinic.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig             |   8 +
 drivers/regulator/Makefile            |   1 +
 drivers/regulator/aw37503-regulator.c | 240 ++++++++++++++++++++++++++
 3 files changed, 249 insertions(+)
 create mode 100644 drivers/regulator/aw37503-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 186b97c5aa16..965d4f0c18a6 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -178,6 +178,14 @@ config REGULATOR_ATC260X
 	  ATC260x PMICs. This will enable support for all the software
 	  controllable DCDC/LDO regulators.
 
+config REGULATOR_AW37503
+	tristate "Awinic AW37503 Dual Output Power regulators"
+	depends on I2C && GPIOLIB
+	select REGMAP_I2C
+	help
+	  This driver supports AW37503 single inductor - dual output
+	  power supply specifically designed for display panels.
+
 config REGULATOR_AXP20X
 	tristate "X-POWERS AXP20X PMIC Regulators"
 	depends on MFD_AXP20X
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 08727557b402..23074714a81a 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_REGULATOR_ARM_SCMI) += scmi-regulator.o
 obj-$(CONFIG_REGULATOR_AS3711) += as3711-regulator.o
 obj-$(CONFIG_REGULATOR_AS3722) += as3722-regulator.o
 obj-$(CONFIG_REGULATOR_ATC260X) += atc260x-regulator.o
+obj-$(CONFIG_REGULATOR_AW37503) += aw37503-regulator.o
 obj-$(CONFIG_REGULATOR_AXP20X) += axp20x-regulator.o
 obj-$(CONFIG_REGULATOR_BCM590XX) += bcm590xx-regulator.o
 obj-$(CONFIG_REGULATOR_BD71815)	+= bd71815-regulator.o
diff --git a/drivers/regulator/aw37503-regulator.c b/drivers/regulator/aw37503-regulator.c
new file mode 100644
index 000000000000..e01ef6ad3eb8
--- /dev/null
+++ b/drivers/regulator/aw37503-regulator.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// AWINIC AW37503 Regulator Driver
+//
+// Copyright (C) 2023 awinic. All Rights Reserved
+//
+// Author: <like@awinic.com>
+
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+
+#define AW37503_REG_VPOS		0x00
+#define AW37503_REG_VNEG		0x01
+#define AW37503_REG_APPS		0x03
+#define AW37503_REG_CONTROL		0x04
+#define AW37503_REG_WPRTEN		0x21
+
+#define AW37503_VOUT_MASK		0x1F
+#define AW37503_VOUT_N_VOLTAGE		0x15
+#define AW37503_VOUT_VMIN		4000000
+#define AW37503_VOUT_VMAX		6000000
+#define AW37503_VOUT_STEP		100000
+
+#define AW37503_REG_APPS_DIS_VPOS	BIT(1)
+#define AW37503_REG_APPS_DIS_VNEG	BIT(0)
+
+#define AW37503_REGULATOR_ID_VPOS	0
+#define AW37503_REGULATOR_ID_VNEG	1
+#define AW37503_MAX_REGULATORS		2
+
+struct aw37503_reg_pdata {
+	struct gpio_desc *en_gpiod;
+	int ena_gpio_state;
+};
+
+struct aw37503_regulator {
+	struct device *dev;
+	struct aw37503_reg_pdata reg_pdata[AW37503_MAX_REGULATORS];
+};
+
+static int aw37503_regulator_enable(struct regulator_dev *rdev)
+{
+	struct aw37503_regulator *chip = rdev_get_drvdata(rdev);
+	int id = rdev_get_id(rdev);
+	struct aw37503_reg_pdata *rpdata = &chip->reg_pdata[id];
+	int ret;
+
+	if (!IS_ERR(rpdata->en_gpiod)) {
+		gpiod_set_value_cansleep(rpdata->en_gpiod, 1);
+		rpdata->ena_gpio_state = 1;
+	}
+
+	/* Hardware automatically enable discharge bit in enable */
+	if (rdev->constraints->active_discharge ==
+			REGULATOR_ACTIVE_DISCHARGE_DISABLE) {
+		ret = regulator_set_active_discharge_regmap(rdev, false);
+		if (ret < 0) {
+			dev_err(chip->dev, "Failed to disable active discharge: %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int aw37503_regulator_disable(struct regulator_dev *rdev)
+{
+	struct aw37503_regulator *chip = rdev_get_drvdata(rdev);
+	int id = rdev_get_id(rdev);
+	struct aw37503_reg_pdata *rpdata = &chip->reg_pdata[id];
+
+	if (!IS_ERR(rpdata->en_gpiod)) {
+		gpiod_set_value_cansleep(rpdata->en_gpiod, 0);
+		rpdata->ena_gpio_state = 0;
+	}
+
+	return 0;
+}
+
+static int aw37503_regulator_is_enabled(struct regulator_dev *rdev)
+{
+	struct aw37503_regulator *chip = rdev_get_drvdata(rdev);
+	int id = rdev_get_id(rdev);
+	struct aw37503_reg_pdata *rpdata = &chip->reg_pdata[id];
+
+	if (!IS_ERR(rpdata->en_gpiod))
+		return rpdata->ena_gpio_state;
+
+	return 1;
+}
+
+static const struct regulator_ops aw37503_regulator_ops = {
+	.enable = aw37503_regulator_enable,
+	.disable = aw37503_regulator_disable,
+	.is_enabled = aw37503_regulator_is_enabled,
+	.list_voltage = regulator_list_voltage_linear,
+	.map_voltage = regulator_map_voltage_linear,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.set_active_discharge = regulator_set_active_discharge_regmap,
+};
+
+static int aw37503_of_parse_cb(struct device_node *np,
+				const struct regulator_desc *desc,
+				struct regulator_config *config)
+{
+	struct aw37503_regulator *chip = config->driver_data;
+	struct aw37503_reg_pdata *rpdata = &chip->reg_pdata[desc->id];
+	int ret;
+
+	rpdata->en_gpiod = devm_fwnode_gpiod_get(chip->dev, of_fwnode_handle(np),
+						 "enable", GPIOD_OUT_LOW,
+						 "enable");
+
+	if (IS_ERR(rpdata->en_gpiod)) {
+		ret = PTR_ERR(rpdata->en_gpiod);
+
+		/* Ignore the error other than probe defer */
+		if (ret == -EPROBE_DEFER)
+			return ret;
+		return 0;
+	}
+
+	return 0;
+}
+
+#define AW37503_REGULATOR_DESC(_id, _name)		\
+	[AW37503_REGULATOR_ID_##_id] = {		\
+		.name = "aw37503-"#_name,		\
+		.supply_name = "vin",			\
+		.id = AW37503_REGULATOR_ID_##_id,	\
+		.of_match = of_match_ptr(#_name),	\
+		.of_parse_cb = aw37503_of_parse_cb,	\
+		.ops = &aw37503_regulator_ops,		\
+		.n_voltages = AW37503_VOUT_N_VOLTAGE,	\
+		.min_uV = AW37503_VOUT_VMIN,		\
+		.uV_step = AW37503_VOUT_STEP,		\
+		.enable_time = 500,			\
+		.vsel_mask = AW37503_VOUT_MASK,	\
+		.vsel_reg = AW37503_REG_##_id,		\
+		.active_discharge_off = 0,			\
+		.active_discharge_on = AW37503_REG_APPS_DIS_##_id, \
+		.active_discharge_mask = AW37503_REG_APPS_DIS_##_id, \
+		.active_discharge_reg = AW37503_REG_APPS, \
+		.type = REGULATOR_VOLTAGE,		\
+		.owner = THIS_MODULE,			\
+	}
+
+static const struct regulator_desc aw_regs_desc[AW37503_MAX_REGULATORS] = {
+	AW37503_REGULATOR_DESC(VPOS, outp),
+	AW37503_REGULATOR_DESC(VNEG, outn),
+};
+
+static const struct regmap_range aw37503_no_reg_ranges[] = {
+	regmap_reg_range(AW37503_REG_CONTROL + 1,
+			 AW37503_REG_WPRTEN - 1),
+};
+
+static const struct regmap_access_table aw37503_no_reg_table = {
+	.no_ranges = aw37503_no_reg_ranges,
+	.n_no_ranges = ARRAY_SIZE(aw37503_no_reg_ranges),
+};
+
+static const struct regmap_config aw37503_regmap_config = {
+	.reg_bits	= 8,
+	.val_bits	= 8,
+	.max_register	= AW37503_REG_WPRTEN,
+	.rd_table	= &aw37503_no_reg_table,
+	.wr_table	= &aw37503_no_reg_table,
+};
+
+static int aw37503_probe(struct i2c_client *client)
+{
+	struct device *dev = &client->dev;
+	struct aw37503_regulator *chip;
+	struct regulator_dev *rdev;
+	struct regmap *regmap;
+	struct regulator_config config = { };
+	int id;
+
+	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	regmap = devm_regmap_init_i2c(client, &aw37503_regmap_config);
+	if (IS_ERR(regmap))
+		return dev_err_probe(dev, PTR_ERR(regmap),
+				     "Failed to init regmap\n");
+
+	i2c_set_clientdata(client, chip);
+	chip->dev = dev;
+
+	config.regmap = regmap;
+	config.dev = dev;
+	config.driver_data = chip;
+
+	for (id = 0; id < AW37503_MAX_REGULATORS; ++id) {
+		rdev = devm_regulator_register(dev, &aw_regs_desc[id],
+					       &config);
+		if (IS_ERR(rdev))
+			return dev_err_probe(dev, PTR_ERR(rdev),
+					     "Failed to register regulator %s\n",
+					     aw_regs_desc[id].name);
+	}
+	return 0;
+}
+
+static const struct i2c_device_id aw37503_id[] = {
+	{.name = "aw37503",},
+	{},
+};
+MODULE_DEVICE_TABLE(i2c, aw37503_id);
+
+static const struct of_device_id aw37503_of_match[] = {
+	{.compatible = "awinic,aw37503",},
+	{ /* Sentinel */ },
+};
+
+MODULE_DEVICE_TABLE(of, aw37503_of_match);
+
+static struct i2c_driver aw37503_i2c_driver = {
+	.driver = {
+		.name = "aw37503",
+		.of_match_table = aw37503_of_match,
+	},
+	.probe_new = aw37503_probe,
+	.id_table = aw37503_id,
+};
+
+module_i2c_driver(aw37503_i2c_driver);
+
+MODULE_DESCRIPTION("aw37503 regulator driver");
+MODULE_AUTHOR("Alec Li <like@awinic.com>");
+MODULE_LICENSE("GPL");

From 86a1b61a0c7316febecd03d47eaf893eb5a57659 Mon Sep 17 00:00:00 2001
From: Alec Li <like@awinic.com>
Date: Mon, 21 Aug 2023 03:53:55 +0000
Subject: [PATCH 1418/2047] regulator: dt-bindings: Add Awinic AW37503

Add aw37503 regulator device-tree binding documentation.

Signed-off-by: Alec Li <like@awinic.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230821035355.1269976-3-like@awinic.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/awinic,aw37503.yaml    | 78 +++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/awinic,aw37503.yaml

diff --git a/Documentation/devicetree/bindings/regulator/awinic,aw37503.yaml b/Documentation/devicetree/bindings/regulator/awinic,aw37503.yaml
new file mode 100644
index 000000000000..c92a881ed60e
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/awinic,aw37503.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/awinic,aw37503.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Awinic AW37503 Voltage Regulator
+
+maintainers:
+  - Alec Li <like@awinic.com>
+
+description:
+  The AW37503 are dual voltage regulator, designed to support positive/negative
+  supply for driving TFT-LCD panels. It support software-configurable output
+  switching and monitoring. The output voltages can be programmed via an I2C
+  compatible interface.
+
+properties:
+  compatible:
+    const: awinic,aw37503
+
+  reg:
+    maxItems: 1
+
+patternProperties:
+  "^out[pn]$":
+    type: object
+    $ref: regulator.yaml#
+    unevaluatedProperties: false
+    description:
+      Properties for single regulator.
+
+    properties:
+      enable-gpios:
+        maxItems: 1
+        description:
+          GPIO specifier to enable the GPIO control (on/off) for regulator.
+
+    required:
+      - regulator-name
+
+required:
+  - compatible
+  - reg
+  - outp
+  - outn
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        regulator@3e {
+            compatible = "awinic,aw37503";
+            reg = <0x3e>;
+
+            outp {
+                regulator-name = "outp";
+                regulator-boot-on;
+                regulator-always-on;
+                enable-gpios = <&gpio 17 GPIO_ACTIVE_LOW>;
+            };
+
+            outn {
+                regulator-name = "outn";
+                regulator-boot-on;
+                regulator-always-on;
+                enable-gpios = <&gpio 27 GPIO_ACTIVE_LOW>;
+            };
+        };
+    };
+...
+

From 246f74bb11e7df33946443a21c03463937867a09 Mon Sep 17 00:00:00 2001
From: Zhang Zekun <zhangzekun11@huawei.com>
Date: Thu, 10 Aug 2023 21:16:50 +0800
Subject: [PATCH 1419/2047] spi: bcm-qspi: Simplify logic by using
 devm_platform_ioremap_resource_byname()

platform_get_resource_byname() and devm_ioremap_resource() can be
replaced by devm_platform_ioremap_resource_byname(), which can
simplify the code logic a bit, No functional change here.

Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
Link: https://lore.kernel.org/r/20230810131650.71916-1-zhangzekun11@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-iproc-qspi.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/spi/spi-iproc-qspi.c b/drivers/spi/spi-iproc-qspi.c
index 5980a0dbbccb..39ee2b43a516 100644
--- a/drivers/spi/spi-iproc-qspi.c
+++ b/drivers/spi/spi-iproc-qspi.c
@@ -94,7 +94,6 @@ static int bcm_iproc_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct bcm_iproc_intc *priv;
 	struct bcm_qspi_soc_intc *soc_intc;
-	struct resource *res;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -104,14 +103,12 @@ static int bcm_iproc_probe(struct platform_device *pdev)
 
 	spin_lock_init(&priv->soclock);
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "intr_regs");
-	priv->int_reg = devm_ioremap_resource(dev, res);
+	priv->int_reg = devm_platform_ioremap_resource_byname(pdev, "intr_regs");
 	if (IS_ERR(priv->int_reg))
 		return PTR_ERR(priv->int_reg);
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-					   "intr_status_reg");
-	priv->int_status_reg = devm_ioremap_resource(dev, res);
+	priv->int_status_reg = devm_platform_ioremap_resource_byname(pdev,
+								     "intr_status_reg");
 	if (IS_ERR(priv->int_status_reg))
 		return PTR_ERR(priv->int_status_reg);
 

From 9e0fa5d85a4f49496e1a92889418c9d7767c1ab9 Mon Sep 17 00:00:00 2001
From: Li Zetao <lizetao1@huawei.com>
Date: Fri, 18 Aug 2023 17:30:14 +0800
Subject: [PATCH 1420/2047] gpio: mxc: Use helper function
 devm_clk_get_optional_enabled()

Since commit 7ef9651e9792 ("clk: Provide new devm_clk helpers for
prepared and enabled clocks"), devm_clk_get_optional() and
clk_prepare_enable() can now be replaced by
devm_clk_get_optional_enabled() when the driver enables (and possibly
prepares) the clocks for the whole lifetime of the device. Moreover,
it is no longer necessary to unprepare and disable the clocks explicitly.

Signed-off-by: Li Zetao <lizetao1@huawei.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-mxc.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index 004c6ad7ce52..4cb455b2bdee 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -452,16 +452,10 @@ static int mxc_gpio_probe(struct platform_device *pdev)
 		return port->irq;
 
 	/* the controller clock is optional */
-	port->clk = devm_clk_get_optional(&pdev->dev, NULL);
+	port->clk = devm_clk_get_optional_enabled(&pdev->dev, NULL);
 	if (IS_ERR(port->clk))
 		return PTR_ERR(port->clk);
 
-	err = clk_prepare_enable(port->clk);
-	if (err) {
-		dev_err(&pdev->dev, "Unable to enable clock.\n");
-		return err;
-	}
-
 	if (of_device_is_compatible(np, "fsl,imx7d-gpio"))
 		port->power_off = true;
 
@@ -535,7 +529,6 @@ out_irqdomain_remove:
 out_bgio:
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_put_noidle(&pdev->dev);
-	clk_disable_unprepare(port->clk);
 	dev_info(&pdev->dev, "%s failed with errno %d\n", __func__, err);
 	return err;
 }

From b3068ac37b1c10ee4b9fb6c07a2e46021376c374 Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:46:26 +0000
Subject: [PATCH 1421/2047] rust: init: consolidate init macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Merges the implementations of `try_init!` and `try_pin_init!`. These two
macros are very similar, but use different traits. The new macro
`__init_internal!` that is now the implementation for both takes these
traits as parameters.

This change does not affect any users, as no public API has been
changed, but it should simplify maintaining the init macros.

Reviewed-by: Björn Roy Baron <bjorn3_gh@protonmail.com>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20230814084602.25699-2-benno.lossin@proton.me
[ Cleaned a couple trivial nits. ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init.rs        | 388 +++----------------------------------
 rust/kernel/init/macros.rs | 237 +++++++++++++++++++++-
 2 files changed, 259 insertions(+), 366 deletions(-)

diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index b4332a4ec1f4..d9a91950cba2 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -540,11 +540,14 @@ macro_rules! pin_init {
     ($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? {
         $($fields:tt)*
     }) => {
-        $crate::try_pin_init!(
+        $crate::__init_internal!(
             @this($($this)?),
             @typ($t $(::<$($generics),*>)?),
             @fields($($fields)*),
             @error(::core::convert::Infallible),
+            @data(PinData, use_data),
+            @has_data(HasPinData, __pin_data),
+            @construct_closure(pin_init_from_closure),
         )
     };
 }
@@ -593,205 +596,29 @@ macro_rules! try_pin_init {
     ($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? {
         $($fields:tt)*
     }) => {
-        $crate::try_pin_init!(
+        $crate::__init_internal!(
             @this($($this)?),
             @typ($t $(::<$($generics),*>)? ),
             @fields($($fields)*),
             @error($crate::error::Error),
+            @data(PinData, use_data),
+            @has_data(HasPinData, __pin_data),
+            @construct_closure(pin_init_from_closure),
         )
     };
     ($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? {
         $($fields:tt)*
     }? $err:ty) => {
-        $crate::try_pin_init!(
+        $crate::__init_internal!(
             @this($($this)?),
             @typ($t $(::<$($generics),*>)? ),
             @fields($($fields)*),
             @error($err),
+            @data(PinData, use_data),
+            @has_data(HasPinData, __pin_data),
+            @construct_closure(pin_init_from_closure),
         )
     };
-    (
-        @this($($this:ident)?),
-        @typ($t:ident $(::<$($generics:ty),*>)?),
-        @fields($($fields:tt)*),
-        @error($err:ty),
-    ) => {{
-        // We do not want to allow arbitrary returns, so we declare this type as the `Ok` return
-        // type and shadow it later when we insert the arbitrary user code. That way there will be
-        // no possibility of returning without `unsafe`.
-        struct __InitOk;
-        // Get the pin data from the supplied type.
-        let data = unsafe {
-            use $crate::init::__internal::HasPinData;
-            $t$(::<$($generics),*>)?::__pin_data()
-        };
-        // Ensure that `data` really is of type `PinData` and help with type inference:
-        let init = $crate::init::__internal::PinData::make_closure::<_, __InitOk, $err>(
-            data,
-            move |slot| {
-                {
-                    // Shadow the structure so it cannot be used to return early.
-                    struct __InitOk;
-                    // Create the `this` so it can be referenced by the user inside of the
-                    // expressions creating the individual fields.
-                    $(let $this = unsafe { ::core::ptr::NonNull::new_unchecked(slot) };)?
-                    // Initialize every field.
-                    $crate::try_pin_init!(init_slot:
-                        @data(data),
-                        @slot(slot),
-                        @munch_fields($($fields)*,),
-                    );
-                    // We use unreachable code to ensure that all fields have been mentioned exactly
-                    // once, this struct initializer will still be type-checked and complain with a
-                    // very natural error message if a field is forgotten/mentioned more than once.
-                    #[allow(unreachable_code, clippy::diverging_sub_expression)]
-                    if false {
-                        $crate::try_pin_init!(make_initializer:
-                            @slot(slot),
-                            @type_name($t),
-                            @munch_fields($($fields)*,),
-                            @acc(),
-                        );
-                    }
-                    // Forget all guards, since initialization was a success.
-                    $crate::try_pin_init!(forget_guards:
-                        @munch_fields($($fields)*,),
-                    );
-                }
-                Ok(__InitOk)
-            }
-        );
-        let init = move |slot| -> ::core::result::Result<(), $err> {
-            init(slot).map(|__InitOk| ())
-        };
-        let init = unsafe { $crate::init::pin_init_from_closure::<_, $err>(init) };
-        init
-    }};
-    (init_slot:
-        @data($data:ident),
-        @slot($slot:ident),
-        @munch_fields($(,)?),
-    ) => {
-        // Endpoint of munching, no fields are left.
-    };
-    (init_slot:
-        @data($data:ident),
-        @slot($slot:ident),
-        // In-place initialization syntax.
-        @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
-    ) => {
-        let $field = $val;
-        // Call the initializer.
-        //
-        // SAFETY: `slot` is valid, because we are inside of an initializer closure, we
-        // return when an error/panic occurs.
-        // We also use the `data` to require the correct trait (`Init` or `PinInit`) for `$field`.
-        unsafe { $data.$field(::core::ptr::addr_of_mut!((*$slot).$field), $field)? };
-        // Create the drop guard.
-        //
-        // We only give access to `&DropGuard`, so it cannot be forgotten via safe code.
-        //
-        // SAFETY: We forget the guard later when initialization has succeeded.
-        let $field = &unsafe {
-            $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
-        };
-
-        $crate::try_pin_init!(init_slot:
-            @data($data),
-            @slot($slot),
-            @munch_fields($($rest)*),
-        );
-    };
-    (init_slot:
-        @data($data:ident),
-        @slot($slot:ident),
-        // Direct value init, this is safe for every field.
-        @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
-    ) => {
-        $(let $field = $val;)?
-        // Initialize the field.
-        //
-        // SAFETY: The memory at `slot` is uninitialized.
-        unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) };
-        // Create the drop guard:
-        //
-        // We only give access to `&DropGuard`, so it cannot be accidentally forgotten.
-        //
-        // SAFETY: We forget the guard later when initialization has succeeded.
-        let $field = &unsafe {
-            $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
-        };
-
-        $crate::try_pin_init!(init_slot:
-            @data($data),
-            @slot($slot),
-            @munch_fields($($rest)*),
-        );
-    };
-    (make_initializer:
-        @slot($slot:ident),
-        @type_name($t:ident),
-        @munch_fields($(,)?),
-        @acc($($acc:tt)*),
-    ) => {
-        // Endpoint, nothing more to munch, create the initializer.
-        // Since we are in the `if false` branch, this will never get executed. We abuse `slot` to
-        // get the correct type inference here:
-        unsafe {
-            ::core::ptr::write($slot, $t {
-                $($acc)*
-            });
-        }
-    };
-    (make_initializer:
-        @slot($slot:ident),
-        @type_name($t:ident),
-        @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
-        @acc($($acc:tt)*),
-    ) => {
-        $crate::try_pin_init!(make_initializer:
-            @slot($slot),
-            @type_name($t),
-            @munch_fields($($rest)*),
-            @acc($($acc)* $field: ::core::panic!(),),
-        );
-    };
-    (make_initializer:
-        @slot($slot:ident),
-        @type_name($t:ident),
-        @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
-        @acc($($acc:tt)*),
-    ) => {
-        $crate::try_pin_init!(make_initializer:
-            @slot($slot),
-            @type_name($t),
-            @munch_fields($($rest)*),
-            @acc($($acc)* $field: ::core::panic!(),),
-        );
-    };
-    (forget_guards:
-        @munch_fields($(,)?),
-    ) => {
-        // Munching finished.
-    };
-    (forget_guards:
-        @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
-    ) => {
-        unsafe { $crate::init::__internal::DropGuard::forget($field) };
-
-        $crate::try_pin_init!(forget_guards:
-            @munch_fields($($rest)*),
-        );
-    };
-    (forget_guards:
-        @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
-    ) => {
-        unsafe { $crate::init::__internal::DropGuard::forget($field) };
-
-        $crate::try_pin_init!(forget_guards:
-            @munch_fields($($rest)*),
-        );
-    };
 }
 
 /// Construct an in-place initializer for `struct`s.
@@ -816,11 +643,14 @@ macro_rules! init {
     ($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? {
         $($fields:tt)*
     }) => {
-        $crate::try_init!(
+        $crate::__init_internal!(
             @this($($this)?),
             @typ($t $(::<$($generics),*>)?),
             @fields($($fields)*),
             @error(::core::convert::Infallible),
+            @data(InitData, /*no use_data*/),
+            @has_data(HasInitData, __init_data),
+            @construct_closure(init_from_closure),
         )
     }
 }
@@ -863,199 +693,29 @@ macro_rules! try_init {
     ($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? {
         $($fields:tt)*
     }) => {
-        $crate::try_init!(
+        $crate::__init_internal!(
             @this($($this)?),
             @typ($t $(::<$($generics),*>)?),
             @fields($($fields)*),
             @error($crate::error::Error),
+            @data(InitData, /*no use_data*/),
+            @has_data(HasInitData, __init_data),
+            @construct_closure(init_from_closure),
         )
     };
     ($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? {
         $($fields:tt)*
     }? $err:ty) => {
-        $crate::try_init!(
+        $crate::__init_internal!(
             @this($($this)?),
             @typ($t $(::<$($generics),*>)?),
             @fields($($fields)*),
             @error($err),
+            @data(InitData, /*no use_data*/),
+            @has_data(HasInitData, __init_data),
+            @construct_closure(init_from_closure),
         )
     };
-    (
-        @this($($this:ident)?),
-        @typ($t:ident $(::<$($generics:ty),*>)?),
-        @fields($($fields:tt)*),
-        @error($err:ty),
-    ) => {{
-        // We do not want to allow arbitrary returns, so we declare this type as the `Ok` return
-        // type and shadow it later when we insert the arbitrary user code. That way there will be
-        // no possibility of returning without `unsafe`.
-        struct __InitOk;
-        // Get the init data from the supplied type.
-        let data = unsafe {
-            use $crate::init::__internal::HasInitData;
-            $t$(::<$($generics),*>)?::__init_data()
-        };
-        // Ensure that `data` really is of type `InitData` and help with type inference:
-        let init = $crate::init::__internal::InitData::make_closure::<_, __InitOk, $err>(
-            data,
-            move |slot| {
-                {
-                    // Shadow the structure so it cannot be used to return early.
-                    struct __InitOk;
-                    // Create the `this` so it can be referenced by the user inside of the
-                    // expressions creating the individual fields.
-                    $(let $this = unsafe { ::core::ptr::NonNull::new_unchecked(slot) };)?
-                    // Initialize every field.
-                    $crate::try_init!(init_slot:
-                        @slot(slot),
-                        @munch_fields($($fields)*,),
-                    );
-                    // We use unreachable code to ensure that all fields have been mentioned exactly
-                    // once, this struct initializer will still be type-checked and complain with a
-                    // very natural error message if a field is forgotten/mentioned more than once.
-                    #[allow(unreachable_code, clippy::diverging_sub_expression)]
-                    if false {
-                        $crate::try_init!(make_initializer:
-                            @slot(slot),
-                            @type_name($t),
-                            @munch_fields($($fields)*,),
-                            @acc(),
-                        );
-                    }
-                    // Forget all guards, since initialization was a success.
-                    $crate::try_init!(forget_guards:
-                        @munch_fields($($fields)*,),
-                    );
-                }
-                Ok(__InitOk)
-            }
-        );
-        let init = move |slot| -> ::core::result::Result<(), $err> {
-            init(slot).map(|__InitOk| ())
-        };
-        let init = unsafe { $crate::init::init_from_closure::<_, $err>(init) };
-        init
-    }};
-    (init_slot:
-        @slot($slot:ident),
-        @munch_fields( $(,)?),
-    ) => {
-        // Endpoint of munching, no fields are left.
-    };
-    (init_slot:
-        @slot($slot:ident),
-        @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
-    ) => {
-        let $field = $val;
-        // Call the initializer.
-        //
-        // SAFETY: `slot` is valid, because we are inside of an initializer closure, we
-        // return when an error/panic occurs.
-        unsafe {
-            $crate::init::Init::__init($field, ::core::ptr::addr_of_mut!((*$slot).$field))?;
-        }
-        // Create the drop guard.
-        //
-        // We only give access to `&DropGuard`, so it cannot be accidentally forgotten.
-        //
-        // SAFETY: We forget the guard later when initialization has succeeded.
-        let $field = &unsafe {
-            $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
-        };
-
-        $crate::try_init!(init_slot:
-            @slot($slot),
-            @munch_fields($($rest)*),
-        );
-    };
-    (init_slot:
-        @slot($slot:ident),
-        // Direct value init.
-        @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
-    ) => {
-        $(let $field = $val;)?
-        // Call the initializer.
-        //
-        // SAFETY: The memory at `slot` is uninitialized.
-        unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) };
-        // Create the drop guard.
-        //
-        // We only give access to `&DropGuard`, so it cannot be accidentally forgotten.
-        //
-        // SAFETY: We forget the guard later when initialization has succeeded.
-        let $field = &unsafe {
-            $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
-        };
-
-        $crate::try_init!(init_slot:
-            @slot($slot),
-            @munch_fields($($rest)*),
-        );
-    };
-    (make_initializer:
-        @slot($slot:ident),
-        @type_name($t:ident),
-        @munch_fields( $(,)?),
-        @acc($($acc:tt)*),
-    ) => {
-        // Endpoint, nothing more to munch, create the initializer.
-        // Since we are in the `if false` branch, this will never get executed. We abuse `slot` to
-        // get the correct type inference here:
-        unsafe {
-            ::core::ptr::write($slot, $t {
-                $($acc)*
-            });
-        }
-    };
-    (make_initializer:
-        @slot($slot:ident),
-        @type_name($t:ident),
-        @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
-        @acc($($acc:tt)*),
-    ) => {
-        $crate::try_init!(make_initializer:
-            @slot($slot),
-            @type_name($t),
-            @munch_fields($($rest)*),
-            @acc($($acc)*$field: ::core::panic!(),),
-        );
-    };
-    (make_initializer:
-        @slot($slot:ident),
-        @type_name($t:ident),
-        @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
-        @acc($($acc:tt)*),
-    ) => {
-        $crate::try_init!(make_initializer:
-            @slot($slot),
-            @type_name($t),
-            @munch_fields($($rest)*),
-            @acc($($acc)*$field: ::core::panic!(),),
-        );
-    };
-    (forget_guards:
-        @munch_fields($(,)?),
-    ) => {
-        // Munching finished.
-    };
-    (forget_guards:
-        @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
-    ) => {
-        unsafe { $crate::init::__internal::DropGuard::forget($field) };
-
-        $crate::try_init!(forget_guards:
-            @munch_fields($($rest)*),
-        );
-    };
-    (forget_guards:
-        @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
-    ) => {
-        unsafe { $crate::init::__internal::DropGuard::forget($field) };
-
-        $crate::try_init!(forget_guards:
-            @munch_fields($($rest)*),
-        );
-    };
 }
 
 /// A pin-initializer for the type `T`.
diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs
index 00aa4e956c0a..ad78dd0d9d9e 100644
--- a/rust/kernel/init/macros.rs
+++ b/rust/kernel/init/macros.rs
@@ -1,10 +1,12 @@
 // SPDX-License-Identifier: Apache-2.0 OR MIT
 
 //! This module provides the macros that actually implement the proc-macros `pin_data` and
-//! `pinned_drop`.
+//! `pinned_drop`. It also contains `__init_internal` the implementation of the `{try_}{pin_}init!`
+//! macros.
 //!
 //! These macros should never be called directly, since they expect their input to be
-//! in a certain format which is internal. Use the proc-macros instead.
+//! in a certain format which is internal. If used incorrectly, these macros can lead to UB even in
+//! safe code! Use the public facing macros instead.
 //!
 //! This architecture has been chosen because the kernel does not yet have access to `syn` which
 //! would make matters a lot easier for implementing these as proc-macros.
@@ -980,3 +982,234 @@ macro_rules! __pin_data {
         }
     };
 }
+
+/// The internal init macro. Do not call manually!
+///
+/// This is called by the `{try_}{pin_}init!` macros with various inputs.
+///
+/// This macro has multiple internal call configurations, these are always the very first ident:
+/// - nothing: this is the base case and called by the `{try_}{pin_}init!` macros.
+/// - `init_slot`: recursively creates the code that initializes all fields in `slot`.
+/// - `make_initializer`: recursively create the struct initializer that guarantees that every
+///   field has been initialized exactly once.
+/// - `forget_guards`: recursively forget the drop guards for every field.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __init_internal {
+    (
+        @this($($this:ident)?),
+        @typ($t:ident $(::<$($generics:ty),*>)?),
+        @fields($($fields:tt)*),
+        @error($err:ty),
+        // Either `PinData` or `InitData`, `$use_data` should only be present in the `PinData`
+        // case.
+        @data($data:ident, $($use_data:ident)?),
+        // `HasPinData` or `HasInitData`.
+        @has_data($has_data:ident, $get_data:ident),
+        // `pin_init_from_closure` or `init_from_closure`.
+        @construct_closure($construct_closure:ident),
+    ) => {{
+        // We do not want to allow arbitrary returns, so we declare this type as the `Ok` return
+        // type and shadow it later when we insert the arbitrary user code. That way there will be
+        // no possibility of returning without `unsafe`.
+        struct __InitOk;
+        // Get the data about fields from the supplied type.
+        let data = unsafe {
+            use $crate::init::__internal::$has_data;
+            $t$(::<$($generics),*>)?::$get_data()
+        };
+        // Ensure that `data` really is of type `$data` and help with type inference:
+        let init = $crate::init::__internal::$data::make_closure::<_, __InitOk, $err>(
+            data,
+            move |slot| {
+                {
+                    // Shadow the structure so it cannot be used to return early.
+                    struct __InitOk;
+                    // Create the `this` so it can be referenced by the user inside of the
+                    // expressions creating the individual fields.
+                    $(let $this = unsafe { ::core::ptr::NonNull::new_unchecked(slot) };)?
+                    // Initialize every field.
+                    $crate::__init_internal!(init_slot($($use_data)?):
+                        @data(data),
+                        @slot(slot),
+                        @munch_fields($($fields)*,),
+                    );
+                    // We use unreachable code to ensure that all fields have been mentioned exactly
+                    // once, this struct initializer will still be type-checked and complain with a
+                    // very natural error message if a field is forgotten/mentioned more than once.
+                    #[allow(unreachable_code, clippy::diverging_sub_expression)]
+                    if false {
+                        $crate::__init_internal!(make_initializer:
+                            @slot(slot),
+                            @type_name($t),
+                            @munch_fields($($fields)*,),
+                            @acc(),
+                        );
+                    }
+                    // Forget all guards, since initialization was a success.
+                    $crate::__init_internal!(forget_guards:
+                        @munch_fields($($fields)*,),
+                    );
+                }
+                Ok(__InitOk)
+            }
+        );
+        let init = move |slot| -> ::core::result::Result<(), $err> {
+            init(slot).map(|__InitOk| ())
+        };
+        let init = unsafe { $crate::init::$construct_closure::<_, $err>(init) };
+        init
+    }};
+    (init_slot($($use_data:ident)?):
+        @data($data:ident),
+        @slot($slot:ident),
+        @munch_fields($(,)?),
+    ) => {
+        // Endpoint of munching, no fields are left.
+    };
+    (init_slot($use_data:ident): // `use_data` is present, so we use the `data` to init fields.
+        @data($data:ident),
+        @slot($slot:ident),
+        // In-place initialization syntax.
+        @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
+    ) => {
+        let $field = $val;
+        // Call the initializer.
+        //
+        // SAFETY: `slot` is valid, because we are inside of an initializer closure, we
+        // return when an error/panic occurs.
+        // We also use the `data` to require the correct trait (`Init` or `PinInit`) for `$field`.
+        unsafe { $data.$field(::core::ptr::addr_of_mut!((*$slot).$field), $field)? };
+        // Create the drop guard.
+        //
+        // We only give access to `&DropGuard`, so it cannot be forgotten via safe code.
+        //
+        // SAFETY: We forget the guard later when initialization has succeeded.
+        let $field = &unsafe {
+            $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
+        };
+
+        $crate::__init_internal!(init_slot($use_data):
+            @data($data),
+            @slot($slot),
+            @munch_fields($($rest)*),
+        );
+    };
+    (init_slot(): // No `use_data`, so we use `Init::__init` directly.
+        @data($data:ident),
+        @slot($slot:ident),
+        // In-place initialization syntax.
+        @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
+    ) => {
+        let $field = $val;
+        // Call the initializer.
+        //
+        // SAFETY: `slot` is valid, because we are inside of an initializer closure, we
+        // return when an error/panic occurs.
+        unsafe { $crate::init::Init::__init($field, ::core::ptr::addr_of_mut!((*$slot).$field))? };
+        // Create the drop guard.
+        //
+        // We only give access to `&DropGuard`, so it cannot be forgotten via safe code.
+        //
+        // SAFETY: We forget the guard later when initialization has succeeded.
+        let $field = &unsafe {
+            $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
+        };
+
+        $crate::__init_internal!(init_slot():
+            @data($data),
+            @slot($slot),
+            @munch_fields($($rest)*),
+        );
+    };
+    (init_slot($($use_data:ident)?):
+        @data($data:ident),
+        @slot($slot:ident),
+        // Init by-value.
+        @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
+    ) => {
+        $(let $field = $val;)?
+        // Initialize the field.
+        //
+        // SAFETY: The memory at `slot` is uninitialized.
+        unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) };
+        // Create the drop guard:
+        //
+        // We only give access to `&DropGuard`, so it cannot be accidentally forgotten.
+        //
+        // SAFETY: We forget the guard later when initialization has succeeded.
+        let $field = &unsafe {
+            $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
+        };
+
+        $crate::__init_internal!(init_slot($($use_data)?):
+            @data($data),
+            @slot($slot),
+            @munch_fields($($rest)*),
+        );
+    };
+    (make_initializer:
+        @slot($slot:ident),
+        @type_name($t:ident),
+        @munch_fields($(,)?),
+        @acc($($acc:tt)*),
+    ) => {
+        // Endpoint, nothing more to munch, create the initializer.
+        // Since we are in the `if false` branch, this will never get executed. We abuse `slot` to
+        // get the correct type inference here:
+        unsafe {
+            ::core::ptr::write($slot, $t {
+                $($acc)*
+            });
+        }
+    };
+    (make_initializer:
+        @slot($slot:ident),
+        @type_name($t:ident),
+        @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
+        @acc($($acc:tt)*),
+    ) => {
+        $crate::__init_internal!(make_initializer:
+            @slot($slot),
+            @type_name($t),
+            @munch_fields($($rest)*),
+            @acc($($acc)* $field: ::core::panic!(),),
+        );
+    };
+    (make_initializer:
+        @slot($slot:ident),
+        @type_name($t:ident),
+        @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
+        @acc($($acc:tt)*),
+    ) => {
+        $crate::__init_internal!(make_initializer:
+            @slot($slot),
+            @type_name($t),
+            @munch_fields($($rest)*),
+            @acc($($acc)* $field: ::core::panic!(),),
+        );
+    };
+    (forget_guards:
+        @munch_fields($(,)?),
+    ) => {
+        // Munching finished.
+    };
+    (forget_guards:
+        @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
+    ) => {
+        unsafe { $crate::init::__internal::DropGuard::forget($field) };
+
+        $crate::__init_internal!(forget_guards:
+            @munch_fields($($rest)*),
+        );
+    };
+    (forget_guards:
+        @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
+    ) => {
+        unsafe { $crate::init::__internal::DropGuard::forget($field) };
+
+        $crate::__init_internal!(forget_guards:
+            @munch_fields($($rest)*),
+        );
+    };
+}

From f8badd150763ae0f9c8482fabe0fdbac81735d34 Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:46:31 +0000
Subject: [PATCH 1422/2047] rust: init: make `#[pin_data]` compatible with
 conditional compilation of fields

This patch allows one to write
```
#[pin_data]
pub struct Foo {
    #[cfg(CONFIG_BAR)]
    a: Bar,
    #[cfg(not(CONFIG_BAR))]
    a: Baz,
}
```
Before, this would result in a compile error, because `#[pin_data]`
would generate two functions named `a` for both fields unconditionally.

Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20230814084602.25699-3-benno.lossin@proton.me
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init/macros.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs
index ad78dd0d9d9e..474ed36f84a5 100644
--- a/rust/kernel/init/macros.rs
+++ b/rust/kernel/init/macros.rs
@@ -962,6 +962,7 @@ macro_rules! __pin_data {
         where $($whr)*
         {
             $(
+                $(#[$($p_attr)*])*
                 $pvis unsafe fn $p_field<E>(
                     self,
                     slot: *mut $p_type,
@@ -971,6 +972,7 @@ macro_rules! __pin_data {
                 }
             )*
             $(
+                $(#[$($attr)*])*
                 $fvis unsafe fn $field<E>(
                     self,
                     slot: *mut $type,

From 071cedc84e907f6984b3de3285ec2b077d3c3cdb Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:46:41 +0000
Subject: [PATCH 1423/2047] rust: add derive macro for `Zeroable`

Add a derive proc-macro for the `Zeroable` trait. The macro supports
structs where every field implements the `Zeroable` trait. This way
`unsafe` implementations can be avoided.

The macro is split into two parts:
- a proc-macro to parse generics into impl and ty generics,
- a declarative macro that expands to the impl block.

Suggested-by: Asahi Lina <lina@asahilina.net>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Reviewed-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Link: https://lore.kernel.org/r/20230814084602.25699-4-benno.lossin@proton.me
[ Added `ignore` to the `lib.rs` example and cleaned trivial nit. ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init/macros.rs | 35 ++++++++++++++++++
 rust/kernel/prelude.rs     |  2 +-
 rust/macros/lib.rs         | 20 +++++++++++
 rust/macros/quote.rs       | 12 +++++++
 rust/macros/zeroable.rs    | 72 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 140 insertions(+), 1 deletion(-)
 create mode 100644 rust/macros/zeroable.rs

diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs
index 474ed36f84a5..0b0ffbc901a7 100644
--- a/rust/kernel/init/macros.rs
+++ b/rust/kernel/init/macros.rs
@@ -1215,3 +1215,38 @@ macro_rules! __init_internal {
         );
     };
 }
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __derive_zeroable {
+    (parse_input:
+        @sig(
+            $(#[$($struct_attr:tt)*])*
+            $vis:vis struct $name:ident
+            $(where $($whr:tt)*)?
+        ),
+        @impl_generics($($impl_generics:tt)*),
+        @ty_generics($($ty_generics:tt)*),
+        @body({
+            $(
+                $(#[$($field_attr:tt)*])*
+                $field:ident : $field_ty:ty
+            ),* $(,)?
+        }),
+    ) => {
+        // SAFETY: Every field type implements `Zeroable` and padding bytes may be zero.
+        #[automatically_derived]
+        unsafe impl<$($impl_generics)*> $crate::init::Zeroable for $name<$($ty_generics)*>
+        where
+            $($($whr)*)?
+        {}
+        const _: () = {
+            fn assert_zeroable<T: ?::core::marker::Sized + $crate::init::Zeroable>() {}
+            fn ensure_zeroable<$($impl_generics)*>()
+                where $($($whr)*)?
+            {
+                $(assert_zeroable::<$field_ty>();)*
+            }
+        };
+    };
+}
diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs
index c28587d68ebc..ae21600970b3 100644
--- a/rust/kernel/prelude.rs
+++ b/rust/kernel/prelude.rs
@@ -18,7 +18,7 @@ pub use core::pin::Pin;
 pub use alloc::{boxed::Box, vec::Vec};
 
 #[doc(no_inline)]
-pub use macros::{module, pin_data, pinned_drop, vtable};
+pub use macros::{module, pin_data, pinned_drop, vtable, Zeroable};
 
 pub use super::build_assert;
 
diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs
index b4bc44c27bd4..c42105c2ff96 100644
--- a/rust/macros/lib.rs
+++ b/rust/macros/lib.rs
@@ -11,6 +11,7 @@ mod paste;
 mod pin_data;
 mod pinned_drop;
 mod vtable;
+mod zeroable;
 
 use proc_macro::TokenStream;
 
@@ -343,3 +344,22 @@ pub fn paste(input: TokenStream) -> TokenStream {
     paste::expand(&mut tokens);
     tokens.into_iter().collect()
 }
+
+/// Derives the [`Zeroable`] trait for the given struct.
+///
+/// This can only be used for structs where every field implements the [`Zeroable`] trait.
+///
+/// # Examples
+///
+/// ```rust,ignore
+/// #[derive(Zeroable)]
+/// pub struct DriverData {
+///     id: i64,
+///     buf_ptr: *mut u8,
+///     len: usize,
+/// }
+/// ```
+#[proc_macro_derive(Zeroable)]
+pub fn derive_zeroable(input: TokenStream) -> TokenStream {
+    zeroable::derive(input)
+}
diff --git a/rust/macros/quote.rs b/rust/macros/quote.rs
index dddbb4e6f4cb..33a199e4f176 100644
--- a/rust/macros/quote.rs
+++ b/rust/macros/quote.rs
@@ -124,6 +124,18 @@ macro_rules! quote_spanned {
         ));
         quote_spanned!(@proc $v $span $($tt)*);
     };
+    (@proc $v:ident $span:ident ; $($tt:tt)*) => {
+        $v.push(::proc_macro::TokenTree::Punct(
+                ::proc_macro::Punct::new(';', ::proc_macro::Spacing::Alone)
+        ));
+        quote_spanned!(@proc $v $span $($tt)*);
+    };
+    (@proc $v:ident $span:ident + $($tt:tt)*) => {
+        $v.push(::proc_macro::TokenTree::Punct(
+                ::proc_macro::Punct::new('+', ::proc_macro::Spacing::Alone)
+        ));
+        quote_spanned!(@proc $v $span $($tt)*);
+    };
     (@proc $v:ident $span:ident $id:ident $($tt:tt)*) => {
         $v.push(::proc_macro::TokenTree::Ident(::proc_macro::Ident::new(stringify!($id), $span)));
         quote_spanned!(@proc $v $span $($tt)*);
diff --git a/rust/macros/zeroable.rs b/rust/macros/zeroable.rs
new file mode 100644
index 000000000000..0d605c46ab3b
--- /dev/null
+++ b/rust/macros/zeroable.rs
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use crate::helpers::{parse_generics, Generics};
+use proc_macro::{TokenStream, TokenTree};
+
+pub(crate) fn derive(input: TokenStream) -> TokenStream {
+    let (
+        Generics {
+            impl_generics,
+            ty_generics,
+        },
+        mut rest,
+    ) = parse_generics(input);
+    // This should be the body of the struct `{...}`.
+    let last = rest.pop();
+    // Now we insert `Zeroable` as a bound for every generic parameter in `impl_generics`.
+    let mut new_impl_generics = Vec::with_capacity(impl_generics.len());
+    // Are we inside of a generic where we want to add `Zeroable`?
+    let mut in_generic = !impl_generics.is_empty();
+    // Have we already inserted `Zeroable`?
+    let mut inserted = false;
+    // Level of `<>` nestings.
+    let mut nested = 0;
+    for tt in impl_generics {
+        match &tt {
+            // If we find a `,`, then we have finished a generic/constant/lifetime parameter.
+            TokenTree::Punct(p) if nested == 0 && p.as_char() == ',' => {
+                if in_generic && !inserted {
+                    new_impl_generics.extend(quote! { : ::kernel::init::Zeroable });
+                }
+                in_generic = true;
+                inserted = false;
+                new_impl_generics.push(tt);
+            }
+            // If we find `'`, then we are entering a lifetime.
+            TokenTree::Punct(p) if nested == 0 && p.as_char() == '\'' => {
+                in_generic = false;
+                new_impl_generics.push(tt);
+            }
+            TokenTree::Punct(p) if nested == 0 && p.as_char() == ':' => {
+                new_impl_generics.push(tt);
+                if in_generic {
+                    new_impl_generics.extend(quote! { ::kernel::init::Zeroable + });
+                    inserted = true;
+                }
+            }
+            TokenTree::Punct(p) if p.as_char() == '<' => {
+                nested += 1;
+                new_impl_generics.push(tt);
+            }
+            TokenTree::Punct(p) if p.as_char() == '>' => {
+                assert!(nested > 0);
+                nested -= 1;
+                new_impl_generics.push(tt);
+            }
+            _ => new_impl_generics.push(tt),
+        }
+    }
+    assert_eq!(nested, 0);
+    if in_generic && !inserted {
+        new_impl_generics.extend(quote! { : ::kernel::init::Zeroable });
+    }
+    quote! {
+        ::kernel::__derive_zeroable!(
+            parse_input:
+                @sig(#(#rest)*),
+                @impl_generics(#(#new_impl_generics)*),
+                @ty_generics(#(#ty_generics)*),
+                @body(#last),
+        );
+    }
+}

From 97de919d574e6a22f0d43a169b96274f1862e770 Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:46:48 +0000
Subject: [PATCH 1424/2047] rust: init: make guards in the init macros hygienic

Use hygienic identifiers for the guards instead of the field names. This
makes the init macros feel more like normal struct initializers, since
assigning identifiers with the name of a field does not create
conflicts.

Also change the internals of the guards, no need to make the `forget`
function `unsafe`, since users cannot access the guards anyways. Now the
guards are carried directly on the stack and have no extra `Cell<bool>`
field that marks if they have been forgotten or not, instead they are
just forgotten via `mem::forget`.

Suggested-by: Asahi Lina <lina@asahilina.net>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20230814084602.25699-5-benno.lossin@proton.me
[ Cleaned a few trivial nits. ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init.rs            |   1 -
 rust/kernel/init/__internal.rs |  25 ++-----
 rust/kernel/init/macros.rs     | 116 +++++++++++++++------------------
 3 files changed, 56 insertions(+), 86 deletions(-)

diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index d9a91950cba2..ecf6a4bd0ce4 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -206,7 +206,6 @@ use crate::{
 use alloc::boxed::Box;
 use core::{
     alloc::AllocError,
-    cell::Cell,
     convert::Infallible,
     marker::PhantomData,
     mem::MaybeUninit,
diff --git a/rust/kernel/init/__internal.rs b/rust/kernel/init/__internal.rs
index 44751fb62b51..7abd1fb65e41 100644
--- a/rust/kernel/init/__internal.rs
+++ b/rust/kernel/init/__internal.rs
@@ -174,7 +174,6 @@ impl<T> StackInit<T> {
 /// Can be forgotten to prevent the drop.
 pub struct DropGuard<T: ?Sized> {
     ptr: *mut T,
-    do_drop: Cell<bool>,
 }
 
 impl<T: ?Sized> DropGuard<T> {
@@ -190,32 +189,16 @@ impl<T: ?Sized> DropGuard<T> {
     /// - will not be dropped by any other means.
     #[inline]
     pub unsafe fn new(ptr: *mut T) -> Self {
-        Self {
-            ptr,
-            do_drop: Cell::new(true),
-        }
-    }
-
-    /// Prevents this guard from dropping the supplied pointer.
-    ///
-    /// # Safety
-    ///
-    /// This function is unsafe in order to prevent safe code from forgetting this guard. It should
-    /// only be called by the macros in this module.
-    #[inline]
-    pub unsafe fn forget(&self) {
-        self.do_drop.set(false);
+        Self { ptr }
     }
 }
 
 impl<T: ?Sized> Drop for DropGuard<T> {
     #[inline]
     fn drop(&mut self) {
-        if self.do_drop.get() {
-            // SAFETY: A `DropGuard` can only be constructed using the unsafe `new` function
-            // ensuring that this operation is safe.
-            unsafe { ptr::drop_in_place(self.ptr) }
-        }
+        // SAFETY: A `DropGuard` can only be constructed using the unsafe `new` function
+        // ensuring that this operation is safe.
+        unsafe { ptr::drop_in_place(self.ptr) }
     }
 }
 
diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs
index 0b0ffbc901a7..5063b45970f3 100644
--- a/rust/kernel/init/macros.rs
+++ b/rust/kernel/init/macros.rs
@@ -994,7 +994,6 @@ macro_rules! __pin_data {
 /// - `init_slot`: recursively creates the code that initializes all fields in `slot`.
 /// - `make_initializer`: recursively create the struct initializer that guarantees that every
 ///   field has been initialized exactly once.
-/// - `forget_guards`: recursively forget the drop guards for every field.
 #[doc(hidden)]
 #[macro_export]
 macro_rules! __init_internal {
@@ -1034,6 +1033,7 @@ macro_rules! __init_internal {
                     $crate::__init_internal!(init_slot($($use_data)?):
                         @data(data),
                         @slot(slot),
+                        @guards(),
                         @munch_fields($($fields)*,),
                     );
                     // We use unreachable code to ensure that all fields have been mentioned exactly
@@ -1048,10 +1048,6 @@ macro_rules! __init_internal {
                             @acc(),
                         );
                     }
-                    // Forget all guards, since initialization was a success.
-                    $crate::__init_internal!(forget_guards:
-                        @munch_fields($($fields)*,),
-                    );
                 }
                 Ok(__InitOk)
             }
@@ -1065,13 +1061,17 @@ macro_rules! __init_internal {
     (init_slot($($use_data:ident)?):
         @data($data:ident),
         @slot($slot:ident),
+        @guards($($guards:ident,)*),
         @munch_fields($(,)?),
     ) => {
-        // Endpoint of munching, no fields are left.
+        // Endpoint of munching, no fields are left. If execution reaches this point, all fields
+        // have been initialized. Therefore we can now dismiss the guards by forgetting them.
+        $(::core::mem::forget($guards);)*
     };
     (init_slot($use_data:ident): // `use_data` is present, so we use the `data` to init fields.
         @data($data:ident),
         @slot($slot:ident),
+        @guards($($guards:ident,)*),
         // In-place initialization syntax.
         @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
     ) => {
@@ -1082,24 +1082,28 @@ macro_rules! __init_internal {
         // return when an error/panic occurs.
         // We also use the `data` to require the correct trait (`Init` or `PinInit`) for `$field`.
         unsafe { $data.$field(::core::ptr::addr_of_mut!((*$slot).$field), $field)? };
-        // Create the drop guard.
+        // Create the drop guard:
         //
-        // We only give access to `&DropGuard`, so it cannot be forgotten via safe code.
-        //
-        // SAFETY: We forget the guard later when initialization has succeeded.
-        let $field = &unsafe {
-            $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
-        };
+        // We rely on macro hygiene to make it impossible for users to access this local variable.
+        // We use `paste!` to create new hygiene for `$field`.
+        ::kernel::macros::paste! {
+            // SAFETY: We forget the guard later when initialization has succeeded.
+            let [<$field>] = unsafe {
+                $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
+            };
 
-        $crate::__init_internal!(init_slot($use_data):
-            @data($data),
-            @slot($slot),
-            @munch_fields($($rest)*),
-        );
+            $crate::__init_internal!(init_slot($use_data):
+                @data($data),
+                @slot($slot),
+                @guards([<$field>], $($guards,)*),
+                @munch_fields($($rest)*),
+            );
+        }
     };
     (init_slot(): // No `use_data`, so we use `Init::__init` directly.
         @data($data:ident),
         @slot($slot:ident),
+        @guards($($guards:ident,)*),
         // In-place initialization syntax.
         @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
     ) => {
@@ -1109,24 +1113,28 @@ macro_rules! __init_internal {
         // SAFETY: `slot` is valid, because we are inside of an initializer closure, we
         // return when an error/panic occurs.
         unsafe { $crate::init::Init::__init($field, ::core::ptr::addr_of_mut!((*$slot).$field))? };
-        // Create the drop guard.
+        // Create the drop guard:
         //
-        // We only give access to `&DropGuard`, so it cannot be forgotten via safe code.
-        //
-        // SAFETY: We forget the guard later when initialization has succeeded.
-        let $field = &unsafe {
-            $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
-        };
+        // We rely on macro hygiene to make it impossible for users to access this local variable.
+        // We use `paste!` to create new hygiene for `$field`.
+        ::kernel::macros::paste! {
+            // SAFETY: We forget the guard later when initialization has succeeded.
+            let [<$field>] = unsafe {
+                $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
+            };
 
-        $crate::__init_internal!(init_slot():
-            @data($data),
-            @slot($slot),
-            @munch_fields($($rest)*),
-        );
+            $crate::__init_internal!(init_slot():
+                @data($data),
+                @slot($slot),
+                @guards([<$field>], $($guards,)*),
+                @munch_fields($($rest)*),
+            );
+        }
     };
     (init_slot($($use_data:ident)?):
         @data($data:ident),
         @slot($slot:ident),
+        @guards($($guards:ident,)*),
         // Init by-value.
         @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
     ) => {
@@ -1137,18 +1145,21 @@ macro_rules! __init_internal {
         unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) };
         // Create the drop guard:
         //
-        // We only give access to `&DropGuard`, so it cannot be accidentally forgotten.
-        //
-        // SAFETY: We forget the guard later when initialization has succeeded.
-        let $field = &unsafe {
-            $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
-        };
+        // We rely on macro hygiene to make it impossible for users to access this local variable.
+        // We use `paste!` to create new hygiene for `$field`.
+        ::kernel::macros::paste! {
+            // SAFETY: We forget the guard later when initialization has succeeded.
+            let [<$field>] = unsafe {
+                $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field))
+            };
 
-        $crate::__init_internal!(init_slot($($use_data)?):
-            @data($data),
-            @slot($slot),
-            @munch_fields($($rest)*),
-        );
+            $crate::__init_internal!(init_slot($($use_data)?):
+                @data($data),
+                @slot($slot),
+                @guards([<$field>], $($guards,)*),
+                @munch_fields($($rest)*),
+            );
+        }
     };
     (make_initializer:
         @slot($slot:ident),
@@ -1191,29 +1202,6 @@ macro_rules! __init_internal {
             @acc($($acc)* $field: ::core::panic!(),),
         );
     };
-    (forget_guards:
-        @munch_fields($(,)?),
-    ) => {
-        // Munching finished.
-    };
-    (forget_guards:
-        @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
-    ) => {
-        unsafe { $crate::init::__internal::DropGuard::forget($field) };
-
-        $crate::__init_internal!(forget_guards:
-            @munch_fields($($rest)*),
-        );
-    };
-    (forget_guards:
-        @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
-    ) => {
-        unsafe { $crate::init::__internal::DropGuard::forget($field) };
-
-        $crate::__init_internal!(forget_guards:
-            @munch_fields($($rest)*),
-        );
-    };
 }
 
 #[doc(hidden)]

From b9b88be046a92a43d70badb340cac9ffd9695dcb Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:46:55 +0000
Subject: [PATCH 1425/2047] rust: init: wrap type checking struct initializers
 in a closure

In the implementation of the init macros there is a `if false` statement
that type checks the initializer to ensure every field is initialized.
Since the next patch has a stack variable to store the struct, the
function might allocate too much memory on debug builds. Putting the
struct into a closure that is never executed ensures that even in debug
builds no stack overflow error is caused. In release builds this was not
a problem since the code was optimized away due to the `if false`.

Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20230814084602.25699-6-benno.lossin@proton.me
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init/macros.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs
index 5063b45970f3..a1ccc978feab 100644
--- a/rust/kernel/init/macros.rs
+++ b/rust/kernel/init/macros.rs
@@ -1040,14 +1040,14 @@ macro_rules! __init_internal {
                     // once, this struct initializer will still be type-checked and complain with a
                     // very natural error message if a field is forgotten/mentioned more than once.
                     #[allow(unreachable_code, clippy::diverging_sub_expression)]
-                    if false {
+                    let _ = || {
                         $crate::__init_internal!(make_initializer:
                             @slot(slot),
                             @type_name($t),
                             @munch_fields($($fields)*,),
                             @acc(),
                         );
-                    }
+                    };
                 }
                 Ok(__InitOk)
             }
@@ -1168,8 +1168,8 @@ macro_rules! __init_internal {
         @acc($($acc:tt)*),
     ) => {
         // Endpoint, nothing more to munch, create the initializer.
-        // Since we are in the `if false` branch, this will never get executed. We abuse `slot` to
-        // get the correct type inference here:
+        // Since we are in the closure that is never called, this will never get executed.
+        // We abuse `slot` to get the correct type inference here:
         unsafe {
             ::core::ptr::write($slot, $t {
                 $($acc)*

From 92fd540d62701115b22b1f531c8c86454809931b Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:47:03 +0000
Subject: [PATCH 1426/2047] rust: init: make initializer values inaccessible
 after initializing

Previously the init macros would create a local variable with the name
and hygiene of the field that is being initialized to store the value of
the field. This would override any user defined variables. For example:
```
struct Foo {
    a: usize,
    b: usize,
}
let a = 10;
let foo = init!(Foo{
    a: a + 1, // This creates a local variable named `a`.
    b: a, // This refers to that variable!
});
let foo = Box::init!(foo)?;
assert_eq!(foo.a, 11);
assert_eq!(foo.b, 11);
```

This patch changes this behavior, so the above code would panic at the
last assertion, since `b` would have value 10.

Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20230814084602.25699-7-benno.lossin@proton.me
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init/macros.rs | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs
index a1ccc978feab..6e6c13b2fe78 100644
--- a/rust/kernel/init/macros.rs
+++ b/rust/kernel/init/macros.rs
@@ -1075,13 +1075,13 @@ macro_rules! __init_internal {
         // In-place initialization syntax.
         @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
     ) => {
-        let $field = $val;
+        let init = $val;
         // Call the initializer.
         //
         // SAFETY: `slot` is valid, because we are inside of an initializer closure, we
         // return when an error/panic occurs.
         // We also use the `data` to require the correct trait (`Init` or `PinInit`) for `$field`.
-        unsafe { $data.$field(::core::ptr::addr_of_mut!((*$slot).$field), $field)? };
+        unsafe { $data.$field(::core::ptr::addr_of_mut!((*$slot).$field), init)? };
         // Create the drop guard:
         //
         // We rely on macro hygiene to make it impossible for users to access this local variable.
@@ -1107,12 +1107,12 @@ macro_rules! __init_internal {
         // In-place initialization syntax.
         @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
     ) => {
-        let $field = $val;
+        let init = $val;
         // Call the initializer.
         //
         // SAFETY: `slot` is valid, because we are inside of an initializer closure, we
         // return when an error/panic occurs.
-        unsafe { $crate::init::Init::__init($field, ::core::ptr::addr_of_mut!((*$slot).$field))? };
+        unsafe { $crate::init::Init::__init(init, ::core::ptr::addr_of_mut!((*$slot).$field))? };
         // Create the drop guard:
         //
         // We rely on macro hygiene to make it impossible for users to access this local variable.
@@ -1138,11 +1138,13 @@ macro_rules! __init_internal {
         // Init by-value.
         @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
     ) => {
-        $(let $field = $val;)?
-        // Initialize the field.
-        //
-        // SAFETY: The memory at `slot` is uninitialized.
-        unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) };
+        {
+            $(let $field = $val;)?
+            // Initialize the field.
+            //
+            // SAFETY: The memory at `slot` is uninitialized.
+            unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) };
+        }
         // Create the drop guard:
         //
         // We rely on macro hygiene to make it impossible for users to access this local variable.

From 35e7fca2ff59d9d8f036aba3dcf5c34beb79fdb8 Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:47:10 +0000
Subject: [PATCH 1427/2047] rust: init: add `..Zeroable::zeroed()` syntax for
 zeroing all missing fields

Add the struct update syntax to the init macros, but only for
`..Zeroable::zeroed()`. Adding this at the end of the struct initializer
allows one to omit fields from the initializer, these fields will be
initialized with 0x00 set to every byte. Only types that implement the
`Zeroable` trait can utilize this.

Suggested-by: Asahi Lina <lina@asahilina.net>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20230814084602.25699-8-benno.lossin@proton.me
[ Rebased on `rust-next` and cleaned a few trivial nits. ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init.rs        |  16 +++++-
 rust/kernel/init/macros.rs | 115 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index ecf6a4bd0ce4..33f2666d5bae 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -508,14 +508,18 @@ macro_rules! stack_try_pin_init {
 /// - Fields that you want to initialize in-place have to use `<-` instead of `:`.
 /// - In front of the initializer you can write `&this in` to have access to a [`NonNull<Self>`]
 ///   pointer named `this` inside of the initializer.
+/// - Using struct update syntax one can place `..Zeroable::zeroed()` at the very end of the
+///   struct, this initializes every field with 0 and then runs all initializers specified in the
+///   body. This can only be done if [`Zeroable`] is implemented for the struct.
 ///
 /// For instance:
 ///
 /// ```rust
 /// # use kernel::pin_init;
-/// # use macros::pin_data;
+/// # use macros::{Zeroable, pin_data};
 /// # use core::{ptr::addr_of_mut, marker::PhantomPinned};
 /// #[pin_data]
+/// #[derive(Zeroable)]
 /// struct Buf {
 ///     // `ptr` points into `buf`.
 ///     ptr: *mut u8,
@@ -528,6 +532,10 @@ macro_rules! stack_try_pin_init {
 ///     ptr: unsafe { addr_of_mut!((*this.as_ptr()).buf).cast() },
 ///     pin: PhantomPinned,
 /// });
+/// pin_init!(Buf {
+///     buf: [1; 64],
+///     ..Zeroable::zeroed()
+/// });
 /// ```
 ///
 /// [`try_pin_init!`]: kernel::try_pin_init
@@ -547,6 +555,7 @@ macro_rules! pin_init {
             @data(PinData, use_data),
             @has_data(HasPinData, __pin_data),
             @construct_closure(pin_init_from_closure),
+            @munch_fields($($fields)*),
         )
     };
 }
@@ -603,6 +612,7 @@ macro_rules! try_pin_init {
             @data(PinData, use_data),
             @has_data(HasPinData, __pin_data),
             @construct_closure(pin_init_from_closure),
+            @munch_fields($($fields)*),
         )
     };
     ($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? {
@@ -616,6 +626,7 @@ macro_rules! try_pin_init {
             @data(PinData, use_data),
             @has_data(HasPinData, __pin_data),
             @construct_closure(pin_init_from_closure),
+            @munch_fields($($fields)*),
         )
     };
 }
@@ -650,6 +661,7 @@ macro_rules! init {
             @data(InitData, /*no use_data*/),
             @has_data(HasInitData, __init_data),
             @construct_closure(init_from_closure),
+            @munch_fields($($fields)*),
         )
     }
 }
@@ -700,6 +712,7 @@ macro_rules! try_init {
             @data(InitData, /*no use_data*/),
             @has_data(HasInitData, __init_data),
             @construct_closure(init_from_closure),
+            @munch_fields($($fields)*),
         )
     };
     ($(&$this:ident in)? $t:ident $(::<$($generics:ty),* $(,)?>)? {
@@ -713,6 +726,7 @@ macro_rules! try_init {
             @data(InitData, /*no use_data*/),
             @has_data(HasInitData, __init_data),
             @construct_closure(init_from_closure),
+            @munch_fields($($fields)*),
         )
     };
 }
diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs
index 6e6c13b2fe78..9931666293fa 100644
--- a/rust/kernel/init/macros.rs
+++ b/rust/kernel/init/macros.rs
@@ -991,6 +991,7 @@ macro_rules! __pin_data {
 ///
 /// This macro has multiple internal call configurations, these are always the very first ident:
 /// - nothing: this is the base case and called by the `{try_}{pin_}init!` macros.
+/// - `with_update_parsed`: when the `..Zeroable::zeroed()` syntax has been handled.
 /// - `init_slot`: recursively creates the code that initializes all fields in `slot`.
 /// - `make_initializer`: recursively create the struct initializer that guarantees that every
 ///   field has been initialized exactly once.
@@ -1009,6 +1010,82 @@ macro_rules! __init_internal {
         @has_data($has_data:ident, $get_data:ident),
         // `pin_init_from_closure` or `init_from_closure`.
         @construct_closure($construct_closure:ident),
+        @munch_fields(),
+    ) => {
+        $crate::__init_internal!(with_update_parsed:
+            @this($($this)?),
+            @typ($t $(::<$($generics),*>)? ),
+            @fields($($fields)*),
+            @error($err),
+            @data($data, $($use_data)?),
+            @has_data($has_data, $get_data),
+            @construct_closure($construct_closure),
+            @zeroed(), // Nothing means default behavior.
+        )
+    };
+    (
+        @this($($this:ident)?),
+        @typ($t:ident $(::<$($generics:ty),*>)?),
+        @fields($($fields:tt)*),
+        @error($err:ty),
+        // Either `PinData` or `InitData`, `$use_data` should only be present in the `PinData`
+        // case.
+        @data($data:ident, $($use_data:ident)?),
+        // `HasPinData` or `HasInitData`.
+        @has_data($has_data:ident, $get_data:ident),
+        // `pin_init_from_closure` or `init_from_closure`.
+        @construct_closure($construct_closure:ident),
+        @munch_fields(..Zeroable::zeroed()),
+    ) => {
+        $crate::__init_internal!(with_update_parsed:
+            @this($($this)?),
+            @typ($t $(::<$($generics),*>)? ),
+            @fields($($fields)*),
+            @error($err),
+            @data($data, $($use_data)?),
+            @has_data($has_data, $get_data),
+            @construct_closure($construct_closure),
+            @zeroed(()), // `()` means zero all fields not mentioned.
+        )
+    };
+    (
+        @this($($this:ident)?),
+        @typ($t:ident $(::<$($generics:ty),*>)?),
+        @fields($($fields:tt)*),
+        @error($err:ty),
+        // Either `PinData` or `InitData`, `$use_data` should only be present in the `PinData`
+        // case.
+        @data($data:ident, $($use_data:ident)?),
+        // `HasPinData` or `HasInitData`.
+        @has_data($has_data:ident, $get_data:ident),
+        // `pin_init_from_closure` or `init_from_closure`.
+        @construct_closure($construct_closure:ident),
+        @munch_fields($ignore:tt $($rest:tt)*),
+    ) => {
+        $crate::__init_internal!(
+            @this($($this)?),
+            @typ($t $(::<$($generics),*>)? ),
+            @fields($($fields)*),
+            @error($err),
+            @data($data, $($use_data)?),
+            @has_data($has_data, $get_data),
+            @construct_closure($construct_closure),
+            @munch_fields($($rest)*),
+        )
+    };
+    (with_update_parsed:
+        @this($($this:ident)?),
+        @typ($t:ident $(::<$($generics:ty),*>)?),
+        @fields($($fields:tt)*),
+        @error($err:ty),
+        // Either `PinData` or `InitData`, `$use_data` should only be present in the `PinData`
+        // case.
+        @data($data:ident, $($use_data:ident)?),
+        // `HasPinData` or `HasInitData`.
+        @has_data($has_data:ident, $get_data:ident),
+        // `pin_init_from_closure` or `init_from_closure`.
+        @construct_closure($construct_closure:ident),
+        @zeroed($($init_zeroed:expr)?),
     ) => {{
         // We do not want to allow arbitrary returns, so we declare this type as the `Ok` return
         // type and shadow it later when we insert the arbitrary user code. That way there will be
@@ -1026,6 +1103,17 @@ macro_rules! __init_internal {
                 {
                     // Shadow the structure so it cannot be used to return early.
                     struct __InitOk;
+                    // If `$init_zeroed` is present we should zero the slot now and not emit an
+                    // error when fields are missing (since they will be zeroed). We also have to
+                    // check that the type actually implements `Zeroable`.
+                    $({
+                        fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {}
+                        // Ensure that the struct is indeed `Zeroable`.
+                        assert_zeroable(slot);
+                        // SAFETY: The type implements `Zeroable` by the check above.
+                        unsafe { ::core::ptr::write_bytes(slot, 0, 1) };
+                        $init_zeroed // This will be `()` if set.
+                    })?
                     // Create the `this` so it can be referenced by the user inside of the
                     // expressions creating the individual fields.
                     $(let $this = unsafe { ::core::ptr::NonNull::new_unchecked(slot) };)?
@@ -1062,7 +1150,7 @@ macro_rules! __init_internal {
         @data($data:ident),
         @slot($slot:ident),
         @guards($($guards:ident,)*),
-        @munch_fields($(,)?),
+        @munch_fields($(..Zeroable::zeroed())? $(,)?),
     ) => {
         // Endpoint of munching, no fields are left. If execution reaches this point, all fields
         // have been initialized. Therefore we can now dismiss the guards by forgetting them.
@@ -1163,6 +1251,31 @@ macro_rules! __init_internal {
             );
         }
     };
+    (make_initializer:
+        @slot($slot:ident),
+        @type_name($t:ident),
+        @munch_fields(..Zeroable::zeroed() $(,)?),
+        @acc($($acc:tt)*),
+    ) => {
+        // Endpoint, nothing more to munch, create the initializer. Since the users specified
+        // `..Zeroable::zeroed()`, the slot will already have been zeroed and all field that have
+        // not been overwritten are thus zero and initialized. We still check that all fields are
+        // actually accessible by using the struct update syntax ourselves.
+        // We are inside of a closure that is never executed and thus we can abuse `slot` to
+        // get the correct type inference here:
+        #[allow(unused_assignments)]
+        unsafe {
+            let mut zeroed = ::core::mem::zeroed();
+            // We have to use type inference here to make zeroed have the correct type. This does
+            // not get executed, so it has no effect.
+            ::core::ptr::write($slot, zeroed);
+            zeroed = ::core::mem::zeroed();
+            ::core::ptr::write($slot, $t {
+                $($acc)*
+                ..zeroed
+            });
+        }
+    };
     (make_initializer:
         @slot($slot:ident),
         @type_name($t:ident),

From 9e49439077fe0a9aa062e682193b1f3257b314e2 Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:47:15 +0000
Subject: [PATCH 1428/2047] rust: init: add functions to create array
 initializers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add two functions `pin_init_array_from_fn` and `init_array_from_fn` that
take a function that generates initializers for `T` from `usize`, the added
functions then return an initializer for `[T; N]` where every element is
initialized by an element returned from the generator function.

Suggested-by: Asahi Lina <lina@asahilina.net>
Reviewed-by: Björn Roy Baron <bjorn3_gh@protonmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20230814084602.25699-9-benno.lossin@proton.me
[ Cleaned a couple trivial nits. ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init.rs | 88 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index 33f2666d5bae..429b485d8825 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -202,6 +202,7 @@
 use crate::{
     error::{self, Error},
     sync::UniqueArc,
+    types::ScopeGuard,
 };
 use alloc::boxed::Box;
 use core::{
@@ -867,6 +868,93 @@ pub fn uninit<T, E>() -> impl Init<MaybeUninit<T>, E> {
     unsafe { init_from_closure(|_| Ok(())) }
 }
 
+/// Initializes an array by initializing each element via the provided initializer.
+///
+/// # Examples
+///
+/// ```rust
+/// use kernel::{error::Error, init::init_array_from_fn};
+/// let array: Box<[usize; 1_000]>= Box::init::<Error>(init_array_from_fn(|i| i)).unwrap();
+/// assert_eq!(array.len(), 1_000);
+/// ```
+pub fn init_array_from_fn<I, const N: usize, T, E>(
+    mut make_init: impl FnMut(usize) -> I,
+) -> impl Init<[T; N], E>
+where
+    I: Init<T, E>,
+{
+    let init = move |slot: *mut [T; N]| {
+        let slot = slot.cast::<T>();
+        // Counts the number of initialized elements and when dropped drops that many elements from
+        // `slot`.
+        let mut init_count = ScopeGuard::new_with_data(0, |i| {
+            // We now free every element that has been initialized before:
+            // SAFETY: The loop initialized exactly the values from 0..i and since we
+            // return `Err` below, the caller will consider the memory at `slot` as
+            // uninitialized.
+            unsafe { ptr::drop_in_place(ptr::slice_from_raw_parts_mut(slot, i)) };
+        });
+        for i in 0..N {
+            let init = make_init(i);
+            // SAFETY: Since 0 <= `i` < N, it is still in bounds of `[T; N]`.
+            let ptr = unsafe { slot.add(i) };
+            // SAFETY: The pointer is derived from `slot` and thus satisfies the `__init`
+            // requirements.
+            unsafe { init.__init(ptr) }?;
+            *init_count += 1;
+        }
+        init_count.dismiss();
+        Ok(())
+    };
+    // SAFETY: The initializer above initializes every element of the array. On failure it drops
+    // any initialized elements and returns `Err`.
+    unsafe { init_from_closure(init) }
+}
+
+/// Initializes an array by initializing each element via the provided initializer.
+///
+/// # Examples
+///
+/// ```rust
+/// use kernel::{sync::{Arc, Mutex}, init::pin_init_array_from_fn, new_mutex};
+/// let array: Arc<[Mutex<usize>; 1_000]>=
+///     Arc::pin_init(pin_init_array_from_fn(|i| new_mutex!(i))).unwrap();
+/// assert_eq!(array.len(), 1_000);
+/// ```
+pub fn pin_init_array_from_fn<I, const N: usize, T, E>(
+    mut make_init: impl FnMut(usize) -> I,
+) -> impl PinInit<[T; N], E>
+where
+    I: PinInit<T, E>,
+{
+    let init = move |slot: *mut [T; N]| {
+        let slot = slot.cast::<T>();
+        // Counts the number of initialized elements and when dropped drops that many elements from
+        // `slot`.
+        let mut init_count = ScopeGuard::new_with_data(0, |i| {
+            // We now free every element that has been initialized before:
+            // SAFETY: The loop initialized exactly the values from 0..i and since we
+            // return `Err` below, the caller will consider the memory at `slot` as
+            // uninitialized.
+            unsafe { ptr::drop_in_place(ptr::slice_from_raw_parts_mut(slot, i)) };
+        });
+        for i in 0..N {
+            let init = make_init(i);
+            // SAFETY: Since 0 <= `i` < N, it is still in bounds of `[T; N]`.
+            let ptr = unsafe { slot.add(i) };
+            // SAFETY: The pointer is derived from `slot` and thus satisfies the `__init`
+            // requirements.
+            unsafe { init.__pinned_init(ptr) }?;
+            *init_count += 1;
+        }
+        init_count.dismiss();
+        Ok(())
+    };
+    // SAFETY: The initializer above initializes every element of the array. On failure it drops
+    // any initialized elements and returns `Err`.
+    unsafe { pin_init_from_closure(init) }
+}
+
 // SAFETY: Every type can be initialized by-value.
 unsafe impl<T, E> Init<T, E> for T {
     unsafe fn __init(self, slot: *mut T) -> Result<(), E> {

From 674b1c7aed6082e1ce329bb3bcb49e7eb9913e79 Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:47:28 +0000
Subject: [PATCH 1429/2047] rust: init: add support for arbitrary paths in init
 macros

Previously only `ident` and generic types were supported in the
`{try_}{pin_}init!` macros. This patch allows arbitrary path fragments,
so for example `Foo::Bar` but also very complex paths such as
`<Foo as Baz>::Bar::<0, i32>`.

Internally this is accomplished by using `path` fragments. Due to some
peculiar declarative macro limitations, we have to "forget" certain
additional parsing information in the token trees. This is achieved by
using the `paste!` proc macro. It does not actually modify the input,
since no `[< >]` will be present in the input, so it just strips the
information held by declarative macros. For example, if a declarative
macro takes `$t:path` as its input, it cannot sensibly propagate this to
a macro that takes `$($p:tt)*` as its input, since the `$t` token will
only be considered one `tt` token for the second macro. If we first pipe
the tokens through `paste!`, then it parses as expected.

Suggested-by: Asahi Lina <lina@asahilina.net>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20230814084602.25699-10-benno.lossin@proton.me
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init/macros.rs | 54 ++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 19 deletions(-)

diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs
index 9931666293fa..5f0a0bdb9896 100644
--- a/rust/kernel/init/macros.rs
+++ b/rust/kernel/init/macros.rs
@@ -1000,7 +1000,7 @@ macro_rules! __pin_data {
 macro_rules! __init_internal {
     (
         @this($($this:ident)?),
-        @typ($t:ident $(::<$($generics:ty),*>)?),
+        @typ($t:path),
         @fields($($fields:tt)*),
         @error($err:ty),
         // Either `PinData` or `InitData`, `$use_data` should only be present in the `PinData`
@@ -1014,7 +1014,7 @@ macro_rules! __init_internal {
     ) => {
         $crate::__init_internal!(with_update_parsed:
             @this($($this)?),
-            @typ($t $(::<$($generics),*>)? ),
+            @typ($t),
             @fields($($fields)*),
             @error($err),
             @data($data, $($use_data)?),
@@ -1025,7 +1025,7 @@ macro_rules! __init_internal {
     };
     (
         @this($($this:ident)?),
-        @typ($t:ident $(::<$($generics:ty),*>)?),
+        @typ($t:path),
         @fields($($fields:tt)*),
         @error($err:ty),
         // Either `PinData` or `InitData`, `$use_data` should only be present in the `PinData`
@@ -1039,7 +1039,7 @@ macro_rules! __init_internal {
     ) => {
         $crate::__init_internal!(with_update_parsed:
             @this($($this)?),
-            @typ($t $(::<$($generics),*>)? ),
+            @typ($t),
             @fields($($fields)*),
             @error($err),
             @data($data, $($use_data)?),
@@ -1050,7 +1050,7 @@ macro_rules! __init_internal {
     };
     (
         @this($($this:ident)?),
-        @typ($t:ident $(::<$($generics:ty),*>)?),
+        @typ($t:path),
         @fields($($fields:tt)*),
         @error($err:ty),
         // Either `PinData` or `InitData`, `$use_data` should only be present in the `PinData`
@@ -1064,7 +1064,7 @@ macro_rules! __init_internal {
     ) => {
         $crate::__init_internal!(
             @this($($this)?),
-            @typ($t $(::<$($generics),*>)? ),
+            @typ($t),
             @fields($($fields)*),
             @error($err),
             @data($data, $($use_data)?),
@@ -1075,7 +1075,7 @@ macro_rules! __init_internal {
     };
     (with_update_parsed:
         @this($($this:ident)?),
-        @typ($t:ident $(::<$($generics:ty),*>)?),
+        @typ($t:path),
         @fields($($fields:tt)*),
         @error($err:ty),
         // Either `PinData` or `InitData`, `$use_data` should only be present in the `PinData`
@@ -1094,7 +1094,11 @@ macro_rules! __init_internal {
         // Get the data about fields from the supplied type.
         let data = unsafe {
             use $crate::init::__internal::$has_data;
-            $t$(::<$($generics),*>)?::$get_data()
+            // Here we abuse `paste!` to retokenize `$t`. Declarative macros have some internal
+            // information that is associated to already parsed fragments, so a path fragment
+            // cannot be used in this position. Doing the retokenization results in valid rust
+            // code.
+            ::kernel::macros::paste!($t::$get_data())
         };
         // Ensure that `data` really is of type `$data` and help with type inference:
         let init = $crate::init::__internal::$data::make_closure::<_, __InitOk, $err>(
@@ -1253,7 +1257,7 @@ macro_rules! __init_internal {
     };
     (make_initializer:
         @slot($slot:ident),
-        @type_name($t:ident),
+        @type_name($t:path),
         @munch_fields(..Zeroable::zeroed() $(,)?),
         @acc($($acc:tt)*),
     ) => {
@@ -1270,15 +1274,21 @@ macro_rules! __init_internal {
             // not get executed, so it has no effect.
             ::core::ptr::write($slot, zeroed);
             zeroed = ::core::mem::zeroed();
-            ::core::ptr::write($slot, $t {
-                $($acc)*
-                ..zeroed
-            });
+            // Here we abuse `paste!` to retokenize `$t`. Declarative macros have some internal
+            // information that is associated to already parsed fragments, so a path fragment
+            // cannot be used in this position. Doing the retokenization results in valid rust
+            // code.
+            ::kernel::macros::paste!(
+                ::core::ptr::write($slot, $t {
+                    $($acc)*
+                    ..zeroed
+                });
+            );
         }
     };
     (make_initializer:
         @slot($slot:ident),
-        @type_name($t:ident),
+        @type_name($t:path),
         @munch_fields($(,)?),
         @acc($($acc:tt)*),
     ) => {
@@ -1286,14 +1296,20 @@ macro_rules! __init_internal {
         // Since we are in the closure that is never called, this will never get executed.
         // We abuse `slot` to get the correct type inference here:
         unsafe {
-            ::core::ptr::write($slot, $t {
-                $($acc)*
-            });
+            // Here we abuse `paste!` to retokenize `$t`. Declarative macros have some internal
+            // information that is associated to already parsed fragments, so a path fragment
+            // cannot be used in this position. Doing the retokenization results in valid rust
+            // code.
+            ::kernel::macros::paste!(
+                ::core::ptr::write($slot, $t {
+                    $($acc)*
+                });
+            );
         }
     };
     (make_initializer:
         @slot($slot:ident),
-        @type_name($t:ident),
+        @type_name($t:path),
         @munch_fields($field:ident <- $val:expr, $($rest:tt)*),
         @acc($($acc:tt)*),
     ) => {
@@ -1306,7 +1322,7 @@ macro_rules! __init_internal {
     };
     (make_initializer:
         @slot($slot:ident),
-        @type_name($t:ident),
+        @type_name($t:path),
         @munch_fields($field:ident $(: $val:expr)?, $($rest:tt)*),
         @acc($($acc:tt)*),
     ) => {

From 2e704f1883f5dd2f1380944c7d969c817fcd189e Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:47:32 +0000
Subject: [PATCH 1430/2047] rust: init: implement `Zeroable` for
 `UnsafeCell<T>` and `Opaque<T>`

`UnsafeCell<T>` and `T` have the same layout so if `T` is `Zeroable`
then so should `UnsafeCell<T>` be. This allows using the derive macro
for `Zeroable` on types that contain an `UnsafeCell<T>`.
Since `Opaque<T>` contains a `MaybeUninit<T>`, all bytes zero is a valid
bit pattern for that type.

Reviewed-by: Gary Guo <gary@garyguo.net>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20230814084602.25699-11-benno.lossin@proton.me
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init.rs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index 429b485d8825..0e44b3cc2eed 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -202,11 +202,12 @@
 use crate::{
     error::{self, Error},
     sync::UniqueArc,
-    types::ScopeGuard,
+    types::{Opaque, ScopeGuard},
 };
 use alloc::boxed::Box;
 use core::{
     alloc::AllocError,
+    cell::UnsafeCell,
     convert::Infallible,
     marker::PhantomData,
     mem::MaybeUninit,
@@ -1151,6 +1152,11 @@ impl_zeroable! {
 
     // SAFETY: Type is allowed to take any value, including all zeros.
     {<T>} MaybeUninit<T>,
+    // SAFETY: Type is allowed to take any value, including all zeros.
+    {<T>} Opaque<T>,
+
+    // SAFETY: `T: Zeroable` and `UnsafeCell` is `repr(transparent)`.
+    {<T: ?Sized + Zeroable>} UnsafeCell<T>,
 
     // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee).
     Option<NonZeroU8>, Option<NonZeroU16>, Option<NonZeroU32>, Option<NonZeroU64>,

From 1a8076ac6d83825bedb2050e67db0f2635acbb09 Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:47:40 +0000
Subject: [PATCH 1431/2047] rust: init: make `PinInit<T, E>` a supertrait of
 `Init<T, E>`

Remove the blanket implementation of `PinInit<T, E> for I where I:
Init<T, E>`. This blanket implementation prevented custom types that
implement `PinInit`.

Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Gary Guo <gary@garyguo.net>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20230814084602.25699-12-benno.lossin@proton.me
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init.rs            | 21 ++++++++-------------
 rust/kernel/init/__internal.rs | 12 ++++++++++++
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index 0e44b3cc2eed..0f9c8576697e 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -799,7 +799,7 @@ pub unsafe trait PinInit<T: ?Sized, E = Infallible>: Sized {
 ///
 /// [`Arc<T>`]: crate::sync::Arc
 #[must_use = "An initializer must be used in order to create its value."]
-pub unsafe trait Init<T: ?Sized, E = Infallible>: Sized {
+pub unsafe trait Init<T: ?Sized, E = Infallible>: PinInit<T, E> {
     /// Initializes `slot`.
     ///
     /// # Safety
@@ -810,18 +810,6 @@ pub unsafe trait Init<T: ?Sized, E = Infallible>: Sized {
     unsafe fn __init(self, slot: *mut T) -> Result<(), E>;
 }
 
-// SAFETY: Every in-place initializer can also be used as a pin-initializer.
-unsafe impl<T: ?Sized, E, I> PinInit<T, E> for I
-where
-    I: Init<T, E>,
-{
-    unsafe fn __pinned_init(self, slot: *mut T) -> Result<(), E> {
-        // SAFETY: `__init` meets the same requirements as `__pinned_init`, except that it does not
-        // require `slot` to not move after init.
-        unsafe { self.__init(slot) }
-    }
-}
-
 /// Creates a new [`PinInit<T, E>`] from the given closure.
 ///
 /// # Safety
@@ -964,6 +952,13 @@ unsafe impl<T, E> Init<T, E> for T {
     }
 }
 
+// SAFETY: Every type can be initialized by-value. `__pinned_init` calls `__init`.
+unsafe impl<T, E> PinInit<T, E> for T {
+    unsafe fn __pinned_init(self, slot: *mut T) -> Result<(), E> {
+        unsafe { self.__init(slot) }
+    }
+}
+
 /// Smart pointer that can initialize memory in-place.
 pub trait InPlaceInit<T>: Sized {
     /// Use the given pin-initializer to pin-initialize a `T` inside of a new smart pointer of this
diff --git a/rust/kernel/init/__internal.rs b/rust/kernel/init/__internal.rs
index 7abd1fb65e41..12e195061525 100644
--- a/rust/kernel/init/__internal.rs
+++ b/rust/kernel/init/__internal.rs
@@ -32,6 +32,18 @@ where
     }
 }
 
+// SAFETY: While constructing the `InitClosure`, the user promised that it upholds the
+// `__pinned_init` invariants.
+unsafe impl<T: ?Sized, F, E> PinInit<T, E> for InitClosure<F, T, E>
+where
+    F: FnOnce(*mut T) -> Result<(), E>,
+{
+    #[inline]
+    unsafe fn __pinned_init(self, slot: *mut T) -> Result<(), E> {
+        (self.0)(slot)
+    }
+}
+
 /// This trait is only implemented via the `#[pin_data]` proc-macro. It is used to facilitate
 /// the pin projections within the initializers.
 ///

From 7f8977a7fe6df9cdfe489c641058ca5534ec73eb Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:47:48 +0000
Subject: [PATCH 1432/2047] rust: init: add `{pin_}chain` functions to
 `{Pin}Init<T, E>`

The `{pin_}chain` functions extend an initializer: it not only
initializes the value, but also executes a closure taking a reference to
the initialized value. This allows to do something with a value directly
after initialization.

Suggested-by: Asahi Lina <lina@asahilina.net>
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Link: https://lore.kernel.org/r/20230814084602.25699-13-benno.lossin@proton.me
[ Cleaned a few trivial nits. ]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init.rs            | 142 +++++++++++++++++++++++++++++++++
 rust/kernel/init/__internal.rs |   2 +-
 2 files changed, 143 insertions(+), 1 deletion(-)

diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index 0f9c8576697e..0071b2834b78 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -767,6 +767,79 @@ pub unsafe trait PinInit<T: ?Sized, E = Infallible>: Sized {
     ///   deallocate.
     /// - `slot` will not move until it is dropped, i.e. it will be pinned.
     unsafe fn __pinned_init(self, slot: *mut T) -> Result<(), E>;
+
+    /// First initializes the value using `self` then calls the function `f` with the initialized
+    /// value.
+    ///
+    /// If `f` returns an error the value is dropped and the initializer will forward the error.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # #![allow(clippy::disallowed_names)]
+    /// use kernel::{types::Opaque, init::pin_init_from_closure};
+    /// #[repr(C)]
+    /// struct RawFoo([u8; 16]);
+    /// extern {
+    ///     fn init_foo(_: *mut RawFoo);
+    /// }
+    ///
+    /// #[pin_data]
+    /// struct Foo {
+    ///     #[pin]
+    ///     raw: Opaque<RawFoo>,
+    /// }
+    ///
+    /// impl Foo {
+    ///     fn setup(self: Pin<&mut Self>) {
+    ///         pr_info!("Setting up foo");
+    ///     }
+    /// }
+    ///
+    /// let foo = pin_init!(Foo {
+    ///     raw <- unsafe {
+    ///         Opaque::ffi_init(|s| {
+    ///             init_foo(s);
+    ///         })
+    ///     },
+    /// }).pin_chain(|foo| {
+    ///     foo.setup();
+    ///     Ok(())
+    /// });
+    /// ```
+    fn pin_chain<F>(self, f: F) -> ChainPinInit<Self, F, T, E>
+    where
+        F: FnOnce(Pin<&mut T>) -> Result<(), E>,
+    {
+        ChainPinInit(self, f, PhantomData)
+    }
+}
+
+/// An initializer returned by [`PinInit::pin_chain`].
+pub struct ChainPinInit<I, F, T: ?Sized, E>(I, F, __internal::Invariant<(E, Box<T>)>);
+
+// SAFETY: The `__pinned_init` function is implemented such that it
+// - returns `Ok(())` on successful initialization,
+// - returns `Err(err)` on error and in this case `slot` will be dropped.
+// - considers `slot` pinned.
+unsafe impl<T: ?Sized, E, I, F> PinInit<T, E> for ChainPinInit<I, F, T, E>
+where
+    I: PinInit<T, E>,
+    F: FnOnce(Pin<&mut T>) -> Result<(), E>,
+{
+    unsafe fn __pinned_init(self, slot: *mut T) -> Result<(), E> {
+        // SAFETY: All requirements fulfilled since this function is `__pinned_init`.
+        unsafe { self.0.__pinned_init(slot)? };
+        // SAFETY: The above call initialized `slot` and we still have unique access.
+        let val = unsafe { &mut *slot };
+        // SAFETY: `slot` is considered pinned.
+        let val = unsafe { Pin::new_unchecked(val) };
+        (self.1)(val).map_err(|e| {
+            // SAFETY: `slot` was initialized above.
+            unsafe { core::ptr::drop_in_place(slot) };
+            e
+        })
+    }
 }
 
 /// An initializer for `T`.
@@ -808,6 +881,75 @@ pub unsafe trait Init<T: ?Sized, E = Infallible>: PinInit<T, E> {
     /// - the caller does not touch `slot` when `Err` is returned, they are only permitted to
     ///   deallocate.
     unsafe fn __init(self, slot: *mut T) -> Result<(), E>;
+
+    /// First initializes the value using `self` then calls the function `f` with the initialized
+    /// value.
+    ///
+    /// If `f` returns an error the value is dropped and the initializer will forward the error.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # #![allow(clippy::disallowed_names)]
+    /// use kernel::{types::Opaque, init::{self, init_from_closure}};
+    /// struct Foo {
+    ///     buf: [u8; 1_000_000],
+    /// }
+    ///
+    /// impl Foo {
+    ///     fn setup(&mut self) {
+    ///         pr_info!("Setting up foo");
+    ///     }
+    /// }
+    ///
+    /// let foo = init!(Foo {
+    ///     buf <- init::zeroed()
+    /// }).chain(|foo| {
+    ///     foo.setup();
+    ///     Ok(())
+    /// });
+    /// ```
+    fn chain<F>(self, f: F) -> ChainInit<Self, F, T, E>
+    where
+        F: FnOnce(&mut T) -> Result<(), E>,
+    {
+        ChainInit(self, f, PhantomData)
+    }
+}
+
+/// An initializer returned by [`Init::chain`].
+pub struct ChainInit<I, F, T: ?Sized, E>(I, F, __internal::Invariant<(E, Box<T>)>);
+
+// SAFETY: The `__init` function is implemented such that it
+// - returns `Ok(())` on successful initialization,
+// - returns `Err(err)` on error and in this case `slot` will be dropped.
+unsafe impl<T: ?Sized, E, I, F> Init<T, E> for ChainInit<I, F, T, E>
+where
+    I: Init<T, E>,
+    F: FnOnce(&mut T) -> Result<(), E>,
+{
+    unsafe fn __init(self, slot: *mut T) -> Result<(), E> {
+        // SAFETY: All requirements fulfilled since this function is `__init`.
+        unsafe { self.0.__pinned_init(slot)? };
+        // SAFETY: The above call initialized `slot` and we still have unique access.
+        (self.1)(unsafe { &mut *slot }).map_err(|e| {
+            // SAFETY: `slot` was initialized above.
+            unsafe { core::ptr::drop_in_place(slot) };
+            e
+        })
+    }
+}
+
+// SAFETY: `__pinned_init` behaves exactly the same as `__init`.
+unsafe impl<T: ?Sized, E, I, F> PinInit<T, E> for ChainInit<I, F, T, E>
+where
+    I: Init<T, E>,
+    F: FnOnce(&mut T) -> Result<(), E>,
+{
+    unsafe fn __pinned_init(self, slot: *mut T) -> Result<(), E> {
+        // SAFETY: `__init` has less strict requirements compared to `__pinned_init`.
+        unsafe { self.__init(slot) }
+    }
 }
 
 /// Creates a new [`PinInit<T, E>`] from the given closure.
diff --git a/rust/kernel/init/__internal.rs b/rust/kernel/init/__internal.rs
index 12e195061525..db3372619ecd 100644
--- a/rust/kernel/init/__internal.rs
+++ b/rust/kernel/init/__internal.rs
@@ -13,7 +13,7 @@ use super::*;
 ///
 /// [nomicon]: https://doc.rust-lang.org/nomicon/subtyping.html
 /// [this table]: https://doc.rust-lang.org/nomicon/phantom-data.html#table-of-phantomdata-patterns
-type Invariant<T> = PhantomData<fn(*mut T) -> *mut T>;
+pub(super) type Invariant<T> = PhantomData<fn(*mut T) -> *mut T>;
 
 /// This is the module-internal type implementing `PinInit` and `Init`. It is unsafe to create this
 /// type, since the closure needs to fulfill the same safety requirement as the

From 4af84c6a85c63bec24611e46bb3de2c0a6602a51 Mon Sep 17 00:00:00 2001
From: Benno Lossin <benno.lossin@proton.me>
Date: Mon, 14 Aug 2023 08:47:54 +0000
Subject: [PATCH 1433/2047] rust: init: update expanded macro explanation

The previous patches changed the internals of the macros resulting in
the example expanded code being outdated. This patch updates the example
and only changes documentation.

Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Link: https://lore.kernel.org/r/20230814084602.25699-14-benno.lossin@proton.me
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 rust/kernel/init/macros.rs | 126 ++++++++++++++++++++-----------------
 1 file changed, 69 insertions(+), 57 deletions(-)

diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs
index 5f0a0bdb9896..cb6e61b6c50b 100644
--- a/rust/kernel/init/macros.rs
+++ b/rust/kernel/init/macros.rs
@@ -45,7 +45,7 @@
 //! #[pinned_drop]
 //! impl PinnedDrop for Foo {
 //!     fn drop(self: Pin<&mut Self>) {
-//!         println!("{self:p} is getting dropped.");
+//!         pr_info!("{self:p} is getting dropped.");
 //!     }
 //! }
 //!
@@ -170,8 +170,10 @@
 //!         t: T,
 //!     }
 //!     #[doc(hidden)]
-//!     impl<'__pin, T>
-//!         ::core::marker::Unpin for Bar<T> where __Unpin<'__pin, T>: ::core::marker::Unpin {}
+//!     impl<'__pin, T> ::core::marker::Unpin for Bar<T>
+//!     where
+//!         __Unpin<'__pin, T>: ::core::marker::Unpin,
+//!     {}
 //!     // Now we need to ensure that `Bar` does not implement `Drop`, since that would give users
 //!     // access to `&mut self` inside of `drop` even if the struct was pinned. This could lead to
 //!     // UB with only safe code, so we disallow this by giving a trait implementation error using
@@ -188,8 +190,9 @@
 //!     // for safety, but a good sanity check, since no normal code calls `PinnedDrop::drop`.
 //!     #[allow(non_camel_case_types)]
 //!     trait UselessPinnedDropImpl_you_need_to_specify_PinnedDrop {}
-//!     impl<T: ::kernel::init::PinnedDrop>
-//!         UselessPinnedDropImpl_you_need_to_specify_PinnedDrop for T {}
+//!     impl<
+//!         T: ::kernel::init::PinnedDrop,
+//!     > UselessPinnedDropImpl_you_need_to_specify_PinnedDrop for T {}
 //!     impl<T> UselessPinnedDropImpl_you_need_to_specify_PinnedDrop for Bar<T> {}
 //! };
 //! ```
@@ -219,7 +222,7 @@
 //!             // return type and shadow it later when we insert the arbitrary user code. That way
 //!             // there will be no possibility of returning without `unsafe`.
 //!             struct __InitOk;
-//!             // Get the pin-data type from the initialized type.
+//!             // Get the data about fields from the supplied type.
 //!             // - the function is unsafe, hence the unsafe block
 //!             // - we `use` the `HasPinData` trait in the block, it is only available in that
 //!             //   scope.
@@ -227,8 +230,7 @@
 //!                 use ::kernel::init::__internal::HasPinData;
 //!                 Self::__pin_data()
 //!             };
-//!             // Use `data` to help with type inference, the closure supplied will have the type
-//!             // `FnOnce(*mut Self) -> Result<__InitOk, Infallible>`.
+//!             // Ensure that `data` really is of type `PinData` and help with type inference:
 //!             let init = ::kernel::init::__internal::PinData::make_closure::<
 //!                 _,
 //!                 __InitOk,
@@ -236,71 +238,75 @@
 //!             >(data, move |slot| {
 //!                 {
 //!                     // Shadow the structure so it cannot be used to return early. If a user
-//!                     // tries to write `return Ok(__InitOk)`, then they get a type error, since
-//!                     // that will refer to this struct instead of the one defined above.
+//!                     // tries to write `return Ok(__InitOk)`, then they get a type error,
+//!                     // since that will refer to this struct instead of the one defined
+//!                     // above.
 //!                     struct __InitOk;
 //!                     // This is the expansion of `t,`, which is syntactic sugar for `t: t,`.
-//!                     unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).t), t) };
-//!                     // Since initialization could fail later (not in this case, since the error
-//!                     // type is `Infallible`) we will need to drop this field if there is an
-//!                     // error later. This `DropGuard` will drop the field when it gets dropped
-//!                     // and has not yet been forgotten. We make a reference to it, so users
-//!                     // cannot `mem::forget` it from the initializer, since the name is the same
-//!                     // as the field (including hygiene).
-//!                     let t = &unsafe {
-//!                         ::kernel::init::__internal::DropGuard::new(
-//!                             ::core::addr_of_mut!((*slot).t),
-//!                         )
+//!                     {
+//!                         unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).t), t) };
+//!                     }
+//!                     // Since initialization could fail later (not in this case, since the
+//!                     // error type is `Infallible`) we will need to drop this field if there
+//!                     // is an error later. This `DropGuard` will drop the field when it gets
+//!                     // dropped and has not yet been forgotten.
+//!                     let t = unsafe {
+//!                         ::pinned_init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).t))
 //!                     };
 //!                     // Expansion of `x: 0,`:
-//!                     // Since this can be an arbitrary expression we cannot place it inside of
-//!                     // the `unsafe` block, so we bind it here.
-//!                     let x = 0;
-//!                     unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).x), x) };
+//!                     // Since this can be an arbitrary expression we cannot place it inside
+//!                     // of the `unsafe` block, so we bind it here.
+//!                     {
+//!                         let x = 0;
+//!                         unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).x), x) };
+//!                     }
 //!                     // We again create a `DropGuard`.
-//!                     let x = &unsafe {
-//!                         ::kernel::init::__internal::DropGuard::new(
-//!                             ::core::addr_of_mut!((*slot).x),
-//!                         )
+//!                     let x = unsafe {
+//!                         ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).x))
 //!                     };
-//!
+//!                     // Since initialization has successfully completed, we can now forget
+//!                     // the guards. This is not `mem::forget`, since we only have
+//!                     // `&DropGuard`.
+//!                     ::core::mem::forget(x);
+//!                     ::core::mem::forget(t);
 //!                     // Here we use the type checker to ensure that every field has been
 //!                     // initialized exactly once, since this is `if false` it will never get
 //!                     // executed, but still type-checked.
-//!                     // Additionally we abuse `slot` to automatically infer the correct type for
-//!                     // the struct. This is also another check that every field is accessible
-//!                     // from this scope.
+//!                     // Additionally we abuse `slot` to automatically infer the correct type
+//!                     // for the struct. This is also another check that every field is
+//!                     // accessible from this scope.
 //!                     #[allow(unreachable_code, clippy::diverging_sub_expression)]
-//!                     if false {
+//!                     let _ = || {
 //!                         unsafe {
 //!                             ::core::ptr::write(
 //!                                 slot,
 //!                                 Self {
-//!                                     // We only care about typecheck finding every field here,
-//!                                     // the expression does not matter, just conjure one using
-//!                                     // `panic!()`:
+//!                                     // We only care about typecheck finding every field
+//!                                     // here, the expression does not matter, just conjure
+//!                                     // one using `panic!()`:
 //!                                     t: ::core::panic!(),
 //!                                     x: ::core::panic!(),
 //!                                 },
 //!                             );
 //!                         };
-//!                     }
-//!                     // Since initialization has successfully completed, we can now forget the
-//!                     // guards. This is not `mem::forget`, since we only have `&DropGuard`.
-//!                     unsafe { ::kernel::init::__internal::DropGuard::forget(t) };
-//!                     unsafe { ::kernel::init::__internal::DropGuard::forget(x) };
+//!                     };
 //!                 }
 //!                 // We leave the scope above and gain access to the previously shadowed
 //!                 // `__InitOk` that we need to return.
 //!                 Ok(__InitOk)
 //!             });
 //!             // Change the return type from `__InitOk` to `()`.
-//!             let init = move |slot| -> ::core::result::Result<(), ::core::convert::Infallible> {
+//!             let init = move |
+//!                 slot,
+//!             | -> ::core::result::Result<(), ::core::convert::Infallible> {
 //!                 init(slot).map(|__InitOk| ())
 //!             };
 //!             // Construct the initializer.
 //!             let init = unsafe {
-//!                 ::kernel::init::pin_init_from_closure::<_, ::core::convert::Infallible>(init)
+//!                 ::kernel::init::pin_init_from_closure::<
+//!                     _,
+//!                     ::core::convert::Infallible,
+//!                 >(init)
 //!             };
 //!             init
 //!         }
@@ -374,7 +380,10 @@
 //!         b: Bar<u32>,
 //!     }
 //!     #[doc(hidden)]
-//!     impl<'__pin> ::core::marker::Unpin for Foo where __Unpin<'__pin>: ::core::marker::Unpin {}
+//!     impl<'__pin> ::core::marker::Unpin for Foo
+//!     where
+//!         __Unpin<'__pin>: ::core::marker::Unpin,
+//!     {}
 //!     // Since we specified `PinnedDrop` as the argument to `#[pin_data]`, we expect `Foo` to
 //!     // implement `PinnedDrop`. Thus we do not need to prevent `Drop` implementations like
 //!     // before, instead we implement `Drop` here and delegate to `PinnedDrop`.
@@ -403,7 +412,7 @@
 //! #[pinned_drop]
 //! impl PinnedDrop for Foo {
 //!     fn drop(self: Pin<&mut Self>) {
-//!         println!("{self:p} is getting dropped.");
+//!         pr_info!("{self:p} is getting dropped.");
 //!     }
 //! }
 //! ```
@@ -414,7 +423,7 @@
 //! // `unsafe`, full path and the token parameter are added, everything else stays the same.
 //! unsafe impl ::kernel::init::PinnedDrop for Foo {
 //!     fn drop(self: Pin<&mut Self>, _: ::kernel::init::__internal::OnlyCallFromDrop) {
-//!         println!("{self:p} is getting dropped.");
+//!         pr_info!("{self:p} is getting dropped.");
 //!     }
 //! }
 //! ```
@@ -449,18 +458,21 @@
 //!     >(data, move |slot| {
 //!         {
 //!             struct __InitOk;
-//!             unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).a), a) };
-//!             let a = &unsafe {
+//!             {
+//!                 unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).a), a) };
+//!             }
+//!             let a = unsafe {
 //!                 ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).a))
 //!             };
-//!             let b = Bar::new(36);
+//!             let init = Bar::new(36);
 //!             unsafe { data.b(::core::addr_of_mut!((*slot).b), b)? };
-//!             let b = &unsafe {
+//!             let b = unsafe {
 //!                 ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).b))
 //!             };
-//!
+//!             ::core::mem::forget(b);
+//!             ::core::mem::forget(a);
 //!             #[allow(unreachable_code, clippy::diverging_sub_expression)]
-//!             if false {
+//!             let _ = || {
 //!                 unsafe {
 //!                     ::core::ptr::write(
 //!                         slot,
@@ -470,13 +482,13 @@
 //!                         },
 //!                     );
 //!                 };
-//!             }
-//!             unsafe { ::kernel::init::__internal::DropGuard::forget(a) };
-//!             unsafe { ::kernel::init::__internal::DropGuard::forget(b) };
+//!             };
 //!         }
 //!         Ok(__InitOk)
 //!     });
-//!     let init = move |slot| -> ::core::result::Result<(), ::core::convert::Infallible> {
+//!     let init = move |
+//!         slot,
+//!     | -> ::core::result::Result<(), ::core::convert::Infallible> {
 //!         init(slot).map(|__InitOk| ())
 //!     };
 //!     let init = unsafe {

From 38a700efc51080c7184f71edbf5e49561da9754f Mon Sep 17 00:00:00 2001
From: Asmaa Mnebhi <asmaa@nvidia.com>
Date: Fri, 18 Aug 2023 12:43:14 -0400
Subject: [PATCH 1434/2047] gpio: mlxbf3: Support add_pin_ranges()

Support add_pin_ranges() so that pinctrl_gpio_request() can be called.
The GPIO value is not modified when the user runs the "gpioset" tool.
This is because when gpiochip_generic_request is invoked by the gpio-mlxbf3
driver, "pin_ranges" is empty so it skips "pinctrl_gpio_request()".
pinctrl_gpio_request() is essential in the code flow because it changes the
mux value so that software has control over modifying the GPIO value.
Adding add_pin_ranges() creates a dependency on the pinctrl-mlxbf3.c driver.

Fixes: cd33f216d24 ("gpio: mlxbf3: Add gpio driver support")
Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-mlxbf3.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c
index e30cee108986..0a5f241a8352 100644
--- a/drivers/gpio/gpio-mlxbf3.c
+++ b/drivers/gpio/gpio-mlxbf3.c
@@ -19,6 +19,8 @@
  * gpio[1]: HOST_GPIO32->HOST_GPIO55
  */
 #define MLXBF3_GPIO_MAX_PINS_PER_BLOCK 32
+#define MLXBF3_GPIO_MAX_PINS_BLOCK0    32
+#define MLXBF3_GPIO_MAX_PINS_BLOCK1    24
 
 /*
  * fw_gpio[x] block registers and their offset
@@ -158,6 +160,26 @@ static const struct irq_chip gpio_mlxbf3_irqchip = {
 	GPIOCHIP_IRQ_RESOURCE_HELPERS,
 };
 
+static int mlxbf3_gpio_add_pin_ranges(struct gpio_chip *chip)
+{
+	unsigned int id;
+
+	switch(chip->ngpio) {
+	case MLXBF3_GPIO_MAX_PINS_BLOCK0:
+		id = 0;
+		break;
+	case MLXBF3_GPIO_MAX_PINS_BLOCK1:
+		id = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return gpiochip_add_pin_range(chip, "MLNXBF34:00",
+			chip->base, id * MLXBF3_GPIO_MAX_PINS_PER_BLOCK,
+			chip->ngpio);
+}
+
 static int mlxbf3_gpio_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -197,6 +219,7 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev)
 	gc->request = gpiochip_generic_request;
 	gc->free = gpiochip_generic_free;
 	gc->owner = THIS_MODULE;
+	gc->add_pin_ranges = mlxbf3_gpio_add_pin_ranges;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq >= 0) {
@@ -243,6 +266,7 @@ static struct platform_driver mlxbf3_gpio_driver = {
 };
 module_platform_driver(mlxbf3_gpio_driver);
 
+MODULE_SOFTDEP("pre: pinctrl-mlxbf3");
 MODULE_DESCRIPTION("NVIDIA BlueField-3 GPIO Driver");
 MODULE_AUTHOR("Asmaa Mnebhi <asmaa@nvidia.com>");
 MODULE_LICENSE("Dual BSD/GPL");

From 0c1c9a27ce909e3988f8c6407e26a22a7e1cd276 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Aug 2023 12:08:18 +0200
Subject: [PATCH 1435/2047] nbd: call blk_mark_disk_dead in
 nbd_clear_sock_ioctl

nbd_clear_sock_ioctl kills the socket and with that the block
device.  Instead of just invalidating file system buffers,
mark the device as dead, which will also invalidate the buffers
as part of the proper shutdown sequence.  This also includes
invalidating partitions if there are any.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Message-Id: <20230811100828.1897174-8-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 drivers/block/nbd.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 8576d696c7a2..42e0159bb258 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1434,12 +1434,10 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd)
 	return ret;
 }
 
-static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
-				 struct block_device *bdev)
+static void nbd_clear_sock_ioctl(struct nbd_device *nbd)
 {
+	blk_mark_disk_dead(nbd->disk);
 	nbd_clear_sock(nbd);
-	__invalidate_device(bdev, true);
-	nbd_bdev_reset(nbd);
 	if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
 			       &nbd->config->runtime_flags))
 		nbd_config_put(nbd);
@@ -1465,7 +1463,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 	case NBD_DISCONNECT:
 		return nbd_disconnect(nbd);
 	case NBD_CLEAR_SOCK:
-		nbd_clear_sock_ioctl(nbd, bdev);
+		nbd_clear_sock_ioctl(nbd);
 		return 0;
 	case NBD_SET_SOCK:
 		return nbd_add_socket(nbd, arg, false);

From ab6860f62bfe329e11e5b5b7295b673c9c3a62d0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Aug 2023 12:08:19 +0200
Subject: [PATCH 1436/2047] block: simplify the disk_force_media_change
 interface

Hard code the events to DISK_EVENT_MEDIA_CHANGE as that is the only
useful use case, and drop the superfluous return value.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Message-Id: <20230811100828.1897174-9-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 block/disk-events.c    | 15 ++++-----------
 drivers/block/loop.c   |  6 +++---
 include/linux/blkdev.h |  2 +-
 3 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/block/disk-events.c b/block/disk-events.c
index 0cfac464e6d1..6189b819b235 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -294,25 +294,18 @@ EXPORT_SYMBOL(disk_check_media_change);
  * @disk: the disk which will raise the event
  * @events: the events to raise
  *
- * Generate uevents for the disk. If DISK_EVENT_MEDIA_CHANGE is present,
- * attempt to free all dentries and inodes and invalidates all block
+ * Should be called when the media changes for @disk.  Generates a uevent
+ * and attempts to free all dentries and inodes and invalidates all block
  * device page cache entries in that case.
- *
- * Returns %true if DISK_EVENT_MEDIA_CHANGE was raised, or %false if not.
  */
-bool disk_force_media_change(struct gendisk *disk, unsigned int events)
+void disk_force_media_change(struct gendisk *disk)
 {
-	disk_event_uevent(disk, events);
-
-	if (!(events & DISK_EVENT_MEDIA_CHANGE))
-		return false;
-
+	disk_event_uevent(disk, DISK_EVENT_MEDIA_CHANGE);
 	inc_diskseq(disk);
 	if (__invalidate_device(disk->part0, true))
 		pr_warn("VFS: busy inodes on changed media %s\n",
 			disk->disk_name);
 	set_bit(GD_NEED_PART_SCAN, &disk->state);
-	return true;
 }
 EXPORT_SYMBOL_GPL(disk_force_media_change);
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 37511d2b2caf..705a0effa7d8 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -603,7 +603,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 		goto out_err;
 
 	/* and ... switch */
-	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
+	disk_force_media_change(lo->lo_disk);
 	blk_mq_freeze_queue(lo->lo_queue);
 	mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
 	lo->lo_backing_file = file;
@@ -1067,7 +1067,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
 	/* suppress uevents while reconfiguring the device */
 	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
 
-	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
+	disk_force_media_change(lo->lo_disk);
 	set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
 	lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
@@ -1171,7 +1171,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
 	if (!release)
 		blk_mq_unfreeze_queue(lo->lo_queue);
 
-	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
+	disk_force_media_change(lo->lo_disk);
 
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
 		int err;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 83262702eea7..c8eab6effc22 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -750,7 +750,7 @@ static inline int bdev_read_only(struct block_device *bdev)
 }
 
 bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
-bool disk_force_media_change(struct gendisk *disk, unsigned int events);
+void disk_force_media_change(struct gendisk *disk);
 
 void add_disk_randomness(struct gendisk *disk) __latent_entropy;
 void rand_initialize_disk(struct gendisk *disk);

From a47145f2361976c83bb7d3198e7ff0e13a29fb7e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Aug 2023 12:08:20 +0200
Subject: [PATCH 1437/2047] floppy: call disk_force_media_change when changing
 the format

While changing the format of a floppy isn't strictly speaking a media
change, the effects are the same in that the content of the media
changes and the diskseq should be increased and uevent should be
sent.  Switch from calling __invalidate_device to
disk_force_media_change to do so.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Message-Id: <20230811100828.1897174-10-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 drivers/block/floppy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 2db9b186b977..ea4eb88a2e45 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3255,7 +3255,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
 
 			if (!disk || ITYPE(drive_state[cnt].fd_device) != type)
 				continue;
-			__invalidate_device(disk->part0, true);
+			disk_force_media_change(disk);
 		}
 		mutex_unlock(&open_lock);
 	} else {

From 2c0326c587965a40c4013361b1f4d0e5cca5194e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Aug 2023 12:08:21 +0200
Subject: [PATCH 1438/2047] amiflop: don't call fsync_bdev in FDFMTBEG

FDFMTBEG is used by fdformat to calibrate before formatting a disk.
Neither the atari nor PC floppy driver sync data, which also seems
a bit pointless for a disk hat is about to get formatted.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Message-Id: <20230811100828.1897174-11-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 drivers/block/amiflop.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index e460c9799d9f..2b98114a9fe0 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1547,7 +1547,6 @@ static int fd_locked_ioctl(struct block_device *bdev, blk_mode_t mode,
 			rel_fdc();
 			return -EBUSY;
 		}
-		fsync_bdev(bdev);
 		if (fd_motor_on(drive) == 0) {
 			rel_fdc();
 			return -ENODEV;

From 2527fd38772fea30c1d1cbf94839a0bbf4122133 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Aug 2023 12:08:22 +0200
Subject: [PATCH 1439/2047] dasd: also call __invalidate_device when setting
 the device offline

Don't just write out the data, but also invalidate all caches when setting
the device offline.  Stop canceling the offlining when writeback fails
as there is no way to recover from that anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Message-Id: <20230811100828.1897174-12-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 drivers/s390/block/dasd.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index edcbf77852c3..675b38ad00dc 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3627,9 +3627,8 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
 		 * empty
 		 */
 		if (device->block) {
-			rc = fsync_bdev(device->block->bdev);
-			if (rc != 0)
-				goto interrupted;
+			fsync_bdev(device->block->bdev);
+			__invalidate_device(device->block->bdev, true);
 		}
 		dasd_schedule_device_bh(device);
 		rc = wait_event_interruptible(shutdown_waitq,

From 127a5093c79d9cdea9e6edc016ab346e6dd16c23 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Aug 2023 12:08:23 +0200
Subject: [PATCH 1440/2047] block: drop the "busy inodes on changed media" log
 message

This message isn't exactly helpful, and file systems already print way more
useful messages when shut down while active.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Message-Id: <20230811100828.1897174-13-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 block/disk-events.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/block/disk-events.c b/block/disk-events.c
index 6189b819b235..6b858d350477 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -281,9 +281,7 @@ bool disk_check_media_change(struct gendisk *disk)
 	if (!(events & DISK_EVENT_MEDIA_CHANGE))
 		return false;
 
-	if (__invalidate_device(disk->part0, true))
-		pr_warn("VFS: busy inodes on changed media %s\n",
-			disk->disk_name);
+	__invalidate_device(disk->part0, true);
 	set_bit(GD_NEED_PART_SCAN, &disk->state);
 	return true;
 }
@@ -302,9 +300,7 @@ void disk_force_media_change(struct gendisk *disk)
 {
 	disk_event_uevent(disk, DISK_EVENT_MEDIA_CHANGE);
 	inc_diskseq(disk);
-	if (__invalidate_device(disk->part0, true))
-		pr_warn("VFS: busy inodes on changed media %s\n",
-			disk->disk_name);
+	__invalidate_device(disk->part0, true);
 	set_bit(GD_NEED_PART_SCAN, &disk->state);
 }
 EXPORT_SYMBOL_GPL(disk_force_media_change);

From 560e20e4bf6484a0c12f9f3c7a1aa55056948e1e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Aug 2023 12:08:24 +0200
Subject: [PATCH 1441/2047] block: consolidate __invalidate_device and
 fsync_bdev

We currently have two interfaces that take a block_devices and the find
a mounted file systems to flush or invaldidate data on it.  Both are a
bit problematic because they only work for the "main" block devices
that is used as s_dev for the super_block, and because they don't call
into the file system at all.

Merge the two into a new bdev_mark_dead helper that does both the
syncing and invalidation and which is properly documented.  This is
in preparation of merging the functionality into the ->mark_dead
holder operation so that it will work on additional block devices
used by a file systems and give us a single entry point for invalidation
of dead devices or media.

Note that a single standalone fsync_bdev call for an obscure ioctl
remains for now, but that one will also be deal with in a bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Message-Id: <20230811100828.1897174-14-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 block/bdev.c              | 33 ++++++++++++++++++++++++++++-----
 block/disk-events.c       |  4 ++--
 block/genhd.c             |  3 +--
 block/partitions/core.c   |  5 +----
 drivers/s390/block/dasd.c |  6 ++----
 fs/super.c                |  4 ++--
 include/linux/blkdev.h    |  2 +-
 7 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 979e28a46b98..074a9ffa9d8b 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -221,7 +221,6 @@ int fsync_bdev(struct block_device *bdev)
 	}
 	return sync_blockdev(bdev);
 }
-EXPORT_SYMBOL(fsync_bdev);
 
 /**
  * freeze_bdev - lock a filesystem and force it into a consistent state
@@ -960,12 +959,27 @@ out_path_put:
 }
 EXPORT_SYMBOL(lookup_bdev);
 
-int __invalidate_device(struct block_device *bdev, bool kill_dirty)
+/**
+ * bdev_mark_dead - mark a block device as dead
+ * @bdev: block device to operate on
+ * @surprise: indicate a surprise removal
+ *
+ * Tell the file system that this devices or media is dead.  If @surprise is set
+ * to %true the device or media is already gone, if not we are preparing for an
+ * orderly removal.
+ *
+ * This syncs out all dirty data and writes back inodes and then invalidates any
+ * cached data in the inodes on the file system, the inodes themselves and the
+ * block device mapping.
+ */
+void bdev_mark_dead(struct block_device *bdev, bool surprise)
 {
 	struct super_block *sb = get_super(bdev);
 	int res = 0;
 
 	if (sb) {
+		if (!surprise)
+			sync_filesystem(sb);
 		/*
 		 * no need to lock the super, get_super holds the
 		 * read mutex so the filesystem cannot go away
@@ -973,13 +987,22 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty)
 		 * hold).
 		 */
 		shrink_dcache_sb(sb);
-		res = invalidate_inodes(sb, kill_dirty);
+		res = invalidate_inodes(sb, true);
 		drop_super(sb);
+	} else {
+		if (!surprise)
+			sync_blockdev(bdev);
 	}
 	invalidate_bdev(bdev);
-	return res;
 }
-EXPORT_SYMBOL(__invalidate_device);
+#ifdef CONFIG_DASD_MODULE
+/*
+ * Drivers should not use this directly, but the DASD driver has historically
+ * had a shutdown to offline mode that doesn't actually remove the gendisk
+ * that otherwise looks a lot like a safe device removal.
+ */
+EXPORT_SYMBOL_GPL(bdev_mark_dead);
+#endif
 
 void sync_bdevs(bool wait)
 {
diff --git a/block/disk-events.c b/block/disk-events.c
index 6b858d350477..422db8292d09 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -281,7 +281,7 @@ bool disk_check_media_change(struct gendisk *disk)
 	if (!(events & DISK_EVENT_MEDIA_CHANGE))
 		return false;
 
-	__invalidate_device(disk->part0, true);
+	bdev_mark_dead(disk->part0, true);
 	set_bit(GD_NEED_PART_SCAN, &disk->state);
 	return true;
 }
@@ -300,7 +300,7 @@ void disk_force_media_change(struct gendisk *disk)
 {
 	disk_event_uevent(disk, DISK_EVENT_MEDIA_CHANGE);
 	inc_diskseq(disk);
-	__invalidate_device(disk->part0, true);
+	bdev_mark_dead(disk->part0, true);
 	set_bit(GD_NEED_PART_SCAN, &disk->state);
 }
 EXPORT_SYMBOL_GPL(disk_force_media_change);
diff --git a/block/genhd.c b/block/genhd.c
index 3d287b32d50d..afc2cb09eb94 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -647,8 +647,7 @@ void del_gendisk(struct gendisk *disk)
 	mutex_lock(&disk->open_mutex);
 	xa_for_each(&disk->part_tbl, idx, part) {
 		remove_inode_hash(part->bd_inode);
-		fsync_bdev(part);
-		__invalidate_device(part, true);
+		bdev_mark_dead(part, false);
 	}
 	mutex_unlock(&disk->open_mutex);
 
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 13a7341299a9..e137a87f4db0 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -281,10 +281,7 @@ static void delete_partition(struct block_device *part)
 	 * looked up any more even when openers still hold references.
 	 */
 	remove_inode_hash(part->bd_inode);
-
-	fsync_bdev(part);
-	__invalidate_device(part, true);
-
+	bdev_mark_dead(part, false);
 	drop_partition(part);
 }
 
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 675b38ad00dc..1f642be840c3 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3626,10 +3626,8 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
 		 * so sync bdev first and then wait for our queues to become
 		 * empty
 		 */
-		if (device->block) {
-			fsync_bdev(device->block->bdev);
-			__invalidate_device(device->block->bdev, true);
-		}
+		if (device->block)
+			bdev_mark_dead(device->block->bdev, false);
 		dasd_schedule_device_bh(device);
 		rc = wait_event_interruptible(shutdown_waitq,
 					      _wait_for_empty_queues(device));
diff --git a/fs/super.c b/fs/super.c
index f72a1112a31b..9b2188e08bcc 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1359,7 +1359,7 @@ int get_tree_bdev(struct fs_context *fc,
 		/*
 		 * We drop s_umount here because we need to open the bdev and
 		 * bdev->open_mutex ranks above s_umount (blkdev_put() ->
-		 * __invalidate_device()). It is safe because we have active sb
+		 * bdev_mark_dead()). It is safe because we have active sb
 		 * reference and SB_BORN is not set yet.
 		 */
 		up_write(&s->s_umount);
@@ -1411,7 +1411,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
 		/*
 		 * We drop s_umount here because we need to open the bdev and
 		 * bdev->open_mutex ranks above s_umount (blkdev_put() ->
-		 * __invalidate_device()). It is safe because we have active sb
+		 * bdev_mark_dead()). It is safe because we have active sb
 		 * reference and SB_BORN is not set yet.
 		 */
 		up_write(&s->s_umount);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c8eab6effc22..6721595b9f97 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -751,6 +751,7 @@ static inline int bdev_read_only(struct block_device *bdev)
 
 bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
 void disk_force_media_change(struct gendisk *disk);
+void bdev_mark_dead(struct block_device *bdev, bool surprise);
 
 void add_disk_randomness(struct gendisk *disk) __latent_entropy;
 void rand_initialize_disk(struct gendisk *disk);
@@ -809,7 +810,6 @@ int __register_blkdev(unsigned int major, const char *name,
 void unregister_blkdev(unsigned int major, const char *name);
 
 bool disk_check_media_change(struct gendisk *disk);
-int __invalidate_device(struct block_device *bdev, bool kill_dirty);
 void set_capacity(struct gendisk *disk, sector_t size);
 
 #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED

From d8530de5a6e82be0ce17a5fdf727a394bcf6444c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Aug 2023 12:08:25 +0200
Subject: [PATCH 1442/2047] block: call into the file system for bdev_mark_dead

Combine the newly merged bdev_mark_dead helper with the existing
mark_dead holder operation so that all operations that invalidate
a device that is dead or being removed now go through the holder
ops.  This allows file systems to explicitly shutdown either ASAP
(for a surprise removal) or after writing back data (for an orderly
removal), and do so not only for the main device.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Message-Id: <20230811100828.1897174-15-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 block/bdev.c           | 30 +++++++++-------------------
 block/genhd.c          | 44 +++++++++++++++++++++++-------------------
 fs/super.c             |  8 ++++++--
 include/linux/blkdev.h |  2 +-
 4 files changed, 40 insertions(+), 44 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 074a9ffa9d8b..4d580083a114 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -968,31 +968,19 @@ EXPORT_SYMBOL(lookup_bdev);
  * to %true the device or media is already gone, if not we are preparing for an
  * orderly removal.
  *
- * This syncs out all dirty data and writes back inodes and then invalidates any
- * cached data in the inodes on the file system, the inodes themselves and the
- * block device mapping.
+ * This calls into the file system, which then typicall syncs out all dirty data
+ * and writes back inodes and then invalidates any cached data in the inodes on
+ * the file system.  In addition we also invalidate the block device mapping.
  */
 void bdev_mark_dead(struct block_device *bdev, bool surprise)
 {
-	struct super_block *sb = get_super(bdev);
-	int res = 0;
+	mutex_lock(&bdev->bd_holder_lock);
+	if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
+		bdev->bd_holder_ops->mark_dead(bdev, surprise);
+	else
+		sync_blockdev(bdev);
+	mutex_unlock(&bdev->bd_holder_lock);
 
-	if (sb) {
-		if (!surprise)
-			sync_filesystem(sb);
-		/*
-		 * no need to lock the super, get_super holds the
-		 * read mutex so the filesystem cannot go away
-		 * under us (->put_super runs with the write lock
-		 * hold).
-		 */
-		shrink_dcache_sb(sb);
-		res = invalidate_inodes(sb, true);
-		drop_super(sb);
-	} else {
-		if (!surprise)
-			sync_blockdev(bdev);
-	}
 	invalidate_bdev(bdev);
 }
 #ifdef CONFIG_DASD_MODULE
diff --git a/block/genhd.c b/block/genhd.c
index afc2cb09eb94..cc32a0c704eb 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -554,7 +554,7 @@ out_exit_elevator:
 }
 EXPORT_SYMBOL(device_add_disk);
 
-static void blk_report_disk_dead(struct gendisk *disk)
+static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
 {
 	struct block_device *bdev;
 	unsigned long idx;
@@ -565,10 +565,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
 			continue;
 		rcu_read_unlock();
 
-		mutex_lock(&bdev->bd_holder_lock);
-		if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
-			bdev->bd_holder_ops->mark_dead(bdev);
-		mutex_unlock(&bdev->bd_holder_lock);
+		bdev_mark_dead(bdev, surprise);
 
 		put_device(&bdev->bd_device);
 		rcu_read_lock();
@@ -576,14 +573,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
 	rcu_read_unlock();
 }
 
-/**
- * blk_mark_disk_dead - mark a disk as dead
- * @disk: disk to mark as dead
- *
- * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
- * to this disk.
- */
-void blk_mark_disk_dead(struct gendisk *disk)
+static void __blk_mark_disk_dead(struct gendisk *disk)
 {
 	/*
 	 * Fail any new I/O.
@@ -603,8 +593,19 @@ void blk_mark_disk_dead(struct gendisk *disk)
 	 * Prevent new I/O from crossing bio_queue_enter().
 	 */
 	blk_queue_start_drain(disk->queue);
+}
 
-	blk_report_disk_dead(disk);
+/**
+ * blk_mark_disk_dead - mark a disk as dead
+ * @disk: disk to mark as dead
+ *
+ * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
+ * to this disk.
+ */
+void blk_mark_disk_dead(struct gendisk *disk)
+{
+	__blk_mark_disk_dead(disk);
+	blk_report_disk_dead(disk, true);
 }
 EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
 
@@ -641,17 +642,20 @@ void del_gendisk(struct gendisk *disk)
 	disk_del_events(disk);
 
 	/*
-	 * Prevent new openers by unlinked the bdev inode, and write out
-	 * dirty data before marking the disk dead and stopping all I/O.
+	 * Prevent new openers by unlinked the bdev inode.
 	 */
 	mutex_lock(&disk->open_mutex);
-	xa_for_each(&disk->part_tbl, idx, part) {
+	xa_for_each(&disk->part_tbl, idx, part)
 		remove_inode_hash(part->bd_inode);
-		bdev_mark_dead(part, false);
-	}
 	mutex_unlock(&disk->open_mutex);
 
-	blk_mark_disk_dead(disk);
+	/*
+	 * Tell the file system to write back all dirty data and shut down if
+	 * it hasn't been notified earlier.
+	 */
+	if (!test_bit(GD_DEAD, &disk->state))
+		blk_report_disk_dead(disk, false);
+	__blk_mark_disk_dead(disk);
 
 	/*
 	 * Drop all partitions now that the disk is marked dead.
diff --git a/fs/super.c b/fs/super.c
index 9b2188e08bcc..11fa21da130c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1228,7 +1228,7 @@ static bool lock_active_super(struct super_block *sb)
 	return true;
 }
 
-static void fs_mark_dead(struct block_device *bdev)
+static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
 {
 	struct super_block *sb = bdev->bd_holder;
 
@@ -1238,6 +1238,10 @@ static void fs_mark_dead(struct block_device *bdev)
 	if (!lock_active_super(sb))
 		return;
 
+	if (!surprise)
+		sync_filesystem(sb);
+	shrink_dcache_sb(sb);
+	invalidate_inodes(sb, true);
 	if (sb->s_op->shutdown)
 		sb->s_op->shutdown(sb);
 
@@ -1245,7 +1249,7 @@ static void fs_mark_dead(struct block_device *bdev)
 }
 
 const struct blk_holder_ops fs_holder_ops = {
-	.mark_dead		= fs_mark_dead,
+	.mark_dead		= fs_bdev_mark_dead,
 };
 EXPORT_SYMBOL_GPL(fs_holder_ops);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6721595b9f97..cdd03c612d39 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1461,7 +1461,7 @@ void blkdev_show(struct seq_file *seqf, off_t offset);
 #endif
 
 struct blk_holder_ops {
-	void (*mark_dead)(struct block_device *bdev);
+	void (*mark_dead)(struct block_device *bdev, bool surprise);
 };
 
 extern const struct blk_holder_ops fs_holder_ops;

From 2142b88c37a3e49fbca4a36b8674626917d9bf40 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Aug 2023 12:08:26 +0200
Subject: [PATCH 1443/2047] block: call into the file system for ioctl
 BLKFLSBUF

BLKFLSBUF is a historic ioctl that is called on a file handle to a
block device and syncs either the file system mounted on that block
device if there is one, or otherwise the just the data on the block
device.

Replace the get_super based syncing with a holder operation to remove
the last usage of get_super, and to also support syncing the file system
if the block device is not the main block device stored in s_dev.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Message-Id: <20230811100828.1897174-16-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 block/bdev.c           | 16 ----------------
 block/ioctl.c          |  9 ++++++++-
 fs/super.c             | 13 +++++++++++++
 include/linux/blkdev.h |  7 +++++--
 4 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 4d580083a114..a20263fa27a4 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -206,22 +206,6 @@ int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend)
 }
 EXPORT_SYMBOL(sync_blockdev_range);
 
-/*
- * Write out and wait upon all dirty data associated with this
- * device.   Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_bdev(struct block_device *bdev)
-{
-	struct super_block *sb = get_super(bdev);
-	if (sb) {
-		int res = sync_filesystem(sb);
-		drop_super(sb);
-		return res;
-	}
-	return sync_blockdev(bdev);
-}
-
 /**
  * freeze_bdev - lock a filesystem and force it into a consistent state
  * @bdev:	blockdevice to lock
diff --git a/block/ioctl.c b/block/ioctl.c
index 3be11941fb2d..648670ddb164 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -364,7 +364,14 @@ static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd,
 {
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
-	fsync_bdev(bdev);
+
+	mutex_lock(&bdev->bd_holder_lock);
+	if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync)
+		bdev->bd_holder_ops->sync(bdev);
+	else
+		sync_blockdev(bdev);
+	mutex_unlock(&bdev->bd_holder_lock);
+
 	invalidate_bdev(bdev);
 	return 0;
 }
diff --git a/fs/super.c b/fs/super.c
index 11fa21da130c..1a369fa3f0a6 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1248,8 +1248,21 @@ static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
 	up_read(&sb->s_umount);
 }
 
+static void fs_bdev_sync(struct block_device *bdev)
+{
+	struct super_block *sb = bdev->bd_holder;
+
+	lockdep_assert_held(&bdev->bd_holder_lock);
+
+	if (!lock_active_super(sb))
+		return;
+	sync_filesystem(sb);
+	up_read(&sb->s_umount);
+}
+
 const struct blk_holder_ops fs_holder_ops = {
 	.mark_dead		= fs_bdev_mark_dead,
+	.sync			= fs_bdev_sync,
 };
 EXPORT_SYMBOL_GPL(fs_holder_ops);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cdd03c612d39..fa462d48ee96 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1462,6 +1462,11 @@ void blkdev_show(struct seq_file *seqf, off_t offset);
 
 struct blk_holder_ops {
 	void (*mark_dead)(struct block_device *bdev, bool surprise);
+
+	/*
+	 * Sync the file system mounted on the block device.
+	 */
+	void (*sync)(struct block_device *bdev);
 };
 
 extern const struct blk_holder_ops fs_holder_ops;
@@ -1524,8 +1529,6 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev)
 }
 #endif /* CONFIG_BLOCK */
 
-int fsync_bdev(struct block_device *bdev);
-
 int freeze_bdev(struct block_device *bdev);
 int thaw_bdev(struct block_device *bdev);
 

From 38bcdd38935350abceace901313e007e84d84456 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Aug 2023 12:08:27 +0200
Subject: [PATCH 1444/2047] fs: remove get_super

get_super is unused now, remove it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Message-Id: <20230811100828.1897174-17-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/super.c         | 37 -------------------------------------
 include/linux/fs.h |  1 -
 2 files changed, 38 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 1a369fa3f0a6..1c7c74855437 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -790,43 +790,6 @@ void iterate_supers_type(struct file_system_type *type,
 
 EXPORT_SYMBOL(iterate_supers_type);
 
-/**
- * get_super - get the superblock of a device
- * @bdev: device to get the superblock for
- *
- * Scans the superblock list and finds the superblock of the file system
- * mounted on the device given. %NULL is returned if no match is found.
- */
-struct super_block *get_super(struct block_device *bdev)
-{
-	struct super_block *sb;
-
-	if (!bdev)
-		return NULL;
-
-	spin_lock(&sb_lock);
-rescan:
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (hlist_unhashed(&sb->s_instances))
-			continue;
-		if (sb->s_bdev == bdev) {
-			sb->s_count++;
-			spin_unlock(&sb_lock);
-			down_read(&sb->s_umount);
-			/* still alive? */
-			if (sb->s_root && (sb->s_flags & SB_BORN))
-				return sb;
-			up_read(&sb->s_umount);
-			/* nope, got unmounted */
-			spin_lock(&sb_lock);
-			__put_super(sb);
-			goto rescan;
-		}
-	}
-	spin_unlock(&sb_lock);
-	return NULL;
-}
-
 /**
  * get_active_super - get an active reference to the superblock of a device
  * @bdev: device to get the superblock for
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6867512907d6..14b5777a24a0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2913,7 +2913,6 @@ extern int vfs_readlink(struct dentry *, char __user *, int);
 extern struct file_system_type *get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
-extern struct super_block *get_super(struct block_device *);
 extern struct super_block *get_active_super(struct block_device *bdev);
 extern void drop_super(struct super_block *sb);
 extern void drop_super_exclusive(struct super_block *sb);

From e127b9bccdb04e5fc4444431de37309a68aedafa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Aug 2023 12:08:28 +0200
Subject: [PATCH 1445/2047] fs: simplify invalidate_inodes

kill_dirty has always been true for a long time, so hard code it and
remove the unused return value.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Message-Id: <20230811100828.1897174-18-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/inode.c    | 17 ++---------------
 fs/internal.h |  2 +-
 fs/super.c    |  2 +-
 3 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 8fefb69e1f84..25bd2635c161 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -752,16 +752,11 @@ EXPORT_SYMBOL_GPL(evict_inodes);
 /**
  * invalidate_inodes	- attempt to free all inodes on a superblock
  * @sb:		superblock to operate on
- * @kill_dirty: flag to guide handling of dirty inodes
  *
- * Attempts to free all inodes for a given superblock.  If there were any
- * busy inodes return a non-zero value, else zero.
- * If @kill_dirty is set, discard dirty inodes too, otherwise treat
- * them as busy.
+ * Attempts to free all inodes (including dirty inodes) for a given superblock.
  */
-int invalidate_inodes(struct super_block *sb, bool kill_dirty)
+void invalidate_inodes(struct super_block *sb)
 {
-	int busy = 0;
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
@@ -773,14 +768,8 @@ again:
 			spin_unlock(&inode->i_lock);
 			continue;
 		}
-		if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
-			spin_unlock(&inode->i_lock);
-			busy = 1;
-			continue;
-		}
 		if (atomic_read(&inode->i_count)) {
 			spin_unlock(&inode->i_lock);
-			busy = 1;
 			continue;
 		}
 
@@ -798,8 +787,6 @@ again:
 	spin_unlock(&sb->s_inode_list_lock);
 
 	dispose_list(&dispose);
-
-	return busy;
 }
 
 /*
diff --git a/fs/internal.h b/fs/internal.h
index f7a3dc111026..b94290f61714 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -201,7 +201,7 @@ void lock_two_inodes(struct inode *inode1, struct inode *inode2,
  * fs-writeback.c
  */
 extern long get_nr_dirty_inodes(void);
-extern int invalidate_inodes(struct super_block *, bool);
+void invalidate_inodes(struct super_block *sb);
 
 /*
  * dcache.c
diff --git a/fs/super.c b/fs/super.c
index 1c7c74855437..c878e7373f93 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1204,7 +1204,7 @@ static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
 	if (!surprise)
 		sync_filesystem(sb);
 	shrink_dcache_sb(sb);
-	invalidate_inodes(sb, true);
+	invalidate_inodes(sb);
 	if (sb->s_op->shutdown)
 		sb->s_op->shutdown(sb);
 

From 0ed33598ddf308782ca621755df5d23dcff34b64 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 18 Aug 2023 16:00:48 +0200
Subject: [PATCH 1446/2047] super: use locking helpers

Replace the open-coded {down,up}_{read,write}() calls with simple
wrappers. Follow-up patches will benefit from this as well.

Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230818-vfs-super-fixes-v3-v3-1-9f0b1876e46b@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/super.c | 126 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 78 insertions(+), 48 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index c878e7373f93..b12e2f247e1e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -50,6 +50,42 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
 	"sb_internal",
 };
 
+static inline void super_lock(struct super_block *sb, bool excl)
+{
+	if (excl)
+		down_write(&sb->s_umount);
+	else
+		down_read(&sb->s_umount);
+}
+
+static inline void super_unlock(struct super_block *sb, bool excl)
+{
+	if (excl)
+		up_write(&sb->s_umount);
+	else
+		up_read(&sb->s_umount);
+}
+
+static inline void super_lock_excl(struct super_block *sb)
+{
+	super_lock(sb, true);
+}
+
+static inline void super_lock_shared(struct super_block *sb)
+{
+	super_lock(sb, false);
+}
+
+static inline void super_unlock_excl(struct super_block *sb)
+{
+	super_unlock(sb, true);
+}
+
+static inline void super_unlock_shared(struct super_block *sb)
+{
+	super_unlock(sb, false);
+}
+
 /*
  * One thing we have to be careful of with a per-sb shrinker is that we don't
  * drop the last active reference to the superblock from within the shrinker.
@@ -110,7 +146,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
 		freed += sb->s_op->free_cached_objects(sb, sc);
 	}
 
-	up_read(&sb->s_umount);
+	super_unlock_shared(sb);
 	return freed;
 }
 
@@ -176,7 +212,7 @@ static void destroy_unused_super(struct super_block *s)
 {
 	if (!s)
 		return;
-	up_write(&s->s_umount);
+	super_unlock_excl(s);
 	list_lru_destroy(&s->s_dentry_lru);
 	list_lru_destroy(&s->s_inode_lru);
 	security_sb_free(s);
@@ -340,7 +376,7 @@ void deactivate_locked_super(struct super_block *s)
 		put_filesystem(fs);
 		put_super(s);
 	} else {
-		up_write(&s->s_umount);
+		super_unlock_excl(s);
 	}
 }
 
@@ -357,7 +393,7 @@ EXPORT_SYMBOL(deactivate_locked_super);
 void deactivate_super(struct super_block *s)
 {
 	if (!atomic_add_unless(&s->s_active, -1, 1)) {
-		down_write(&s->s_umount);
+		super_lock_excl(s);
 		deactivate_locked_super(s);
 	}
 }
@@ -381,12 +417,12 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
 {
 	s->s_count++;
 	spin_unlock(&sb_lock);
-	down_write(&s->s_umount);
+	super_lock_excl(s);
 	if ((s->s_flags & SB_BORN) && atomic_inc_not_zero(&s->s_active)) {
 		put_super(s);
 		return 1;
 	}
-	up_write(&s->s_umount);
+	super_unlock_excl(s);
 	put_super(s);
 	return 0;
 }
@@ -414,7 +450,7 @@ bool trylock_super(struct super_block *sb)
 		if (!hlist_unhashed(&sb->s_instances) &&
 		    sb->s_root && (sb->s_flags & SB_BORN))
 			return true;
-		up_read(&sb->s_umount);
+		super_unlock_shared(sb);
 	}
 
 	return false;
@@ -439,13 +475,13 @@ bool trylock_super(struct super_block *sb)
 void retire_super(struct super_block *sb)
 {
 	WARN_ON(!sb->s_bdev);
-	down_write(&sb->s_umount);
+	super_lock_excl(sb);
 	if (sb->s_iflags & SB_I_PERSB_BDI) {
 		bdi_unregister(sb->s_bdi);
 		sb->s_iflags &= ~SB_I_PERSB_BDI;
 	}
 	sb->s_iflags |= SB_I_RETIRED;
-	up_write(&sb->s_umount);
+	super_unlock_excl(sb);
 }
 EXPORT_SYMBOL(retire_super);
 
@@ -521,7 +557,7 @@ void generic_shutdown_super(struct super_block *sb)
 	/* should be initialized for __put_super_and_need_restart() */
 	hlist_del_init(&sb->s_instances);
 	spin_unlock(&sb_lock);
-	up_write(&sb->s_umount);
+	super_unlock_excl(sb);
 	if (sb->s_bdi != &noop_backing_dev_info) {
 		if (sb->s_iflags & SB_I_PERSB_BDI)
 			bdi_unregister(sb->s_bdi);
@@ -685,7 +721,7 @@ EXPORT_SYMBOL(sget);
 
 void drop_super(struct super_block *sb)
 {
-	up_read(&sb->s_umount);
+	super_unlock_shared(sb);
 	put_super(sb);
 }
 
@@ -693,7 +729,7 @@ EXPORT_SYMBOL(drop_super);
 
 void drop_super_exclusive(struct super_block *sb)
 {
-	up_write(&sb->s_umount);
+	super_unlock_excl(sb);
 	put_super(sb);
 }
 EXPORT_SYMBOL(drop_super_exclusive);
@@ -739,10 +775,10 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 
-		down_read(&sb->s_umount);
+		super_lock_shared(sb);
 		if (sb->s_root && (sb->s_flags & SB_BORN))
 			f(sb, arg);
-		up_read(&sb->s_umount);
+		super_unlock_shared(sb);
 
 		spin_lock(&sb_lock);
 		if (p)
@@ -773,10 +809,10 @@ void iterate_supers_type(struct file_system_type *type,
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 
-		down_read(&sb->s_umount);
+		super_lock_shared(sb);
 		if (sb->s_root && (sb->s_flags & SB_BORN))
 			f(sb, arg);
-		up_read(&sb->s_umount);
+		super_unlock_shared(sb);
 
 		spin_lock(&sb_lock);
 		if (p)
@@ -813,7 +849,7 @@ restart:
 		if (sb->s_bdev == bdev) {
 			if (!grab_super(sb))
 				goto restart;
-			up_write(&sb->s_umount);
+			super_unlock_excl(sb);
 			return sb;
 		}
 	}
@@ -833,17 +869,11 @@ rescan:
 		if (sb->s_dev ==  dev) {
 			sb->s_count++;
 			spin_unlock(&sb_lock);
-			if (excl)
-				down_write(&sb->s_umount);
-			else
-				down_read(&sb->s_umount);
+			super_lock(sb, excl);
 			/* still alive? */
 			if (sb->s_root && (sb->s_flags & SB_BORN))
 				return sb;
-			if (excl)
-				up_write(&sb->s_umount);
-			else
-				up_read(&sb->s_umount);
+			super_unlock(sb, excl);
 			/* nope, got unmounted */
 			spin_lock(&sb_lock);
 			__put_super(sb);
@@ -889,9 +919,9 @@ int reconfigure_super(struct fs_context *fc)
 
 	if (remount_ro) {
 		if (!hlist_empty(&sb->s_pins)) {
-			up_write(&sb->s_umount);
+			super_unlock_excl(sb);
 			group_pin_kill(&sb->s_pins);
-			down_write(&sb->s_umount);
+			super_lock_excl(sb);
 			if (!sb->s_root)
 				return 0;
 			if (sb->s_writers.frozen != SB_UNFROZEN)
@@ -954,7 +984,7 @@ cancel_readonly:
 
 static void do_emergency_remount_callback(struct super_block *sb)
 {
-	down_write(&sb->s_umount);
+	super_lock_excl(sb);
 	if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
 	    !sb_rdonly(sb)) {
 		struct fs_context *fc;
@@ -967,7 +997,7 @@ static void do_emergency_remount_callback(struct super_block *sb)
 			put_fs_context(fc);
 		}
 	}
-	up_write(&sb->s_umount);
+	super_unlock_excl(sb);
 }
 
 static void do_emergency_remount(struct work_struct *work)
@@ -990,12 +1020,12 @@ void emergency_remount(void)
 
 static void do_thaw_all_callback(struct super_block *sb)
 {
-	down_write(&sb->s_umount);
+	super_lock_excl(sb);
 	if (sb->s_root && sb->s_flags & SB_BORN) {
 		emergency_thaw_bdev(sb);
 		thaw_super_locked(sb);
 	} else {
-		up_write(&sb->s_umount);
+		super_unlock_excl(sb);
 	}
 }
 
@@ -1182,10 +1212,10 @@ EXPORT_SYMBOL(get_tree_keyed);
  */
 static bool lock_active_super(struct super_block *sb)
 {
-	down_read(&sb->s_umount);
+	super_lock_shared(sb);
 	if (!sb->s_root ||
 	    (sb->s_flags & (SB_ACTIVE | SB_BORN)) != (SB_ACTIVE | SB_BORN)) {
-		up_read(&sb->s_umount);
+		super_unlock_shared(sb);
 		return false;
 	}
 	return true;
@@ -1208,7 +1238,7 @@ static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
 	if (sb->s_op->shutdown)
 		sb->s_op->shutdown(sb);
 
-	up_read(&sb->s_umount);
+	super_unlock_shared(sb);
 }
 
 static void fs_bdev_sync(struct block_device *bdev)
@@ -1220,7 +1250,7 @@ static void fs_bdev_sync(struct block_device *bdev)
 	if (!lock_active_super(sb))
 		return;
 	sync_filesystem(sb);
-	up_read(&sb->s_umount);
+	super_unlock_shared(sb);
 }
 
 const struct blk_holder_ops fs_holder_ops = {
@@ -1342,9 +1372,9 @@ int get_tree_bdev(struct fs_context *fc,
 		 * bdev_mark_dead()). It is safe because we have active sb
 		 * reference and SB_BORN is not set yet.
 		 */
-		up_write(&s->s_umount);
+		super_unlock_excl(s);
 		error = setup_bdev_super(s, fc->sb_flags, fc);
-		down_write(&s->s_umount);
+		super_lock_excl(s);
 		if (!error)
 			error = fill_super(s, fc);
 		if (error) {
@@ -1394,9 +1424,9 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
 		 * bdev_mark_dead()). It is safe because we have active sb
 		 * reference and SB_BORN is not set yet.
 		 */
-		up_write(&s->s_umount);
+		super_unlock_excl(s);
 		error = setup_bdev_super(s, flags, NULL);
-		down_write(&s->s_umount);
+		super_lock_excl(s);
 		if (!error)
 			error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
 		if (error) {
@@ -1685,29 +1715,29 @@ int freeze_super(struct super_block *sb)
 	int ret;
 
 	atomic_inc(&sb->s_active);
-	down_write(&sb->s_umount);
+	super_lock_excl(sb);
 	if (sb->s_writers.frozen != SB_UNFROZEN) {
 		deactivate_locked_super(sb);
 		return -EBUSY;
 	}
 
 	if (!(sb->s_flags & SB_BORN)) {
-		up_write(&sb->s_umount);
+		super_unlock_excl(sb);
 		return 0;	/* sic - it's "nothing to do" */
 	}
 
 	if (sb_rdonly(sb)) {
 		/* Nothing to do really... */
 		sb->s_writers.frozen = SB_FREEZE_COMPLETE;
-		up_write(&sb->s_umount);
+		super_unlock_excl(sb);
 		return 0;
 	}
 
 	sb->s_writers.frozen = SB_FREEZE_WRITE;
 	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
-	up_write(&sb->s_umount);
+	super_unlock_excl(sb);
 	sb_wait_write(sb, SB_FREEZE_WRITE);
-	down_write(&sb->s_umount);
+	super_lock_excl(sb);
 
 	/* Now we go and block page faults... */
 	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
@@ -1743,7 +1773,7 @@ int freeze_super(struct super_block *sb)
 	 */
 	sb->s_writers.frozen = SB_FREEZE_COMPLETE;
 	lockdep_sb_freeze_release(sb);
-	up_write(&sb->s_umount);
+	super_unlock_excl(sb);
 	return 0;
 }
 EXPORT_SYMBOL(freeze_super);
@@ -1753,7 +1783,7 @@ static int thaw_super_locked(struct super_block *sb)
 	int error;
 
 	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
-		up_write(&sb->s_umount);
+		super_unlock_excl(sb);
 		return -EINVAL;
 	}
 
@@ -1770,7 +1800,7 @@ static int thaw_super_locked(struct super_block *sb)
 			printk(KERN_ERR
 				"VFS:Filesystem thaw failed\n");
 			lockdep_sb_freeze_release(sb);
-			up_write(&sb->s_umount);
+			super_unlock_excl(sb);
 			return error;
 		}
 	}
@@ -1790,7 +1820,7 @@ out:
  */
 int thaw_super(struct super_block *sb)
 {
-	down_write(&sb->s_umount);
+	super_lock_excl(sb);
 	return thaw_super_locked(sb);
 }
 EXPORT_SYMBOL(thaw_super);

From d8ce82efdece373b570f35acc8a29487b2087b84 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 18 Aug 2023 16:00:49 +0200
Subject: [PATCH 1447/2047] super: make locking naming consistent

Make the naming consistent with the earlier introduced
super_lock_{read,write}() helpers.

Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230818-vfs-super-fixes-v3-v3-2-9f0b1876e46b@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/fs-writeback.c |  4 ++--
 fs/internal.h     |  2 +-
 fs/super.c        | 28 ++++++++++++++--------------
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index aca4b4811394..969ce991b0b0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1953,9 +1953,9 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
 		struct inode *inode = wb_inode(wb->b_io.prev);
 		struct super_block *sb = inode->i_sb;
 
-		if (!trylock_super(sb)) {
+		if (!super_trylock_shared(sb)) {
 			/*
-			 * trylock_super() may fail consistently due to
+			 * super_trylock_shared() may fail consistently due to
 			 * s_umount being grabbed by someone else. Don't use
 			 * requeue_io() to avoid busy retrying the inode/sb.
 			 */
diff --git a/fs/internal.h b/fs/internal.h
index b94290f61714..74d3b161dd2c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -115,7 +115,7 @@ static inline void put_file_access(struct file *file)
  * super.c
  */
 extern int reconfigure_super(struct fs_context *);
-extern bool trylock_super(struct super_block *sb);
+extern bool super_trylock_shared(struct super_block *sb);
 struct super_block *user_get_super(dev_t, bool excl);
 void put_super(struct super_block *sb);
 extern bool mount_capable(struct fs_context *);
diff --git a/fs/super.c b/fs/super.c
index b12e2f247e1e..ba5d813c5804 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -112,7 +112,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
 	if (!(sc->gfp_mask & __GFP_FS))
 		return SHRINK_STOP;
 
-	if (!trylock_super(sb))
+	if (!super_trylock_shared(sb))
 		return SHRINK_STOP;
 
 	if (sb->s_op->nr_cached_objects)
@@ -159,17 +159,17 @@ static unsigned long super_cache_count(struct shrinker *shrink,
 	sb = container_of(shrink, struct super_block, s_shrink);
 
 	/*
-	 * We don't call trylock_super() here as it is a scalability bottleneck,
-	 * so we're exposed to partial setup state. The shrinker rwsem does not
-	 * protect filesystem operations backing list_lru_shrink_count() or
-	 * s_op->nr_cached_objects(). Counts can change between
-	 * super_cache_count and super_cache_scan, so we really don't need locks
-	 * here.
+	 * We don't call super_trylock_shared() here as it is a scalability
+	 * bottleneck, so we're exposed to partial setup state. The shrinker
+	 * rwsem does not protect filesystem operations backing
+	 * list_lru_shrink_count() or s_op->nr_cached_objects(). Counts can
+	 * change between super_cache_count and super_cache_scan, so we really
+	 * don't need locks here.
 	 *
 	 * However, if we are currently mounting the superblock, the underlying
 	 * filesystem might be in a state of partial construction and hence it
-	 * is dangerous to access it.  trylock_super() uses a SB_BORN check to
-	 * avoid this situation, so do the same here. The memory barrier is
+	 * is dangerous to access it.  super_trylock_shared() uses a SB_BORN check
+	 * to avoid this situation, so do the same here. The memory barrier is
 	 * matched with the one in mount_fs() as we don't hold locks here.
 	 */
 	if (!(sb->s_flags & SB_BORN))
@@ -428,7 +428,7 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
 }
 
 /*
- *	trylock_super - try to grab ->s_umount shared
+ *	super_trylock_shared - try to grab ->s_umount shared
  *	@sb: reference we are trying to grab
  *
  *	Try to prevent fs shutdown.  This is used in places where we
@@ -444,7 +444,7 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
  *	of down_read().  There's a couple of places that are OK with that, but
  *	it's very much not a general-purpose interface.
  */
-bool trylock_super(struct super_block *sb)
+bool super_trylock_shared(struct super_block *sb)
 {
 	if (down_read_trylock(&sb->s_umount)) {
 		if (!hlist_unhashed(&sb->s_instances) &&
@@ -1210,7 +1210,7 @@ EXPORT_SYMBOL(get_tree_keyed);
  * and the place that clears the pointer to the superblock used by this function
  * before freeing the superblock.
  */
-static bool lock_active_super(struct super_block *sb)
+static bool super_lock_shared_active(struct super_block *sb)
 {
 	super_lock_shared(sb);
 	if (!sb->s_root ||
@@ -1228,7 +1228,7 @@ static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
 	/* bd_holder_lock ensures that the sb isn't freed */
 	lockdep_assert_held(&bdev->bd_holder_lock);
 
-	if (!lock_active_super(sb))
+	if (!super_lock_shared_active(sb))
 		return;
 
 	if (!surprise)
@@ -1247,7 +1247,7 @@ static void fs_bdev_sync(struct block_device *bdev)
 
 	lockdep_assert_held(&bdev->bd_holder_lock);
 
-	if (!lock_active_super(sb))
+	if (!super_lock_shared_active(sb))
 		return;
 	sync_filesystem(sb);
 	super_unlock_shared(sb);

From 07a3bb95ea1f7fa77e360cf3b0f06dff41dbf391 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@inria.fr>
Date: Fri, 23 Jun 2023 23:14:43 +0200
Subject: [PATCH 1448/2047] btrfs: zoned: use vcalloc instead of for vzalloc in
 btrfs_get_dev_zone_info

Use vcalloc that checks potential multiplication overflows.  The changes
were done using Coccinelle semantic patch.

Reviewed-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Julia Lawall <Julia.Lawall@inria.fr>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 72b90bc19a19..b43c4536e685 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -465,8 +465,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
 	 * use the cache.
 	 */
 	if (populate_cache && bdev_is_zoned(device->bdev)) {
-		zone_info->zone_cache = vzalloc(sizeof(struct blk_zone) *
-						zone_info->nr_zones);
+		zone_info->zone_cache = vcalloc(zone_info->nr_zones,
+						sizeof(struct blk_zone));
 		if (!zone_info->zone_cache) {
 			btrfs_err_in_rcu(device->fs_info,
 				"zoned: failed to allocate zone cache for %s",

From cf4ac2b9049be459dfbaddbeb9322366f1ac2dc6 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Wed, 21 Jun 2023 14:09:37 +0100
Subject: [PATCH 1449/2047] btrfs: scrub: remove redundant division of
 stripe_nr

Variable stripe_nr is being divided by map->num_stripes however the
result is never read. The division and assignment are redundant and
can be removed. Cleans up clang scan build warning:

fs/btrfs/scrub.c:1264:3: warning: Value stored to 'stripe_nr' is
never read [deadcode.DeadStores]

The code is a leftover from 6ded22c1bfe6 ("btrfs: reduce div64 calls by
limiting the number of stripes of a chunk to u32") that converted div64
to normal division, it's the same but previous version did not trigger a
warning.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 7289f5bff397..fe1c5bc7f601 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1262,7 +1262,6 @@ static int get_raid56_logic_offset(u64 physical, int num,
 
 		/* Work out the disk rotation on this stripe-set */
 		rot = stripe_nr % map->num_stripes;
-		stripe_nr /= map->num_stripes;
 		/* calculate which stripe this data locates */
 		rot += i;
 		stripe_index = rot % map->num_stripes;

From 966de47ff0c9e64d74e1719e4480b7c34f6190fa Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Thu, 22 Jun 2023 08:54:30 +0100
Subject: [PATCH 1450/2047] btrfs: remove redundant initialization of variables
 in log_new_ancestors

The variables leaf and slot are initialized when declared but the values
assigned to them are never read as they are being re-assigned later on.
The initializations are redundant and can be removed. Cleans up clang
scan build warnings:

fs/btrfs/tree-log.c:6797:25: warning: Value stored to 'leaf' during its
initialization is never read [deadcode.DeadStores]
fs/btrfs/tree-log.c:6798:7: warning: Value stored to 'slot' during its
initialization is never read [deadcode.DeadStores]

It's been there since b8aa330d2acb ("Btrfs: improve performance on fsync
of files with multiple hardlinks") without any usage so it's safe to be
removed.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 365a1cc0a3c3..8ad7e7e38d18 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6794,8 +6794,8 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
 
 	while (true) {
 		struct btrfs_fs_info *fs_info = root->fs_info;
-		struct extent_buffer *leaf = path->nodes[0];
-		int slot = path->slots[0];
+		struct extent_buffer *leaf;
+		int slot;
 		struct btrfs_key search_key;
 		struct inode *inode;
 		u64 ino;

From ed3764f726b24099396734aa1a5b41ae7e0fac60 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 27 Jun 2023 15:34:31 +0800
Subject: [PATCH 1451/2047] btrfs: add comments for btrfs_map_block()

The function btrfs_map_block() is a critical part of the btrfs storage
layer, which handles mapping of logical ranges to physical ranges.

Thus it's better to have some basic explanation, especially on the
following points:

- Segment split by various boundaries
  As a continuous logical range may be split into different segments,
  due to various factors like zones and RAID0/5/6/10 boundaries.

- The meaning of @mirror_num

- The possible single stripe optimization

- One deprecated parameter @need_raid_map
  Just explicitly mark it deprecated so we're aware of the problem.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6aa9bf3661ac..f47a9c42431e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6219,6 +6219,45 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *
 			stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
 }
 
+/*
+ * Map one logical range to one or more physical ranges.
+ *
+ * @length:		(Mandatory) mapped length of this run.
+ *			One logical range can be split into different segments
+ *			due to factors like zones and RAID0/5/6/10 stripe
+ *			boundaries.
+ *
+ * @bioc_ret:		(Mandatory) returned btrfs_io_context structure.
+ *			which has one or more physical ranges (btrfs_io_stripe)
+ *			recorded inside.
+ *			Caller should call btrfs_put_bioc() to free it after use.
+ *
+ * @smap:		(Optional) single physical range optimization.
+ *			If the map request can be fulfilled by one single
+ *			physical range, and this is parameter is not NULL,
+ *			then @bioc_ret would be NULL, and @smap would be
+ *			updated.
+ *
+ * @mirror_num_ret:	(Mandatory) returned mirror number if the original
+ *			value is 0.
+ *
+ *			Mirror number 0 means to choose any live mirrors.
+ *
+ *			For non-RAID56 profiles, non-zero mirror_num means
+ *			the Nth mirror. (e.g. mirror_num 1 means the first
+ *			copy).
+ *
+ *			For RAID56 profile, mirror 1 means rebuild from P and
+ *			the remaining data stripes.
+ *
+ *			For RAID6 profile, mirror > 2 means mark another
+ *			data/P stripe error and rebuild from the remaining
+ *			stripes..
+ *
+ * @need_raid_map:	(Used only for integrity checker) whether the map wants
+ *                      a full stripe map (including all data and P/Q stripes)
+ *                      for RAID56. Should always be 1 except integrity checker.
+ */
 int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		    u64 logical, u64 *length,
 		    struct btrfs_io_context **bioc_ret,

From 3a3c7a7f6506338bcc4ebe6c5a2cbfec016ce50c Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 28 Jun 2023 16:11:15 +0800
Subject: [PATCH 1452/2047] btrfs: raid56: remove unused
 BTRFS_RBIO_REBUILD_MISSING

Commit aca43fe839e4 ("btrfs: remove unused raid56 functions which were
dedicated for scrub") removed the special handling of RAID56 scrub for
missing device.

As scrub goes full mirror_num based recovery, that means if it hits a
missing device in RAID56, it would just try the next mirror, which would
go through the BTRFS_RBIO_READ_REBUILD operation.

This means there is no longer any use of BTRFS_RBIO_REBUILD_MISSING
operation and we can safely remove it.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/raid56.c | 17 +++++------------
 fs/btrfs/raid56.h |  1 -
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 0249ea52bb80..35fd6e152731 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -584,8 +584,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
 	if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
 		return 0;
 
-	if (last->operation == BTRFS_RBIO_REBUILD_MISSING ||
-	    last->operation == BTRFS_RBIO_READ_REBUILD)
+	if (last->operation == BTRFS_RBIO_READ_REBUILD)
 		return 0;
 
 	return 1;
@@ -784,10 +783,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
 			spin_unlock(&rbio->bio_list_lock);
 			spin_unlock(&h->lock);
 
-			if (next->operation == BTRFS_RBIO_READ_REBUILD)
-				start_async_work(next, recover_rbio_work_locked);
-			else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
-				steal_rbio(rbio, next);
+			if (next->operation == BTRFS_RBIO_READ_REBUILD) {
 				start_async_work(next, recover_rbio_work_locked);
 			} else if (next->operation == BTRFS_RBIO_WRITE) {
 				steal_rbio(rbio, next);
@@ -1698,8 +1694,7 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
 	 * If we're rebuilding a read, we have to use pages from the
 	 * bio list if possible.
 	 */
-	if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
-	     rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {
+	if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
 		sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
 	} else {
 		sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
@@ -1763,8 +1758,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
 		 * If we're rebuilding a read, we have to use pages from the
 		 * bio list if possible.
 		 */
-		if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
-		     rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {
+		if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
 			sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
 		} else {
 			sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
@@ -1897,8 +1891,7 @@ static int recover_sectors(struct btrfs_raid_bio *rbio)
 		goto out;
 	}
 
-	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
-	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
+	if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
 		spin_lock(&rbio->bio_list_lock);
 		set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
 		spin_unlock(&rbio->bio_list_lock);
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 0e84c9c9293f..45e6ff78316f 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -14,7 +14,6 @@ enum btrfs_rbio_ops {
 	BTRFS_RBIO_WRITE,
 	BTRFS_RBIO_READ_REBUILD,
 	BTRFS_RBIO_PARITY_SCRUB,
-	BTRFS_RBIO_REBUILD_MISSING,
 };
 
 struct btrfs_raid_bio {

From 070bb0011ccfd00c7de25dce2489fbe9cce5ba44 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Tue, 20 Jun 2023 16:55:09 +0800
Subject: [PATCH 1453/2047] btrfs: sysfs: show if ACL support has been compiled
 in

ACL support depends on the compile-time configuration option
CONFIG_BTRFS_FS_POSIX_ACL. Prior to mounting a btrfs filesystem, it is not
possible to determine whether ACL support has been compiled in. To address
this, add a sysfs interface, /sys/fs/btrfs/features/acl, and check for ACL
support in the system's btrfs.

  To determine ACL support:

  Return 0 indicates ACL is not supported:
    $ cat /sys/fs/btrfs/features/acl
    0

  Return 1 indicates ACL is supported:
    $ cat /sys/fs/btrfs/features/acl
    1

IMO, this is a better approach, so that we also know if kernel is older.

  On an older kernel
    $ ls /sys/fs/btrfs/features/acl
    ls: cannot access '/sys/fs/btrfs/features/acl': No such file or directory

    mount a btrfs filesystem
    $ cat /proc/self/mounts | grep btrfs | grep -q noacl
    $ echo $?
    0

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/sysfs.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 25294e624851..b1d1ac25237b 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -414,6 +414,12 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
 BTRFS_ATTR(static_feature, supported_sectorsizes,
 	   supported_sectorsizes_show);
 
+static ssize_t acl_show(struct kobject *kobj, struct kobj_attribute *a, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", !!IS_ENABLED(CONFIG_BTRFS_FS_POSIX_ACL));
+}
+BTRFS_ATTR(static_feature, acl, acl_show);
+
 /*
  * Features which only depend on kernel version.
  *
@@ -421,6 +427,7 @@ BTRFS_ATTR(static_feature, supported_sectorsizes,
  * btrfs_supported_feature_attrs.
  */
 static struct attribute *btrfs_supported_static_feature_attrs[] = {
+	BTRFS_ATTR_PTR(static_feature, acl),
 	BTRFS_ATTR_PTR(static_feature, rmdir_subvol),
 	BTRFS_ATTR_PTR(static_feature, supported_checksums),
 	BTRFS_ATTR_PTR(static_feature, send_stream_version),

From dbb6ecb328cb5f974ff0a04ad31af332be5a122f Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 20 Jun 2023 15:06:05 +0800
Subject: [PATCH 1454/2047] btrfs: tracepoints: simplify raid56 events

After commit 6bfd0133bee2 ("btrfs: raid56: switch scrub path to use a
single function"), the raid56 implementation no longer uses different
endio functions for RMW/recover/scrub.

All read operations end in submit_read_wait_bio_list(), while all write
operations end in submit_write_bios().  This means quite some trace
events are out-of-date and no longer utilized.

This patch would unify the trace events into just two:

- trace_raid56_read()
  Replaces trace_raid56_read_partial(), trace_raid56_scrub_read() and
  trace_raid56_scrub_read_recover().

- trace_raid56_write()
  Replaces trace_raid56_write_stripe() and
  trace_raid56_scrub_write_stripe().

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/raid56.c            |  8 ++++----
 include/trace/events/btrfs.h | 29 ++---------------------------
 2 files changed, 6 insertions(+), 31 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 35fd6e152731..f97fc62fbab2 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1513,11 +1513,11 @@ static void submit_read_wait_bio_list(struct btrfs_raid_bio *rbio,
 	while ((bio = bio_list_pop(bio_list))) {
 		bio->bi_end_io = raid_wait_read_end_io;
 
-		if (trace_raid56_scrub_read_recover_enabled()) {
+		if (trace_raid56_read_enabled()) {
 			struct raid56_bio_trace_info trace_info = { 0 };
 
 			bio_get_trace_info(rbio, bio, &trace_info);
-			trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
+			trace_raid56_read(rbio, bio, &trace_info);
 		}
 		submit_bio(bio);
 	}
@@ -2191,11 +2191,11 @@ static void submit_write_bios(struct btrfs_raid_bio *rbio,
 	while ((bio = bio_list_pop(bio_list))) {
 		bio->bi_end_io = raid_wait_write_end_io;
 
-		if (trace_raid56_write_stripe_enabled()) {
+		if (trace_raid56_write_enabled()) {
 			struct raid56_bio_trace_info trace_info = { 0 };
 
 			bio_get_trace_info(rbio, bio, &trace_info);
-			trace_raid56_write_stripe(rbio, bio, &trace_info);
+			trace_raid56_write(rbio, bio, &trace_info);
 		}
 		submit_bio(bio);
 	}
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index a8206f5332e9..a76a279c5f0f 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -2482,7 +2482,7 @@ DECLARE_EVENT_CLASS(btrfs_raid56_bio,
 		__entry->offset, __entry->opf, __entry->physical, __entry->len)
 );
 
-DEFINE_EVENT(btrfs_raid56_bio, raid56_read_partial,
+DEFINE_EVENT(btrfs_raid56_bio, raid56_read,
 	TP_PROTO(const struct btrfs_raid_bio *rbio,
 		 const struct bio *bio,
 		 const struct raid56_bio_trace_info *trace_info),
@@ -2490,32 +2490,7 @@ DEFINE_EVENT(btrfs_raid56_bio, raid56_read_partial,
 	TP_ARGS(rbio, bio, trace_info)
 );
 
-DEFINE_EVENT(btrfs_raid56_bio, raid56_write_stripe,
-	TP_PROTO(const struct btrfs_raid_bio *rbio,
-		 const struct bio *bio,
-		 const struct raid56_bio_trace_info *trace_info),
-
-	TP_ARGS(rbio, bio, trace_info)
-);
-
-
-DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_write_stripe,
-	TP_PROTO(const struct btrfs_raid_bio *rbio,
-		 const struct bio *bio,
-		 const struct raid56_bio_trace_info *trace_info),
-
-	TP_ARGS(rbio, bio, trace_info)
-);
-
-DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read,
-	TP_PROTO(const struct btrfs_raid_bio *rbio,
-		 const struct bio *bio,
-		 const struct raid56_bio_trace_info *trace_info),
-
-	TP_ARGS(rbio, bio, trace_info)
-);
-
-DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read_recover,
+DEFINE_EVENT(btrfs_raid56_bio, raid56_write,
 	TP_PROTO(const struct btrfs_raid_bio *rbio,
 		 const struct bio *bio,
 		 const struct raid56_bio_trace_info *trace_info),

From 28f60894902ea40ed9a74cbbfbe812b979d9aa66 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 30 Jun 2023 16:03:45 +0100
Subject: [PATCH 1455/2047] btrfs: update documentation for
 add_new_free_space()

The documentation for add_new_free_space() is stale and no longer correct:

1) It's no longer used only when caching a block group. It's also called
   when creating a block group (btrfs_make_block_group()), when reading
   a block group at mount time (read_one_block_group()) and when reading
   the free space tree for a block group (typically the first time we
   attempt to allocate from the block group);

2) It has nothing to do with pinned extents. It only deals with the
   excluded extents io tree, which is used to track the locations of
   super blocks in order to make sure we never add the location of a
   super block to the free space cache of a block group.

So update the documention and also add a description of the arguments
and return values.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 82324c327a50..5ea515fc8c16 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -504,10 +504,17 @@ static void fragment_free_space(struct btrfs_block_group *block_group)
 #endif
 
 /*
- * This is only called by btrfs_cache_block_group, since we could have freed
- * extents we need to check the pinned_extents for any extents that can't be
- * used yet since their free space will be released as soon as the transaction
- * commits.
+ * Add a free space range to the in memory free space cache of a block group.
+ * This checks if the range contains super block locations and any such
+ * locations are not added to the free space cache.
+ *
+ * @block_group:      The target block group.
+ * @start:            Start offset of the range.
+ * @end:              End offset of the range (exclusive).
+ * @total_added_ret:  Optional pointer to return the total amount of space
+ *                    added to the block group's free space cache.
+ *
+ * Returns 0 on success or < 0 on error.
  */
 int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end,
 		       u64 *total_added_ret)

From 3b9f0995d819157f1d2b37ad291ad6c7eb8b762f Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 30 Jun 2023 16:03:46 +0100
Subject: [PATCH 1456/2047] btrfs: rename add_new_free_space() to
 btrfs_add_new_free_space()

Since add_new_free_space() is exported, used outside block-group.c, rename
it to include the 'btrfs_' prefix.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c     | 20 ++++++++++----------
 fs/btrfs/block-group.h     |  4 ++--
 fs/btrfs/free-space-tree.c | 13 ++++++++-----
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 5ea515fc8c16..7bd1eee9367b 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -516,8 +516,8 @@ static void fragment_free_space(struct btrfs_block_group *block_group)
  *
  * Returns 0 on success or < 0 on error.
  */
-int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end,
-		       u64 *total_added_ret)
+int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start,
+			     u64 end, u64 *total_added_ret)
 {
 	struct btrfs_fs_info *info = block_group->fs_info;
 	u64 extent_start, extent_end, size;
@@ -806,8 +806,8 @@ next:
 		    key.type == BTRFS_METADATA_ITEM_KEY) {
 			u64 space_added;
 
-			ret = add_new_free_space(block_group, last, key.objectid,
-						 &space_added);
+			ret = btrfs_add_new_free_space(block_group, last,
+						       key.objectid, &space_added);
 			if (ret)
 				goto out;
 			total_found += space_added;
@@ -828,9 +828,9 @@ next:
 		path->slots[0]++;
 	}
 
-	ret = add_new_free_space(block_group, last,
-				 block_group->start + block_group->length,
-				 NULL);
+	ret = btrfs_add_new_free_space(block_group, last,
+				       block_group->start + block_group->length,
+				       NULL);
 out:
 	btrfs_free_path(path);
 	return ret;
@@ -2326,8 +2326,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 		btrfs_free_excluded_extents(cache);
 	} else if (cache->used == 0) {
 		cache->cached = BTRFS_CACHE_FINISHED;
-		ret = add_new_free_space(cache, cache->start,
-					 cache->start + cache->length, NULL);
+		ret = btrfs_add_new_free_space(cache, cache->start,
+					       cache->start + cache->length, NULL);
 		btrfs_free_excluded_extents(cache);
 		if (ret)
 			goto error;
@@ -2774,7 +2774,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
 		return ERR_PTR(ret);
 	}
 
-	ret = add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL);
+	ret = btrfs_add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL);
 	btrfs_free_excluded_extents(cache);
 	if (ret) {
 		btrfs_put_block_group(cache);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 74b61e663028..2bdbcb834f95 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -291,8 +291,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
 void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
 struct btrfs_caching_control *btrfs_get_caching_control(
 		struct btrfs_block_group *cache);
-int add_new_free_space(struct btrfs_block_group *block_group,
-		       u64 start, u64 end, u64 *total_added_ret);
+int btrfs_add_new_free_space(struct btrfs_block_group *block_group,
+			     u64 start, u64 end, u64 *total_added_ret);
 struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
 				struct btrfs_fs_info *fs_info,
 				const u64 chunk_offset);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index f169378e2ca6..c0e734082dcc 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1517,8 +1517,10 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
 			} else if (prev_bit == 1 && bit == 0) {
 				u64 space_added;
 
-				ret = add_new_free_space(block_group, extent_start,
-							 offset, &space_added);
+				ret = btrfs_add_new_free_space(block_group,
+							       extent_start,
+							       offset,
+							       &space_added);
 				if (ret)
 					goto out;
 				total_found += space_added;
@@ -1533,7 +1535,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
 		}
 	}
 	if (prev_bit == 1) {
-		ret = add_new_free_space(block_group, extent_start, end, NULL);
+		ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
 		if (ret)
 			goto out;
 		extent_count++;
@@ -1590,8 +1592,9 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
 		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
 
-		ret = add_new_free_space(block_group, key.objectid,
-					 key.objectid + key.offset, &space_added);
+		ret = btrfs_add_new_free_space(block_group, key.objectid,
+					       key.objectid + key.offset,
+					       &space_added);
 		if (ret)
 			goto out;
 		total_found += space_added;

From aec5716c3e51a34ae8d98d287613b219ba267105 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 30 Jun 2023 16:03:47 +0100
Subject: [PATCH 1457/2047] btrfs: make btrfs_destroy_marked_extents() return
 void

Currently btrfs_destroy_marked_extents() is returning the value of the
last call to find_first_extent_bit(), which returns a value of 1 meaning
no more ranges found the dirty pages io tree. This value is useless to the
single caller of btrfs_destroy_marked_extents(), which ignores any return
value from btrfs_destroy_marked_extents(). This is because it's only used
in the transaction abort path, where we can't even deal with any errors
since we are in a critical situation already and cleanup of resources is
done in a best effort fashion.

So make btrfs_destroy_marked_extents() return void.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a9a2c5446c18..1957bb1dfb02 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4716,21 +4716,16 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
 	spin_unlock(&fs_info->delalloc_root_lock);
 }
 
-static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
-					struct extent_io_tree *dirty_pages,
-					int mark)
+static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
+					 struct extent_io_tree *dirty_pages,
+					 int mark)
 {
-	int ret;
 	struct extent_buffer *eb;
 	u64 start = 0;
 	u64 end;
 
-	while (1) {
-		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
-					    mark, NULL);
-		if (ret)
-			break;
-
+	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
+				      mark, NULL)) {
 		clear_extent_bits(dirty_pages, start, end, mark);
 		while (start <= end) {
 			eb = find_extent_buffer(fs_info, start);
@@ -4746,8 +4741,6 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
 			free_extent_buffer_stale(eb);
 		}
 	}
-
-	return ret;
 }
 
 static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,

From 46d81ebd4a521797481ef193397c099eaf97e83f Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 30 Jun 2023 16:03:48 +0100
Subject: [PATCH 1458/2047] btrfs: make btrfs_destroy_pinned_extent() return
 void

Currently btrfs_destroy_pinned_extent() is always returning 0 no matter
what and its caller ignores its return value (as well everything up in
the call chain). This is because this is called in the transaction abort
path, where we can't even deal with any errors since we are in a critical
situation already and cleanup of resources is done in a best effort
fashion.

So make btrfs_destroy_pinned_extent() return void.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1957bb1dfb02..f7b678b94666 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4743,12 +4743,11 @@ static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
 	}
 }
 
-static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
-				       struct extent_io_tree *unpin)
+static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
+					struct extent_io_tree *unpin)
 {
 	u64 start;
 	u64 end;
-	int ret;
 
 	while (1) {
 		struct extent_state *cached_state = NULL;
@@ -4760,9 +4759,8 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
 		 * the same extent range.
 		 */
 		mutex_lock(&fs_info->unused_bg_unpin_mutex);
-		ret = find_first_extent_bit(unpin, 0, &start, &end,
-					    EXTENT_DIRTY, &cached_state);
-		if (ret) {
+		if (find_first_extent_bit(unpin, 0, &start, &end,
+					  EXTENT_DIRTY, &cached_state)) {
 			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			break;
 		}
@@ -4773,8 +4771,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
 		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 		cond_resched();
 	}
-
-	return 0;
 }
 
 static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)

From e5860f8207ed341502ecee80688cef93e367b615 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 30 Jun 2023 16:03:49 +0100
Subject: [PATCH 1459/2047] btrfs: make find_first_extent_bit() return a
 boolean

Currently find_first_extent_bit() returns a 0 if it found a range in the
given io tree and 1 if it didn't find any. There's no need to return any
errors, so make the return value a boolean and invert the logic to make
more sense: return true if it found a range and false if it didn't find
any range.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c      |  9 ++++-----
 fs/btrfs/dev-replace.c      |  6 +++---
 fs/btrfs/disk-io.c          | 10 +++++-----
 fs/btrfs/extent-io-tree.c   | 14 +++++++-------
 fs/btrfs/extent-io-tree.h   |  6 +++---
 fs/btrfs/extent-tree.c      |  5 ++---
 fs/btrfs/free-space-cache.c |  7 +++----
 fs/btrfs/relocation.c       | 10 ++++++----
 fs/btrfs/transaction.c      |  8 ++++----
 fs/btrfs/volumes.c          |  6 +++---
 10 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 7bd1eee9367b..9479c9e1507d 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -527,11 +527,10 @@ int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start,
 		*total_added_ret = 0;
 
 	while (start < end) {
-		ret = find_first_extent_bit(&info->excluded_extents, start,
-					    &extent_start, &extent_end,
-					    EXTENT_DIRTY | EXTENT_UPTODATE,
-					    NULL);
-		if (ret)
+		if (!find_first_extent_bit(&info->excluded_extents, start,
+					   &extent_start, &extent_end,
+					   EXTENT_DIRTY | EXTENT_UPTODATE,
+					   NULL))
 			break;
 
 		if (extent_start <= start) {
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5f10965fd72b..fff22ed55c42 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -792,9 +792,9 @@ static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev,
 
 	lockdep_assert_held(&srcdev->fs_info->chunk_mutex);
 
-	while (!find_first_extent_bit(&srcdev->alloc_state, start,
-				      &found_start, &found_end,
-				      CHUNK_ALLOCATED, &cached_state)) {
+	while (find_first_extent_bit(&srcdev->alloc_state, start,
+				     &found_start, &found_end,
+				     CHUNK_ALLOCATED, &cached_state)) {
 		ret = set_extent_bit(&tgtdev->alloc_state, found_start,
 				     found_end, CHUNK_ALLOCATED, NULL);
 		if (ret)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f7b678b94666..e7a096a8c0f6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4228,7 +4228,7 @@ static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
 		u64 found_end;
 
 		found = true;
-		while (!find_first_extent_bit(&trans->dirty_pages, cur,
+		while (find_first_extent_bit(&trans->dirty_pages, cur,
 			&found_start, &found_end, EXTENT_DIRTY, &cached)) {
 			dirty_bytes += found_end + 1 - found_start;
 			cur = found_end + 1;
@@ -4724,8 +4724,8 @@ static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
 	u64 start = 0;
 	u64 end;
 
-	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
-				      mark, NULL)) {
+	while (find_first_extent_bit(dirty_pages, start, &start, &end,
+				     mark, NULL)) {
 		clear_extent_bits(dirty_pages, start, end, mark);
 		while (start <= end) {
 			eb = find_extent_buffer(fs_info, start);
@@ -4759,8 +4759,8 @@ static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
 		 * the same extent range.
 		 */
 		mutex_lock(&fs_info->unused_bg_unpin_mutex);
-		if (find_first_extent_bit(unpin, 0, &start, &end,
-					  EXTENT_DIRTY, &cached_state)) {
+		if (!find_first_extent_bit(unpin, 0, &start, &end,
+					   EXTENT_DIRTY, &cached_state)) {
 			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			break;
 		}
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index a2315a4b8b75..ff8e117a1ace 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -831,15 +831,15 @@ static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *t
  *
  * Note: If there are multiple bits set in @bits, any of them will match.
  *
- * Return 0 if we find something, and update @start_ret and @end_ret.
- * Return 1 if we found nothing.
+ * Return true if we find something, and update @start_ret and @end_ret.
+ * Return false if we found nothing.
  */
-int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-			  u64 *start_ret, u64 *end_ret, u32 bits,
-			  struct extent_state **cached_state)
+bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+			   u64 *start_ret, u64 *end_ret, u32 bits,
+			   struct extent_state **cached_state)
 {
 	struct extent_state *state;
-	int ret = 1;
+	bool ret = false;
 
 	spin_lock(&tree->lock);
 	if (cached_state && *cached_state) {
@@ -863,7 +863,7 @@ got_it:
 		cache_state_if_flags(state, cached_state, 0);
 		*start_ret = state->start;
 		*end_ret = state->end;
-		ret = 0;
+		ret = true;
 	}
 out:
 	spin_unlock(&tree->lock);
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index fbd3b275ab1c..28c23a23d121 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -182,9 +182,9 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 		       u32 bits, u32 clear_bits,
 		       struct extent_state **cached_state);
 
-int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-			  u64 *start_ret, u64 *end_ret, u32 bits,
-			  struct extent_state **cached_state);
+bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+			   u64 *start_ret, u64 *end_ret, u32 bits,
+			   struct extent_state **cached_state);
 void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
 				 u64 *start_ret, u64 *end_ret, u32 bits);
 int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f396a9afa403..ab7b588baa7e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2751,9 +2751,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 		struct extent_state *cached_state = NULL;
 
 		mutex_lock(&fs_info->unused_bg_unpin_mutex);
-		ret = find_first_extent_bit(unpin, 0, &start, &end,
-					    EXTENT_DIRTY, &cached_state);
-		if (ret) {
+		if (!find_first_extent_bit(unpin, 0, &start, &end,
+					   EXTENT_DIRTY, &cached_state)) {
 			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			break;
 		}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 880800418075..bfc01352351c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1219,10 +1219,9 @@ static noinline_for_stack int write_pinned_extent_entries(
 	start = block_group->start;
 
 	while (start < block_group->start + block_group->length) {
-		ret = find_first_extent_bit(unpin, start,
-					    &extent_start, &extent_end,
-					    EXTENT_DIRTY, NULL);
-		if (ret)
+		if (!find_first_extent_bit(unpin, start,
+					   &extent_start, &extent_end,
+					   EXTENT_DIRTY, NULL))
 			return 0;
 
 		/* This pinned extent is out of our range */
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 46c3c1d57266..8ff0e9dcf85c 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3498,6 +3498,8 @@ int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
 
 	last = rc->block_group->start + rc->block_group->length;
 	while (1) {
+		bool block_found;
+
 		cond_resched();
 		if (rc->search_start >= last) {
 			ret = 1;
@@ -3548,11 +3550,11 @@ next:
 			goto next;
 		}
 
-		ret = find_first_extent_bit(&rc->processed_blocks,
-					    key.objectid, &start, &end,
-					    EXTENT_DIRTY, NULL);
+		block_found = find_first_extent_bit(&rc->processed_blocks,
+						    key.objectid, &start, &end,
+						    EXTENT_DIRTY, NULL);
 
-		if (ret == 0 && start <= key.objectid) {
+		if (block_found && start <= key.objectid) {
 			btrfs_release_path(path);
 			rc->search_start = end + 1;
 		} else {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 91b6c2fdc420..4bb9716ad24a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1060,8 +1060,8 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
 	u64 start = 0;
 	u64 end;
 
-	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
-				      mark, &cached_state)) {
+	while (find_first_extent_bit(dirty_pages, start, &start, &end,
+				     mark, &cached_state)) {
 		bool wait_writeback = false;
 
 		err = convert_extent_bit(dirty_pages, start, end,
@@ -1114,8 +1114,8 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
 	u64 start = 0;
 	u64 end;
 
-	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
-				      EXTENT_NEED_WAIT, &cached_state)) {
+	while (find_first_extent_bit(dirty_pages, start, &start, &end,
+				     EXTENT_NEED_WAIT, &cached_state)) {
 		/*
 		 * Ignore -ENOMEM errors returned by clear_extent_bit().
 		 * When committing the transaction, we'll remove any entries
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f47a9c42431e..6d3aa26186a3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1424,9 +1424,9 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
 
 	lockdep_assert_held(&device->fs_info->chunk_mutex);
 
-	if (!find_first_extent_bit(&device->alloc_state, *start,
-				   &physical_start, &physical_end,
-				   CHUNK_ALLOCATED, NULL)) {
+	if (find_first_extent_bit(&device->alloc_state, *start,
+				  &physical_start, &physical_end,
+				  CHUNK_ALLOCATED, NULL)) {
 
 		if (in_range(physical_start, *start, len) ||
 		    in_range(*start, physical_start,

From b1c8f527feb858927f50a136d13196f107c63631 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 30 Jun 2023 16:03:50 +0100
Subject: [PATCH 1460/2047] btrfs: open code trivial
 btrfs_add_excluded_extent()

The code for btrfs_add_excluded_extent() is trivial, it's just a
set_extent_bit() call. However it's defined in extent-tree.c but it is
only used (twice) in block-group.c. So open code it in block-group.c,
reducing the need to export a trivial function.

Also since the only caller btrfs_add_excluded_extent() is prepared to
deal with errors, stop ignoring errors from the set_extent_bit() call.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c | 10 ++++++----
 fs/btrfs/extent-tree.c |  9 ---------
 fs/btrfs/extent-tree.h |  2 --
 3 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 9479c9e1507d..3cbd80b6f765 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2104,8 +2104,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
 	if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
 		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
 		cache->bytes_super += stripe_len;
-		ret = btrfs_add_excluded_extent(fs_info, cache->start,
-						stripe_len);
+		ret = set_extent_bit(&fs_info->excluded_extents, cache->start,
+				     cache->start + stripe_len - 1,
+				     EXTENT_UPTODATE, NULL);
 		if (ret)
 			return ret;
 	}
@@ -2131,8 +2132,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
 				cache->start + cache->length - logical[nr]);
 
 			cache->bytes_super += len;
-			ret = btrfs_add_excluded_extent(fs_info, logical[nr],
-							len);
+			ret = set_extent_bit(&fs_info->excluded_extents, logical[nr],
+					     logical[nr] + len - 1,
+					     EXTENT_UPTODATE, NULL);
 			if (ret) {
 				kfree(logical);
 				return ret;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ab7b588baa7e..1a0fb156f602 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -69,15 +69,6 @@ static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
 	return (cache->flags & bits) == bits;
 }
 
-int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
-			      u64 start, u64 num_bytes)
-{
-	u64 end = start + num_bytes - 1;
-	set_extent_bit(&fs_info->excluded_extents, start, end,
-		       EXTENT_UPTODATE, NULL);
-	return 0;
-}
-
 void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index 429d5c570061..3b2f265f4653 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -96,8 +96,6 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
 				     enum btrfs_inline_ref_type is_data);
 u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
 
-int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
-			      u64 start, u64 num_bytes);
 void btrfs_free_excluded_extents(struct btrfs_block_group *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count);
 void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,

From 98b5a8fd2af7005ba5a1a5cd9079c98266be7499 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 30 Jun 2023 16:03:51 +0100
Subject: [PATCH 1461/2047] btrfs: move btrfs_free_excluded_extents() into
 block-group.c

The function btrfs_free_excluded_extents() is only used by block-group.c,
so move it into block-group.c and make it static. Also removed unnecessary
variables that are used only once.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c |  6 ++++++
 fs/btrfs/extent-tree.c | 12 ------------
 fs/btrfs/extent-tree.h |  1 -
 3 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 3cbd80b6f765..a127865f49f9 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -835,6 +835,12 @@ out:
 	return ret;
 }
 
+static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg)
+{
+	clear_extent_bits(&bg->fs_info->excluded_extents, bg->start,
+			  bg->start + bg->length - 1, EXTENT_UPTODATE);
+}
+
 static noinline void caching_thread(struct btrfs_work *work)
 {
 	struct btrfs_block_group *block_group;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1a0fb156f602..d674e421ea75 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -69,18 +69,6 @@ static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
 	return (cache->flags & bits) == bits;
 }
 
-void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
-{
-	struct btrfs_fs_info *fs_info = cache->fs_info;
-	u64 start, end;
-
-	start = cache->start;
-	end = start + cache->length - 1;
-
-	clear_extent_bits(&fs_info->excluded_extents, start, end,
-			  EXTENT_UPTODATE);
-}
-
 /* simple helper to search for an existing data extent at a given offset */
 int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
 {
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index 3b2f265f4653..b9e148adcd28 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -96,7 +96,6 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
 				     enum btrfs_inline_ref_type is_data);
 u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
 
-void btrfs_free_excluded_extents(struct btrfs_block_group *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count);
 void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
 				  struct btrfs_delayed_ref_root *delayed_refs,

From 16c3a47648383efa625f771999182db9f34fb8d9 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 26 Jun 2023 17:55:25 +0800
Subject: [PATCH 1462/2047] btrfs: deprecate integrity checker feature

The integrity checker feature needs to be enabled at compile time
(BTRFS_FS_CHECK_INTEGRITY) and then enabled by mount options check_int*.

Although it provides some unique features which can not be provided by
any other sanity checks like tree-checker, it does not only have high
CPU and memory overhead, but is also a maintenance burden.

For example, it's the only caller of btrfs_map_block() with
@need_raid_map = 0.

Considering most btrfs developers are not even testing this feature, I'm
here to propose deprecation of this feature.

For now only warning messages will be printed, the feature itself would
still work.

Removal time has been set to 6.7 release.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/Kconfig | 4 +++-
 fs/btrfs/super.c | 6 ++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 66fa9ab2c046..3282adc84d52 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -49,9 +49,11 @@ config BTRFS_FS_POSIX_ACL
 	  If you don't know what Access Control Lists are, say N
 
 config BTRFS_FS_CHECK_INTEGRITY
-	bool "Btrfs with integrity check tool compiled in (DANGEROUS)"
+	bool "Btrfs with integrity check tool compiled in (DEPRECATED)"
 	depends on BTRFS_FS
 	help
+	  This feature has been deprecated and will be removed in 6.7.
+
 	  Adds code that examines all block write requests (including
 	  writes of the super block). The goal is to verify that the
 	  state of the filesystem on disk is always consistent, i.e.,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f1dd172d8d5b..cffdd6f7f8e8 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -709,12 +709,16 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			break;
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 		case Opt_check_integrity_including_extent_data:
+			btrfs_warn(info,
+	"integrity checker is deprecated and will be removed in 6.7");
 			btrfs_info(info,
 				   "enabling check integrity including extent data");
 			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY_DATA);
 			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
 			break;
 		case Opt_check_integrity:
+			btrfs_warn(info,
+	"integrity checker is deprecated and will be removed in 6.7");
 			btrfs_info(info, "enabling check integrity");
 			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
 			break;
@@ -727,6 +731,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 				goto out;
 			}
 			info->check_integrity_print_mask = intarg;
+			btrfs_warn(info,
+	"integrity checker is deprecated and will be removed in 6.7");
 			btrfs_info(info, "check_integrity_print_mask 0x%x",
 				   info->check_integrity_print_mask);
 			break;

From 98efb4eb310d0d72e663924adc7b5b6e14813f9d Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 13 Jul 2023 16:10:26 +0200
Subject: [PATCH 1463/2047] btrfs: use helper sizeof_field in struct accessors

There's a helper for obtaining size of a struct member, we can use it
instead of open coding the pointer magic.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/accessors.h | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
index ceadfc5d6c66..8cfc8214109c 100644
--- a/fs/btrfs/accessors.h
+++ b/fs/btrfs/accessors.h
@@ -3,6 +3,8 @@
 #ifndef BTRFS_ACCESSORS_H
 #define BTRFS_ACCESSORS_H
 
+#include <linux/stddef.h>
+
 struct btrfs_map_token {
 	struct extent_buffer *eb;
 	char *kaddr;
@@ -34,13 +36,13 @@ static inline void put_unaligned_le8(u8 val, void *p)
 	read_extent_buffer(eb, (char *)(result),			\
 			   ((unsigned long)(ptr)) +			\
 			    offsetof(type, member),			\
-			   sizeof(((type *)0)->member)))
+			    sizeof_field(type, member)))
 
 #define write_eb_member(eb, ptr, type, member, result) (\
 	write_extent_buffer(eb, (char *)(result),			\
 			   ((unsigned long)(ptr)) +			\
 			    offsetof(type, member),			\
-			   sizeof(((type *)0)->member)))
+			    sizeof_field(type, member)))
 
 #define DECLARE_BTRFS_SETGET_BITS(bits)					\
 u##bits btrfs_get_token_##bits(struct btrfs_map_token *token,		\
@@ -62,25 +64,25 @@ DECLARE_BTRFS_SETGET_BITS(64)
 static inline u##bits btrfs_##name(const struct extent_buffer *eb,	\
 				   const type *s)			\
 {									\
-	static_assert(sizeof(u##bits) == sizeof(((type *)0))->member);	\
+	static_assert(sizeof(u##bits) == sizeof_field(type, member));	\
 	return btrfs_get_##bits(eb, s, offsetof(type, member));		\
 }									\
 static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
 				    u##bits val)			\
 {									\
-	static_assert(sizeof(u##bits) == sizeof(((type *)0))->member);	\
+	static_assert(sizeof(u##bits) == sizeof_field(type, member));	\
 	btrfs_set_##bits(eb, s, offsetof(type, member), val);		\
 }									\
 static inline u##bits btrfs_token_##name(struct btrfs_map_token *token,	\
 					 const type *s)			\
 {									\
-	static_assert(sizeof(u##bits) == sizeof(((type *)0))->member);	\
+	static_assert(sizeof(u##bits) == sizeof_field(type, member));	\
 	return btrfs_get_token_##bits(token, s, offsetof(type, member));\
 }									\
 static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
 					  type *s, u##bits val)		\
 {									\
-	static_assert(sizeof(u##bits) == sizeof(((type *)0))->member);	\
+	static_assert(sizeof(u##bits) == sizeof_field(type, member));	\
 	btrfs_set_token_##bits(token, s, offsetof(type, member), val);	\
 }
 
@@ -111,17 +113,14 @@ static inline void btrfs_set_##name(type *s, u##bits val)		\
 static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
 					   struct btrfs_dev_item *s)
 {
-	static_assert(sizeof(u64) ==
-		      sizeof(((struct btrfs_dev_item *)0))->total_bytes);
-	return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
-					    total_bytes));
+	static_assert(sizeof(u64) == sizeof_field(struct btrfs_dev_item, total_bytes));
+	return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes));
 }
 static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb,
 						struct btrfs_dev_item *s,
 						u64 val)
 {
-	static_assert(sizeof(u64) ==
-		      sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+	static_assert(sizeof(u64) == sizeof_field(struct btrfs_dev_item, total_bytes));
 	WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize));
 	btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val);
 }

From 7b365a2a3d31cc523ad24f2147285c3707297ce8 Mon Sep 17 00:00:00 2001
From: Minjie Du <duminjie@vivo.com>
Date: Mon, 17 Jul 2023 15:16:22 +0800
Subject: [PATCH 1464/2047] btrfs: use folio_next_index() helper in
 extent_write_cache_pages

Simplify code pattern of 'folio->index + folio_nr_pages(folio)' by using
the existing helper folio_next_index().

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Minjie Du <duminjie@vivo.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 90ad3006ef3a..91fba78c9996 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2150,7 +2150,7 @@ retry:
 		for (i = 0; i < nr_folios; i++) {
 			struct folio *folio = fbatch.folios[i];
 
-			done_index = folio->index + folio_nr_pages(folio);
+			done_index = folio_next_index(folio);
 			/*
 			 * At this point we hold neither the i_pages lock nor
 			 * the page lock: the page may be truncated or

From 17353a3447927455b8242aa371488b7af01c5e14 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 14 Jun 2023 14:39:55 +0800
Subject: [PATCH 1465/2047] btrfs: scrub: remove unused btrfs_path in
 scrub_simple_mirror()

The @path in scrub_simple_mirror() is no longer utilized after commit
e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe
infrastructure").

Before that commit, we call find_first_extent_item() directly, which
needs a path and that path can be reused.  But after that switch commit,
the extent search is done inside queue_scrub_stripe(), which will no
longer accept a path from outside.

So the @path variable can be safely removed.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ remove the stale comment ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index fe1c5bc7f601..8381174bda15 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1957,16 +1957,12 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
 {
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
 	const u64 logical_end = logical_start + logical_length;
-	/* An artificial limit, inherit from old scrub behavior */
-	struct btrfs_path path = { 0 };
 	u64 cur_logical = logical_start;
 	int ret;
 
 	/* The range must be inside the bg */
 	ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length);
 
-	path.search_commit_root = 1;
-	path.skip_locking = 1;
 	/* Go through each extent items inside the logical range */
 	while (cur_logical < logical_end) {
 		u64 cur_physical = physical + cur_logical - logical_start;
@@ -2010,7 +2006,6 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
 		/* Don't hold CPU for too long time */
 		cond_resched();
 	}
-	btrfs_release_path(&path);
 	return ret;
 }
 

From b2cc440058baf3f4783a687a2caa35b31a1da553 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 27 Jun 2023 08:13:24 +0200
Subject: [PATCH 1466/2047] btrfs: simplify the no-bioc fast path condition in
 btrfs_map_block

nr_alloc_stripes can't be one if we are writing to a replacement device,
as it is incremented for that case right above.  Remove the duplicate
checks.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6d3aa26186a3..8af0e54099c2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6432,9 +6432,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 	 * I/O context structure.
 	 */
 	if (smap && num_alloc_stripes == 1 &&
-	    !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) &&
-	    (op == BTRFS_MAP_READ || !dev_replace_is_ongoing ||
-	     !dev_replace->tgtdev)) {
+	    !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
 		set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
 		if (mirror_num_ret)
 			*mirror_num_ret = mirror_num;

From 52ea5bfbfa6d5432ed46e0a18b9e5ca4e4f91852 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Sun, 9 Jul 2023 15:08:18 +0800
Subject: [PATCH 1467/2047] btrfs: move eb subpage preallocation out of the
 loop

Initially we preallocate btrfs_subpage structure in the main loop of
alloc_extent_buffer().

But later commit fbca46eb46ec ("btrfs: make nodesize >= PAGE_SIZE case
to reuse the non-subpage routine") has made sure we only go subpage
routine if our nodesize is smaller than PAGE_SIZE.

This means for that case, we only need to allocate the subpage structure
once anyway.

So this patch would make the preallocation out of the main loop.  This
would slightly reduce the workload when we hold the page lock, and make
code a little easier to read.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 91fba78c9996..662d991fb45f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3559,6 +3559,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	struct extent_buffer *exists = NULL;
 	struct page *p;
 	struct address_space *mapping = fs_info->btree_inode->i_mapping;
+	struct btrfs_subpage *prealloc = NULL;
 	u64 lockdep_owner = owner_root;
 	int uptodate = 1;
 	int ret;
@@ -3595,36 +3596,30 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	btrfs_set_buffer_lockdep_class(lockdep_owner, eb, level);
 
 	num_pages = num_extent_pages(eb);
-	for (i = 0; i < num_pages; i++, index++) {
-		struct btrfs_subpage *prealloc = NULL;
 
+	/*
+	 * Preallocate page->private for subpage case, so that we won't
+	 * allocate memory with private_lock nor page lock hold.
+	 *
+	 * The memory will be freed by attach_extent_buffer_page() or freed
+	 * manually if we exit earlier.
+	 */
+	if (fs_info->nodesize < PAGE_SIZE) {
+		prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
+		if (IS_ERR(prealloc)) {
+			exists = ERR_CAST(prealloc);
+			goto free_eb;
+		}
+	}
+
+	for (i = 0; i < num_pages; i++, index++) {
 		p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
 		if (!p) {
 			exists = ERR_PTR(-ENOMEM);
+			btrfs_free_subpage(prealloc);
 			goto free_eb;
 		}
 
-		/*
-		 * Preallocate page->private for subpage case, so that we won't
-		 * allocate memory with private_lock hold.  The memory will be
-		 * freed by attach_extent_buffer_page() or freed manually if
-		 * we exit earlier.
-		 *
-		 * Although we have ensured one subpage eb can only have one
-		 * page, but it may change in the future for 16K page size
-		 * support, so we still preallocate the memory in the loop.
-		 */
-		if (fs_info->nodesize < PAGE_SIZE) {
-			prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
-			if (IS_ERR(prealloc)) {
-				ret = PTR_ERR(prealloc);
-				unlock_page(p);
-				put_page(p);
-				exists = ERR_PTR(ret);
-				goto free_eb;
-			}
-		}
-
 		spin_lock(&mapping->private_lock);
 		exists = grab_extent_buffer(fs_info, p);
 		if (exists) {

From 75d305c55b130bae5077ec262440240fec4e6281 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.de>
Date: Fri, 14 Jul 2023 10:18:19 -0500
Subject: [PATCH 1468/2047] btrfs: remove duplicate free_async_extent_pages()
 on reservation error

While performing compressed writes, if the extent reservation fails, the
async extent pages are first freed in the error check for return value
ret, and then again at out_free label.

Remove the first call to free_async_extent_pages().

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index aa090b0b5d29..5508597be614 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1217,7 +1217,6 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
 				   async_extent->compressed_size,
 				   0, *alloc_hint, &ins, 1, 1);
 	if (ret) {
-		free_async_extent_pages(async_extent);
 		/*
 		 * Here we used to try again by going back to non-compressed
 		 * path for ENOSPC.  But we can't reserve space even for

From e28b02118b94e42be3355458a2406c6861e2dd32 Mon Sep 17 00:00:00 2001
From: Boris Burkov <boris@bur.io>
Date: Fri, 21 Jul 2023 09:02:06 -0700
Subject: [PATCH 1469/2047] btrfs: free qgroup rsv on io failure

If we do a write whose bio suffers an error, we will never reclaim the
qgroup reserved space for it. We allocate the space in the write_iter
codepath, then release the reservation as we allocate the ordered
extent, but we only create a delayed ref if the ordered extent finishes.
If it has an error, we simply leak the rsv. This is apparent in running
any error injecting (dmerror) fstests like btrfs/146 or btrfs/160. Such
tests fail due to dmesg on umount complaining about the leaked qgroup
data space.

When we clean up other aspects of space on failed ordered_extents, also
free the qgroup rsv.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
CC: stable@vger.kernel.org # 5.10+
Signed-off-by: Boris Burkov <boris@bur.io>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5508597be614..8e53df3516c2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3358,6 +3358,13 @@ out:
 			btrfs_free_reserved_extent(fs_info,
 					ordered_extent->disk_bytenr,
 					ordered_extent->disk_num_bytes, 1);
+			/*
+			 * Actually free the qgroup rsv which was released when
+			 * the ordered extent was created.
+			 */
+			btrfs_qgroup_free_refroot(fs_info, inode->root->root_key.objectid,
+						  ordered_extent->qgroup_rsv,
+						  BTRFS_QGROUP_RSV_DATA);
 		}
 	}
 

From a6496849671a5bc9218ecec25a983253b34351b1 Mon Sep 17 00:00:00 2001
From: Boris Burkov <boris@bur.io>
Date: Fri, 21 Jul 2023 09:02:07 -0700
Subject: [PATCH 1470/2047] btrfs: fix start transaction qgroup rsv double free

btrfs_start_transaction reserves metadata space of the PERTRANS type
before it identifies a transaction to start/join. This allows flushing
when reserving that space without a deadlock. However, it results in a
race which temporarily breaks qgroup rsv accounting.

T1                                              T2
start_transaction
do_stuff
                                            start_transaction
                                                qgroup_reserve_meta_pertrans
commit_transaction
    qgroup_free_meta_all_pertrans
                                            hit an error starting txn
                                            goto reserve_fail
                                            qgroup_free_meta_pertrans (already freed!)

The basic issue is that there is nothing preventing another commit from
committing before start_transaction finishes (in fact sometimes we
intentionally wait for it) so any error path that frees the reserve is
at risk of this race.

While this exact space was getting freed anyway, and it's not a huge
deal to double free it (just a warning, the free code catches this), it
can result in incorrectly freeing some other pertrans reservation in
this same reservation, which could then lead to spuriously granting
reservations we might not have the space for. Therefore, I do believe it
is worth fixing.

To fix it, use the existing prealloc->pertrans conversion mechanism.
When we first reserve the space, we reserve prealloc space and only when
we are sure we have a transaction do we convert it to pertrans. This way
any racing commits do not blow away our reservation, but we still get a
pertrans reservation that is freed when _this_ transaction gets committed.

This issue can be reproduced by running generic/269 with either qgroups
or squotas enabled via mkfs on the scratch device.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
CC: stable@vger.kernel.org # 5.10+
Signed-off-by: Boris Burkov <boris@bur.io>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/transaction.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4bb9716ad24a..815f61d6b506 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -591,8 +591,13 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
 		u64 delayed_refs_bytes = 0;
 
 		qgroup_reserved = num_items * fs_info->nodesize;
-		ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved,
-				enforce_qgroups);
+		/*
+		 * Use prealloc for now, as there might be a currently running
+		 * transaction that could free this reserved space prematurely
+		 * by committing.
+		 */
+		ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserved,
+							 enforce_qgroups, false);
 		if (ret)
 			return ERR_PTR(ret);
 
@@ -705,6 +710,14 @@ again:
 		h->reloc_reserved = reloc_reserved;
 	}
 
+	/*
+	 * Now that we have found a transaction to be a part of, convert the
+	 * qgroup reservation from prealloc to pertrans. A different transaction
+	 * can't race in and free our pertrans out from under us.
+	 */
+	if (qgroup_reserved)
+		btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+
 got_it:
 	if (!current->journal_info)
 		current->journal_info = h;
@@ -752,7 +765,7 @@ alloc_fail:
 		btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
 					num_bytes, NULL);
 reserve_fail:
-	btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
+	btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
 	return ERR_PTR(ret);
 }
 

From ba9145add52422779935f6d1b645a31a50c02af2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:22 +0200
Subject: [PATCH 1471/2047] btrfs: pass a flags argument to cow_file_range

The int used as bool unlock is not a very good way to describe the
behavior, and the next patch will have to add another behavior modifier.
We'll do that by two bool parameters instead of adding bit flags.  Now
specifies that the pages should always be kept locked.  This is the
inverse of the old unlock argument.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
[ switch flags to bool ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 48 +++++++++++++++++++++---------------------------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8e53df3516c2..820444fbb564 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -124,11 +124,12 @@ static struct kmem_cache *btrfs_inode_cachep;
 
 static int btrfs_setsize(struct inode *inode, struct iattr *attr);
 static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback);
+
 static noinline int cow_file_range(struct btrfs_inode *inode,
 				   struct page *locked_page,
 				   u64 start, u64 end, int *page_started,
-				   unsigned long *nr_written, int unlock,
-				   u64 *done_offset);
+				   unsigned long *nr_written, u64 *done_offset,
+				   bool keep_locked);
 static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 				       u64 len, u64 orig_start, u64 block_start,
 				       u64 block_len, u64 orig_block_len,
@@ -1148,7 +1149,7 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
 	 * can directly submit them without interruption.
 	 */
 	ret = cow_file_range(inode, locked_page, start, end, &page_started,
-			     &nr_written, 0, NULL);
+			     &nr_written, NULL, true);
 	/* Inline extent inserted, page gets unlocked and everything is done */
 	if (page_started)
 		return 0;
@@ -1361,25 +1362,18 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
  * locked_page is the page that writepage had locked already.  We use
  * it to make sure we don't do extra locks or unlocks.
  *
- * *page_started is set to one if we unlock locked_page and do everything
- * required to start IO on it.  It may be clean and already done with
- * IO when we return.
+ * When this function fails, it unlocks all pages except @locked_page.
  *
- * When unlock == 1, we unlock the pages in successfully allocated regions.
- * When unlock == 0, we leave them locked for writing them out.
+ * When this function successfully creates an inline extent, it sets page_started
+ * to 1 and unlocks all pages including locked_page and starts I/O on them.
+ * (In reality inline extents are limited to a single page, so locked_page is
+ * the only page handled anyway).
  *
- * However, we unlock all the pages except @locked_page in case of failure.
+ * When this function succeed and creates a normal extent, the page locking
+ * status depends on the passed in flags:
  *
- * In summary, page locking state will be as follow:
- *
- * - page_started == 1 (return value)
- *     - All the pages are unlocked. IO is started.
- *     - Note that this can happen only on success
- * - unlock == 1
- *     - All the pages except @locked_page are unlocked in any case
- * - unlock == 0
- *     - On success, all the pages are locked for writing out them
- *     - On failure, all the pages except @locked_page are unlocked
+ * - If @keep_locked is set, all pages are kept locked.
+ * - Else all pages except for @locked_page are unlocked.
  *
  * When a failure happens in the second or later iteration of the
  * while-loop, the ordered extents created in previous iterations are kept
@@ -1390,8 +1384,8 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
 static noinline int cow_file_range(struct btrfs_inode *inode,
 				   struct page *locked_page,
 				   u64 start, u64 end, int *page_started,
-				   unsigned long *nr_written, int unlock,
-				   u64 *done_offset)
+				   unsigned long *nr_written, u64 *done_offset,
+				   bool keep_locked)
 {
 	struct btrfs_root *root = inode->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1557,7 +1551,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 		 * Do set the Ordered (Private2) bit so we know this page was
 		 * properly setup for writepage.
 		 */
-		page_ops = unlock ? PAGE_UNLOCK : 0;
+		page_ops = (keep_locked ? 0 : PAGE_UNLOCK);
 		page_ops |= PAGE_SET_ORDERED;
 
 		extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
@@ -1626,10 +1620,10 @@ out_unlock:
 	 * EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV are handled by the cleanup
 	 * function.
 	 *
-	 * However, in case of unlock == 0, we still need to unlock the pages
+	 * However, in case of @keep_locked, we still need to unlock the pages
 	 * (except @locked_page) to ensure all the pages are unlocked.
 	 */
-	if (!unlock && orig_start < start) {
+	if (keep_locked && orig_start < start) {
 		if (!locked_page)
 			mapping_set_error(inode->vfs_inode.i_mapping, ret);
 		extent_clear_unlock_delalloc(inode, orig_start, start - 1,
@@ -1835,7 +1829,7 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 
 	while (start <= end) {
 		ret = cow_file_range(inode, locked_page, start, end, page_started,
-				     nr_written, 0, &done_offset);
+				     nr_written, &done_offset, true);
 		if (ret && ret != -EAGAIN)
 			return ret;
 
@@ -1955,7 +1949,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
 	}
 
 	return cow_file_range(inode, locked_page, start, end, page_started,
-			      nr_written, 1, NULL);
+			      nr_written, NULL, false);
 }
 
 struct can_nocow_file_extent_args {
@@ -2432,7 +2426,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
 					 page_started, nr_written, wbc);
 	else
 		ret = cow_file_range(inode, locked_page, start, end,
-				     page_started, nr_written, 1, NULL);
+				     page_started, nr_written, NULL, false);
 
 out:
 	ASSERT(ret <= 0);

From 53ffb30a784d30db4d4cd074b4da46fa2472ab60 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:23 +0200
Subject: [PATCH 1472/2047] btrfs: don't create inline extents in
 fallback_to_cow

For NOCOW files, run_delalloc_nocow can still fall back to COW
allocations when required and calls to fallback_to_cow helper for
that.  For such an allocation we can have multiple ordered_extents
for existing extents that NOCOW overwrites and new allocations that
fallback_to_cow creates.  If one of the new extents is an inline
extent, the writepages could would have to avoid normal page writeback
for them as indicated by the page_started return argument, which
run_delalloc_nocow can't return.   Fix this by never creating inline
extents from fallback_to_cow.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 42 +++++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 820444fbb564..aa1cda22a23a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -129,7 +129,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 				   struct page *locked_page,
 				   u64 start, u64 end, int *page_started,
 				   unsigned long *nr_written, u64 *done_offset,
-				   bool keep_locked);
+				   bool keep_locked, bool no_inline);
 static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 				       u64 len, u64 orig_start, u64 block_start,
 				       u64 block_len, u64 orig_block_len,
@@ -1149,7 +1149,7 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
 	 * can directly submit them without interruption.
 	 */
 	ret = cow_file_range(inode, locked_page, start, end, &page_started,
-			     &nr_written, NULL, true);
+			     &nr_written, NULL, true, false);
 	/* Inline extent inserted, page gets unlocked and everything is done */
 	if (page_started)
 		return 0;
@@ -1385,7 +1385,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 				   struct page *locked_page,
 				   u64 start, u64 end, int *page_started,
 				   unsigned long *nr_written, u64 *done_offset,
-				   bool keep_locked)
+				   bool keep_locked, bool no_inline)
 {
 	struct btrfs_root *root = inode->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1424,7 +1424,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 	 * This means we can trigger inline extent even if we didn't want to.
 	 * So here we skip inline extent creation completely.
 	 */
-	if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
+	if (start == 0 && fs_info->sectorsize == PAGE_SIZE && !no_inline) {
 		u64 actual_end = min_t(u64, i_size_read(&inode->vfs_inode),
 				       end + 1);
 
@@ -1829,7 +1829,7 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 
 	while (start <= end) {
 		ret = cow_file_range(inode, locked_page, start, end, page_started,
-				     nr_written, &done_offset, true);
+				     nr_written, &done_offset, true, false);
 		if (ret && ret != -EAGAIN)
 			return ret;
 
@@ -1887,15 +1887,17 @@ static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
 }
 
 static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
-			   const u64 start, const u64 end,
-			   int *page_started, unsigned long *nr_written)
+			   const u64 start, const u64 end)
 {
 	const bool is_space_ino = btrfs_is_free_space_inode(inode);
 	const bool is_reloc_ino = btrfs_is_data_reloc_root(inode->root);
 	const u64 range_bytes = end + 1 - start;
 	struct extent_io_tree *io_tree = &inode->io_tree;
+	int page_started = 0;
+	unsigned long nr_written;
 	u64 range_start = start;
 	u64 count;
+	int ret;
 
 	/*
 	 * If EXTENT_NORESERVE is set it means that when the buffered write was
@@ -1948,8 +1950,15 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
 					 NULL);
 	}
 
-	return cow_file_range(inode, locked_page, start, end, page_started,
-			      nr_written, NULL, false);
+	/*
+	 * Don't try to create inline extents, as a mix of inline extent that
+	 * is written out and unlocked directly and a normal NOCOW extent
+	 * doesn't work.
+	 */
+	ret = cow_file_range(inode, locked_page, start, end, &page_started,
+			     &nr_written, NULL, false, true);
+	ASSERT(!page_started);
+	return ret;
 }
 
 struct can_nocow_file_extent_args {
@@ -2098,9 +2107,7 @@ static int can_nocow_file_extent(struct btrfs_path *path,
  */
 static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 				       struct page *locked_page,
-				       const u64 start, const u64 end,
-				       int *page_started,
-				       unsigned long *nr_written)
+				       const u64 start, const u64 end)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_root *root = inode->root;
@@ -2268,8 +2275,7 @@ out_check:
 		 */
 		if (cow_start != (u64)-1) {
 			ret = fallback_to_cow(inode, locked_page,
-					      cow_start, found_key.offset - 1,
-					      page_started, nr_written);
+					      cow_start, found_key.offset - 1);
 			if (ret)
 				goto error;
 			cow_start = (u64)-1;
@@ -2350,8 +2356,7 @@ out_check:
 
 	if (cow_start != (u64)-1) {
 		cur_offset = end;
-		ret = fallback_to_cow(inode, locked_page, cow_start, end,
-				      page_started, nr_written);
+		ret = fallback_to_cow(inode, locked_page, cow_start, end);
 		if (ret)
 			goto error;
 	}
@@ -2410,8 +2415,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
 		 * preallocated inodes.
 		 */
 		ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
-		ret = run_delalloc_nocow(inode, locked_page, start, end,
-					 page_started, nr_written);
+		ret = run_delalloc_nocow(inode, locked_page, start, end);
 		goto out;
 	}
 
@@ -2426,7 +2430,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
 					 page_started, nr_written, wbc);
 	else
 		ret = cow_file_range(inode, locked_page, start, end,
-				     page_started, nr_written, NULL, false);
+				     page_started, nr_written, NULL, false, false);
 
 out:
 	ASSERT(ret <= 0);

From ef4e88e6a55babd825aed2940a72a0562f49ca7e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:24 +0200
Subject: [PATCH 1473/2047] btrfs: split page locking out of
 __process_pages_contig

There is a lot of complexity in __process_pages_contig to deal with the
PAGE_LOCK case that can return an error unlike all the other actions.

Open code the page iteration for page locking in lock_delalloc_pages and
remove all the now unused code from __process_pages_contig.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 151 +++++++++++++++++--------------------------
 fs/btrfs/extent_io.h |   1 -
 2 files changed, 59 insertions(+), 93 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 662d991fb45f..7925c8ba3571 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -197,18 +197,9 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
 	}
 }
 
-/*
- * Process one page for __process_pages_contig().
- *
- * Return >0 if we hit @page == @locked_page.
- * Return 0 if we updated the page status.
- * Return -EGAIN if the we need to try again.
- * (For PAGE_LOCK case but got dirty page or page not belong to mapping)
- */
-static int process_one_page(struct btrfs_fs_info *fs_info,
-			    struct address_space *mapping,
-			    struct page *page, struct page *locked_page,
-			    unsigned long page_ops, u64 start, u64 end)
+static void process_one_page(struct btrfs_fs_info *fs_info,
+			     struct page *page, struct page *locked_page,
+			     unsigned long page_ops, u64 start, u64 end)
 {
 	u32 len;
 
@@ -224,94 +215,36 @@ static int process_one_page(struct btrfs_fs_info *fs_info,
 	if (page_ops & PAGE_END_WRITEBACK)
 		btrfs_page_clamp_clear_writeback(fs_info, page, start, len);
 
-	if (page == locked_page)
-		return 1;
-
-	if (page_ops & PAGE_LOCK) {
-		int ret;
-
-		ret = btrfs_page_start_writer_lock(fs_info, page, start, len);
-		if (ret)
-			return ret;
-		if (!PageDirty(page) || page->mapping != mapping) {
-			btrfs_page_end_writer_lock(fs_info, page, start, len);
-			return -EAGAIN;
-		}
-	}
-	if (page_ops & PAGE_UNLOCK)
+	if (page != locked_page && (page_ops & PAGE_UNLOCK))
 		btrfs_page_end_writer_lock(fs_info, page, start, len);
-	return 0;
 }
 
-static int __process_pages_contig(struct address_space *mapping,
-				  struct page *locked_page,
-				  u64 start, u64 end, unsigned long page_ops,
-				  u64 *processed_end)
+static void __process_pages_contig(struct address_space *mapping,
+				   struct page *locked_page, u64 start, u64 end,
+				   unsigned long page_ops)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
 	pgoff_t start_index = start >> PAGE_SHIFT;
 	pgoff_t end_index = end >> PAGE_SHIFT;
 	pgoff_t index = start_index;
-	unsigned long pages_processed = 0;
 	struct folio_batch fbatch;
-	int err = 0;
 	int i;
 
-	if (page_ops & PAGE_LOCK) {
-		ASSERT(page_ops == PAGE_LOCK);
-		ASSERT(processed_end && *processed_end == start);
-	}
-
 	folio_batch_init(&fbatch);
 	while (index <= end_index) {
 		int found_folios;
 
 		found_folios = filemap_get_folios_contig(mapping, &index,
 				end_index, &fbatch);
-
-		if (found_folios == 0) {
-			/*
-			 * Only if we're going to lock these pages, we can find
-			 * nothing at @index.
-			 */
-			ASSERT(page_ops & PAGE_LOCK);
-			err = -EAGAIN;
-			goto out;
-		}
-
 		for (i = 0; i < found_folios; i++) {
-			int process_ret;
 			struct folio *folio = fbatch.folios[i];
-			process_ret = process_one_page(fs_info, mapping,
-					&folio->page, locked_page, page_ops,
-					start, end);
-			if (process_ret < 0) {
-				err = -EAGAIN;
-				folio_batch_release(&fbatch);
-				goto out;
-			}
-			pages_processed += folio_nr_pages(folio);
+
+			process_one_page(fs_info, &folio->page, locked_page,
+					 page_ops, start, end);
 		}
 		folio_batch_release(&fbatch);
 		cond_resched();
 	}
-out:
-	if (err && processed_end) {
-		/*
-		 * Update @processed_end. I know this is awful since it has
-		 * two different return value patterns (inclusive vs exclusive).
-		 *
-		 * But the exclusive pattern is necessary if @start is 0, or we
-		 * underflow and check against processed_end won't work as
-		 * expected.
-		 */
-		if (pages_processed)
-			*processed_end = min(end,
-			((u64)(start_index + pages_processed) << PAGE_SHIFT) - 1);
-		else
-			*processed_end = start;
-	}
-	return err;
 }
 
 static noinline void __unlock_for_delalloc(struct inode *inode,
@@ -326,29 +259,63 @@ static noinline void __unlock_for_delalloc(struct inode *inode,
 		return;
 
 	__process_pages_contig(inode->i_mapping, locked_page, start, end,
-			       PAGE_UNLOCK, NULL);
+			       PAGE_UNLOCK);
 }
 
 static noinline int lock_delalloc_pages(struct inode *inode,
 					struct page *locked_page,
-					u64 delalloc_start,
-					u64 delalloc_end)
+					u64 start,
+					u64 end)
 {
-	unsigned long index = delalloc_start >> PAGE_SHIFT;
-	unsigned long end_index = delalloc_end >> PAGE_SHIFT;
-	u64 processed_end = delalloc_start;
-	int ret;
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	struct address_space *mapping = inode->i_mapping;
+	pgoff_t start_index = start >> PAGE_SHIFT;
+	pgoff_t end_index = end >> PAGE_SHIFT;
+	pgoff_t index = start_index;
+	u64 processed_end = start;
+	struct folio_batch fbatch;
 
-	ASSERT(locked_page);
 	if (index == locked_page->index && index == end_index)
 		return 0;
 
-	ret = __process_pages_contig(inode->i_mapping, locked_page, delalloc_start,
-				     delalloc_end, PAGE_LOCK, &processed_end);
-	if (ret == -EAGAIN && processed_end > delalloc_start)
-		__unlock_for_delalloc(inode, locked_page, delalloc_start,
-				      processed_end);
-	return ret;
+	folio_batch_init(&fbatch);
+	while (index <= end_index) {
+		unsigned int found_folios, i;
+
+		found_folios = filemap_get_folios_contig(mapping, &index,
+				end_index, &fbatch);
+		if (found_folios == 0)
+			goto out;
+
+		for (i = 0; i < found_folios; i++) {
+			struct page *page = &fbatch.folios[i]->page;
+			u32 len = end + 1 - start;
+
+			if (page == locked_page)
+				continue;
+
+			if (btrfs_page_start_writer_lock(fs_info, page, start,
+							 len))
+				goto out;
+
+			if (!PageDirty(page) || page->mapping != mapping) {
+				btrfs_page_end_writer_lock(fs_info, page, start,
+							   len);
+				goto out;
+			}
+
+			processed_end = page_offset(page) + PAGE_SIZE - 1;
+		}
+		folio_batch_release(&fbatch);
+		cond_resched();
+	}
+
+	return 0;
+out:
+	folio_batch_release(&fbatch);
+	if (processed_end > start)
+		__unlock_for_delalloc(inode, locked_page, start, processed_end);
+	return -EAGAIN;
 }
 
 /*
@@ -467,7 +434,7 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
 	clear_extent_bit(&inode->io_tree, start, end, clear_bits, NULL);
 
 	__process_pages_contig(inode->vfs_inode.i_mapping, locked_page,
-			       start, end, page_ops, NULL);
+			       start, end, page_ops);
 }
 
 static bool btrfs_verify_page(struct page *page, u64 start)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index c5fae3a7d911..285754154fdc 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -40,7 +40,6 @@ enum {
 	ENUM_BIT(PAGE_START_WRITEBACK),
 	ENUM_BIT(PAGE_END_WRITEBACK),
 	ENUM_BIT(PAGE_SET_ORDERED),
-	ENUM_BIT(PAGE_LOCK),
 };
 
 /*

From 6648cedd86135db197410e56b5372b2945f2b311 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:25 +0200
Subject: [PATCH 1474/2047] btrfs: remove btrfs_writepage_endio_finish_ordered

btrfs_writepage_endio_finish_ordered is a small wrapper around
btrfs_mark_ordered_io_finished that just changs the argument passing
slightly, and adds a tracepoint.

Move the tracpoint to btrfs_mark_ordered_io_finished, which means
it now also covers the error handling in btrfs_cleanup_ordered_extent
and switch all callers to just call btrfs_mark_ordered_io_finished
directly.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/btrfs_inode.h  |  3 ---
 fs/btrfs/extent_io.c    | 17 ++++++++---------
 fs/btrfs/inode.c        |  9 ---------
 fs/btrfs/ordered-data.c |  4 ++++
 4 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d47a927b3504..90e60ad9db62 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -501,9 +501,6 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
 			     u64 start, u64 end, int *page_started,
 			     unsigned long *nr_written, struct writeback_control *wbc);
 int btrfs_writepage_cow_fixup(struct page *page);
-void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
-					  struct page *page, u64 start,
-					  u64 end, bool uptodate);
 int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
 					     int compress_type);
 int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7925c8ba3571..6a7128318fbe 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -471,17 +471,15 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 	struct btrfs_inode *inode;
 	const bool uptodate = (err == 0);
 	int ret = 0;
+	u32 len = end + 1 - start;
 
+	ASSERT(end + 1 - start <= U32_MAX);
 	ASSERT(page && page->mapping);
 	inode = BTRFS_I(page->mapping->host);
-	btrfs_writepage_endio_finish_ordered(inode, page, start, end, uptodate);
+	btrfs_mark_ordered_io_finished(inode, page, start, len, uptodate);
 
 	if (!uptodate) {
 		const struct btrfs_fs_info *fs_info = inode->root->fs_info;
-		u32 len;
-
-		ASSERT(end + 1 - start <= U32_MAX);
-		len = end + 1 - start;
 
 		btrfs_page_clear_uptodate(fs_info, page, start, len);
 		ret = err < 0 ? err : -EIO;
@@ -1349,6 +1347,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 
 	bio_ctrl->end_io_func = end_bio_extent_writepage;
 	while (cur <= end) {
+		u32 len = end - cur + 1;
 		u64 disk_bytenr;
 		u64 em_end;
 		u64 dirty_range_start = cur;
@@ -1356,8 +1355,8 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		u32 iosize;
 
 		if (cur >= i_size) {
-			btrfs_writepage_endio_finish_ordered(inode, page, cur,
-							     end, true);
+			btrfs_mark_ordered_io_finished(inode, page, cur, len,
+						       true);
 			/*
 			 * This range is beyond i_size, thus we don't need to
 			 * bother writing back.
@@ -1366,7 +1365,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 			 * writeback the sectors with subpage dirty bits,
 			 * causing writeback without ordered extent.
 			 */
-			btrfs_page_clear_dirty(fs_info, page, cur, end + 1 - cur);
+			btrfs_page_clear_dirty(fs_info, page, cur, len);
 			break;
 		}
 
@@ -1377,7 +1376,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 			continue;
 		}
 
-		em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
+		em = btrfs_get_extent(inode, NULL, 0, cur, len);
 		if (IS_ERR(em)) {
 			ret = PTR_ERR_OR_ZERO(em);
 			goto out_error;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index aa1cda22a23a..f231e7623f18 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3388,15 +3388,6 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
 	return btrfs_finish_one_ordered(ordered);
 }
 
-void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
-					  struct page *page, u64 start,
-					  u64 end, bool uptodate)
-{
-	trace_btrfs_writepage_end_io_hook(inode, start, end, uptodate);
-
-	btrfs_mark_ordered_io_finished(inode, page, start, end + 1 - start, uptodate);
-}
-
 /*
  * Verify the checksum for a single sector without any extra action that depend
  * on the type of I/O.
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a629532283bc..109e80ed25b6 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -410,6 +410,10 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
 	unsigned long flags;
 	u64 cur = file_offset;
 
+	trace_btrfs_writepage_end_io_hook(inode, file_offset,
+					  file_offset + num_bytes - 1,
+					  uptodate);
+
 	spin_lock_irqsave(&tree->lock, flags);
 	while (cur < file_offset + num_bytes) {
 		u64 entry_end;

From 9783e4deed7291996459858a1a16f41a8988dd60 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:26 +0200
Subject: [PATCH 1475/2047] btrfs: remove end_extent_writepage

end_extent_writepage is a small helper that combines a call to
btrfs_mark_ordered_io_finished with conditional error-only calls to
btrfs_page_clear_uptodate and mapping_set_error with a somewhat
unfortunate calling convention that passes and inclusive end instead
of the len expected by the underlying functions.

Remove end_extent_writepage and open code it in the 4 callers. Out
of those two already are error-only and thus don't need the extra
conditional, and one already has the mapping_set_error, so a duplicate
call can be avoided.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 44 +++++++++++++++-----------------------------
 fs/btrfs/extent_io.h |  2 --
 fs/btrfs/inode.c     | 42 ++++++++++++++++++++++--------------------
 3 files changed, 37 insertions(+), 51 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 6a7128318fbe..b99081a407a4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -464,29 +464,6 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
 		btrfs_subpage_end_reader(fs_info, page, start, len);
 }
 
-/* lots and lots of room for performance fixes in the end_bio funcs */
-
-void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
-{
-	struct btrfs_inode *inode;
-	const bool uptodate = (err == 0);
-	int ret = 0;
-	u32 len = end + 1 - start;
-
-	ASSERT(end + 1 - start <= U32_MAX);
-	ASSERT(page && page->mapping);
-	inode = BTRFS_I(page->mapping->host);
-	btrfs_mark_ordered_io_finished(inode, page, start, len, uptodate);
-
-	if (!uptodate) {
-		const struct btrfs_fs_info *fs_info = inode->root->fs_info;
-
-		btrfs_page_clear_uptodate(fs_info, page, start, len);
-		ret = err < 0 ? err : -EIO;
-		mapping_set_error(page->mapping, ret);
-	}
-}
-
 /*
  * after a writepage IO is done, we need to:
  * clear the uptodate bits on error
@@ -1452,7 +1429,6 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl
 	struct folio *folio = page_folio(page);
 	struct inode *inode = page->mapping->host;
 	const u64 page_start = page_offset(page);
-	const u64 page_end = page_start + PAGE_SIZE - 1;
 	int ret;
 	int nr = 0;
 	size_t pg_offset;
@@ -1496,8 +1472,13 @@ done:
 		set_page_writeback(page);
 		end_page_writeback(page);
 	}
-	if (ret)
-		end_extent_writepage(page, ret, page_start, page_end);
+	if (ret) {
+		btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, page_start,
+					       PAGE_SIZE, !ret);
+		btrfs_page_clear_uptodate(btrfs_sb(inode->i_sb), page,
+					  page_start, PAGE_SIZE);
+		mapping_set_error(page->mapping, ret);
+	}
 	unlock_page(page);
 	ASSERT(ret <= 0);
 	return ret;
@@ -2222,6 +2203,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 
 	while (cur <= end) {
 		u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
+		u32 cur_len = cur_end + 1 - cur;
 		struct page *page;
 		int nr = 0;
 
@@ -2245,9 +2227,13 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 			set_page_writeback(page);
 			end_page_writeback(page);
 		}
-		if (ret)
-			end_extent_writepage(page, ret, cur, cur_end);
-		btrfs_page_unlock_writer(fs_info, page, cur, cur_end + 1 - cur);
+		if (ret) {
+			btrfs_mark_ordered_io_finished(BTRFS_I(inode), page,
+						       cur, cur_len, !ret);
+			btrfs_page_clear_uptodate(fs_info, page, cur, cur_len);
+			mapping_set_error(page->mapping, ret);
+		}
+		btrfs_page_unlock_writer(fs_info, page, cur, cur_len);
 		if (ret < 0) {
 			found_error = true;
 			first_error = ret;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 285754154fdc..8d11e17c0be9 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -276,8 +276,6 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
 
 int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
 
-void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
-
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 bool find_lock_delalloc_range(struct inode *inode,
 			     struct page *locked_page, u64 *start,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f231e7623f18..eb06c7c8d6c1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -424,11 +424,10 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
 
 	while (index <= end_index) {
 		/*
-		 * For locked page, we will call end_extent_writepage() on it
-		 * in run_delalloc_range() for the error handling.  That
-		 * end_extent_writepage() function will call
-		 * btrfs_mark_ordered_io_finished() to clear page Ordered and
-		 * run the ordered extent accounting.
+		 * For locked page, we will call btrfs_mark_ordered_io_finished
+		 * through btrfs_mark_ordered_io_finished() on it
+		 * in run_delalloc_range() for the error handling, which will
+		 * clear page Ordered and run the ordered extent accounting.
 		 *
 		 * Here we can't just clear the Ordered bit, or
 		 * btrfs_mark_ordered_io_finished() would skip the accounting
@@ -1158,11 +1157,16 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
 		btrfs_cleanup_ordered_extents(inode, locked_page, start, end - start + 1);
 		if (locked_page) {
 			const u64 page_start = page_offset(locked_page);
-			const u64 page_end = page_start + PAGE_SIZE - 1;
 
 			set_page_writeback(locked_page);
 			end_page_writeback(locked_page);
-			end_extent_writepage(locked_page, ret, page_start, page_end);
+			btrfs_mark_ordered_io_finished(inode, locked_page,
+						       page_start, PAGE_SIZE,
+						       !ret);
+			btrfs_page_clear_uptodate(inode->root->fs_info,
+						  locked_page, page_start,
+						  PAGE_SIZE);
+			mapping_set_error(locked_page->mapping, ret);
 			unlock_page(locked_page);
 		}
 		return ret;
@@ -2837,23 +2841,19 @@ struct btrfs_writepage_fixup {
 
 static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 {
-	struct btrfs_writepage_fixup *fixup;
+	struct btrfs_writepage_fixup *fixup =
+		container_of(work, struct btrfs_writepage_fixup, work);
 	struct btrfs_ordered_extent *ordered;
 	struct extent_state *cached_state = NULL;
 	struct extent_changeset *data_reserved = NULL;
-	struct page *page;
-	struct btrfs_inode *inode;
-	u64 page_start;
-	u64 page_end;
+	struct page *page = fixup->page;
+	struct btrfs_inode *inode = fixup->inode;
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	u64 page_start = page_offset(page);
+	u64 page_end = page_offset(page) + PAGE_SIZE - 1;
 	int ret = 0;
 	bool free_delalloc_space = true;
 
-	fixup = container_of(work, struct btrfs_writepage_fixup, work);
-	page = fixup->page;
-	inode = fixup->inode;
-	page_start = page_offset(page);
-	page_end = page_offset(page) + PAGE_SIZE - 1;
-
 	/*
 	 * This is similar to page_mkwrite, we need to reserve the space before
 	 * we take the page lock.
@@ -2946,10 +2946,12 @@ out_page:
 		 * to reflect the errors and clean the page.
 		 */
 		mapping_set_error(page->mapping, ret);
-		end_extent_writepage(page, ret, page_start, page_end);
+		btrfs_mark_ordered_io_finished(inode, page, page_start,
+					       PAGE_SIZE, !ret);
+		btrfs_page_clear_uptodate(fs_info, page, page_start, PAGE_SIZE);
 		clear_page_dirty_for_io(page);
 	}
-	btrfs_page_clear_checked(inode->root->fs_info, page, page_start, PAGE_SIZE);
+	btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE);
 	unlock_page(page);
 	put_page(page);
 	kfree(fixup);

From 84f262f00910996f6cfd27094b9bca9422c630d4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:27 +0200
Subject: [PATCH 1476/2047] btrfs: reduce debug spam from
 submit_compressed_extents

Move the printk that is supposed to help to debug failures in
submit_one_async_extent into submit_one_async_extent and make it
coniditonal on actually having an error condition instead of spamming
the log unconditionally.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 33 +++++++++++++--------------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index eb06c7c8d6c1..bd62985c3520 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1179,11 +1179,11 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
 	return ret;
 }
 
-static int submit_one_async_extent(struct btrfs_inode *inode,
-				   struct async_chunk *async_chunk,
-				   struct async_extent *async_extent,
-				   u64 *alloc_hint)
+static void submit_one_async_extent(struct async_chunk *async_chunk,
+				    struct async_extent *async_extent,
+				    u64 *alloc_hint)
 {
+	struct btrfs_inode *inode = async_chunk->inode;
 	struct extent_io_tree *io_tree = &inode->io_tree;
 	struct btrfs_root *root = inode->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1276,7 +1276,7 @@ done:
 	if (async_chunk->blkcg_css)
 		kthread_associate_blkcg(NULL);
 	kfree(async_extent);
-	return ret;
+	return;
 
 out_free_reserve:
 	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -1290,7 +1290,13 @@ out_free:
 				     PAGE_UNLOCK | PAGE_START_WRITEBACK |
 				     PAGE_END_WRITEBACK);
 	free_async_extent_pages(async_extent);
-	goto done;
+	if (async_chunk->blkcg_css)
+		kthread_associate_blkcg(NULL);
+	btrfs_debug(fs_info,
+"async extent submission failed root=%lld inode=%llu start=%llu len=%llu ret=%d",
+		    root->root_key.objectid, btrfs_ino(inode), start,
+		    async_extent->ram_size, ret);
+	kfree(async_extent);
 }
 
 /*
@@ -1300,28 +1306,15 @@ out_free:
  */
 static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
 {
-	struct btrfs_inode *inode = async_chunk->inode;
-	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct async_extent *async_extent;
 	u64 alloc_hint = 0;
-	int ret = 0;
 
 	while (!list_empty(&async_chunk->extents)) {
-		u64 extent_start;
-		u64 ram_size;
-
 		async_extent = list_entry(async_chunk->extents.next,
 					  struct async_extent, list);
 		list_del(&async_extent->list);
-		extent_start = async_extent->start;
-		ram_size = async_extent->ram_size;
 
-		ret = submit_one_async_extent(inode, async_chunk, async_extent,
-					      &alloc_hint);
-		btrfs_debug(fs_info,
-"async extent submission failed root=%lld inode=%llu start=%llu len=%llu ret=%d",
-			    inode->root->root_key.objectid,
-			    btrfs_ino(inode), extent_start, ram_size, ret);
+		submit_one_async_extent(async_chunk, async_extent, &alloc_hint);
 	}
 }
 

From ff20d6a4a9ed1b6e0ed78953830cff9fa8b9dbfd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:28 +0200
Subject: [PATCH 1477/2047] btrfs: remove the return value from
 submit_uncompressed_range

The return value from submit_uncompressed_range is ignored, and that's
fine because the error reporting happens through the mapping and
ordered_extent.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bd62985c3520..dad0a6113a04 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1124,9 +1124,9 @@ static void free_async_extent_pages(struct async_extent *async_extent)
 	async_extent->pages = NULL;
 }
 
-static int submit_uncompressed_range(struct btrfs_inode *inode,
-				     struct async_extent *async_extent,
-				     struct page *locked_page)
+static void submit_uncompressed_range(struct btrfs_inode *inode,
+				      struct async_extent *async_extent,
+				      struct page *locked_page)
 {
 	u64 start = async_extent->start;
 	u64 end = async_extent->start + async_extent->ram_size - 1;
@@ -1151,7 +1151,7 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
 			     &nr_written, NULL, true, false);
 	/* Inline extent inserted, page gets unlocked and everything is done */
 	if (page_started)
-		return 0;
+		return;
 
 	if (ret < 0) {
 		btrfs_cleanup_ordered_extents(inode, locked_page, start, end - start + 1);
@@ -1169,14 +1169,13 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
 			mapping_set_error(locked_page->mapping, ret);
 			unlock_page(locked_page);
 		}
-		return ret;
+		return;
 	}
 
 	/* All pages will be unlocked, including @locked_page */
 	wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
-	ret = extent_write_locked_range(&inode->vfs_inode, start, end, &wbc);
+	extent_write_locked_range(&inode->vfs_inode, start, end, &wbc);
 	wbc_detach_inode(&wbc);
-	return ret;
 }
 
 static void submit_one_async_extent(struct async_chunk *async_chunk,
@@ -1213,7 +1212,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
 
 	/* We have fall back to uncompressed write */
 	if (!async_extent->pages) {
-		ret = submit_uncompressed_range(inode, async_extent, locked_page);
+		submit_uncompressed_range(inode, async_extent, locked_page);
 		goto done;
 	}
 

From 0835d1e66e7f22428bbc2ea56cb488a7759b51e2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:29 +0200
Subject: [PATCH 1478/2047] btrfs: remove the return value from
 extent_write_locked_range

The return value from extent_write_locked_range is ignored, and that's
fine because the error reporting happens through the mapping and
ordered_extent.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 13 +++----------
 fs/btrfs/extent_io.h |  4 ++--
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b99081a407a4..4981058d3877 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2180,11 +2180,10 @@ retry:
  * already been ran (aka, ordered extent inserted) and all pages are still
  * locked.
  */
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
-			      struct writeback_control *wbc)
+void extent_write_locked_range(struct inode *inode, u64 start, u64 end,
+			       struct writeback_control *wbc)
 {
 	bool found_error = false;
-	int first_error = 0;
 	int ret = 0;
 	struct address_space *mapping = inode->i_mapping;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2234,20 +2233,14 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 			mapping_set_error(page->mapping, ret);
 		}
 		btrfs_page_unlock_writer(fs_info, page, cur, cur_len);
-		if (ret < 0) {
+		if (ret < 0)
 			found_error = true;
-			first_error = ret;
-		}
 next_page:
 		put_page(page);
 		cur = cur_end + 1;
 	}
 
 	submit_write_bio(&bio_ctrl, found_error ? ret : 0);
-
-	if (found_error)
-		return first_error;
-	return ret;
 }
 
 int extent_writepages(struct address_space *mapping,
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 8d11e17c0be9..0312022bbf4b 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -177,8 +177,8 @@ int try_release_extent_mapping(struct page *page, gfp_t mask);
 int try_release_extent_buffer(struct page *page);
 
 int btrfs_read_folio(struct file *file, struct folio *folio);
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
-			      struct writeback_control *wbc);
+void extent_write_locked_range(struct inode *inode, u64 start, u64 end,
+			       struct writeback_control *wbc);
 int extent_writepages(struct address_space *mapping,
 		      struct writeback_control *wbc);
 int btree_write_cache_pages(struct address_space *mapping,

From 2c73162d649670045415537dc746f7524cda1405 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:30 +0200
Subject: [PATCH 1479/2047] btrfs: improve the delalloc_to_write calculation in
 writepage_delalloc

Currently writepage_delalloc adds to delalloc_to_write in every loop
operation.  That is not only more work than doing it once after the
loop, but can also over-increment the counter due to rounding errors
when a new loop iteration starts with an offset into a page.

Add a new page_start variable instead of recaculation that value over
and over, move the delalloc_to_write calculation out of the loop, use
the DIV_ROUND_UP helper instead of open coding it and remove the pointless
found local variable.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4981058d3877..148d289f70f0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1185,8 +1185,10 @@ static inline void contiguous_readpages(struct page *pages[], int nr_pages,
 static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 		struct page *page, struct writeback_control *wbc)
 {
-	const u64 page_end = page_offset(page) + PAGE_SIZE - 1;
-	u64 delalloc_start = page_offset(page);
+	const u64 page_start = page_offset(page);
+	const u64 page_end = page_start + PAGE_SIZE - 1;
+	u64 delalloc_start = page_start;
+	u64 delalloc_end = page_end;
 	u64 delalloc_to_write = 0;
 	/* How many pages are started by btrfs_run_delalloc_range() */
 	unsigned long nr_written = 0;
@@ -1194,13 +1196,9 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 	int page_started = 0;
 
 	while (delalloc_start < page_end) {
-		u64 delalloc_end = page_end;
-		bool found;
-
-		found = find_lock_delalloc_range(&inode->vfs_inode, page,
-					       &delalloc_start,
-					       &delalloc_end);
-		if (!found) {
+		delalloc_end = page_end;
+		if (!find_lock_delalloc_range(&inode->vfs_inode, page,
+					      &delalloc_start, &delalloc_end)) {
 			delalloc_start = delalloc_end + 1;
 			continue;
 		}
@@ -1209,14 +1207,15 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 		if (ret)
 			return ret;
 
-		/*
-		 * delalloc_end is already one less than the total length, so
-		 * we don't subtract one from PAGE_SIZE
-		 */
-		delalloc_to_write += (delalloc_end - delalloc_start +
-				      PAGE_SIZE) >> PAGE_SHIFT;
 		delalloc_start = delalloc_end + 1;
 	}
+
+	/*
+	 * delalloc_end is already one less than the total length, so
+	 * we don't subtract one from PAGE_SIZE
+	 */
+	delalloc_to_write +=
+		DIV_ROUND_UP(delalloc_end + 1 - page_start, PAGE_SIZE);
 	if (wbc->nr_to_write < delalloc_to_write) {
 		int thresh = 8192;
 

From c56cbe90599338c0aaa84cab0e99a9e8a8501a38 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:31 +0200
Subject: [PATCH 1480/2047] btrfs: reduce the number of arguments to
 btrfs_run_delalloc_range

Instead of a separate page_started argument that tells the callers that
btrfs_run_delalloc_range already started writeback by itself, overload
the return value with a positive 1 in additio to 0 and a negative error
code to indicate that is has already started writeback, and remove the
nr_written argument as that caller can calculate it directly based on
the range, and in fact already does so for the case where writeback
wasn't started yet.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/btrfs_inode.h |  3 +-
 fs/btrfs/extent_io.c   | 30 +++++++--------
 fs/btrfs/inode.c       | 85 ++++++++++++++----------------------------
 3 files changed, 43 insertions(+), 75 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 90e60ad9db62..bda1fdbba666 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -498,8 +498,7 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
 				    u64 start, u64 num_bytes, u64 min_size,
 				    loff_t actual_len, u64 *alloc_hint);
 int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
-			     u64 start, u64 end, int *page_started,
-			     unsigned long *nr_written, struct writeback_control *wbc);
+			     u64 start, u64 end, struct writeback_control *wbc);
 int btrfs_writepage_cow_fixup(struct page *page);
 int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
 					     int compress_type);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 148d289f70f0..08fb26d58172 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1190,10 +1190,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 	u64 delalloc_start = page_start;
 	u64 delalloc_end = page_end;
 	u64 delalloc_to_write = 0;
-	/* How many pages are started by btrfs_run_delalloc_range() */
-	unsigned long nr_written = 0;
-	int ret;
-	int page_started = 0;
+	int ret = 0;
 
 	while (delalloc_start < page_end) {
 		delalloc_end = page_end;
@@ -1202,9 +1199,10 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 			delalloc_start = delalloc_end + 1;
 			continue;
 		}
+
 		ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
-				delalloc_end, &page_started, &nr_written, wbc);
-		if (ret)
+					       delalloc_end, wbc);
+		if (ret < 0)
 			return ret;
 
 		delalloc_start = delalloc_end + 1;
@@ -1216,6 +1214,16 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 	 */
 	delalloc_to_write +=
 		DIV_ROUND_UP(delalloc_end + 1 - page_start, PAGE_SIZE);
+
+	/*
+	 * If btrfs_run_dealloc_range() already started I/O and unlocked
+	 * the pages, we just need to account for them here.
+	 */
+	if (ret == 1) {
+		wbc->nr_to_write -= delalloc_to_write;
+		return 1;
+	}
+
 	if (wbc->nr_to_write < delalloc_to_write) {
 		int thresh = 8192;
 
@@ -1225,16 +1233,6 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 					 thresh);
 	}
 
-	/* Did btrfs_run_dealloc_range() already unlock and start the IO? */
-	if (page_started) {
-		/*
-		 * We've unlocked the page, so we can't update the mapping's
-		 * writeback index, just update nr_to_write.
-		 */
-		wbc->nr_to_write -= nr_written;
-		return 1;
-	}
-
 	return 0;
 }
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dad0a6113a04..ada70af9c8a2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -127,8 +127,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback);
 
 static noinline int cow_file_range(struct btrfs_inode *inode,
 				   struct page *locked_page,
-				   u64 start, u64 end, int *page_started,
-				   unsigned long *nr_written, u64 *done_offset,
+				   u64 start, u64 end, u64 *done_offset,
 				   bool keep_locked, bool no_inline);
 static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 				       u64 len, u64 orig_start, u64 block_start,
@@ -1130,8 +1129,6 @@ static void submit_uncompressed_range(struct btrfs_inode *inode,
 {
 	u64 start = async_extent->start;
 	u64 end = async_extent->start + async_extent->ram_size - 1;
-	unsigned long nr_written = 0;
-	int page_started = 0;
 	int ret;
 	struct writeback_control wbc = {
 		.sync_mode		= WB_SYNC_ALL,
@@ -1147,10 +1144,9 @@ static void submit_uncompressed_range(struct btrfs_inode *inode,
 	 * Also we call cow_file_range() with @unlock_page == 0, so that we
 	 * can directly submit them without interruption.
 	 */
-	ret = cow_file_range(inode, locked_page, start, end, &page_started,
-			     &nr_written, NULL, true, false);
+	ret = cow_file_range(inode, locked_page, start, end, NULL, true, false);
 	/* Inline extent inserted, page gets unlocked and everything is done */
-	if (page_started)
+	if (ret == 1)
 		return;
 
 	if (ret < 0) {
@@ -1360,8 +1356,8 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
  *
  * When this function fails, it unlocks all pages except @locked_page.
  *
- * When this function successfully creates an inline extent, it sets page_started
- * to 1 and unlocks all pages including locked_page and starts I/O on them.
+ * When this function successfully creates an inline extent, it returns 1 and
+ * unlocks all pages including locked_page and starts I/O on them.
  * (In reality inline extents are limited to a single page, so locked_page is
  * the only page handled anyway).
  *
@@ -1378,9 +1374,8 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
  * example.
  */
 static noinline int cow_file_range(struct btrfs_inode *inode,
-				   struct page *locked_page,
-				   u64 start, u64 end, int *page_started,
-				   unsigned long *nr_written, u64 *done_offset,
+				   struct page *locked_page, u64 start, u64 end,
+				   u64 *done_offset,
 				   bool keep_locked, bool no_inline)
 {
 	struct btrfs_root *root = inode->root;
@@ -1440,9 +1435,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 				     EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
 				     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
 				     PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
-			*nr_written = *nr_written +
-			     (end - start + PAGE_SIZE) / PAGE_SIZE;
-			*page_started = 1;
 			/*
 			 * locked_page is locked by the caller of
 			 * writepage_delalloc(), not locked by
@@ -1452,11 +1444,11 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 			 * as it doesn't have any subpage::writers recorded.
 			 *
 			 * Here we manually unlock the page, since the caller
-			 * can't use page_started to determine if it's an
-			 * inline extent or a compressed extent.
+			 * can't determine if it's an inline extent or a
+			 * compressed extent.
 			 */
 			unlock_page(locked_page);
-			goto out;
+			return 1;
 		} else if (ret < 0) {
 			goto out_unlock;
 		}
@@ -1570,7 +1562,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 		if (ret)
 			goto out_unlock;
 	}
-out:
 	return ret;
 
 out_drop_extent_cache:
@@ -1721,10 +1712,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
 }
 
 static bool run_delalloc_compressed(struct btrfs_inode *inode,
-				    struct writeback_control *wbc,
-				    struct page *locked_page,
-				    u64 start, u64 end, int *page_started,
-				    unsigned long *nr_written)
+				    struct page *locked_page, u64 start,
+				    u64 end, struct writeback_control *wbc)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
@@ -1806,34 +1795,25 @@ static bool run_delalloc_compressed(struct btrfs_inode *inode,
 
 		btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);
 
-		*nr_written += nr_pages;
 		start = cur_end + 1;
 	}
-	*page_started = 1;
 	return true;
 }
 
 static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 				       struct page *locked_page, u64 start,
-				       u64 end, int *page_started,
-				       unsigned long *nr_written,
-				       struct writeback_control *wbc)
+				       u64 end, struct writeback_control *wbc)
 {
 	u64 done_offset = end;
 	int ret;
 	bool locked_page_done = false;
 
 	while (start <= end) {
-		ret = cow_file_range(inode, locked_page, start, end, page_started,
-				     nr_written, &done_offset, true, false);
+		ret = cow_file_range(inode, locked_page, start, end, &done_offset,
+				     true, false);
 		if (ret && ret != -EAGAIN)
 			return ret;
 
-		if (*page_started) {
-			ASSERT(ret == 0);
-			return 0;
-		}
-
 		if (ret == 0)
 			done_offset = end;
 
@@ -1854,9 +1834,7 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 		start = done_offset + 1;
 	}
 
-	*page_started = 1;
-
-	return 0;
+	return 1;
 }
 
 static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
@@ -1889,8 +1867,6 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
 	const bool is_reloc_ino = btrfs_is_data_reloc_root(inode->root);
 	const u64 range_bytes = end + 1 - start;
 	struct extent_io_tree *io_tree = &inode->io_tree;
-	int page_started = 0;
-	unsigned long nr_written;
 	u64 range_start = start;
 	u64 count;
 	int ret;
@@ -1951,9 +1927,8 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
 	 * is written out and unlocked directly and a normal NOCOW extent
 	 * doesn't work.
 	 */
-	ret = cow_file_range(inode, locked_page, start, end, &page_started,
-			     &nr_written, NULL, false, true);
-	ASSERT(!page_started);
+	ret = cow_file_range(inode, locked_page, start, end, NULL, false, true);
+	ASSERT(ret != 1);
 	return ret;
 }
 
@@ -2389,15 +2364,14 @@ static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
  * being touched for the first time.
  */
 int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
-		u64 start, u64 end, int *page_started, unsigned long *nr_written,
-		struct writeback_control *wbc)
+			     u64 start, u64 end, struct writeback_control *wbc)
 {
-	int ret = 0;
 	const bool zoned = btrfs_is_zoned(inode->root->fs_info);
+	int ret;
 
 	/*
-	 * The range must cover part of the @locked_page, or the returned
-	 * @page_started can confuse the caller.
+	 * The range must cover part of the @locked_page, or a return of 1
+	 * can confuse the caller.
 	 */
 	ASSERT(!(end <= page_offset(locked_page) ||
 		 start >= page_offset(locked_page) + PAGE_SIZE));
@@ -2417,20 +2391,17 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
 
 	if (btrfs_inode_can_compress(inode) &&
 	    inode_need_compress(inode, start, end) &&
-	    run_delalloc_compressed(inode, wbc, locked_page, start,
-				    end, page_started, nr_written))
-		goto out;
+	    run_delalloc_compressed(inode, locked_page, start, end, wbc))
+		return 1;
 
 	if (zoned)
-		ret = run_delalloc_zoned(inode, locked_page, start, end,
-					 page_started, nr_written, wbc);
+		ret = run_delalloc_zoned(inode, locked_page, start, end, wbc);
 	else
-		ret = cow_file_range(inode, locked_page, start, end,
-				     page_started, nr_written, NULL, false, false);
+		ret = cow_file_range(inode, locked_page, start, end, NULL,
+				     false, false);
 
 out:
-	ASSERT(ret <= 0);
-	if (ret)
+	if (ret < 0)
 		btrfs_cleanup_ordered_extents(inode, locked_page, start,
 					      end - start + 1);
 	return ret;

From 67583468086c2a12602b915184118c4863402b02 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:32 +0200
Subject: [PATCH 1481/2047] btrfs: clean up the check for uncompressed ranges
 in submit_one_async_extent

Instead of checking for a NULL !pages and explaining this with a cryptic
comment, just check the compression type for BTRFS_COMPRESS_NONE to make
the check self-explanatory.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ada70af9c8a2..27fdfd08a3ef 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1206,8 +1206,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
 	}
 	lock_extent(io_tree, start, end, NULL);
 
-	/* We have fall back to uncompressed write */
-	if (!async_extent->pages) {
+	if (async_extent->compress_type == BTRFS_COMPRESS_NONE) {
 		submit_uncompressed_range(inode, async_extent, locked_page);
 		goto done;
 	}

From 3134508e47df8f721d1bc6a31980d10cb71f3f18 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:33 +0200
Subject: [PATCH 1482/2047] btrfs: don't clear async_chunk->inode in
 async_cow_start

Now that the ->inode check isn't needed in submit_compressed_extents
any more, there is no reason to clear the field early.  Always keep
the inode around until the work item is finished and remove the special
casing, and the counting of compressed extents in compress_file_range.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 23 +++++------------------
 1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 27fdfd08a3ef..76496b084f5e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -830,7 +830,7 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
  * are written in the same order that the flusher thread sent them
  * down.
  */
-static noinline int compress_file_range(struct async_chunk *async_chunk)
+static noinline void compress_file_range(struct async_chunk *async_chunk)
 {
 	struct btrfs_inode *inode = async_chunk->inode;
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
@@ -848,7 +848,6 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
 	int i;
 	int will_compress;
 	int compress_type = fs_info->compress_type;
-	int compressed_extents = 0;
 	int redirty = 0;
 
 	inode_should_defrag(inode, start, end, end - start + 1, SZ_16K);
@@ -1025,7 +1024,7 @@ cont:
 				}
 				kfree(pages);
 			}
-			return 0;
+			return;
 		}
 	}
 
@@ -1044,8 +1043,6 @@ cont:
 		 */
 		total_in = round_up(total_in, fs_info->sectorsize);
 		if (total_compressed + blocksize <= total_in) {
-			compressed_extents++;
-
 			/*
 			 * The async work queues will take care of doing actual
 			 * allocation on disk for these compressed pages, and
@@ -1061,7 +1058,7 @@ cont:
 				cond_resched();
 				goto again;
 			}
-			return compressed_extents;
+			return;
 		}
 	}
 	if (pages) {
@@ -1102,9 +1099,6 @@ cleanup_and_bail_uncompressed:
 		extent_range_redirty_for_io(&inode->vfs_inode, start, end);
 	add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
 			 BTRFS_COMPRESS_NONE);
-	compressed_extents++;
-
-	return compressed_extents;
 }
 
 static void free_async_extent_pages(struct async_extent *async_extent)
@@ -1655,15 +1649,9 @@ out_unlock:
 static noinline void async_cow_start(struct btrfs_work *work)
 {
 	struct async_chunk *async_chunk;
-	int compressed_extents;
 
 	async_chunk = container_of(work, struct async_chunk, work);
-
-	compressed_extents = compress_file_range(async_chunk);
-	if (compressed_extents == 0) {
-		btrfs_add_delayed_iput(async_chunk->inode);
-		async_chunk->inode = NULL;
-	}
+	compress_file_range(async_chunk);
 }
 
 /*
@@ -1700,8 +1688,7 @@ static noinline void async_cow_free(struct btrfs_work *work)
 	struct async_cow *async_cow;
 
 	async_chunk = container_of(work, struct async_chunk, work);
-	if (async_chunk->inode)
-		btrfs_add_delayed_iput(async_chunk->inode);
+	btrfs_add_delayed_iput(async_chunk->inode);
 	if (async_chunk->blkcg_css)
 		css_put(async_chunk->blkcg_css);
 

From c15d8cf29551ea35a42b4c16cfd08388680be59c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:34 +0200
Subject: [PATCH 1483/2047] btrfs: merge async_cow_start and
 compress_file_range

There is no good reason to have the simple async_cow_start wrapper,
merge the argument conversion into the main compress_file_range function.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 43 ++++++++++++++++---------------------------
 1 file changed, 16 insertions(+), 27 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 76496b084f5e..065d29efc7a0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -814,24 +814,22 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
 }
 
 /*
- * we create compressed extents in two phases.  The first
- * phase compresses a range of pages that have already been
- * locked (both pages and state bits are locked).
+ * Work queue call back to started compression on a file and pages.
  *
- * This is done inside an ordered work queue, and the compression
- * is spread across many cpus.  The actual IO submission is step
- * two, and the ordered work queue takes care of making sure that
- * happens in the same order things were put onto the queue by
- * writepages and friends.
+ * This is done inside an ordered work queue, and the compression is spread
+ * across many cpus.  The actual IO submission is step two, and the ordered work
+ * queue takes care of making sure that happens in the same order things were
+ * put onto the queue by writepages and friends.
  *
- * If this code finds it can't get good compression, it puts an
- * entry onto the work queue to write the uncompressed bytes.  This
- * makes sure that both compressed inodes and uncompressed inodes
- * are written in the same order that the flusher thread sent them
- * down.
+ * If this code finds it can't get good compression, it puts an entry onto the
+ * work queue to write the uncompressed bytes.  This makes sure that both
+ * compressed inodes and uncompressed inodes are written in the same order that
+ * the flusher thread sent them down.
  */
-static noinline void compress_file_range(struct async_chunk *async_chunk)
+static void compress_file_range(struct btrfs_work *work)
 {
+	struct async_chunk *async_chunk =
+		container_of(work, struct async_chunk, work);
 	struct btrfs_inode *inode = async_chunk->inode;
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct address_space *mapping = inode->vfs_inode.i_mapping;
@@ -1644,18 +1642,9 @@ out_unlock:
 }
 
 /*
- * work queue call back to started compression on a file and pages
- */
-static noinline void async_cow_start(struct btrfs_work *work)
-{
-	struct async_chunk *async_chunk;
-
-	async_chunk = container_of(work, struct async_chunk, work);
-	compress_file_range(async_chunk);
-}
-
-/*
- * work queue call back to submit previously compressed pages
+ * Phase two of compressed writeback.  This is the ordered portion of the code,
+ * which only gets called in the order the work was queued.  We walk all the
+ * async extents created by compress_file_range and send them down to the disk.
  */
 static noinline void async_cow_submit(struct btrfs_work *work)
 {
@@ -1773,7 +1762,7 @@ static bool run_delalloc_compressed(struct btrfs_inode *inode,
 			async_chunk[i].blkcg_css = NULL;
 		}
 
-		btrfs_init_work(&async_chunk[i].work, async_cow_start,
+		btrfs_init_work(&async_chunk[i].work, compress_file_range,
 				async_cow_submit, async_cow_free);
 
 		nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);

From 00d31d17663c55de99c1fc1224d06009c4799fc1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:35 +0200
Subject: [PATCH 1484/2047] btrfs: merge submit_compressed_extents and
 async_cow_submit

The code in submit_compressed_extents just loops over the async_extents,
and doesn't need to be conditional on an inode being present, as there
won't be any async_extent in the list if we created and inline extent.
Merge the two functions to simplify the logic.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 39 ++++++++++-----------------------------
 1 file changed, 10 insertions(+), 29 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 065d29efc7a0..b119d828cc4e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1285,25 +1285,6 @@ out_free:
 	kfree(async_extent);
 }
 
-/*
- * Phase two of compressed writeback.  This is the ordered portion of the code,
- * which only gets called in the order the work was queued.  We walk all the
- * async extents created by compress_file_range and send them down to the disk.
- */
-static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
-{
-	struct async_extent *async_extent;
-	u64 alloc_hint = 0;
-
-	while (!list_empty(&async_chunk->extents)) {
-		async_extent = list_entry(async_chunk->extents.next,
-					  struct async_extent, list);
-		list_del(&async_extent->list);
-
-		submit_one_async_extent(async_chunk, async_extent, &alloc_hint);
-	}
-}
-
 static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
 				      u64 num_bytes)
 {
@@ -1646,24 +1627,24 @@ out_unlock:
  * which only gets called in the order the work was queued.  We walk all the
  * async extents created by compress_file_range and send them down to the disk.
  */
-static noinline void async_cow_submit(struct btrfs_work *work)
+static noinline void submit_compressed_extents(struct btrfs_work *work)
 {
 	struct async_chunk *async_chunk = container_of(work, struct async_chunk,
 						     work);
 	struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
+	struct async_extent *async_extent;
 	unsigned long nr_pages;
+	u64 alloc_hint = 0;
 
 	nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
 		PAGE_SHIFT;
 
-	/*
-	 * ->inode could be NULL if async_chunk_start has failed to compress,
-	 * in which case we don't have anything to submit, yet we need to
-	 * always adjust ->async_delalloc_pages as its paired with the init
-	 * happening in run_delalloc_compressed
-	 */
-	if (async_chunk->inode)
-		submit_compressed_extents(async_chunk);
+	while (!list_empty(&async_chunk->extents)) {
+		async_extent = list_entry(async_chunk->extents.next,
+					  struct async_extent, list);
+		list_del(&async_extent->list);
+		submit_one_async_extent(async_chunk, async_extent, &alloc_hint);
+	}
 
 	/* atomic_sub_return implies a barrier */
 	if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
@@ -1763,7 +1744,7 @@ static bool run_delalloc_compressed(struct btrfs_inode *inode,
 		}
 
 		btrfs_init_work(&async_chunk[i].work, compress_file_range,
-				async_cow_submit, async_cow_free);
+				submit_compressed_extents, async_cow_free);
 
 		nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
 		atomic_add(nr_pages, &fs_info->async_delalloc_pages);

From e94e54e89b4f9cbbdc2dbbf0c44adae0f4722ab6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:36 +0200
Subject: [PATCH 1485/2047] btrfs: streamline compress_file_range

Reorder compress_file_range so that the main compression flow happens
straight line and not in branches.  To do this ensure that pages is
always zeroed before a page allocation happens, which allows the
cleanup_and_bail_uncompressed label to clean up the page allocations
as needed.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 167 +++++++++++++++++++++++------------------------
 1 file changed, 83 insertions(+), 84 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b119d828cc4e..e994582358e0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -839,10 +839,11 @@ static void compress_file_range(struct btrfs_work *work)
 	u64 actual_end;
 	u64 i_size;
 	int ret = 0;
-	struct page **pages = NULL;
+	struct page **pages;
 	unsigned long nr_pages;
 	unsigned long total_compressed = 0;
 	unsigned long total_in = 0;
+	unsigned int poff;
 	int i;
 	int will_compress;
 	int compress_type = fs_info->compress_type;
@@ -865,6 +866,7 @@ static void compress_file_range(struct btrfs_work *work)
 	actual_end = min_t(u64, i_size, end + 1);
 again:
 	will_compress = 0;
+	pages = NULL;
 	nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
 	nr_pages = min_t(unsigned long, nr_pages, BTRFS_MAX_COMPRESSED_PAGES);
 
@@ -908,66 +910,64 @@ again:
 	ret = 0;
 
 	/*
-	 * we do compression for mount -o compress and when the
-	 * inode has not been flagged as nocompress.  This flag can
-	 * change at any time if we discover bad compression ratios.
+	 * We do compression for mount -o compress and when the inode has not
+	 * been flagged as NOCOMPRESS.  This flag can change at any time if we
+	 * discover bad compression ratios.
 	 */
-	if (inode_need_compress(inode, start, end)) {
-		WARN_ON(pages);
-		pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
-		if (!pages) {
-			/* just bail out to the uncompressed code */
-			nr_pages = 0;
-			goto cont;
-		}
-
-		if (inode->defrag_compress)
-			compress_type = inode->defrag_compress;
-		else if (inode->prop_compress)
-			compress_type = inode->prop_compress;
+	if (!inode_need_compress(inode, start, end))
+		goto cont;
 
+	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
+	if (!pages) {
 		/*
-		 * we need to call clear_page_dirty_for_io on each
-		 * page in the range.  Otherwise applications with the file
-		 * mmap'd can wander in and change the page contents while
-		 * we are compressing them.
-		 *
-		 * If the compression fails for any reason, we set the pages
-		 * dirty again later on.
-		 *
-		 * Note that the remaining part is redirtied, the start pointer
-		 * has moved, the end is the original one.
+		 * Memory allocation failure is not a fatal error, we can fall
+		 * back to uncompressed code.
 		 */
-		if (!redirty) {
-			extent_range_clear_dirty_for_io(&inode->vfs_inode, start, end);
-			redirty = 1;
-		}
-
-		/* Compression level is applied here and only here */
-		ret = btrfs_compress_pages(
-			compress_type | (fs_info->compress_level << 4),
-					   mapping, start,
-					   pages,
-					   &nr_pages,
-					   &total_in,
-					   &total_compressed);
-
-		if (!ret) {
-			unsigned long offset = offset_in_page(total_compressed);
-			struct page *page = pages[nr_pages - 1];
-
-			/* zero the tail end of the last page, we might be
-			 * sending it down to disk
-			 */
-			if (offset)
-				memzero_page(page, offset, PAGE_SIZE - offset);
-			will_compress = 1;
-		}
+		nr_pages = 0;
+		goto cont;
 	}
+
+	if (inode->defrag_compress)
+		compress_type = inode->defrag_compress;
+	else if (inode->prop_compress)
+		compress_type = inode->prop_compress;
+
+	/*
+	 * We need to call clear_page_dirty_for_io on each page in the range.
+	 * Otherwise applications with the file mmap'd can wander in and change
+	 * the page contents while we are compressing them.
+	 *
+	 * If the compression fails for any reason, we set the pages dirty again
+	 * later on.
+	 *
+	 * Note that the remaining part is redirtied, the start pointer has
+	 * moved, the end is the original one.
+	 */
+	if (!redirty) {
+		extent_range_clear_dirty_for_io(&inode->vfs_inode, start, end);
+		redirty = 1;
+	}
+
+	/* Compression level is applied here. */
+	ret = btrfs_compress_pages(compress_type | (fs_info->compress_level << 4),
+				   mapping, start, pages, &nr_pages, &total_in,
+				   &total_compressed);
+	if (ret)
+		goto cont;
+
+	/*
+	 * Zero the tail end of the last page, as we might be sending it down
+	 * to disk.
+	 */
+	poff = offset_in_page(total_compressed);
+	if (poff)
+		memzero_page(pages[nr_pages - 1], poff, PAGE_SIZE - poff);
+	will_compress = 1;
+
 cont:
 	/*
 	 * Check cow_file_range() for why we don't even try to create inline
-	 * extent for subpage case.
+	 * extent for the subpage case.
 	 */
 	if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
 		/* lets try to make an inline extent */
@@ -1026,39 +1026,38 @@ cont:
 		}
 	}
 
-	if (will_compress) {
-		/*
-		 * we aren't doing an inline extent round the compressed size
-		 * up to a block size boundary so the allocator does sane
-		 * things
-		 */
-		total_compressed = ALIGN(total_compressed, blocksize);
+	if (!will_compress)
+		goto cleanup_and_bail_uncompressed;
 
-		/*
-		 * one last check to make sure the compression is really a
-		 * win, compare the page count read with the blocks on disk,
-		 * compression must free at least one sector size
-		 */
-		total_in = round_up(total_in, fs_info->sectorsize);
-		if (total_compressed + blocksize <= total_in) {
-			/*
-			 * The async work queues will take care of doing actual
-			 * allocation on disk for these compressed pages, and
-			 * will submit them to the elevator.
-			 */
-			add_async_extent(async_chunk, start, total_in,
-					total_compressed, pages, nr_pages,
-					compress_type);
+	/*
+	 * We aren't doing an inline extent. Round the compressed size up to a
+	 * block size boundary so the allocator does sane things.
+	 */
+	total_compressed = ALIGN(total_compressed, blocksize);
 
-			if (start + total_in < end) {
-				start += total_in;
-				pages = NULL;
-				cond_resched();
-				goto again;
-			}
-			return;
-		}
+	/*
+	 * One last check to make sure the compression is really a win, compare
+	 * the page count read with the blocks on disk, compression must free at
+	 * least one sector.
+	 */
+	total_in = round_up(total_in, fs_info->sectorsize);
+	if (total_compressed + blocksize > total_in)
+		goto cleanup_and_bail_uncompressed;
+
+	/*
+	 * The async work queues will take care of doing actual allocation on
+	 * disk for these compressed pages, and will submit the bios.
+	 */
+	add_async_extent(async_chunk, start, total_in, total_compressed, pages,
+			 nr_pages, compress_type);
+	if (start + total_in < end) {
+		start += total_in;
+		cond_resched();
+		goto again;
 	}
+	return;
+
+cleanup_and_bail_uncompressed:
 	if (pages) {
 		/*
 		 * the compression code ran but failed to make things smaller,
@@ -1079,7 +1078,7 @@ cont:
 			inode->flags |= BTRFS_INODE_NOCOMPRESS;
 		}
 	}
-cleanup_and_bail_uncompressed:
+
 	/*
 	 * No compression, but we still need to write the pages in the file
 	 * we've been given so far.  redirty the locked page if it corresponds

From 6a7167bf9c9258a214bb0526bd3ecbc96db893ef Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:37 +0200
Subject: [PATCH 1486/2047] btrfs: further simplify the compress or not logic
 in compress_file_range

Currently the logic whether to compress or not in compress_file_range is
a bit convoluted because it tries to share code for creating inline
extents for the compressible [1] path and the bail to uncompressed path.

But the latter isn't needed at all, because cow_file_range as called by
submit_uncompressed_range will already create inline extents as needed,
so there is no need to have special handling for it if we can live with
the fact that it will be called a bit later in the ->ordered_func of the
workqueue instead of right now.

[1] there is undocumented logic that creates an uncompressed inline
extent outside of the shall not compress logic if total_in is too small.
This logic isn't explained in comments or any commit log I could find,
so I've preserved it.  Documentation explaining it would be appreciated
if anyone understands this code.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 48 ++++++++++++++++--------------------------------
 1 file changed, 16 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e994582358e0..8307d2a3083a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -845,7 +845,6 @@ static void compress_file_range(struct btrfs_work *work)
 	unsigned long total_in = 0;
 	unsigned int poff;
 	int i;
-	int will_compress;
 	int compress_type = fs_info->compress_type;
 	int redirty = 0;
 
@@ -865,7 +864,6 @@ static void compress_file_range(struct btrfs_work *work)
 	barrier();
 	actual_end = min_t(u64, i_size, end + 1);
 again:
-	will_compress = 0;
 	pages = NULL;
 	nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
 	nr_pages = min_t(unsigned long, nr_pages, BTRFS_MAX_COMPRESSED_PAGES);
@@ -915,7 +913,7 @@ again:
 	 * discover bad compression ratios.
 	 */
 	if (!inode_need_compress(inode, start, end))
-		goto cont;
+		goto cleanup_and_bail_uncompressed;
 
 	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
 	if (!pages) {
@@ -923,8 +921,7 @@ again:
 		 * Memory allocation failure is not a fatal error, we can fall
 		 * back to uncompressed code.
 		 */
-		nr_pages = 0;
-		goto cont;
+		goto cleanup_and_bail_uncompressed;
 	}
 
 	if (inode->defrag_compress)
@@ -953,7 +950,7 @@ again:
 				   mapping, start, pages, &nr_pages, &total_in,
 				   &total_compressed);
 	if (ret)
-		goto cont;
+		goto cleanup_and_bail_uncompressed;
 
 	/*
 	 * Zero the tail end of the last page, as we might be sending it down
@@ -962,24 +959,22 @@ again:
 	poff = offset_in_page(total_compressed);
 	if (poff)
 		memzero_page(pages[nr_pages - 1], poff, PAGE_SIZE - poff);
-	will_compress = 1;
 
-cont:
 	/*
+	 * Try to create an inline extent.
+	 *
+	 * If we didn't compress the entire range, try to create an uncompressed
+	 * inline extent, else a compressed one.
+	 *
 	 * Check cow_file_range() for why we don't even try to create inline
 	 * extent for the subpage case.
 	 */
 	if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
-		/* lets try to make an inline extent */
-		if (ret || total_in < actual_end) {
-			/* we didn't compress the entire range, try
-			 * to make an uncompressed inline extent.
-			 */
-			ret = cow_file_range_inline(inode, actual_end,
-						    0, BTRFS_COMPRESS_NONE,
-						    NULL, false);
+		if (total_in < actual_end) {
+			ret = cow_file_range_inline(inode, actual_end, 0,
+						    BTRFS_COMPRESS_NONE, NULL,
+						    false);
 		} else {
-			/* try making a compressed inline extent */
 			ret = cow_file_range_inline(inode, actual_end,
 						    total_compressed,
 						    compress_type, pages,
@@ -1009,26 +1004,15 @@ cont:
 						     PAGE_UNLOCK |
 						     PAGE_START_WRITEBACK |
 						     PAGE_END_WRITEBACK);
-
-			/*
-			 * Ensure we only free the compressed pages if we have
-			 * them allocated, as we can still reach here with
-			 * inode_need_compress() == false.
-			 */
-			if (pages) {
-				for (i = 0; i < nr_pages; i++) {
-					WARN_ON(pages[i]->mapping);
-					put_page(pages[i]);
-				}
-				kfree(pages);
+			for (i = 0; i < nr_pages; i++) {
+				WARN_ON(pages[i]->mapping);
+				put_page(pages[i]);
 			}
+			kfree(pages);
 			return;
 		}
 	}
 
-	if (!will_compress)
-		goto cleanup_and_bail_uncompressed;
-
 	/*
 	 * We aren't doing an inline extent. Round the compressed size up to a
 	 * block size boundary so the allocator does sane things.

From 184aa1ffa5fdf77853488c7d7fddb113cf2e33fe Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:38 +0200
Subject: [PATCH 1487/2047] btrfs: use a separate label for the incompressible
 case in compress_file_range

compress_file_range can fail to compress either because of resource or
alignment constraints or because the data is incompressible.  In the latter
case the inode is marked so that compression isn't tried again.  Currently
that check is based on the condition that the pages array has been allocated
which is rather cryptic.  Use a separate label to clearly distinguish this
case.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8307d2a3083a..db0dc223c4c6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -950,7 +950,7 @@ again:
 				   mapping, start, pages, &nr_pages, &total_in,
 				   &total_compressed);
 	if (ret)
-		goto cleanup_and_bail_uncompressed;
+		goto mark_incompressible;
 
 	/*
 	 * Zero the tail end of the last page, as we might be sending it down
@@ -1026,7 +1026,7 @@ again:
 	 */
 	total_in = round_up(total_in, fs_info->sectorsize);
 	if (total_compressed + blocksize > total_in)
-		goto cleanup_and_bail_uncompressed;
+		goto mark_incompressible;
 
 	/*
 	 * The async work queues will take care of doing actual allocation on
@@ -1041,6 +1041,9 @@ again:
 	}
 	return;
 
+mark_incompressible:
+	if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) && !inode->prop_compress)
+		inode->flags |= BTRFS_INODE_NOCOMPRESS;
 cleanup_and_bail_uncompressed:
 	if (pages) {
 		/*
@@ -1055,12 +1058,6 @@ cleanup_and_bail_uncompressed:
 		pages = NULL;
 		total_compressed = 0;
 		nr_pages = 0;
-
-		/* flag the file so we don't compress in the future */
-		if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
-		    !(inode->prop_compress)) {
-			inode->flags |= BTRFS_INODE_NOCOMPRESS;
-		}
 	}
 
 	/*

From f778b6b8e0136c0da9ffcf560c8cd8619b1cabf4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:39 +0200
Subject: [PATCH 1488/2047] btrfs: share the code to free the page array in
 compress_file_range

compress_file_range has two code blocks to free the page array for the
compressed data.  Share the code using a goto label.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index db0dc223c4c6..cda511d76084 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1004,12 +1004,7 @@ again:
 						     PAGE_UNLOCK |
 						     PAGE_START_WRITEBACK |
 						     PAGE_END_WRITEBACK);
-			for (i = 0; i < nr_pages; i++) {
-				WARN_ON(pages[i]->mapping);
-				put_page(pages[i]);
-			}
-			kfree(pages);
-			return;
+			goto free_pages;
 		}
 	}
 
@@ -1045,21 +1040,6 @@ mark_incompressible:
 	if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) && !inode->prop_compress)
 		inode->flags |= BTRFS_INODE_NOCOMPRESS;
 cleanup_and_bail_uncompressed:
-	if (pages) {
-		/*
-		 * the compression code ran but failed to make things smaller,
-		 * free any pages it allocated and our page pointer array
-		 */
-		for (i = 0; i < nr_pages; i++) {
-			WARN_ON(pages[i]->mapping);
-			put_page(pages[i]);
-		}
-		kfree(pages);
-		pages = NULL;
-		total_compressed = 0;
-		nr_pages = 0;
-	}
-
 	/*
 	 * No compression, but we still need to write the pages in the file
 	 * we've been given so far.  redirty the locked page if it corresponds
@@ -1077,6 +1057,14 @@ cleanup_and_bail_uncompressed:
 		extent_range_redirty_for_io(&inode->vfs_inode, start, end);
 	add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
 			 BTRFS_COMPRESS_NONE);
+free_pages:
+	if (pages) {
+		for (i = 0; i < nr_pages; i++) {
+			WARN_ON(pages[i]->mapping);
+			put_page(pages[i]);
+		}
+		kfree(pages);
+	}
 }
 
 static void free_async_extent_pages(struct async_extent *async_extent)

From 44962ca37c8cf882ba72e4cd67f30eaecb920a52 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:40 +0200
Subject: [PATCH 1489/2047] btrfs: don't redirty pages in compress_file_range

compress_file_range needs to clear the dirty bit before handing off work
to the compression worker threads to prevent processes coming in through
mmap and changing the file contents while the compression is accessing
the data (See commit 4adaa611020f ("Btrfs: fix race between mmap writes
and compression").

But when compress_file_range decides to not compress the data, it falls
back to submit_uncompressed_range which uses extent_write_locked_range
to write the uncompressed data.  extent_write_locked_range currently
expects all pages to be marked dirty so that it can clear the dirty
bit itself, and thus compress_file_range has to redirty the page range.

Redirtying the page range is rather inefficient and also pointless,
so instead pass a pages_dirty parameter to extent_write_locked_range
and skip the redirty game entirely.

Note that compress_file_range was even redirtying the locked_page twice
given that extent_range_clear_dirty_for_io already redirties all pages
in the range, which must include locked_page if there is one.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 29 +++++------------------------
 fs/btrfs/extent_io.h |  3 +--
 fs/btrfs/inode.c     | 43 +++++++++----------------------------------
 3 files changed, 15 insertions(+), 60 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 08fb26d58172..bcf8244d8e1a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -181,22 +181,6 @@ void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
 	}
 }
 
-void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
-{
-	struct address_space *mapping = inode->i_mapping;
-	unsigned long index = start >> PAGE_SHIFT;
-	unsigned long end_index = end >> PAGE_SHIFT;
-	struct folio *folio;
-
-	while (index <= end_index) {
-		folio = filemap_get_folio(mapping, index);
-		filemap_dirty_folio(mapping, folio);
-		folio_account_redirty(folio);
-		index += folio_nr_pages(folio);
-		folio_put(folio);
-	}
-}
-
 static void process_one_page(struct btrfs_fs_info *fs_info,
 			     struct page *page, struct page *locked_page,
 			     unsigned long page_ops, u64 start, u64 end)
@@ -2178,7 +2162,7 @@ retry:
  * locked.
  */
 void extent_write_locked_range(struct inode *inode, u64 start, u64 end,
-			       struct writeback_control *wbc)
+			       struct writeback_control *wbc, bool pages_dirty)
 {
 	bool found_error = false;
 	int ret = 0;
@@ -2204,14 +2188,11 @@ void extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 		int nr = 0;
 
 		page = find_get_page(mapping, cur >> PAGE_SHIFT);
-		/*
-		 * All pages in the range are locked since
-		 * btrfs_run_delalloc_range(), thus there is no way to clear
-		 * the page dirty flag.
-		 */
 		ASSERT(PageLocked(page));
-		ASSERT(PageDirty(page));
-		clear_page_dirty_for_io(page);
+		if (pages_dirty) {
+			ASSERT(PageDirty(page));
+			clear_page_dirty_for_io(page);
+		}
 
 		ret = __extent_writepage_io(BTRFS_I(inode), page, &bio_ctrl,
 					    i_size, &nr);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 0312022bbf4b..2678906e87c5 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -178,7 +178,7 @@ int try_release_extent_buffer(struct page *page);
 
 int btrfs_read_folio(struct file *file, struct folio *folio);
 void extent_write_locked_range(struct inode *inode, u64 start, u64 end,
-			       struct writeback_control *wbc);
+			       struct writeback_control *wbc, bool pages_dirty);
 int extent_writepages(struct address_space *mapping,
 		      struct writeback_control *wbc);
 int btree_write_cache_pages(struct address_space *mapping,
@@ -265,7 +265,6 @@ void set_extent_buffer_dirty(struct extent_buffer *eb);
 void set_extent_buffer_uptodate(struct extent_buffer *eb);
 void clear_extent_buffer_uptodate(struct extent_buffer *eb);
 void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
-void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
 void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
 				  struct page *locked_page,
 				  u32 bits_to_clear, unsigned long page_ops);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cda511d76084..7447f3b44cd2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -846,10 +846,16 @@ static void compress_file_range(struct btrfs_work *work)
 	unsigned int poff;
 	int i;
 	int compress_type = fs_info->compress_type;
-	int redirty = 0;
 
 	inode_should_defrag(inode, start, end, end - start + 1, SZ_16K);
 
+	/*
+	 * We need to call clear_page_dirty_for_io on each page in the range.
+	 * Otherwise applications with the file mmap'd can wander in and change
+	 * the page contents while we are compressing them.
+	 */
+	extent_range_clear_dirty_for_io(&inode->vfs_inode, start, end);
+
 	/*
 	 * We need to save i_size before now because it could change in between
 	 * us evaluating the size and assigning it.  This is because we lock and
@@ -929,22 +935,6 @@ again:
 	else if (inode->prop_compress)
 		compress_type = inode->prop_compress;
 
-	/*
-	 * We need to call clear_page_dirty_for_io on each page in the range.
-	 * Otherwise applications with the file mmap'd can wander in and change
-	 * the page contents while we are compressing them.
-	 *
-	 * If the compression fails for any reason, we set the pages dirty again
-	 * later on.
-	 *
-	 * Note that the remaining part is redirtied, the start pointer has
-	 * moved, the end is the original one.
-	 */
-	if (!redirty) {
-		extent_range_clear_dirty_for_io(&inode->vfs_inode, start, end);
-		redirty = 1;
-	}
-
 	/* Compression level is applied here. */
 	ret = btrfs_compress_pages(compress_type | (fs_info->compress_level << 4),
 				   mapping, start, pages, &nr_pages, &total_in,
@@ -1040,21 +1030,6 @@ mark_incompressible:
 	if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) && !inode->prop_compress)
 		inode->flags |= BTRFS_INODE_NOCOMPRESS;
 cleanup_and_bail_uncompressed:
-	/*
-	 * No compression, but we still need to write the pages in the file
-	 * we've been given so far.  redirty the locked page if it corresponds
-	 * to our extent and set things up for the async work queue to run
-	 * cow_file_range to do the normal delalloc dance.
-	 */
-	if (async_chunk->locked_page &&
-	    (page_offset(async_chunk->locked_page) >= start &&
-	     page_offset(async_chunk->locked_page)) <= end) {
-		__set_page_dirty_nobuffers(async_chunk->locked_page);
-		/* unlocked later on in the async handlers */
-	}
-
-	if (redirty)
-		extent_range_redirty_for_io(&inode->vfs_inode, start, end);
 	add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
 			 BTRFS_COMPRESS_NONE);
 free_pages:
@@ -1130,7 +1105,7 @@ static void submit_uncompressed_range(struct btrfs_inode *inode,
 
 	/* All pages will be unlocked, including @locked_page */
 	wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
-	extent_write_locked_range(&inode->vfs_inode, start, end, &wbc);
+	extent_write_locked_range(&inode->vfs_inode, start, end, &wbc, false);
 	wbc_detach_inode(&wbc);
 }
 
@@ -1754,7 +1729,7 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 		}
 		locked_page_done = true;
 		extent_write_locked_range(&inode->vfs_inode, start, done_offset,
-					  wbc);
+					  wbc, true);
 		start = done_offset + 1;
 	}
 

From 6e144bf16ba07dff649e6bd6afd79d2e353f6216 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:41 +0200
Subject: [PATCH 1490/2047] btrfs: refactor the zoned device handling in
 cow_file_range

Handling of the done_offset to cow_file_range is a bit confusing, as
it is not updated at all when the function succeeds, and the -EAGAIN
status is used bother for the case where we need to wait for a zone
finish and the one where the allocation was partially successful.

Change the calling convention so that done_offset is always updated,
and 0 is returned if some allocation was successful (partial allocation
can still only happen for zoned devices), and waiting for a zone
finish is done internally in cow_file_range instead of the caller.

Also write a comment explaining the logic.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 58 ++++++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 27 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7447f3b44cd2..0617e47fb9e3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1363,7 +1363,8 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 			 * compressed extent.
 			 */
 			unlock_page(locked_page);
-			return 1;
+			ret = 1;
+			goto done;
 		} else if (ret < 0) {
 			goto out_unlock;
 		}
@@ -1394,6 +1395,31 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 		ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
 					   min_alloc_size, 0, alloc_hint,
 					   &ins, 1, 1);
+		if (ret == -EAGAIN) {
+			/*
+			 * btrfs_reserve_extent only returns -EAGAIN for zoned
+			 * file systems, which is an indication that there are
+			 * no active zones to allocate from at the moment.
+			 *
+			 * If this is the first loop iteration, wait for at
+			 * least one zone to finish before retrying the
+			 * allocation.  Otherwise ask the caller to write out
+			 * the already allocated blocks before coming back to
+			 * us, or return -ENOSPC if it can't handle retries.
+			 */
+			ASSERT(btrfs_is_zoned(fs_info));
+			if (start == orig_start) {
+				wait_on_bit_io(&inode->root->fs_info->flags,
+					       BTRFS_FS_NEED_ZONE_FINISH,
+					       TASK_UNINTERRUPTIBLE);
+				continue;
+			}
+			if (done_offset) {
+				*done_offset = start - 1;
+				return 0;
+			}
+			ret = -ENOSPC;
+		}
 		if (ret < 0)
 			goto out_unlock;
 		cur_alloc_size = ins.offset;
@@ -1477,6 +1503,9 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 		if (ret)
 			goto out_unlock;
 	}
+done:
+	if (done_offset)
+		*done_offset = end;
 	return ret;
 
 out_drop_extent_cache:
@@ -1485,21 +1514,6 @@ out_reserve:
 	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
 out_unlock:
-	/*
-	 * If done_offset is non-NULL and ret == -EAGAIN, we expect the
-	 * caller to write out the successfully allocated region and retry.
-	 */
-	if (done_offset && ret == -EAGAIN) {
-		if (orig_start < start)
-			*done_offset = start - 1;
-		else
-			*done_offset = start;
-		return ret;
-	} else if (ret == -EAGAIN) {
-		/* Convert to -ENOSPC since the caller cannot retry. */
-		ret = -ENOSPC;
-	}
-
 	/*
 	 * Now, we have three regions to clean up:
 	 *
@@ -1710,19 +1724,9 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 	while (start <= end) {
 		ret = cow_file_range(inode, locked_page, start, end, &done_offset,
 				     true, false);
-		if (ret && ret != -EAGAIN)
+		if (ret)
 			return ret;
 
-		if (ret == 0)
-			done_offset = end;
-
-		if (done_offset == start) {
-			wait_on_bit_io(&inode->root->fs_info->flags,
-				       BTRFS_FS_NEED_ZONE_FINISH,
-				       TASK_UNINTERRUPTIBLE);
-			continue;
-		}
-
 		if (!locked_page_done) {
 			__set_page_dirty_nobuffers(locked_page);
 			account_page_redirty(locked_page);

From 778b878543f05ddf11843cd44ba9b1e775216fc0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:42 +0200
Subject: [PATCH 1491/2047] btrfs: don't redirty locked_page in
 run_delalloc_zoned

extent_write_locked_range currently expects that either all or no
pages are dirty when it is called.  Bur run_delalloc_zoned is called
directly in the writepages path, and has the dirty bit cleared only
for locked_page and which the extent_write_cache_pages currently
operates.  It currently works around this by redirtying locked_page,
but that is a bit inefficient and cumbersome.  Pass a locked_page
argument to run_delalloc_zoned so that clearing the dirty bit can
be skipped on just that page.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c |  7 ++++---
 fs/btrfs/extent_io.h |  5 +++--
 fs/btrfs/inode.c     | 13 ++++---------
 3 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index bcf8244d8e1a..90a58a38a9f3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2161,8 +2161,9 @@ retry:
  * already been ran (aka, ordered extent inserted) and all pages are still
  * locked.
  */
-void extent_write_locked_range(struct inode *inode, u64 start, u64 end,
-			       struct writeback_control *wbc, bool pages_dirty)
+void extent_write_locked_range(struct inode *inode, struct page *locked_page,
+			       u64 start, u64 end, struct writeback_control *wbc,
+			       bool pages_dirty)
 {
 	bool found_error = false;
 	int ret = 0;
@@ -2189,7 +2190,7 @@ void extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 
 		page = find_get_page(mapping, cur >> PAGE_SHIFT);
 		ASSERT(PageLocked(page));
-		if (pages_dirty) {
+		if (pages_dirty && page != locked_page) {
 			ASSERT(PageDirty(page));
 			clear_page_dirty_for_io(page);
 		}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 2678906e87c5..c01f9c5ddc13 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -177,8 +177,9 @@ int try_release_extent_mapping(struct page *page, gfp_t mask);
 int try_release_extent_buffer(struct page *page);
 
 int btrfs_read_folio(struct file *file, struct folio *folio);
-void extent_write_locked_range(struct inode *inode, u64 start, u64 end,
-			       struct writeback_control *wbc, bool pages_dirty);
+void extent_write_locked_range(struct inode *inode, struct page *locked_page,
+			       u64 start, u64 end, struct writeback_control *wbc,
+			       bool pages_dirty);
 int extent_writepages(struct address_space *mapping,
 		      struct writeback_control *wbc);
 int btree_write_cache_pages(struct address_space *mapping,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0617e47fb9e3..29a137d55cf2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1105,7 +1105,8 @@ static void submit_uncompressed_range(struct btrfs_inode *inode,
 
 	/* All pages will be unlocked, including @locked_page */
 	wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
-	extent_write_locked_range(&inode->vfs_inode, start, end, &wbc, false);
+	extent_write_locked_range(&inode->vfs_inode, NULL, start, end, &wbc,
+				  false);
 	wbc_detach_inode(&wbc);
 }
 
@@ -1719,7 +1720,6 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 {
 	u64 done_offset = end;
 	int ret;
-	bool locked_page_done = false;
 
 	while (start <= end) {
 		ret = cow_file_range(inode, locked_page, start, end, &done_offset,
@@ -1727,13 +1727,8 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 		if (ret)
 			return ret;
 
-		if (!locked_page_done) {
-			__set_page_dirty_nobuffers(locked_page);
-			account_page_redirty(locked_page);
-		}
-		locked_page_done = true;
-		extent_write_locked_range(&inode->vfs_inode, start, done_offset,
-					  wbc, true);
+		extent_write_locked_range(&inode->vfs_inode, locked_page, start,
+					  done_offset, wbc, true);
 		start = done_offset + 1;
 	}
 

From 256b0cf90d2a052f546bc118784574298c5818f2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:43 +0200
Subject: [PATCH 1492/2047] btrfs: fix zoned handling in
 submit_uncompressed_range

For zoned file systems we need to use run_delalloc_zoned to submit
writeback, as we need to write out partial allocations when running into
zone active limits.

submit_uncompressed_range currently always calls cow_file_range to
allocate blocks and thus misses the active zone limits handling.  Fix
this by passing the pages_dirty argument to run_delalloc_zoned and always
using it from submit_uncompressed_range as it does the right thing for
zoned and non-zoned file systems.

To account for the fact that run_delalloc_zoned is now also used for
non-zoned file systems rename it to run_delalloc_cow, and add comment
describing it.

Fixes: 42c011000963 ("btrfs: zoned: introduce dedicated data write path for zoned filesystems")
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 47 ++++++++++++++++++-----------------------------
 1 file changed, 18 insertions(+), 29 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 29a137d55cf2..19dc0b8ff427 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -125,10 +125,10 @@ static struct kmem_cache *btrfs_inode_cachep;
 static int btrfs_setsize(struct inode *inode, struct iattr *attr);
 static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback);
 
-static noinline int cow_file_range(struct btrfs_inode *inode,
-				   struct page *locked_page,
-				   u64 start, u64 end, u64 *done_offset,
-				   bool keep_locked, bool no_inline);
+static noinline int run_delalloc_cow(struct btrfs_inode *inode,
+				     struct page *locked_page, u64 start,
+				     u64 end, struct writeback_control *wbc,
+				     bool pages_dirty);
 static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 				       u64 len, u64 orig_start, u64 block_start,
 				       u64 block_len, u64 orig_block_len,
@@ -1072,18 +1072,9 @@ static void submit_uncompressed_range(struct btrfs_inode *inode,
 		.no_cgroup_owner	= 1,
 	};
 
-	/*
-	 * Call cow_file_range() to run the delalloc range directly, since we
-	 * won't go to NOCOW or async path again.
-	 *
-	 * Also we call cow_file_range() with @unlock_page == 0, so that we
-	 * can directly submit them without interruption.
-	 */
-	ret = cow_file_range(inode, locked_page, start, end, NULL, true, false);
-	/* Inline extent inserted, page gets unlocked and everything is done */
-	if (ret == 1)
-		return;
-
+	wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
+	ret = run_delalloc_cow(inode, locked_page, start, end, &wbc, false);
+	wbc_detach_inode(&wbc);
 	if (ret < 0) {
 		btrfs_cleanup_ordered_extents(inode, locked_page, start, end - start + 1);
 		if (locked_page) {
@@ -1100,14 +1091,7 @@ static void submit_uncompressed_range(struct btrfs_inode *inode,
 			mapping_set_error(locked_page->mapping, ret);
 			unlock_page(locked_page);
 		}
-		return;
 	}
-
-	/* All pages will be unlocked, including @locked_page */
-	wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
-	extent_write_locked_range(&inode->vfs_inode, NULL, start, end, &wbc,
-				  false);
-	wbc_detach_inode(&wbc);
 }
 
 static void submit_one_async_extent(struct async_chunk *async_chunk,
@@ -1714,9 +1698,14 @@ static bool run_delalloc_compressed(struct btrfs_inode *inode,
 	return true;
 }
 
-static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
-				       struct page *locked_page, u64 start,
-				       u64 end, struct writeback_control *wbc)
+/*
+ * Run the delalloc range from start to end, and write back any dirty pages
+ * covered by the range.
+ */
+static noinline int run_delalloc_cow(struct btrfs_inode *inode,
+				     struct page *locked_page, u64 start,
+				     u64 end, struct writeback_control *wbc,
+				     bool pages_dirty)
 {
 	u64 done_offset = end;
 	int ret;
@@ -1726,9 +1715,8 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 				     true, false);
 		if (ret)
 			return ret;
-
 		extent_write_locked_range(&inode->vfs_inode, locked_page, start,
-					  done_offset, wbc, true);
+					  done_offset, wbc, pages_dirty);
 		start = done_offset + 1;
 	}
 
@@ -2293,7 +2281,8 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
 		return 1;
 
 	if (zoned)
-		ret = run_delalloc_zoned(inode, locked_page, start, end, wbc);
+		ret = run_delalloc_cow(inode, locked_page, start, end, wbc,
+				       true);
 	else
 		ret = cow_file_range(inode, locked_page, start, end, NULL,
 				     false, false);

From ed2da9246f324ae88a2dcae629fc2008632ff151 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Jun 2023 17:31:44 +0200
Subject: [PATCH 1493/2047] mm: remove folio_account_redirty

Fold folio_account_redirty into folio_redirty_for_writepage now
that all other users except for the also unused account_page_redirty
wrapper are gone.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/linux/writeback.h |  5 ----
 mm/page-writeback.c       | 49 +++++++++++----------------------------
 2 files changed, 14 insertions(+), 40 deletions(-)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index fba937999fbf..083387c00f0c 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -375,11 +375,6 @@ void tag_pages_for_writeback(struct address_space *mapping,
 			     pgoff_t start, pgoff_t end);
 
 bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
-void folio_account_redirty(struct folio *folio);
-static inline void account_page_redirty(struct page *page)
-{
-	folio_account_redirty(page_folio(page));
-}
 bool folio_redirty_for_writepage(struct writeback_control *, struct folio *);
 bool redirty_page_for_writepage(struct writeback_control *, struct page *);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d3f42009bb70..b8d3d7040a50 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1193,7 +1193,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb,
 	 * write_bandwidth = ---------------------------------------------------
 	 *                                          period
 	 *
-	 * @written may have decreased due to folio_account_redirty().
+	 * @written may have decreased due to folio_redirty_for_writepage().
 	 * Avoid underflowing @bw calculation.
 	 */
 	bw = written - min(written, wb->written_stamp);
@@ -2711,37 +2711,6 @@ bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio)
 }
 EXPORT_SYMBOL(filemap_dirty_folio);
 
-/**
- * folio_account_redirty - Manually account for redirtying a page.
- * @folio: The folio which is being redirtied.
- *
- * Most filesystems should call folio_redirty_for_writepage() instead
- * of this fuction.  If your filesystem is doing writeback outside the
- * context of a writeback_control(), it can call this when redirtying
- * a folio, to de-account the dirty counters (NR_DIRTIED, WB_DIRTIED,
- * tsk->nr_dirtied), so that they match the written counters (NR_WRITTEN,
- * WB_WRITTEN) in long term. The mismatches will lead to systematic errors
- * in balanced_dirty_ratelimit and the dirty pages position control.
- */
-void folio_account_redirty(struct folio *folio)
-{
-	struct address_space *mapping = folio->mapping;
-
-	if (mapping && mapping_can_writeback(mapping)) {
-		struct inode *inode = mapping->host;
-		struct bdi_writeback *wb;
-		struct wb_lock_cookie cookie = {};
-		long nr = folio_nr_pages(folio);
-
-		wb = unlocked_inode_to_wb_begin(inode, &cookie);
-		current->nr_dirtied -= nr;
-		node_stat_mod_folio(folio, NR_DIRTIED, -nr);
-		wb_stat_mod(wb, WB_DIRTIED, -nr);
-		unlocked_inode_to_wb_end(inode, &cookie);
-	}
-}
-EXPORT_SYMBOL(folio_account_redirty);
-
 /**
  * folio_redirty_for_writepage - Decline to write a dirty folio.
  * @wbc: The writeback control.
@@ -2757,13 +2726,23 @@ EXPORT_SYMBOL(folio_account_redirty);
 bool folio_redirty_for_writepage(struct writeback_control *wbc,
 		struct folio *folio)
 {
-	bool ret;
+	struct address_space *mapping = folio->mapping;
 	long nr = folio_nr_pages(folio);
+	bool ret;
 
 	wbc->pages_skipped += nr;
-	ret = filemap_dirty_folio(folio->mapping, folio);
-	folio_account_redirty(folio);
+	ret = filemap_dirty_folio(mapping, folio);
+	if (mapping && mapping_can_writeback(mapping)) {
+		struct inode *inode = mapping->host;
+		struct bdi_writeback *wb;
+		struct wb_lock_cookie cookie = {};
 
+		wb = unlocked_inode_to_wb_begin(inode, &cookie);
+		current->nr_dirtied -= nr;
+		node_stat_mod_folio(folio, NR_DIRTIED, -nr);
+		wb_stat_mod(wb, WB_DIRTIED, -nr);
+		unlocked_inode_to_wb_end(inode, &cookie);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(folio_redirty_for_writepage);

From 7f9879eb6062754a126a874f784f8fa571e767ee Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Thu, 27 Jul 2023 21:53:03 +0800
Subject: [PATCH 1494/2047] btrfs: print name and pid when device scanning
 processes race

There is a race between systemd and mount, as both of them try to register
the device in the kernel. When systemd loses the race, it prints the
following message:

  BTRFS error: device /dev/sdb7 belongs to fsid 1b3bacbf-14db-49c9-a3ef-547998aacc4e, and the fs is already mounted.

The 'btrfs dev scan' registers one device at a time, so there is no way
for the mount thread to wait in the kernel for all the devices to have
registered as it won't know if all the devices are discovered.

For now, improve the error log by printing the command name and process
ID along with the error message.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8af0e54099c2..89410a480293 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -851,8 +851,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 
 		if (fs_devices->opened) {
 			btrfs_err(NULL,
-		"device %s belongs to fsid %pU, and the fs is already mounted",
-				  path, fs_devices->fsid);
+"device %s belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)",
+				  path, fs_devices->fsid, current->comm,
+				  task_pid_nr(current));
 			mutex_unlock(&fs_devices->device_list_mutex);
 			return ERR_PTR(-EBUSY);
 		}

From b9d97cff256f0efafd00f76b8d10d668f64436e9 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 20 Jul 2023 16:12:15 -0400
Subject: [PATCH 1495/2047] btrfs: move comments to btrfs_loop_type definition

Some of these loop types aren't described, and they should be with the
definitions to make it easier to tell what each of them do.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d674e421ea75..6dfe086523f9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3329,11 +3329,38 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
 }
 
 enum btrfs_loop_type {
+	/*
+	 * Start caching block groups but do not wait for progress or for them
+	 * to be done.
+	 */
 	LOOP_CACHING_NOWAIT,
+
+	/*
+	 * Wait for the block group free_space >= the space we're waiting for if
+	 * the block group isn't cached.
+	 */
 	LOOP_CACHING_WAIT,
+
+	/*
+	 * Allow allocations to happen from block groups that do not yet have a
+	 * size classification.
+	 */
 	LOOP_UNSET_SIZE_CLASS,
+
+	/*
+	 * Allocate a chunk and then retry the allocation.
+	 */
 	LOOP_ALLOC_CHUNK,
+
+	/*
+	 * Ignore the size class restrictions for this allocation.
+	 */
 	LOOP_WRONG_SIZE_CLASS,
+
+	/*
+	 * Ignore the empty size, only try to allocate the number of bytes
+	 * needed for this allocation.
+	 */
 	LOOP_NO_EMPTY_SIZE,
 };
 
@@ -3927,15 +3954,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
 	if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
 		return 1;
 
-	/*
-	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
-	 *			caching kthreads as we move along
-	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
-	 * LOOP_UNSET_SIZE_CLASS, allow unset size class
-	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
-	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
-	 *		       again
-	 */
+	/* See the comments for btrfs_loop_type for an explanation of the phases. */
 	if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
 		ffe_ctl->index = 0;
 		/*

From 257deed2a955fd73cc1ef53a96583edeab3ece1a Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Sat, 15 Jul 2023 19:08:27 +0800
Subject: [PATCH 1496/2047] btrfs: tests: enhance extent buffer bitmap tests

Enhance extent bitmap tests for the following aspects:

- Remove unnecessary @len from __test_eb_bitmaps()
  We can fetch the length from extent buffer

- Explicitly distinguish bit and byte length
  Now every start/len inside bitmap tests would have either "byte_" or
  "bit_" prefix to make it more explicit.

- Better error reporting

  If we have mismatch bits, the error report would dump the following
  contents:

  * start bytenr
  * bit number
  * the full byte from bitmap
  * the full byte from the extent

  This is to save developers time so obvious problem can be found
  immediately

- Extract bitmap set/clear and check operation into two helpers
  This is to save some code lines, as we will have more tests to do.

- Add new tests

  The following tests are added, mostly for the incoming extent bitmap
  accessor refactoring:

  * Set bits inside the same byte
  * Clear bits inside the same byte
  * Cross byte boundary set
  * Cross byte boundary clear
  * Cross multi-byte boundary set
  * Cross multi-byte boundary clear

  Those new tests have already saved my backend for the incoming extent
  buffer bitmap refactoring.

Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tests/extent-io-tests.c | 162 +++++++++++++++++++++----------
 1 file changed, 109 insertions(+), 53 deletions(-)

diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index f6bc6d738555..3e625c558b0b 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -319,86 +319,139 @@ out:
 	return ret;
 }
 
-static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb,
-			   unsigned long len)
+static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb)
 {
 	unsigned long i;
 
-	for (i = 0; i < len * BITS_PER_BYTE; i++) {
+	for (i = 0; i < eb->len * BITS_PER_BYTE; i++) {
 		int bit, bit1;
 
 		bit = !!test_bit(i, bitmap);
 		bit1 = !!extent_buffer_test_bit(eb, 0, i);
 		if (bit1 != bit) {
-			test_err("bits do not match");
+			u8 has;
+			u8 expect;
+
+			read_extent_buffer(eb, &has, i / BITS_PER_BYTE, 1);
+			expect = bitmap_get_value8(bitmap, ALIGN(i, BITS_PER_BYTE));
+
+			test_err(
+		"bits do not match, start byte 0 bit %lu, byte %lu has 0x%02x expect 0x%02x",
+				 i, i / BITS_PER_BYTE, has, expect);
 			return -EINVAL;
 		}
 
 		bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
 						i % BITS_PER_BYTE);
 		if (bit1 != bit) {
-			test_err("offset bits do not match");
+			u8 has;
+			u8 expect;
+
+			read_extent_buffer(eb, &has, i / BITS_PER_BYTE, 1);
+			expect = bitmap_get_value8(bitmap, ALIGN(i, BITS_PER_BYTE));
+
+			test_err(
+		"bits do not match, start byte %lu bit %lu, byte %lu has 0x%02x expect 0x%02x",
+				 i / BITS_PER_BYTE, i % BITS_PER_BYTE,
+				 i / BITS_PER_BYTE, has, expect);
 			return -EINVAL;
 		}
 	}
 	return 0;
 }
 
-static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
-			     unsigned long len)
+static int test_bitmap_set(const char *name, unsigned long *bitmap,
+			   struct extent_buffer *eb,
+			   unsigned long byte_start, unsigned long bit_start,
+			   unsigned long bit_len)
+{
+	int ret;
+
+	bitmap_set(bitmap, byte_start * BITS_PER_BYTE + bit_start, bit_len);
+	extent_buffer_bitmap_set(eb, byte_start, bit_start, bit_len);
+	ret = check_eb_bitmap(bitmap, eb);
+	if (ret < 0)
+		test_err("%s test failed", name);
+	return ret;
+}
+
+static int test_bitmap_clear(const char *name, unsigned long *bitmap,
+			     struct extent_buffer *eb,
+			     unsigned long byte_start, unsigned long bit_start,
+			     unsigned long bit_len)
+{
+	int ret;
+
+	bitmap_clear(bitmap, byte_start * BITS_PER_BYTE + bit_start, bit_len);
+	extent_buffer_bitmap_clear(eb, byte_start, bit_start, bit_len);
+	ret = check_eb_bitmap(bitmap, eb);
+	if (ret < 0)
+		test_err("%s test failed", name);
+	return ret;
+}
+static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb)
 {
 	unsigned long i, j;
+	unsigned long byte_len = eb->len;
 	u32 x;
 	int ret;
 
-	memset(bitmap, 0, len);
-	memzero_extent_buffer(eb, 0, len);
-	if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
-		test_err("bitmap was not zeroed");
-		return -EINVAL;
-	}
-
-	bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
-	extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
-	ret = check_eb_bitmap(bitmap, eb, len);
-	if (ret) {
-		test_err("setting all bits failed");
+	ret = test_bitmap_clear("clear all run 1", bitmap, eb, 0, 0,
+				byte_len * BITS_PER_BYTE);
+	if (ret < 0)
 		return ret;
-	}
 
-	bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
-	extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
-	ret = check_eb_bitmap(bitmap, eb, len);
-	if (ret) {
-		test_err("clearing all bits failed");
+	ret = test_bitmap_set("set all", bitmap, eb, 0, 0, byte_len * BITS_PER_BYTE);
+	if (ret < 0)
+		return ret;
+
+	ret = test_bitmap_clear("clear all run 2", bitmap, eb, 0, 0,
+				byte_len * BITS_PER_BYTE);
+	if (ret < 0)
+		return ret;
+
+	ret = test_bitmap_set("same byte set", bitmap, eb, 0, 2, 4);
+	if (ret < 0)
+		return ret;
+
+	ret = test_bitmap_clear("same byte partial clear", bitmap, eb, 0, 4, 1);
+	if (ret < 0)
+		return ret;
+
+	ret = test_bitmap_set("cross byte set", bitmap, eb, 2, 4, 8);
+	if (ret < 0)
+		return ret;
+
+	ret = test_bitmap_set("cross multi byte set", bitmap, eb, 4, 4, 24);
+	if (ret < 0)
+		return ret;
+
+	ret = test_bitmap_clear("cross byte clear", bitmap, eb, 2, 6, 4);
+	if (ret < 0)
+		return ret;
+
+	ret = test_bitmap_clear("cross multi byte clear", bitmap, eb, 4, 6, 20);
+	if (ret < 0)
 		return ret;
-	}
 
 	/* Straddling pages test */
-	if (len > PAGE_SIZE) {
-		bitmap_set(bitmap,
-			(PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
-			sizeof(long) * BITS_PER_BYTE);
-		extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
-					sizeof(long) * BITS_PER_BYTE);
-		ret = check_eb_bitmap(bitmap, eb, len);
-		if (ret) {
-			test_err("setting straddling pages failed");
+	if (byte_len > PAGE_SIZE) {
+		ret = test_bitmap_set("cross page set", bitmap, eb,
+				      PAGE_SIZE - sizeof(long) / 2, 0,
+				      sizeof(long) * BITS_PER_BYTE);
+		if (ret < 0)
 			return ret;
-		}
 
-		bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
-		bitmap_clear(bitmap,
-			(PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
-			sizeof(long) * BITS_PER_BYTE);
-		extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
-		extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
-					sizeof(long) * BITS_PER_BYTE);
-		ret = check_eb_bitmap(bitmap, eb, len);
-		if (ret) {
-			test_err("clearing straddling pages failed");
+		ret = test_bitmap_set("cross page set all", bitmap, eb, 0, 0,
+				      byte_len * BITS_PER_BYTE);
+		if (ret < 0)
+			return ret;
+
+		ret = test_bitmap_clear("cross page clear", bitmap, eb,
+					PAGE_SIZE - sizeof(long) / 2, 0,
+					sizeof(long) * BITS_PER_BYTE);
+		if (ret < 0)
 			return ret;
-		}
 	}
 
 	/*
@@ -406,9 +459,12 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 	 * something repetitive that could miss some hypothetical off-by-n bug.
 	 */
 	x = 0;
-	bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
-	extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
-	for (i = 0; i < len * BITS_PER_BYTE / 32; i++) {
+	ret = test_bitmap_clear("clear all run 3", bitmap, eb, 0, 0,
+				byte_len * BITS_PER_BYTE);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < byte_len * BITS_PER_BYTE / 32; i++) {
 		x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffU;
 		for (j = 0; j < 32; j++) {
 			if (x & (1U << j)) {
@@ -418,7 +474,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 		}
 	}
 
-	ret = check_eb_bitmap(bitmap, eb, len);
+	ret = check_eb_bitmap(bitmap, eb);
 	if (ret) {
 		test_err("random bit pattern failed");
 		return ret;
@@ -456,7 +512,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
 		goto out;
 	}
 
-	ret = __test_eb_bitmaps(bitmap, eb, nodesize);
+	ret = __test_eb_bitmaps(bitmap, eb);
 	if (ret)
 		goto out;
 
@@ -473,7 +529,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
 		goto out;
 	}
 
-	ret = __test_eb_bitmaps(bitmap, eb, nodesize);
+	ret = __test_eb_bitmaps(bitmap, eb);
 out:
 	free_extent_buffer(eb);
 	kfree(bitmap);

From 5864f1da6b165fc4735f02140b04fbfcb6ff27bf Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Sat, 15 Jul 2023 19:08:28 +0800
Subject: [PATCH 1497/2047] btrfs: tests: add self tests for extent buffer
 memory operations

The new self tests would populate a memory range with random bytes, then
copy it to the extent buffer, so that we can verify if the extent buffer
memory operation and memmove()/memcopy() are resulting the same
contents.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tests/extent-io-tests.c | 144 +++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)

diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 3e625c558b0b..1cc86af97dc6 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -648,6 +648,146 @@ out:
 	return ret;
 }
 
+static void dump_eb_and_memory_contents(struct extent_buffer *eb, void *memory,
+					const char *test_name)
+{
+	for (int i = 0; i < eb->len; i++) {
+		struct page *page = eb->pages[i >> PAGE_SHIFT];
+		void *addr = page_address(page) + offset_in_page(i);
+
+		if (memcmp(addr, memory + i, 1) != 0) {
+			test_err("%s failed", test_name);
+			test_err("eb and memory diffs at byte %u, eb has 0x%02x memory has 0x%02x",
+				 i, *(u8 *)addr, *(u8 *)(memory + i));
+			return;
+		}
+	}
+}
+
+static int verify_eb_and_memory(struct extent_buffer *eb, void *memory,
+				const char *test_name)
+{
+	for (int i = 0; i < (eb->len >> PAGE_SHIFT); i++) {
+		void *eb_addr = page_address(eb->pages[i]);
+
+		if (memcmp(memory + (i << PAGE_SHIFT), eb_addr, PAGE_SIZE) != 0) {
+			dump_eb_and_memory_contents(eb, memory, test_name);
+			return -EUCLEAN;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Init both memory and extent buffer contents to the same randomly generated
+ * contents.
+ */
+static void init_eb_and_memory(struct extent_buffer *eb, void *memory)
+{
+	get_random_bytes(memory, eb->len);
+	write_extent_buffer(eb, memory, 0, eb->len);
+}
+
+static int test_eb_mem_ops(u32 sectorsize, u32 nodesize)
+{
+	struct btrfs_fs_info *fs_info;
+	struct extent_buffer *eb = NULL;
+	void *memory = NULL;
+	int ret;
+
+	test_msg("running extent buffer memory operation tests");
+
+	fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
+	if (!fs_info) {
+		test_std_err(TEST_ALLOC_FS_INFO);
+		return -ENOMEM;
+	}
+
+	memory = kvzalloc(nodesize, GFP_KERNEL);
+	if (!memory) {
+		test_err("failed to allocate memory");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	eb = __alloc_dummy_extent_buffer(fs_info, SZ_1M, nodesize);
+	if (!eb) {
+		test_std_err(TEST_ALLOC_EXTENT_BUFFER);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	init_eb_and_memory(eb, memory);
+	ret = verify_eb_and_memory(eb, memory, "full eb write");
+	if (ret < 0)
+		goto out;
+
+	memcpy(memory, memory + 16, 16);
+	memcpy_extent_buffer(eb, 0, 16, 16);
+	ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 1");
+	if (ret < 0)
+		goto out;
+
+	memcpy(memory, memory + 2048, 16);
+	memcpy_extent_buffer(eb, 0, 2048, 16);
+	ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 2");
+	if (ret < 0)
+		goto out;
+	memcpy(memory, memory + 2048, 2048);
+	memcpy_extent_buffer(eb, 0, 2048, 2048);
+	ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 3");
+	if (ret < 0)
+		goto out;
+
+	memmove(memory + 512, memory + 256, 512);
+	memmove_extent_buffer(eb, 512, 256, 512);
+	ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 1");
+	if (ret < 0)
+		goto out;
+
+	memmove(memory + 2048, memory + 512, 2048);
+	memmove_extent_buffer(eb, 2048, 512, 2048);
+	ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 2");
+	if (ret < 0)
+		goto out;
+	memmove(memory + 512, memory + 2048, 2048);
+	memmove_extent_buffer(eb, 512, 2048, 2048);
+	ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 3");
+	if (ret < 0)
+		goto out;
+
+	if (nodesize > PAGE_SIZE) {
+		memcpy(memory, memory + 4096 - 128, 256);
+		memcpy_extent_buffer(eb, 0, 4096 - 128, 256);
+		ret = verify_eb_and_memory(eb, memory, "cross page non-overlapping memcpy 1");
+		if (ret < 0)
+			goto out;
+
+		memcpy(memory + 4096 - 128, memory + 4096 + 128, 256);
+		memcpy_extent_buffer(eb, 4096 - 128, 4096 + 128, 256);
+		ret = verify_eb_and_memory(eb, memory, "cross page non-overlapping memcpy 2");
+		if (ret < 0)
+			goto out;
+
+		memmove(memory + 4096 - 128, memory + 4096 - 64, 256);
+		memmove_extent_buffer(eb, 4096 - 128, 4096 - 64, 256);
+		ret = verify_eb_and_memory(eb, memory, "cross page overlapping memcpy 1");
+		if (ret < 0)
+			goto out;
+
+		memmove(memory + 4096 - 64, memory + 4096 - 128, 256);
+		memmove_extent_buffer(eb, 4096 - 64, 4096 - 128, 256);
+		ret = verify_eb_and_memory(eb, memory, "cross page overlapping memcpy 2");
+		if (ret < 0)
+			goto out;
+	}
+out:
+	free_extent_buffer(eb);
+	kvfree(memory);
+	btrfs_free_dummy_fs_info(fs_info);
+	return ret;
+}
+
 int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
 {
 	int ret;
@@ -663,6 +803,10 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
 		goto out;
 
 	ret = test_eb_bitmaps(sectorsize, nodesize);
+	if (ret)
+		goto out;
+
+	ret = test_eb_mem_ops(sectorsize, nodesize);
 out:
 	return ret;
 }

From cb22964f1dad228fc2a1e5aba50c16b4b5680cd5 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Sat, 15 Jul 2023 19:08:29 +0800
Subject: [PATCH 1498/2047] btrfs: refactor extent buffer bitmaps operations

[BACKGROUND]
Currently we handle extent bitmaps manually in
extent_buffer_bitmap_set() and extent_buffer_bitmap_clear().

Although with various helpers like eb_bitmap_offset() it's still a little
messy to read.  The code seems to be a copy of bitmap_set(), but with
all the cross-page handling embedded into the code.

[ENHANCEMENT]
This patch would enhance the readability by introducing two helpers:

- memset_extent_buffer()
  To handle the byte aligned range, thus all the cross-page handling is
  done there.

- extent_buffer_get_byte()
  This for the first and the last byte operations, which only need to
  grab one byte, thus no need for any cross-page handling.

So we can split both extent_buffer_bitmap_set() and
extent_buffer_bitmap_clear() into 3 parts:

- Handle the first byte
  If the range fits inside the first byte, we can exit early.

- Handle the byte aligned part
  This is the part which can have cross-page operations, and it would
  be handled by memset_extent_buffer().

- Handle the last byte

This refactoring does not only make the code a little easier to read,
but also makes later folio/page switch much easier, as the switch only
needs to be done inside memset_extent_buffer() and extent_buffer_get_byte().

Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 141 ++++++++++++++++++++-----------------------
 1 file changed, 67 insertions(+), 74 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 90a58a38a9f3..3e9f39bda9f2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4183,32 +4183,30 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
 	}
 }
 
-void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
-		unsigned long len)
+static void memset_extent_buffer(const struct extent_buffer *eb, int c,
+				 unsigned long start, unsigned long len)
 {
-	size_t cur;
-	size_t offset;
-	struct page *page;
-	char *kaddr;
-	unsigned long i = get_eb_page_index(start);
+	unsigned long cur = start;
 
+	while (cur < start + len) {
+		unsigned long index = get_eb_page_index(cur);
+		unsigned int offset = get_eb_offset_in_page(eb, cur);
+		unsigned int cur_len = min(start + len - cur, PAGE_SIZE - offset);
+		struct page *page = eb->pages[index];
+
+		assert_eb_page_uptodate(eb, page);
+		memset(page_address(page) + offset, c, cur_len);
+
+		cur += cur_len;
+	}
+}
+
+void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
+			   unsigned long len)
+{
 	if (check_eb_range(eb, start, len))
 		return;
-
-	offset = get_eb_offset_in_page(eb, start);
-
-	while (len > 0) {
-		page = eb->pages[i];
-		assert_eb_page_uptodate(eb, page);
-
-		cur = min(len, PAGE_SIZE - offset);
-		kaddr = page_address(page);
-		memset(kaddr + offset, 0, cur);
-
-		len -= cur;
-		offset = 0;
-		i++;
-	}
+	return memset_extent_buffer(eb, 0, start, len);
 }
 
 void copy_extent_buffer_full(const struct extent_buffer *dst,
@@ -4325,6 +4323,15 @@ int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
 	return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
 }
 
+static u8 *extent_buffer_get_byte(const struct extent_buffer *eb, unsigned long bytenr)
+{
+	unsigned long index = get_eb_page_index(bytenr);
+
+	if (check_eb_range(eb, bytenr, 1))
+		return NULL;
+	return page_address(eb->pages[index]) + get_eb_offset_in_page(eb, bytenr);
+}
+
 /*
  * Set an area of a bitmap to 1.
  *
@@ -4336,35 +4343,28 @@ int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
 void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
 			      unsigned long pos, unsigned long len)
 {
+	unsigned int first_byte = start + BIT_BYTE(pos);
+	unsigned int last_byte = start + BIT_BYTE(pos + len - 1);
+	const bool same_byte = (first_byte == last_byte);
+	u8 mask = BITMAP_FIRST_BYTE_MASK(pos);
 	u8 *kaddr;
-	struct page *page;
-	unsigned long i;
-	size_t offset;
-	const unsigned int size = pos + len;
-	int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
-	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
 
-	eb_bitmap_offset(eb, start, pos, &i, &offset);
-	page = eb->pages[i];
-	assert_eb_page_uptodate(eb, page);
-	kaddr = page_address(page);
+	if (same_byte)
+		mask &= BITMAP_LAST_BYTE_MASK(pos + len);
 
-	while (len >= bits_to_set) {
-		kaddr[offset] |= mask_to_set;
-		len -= bits_to_set;
-		bits_to_set = BITS_PER_BYTE;
-		mask_to_set = ~0;
-		if (++offset >= PAGE_SIZE && len > 0) {
-			offset = 0;
-			page = eb->pages[++i];
-			assert_eb_page_uptodate(eb, page);
-			kaddr = page_address(page);
-		}
-	}
-	if (len) {
-		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
-		kaddr[offset] |= mask_to_set;
-	}
+	/* Handle the first byte. */
+	kaddr = extent_buffer_get_byte(eb, first_byte);
+	*kaddr |= mask;
+	if (same_byte)
+		return;
+
+	/* Handle the byte aligned part. */
+	ASSERT(first_byte + 1 <= last_byte);
+	memset_extent_buffer(eb, 0xff, first_byte + 1, last_byte - first_byte - 1);
+
+	/* Handle the last byte. */
+	kaddr = extent_buffer_get_byte(eb, last_byte);
+	*kaddr |= BITMAP_LAST_BYTE_MASK(pos + len);
 }
 
 
@@ -4380,35 +4380,28 @@ void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
 				unsigned long start, unsigned long pos,
 				unsigned long len)
 {
+	unsigned int first_byte = start + BIT_BYTE(pos);
+	unsigned int last_byte = start + BIT_BYTE(pos + len - 1);
+	const bool same_byte = (first_byte == last_byte);
+	u8 mask = BITMAP_FIRST_BYTE_MASK(pos);
 	u8 *kaddr;
-	struct page *page;
-	unsigned long i;
-	size_t offset;
-	const unsigned int size = pos + len;
-	int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
-	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
 
-	eb_bitmap_offset(eb, start, pos, &i, &offset);
-	page = eb->pages[i];
-	assert_eb_page_uptodate(eb, page);
-	kaddr = page_address(page);
+	if (same_byte)
+		mask &= BITMAP_LAST_BYTE_MASK(pos + len);
 
-	while (len >= bits_to_clear) {
-		kaddr[offset] &= ~mask_to_clear;
-		len -= bits_to_clear;
-		bits_to_clear = BITS_PER_BYTE;
-		mask_to_clear = ~0;
-		if (++offset >= PAGE_SIZE && len > 0) {
-			offset = 0;
-			page = eb->pages[++i];
-			assert_eb_page_uptodate(eb, page);
-			kaddr = page_address(page);
-		}
-	}
-	if (len) {
-		mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
-		kaddr[offset] &= ~mask_to_clear;
-	}
+	/* Handle the first byte. */
+	kaddr = extent_buffer_get_byte(eb, first_byte);
+	*kaddr &= ~mask;
+	if (same_byte)
+		return;
+
+	/* Handle the byte aligned part. */
+	ASSERT(first_byte + 1 <= last_byte);
+	memset_extent_buffer(eb, 0, first_byte + 1, last_byte - first_byte - 1);
+
+	/* Handle the last byte. */
+	kaddr = extent_buffer_get_byte(eb, last_byte);
+	*kaddr &= ~BITMAP_LAST_BYTE_MASK(pos + len);
 }
 
 static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)

From 730c374e5b2cec061e3af7616b9d1265e60b6c0a Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Sat, 15 Jul 2023 19:08:30 +0800
Subject: [PATCH 1499/2047] btrfs: use write_extent_buffer() to implement
 write_extent_buffer_*id()

Helpers write_extent_buffer_chunk_tree_uuid() and
write_extent_buffer_fsid(), they can be implemented by
write_extent_buffer().

These two helpers are not that frequently used, they only get called
during initialization of a new tree block.  There is not much need for
those slightly optimized versions.  And since they can be easily
converted to one write_extent_buffer() call, define them as inline
helpers.

This would make later page/folio switch much easier, as all change only
need to happen in write_extent_buffer().

Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 22 ----------------------
 fs/btrfs/extent_io.h | 19 ++++++++++++++++---
 2 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e9f39bda9f2..dc15eaa75c65 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4129,28 +4129,6 @@ static void assert_eb_page_uptodate(const struct extent_buffer *eb,
 	}
 }
 
-void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
-		const void *srcv)
-{
-	char *kaddr;
-
-	assert_eb_page_uptodate(eb, eb->pages[0]);
-	kaddr = page_address(eb->pages[0]) +
-		get_eb_offset_in_page(eb, offsetof(struct btrfs_header,
-						   chunk_tree_uuid));
-	memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
-}
-
-void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
-{
-	char *kaddr;
-
-	assert_eb_page_uptodate(eb, eb->pages[0]);
-	kaddr = page_address(eb->pages[0]) +
-		get_eb_offset_in_page(eb, offsetof(struct btrfs_header, fsid));
-	memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
-}
-
 void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
 			 unsigned long start, unsigned long len)
 {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index c01f9c5ddc13..adda14c1b763 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -236,11 +236,24 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dst,
 int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
 				       void __user *dst, unsigned long start,
 				       unsigned long len);
-void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
-void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
-		const void *src);
 void write_extent_buffer(const struct extent_buffer *eb, const void *src,
 			 unsigned long start, unsigned long len);
+
+static inline void write_extent_buffer_chunk_tree_uuid(
+		const struct extent_buffer *eb, const void *chunk_tree_uuid)
+{
+	write_extent_buffer(eb, chunk_tree_uuid,
+			    offsetof(struct btrfs_header, chunk_tree_uuid),
+			    BTRFS_FSID_SIZE);
+}
+
+static inline void write_extent_buffer_fsid(const struct extent_buffer *eb,
+					    const void *fsid)
+{
+	write_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
+			    BTRFS_FSID_SIZE);
+}
+
 void copy_extent_buffer_full(const struct extent_buffer *dst,
 			     const struct extent_buffer *src);
 void copy_extent_buffer(const struct extent_buffer *dst,

From 54948681c21171a67c2fe03adcce54901e39dfe3 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Sat, 15 Jul 2023 19:08:31 +0800
Subject: [PATCH 1500/2047] btrfs: refactor main loop in
 copy_extent_buffer_full()

[BACKGROUND]
copy_extent_buffer_full() currently does different handling for regular
and subpage cases, for regular cases it does a page by page copying.
For subpage cases, it just copies the content.

This is fine for the page based extent buffer code, but for the incoming
folio conversion, it can be a burden to add a new branch just to handle
all the different combinations (subpage vs regular, one single folio vs
multi pages).

[ENHANCE]
Instead of handling the different combinations, just go one single
handling for all cases, utilizing write_extent_buffer() to do the
copying.

Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index dc15eaa75c65..1ba8857a8dbe 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4190,24 +4190,19 @@ void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
 void copy_extent_buffer_full(const struct extent_buffer *dst,
 			     const struct extent_buffer *src)
 {
-	int i;
-	int num_pages;
+	unsigned long cur = 0;
 
 	ASSERT(dst->len == src->len);
 
-	if (dst->fs_info->nodesize >= PAGE_SIZE) {
-		num_pages = num_extent_pages(dst);
-		for (i = 0; i < num_pages; i++)
-			copy_page(page_address(dst->pages[i]),
-				  page_address(src->pages[i]));
-	} else {
-		size_t src_offset = get_eb_offset_in_page(src, 0);
-		size_t dst_offset = get_eb_offset_in_page(dst, 0);
+	while (cur < src->len) {
+		unsigned long index = get_eb_page_index(cur);
+		unsigned long offset = get_eb_offset_in_page(src, cur);
+		unsigned long cur_len = min(src->len, PAGE_SIZE - offset);
+		void *addr = page_address(src->pages[index]) + offset;
 
-		ASSERT(src->fs_info->nodesize < PAGE_SIZE);
-		memcpy(page_address(dst->pages[0]) + dst_offset,
-		       page_address(src->pages[0]) + src_offset,
-		       src->len);
+		write_extent_buffer(dst, addr, cur, cur_len);
+
+		cur += cur_len;
 	}
 }
 

From 682a0bc5573f3c10a435fb9650c856bb325d2733 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Sat, 15 Jul 2023 19:08:32 +0800
Subject: [PATCH 1501/2047] btrfs: copy all pages at once at the end of
 btrfs_clone_extent_buffer()

btrfs_clone_extent_buffer() calls copy_page() at each iteration but we
can copy all pages at the end in one go if there were no errors.
This would make later conversion to folios easier.

Reviewed-by: Sweet Tea Dorminy <sweettea-kernel@dorminy.me>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1ba8857a8dbe..3e30333a4c6b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3239,8 +3239,8 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
 			return NULL;
 		}
 		WARN_ON(PageDirty(p));
-		copy_page(page_address(p), page_address(src->pages[i]));
 	}
+	copy_extent_buffer_full(new, src);
 	set_extent_buffer_uptodate(new);
 
 	return new;

From 13840f3f2837b9a00f06d2d8d93dbdc6b2b6532e Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Sat, 15 Jul 2023 19:08:33 +0800
Subject: [PATCH 1502/2047] btrfs: refactor main loop in memcpy_extent_buffer()

[BACKGROUND]
Currently memcpy_extent_buffer() does a loop where it would stop at
any page boundary inside [dst_offset, dst_offset + len) or [src_offset,
src_offset + len).

This is mostly allowing us to do copy_pages(), but if we're going to use
folios we will need to handle multi-page (the old behavior) or single
folio (the new optimization).

The current code would be a burden for future changes.

[ENHANCEMENT]
There is a hidden pitfall of the naming memcpy_extent_buffer(), unlike
regular memcpy(), this function can handle overlapping ranges.

So here we extract write_extent_buffer() into a new internal helper,
__write_extent_buffer(), and add a new parameter @use_memmove, to
indicate whether we should use memmove() or regular memcpy().

Now we can go __write_extent_buffer() to handle writing into the dst
range, with proper overlapping detection.

This has a tiny change to the chance of calling memmove().
As the split only happens at the source range page boundaries, the
memcpy/memmove() range would be slightly larger than the old code,
thus slightly increase the chance we call memmove() other than memcopy().

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 56 ++++++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 26 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e30333a4c6b..0c4a305aa5cb 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4129,8 +4129,9 @@ static void assert_eb_page_uptodate(const struct extent_buffer *eb,
 	}
 }
 
-void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
-			 unsigned long start, unsigned long len)
+static void __write_extent_buffer(const struct extent_buffer *eb,
+				  const void *srcv, unsigned long start,
+				  unsigned long len, bool use_memmove)
 {
 	size_t cur;
 	size_t offset;
@@ -4138,6 +4139,8 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
 	char *kaddr;
 	char *src = (char *)srcv;
 	unsigned long i = get_eb_page_index(start);
+	/* For unmapped (dummy) ebs, no need to check their uptodate status. */
+	const bool check_uptodate = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
 
 	WARN_ON(test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags));
 
@@ -4148,11 +4151,15 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
 
 	while (len > 0) {
 		page = eb->pages[i];
-		assert_eb_page_uptodate(eb, page);
+		if (check_uptodate)
+			assert_eb_page_uptodate(eb, page);
 
 		cur = min(len, PAGE_SIZE - offset);
 		kaddr = page_address(page);
-		memcpy(kaddr + offset, src, cur);
+		if (use_memmove)
+			memmove(kaddr + offset, src, cur);
+		else
+			memcpy(kaddr + offset, src, cur);
 
 		src += cur;
 		len -= cur;
@@ -4161,6 +4168,12 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
 	}
 }
 
+void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
+			 unsigned long start, unsigned long len)
+{
+	return __write_extent_buffer(eb, srcv, start, len, false);
+}
+
 static void memset_extent_buffer(const struct extent_buffer *eb, int c,
 				 unsigned long start, unsigned long len)
 {
@@ -4409,34 +4422,25 @@ void memcpy_extent_buffer(const struct extent_buffer *dst,
 			  unsigned long dst_offset, unsigned long src_offset,
 			  unsigned long len)
 {
-	size_t cur;
-	size_t dst_off_in_page;
-	size_t src_off_in_page;
-	unsigned long dst_i;
-	unsigned long src_i;
+	unsigned long cur_off = 0;
 
 	if (check_eb_range(dst, dst_offset, len) ||
 	    check_eb_range(dst, src_offset, len))
 		return;
 
-	while (len > 0) {
-		dst_off_in_page = get_eb_offset_in_page(dst, dst_offset);
-		src_off_in_page = get_eb_offset_in_page(dst, src_offset);
+	while (cur_off < len) {
+		unsigned long cur_src = cur_off + src_offset;
+		unsigned long pg_index = get_eb_page_index(cur_src);
+		unsigned long pg_off = get_eb_offset_in_page(dst, cur_src);
+		unsigned long cur_len = min(src_offset + len - cur_src,
+					    PAGE_SIZE - pg_off);
+		void *src_addr = page_address(dst->pages[pg_index]) + pg_off;
+		const bool use_memmove = areas_overlap(src_offset + cur_off,
+						       dst_offset + cur_off, cur_len);
 
-		dst_i = get_eb_page_index(dst_offset);
-		src_i = get_eb_page_index(src_offset);
-
-		cur = min(len, (unsigned long)(PAGE_SIZE -
-					       src_off_in_page));
-		cur = min_t(unsigned long, cur,
-			(unsigned long)(PAGE_SIZE - dst_off_in_page));
-
-		copy_pages(dst->pages[dst_i], dst->pages[src_i],
-			   dst_off_in_page, src_off_in_page, cur);
-
-		src_offset += cur;
-		dst_offset += cur;
-		len -= cur;
+		__write_extent_buffer(dst, src_addr, dst_offset + cur_off, cur_len,
+				      use_memmove);
+		cur_off += cur_len;
 	}
 }
 

From 096d230165439a341639e85a5507c6b74c7dd410 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Sat, 15 Jul 2023 19:08:34 +0800
Subject: [PATCH 1503/2047] btrfs: refactor main loop in
 memmove_extent_buffer()

[BACKGROUND]
Currently memove_extent_buffer() does a loop where it strop at any page
boundary inside [dst_offset, dst_offset + len) or [src_offset,
src_offset + len).

This is mostly allowing us to do copy_pages(), but if we're going to use
folios we will need to handle multi-page (the old behavior) or single
folio (the new optimization).

The current code would be a burden for future changes.

[ENHANCEMENT]
Instead of sticking with copy_pages(), here we utilize the new
__write_extent_buffer() helper to handle the writes.

Unlike the refactoring in memcpy_extent_buffer(), we can not just rely
on the write_extent_buffer() and only handle page boundaries inside src
range.

The function write_extent_buffer() itself is still doing forward
writing, thus it cannot handle the following case: (already in the
extent buffer memory operation tests, cross page overlapping run 2)

	Src	Page boundary
	|///////|
	    |///|////|
	    Dst

In the above case, if we just follow page boundary in the src range, we
have no need to do any split, just one __write_extent_buffer() with
use_memmove = true.

But __write_extent_buffer() would split the dst range into two,
so it first copies the beginning part of the src range into the first half
of the dst range.
After this operation, the beginning of the dst range is already updated,
causing corruption.

So we have to follow the old behavior of handling both page boundaries.

And since we're the last caller of copy_pages(), we can remove it
completely.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 48 ++++++++++++++++----------------------------
 1 file changed, 17 insertions(+), 31 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0c4a305aa5cb..36d80cb8cfa3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4396,28 +4396,6 @@ static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned
 	return distance < len;
 }
 
-static void copy_pages(struct page *dst_page, struct page *src_page,
-		       unsigned long dst_off, unsigned long src_off,
-		       unsigned long len)
-{
-	char *dst_kaddr = page_address(dst_page);
-	char *src_kaddr;
-	int must_memmove = 0;
-
-	if (dst_page != src_page) {
-		src_kaddr = page_address(src_page);
-	} else {
-		src_kaddr = dst_kaddr;
-		if (areas_overlap(src_off, dst_off, len))
-			must_memmove = 1;
-	}
-
-	if (must_memmove)
-		memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
-	else
-		memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
-}
-
 void memcpy_extent_buffer(const struct extent_buffer *dst,
 			  unsigned long dst_offset, unsigned long src_offset,
 			  unsigned long len)
@@ -4448,23 +4426,26 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
 			   unsigned long dst_offset, unsigned long src_offset,
 			   unsigned long len)
 {
-	size_t cur;
-	size_t dst_off_in_page;
-	size_t src_off_in_page;
 	unsigned long dst_end = dst_offset + len - 1;
 	unsigned long src_end = src_offset + len - 1;
-	unsigned long dst_i;
-	unsigned long src_i;
 
 	if (check_eb_range(dst, dst_offset, len) ||
 	    check_eb_range(dst, src_offset, len))
 		return;
+
 	if (dst_offset < src_offset) {
 		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
 		return;
 	}
+
 	while (len > 0) {
-		dst_i = get_eb_page_index(dst_end);
+		unsigned long src_i;
+		size_t cur;
+		size_t dst_off_in_page;
+		size_t src_off_in_page;
+		void *src_addr;
+		bool use_memmove;
+
 		src_i = get_eb_page_index(src_end);
 
 		dst_off_in_page = get_eb_offset_in_page(dst, dst_end);
@@ -4472,9 +4453,14 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
 
 		cur = min_t(unsigned long, len, src_off_in_page + 1);
 		cur = min(cur, dst_off_in_page + 1);
-		copy_pages(dst->pages[dst_i], dst->pages[src_i],
-			   dst_off_in_page - cur + 1,
-			   src_off_in_page - cur + 1, cur);
+
+		src_addr = page_address(dst->pages[src_i]) + src_off_in_page -
+					cur + 1;
+		use_memmove = areas_overlap(src_end - cur + 1, dst_end - cur + 1,
+					    cur);
+
+		__write_extent_buffer(dst, src_addr, dst_end - cur + 1, cur,
+				      use_memmove);
 
 		dst_end -= cur;
 		src_end -= cur;

From 4490e803e1fe9fab8db5025e44e23b55df54078b Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:56:57 +0100
Subject: [PATCH 1504/2047] btrfs: don't start transaction when joining with
 TRANS_JOIN_NOSTART

When joining a transaction with TRANS_JOIN_NOSTART, if we don't find a
running transaction we end up creating one. This goes against the purpose
of TRANS_JOIN_NOSTART which is to join a running transaction if its state
is at or below the state TRANS_STATE_COMMIT_START, otherwise return an
-ENOENT error and don't start a new transaction. So fix this to not create
a new transaction if there's no running transaction at or below that
state.

CC: stable@vger.kernel.org # 4.14+
Fixes: a6d155d2e363 ("Btrfs: fix deadlock between fiemap and transaction commits")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/transaction.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 815f61d6b506..6a2a12593183 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -292,10 +292,11 @@ loop:
 	spin_unlock(&fs_info->trans_lock);
 
 	/*
-	 * If we are ATTACH, we just want to catch the current transaction,
-	 * and commit it. If there is no transaction, just return ENOENT.
+	 * If we are ATTACH or TRANS_JOIN_NOSTART, we just want to catch the
+	 * current transaction, and commit it. If there is no transaction, just
+	 * return ENOENT.
 	 */
-	if (type == TRANS_ATTACH)
+	if (type == TRANS_ATTACH || type == TRANS_JOIN_NOSTART)
 		return -ENOENT;
 
 	/*

From 19288951ffa8c67c9f40d2eed72ebc53071227c0 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:56:58 +0100
Subject: [PATCH 1505/2047] btrfs: update comment for
 btrfs_join_transaction_nostart()

Update the comment for btrfs_join_transaction_nostart() to be more clear
about how it works and how it's different from btrfs_attach_transaction().

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/transaction.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 6a2a12593183..ab09542f2170 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -799,7 +799,10 @@ struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *
 
 /*
  * Similar to regular join but it never starts a transaction when none is
- * running or after waiting for the current one to finish.
+ * running or when there's a running one at a state >= TRANS_STATE_UNBLOCKED.
+ * This is similar to btrfs_attach_transaction() but it allows the join to
+ * happen if the transaction commit already started but it's not yet in the
+ * "doing" phase (the state is < TRANS_STATE_COMMIT_DOING).
  */
 struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root)
 {

From 4d2024e90d0e06a1745d22a9d58fff2f6726481a Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:56:59 +0100
Subject: [PATCH 1506/2047] btrfs: print target number of bytes when dumping
 free space

When dumping free space, with btrfs_dump_free_space(), we pass a bytes
argument in order to count how many free space entries in the block group
have a size greater than or equal to that number of bytes. We then print
how many suitable entries we found, but we don't print the target number
of bytes, we just say "bytes". Change the message to actually print the
number of bytes, which makes debugging -ENOSPC issues a bit easier.

Also sligthly change the odd grammar and terminology: the sentence is
ending with 'is', which doesn't make sense, and the term 'blocks' is
confusing as we are referring to free space entries within the block
group's free space cache.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/free-space-cache.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index bfc01352351c..cd5bfda2c259 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2943,7 +2943,8 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
 	btrfs_info(fs_info, "block group has cluster?: %s",
 	       list_empty(&block_group->cluster_list) ? "no" : "yes");
 	btrfs_info(fs_info,
-		   "%d blocks of free space at or bigger than bytes is", count);
+		   "%d free space entries at or bigger than %llu bytes",
+		   count, bytes);
 }
 
 void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,

From b92e8f5472a28e311983f9f47e281e0adf56f10a Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:00 +0100
Subject: [PATCH 1507/2047] btrfs: print block group super and delalloc bytes
 when dumping space info

When dumping a space info's block groups, also print the number of bytes
used for super blocks and delalloc. This is often useful for debugging
-ENOSPC problems.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 75e7fa337e66..ae12a8a9cd12 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -525,10 +525,11 @@ again:
 	list_for_each_entry(cache, &info->block_groups[index], list) {
 		spin_lock(&cache->lock);
 		btrfs_info(fs_info,
-			"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu zone_unusable %s",
-			cache->start, cache->length, cache->used, cache->pinned,
-			cache->reserved, cache->zone_unusable,
-			cache->ro ? "[readonly]" : "");
+"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable %s",
+			   cache->start, cache->length, cache->used, cache->pinned,
+			   cache->reserved, cache->delalloc_bytes,
+			   cache->bytes_super, cache->zone_unusable,
+			   cache->ro ? "[readonly]" : "");
 		spin_unlock(&cache->lock);
 		btrfs_dump_free_space(cache, bytes);
 	}

From e50b122b832bed9f36d459f19fd73d00658f5c1e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:01 +0100
Subject: [PATCH 1508/2047] btrfs: print available space for a block group when
 dumping a space info

When dumping a space info, we iterate over all its block groups and then
print their size and the amounts of bytes used, reserved, pinned, etc.
When debugging -ENOSPC problems it's also useful to know how much space
is available (free), so calculate that and print it as well.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index ae12a8a9cd12..54d78e839c01 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -523,13 +523,18 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
 	down_read(&info->groups_sem);
 again:
 	list_for_each_entry(cache, &info->block_groups[index], list) {
+		u64 avail;
+
 		spin_lock(&cache->lock);
+		avail = cache->length - cache->used - cache->pinned -
+			cache->reserved - cache->delalloc_bytes -
+			cache->bytes_super - cache->zone_unusable;
 		btrfs_info(fs_info,
-"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable %s",
+"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s",
 			   cache->start, cache->length, cache->used, cache->pinned,
 			   cache->reserved, cache->delalloc_bytes,
 			   cache->bytes_super, cache->zone_unusable,
-			   cache->ro ? "[readonly]" : "");
+			   avail, cache->ro ? "[readonly]" : "");
 		spin_unlock(&cache->lock);
 		btrfs_dump_free_space(cache, bytes);
 	}

From 1ff9fee3bd2e865c81d29684dd40a91698f7999b Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:02 +0100
Subject: [PATCH 1509/2047] btrfs: print available space across all block
 groups when dumping space info

When dumping a space info also sum the available space for all block
groups and then print it. This often useful for debugging -ENOSPC
related problems.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 54d78e839c01..de0044283e29 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -510,6 +510,7 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
 			   int dump_block_groups)
 {
 	struct btrfs_block_group *cache;
+	u64 total_avail = 0;
 	int index = 0;
 
 	spin_lock(&info->lock);
@@ -537,10 +538,13 @@ again:
 			   avail, cache->ro ? "[readonly]" : "");
 		spin_unlock(&cache->lock);
 		btrfs_dump_free_space(cache, bytes);
+		total_avail += avail;
 	}
 	if (++index < BTRFS_NR_RAID_TYPES)
 		goto again;
 	up_read(&info->groups_sem);
+
+	btrfs_info(fs_info, "%llu bytes available across all block groups", total_avail);
 }
 
 static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info,

From 1b6948acb854b88162f273a26f9324d167051700 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:03 +0100
Subject: [PATCH 1510/2047] btrfs: don't steal space from global rsv after a
 transaction abort

When doing a priority metadata space reclaim, while we are going through
the flush states and running their respective operations, it's possible
that a transaction abort happened, for example when running delayed refs
we hit -ENOSPC or in the critical section of transaction commit we failed
with -ENOSPC or some other error. In these cases a transaction was aborted
and the fs turned into error state. If that happened, then it makes no
sense to steal from the global block reserve and return success to the
caller if the stealing was successful - the caller will later get an
error when attempting to modify the fs. Instead make the ticket fail if
we have the fs in error state and don't attempt to steal from the global
rsv, as it's not only it's pointless, it also simplifies debugging some
-ENOSPC problems.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index de0044283e29..5b1b71e029ad 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -1418,8 +1418,18 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
 		}
 	}
 
-	/* Attempt to steal from the global rsv if we can. */
-	if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
+	/*
+	 * Attempt to steal from the global rsv if we can, except if the fs was
+	 * turned into error mode due to a transaction abort when flushing space
+	 * above, in that case fail with -EROFS instead of returning success to
+	 * the caller if we can steal from the global rsv - this is just to have
+	 * caller fail immeditelly instead of later when trying to modify the
+	 * fs, making it easier to debug -ENOSPC problems.
+	 */
+	if (BTRFS_FS_ERROR(fs_info)) {
+		ticket->error = -EROFS;
+		remove_ticket(space_info, ticket);
+	} else if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
 		ticket->error = -ENOSPC;
 		remove_ticket(space_info, ticket);
 	}

From ae3364e5215bed9ce89db6b0c2d21eae4b66f4ae Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:04 +0100
Subject: [PATCH 1511/2047] btrfs: store the error that turned the fs into
 error state

Currently when we turn the fs into an error state, typically after a
transaction abort, we don't store the error anywhere, we just set a bit
(BTRFS_FS_STATE_ERROR) at struct btrfs_fs_info::fs_state to signal the
error state.

There are cases where it would be useful to have access to the specific
error in order to provide a more meaningful error to users/applications.
This change adds a member to struct btrfs_fs_info to store the error and
removes the BTRFS_FS_STATE_ERROR bit. When there's no error, the new
member (fs_error) has a value of 0, otherwise its value is a negative
errno value.

Followup changes will make use of this new member.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c  |  2 +-
 fs/btrfs/fs.h       | 12 ++++++++----
 fs/btrfs/messages.c | 10 +++++++---
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e7a096a8c0f6..ccc34c67b040 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3222,7 +3222,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 
 	/* check FS state, whether FS is broken. */
 	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
-		set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
+		WRITE_ONCE(fs_info->fs_error, -EUCLEAN);
 
 	/*
 	 * In the long term, we'll store the compression type in the super
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 203d2a267828..ef07c6c252d8 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -46,8 +46,6 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
  * Runtime (in-memory) states of filesystem
  */
 enum {
-	/* Global indicator of serious filesystem errors */
-	BTRFS_FS_STATE_ERROR,
 	/*
 	 * Filesystem is being remounted, allow to skip some operations, like
 	 * defrag
@@ -686,6 +684,12 @@ struct btrfs_fs_info {
 	bool qgroup_rescan_running;
 	u8 qgroup_drop_subtree_thres;
 
+	/*
+	 * If this is not 0, then it indicates a serious filesystem error has
+	 * happened and it contains that error (negative errno value).
+	 */
+	int fs_error;
+
 	/* Filesystem state */
 	unsigned long fs_state;
 
@@ -962,8 +966,8 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
 	clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
 }
 
-#define BTRFS_FS_ERROR(fs_info)	(unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
-						   &(fs_info)->fs_state)))
+#define BTRFS_FS_ERROR(fs_info)	(READ_ONCE((fs_info)->fs_error))
+
 #define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info)				\
 	(unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,		\
 			   &(fs_info)->fs_state)))
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index 23fc11af498a..e3c9d2706341 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -10,14 +10,13 @@
 #ifdef CONFIG_PRINTK
 
 #define STATE_STRING_PREFACE	": state "
-#define STATE_STRING_BUF_LEN	(sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
+#define STATE_STRING_BUF_LEN	(sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT + 1)
 
 /*
  * Characters to print to indicate error conditions or uncommon filesystem state.
  * RO is not an error.
  */
 static const char fs_state_chars[] = {
-	[BTRFS_FS_STATE_ERROR]			= 'E',
 	[BTRFS_FS_STATE_REMOUNTING]		= 'M',
 	[BTRFS_FS_STATE_RO]			= 0,
 	[BTRFS_FS_STATE_TRANS_ABORTED]		= 'A',
@@ -37,6 +36,11 @@ static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
 	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
 	curr += sizeof(STATE_STRING_PREFACE) - 1;
 
+	if (BTRFS_FS_ERROR(info)) {
+		*curr++ = 'E';
+		states_printed = true;
+	}
+
 	for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
 		WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
 		if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
@@ -155,7 +159,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 	 * Today we only save the error info to memory.  Long term we'll also
 	 * send it down to the disk.
 	 */
-	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
+	WRITE_ONCE(fs_info->fs_error, errno);
 
 	/* Don't go through full error handling during mount. */
 	if (!(sb->s_flags & SB_BORN))

From a7f8de500e28bb227e02a7bd35988cf37b816c86 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:05 +0100
Subject: [PATCH 1512/2047] btrfs: return real error when orphan cleanup fails
 due to a transaction abort

During mount we will call btrfs_orphan_cleanup() to remove any inodes that
were previously deleted (have a link count of 0) but for which we were not
able before to remove their items from the subvolume tree. The removal of
the items will happen by triggering eviction, when we do the final iput()
on them at btrfs_orphan_cleanup(), which will end in the loop at
btrfs_evict_inode() that truncates inode items.

In a dire situation we may have a transaction abort due to -ENOSPC when
attempting to truncate the inode items, and in that case the orphan item
(key type BTRFS_ORPHAN_ITEM_KEY) will remain in the subvolume tree and
when we hit the next iteration of the while loop at btrfs_orphan_cleanup()
we will find the same orphan item as before, and then we will return
-EINVAL from btrfs_orphan_cleanup() through the following if statement:

    if (found_key.offset == last_objectid) {
       btrfs_err(fs_info,
                 "Error removing orphan entry, stopping orphan cleanup");
       ret = -EINVAL;
       goto out;
    }

This makes the mount operation fail with -EINVAL, when it should have been
-ENOSPC. This is confusing because -EINVAL might lead a user into thinking
it provided invalid mount options for example.

An example where this happens:

   $ mount test.img /mnt
   mount: /mnt: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error.

   $ dmesg
   [ 2542.356934] BTRFS: device fsid 977fff75-1181-4d2b-a739-384fa710d16e devid 1 transid 47409973 /dev/loop0 scanned by mount (4459)
   [ 2542.357451] BTRFS info (device loop0): using crc32c (crc32c-intel) checksum algorithm
   [ 2542.357461] BTRFS info (device loop0): disk space caching is enabled
   [ 2542.742287] BTRFS info (device loop0): auto enabling async discard
   [ 2542.764554] BTRFS info (device loop0): checking UUID tree
   [ 2551.743065] ------------[ cut here ]------------
   [ 2551.743068] BTRFS: Transaction aborted (error -28)
   [ 2551.743149] WARNING: CPU: 7 PID: 215 at fs/btrfs/block-group.c:3494 btrfs_write_dirty_block_groups+0x397/0x3d0 [btrfs]
   [ 2551.743311] Modules linked in: btrfs blake2b_generic (...)
   [ 2551.743353] CPU: 7 PID: 215 Comm: kworker/u24:5 Not tainted 6.4.0-rc6-btrfs-next-134+ #1
   [ 2551.743356] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014
   [ 2551.743357] Workqueue: events_unbound btrfs_async_reclaim_metadata_space [btrfs]
   [ 2551.743405] RIP: 0010:btrfs_write_dirty_block_groups+0x397/0x3d0 [btrfs]
   [ 2551.743449] Code: 8b 43 0c (...)
   [ 2551.743451] RSP: 0018:ffff982c005a7c40 EFLAGS: 00010286
   [ 2551.743452] RAX: 0000000000000000 RBX: ffff88fc6e44b400 RCX: 0000000000000000
   [ 2551.743453] RDX: 0000000000000002 RSI: ffffffff8dff0878 RDI: 00000000ffffffff
   [ 2551.743454] RBP: ffff88fc51817208 R08: 0000000000000000 R09: ffff982c005a7ae0
   [ 2551.743455] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88fc43d2e570
   [ 2551.743456] R13: ffff88fc43d2e400 R14: ffff88fc8fb08ee0 R15: ffff88fc6e44b530
   [ 2551.743457] FS:  0000000000000000(0000) GS:ffff89035fbc0000(0000) knlGS:0000000000000000
   [ 2551.743458] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
   [ 2551.743459] CR2: 00007fa8cdf2f6f4 CR3: 0000000124850003 CR4: 0000000000370ee0
   [ 2551.743462] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
   [ 2551.743463] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
   [ 2551.743464] Call Trace:
   [ 2551.743472]  <TASK>
   [ 2551.743474]  ? __warn+0x80/0x130
   [ 2551.743478]  ? btrfs_write_dirty_block_groups+0x397/0x3d0 [btrfs]
   [ 2551.743520]  ? report_bug+0x1f4/0x200
   [ 2551.743523]  ? handle_bug+0x42/0x70
   [ 2551.743526]  ? exc_invalid_op+0x14/0x70
   [ 2551.743528]  ? asm_exc_invalid_op+0x16/0x20
   [ 2551.743532]  ? btrfs_write_dirty_block_groups+0x397/0x3d0 [btrfs]
   [ 2551.743574]  ? _raw_spin_unlock+0x15/0x30
   [ 2551.743576]  ? btrfs_run_delayed_refs+0x1bd/0x200 [btrfs]
   [ 2551.743609]  commit_cowonly_roots+0x1e9/0x260 [btrfs]
   [ 2551.743652]  btrfs_commit_transaction+0x42e/0xfa0 [btrfs]
   [ 2551.743693]  ? __pfx_autoremove_wake_function+0x10/0x10
   [ 2551.743697]  flush_space+0xf1/0x5d0 [btrfs]
   [ 2551.743743]  ? _raw_spin_unlock+0x15/0x30
   [ 2551.743745]  ? finish_task_switch+0x91/0x2a0
   [ 2551.743748]  ? _raw_spin_unlock+0x15/0x30
   [ 2551.743750]  ? btrfs_get_alloc_profile+0xc9/0x1f0 [btrfs]
   [ 2551.743793]  btrfs_async_reclaim_metadata_space+0xe1/0x230 [btrfs]
   [ 2551.743837]  process_one_work+0x1d9/0x3e0
   [ 2551.743844]  worker_thread+0x4a/0x3b0
   [ 2551.743847]  ? __pfx_worker_thread+0x10/0x10
   [ 2551.743849]  kthread+0xee/0x120
   [ 2551.743852]  ? __pfx_kthread+0x10/0x10
   [ 2551.743854]  ret_from_fork+0x29/0x50
   [ 2551.743860]  </TASK>
   [ 2551.743861] ---[ end trace 0000000000000000 ]---
   [ 2551.743863] BTRFS info (device loop0: state A): dumping space info:
   [ 2551.743866] BTRFS info (device loop0: state A): space_info DATA has 126976 free, is full
   [ 2551.743868] BTRFS info (device loop0: state A): space_info total=13458472960, used=13458137088, pinned=143360, reserved=0, may_use=0, readonly=65536 zone_unusable=0
   [ 2551.743870] BTRFS info (device loop0: state A): space_info METADATA has -51625984 free, is full
   [ 2551.743872] BTRFS info (device loop0: state A): space_info total=771751936, used=770146304, pinned=1605632, reserved=0, may_use=51625984, readonly=0 zone_unusable=0
   [ 2551.743874] BTRFS info (device loop0: state A): space_info SYSTEM has 14663680 free, is not full
   [ 2551.743875] BTRFS info (device loop0: state A): space_info total=14680064, used=16384, pinned=0, reserved=0, may_use=0, readonly=0 zone_unusable=0
   [ 2551.743877] BTRFS info (device loop0: state A): global_block_rsv: size 53231616 reserved 51544064
   [ 2551.743878] BTRFS info (device loop0: state A): trans_block_rsv: size 0 reserved 0
   [ 2551.743879] BTRFS info (device loop0: state A): chunk_block_rsv: size 0 reserved 0
   [ 2551.743880] BTRFS info (device loop0: state A): delayed_block_rsv: size 0 reserved 0
   [ 2551.743881] BTRFS info (device loop0: state A): delayed_refs_rsv: size 786432 reserved 0
   [ 2551.743886] BTRFS: error (device loop0: state A) in btrfs_write_dirty_block_groups:3494: errno=-28 No space left
   [ 2551.743911] BTRFS info (device loop0: state EA): forced readonly
   [ 2551.743951] BTRFS warning (device loop0: state EA): could not allocate space for delete; will truncate on mount
   [ 2551.743962] BTRFS error (device loop0: state EA): Error removing orphan entry, stopping orphan cleanup
   [ 2551.743973] BTRFS warning (device loop0: state EA): Skipping commit of aborted transaction.
   [ 2551.743989] BTRFS error (device loop0: state EA): could not do orphan cleanup -22

So make the btrfs_orphan_cleanup() return the value of BTRFS_FS_ERROR(),
if it's set, and -EINVAL otherwise.

For that same example, after this change, the mount operation fails with
-ENOSPC:

   $ mount test.img /mnt
   mount: /mnt: mount(2) system call failed: No space left on device.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 19dc0b8ff427..431007e443a8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3509,9 +3509,16 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 		 */
 
 		if (found_key.offset == last_objectid) {
+			/*
+			 * We found the same inode as before. This means we were
+			 * not able to remove its items via eviction triggered
+			 * by an iput(). A transaction abort may have happened,
+			 * due to -ENOSPC for example, so try to grab the error
+			 * that lead to a transaction abort, if any.
+			 */
 			btrfs_err(fs_info,
 				  "Error removing orphan entry, stopping orphan cleanup");
-			ret = -EINVAL;
+			ret = BTRFS_FS_ERROR(fs_info) ?: -EINVAL;
 			goto out;
 		}
 

From 7e3bfd146e3ed1e3125a1c5e53c12dee787b6b4b Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:06 +0100
Subject: [PATCH 1513/2047] btrfs: fail priority metadata ticket with real fs
 error

At priority_reclaim_metadata_space(), if we were not able to satisfy the
the ticket after going through the various flushing states and we notice
the fs went into an error state, likely due to a transaction abort during
the flushing, set the ticket's error to the error that caused the
transaction abort instead of an unconditional -EROFS.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 5b1b71e029ad..be5ce209b918 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -1421,13 +1421,13 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
 	/*
 	 * Attempt to steal from the global rsv if we can, except if the fs was
 	 * turned into error mode due to a transaction abort when flushing space
-	 * above, in that case fail with -EROFS instead of returning success to
-	 * the caller if we can steal from the global rsv - this is just to have
-	 * caller fail immeditelly instead of later when trying to modify the
-	 * fs, making it easier to debug -ENOSPC problems.
+	 * above, in that case fail with the abort error instead of returning
+	 * success to the caller if we can steal from the global rsv - this is
+	 * just to have caller fail immeditelly instead of later when trying to
+	 * modify the fs, making it easier to debug -ENOSPC problems.
 	 */
 	if (BTRFS_FS_ERROR(fs_info)) {
-		ticket->error = -EROFS;
+		ticket->error = BTRFS_FS_ERROR(fs_info);
 		remove_ticket(space_info, ticket);
 	} else if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
 		ticket->error = -ENOSPC;

From 504b1596bd0554c481108e1e27ed5237ed58e60c Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:07 +0100
Subject: [PATCH 1514/2047] btrfs: make btrfs_cleanup_fs_roots() static

btrfs_cleanup_fs_roots() is not used outside disk-io.c, so make it static,
remove its prototype from disk-io.h and move its definition above the
where it's used in disk-io.c

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 100 ++++++++++++++++++++++-----------------------
 fs/btrfs/disk-io.h |   1 -
 2 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ccc34c67b040..a38d3f9a1a03 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2869,6 +2869,56 @@ static int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
 	return 0;
 }
 
+static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
+{
+	u64 root_objectid = 0;
+	struct btrfs_root *gang[8];
+	int i = 0;
+	int err = 0;
+	unsigned int ret = 0;
+
+	while (1) {
+		spin_lock(&fs_info->fs_roots_radix_lock);
+		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+					     (void **)gang, root_objectid,
+					     ARRAY_SIZE(gang));
+		if (!ret) {
+			spin_unlock(&fs_info->fs_roots_radix_lock);
+			break;
+		}
+		root_objectid = gang[ret - 1]->root_key.objectid + 1;
+
+		for (i = 0; i < ret; i++) {
+			/* Avoid to grab roots in dead_roots. */
+			if (btrfs_root_refs(&gang[i]->root_item) == 0) {
+				gang[i] = NULL;
+				continue;
+			}
+			/* Grab all the search result for later use. */
+			gang[i] = btrfs_grab_root(gang[i]);
+		}
+		spin_unlock(&fs_info->fs_roots_radix_lock);
+
+		for (i = 0; i < ret; i++) {
+			if (!gang[i])
+				continue;
+			root_objectid = gang[i]->root_key.objectid;
+			err = btrfs_orphan_cleanup(gang[i]);
+			if (err)
+				goto out;
+			btrfs_put_root(gang[i]);
+		}
+		root_objectid++;
+	}
+out:
+	/* Release the uncleaned roots due to error. */
+	for (; i < ret; i++) {
+		if (gang[i])
+			btrfs_put_root(gang[i]);
+	}
+	return err;
+}
+
 /*
  * Some options only have meaning at mount time and shouldn't persist across
  * remounts, or be displayed. Clear these at the end of mount and remount
@@ -4136,56 +4186,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
 		btrfs_put_root(root);
 }
 
-int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
-{
-	u64 root_objectid = 0;
-	struct btrfs_root *gang[8];
-	int i = 0;
-	int err = 0;
-	unsigned int ret = 0;
-
-	while (1) {
-		spin_lock(&fs_info->fs_roots_radix_lock);
-		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
-					     (void **)gang, root_objectid,
-					     ARRAY_SIZE(gang));
-		if (!ret) {
-			spin_unlock(&fs_info->fs_roots_radix_lock);
-			break;
-		}
-		root_objectid = gang[ret - 1]->root_key.objectid + 1;
-
-		for (i = 0; i < ret; i++) {
-			/* Avoid to grab roots in dead_roots */
-			if (btrfs_root_refs(&gang[i]->root_item) == 0) {
-				gang[i] = NULL;
-				continue;
-			}
-			/* grab all the search result for later use */
-			gang[i] = btrfs_grab_root(gang[i]);
-		}
-		spin_unlock(&fs_info->fs_roots_radix_lock);
-
-		for (i = 0; i < ret; i++) {
-			if (!gang[i])
-				continue;
-			root_objectid = gang[i]->root_key.objectid;
-			err = btrfs_orphan_cleanup(gang[i]);
-			if (err)
-				goto out;
-			btrfs_put_root(gang[i]);
-		}
-		root_objectid++;
-	}
-out:
-	/* release the uncleaned roots due to error */
-	for (; i < ret; i++) {
-		if (gang[i])
-			btrfs_put_root(gang[i]);
-	}
-	return err;
-}
-
 int btrfs_commit_super(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_root *root = fs_info->tree_root;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b03767f4d7ed..02b645744a82 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -77,7 +77,6 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr);
 struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info);
 
 void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
-int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
 void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
 void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
 void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,

From 883647f4b5ca9a62ce43182f32e37e50286484a5 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:08 +0100
Subject: [PATCH 1515/2047] btrfs: make find_free_dev_extent() static

The function find_free_dev_extent() is only used within volumes.c, so make
it static and remove its prototype from volumes.h.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 4 ++--
 fs/btrfs/volumes.h | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 89410a480293..72d8a3be7775 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1726,8 +1726,8 @@ out:
 	return ret;
 }
 
-int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
-			 u64 *start, u64 *len)
+static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
+				u64 *start, u64 *len)
 {
 	/* FIXME use last free of some kind */
 	return find_free_dev_extent_start(device, num_bytes, 0, start, len);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index b8c51f16ba86..a59898d51e9e 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -650,8 +650,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
 int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
 int btrfs_uuid_scan_kthread(void *data);
 bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset);
-int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
-			 u64 *start, u64 *max_avail);
 void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
 int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
 			struct btrfs_ioctl_get_dev_stats *stats);

From ed8947bc73aa5bf0ddb5473eafadaa5d740f2f3f Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:09 +0100
Subject: [PATCH 1516/2047] btrfs: merge find_free_dev_extent() and
 find_free_dev_extent_start()

There is no point in having find_free_dev_extent() because it's just a
simple wrapper around find_free_dev_extent_start() which always passes a
value of 0 for the search_start argument. Since there are no other callers
of find_free_dev_extent_start(), remove find_free_dev_extent() and rename
find_free_dev_extent_start() to find_free_dev_extent(), removing its
search_start argument because it's always 0.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 72d8a3be7775..bfdfecee1afe 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1439,18 +1439,18 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
 	return false;
 }
 
-static u64 dev_extent_search_start(struct btrfs_device *device, u64 start)
+static u64 dev_extent_search_start(struct btrfs_device *device)
 {
 	switch (device->fs_devices->chunk_alloc_policy) {
 	case BTRFS_CHUNK_ALLOC_REGULAR:
-		return max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
+		return BTRFS_DEVICE_RANGE_RESERVED;
 	case BTRFS_CHUNK_ALLOC_ZONED:
 		/*
 		 * We don't care about the starting region like regular
 		 * allocator, because we anyway use/reserve the first two zones
 		 * for superblock logging.
 		 */
-		return ALIGN(start, device->zone_info->zone_size);
+		return 0;
 	default:
 		BUG();
 	}
@@ -1582,15 +1582,15 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
  * correct usable device space, as device extent freed in current transaction
  * is not reported as available.
  */
-static int find_free_dev_extent_start(struct btrfs_device *device,
-				u64 num_bytes, u64 search_start, u64 *start,
-				u64 *len)
+static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
+				u64 *start, u64 *len)
 {
 	struct btrfs_fs_info *fs_info = device->fs_info;
 	struct btrfs_root *root = fs_info->dev_root;
 	struct btrfs_key key;
 	struct btrfs_dev_extent *dev_extent;
 	struct btrfs_path *path;
+	u64 search_start;
 	u64 hole_size;
 	u64 max_hole_start;
 	u64 max_hole_size;
@@ -1600,7 +1600,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
 	int slot;
 	struct extent_buffer *l;
 
-	search_start = dev_extent_search_start(device, search_start);
+	search_start = dev_extent_search_start(device);
 
 	WARN_ON(device->zone_info &&
 		!IS_ALIGNED(num_bytes, device->zone_info->zone_size));
@@ -1726,13 +1726,6 @@ out:
 	return ret;
 }
 
-static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
-				u64 *start, u64 *len)
-{
-	/* FIXME use last free of some kind */
-	return find_free_dev_extent_start(device, num_bytes, 0, start, len);
-}
-
 static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
 			  struct btrfs_device *device,
 			  u64 start, u64 *dev_extent_len)

From 2391245ac2875f784335b9148079c6e73639a5f7 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:10 +0100
Subject: [PATCH 1517/2047] btrfs: avoid starting new transaction when flushing
 delayed items and refs

When flushing space we join a transaction to flush delayed items and
delayed references, in order to try to release space. However using
btrfs_join_transaction() not only joins an existing transaction as well
as it starts a new transaction if there is none open. If there is no
transaction open, we don't have neither delayed items nor delayed
references, so creating a new transaction is a waste of time, IO and
creates an unnecessary rotation of the backup roots without gaining any
benefits (including releasing space).

So use btrfs_join_transaction_nostart() when attempting to flush delayed
items and references.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index be5ce209b918..2db92a201697 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -725,9 +725,11 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 		else
 			nr = -1;
 
-		trans = btrfs_join_transaction(root);
+		trans = btrfs_join_transaction_nostart(root);
 		if (IS_ERR(trans)) {
 			ret = PTR_ERR(trans);
+			if (ret == -ENOENT)
+				ret = 0;
 			break;
 		}
 		ret = btrfs_run_delayed_items_nr(trans, nr);
@@ -743,9 +745,11 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 		break;
 	case FLUSH_DELAYED_REFS_NR:
 	case FLUSH_DELAYED_REFS:
-		trans = btrfs_join_transaction(root);
+		trans = btrfs_join_transaction_nostart(root);
 		if (IS_ERR(trans)) {
 			ret = PTR_ERR(trans);
+			if (ret == -ENOENT)
+				ret = 0;
 			break;
 		}
 		if (state == FLUSH_DELAYED_REFS_NR)

From 2ee70ed19ccd9c95c188816b65dacf3fca1f5565 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:11 +0100
Subject: [PATCH 1518/2047] btrfs: avoid starting and committing empty
 transaction when flushing space

When flushing space and we are in the COMMIT_TRANS state, we join a
transaction with btrfs_join_transaction() and then commit the returned
transaction. However btrfs_join_transaction() starts a new transaction if
there is none currently open, which is pointless since comitting a new,
empty transaction, doesn't achieve anything, it only wastes time, IO and
creates an unnecessary rotation of the backup roots.

So use btrfs_attach_transaction_barrier() to avoid starting a new
transaction. This also waits for any ongoing transaction that is
committing (state >= TRANS_STATE_COMMIT_DOING) to fully complete, and
therefore wait for all the extents that were pinned during the
transaction's lifetime to be unpinned.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 2db92a201697..17c86db7b1b1 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -814,9 +814,18 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 		break;
 	case COMMIT_TRANS:
 		ASSERT(current->journal_info == NULL);
-		trans = btrfs_join_transaction(root);
+		/*
+		 * We don't want to start a new transaction, just attach to the
+		 * current one or wait it fully commits in case its commit is
+		 * happening at the moment. Note: we don't use a nostart join
+		 * because that does not wait for a transaction to fully commit
+		 * (only for it to be unblocked, state TRANS_STATE_UNBLOCKED).
+		 */
+		trans = btrfs_attach_transaction_barrier(root);
 		if (IS_ERR(trans)) {
 			ret = PTR_ERR(trans);
+			if (ret == -ENOENT)
+				ret = 0;
 			break;
 		}
 		ret = btrfs_commit_transaction(trans);

From 6705b48a50d7cad55b763ae4c9544875a9727f80 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:12 +0100
Subject: [PATCH 1519/2047] btrfs: avoid start and commit empty transaction
 when starting qgroup rescan

When starting a qgroup rescan, we try to join a running transaction, with
btrfs_join_transaction(), and then commit the transaction. However using
btrfs_join_transaction() will result in creating a new transaction in case
there isn't any running or if there's an existing one already committing.
This is pointless as we only need to attach to an existing one that is
not committing and in case there's an existing one committing, wait for
its commit to complete. Creating and committing an empty transaction is
wasteful, pointless IO and unnecessary rotation of the backup roots.

So use btrfs_attach_transaction_barrier() instead, to avoid creating and
committing empty transactions.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 2637d6b157ff..1ef60ea8f0bc 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3590,15 +3590,16 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
 	 * going to clear all tracking information for a clean start.
 	 */
 
-	trans = btrfs_join_transaction(fs_info->fs_root);
-	if (IS_ERR(trans)) {
+	trans = btrfs_attach_transaction_barrier(fs_info->fs_root);
+	if (IS_ERR(trans) && trans != ERR_PTR(-ENOENT)) {
 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
 		return PTR_ERR(trans);
-	}
-	ret = btrfs_commit_transaction(trans);
-	if (ret) {
-		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
-		return ret;
+	} else if (trans != ERR_PTR(-ENOENT)) {
+		ret = btrfs_commit_transaction(trans);
+		if (ret) {
+			fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+			return ret;
+		}
 	}
 
 	qgroup_rescan_zero_tracking(fs_info);

From 9c93c238c15ffd8d5622c51a475d5fb35f65ca07 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 26 Jul 2023 16:57:13 +0100
Subject: [PATCH 1520/2047] btrfs: avoid start and commit empty transaction
 when flushing qgroups

When flushing qgroups, we try to join a running transaction, with
btrfs_join_transaction(), and then commit the transaction. However using
btrfs_join_transaction() will result in creating a new transaction in case
there isn't any running or if there's an existing one already committing.
This is pointless as we only need to attach to an existing one that is
not committing and in case there's an existing one committing, wait for
its commit to complete. Creating and committing an empty transaction is
wasteful, pointless IO and unnecessary rotation of the backup roots.

So use btrfs_attach_transaction_barrier() instead, to avoid creating and
committing empty transactions.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1ef60ea8f0bc..b99230db3c82 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3758,9 +3758,11 @@ static int try_flush_qgroup(struct btrfs_root *root)
 		goto out;
 	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
 
-	trans = btrfs_join_transaction(root);
+	trans = btrfs_attach_transaction_barrier(root);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
+		if (ret == -ENOENT)
+			ret = 0;
 		goto out;
 	}
 

From 861093eff4f01319edfc1d1ee276a7f2bf720f1d Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 8 Aug 2023 01:12:31 +0900
Subject: [PATCH 1521/2047] btrfs: introduce struct to consolidate extent
 buffer write context

Introduce btrfs_eb_write_context to consolidate writeback_control and the
exntent buffer context.  This will help adding a block group context as
well.

While at it, move the eb context setting before
btrfs_check_meta_write_pointer(). We can set it here because we anyway need
to skip pages in the same eb if that eb is rejected by
btrfs_check_meta_write_pointer().

Suggested-by: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 14 +++++++-------
 fs/btrfs/extent_io.h |  5 +++++
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 36d80cb8cfa3..3ee83ff7f910 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1805,9 +1805,9 @@ static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
  * previous call.
  * Return <0 for fatal error.
  */
-static int submit_eb_page(struct page *page, struct writeback_control *wbc,
-			  struct extent_buffer **eb_context)
+static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
 {
+	struct writeback_control *wbc = ctx->wbc;
 	struct address_space *mapping = page->mapping;
 	struct btrfs_block_group *cache = NULL;
 	struct extent_buffer *eb;
@@ -1836,7 +1836,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 		return 0;
 	}
 
-	if (eb == *eb_context) {
+	if (eb == ctx->eb) {
 		spin_unlock(&mapping->private_lock);
 		return 0;
 	}
@@ -1845,6 +1845,8 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 	if (!ret)
 		return 0;
 
+	ctx->eb = eb;
+
 	if (!btrfs_check_meta_write_pointer(eb->fs_info, eb, &cache)) {
 		/*
 		 * If for_sync, this hole will be filled with
@@ -1858,8 +1860,6 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 		return ret;
 	}
 
-	*eb_context = eb;
-
 	if (!lock_extent_buffer_for_io(eb, wbc)) {
 		btrfs_revert_meta_write_pointer(cache, eb);
 		if (cache)
@@ -1882,7 +1882,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 int btree_write_cache_pages(struct address_space *mapping,
 				   struct writeback_control *wbc)
 {
-	struct extent_buffer *eb_context = NULL;
+	struct btrfs_eb_write_context ctx = { .wbc = wbc };
 	struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
 	int ret = 0;
 	int done = 0;
@@ -1924,7 +1924,7 @@ retry:
 		for (i = 0; i < nr_folios; i++) {
 			struct folio *folio = fbatch.folios[i];
 
-			ret = submit_eb_page(&folio->page, wbc, &eb_context);
+			ret = submit_eb_page(&folio->page, &ctx);
 			if (ret == 0)
 				continue;
 			if (ret < 0) {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index adda14c1b763..e243a8eac910 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -93,6 +93,11 @@ struct extent_buffer {
 #endif
 };
 
+struct btrfs_eb_write_context {
+	struct writeback_control *wbc;
+	struct extent_buffer *eb;
+};
+
 /*
  * Get the correct offset inside the page of extent buffer.
  *

From 7db94301a980c9da4168ac7ce61e7bde297306ba Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 8 Aug 2023 01:12:32 +0900
Subject: [PATCH 1522/2047] btrfs: zoned: introduce block group context to
 btrfs_eb_write_context

For metadata write out on the zoned mode, we call
btrfs_check_meta_write_pointer() to check if an extent buffer to be written
is aligned to the write pointer.

We look up a block group containing the extent buffer for every extent
buffer, which takes unnecessary effort as the writing extent buffers are
mostly contiguous.

Introduce "zoned_bg" to cache the block group working on.  Also, while
at it, rename "cache" to "block_group".

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 15 +++++++--------
 fs/btrfs/extent_io.h |  2 ++
 fs/btrfs/zoned.c     | 37 +++++++++++++++++++++----------------
 fs/btrfs/zoned.h     |  6 ++----
 4 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3ee83ff7f910..cdd7118e5848 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1809,7 +1809,6 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
 {
 	struct writeback_control *wbc = ctx->wbc;
 	struct address_space *mapping = page->mapping;
-	struct btrfs_block_group *cache = NULL;
 	struct extent_buffer *eb;
 	int ret;
 
@@ -1847,7 +1846,7 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
 
 	ctx->eb = eb;
 
-	if (!btrfs_check_meta_write_pointer(eb->fs_info, eb, &cache)) {
+	if (!btrfs_check_meta_write_pointer(eb->fs_info, ctx)) {
 		/*
 		 * If for_sync, this hole will be filled with
 		 * trasnsaction commit.
@@ -1861,18 +1860,15 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
 	}
 
 	if (!lock_extent_buffer_for_io(eb, wbc)) {
-		btrfs_revert_meta_write_pointer(cache, eb);
-		if (cache)
-			btrfs_put_block_group(cache);
+		btrfs_revert_meta_write_pointer(ctx->zoned_bg, eb);
 		free_extent_buffer(eb);
 		return 0;
 	}
-	if (cache) {
+	if (ctx->zoned_bg) {
 		/*
 		 * Implies write in zoned mode. Mark the last eb in a block group.
 		 */
-		btrfs_schedule_zone_finish_bg(cache, eb);
-		btrfs_put_block_group(cache);
+		btrfs_schedule_zone_finish_bg(ctx->zoned_bg, eb);
 	}
 	write_one_eb(eb, wbc);
 	free_extent_buffer(eb);
@@ -1985,6 +1981,9 @@ retry:
 		ret = 0;
 	if (!ret && BTRFS_FS_ERROR(fs_info))
 		ret = -EROFS;
+
+	if (ctx.zoned_bg)
+		btrfs_put_block_group(ctx.zoned_bg);
 	btrfs_zoned_meta_io_unlock(fs_info);
 	return ret;
 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index e243a8eac910..68368ba99321 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -96,6 +96,8 @@ struct extent_buffer {
 struct btrfs_eb_write_context {
 	struct writeback_control *wbc;
 	struct extent_buffer *eb;
+	/* Block group @eb resides in. Only used for zoned mode. */
+	struct btrfs_block_group *zoned_bg;
 };
 
 /*
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index b43c4536e685..da6bdbe68c7b 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1748,30 +1748,35 @@ out:
 }
 
 bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
-				    struct extent_buffer *eb,
-				    struct btrfs_block_group **cache_ret)
+				    struct btrfs_eb_write_context *ctx)
 {
-	struct btrfs_block_group *cache;
-	bool ret = true;
+	const struct extent_buffer *eb = ctx->eb;
+	struct btrfs_block_group *block_group = ctx->zoned_bg;
 
 	if (!btrfs_is_zoned(fs_info))
 		return true;
 
-	cache = btrfs_lookup_block_group(fs_info, eb->start);
-	if (!cache)
-		return true;
-
-	if (cache->meta_write_pointer != eb->start) {
-		btrfs_put_block_group(cache);
-		cache = NULL;
-		ret = false;
-	} else {
-		cache->meta_write_pointer = eb->start + eb->len;
+	if (block_group) {
+		if (block_group->start > eb->start ||
+		    block_group->start + block_group->length <= eb->start) {
+			btrfs_put_block_group(block_group);
+			block_group = NULL;
+			ctx->zoned_bg = NULL;
+		}
 	}
 
-	*cache_ret = cache;
+	if (!block_group) {
+		block_group = btrfs_lookup_block_group(fs_info, eb->start);
+		if (!block_group)
+			return true;
+		ctx->zoned_bg = block_group;
+	}
 
-	return ret;
+	if (block_group->meta_write_pointer != eb->start)
+		return false;
+	block_group->meta_write_pointer = eb->start + eb->len;
+
+	return true;
 }
 
 void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 27322b926038..49d5bd87245c 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -59,8 +59,7 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans,
 bool btrfs_use_zone_append(struct btrfs_bio *bbio);
 void btrfs_record_physical_zoned(struct btrfs_bio *bbio);
 bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
-				    struct extent_buffer *eb,
-				    struct btrfs_block_group **cache_ret);
+				    struct btrfs_eb_write_context *ctx);
 void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
 				     struct extent_buffer *eb);
 int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length);
@@ -190,8 +189,7 @@ static inline void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
 }
 
 static inline bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
-			       struct extent_buffer *eb,
-			       struct btrfs_block_group **cache_ret)
+						  struct btrfs_eb_write_context *ctx)
 {
 	return true;
 }

From 2ad8c0510a965113404cfe670b41ddc34fb66100 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 8 Aug 2023 01:12:33 +0900
Subject: [PATCH 1523/2047] btrfs: zoned: return int from
 btrfs_check_meta_write_pointer

Now that we have writeback_control passed to
btrfs_check_meta_write_pointer(), we can move the wbc condition in
submit_eb_page() to btrfs_check_meta_write_pointer() and return int.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 11 +++--------
 fs/btrfs/zoned.c     | 30 ++++++++++++++++++++++--------
 fs/btrfs/zoned.h     | 10 +++++-----
 3 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index cdd7118e5848..84192a63511c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1846,14 +1846,9 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
 
 	ctx->eb = eb;
 
-	if (!btrfs_check_meta_write_pointer(eb->fs_info, ctx)) {
-		/*
-		 * If for_sync, this hole will be filled with
-		 * trasnsaction commit.
-		 */
-		if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
-			ret = -EAGAIN;
-		else
+	ret = btrfs_check_meta_write_pointer(eb->fs_info, ctx);
+	if (ret) {
+		if (ret == -EBUSY)
 			ret = 0;
 		free_extent_buffer(eb);
 		return ret;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index da6bdbe68c7b..c8ff94176381 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1747,14 +1747,23 @@ out:
 	}
 }
 
-bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
-				    struct btrfs_eb_write_context *ctx)
+/*
+ * Check if @ctx->eb is aligned to the write pointer.
+ *
+ * Return:
+ *   0:        @ctx->eb is at the write pointer. You can write it.
+ *   -EAGAIN:  There is a hole. The caller should handle the case.
+ *   -EBUSY:   There is a hole, but the caller can just bail out.
+ */
+int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
+				   struct btrfs_eb_write_context *ctx)
 {
+	const struct writeback_control *wbc = ctx->wbc;
 	const struct extent_buffer *eb = ctx->eb;
 	struct btrfs_block_group *block_group = ctx->zoned_bg;
 
 	if (!btrfs_is_zoned(fs_info))
-		return true;
+		return 0;
 
 	if (block_group) {
 		if (block_group->start > eb->start ||
@@ -1768,15 +1777,20 @@ bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
 	if (!block_group) {
 		block_group = btrfs_lookup_block_group(fs_info, eb->start);
 		if (!block_group)
-			return true;
+			return 0;
 		ctx->zoned_bg = block_group;
 	}
 
-	if (block_group->meta_write_pointer != eb->start)
-		return false;
-	block_group->meta_write_pointer = eb->start + eb->len;
+	if (block_group->meta_write_pointer == eb->start) {
+		block_group->meta_write_pointer = eb->start + eb->len;
 
-	return true;
+		return 0;
+	}
+
+	/* If for_sync, this hole will be filled with trasnsaction commit. */
+	if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
+		return -EAGAIN;
+	return -EBUSY;
 }
 
 void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 49d5bd87245c..c0859d8be152 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -58,8 +58,8 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans,
 			    struct extent_buffer *eb);
 bool btrfs_use_zone_append(struct btrfs_bio *bbio);
 void btrfs_record_physical_zoned(struct btrfs_bio *bbio);
-bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
-				    struct btrfs_eb_write_context *ctx);
+int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
+				   struct btrfs_eb_write_context *ctx);
 void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
 				     struct extent_buffer *eb);
 int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length);
@@ -188,10 +188,10 @@ static inline void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
 {
 }
 
-static inline bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
-						  struct btrfs_eb_write_context *ctx)
+static inline int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
+						 struct btrfs_eb_write_context *ctx)
 {
-	return true;
+	return 0;
 }
 
 static inline void btrfs_revert_meta_write_pointer(

From 0356ad41e0ddb8cf0ea4d68820c92598413e445b Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 8 Aug 2023 01:12:34 +0900
Subject: [PATCH 1524/2047] btrfs: zoned: defer advancing meta write pointer

We currently advance the meta_write_pointer in
btrfs_check_meta_write_pointer(). That makes it necessary to revert it
when locking the buffer failed. Instead, we can advance it just before
sending the buffer.

Also, this is necessary for the following commit. In the commit, it needs
to release the zoned_meta_io_lock to allow IOs to come in and wait for them
to fill the currently active block group. If we advance the
meta_write_pointer before locking the extent buffer, the following extent
buffer can pass the meta_write_pointer check, resulting in an unaligned
write failure.

Advancing the pointer is still thread-safe as the extent buffer is locked.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c |  7 +++----
 fs/btrfs/zoned.c     | 15 +--------------
 fs/btrfs/zoned.h     |  8 --------
 3 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 84192a63511c..ac3fca5a5e41 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1855,15 +1855,14 @@ static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
 	}
 
 	if (!lock_extent_buffer_for_io(eb, wbc)) {
-		btrfs_revert_meta_write_pointer(ctx->zoned_bg, eb);
 		free_extent_buffer(eb);
 		return 0;
 	}
+	/* Implies write in zoned mode. */
 	if (ctx->zoned_bg) {
-		/*
-		 * Implies write in zoned mode. Mark the last eb in a block group.
-		 */
+		/* Mark the last eb in the block group. */
 		btrfs_schedule_zone_finish_bg(ctx->zoned_bg, eb);
+		ctx->zoned_bg->meta_write_pointer += eb->len;
 	}
 	write_one_eb(eb, wbc);
 	free_extent_buffer(eb);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index c8ff94176381..9af3cd90ed26 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1781,11 +1781,8 @@ int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
 		ctx->zoned_bg = block_group;
 	}
 
-	if (block_group->meta_write_pointer == eb->start) {
-		block_group->meta_write_pointer = eb->start + eb->len;
-
+	if (block_group->meta_write_pointer == eb->start)
 		return 0;
-	}
 
 	/* If for_sync, this hole will be filled with trasnsaction commit. */
 	if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
@@ -1793,16 +1790,6 @@ int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
 	return -EBUSY;
 }
 
-void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
-				     struct extent_buffer *eb)
-{
-	if (!btrfs_is_zoned(eb->fs_info) || !cache)
-		return;
-
-	ASSERT(cache->meta_write_pointer == eb->start + eb->len);
-	cache->meta_write_pointer = eb->start;
-}
-
 int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length)
 {
 	if (!btrfs_dev_is_sequential(device, physical))
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index c0859d8be152..74ec37a25808 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -60,8 +60,6 @@ bool btrfs_use_zone_append(struct btrfs_bio *bbio);
 void btrfs_record_physical_zoned(struct btrfs_bio *bbio);
 int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
 				   struct btrfs_eb_write_context *ctx);
-void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
-				     struct extent_buffer *eb);
 int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length);
 int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
 				  u64 physical_start, u64 physical_pos);
@@ -194,12 +192,6 @@ static inline int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-static inline void btrfs_revert_meta_write_pointer(
-						struct btrfs_block_group *cache,
-						struct extent_buffer *eb)
-{
-}
-
 static inline int btrfs_zoned_issue_zeroout(struct btrfs_device *device,
 					    u64 physical, u64 length)
 {

From c1c3c2bc2917d09bbb19cb9894ba5950254b9502 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 8 Aug 2023 01:12:35 +0900
Subject: [PATCH 1525/2047] btrfs: zoned: update meta write pointer on zone
 finish

On finishing a zone, the meta_write_pointer should be set of the end of the
zone to reflect the actual write pointer position.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 9af3cd90ed26..d7188680c8c6 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2049,6 +2049,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
 
 	clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
 	block_group->alloc_offset = block_group->zone_capacity;
+	if (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))
+		block_group->meta_write_pointer = block_group->start +
+						  block_group->zone_capacity;
 	block_group->free_space_ctl->free_space = 0;
 	btrfs_clear_treelog_bg(block_group);
 	btrfs_clear_data_reloc_bg(block_group);

From a7e1ac7bdc5af91af2d52e6269fdbd92fe9ee353 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 8 Aug 2023 01:12:36 +0900
Subject: [PATCH 1526/2047] btrfs: zoned: reserve zones for an active
 metadata/system block group

Ensure a metadata and system block group can be activated on write time, by
leaving a certain number of active zones when trying to activate a data
block group.

Zones for two metadata block groups (normal and tree-log) and one system
block group are reserved, according to the profile type: two zones per
block group on the DUP profile and one zone per block group otherwise.

The reservation must be freed once a non-data block group is allocated. If
not, we over-reserve the active zones and data block group activation will
suffer. For the dynamic reservation count, we need to manage the
reservation count per device.

The reservation count variable is protected by
fs_info->zone_active_bgs_lock.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c |  2 +
 fs/btrfs/zoned.c   | 93 +++++++++++++++++++++++++++++++++++++++++++---
 fs/btrfs/zoned.h   |  8 ++++
 3 files changed, 98 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a38d3f9a1a03..5022a3fd778d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3467,6 +3467,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 
 	btrfs_free_zone_cache(fs_info);
 
+	btrfs_check_active_zone_reservation(fs_info);
+
 	if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
 	    !btrfs_check_rw_degradable(fs_info, NULL)) {
 		btrfs_warn(fs_info,
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index d7188680c8c6..fc69041bb6b4 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1889,6 +1889,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 	struct map_lookup *map;
 	struct btrfs_device *device;
 	u64 physical;
+	const bool is_data = (block_group->flags & BTRFS_BLOCK_GROUP_DATA);
 	bool ret;
 	int i;
 
@@ -1910,19 +1911,40 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 		goto out_unlock;
 	}
 
+	spin_lock(&fs_info->zone_active_bgs_lock);
 	for (i = 0; i < map->num_stripes; i++) {
+		struct btrfs_zoned_device_info *zinfo;
+		int reserved = 0;
+
 		device = map->stripes[i].dev;
 		physical = map->stripes[i].physical;
+		zinfo = device->zone_info;
 
-		if (device->zone_info->max_active_zones == 0)
+		if (zinfo->max_active_zones == 0)
 			continue;
 
+		if (is_data)
+			reserved = zinfo->reserved_active_zones;
+		/*
+		 * For the data block group, leave active zones for one
+		 * metadata block group and one system block group.
+		 */
+		if (atomic_read(&zinfo->active_zones_left) <= reserved) {
+			ret = false;
+			spin_unlock(&fs_info->zone_active_bgs_lock);
+			goto out_unlock;
+		}
+
 		if (!btrfs_dev_set_active_zone(device, physical)) {
 			/* Cannot activate the zone */
 			ret = false;
+			spin_unlock(&fs_info->zone_active_bgs_lock);
 			goto out_unlock;
 		}
+		if (!is_data)
+			zinfo->reserved_active_zones--;
 	}
+	spin_unlock(&fs_info->zone_active_bgs_lock);
 
 	/* Successfully activated all the zones */
 	set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
@@ -2061,18 +2083,21 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
 	for (i = 0; i < map->num_stripes; i++) {
 		struct btrfs_device *device = map->stripes[i].dev;
 		const u64 physical = map->stripes[i].physical;
+		struct btrfs_zoned_device_info *zinfo = device->zone_info;
 
-		if (device->zone_info->max_active_zones == 0)
+		if (zinfo->max_active_zones == 0)
 			continue;
 
 		ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
 				       physical >> SECTOR_SHIFT,
-				       device->zone_info->zone_size >> SECTOR_SHIFT,
+				       zinfo->zone_size >> SECTOR_SHIFT,
 				       GFP_NOFS);
 
 		if (ret)
 			return ret;
 
+		if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
+			zinfo->reserved_active_zones++;
 		btrfs_dev_clear_active_zone(device, physical);
 	}
 
@@ -2111,8 +2136,10 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
 
 	/* Check if there is a device with active zones left */
 	mutex_lock(&fs_info->chunk_mutex);
+	spin_lock(&fs_info->zone_active_bgs_lock);
 	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
 		struct btrfs_zoned_device_info *zinfo = device->zone_info;
+		int reserved = 0;
 
 		if (!device->bdev)
 			continue;
@@ -2122,17 +2149,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
 			break;
 		}
 
+		if (flags & BTRFS_BLOCK_GROUP_DATA)
+			reserved = zinfo->reserved_active_zones;
+
 		switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
 		case 0: /* single */
-			ret = (atomic_read(&zinfo->active_zones_left) >= 1);
+			ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved));
 			break;
 		case BTRFS_BLOCK_GROUP_DUP:
-			ret = (atomic_read(&zinfo->active_zones_left) >= 2);
+			ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved));
 			break;
 		}
 		if (ret)
 			break;
 	}
+	spin_unlock(&fs_info->zone_active_bgs_lock);
 	mutex_unlock(&fs_info->chunk_mutex);
 
 	if (!ret)
@@ -2374,3 +2405,55 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
 
 	return 0;
 }
+
+/*
+ * Reserve zones for one metadata block group, one tree-log block group, and one
+ * system block group.
+ */
+void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+	struct btrfs_block_group *block_group;
+	struct btrfs_device *device;
+	/* Reserve zones for normal SINGLE metadata and tree-log block group. */
+	unsigned int metadata_reserve = 2;
+	/* Reserve a zone for SINGLE system block group. */
+	unsigned int system_reserve = 1;
+
+	if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
+		return;
+
+	/*
+	 * This function is called from the mount context. So, there is no
+	 * parallel process touching the bits. No need for read_seqretry().
+	 */
+	if (fs_info->avail_metadata_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
+		metadata_reserve = 4;
+	if (fs_info->avail_system_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
+		system_reserve = 2;
+
+	/* Apply the reservation on all the devices. */
+	mutex_lock(&fs_devices->device_list_mutex);
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		if (!device->bdev)
+			continue;
+
+		device->zone_info->reserved_active_zones =
+			metadata_reserve + system_reserve;
+	}
+	mutex_unlock(&fs_devices->device_list_mutex);
+
+	/* Release reservation for currently active block groups. */
+	spin_lock(&fs_info->zone_active_bgs_lock);
+	list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
+		struct map_lookup *map = block_group->physical_map;
+
+		if (!(block_group->flags &
+		      (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
+			continue;
+
+		for (int i = 0; i < map->num_stripes; i++)
+			map->stripes[i].dev->zone_info->reserved_active_zones--;
+	}
+	spin_unlock(&fs_info->zone_active_bgs_lock);
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 74ec37a25808..b9cec523b778 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -22,6 +22,11 @@ struct btrfs_zoned_device_info {
 	u8  zone_size_shift;
 	u32 nr_zones;
 	unsigned int max_active_zones;
+	/*
+	 * Reserved active zones for one metadata and one system block group.
+	 * It can vary per-device depending on the allocation status.
+	 */
+	int reserved_active_zones;
 	atomic_t active_zones_left;
 	unsigned long *seq_zones;
 	unsigned long *empty_zones;
@@ -78,6 +83,7 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
 int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
 int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
 				struct btrfs_space_info *space_info, bool do_finish);
+void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)
@@ -252,6 +258,8 @@ static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
+static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)

From 13bb483d32abb6f8ebd40141d87eb68f11cc2dd2 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 8 Aug 2023 01:12:37 +0900
Subject: [PATCH 1527/2047] btrfs: zoned: activate metadata block group on
 write time

In the current implementation, block groups are activated at reservation
time to ensure that all reserved bytes can be written to an active metadata
block group. However, this approach has proven to be less efficient, as it
activates block groups more frequently than necessary, putting pressure on
the active zone resource and leading to potential issues such as early
ENOSPC or hung_task.

Another drawback of the current method is that it hampers metadata
over-commit, and necessitates additional flush operations and block group
allocations, resulting in decreased overall performance.

To address these issues, this commit introduces a write-time activation of
metadata and system block group. This involves reserving at least one
active block group specifically for a metadata and system block group.

Since metadata write-out is always allocated sequentially, when we need to
write to a non-active block group, we can wait for the ongoing IOs to
complete, activate a new block group, and then proceed with writing to the
new block group.

Fixes: b09315139136 ("btrfs: zoned: activate metadata block group on flush_space")
CC: stable@vger.kernel.org # 6.1+
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c | 11 ++++++
 fs/btrfs/fs.h          |  3 ++
 fs/btrfs/zoned.c       | 81 ++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index a127865f49f9..b0e432c30e1d 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -4287,6 +4287,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 	struct btrfs_caching_control *caching_ctl;
 	struct rb_node *n;
 
+	if (btrfs_is_zoned(info)) {
+		if (info->active_meta_bg) {
+			btrfs_put_block_group(info->active_meta_bg);
+			info->active_meta_bg = NULL;
+		}
+		if (info->active_system_bg) {
+			btrfs_put_block_group(info->active_system_bg);
+			info->active_system_bg = NULL;
+		}
+	}
+
 	write_lock(&info->block_group_cache_lock);
 	while (!list_empty(&info->caching_block_groups)) {
 		caching_ctl = list_entry(info->caching_block_groups.next,
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index ef07c6c252d8..a523d64d5491 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -770,6 +770,9 @@ struct btrfs_fs_info {
 	u64 data_reloc_bg;
 	struct mutex zoned_data_reloc_io_lock;
 
+	struct btrfs_block_group *active_meta_bg;
+	struct btrfs_block_group *active_system_bg;
+
 	u64 nr_global_roots;
 
 	spinlock_t zone_active_bgs_lock;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index fc69041bb6b4..099cb6a6d3b3 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -65,6 +65,9 @@
 
 #define SUPER_INFO_SECTORS	((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT)
 
+static void wait_eb_writebacks(struct btrfs_block_group *block_group);
+static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written);
+
 static inline bool sb_zone_is_full(const struct blk_zone *zone)
 {
 	return (zone->cond == BLK_ZONE_COND_FULL) ||
@@ -1747,6 +1750,62 @@ out:
 	}
 }
 
+static bool check_bg_is_active(struct btrfs_eb_write_context *ctx,
+			       struct btrfs_block_group **active_bg)
+{
+	const struct writeback_control *wbc = ctx->wbc;
+	struct btrfs_block_group *block_group = ctx->zoned_bg;
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
+
+	if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
+		return true;
+
+	if (fs_info->treelog_bg == block_group->start) {
+		if (!btrfs_zone_activate(block_group)) {
+			int ret_fin = btrfs_zone_finish_one_bg(fs_info);
+
+			if (ret_fin != 1 || !btrfs_zone_activate(block_group))
+				return false;
+		}
+	} else if (*active_bg != block_group) {
+		struct btrfs_block_group *tgt = *active_bg;
+
+		/* zoned_meta_io_lock protects fs_info->active_{meta,system}_bg. */
+		lockdep_assert_held(&fs_info->zoned_meta_io_lock);
+
+		if (tgt) {
+			/*
+			 * If there is an unsent IO left in the allocated area,
+			 * we cannot wait for them as it may cause a deadlock.
+			 */
+			if (tgt->meta_write_pointer < tgt->start + tgt->alloc_offset) {
+				if (wbc->sync_mode == WB_SYNC_NONE ||
+				    (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync))
+					return false;
+			}
+
+			/* Pivot active metadata/system block group. */
+			btrfs_zoned_meta_io_unlock(fs_info);
+			wait_eb_writebacks(tgt);
+			do_zone_finish(tgt, true);
+			btrfs_zoned_meta_io_lock(fs_info);
+			if (*active_bg == tgt) {
+				btrfs_put_block_group(tgt);
+				*active_bg = NULL;
+			}
+		}
+		if (!btrfs_zone_activate(block_group))
+			return false;
+		if (*active_bg != block_group) {
+			ASSERT(*active_bg == NULL);
+			*active_bg = block_group;
+			btrfs_get_block_group(block_group);
+		}
+	}
+
+	return true;
+}
+
 /*
  * Check if @ctx->eb is aligned to the write pointer.
  *
@@ -1781,8 +1840,26 @@ int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
 		ctx->zoned_bg = block_group;
 	}
 
-	if (block_group->meta_write_pointer == eb->start)
-		return 0;
+	if (block_group->meta_write_pointer == eb->start) {
+		struct btrfs_block_group **tgt;
+
+		if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
+			return 0;
+
+		if (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)
+			tgt = &fs_info->active_system_bg;
+		else
+			tgt = &fs_info->active_meta_bg;
+		if (check_bg_is_active(ctx, tgt))
+			return 0;
+	}
+
+	/*
+	 * Since we may release fs_info->zoned_meta_io_lock, someone can already
+	 * start writing this eb. In that case, we can just bail out.
+	 */
+	if (block_group->meta_write_pointer > eb->start)
+		return -EBUSY;
 
 	/* If for_sync, this hole will be filled with trasnsaction commit. */
 	if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)

From 6a8ebc773ef64c8f12d6d60fd6e53d5ccc81314b Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 8 Aug 2023 01:12:38 +0900
Subject: [PATCH 1528/2047] btrfs: zoned: no longer count fresh BG region as
 zone unusable

Now that we switched to write time activation, we no longer need to (and
must not) count the fresh region as zone unusable. This commit is similar
to revert of commit fa2068d7e922b434eb ("btrfs: zoned: count fresh BG
region as zone unusable").

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/free-space-cache.c |  8 +-------
 fs/btrfs/zoned.c            | 26 +++-----------------------
 2 files changed, 4 insertions(+), 30 deletions(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index cd5bfda2c259..27fad70451aa 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2704,13 +2704,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
 		bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
 
 	spin_lock(&ctl->tree_lock);
-	/* Count initial region as zone_unusable until it gets activated. */
 	if (!used)
 		to_free = size;
-	else if (initial &&
-		 test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &block_group->fs_info->flags) &&
-		 (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
-		to_free = 0;
 	else if (initial)
 		to_free = block_group->zone_capacity;
 	else if (offset >= block_group->alloc_offset)
@@ -2738,8 +2733,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
 	reclaimable_unusable = block_group->zone_unusable -
 			       (block_group->length - block_group->zone_capacity);
 	/* All the region is now unusable. Mark it as unused and reclaim */
-	if (block_group->zone_unusable == block_group->length &&
-	    block_group->alloc_offset) {
+	if (block_group->zone_unusable == block_group->length) {
 		btrfs_mark_bg_unused(block_group);
 	} else if (bg_reclaim_threshold &&
 		   reclaimable_unusable >=
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 099cb6a6d3b3..52f49b0d2dee 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1586,19 +1586,9 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
 		return;
 
 	WARN_ON(cache->bytes_super != 0);
-
-	/* Check for block groups never get activated */
-	if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) &&
-	    cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) &&
-	    !test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) &&
-	    cache->alloc_offset == 0) {
-		unusable = cache->length;
-		free = 0;
-	} else {
-		unusable = (cache->alloc_offset - cache->used) +
-			   (cache->length - cache->zone_capacity);
-		free = cache->zone_capacity - cache->alloc_offset;
-	}
+	unusable = (cache->alloc_offset - cache->used) +
+		   (cache->length - cache->zone_capacity);
+	free = cache->zone_capacity - cache->alloc_offset;
 
 	/* We only need ->free_space in ALLOC_SEQ block groups */
 	cache->cached = BTRFS_CACHE_FINISHED;
@@ -1962,7 +1952,6 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
 bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 {
 	struct btrfs_fs_info *fs_info = block_group->fs_info;
-	struct btrfs_space_info *space_info = block_group->space_info;
 	struct map_lookup *map;
 	struct btrfs_device *device;
 	u64 physical;
@@ -1975,7 +1964,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
 	map = block_group->physical_map;
 
-	spin_lock(&space_info->lock);
 	spin_lock(&block_group->lock);
 	if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
 		ret = true;
@@ -2025,14 +2013,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
 	/* Successfully activated all the zones */
 	set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
-	WARN_ON(block_group->alloc_offset != 0);
-	if (block_group->zone_unusable == block_group->length) {
-		block_group->zone_unusable = block_group->length - block_group->zone_capacity;
-		space_info->bytes_zone_unusable -= block_group->zone_capacity;
-	}
 	spin_unlock(&block_group->lock);
-	btrfs_try_granting_tickets(fs_info, space_info);
-	spin_unlock(&space_info->lock);
 
 	/* For the active block group list */
 	btrfs_get_block_group(block_group);
@@ -2045,7 +2026,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
 out_unlock:
 	spin_unlock(&block_group->lock);
-	spin_unlock(&space_info->lock);
 	return ret;
 }
 

From 5a7d107e5ef9b452cf0aa96ac931b29ea980d997 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 8 Aug 2023 01:12:39 +0900
Subject: [PATCH 1529/2047] btrfs: zoned: don't activate non-DATA BG on
 allocation

Now that a non-DATA block group is activated at write time, don't
activate it on allocation time.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c |  2 +-
 fs/btrfs/extent-tree.c |  8 +++++++-
 fs/btrfs/space-info.c  | 28 ----------------------------
 3 files changed, 8 insertions(+), 30 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index b0e432c30e1d..0cb1dee965a0 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -4089,7 +4089,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
 
 	if (IS_ERR(ret_bg)) {
 		ret = PTR_ERR(ret_bg);
-	} else if (from_extent_allocation) {
+	} else if (from_extent_allocation && (flags & BTRFS_BLOCK_GROUP_DATA)) {
 		/*
 		 * New block group is likely to be used soon. Try to activate
 		 * it now. Failure is OK for now.
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6dfe086523f9..bc9ff192196f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3690,7 +3690,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	}
 	spin_unlock(&block_group->lock);
 
-	if (!ret && !btrfs_zone_activate(block_group)) {
+	/* Metadata block group is activated at write time. */
+	if (!ret && (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
+	    !btrfs_zone_activate(block_group)) {
 		ret = 1;
 		/*
 		 * May need to clear fs_info->{treelog,data_reloc}_bg.
@@ -3866,6 +3868,10 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
 static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
 				    struct find_free_extent_ctl *ffe_ctl)
 {
+	/* Block group's activeness is not a requirement for METADATA block groups. */
+	if (!(ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA))
+		return 0;
+
 	/* If we can activate new zone, just allocate a chunk and use it */
 	if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
 		return 0;
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 17c86db7b1b1..356638f54fef 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -761,18 +761,6 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 		break;
 	case ALLOC_CHUNK:
 	case ALLOC_CHUNK_FORCE:
-		/*
-		 * For metadata space on zoned filesystem, reaching here means we
-		 * don't have enough space left in active_total_bytes. Try to
-		 * activate a block group first, because we may have inactive
-		 * block group already allocated.
-		 */
-		ret = btrfs_zoned_activate_one_bg(fs_info, space_info, false);
-		if (ret < 0)
-			break;
-		else if (ret == 1)
-			break;
-
 		trans = btrfs_join_transaction(root);
 		if (IS_ERR(trans)) {
 			ret = PTR_ERR(trans);
@@ -784,22 +772,6 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 					CHUNK_ALLOC_FORCE);
 		btrfs_end_transaction(trans);
 
-		/*
-		 * For metadata space on zoned filesystem, allocating a new chunk
-		 * is not enough. We still need to activate the block * group.
-		 * Active the newly allocated block group by (maybe) finishing
-		 * a block group.
-		 */
-		if (ret == 1) {
-			ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true);
-			/*
-			 * Revert to the original ret regardless we could finish
-			 * one block group or not.
-			 */
-			if (ret >= 0)
-				ret = 1;
-		}
-
 		if (ret > 0 || ret == -ENOSPC)
 			ret = 0;
 		break;

From 5b135b382a360f4c87cf8896d1465b0b07f10cb0 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Tue, 8 Aug 2023 01:12:40 +0900
Subject: [PATCH 1530/2047] btrfs: zoned: re-enable metadata over-commit for
 zoned mode

Now that, we can re-enable metadata over-commit. As we moved the activation
from the reservation time to the write time, we no longer need to ensure
all the reserved bytes is properly activated.

Without the metadata over-commit, it suffers from lower performance because
it needs to flush the delalloc items more often and allocate more block
groups. Re-enabling metadata over-commit will solve the issue.

Fixes: 79417d040f4f ("btrfs: zoned: disable metadata overcommit for zoned")
CC: stable@vger.kernel.org # 6.1+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 356638f54fef..d7e8cd4f140c 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -389,11 +389,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
 		return 0;
 
 	used = btrfs_space_info_used(space_info, true);
-	if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) &&
-	    (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
-		avail = 0;
-	else
-		avail = calc_available_free_space(fs_info, space_info, flush);
+	avail = calc_available_free_space(fs_info, space_info, flush);
 
 	if (used + bytes < space_info->total_bytes + avail)
 		return 1;

From 257614301a5db9f7b0548584ca207ad7785c8b89 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 9 Aug 2023 15:08:21 +0800
Subject: [PATCH 1531/2047] btrfs: handle errors properly in
 update_inline_extent_backref()

[PROBLEM]
Inside function update_inline_extent_backref(), we have several
BUG_ON()s along with some ASSERT()s which can be triggered by corrupted
filesystem.

[ANAYLYSE]
Most of those BUG_ON()s and ASSERT()s are just a way of handling
unexpected on-disk data.

Although we have tree-checker to rule out obviously incorrect extent
tree blocks, it's not enough for these ones.  Thus we need proper error
handling for them.

[FIX]
Thankfully all the callers of update_inline_extent_backref() would
eventually handle the errror by aborting the current transaction.
So this patch would do the proper error handling by:

- Make update_inline_extent_backref() to return int
  The return value would be either 0 or -EUCLEAN.

- Replace BUG_ON()s and ASSERT()s with proper error handling
  This includes:
  * Dump the bad extent tree leaf
  * Output an error message for the cause
    This would include the extent bytenr, num_bytes (if needed), the bad
    values and expected good values.
  * Return -EUCLEAN

  Note here we remove all the WARN_ON()s, as eventually the transaction
  would be aborted, thus a backtrace would be triggered anyway.

- Better comments on why we expect refs == 1 and refs_to_mode == -1 for
  tree blocks

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 75 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 62 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bc9ff192196f..3c62a7eaf24b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -381,11 +381,11 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
 		}
 	}
 
+	WARN_ON(1);
 	btrfs_print_leaf(eb);
 	btrfs_err(eb->fs_info,
 		  "eb %llu iref 0x%lx invalid extent inline ref type %d",
 		  eb->start, (unsigned long)iref, type);
-	WARN_ON(1);
 
 	return BTRFS_REF_TYPE_INVALID;
 }
@@ -1058,13 +1058,13 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
 /*
  * helper to update/remove inline back ref
  */
-static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_path *path,
+static noinline_for_stack int update_inline_extent_backref(struct btrfs_path *path,
 				  struct btrfs_extent_inline_ref *iref,
 				  int refs_to_mod,
 				  struct btrfs_delayed_extent_op *extent_op)
 {
 	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_fs_info *fs_info = leaf->fs_info;
 	struct btrfs_extent_item *ei;
 	struct btrfs_extent_data_ref *dref = NULL;
 	struct btrfs_shared_data_ref *sref = NULL;
@@ -1077,18 +1077,33 @@ void update_inline_extent_backref(struct btrfs_path *path,
 
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 	refs = btrfs_extent_refs(leaf, ei);
-	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
+	if (unlikely(refs_to_mod < 0 && refs + refs_to_mod <= 0)) {
+		struct btrfs_key key;
+		u32 extent_size;
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.type == BTRFS_METADATA_ITEM_KEY)
+			extent_size = fs_info->nodesize;
+		else
+			extent_size = key.offset;
+		btrfs_print_leaf(leaf);
+		btrfs_err(fs_info,
+	"invalid refs_to_mod for extent %llu num_bytes %u, has %d expect >= -%llu",
+			  key.objectid, extent_size, refs_to_mod, refs);
+		return -EUCLEAN;
+	}
 	refs += refs_to_mod;
 	btrfs_set_extent_refs(leaf, ei, refs);
 	if (extent_op)
 		__run_delayed_extent_op(extent_op, leaf, ei);
 
-	/*
-	 * If type is invalid, we should have bailed out after
-	 * lookup_inline_extent_backref().
-	 */
 	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
-	ASSERT(type != BTRFS_REF_TYPE_INVALID);
+	/*
+	 * Function btrfs_get_extent_inline_ref_type() has already printed
+	 * error messages.
+	 */
+	if (unlikely(type == BTRFS_REF_TYPE_INVALID))
+		return -EUCLEAN;
 
 	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
 		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -1098,10 +1113,43 @@ void update_inline_extent_backref(struct btrfs_path *path,
 		refs = btrfs_shared_data_ref_count(leaf, sref);
 	} else {
 		refs = 1;
-		BUG_ON(refs_to_mod != -1);
+		/*
+		 * For tree blocks we can only drop one ref for it, and tree
+		 * blocks should not have refs > 1.
+		 *
+		 * Furthermore if we're inserting a new inline backref, we
+		 * won't reach this path either. That would be
+		 * setup_inline_extent_backref().
+		 */
+		if (unlikely(refs_to_mod != -1)) {
+			struct btrfs_key key;
+
+			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+			btrfs_print_leaf(leaf);
+			btrfs_err(fs_info,
+			"invalid refs_to_mod for tree block %llu, has %d expect -1",
+				  key.objectid, refs_to_mod);
+			return -EUCLEAN;
+		}
 	}
 
-	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
+	if (unlikely(refs_to_mod < 0 && refs < -refs_to_mod)) {
+		struct btrfs_key key;
+		u32 extent_size;
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.type == BTRFS_METADATA_ITEM_KEY)
+			extent_size = fs_info->nodesize;
+		else
+			extent_size = key.offset;
+		btrfs_print_leaf(leaf);
+		btrfs_err(fs_info,
+"invalid refs_to_mod for backref entry, iref %lu extent %llu num_bytes %u, has %d expect >= -%llu",
+			  (unsigned long)iref, key.objectid, extent_size,
+			  refs_to_mod, refs);
+		return -EUCLEAN;
+	}
 	refs += refs_to_mod;
 
 	if (refs > 0) {
@@ -1121,6 +1169,7 @@ void update_inline_extent_backref(struct btrfs_path *path,
 		btrfs_truncate_item(path, item_size, 1);
 	}
 	btrfs_mark_buffer_dirty(leaf);
+	return 0;
 }
 
 static noinline_for_stack
@@ -1149,7 +1198,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
 				   bytenr, num_bytes, root_objectid, path->slots[0]);
 			return -EUCLEAN;
 		}
-		update_inline_extent_backref(path, iref, refs_to_add, extent_op);
+		ret = update_inline_extent_backref(path, iref, refs_to_add, extent_op);
 	} else if (ret == -ENOENT) {
 		setup_inline_extent_backref(trans->fs_info, path, iref, parent,
 					    root_objectid, owner, offset,
@@ -1169,7 +1218,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 
 	BUG_ON(!is_data && refs_to_drop != 1);
 	if (iref)
-		update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
+		ret = update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
 	else if (is_data)
 		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
 	else

From 84af994b85b89ae405b8ea930653543bc5b864d3 Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Thu, 10 Aug 2023 11:00:22 +0800
Subject: [PATCH 1532/2047] btrfs: use LIST_HEAD() to initialize the list_head

Use LIST_HEAD() to initialize the list_head instead of open-coding it.

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c      | 12 +++---------
 fs/btrfs/file.c         |  3 +--
 fs/btrfs/inode.c        | 17 +++++------------
 fs/btrfs/ordered-data.c |  4 +---
 fs/btrfs/send.c         |  6 ++----
 fs/btrfs/tree-log.c     |  4 +---
 6 files changed, 13 insertions(+), 33 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5022a3fd778d..48faf9c968d0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4554,9 +4554,7 @@ static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
 static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_root *root;
-	struct list_head splice;
-
-	INIT_LIST_HEAD(&splice);
+	LIST_HEAD(splice);
 
 	spin_lock(&fs_info->ordered_root_lock);
 	list_splice_init(&fs_info->ordered_roots, &splice);
@@ -4662,9 +4660,7 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
 {
 	struct btrfs_inode *btrfs_inode;
-	struct list_head splice;
-
-	INIT_LIST_HEAD(&splice);
+	LIST_HEAD(splice);
 
 	spin_lock(&root->delalloc_lock);
 	list_splice_init(&root->delalloc_inodes, &splice);
@@ -4697,9 +4693,7 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
 static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_root *root;
-	struct list_head splice;
-
-	INIT_LIST_HEAD(&splice);
+	LIST_HEAD(splice);
 
 	spin_lock(&fs_info->delalloc_root_lock);
 	list_splice_init(&fs_info->delalloc_roots, &splice);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fd03e689a6be..6edad7b9a5d3 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3018,7 +3018,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 	struct extent_changeset *data_reserved = NULL;
 	struct falloc_range *range;
 	struct falloc_range *tmp;
-	struct list_head reserve_list;
+	LIST_HEAD(reserve_list);
 	u64 cur_offset;
 	u64 last_byte;
 	u64 alloc_start;
@@ -3110,7 +3110,6 @@ static long btrfs_fallocate(struct file *file, int mode,
 	btrfs_assert_inode_range_clean(BTRFS_I(inode), alloc_start, locked_end);
 
 	/* First, check if we exceed the qgroup limit */
-	INIT_LIST_HEAD(&reserve_list);
 	while (cur_offset < alloc_end) {
 		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
 				      alloc_end - cur_offset);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 431007e443a8..f2ab27084cc4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5861,8 +5861,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 	struct btrfs_key found_key;
 	struct btrfs_path *path;
 	void *addr;
-	struct list_head ins_list;
-	struct list_head del_list;
+	LIST_HEAD(ins_list);
+	LIST_HEAD(del_list);
 	int ret;
 	char *name_ptr;
 	int name_len;
@@ -5881,8 +5881,6 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 	addr = private->filldir_buf;
 	path->reada = READA_FORWARD;
 
-	INIT_LIST_HEAD(&ins_list);
-	INIT_LIST_HEAD(&del_list);
 	put = btrfs_readdir_get_delayed_items(inode, private->last_index,
 					      &ins_list, &del_list);
 
@@ -9244,14 +9242,11 @@ static int start_delalloc_inodes(struct btrfs_root *root,
 	struct btrfs_inode *binode;
 	struct inode *inode;
 	struct btrfs_delalloc_work *work, *next;
-	struct list_head works;
-	struct list_head splice;
+	LIST_HEAD(works);
+	LIST_HEAD(splice);
 	int ret = 0;
 	bool full_flush = wbc->nr_to_write == LONG_MAX;
 
-	INIT_LIST_HEAD(&works);
-	INIT_LIST_HEAD(&splice);
-
 	mutex_lock(&root->delalloc_mutex);
 	spin_lock(&root->delalloc_lock);
 	list_splice_init(&root->delalloc_inodes, &splice);
@@ -9339,14 +9334,12 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
 		.range_end = LLONG_MAX,
 	};
 	struct btrfs_root *root;
-	struct list_head splice;
+	LIST_HEAD(splice);
 	int ret;
 
 	if (BTRFS_FS_ERROR(fs_info))
 		return -EROFS;
 
-	INIT_LIST_HEAD(&splice);
-
 	mutex_lock(&fs_info->delalloc_root_mutex);
 	spin_lock(&fs_info->delalloc_root_lock);
 	list_splice_init(&fs_info->delalloc_roots, &splice);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 109e80ed25b6..b46ab348e8e5 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -740,11 +740,9 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
 			     const u64 range_start, const u64 range_len)
 {
 	struct btrfs_root *root;
-	struct list_head splice;
+	LIST_HEAD(splice);
 	u64 done;
 
-	INIT_LIST_HEAD(&splice);
-
 	mutex_lock(&fs_info->ordered_operations_mutex);
 	spin_lock(&fs_info->ordered_root_lock);
 	list_splice_init(&fs_info->ordered_roots, &splice);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 8bfd44750efe..3a566150c531 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3685,7 +3685,7 @@ static void tail_append_pending_moves(struct send_ctx *sctx,
 static int apply_children_dir_moves(struct send_ctx *sctx)
 {
 	struct pending_dir_move *pm;
-	struct list_head stack;
+	LIST_HEAD(stack);
 	u64 parent_ino = sctx->cur_ino;
 	int ret = 0;
 
@@ -3693,7 +3693,6 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
 	if (!pm)
 		return 0;
 
-	INIT_LIST_HEAD(&stack);
 	tail_append_pending_moves(sctx, pm, &stack);
 
 	while (!list_empty(&stack)) {
@@ -4165,7 +4164,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
 	int ret = 0;
 	struct recorded_ref *cur;
 	struct recorded_ref *cur2;
-	struct list_head check_dirs;
+	LIST_HEAD(check_dirs);
 	struct fs_path *valid_path = NULL;
 	u64 ow_inode = 0;
 	u64 ow_gen;
@@ -4184,7 +4183,6 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
 	 * which is always '..'
 	 */
 	BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
-	INIT_LIST_HEAD(&check_dirs);
 
 	valid_path = fs_path_alloc();
 	if (!valid_path) {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 8ad7e7e38d18..b9229c08164f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4841,13 +4841,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	struct btrfs_ordered_extent *ordered;
 	struct btrfs_ordered_extent *tmp;
 	struct extent_map *em, *n;
-	struct list_head extents;
+	LIST_HEAD(extents);
 	struct extent_map_tree *tree = &inode->extent_tree;
 	int ret = 0;
 	int num = 0;
 
-	INIT_LIST_HEAD(&extents);
-
 	write_lock(&tree->lock);
 
 	list_for_each_entry_safe(em, n, &tree->modified_extents, list) {

From cd361199ff23776481c37023a55d855d5ad5c0f5 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Mon, 31 Jul 2023 16:28:43 -0400
Subject: [PATCH 1533/2047] btrfs: wait on uncached block groups on every
 allocation loop

My initial fix for the generic/475 hangs was related to metadata, but
our CI testing uncovered another case where we hang for similar reasons.
We again have a task with a plug that is holding an outstanding request
that is keeping the dm device from finishing it's suspend, and that task
is stuck in the allocator.

This time it is stuck trying to allocate data, but we do not have a
block group that matches the size class.  The larger loop in the
allocator looks like this (simplified of course)

  find_free_extent
    for_each_block_group {
      ffe_ctl->cached == btrfs_block_group_cache_done(bg)
      if (!ffe_ctl->cached)
	ffe_ctl->have_caching_bg = true;
      do_allocation()
	btrfs_wait_block_group_cache_progress();
    }

    if (loop == LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
      go search again;

In my earlier fix we were trying to allocate from the block group, but
we weren't waiting for the progress because we were only waiting for the
free space to be >= the amount of free space we wanted.  My fix made it
so we waited for forward progress to be made as well, so we would be
sure to wait.

This time however we did not have a block group that matched our size
class, so what was happening was this

  find_free_extent
    for_each_block_group {
      ffe_ctl->cached == btrfs_block_group_cache_done(bg)
      if (!ffe_ctl->cached)
	ffe_ctl->have_caching_bg = true;
      if (size_class_doesn't_match())
	goto loop;
      do_allocation()
	btrfs_wait_block_group_cache_progress();
  loop:
      release_block_group(block_group);
    }

    if (loop == LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
      go search again;

The size_class_doesn't_match() part was true, so we'd just skip this
block group and never wait for caching, and then because we found a
caching block group we'd just go back and do the loop again.  We never
sleep and thus never flush the plug and we have the same deadlock.

Fix the logic for waiting on the block group caching to instead do it
unconditionally when we goto loop.  This takes the logic out of the
allocation step, so now the loop looks more like this

  find_free_extent
    for_each_block_group {
      ffe_ctl->cached == btrfs_block_group_cache_done(bg)
      if (!ffe_ctl->cached)
	ffe_ctl->have_caching_bg = true;
      if (size_class_doesn't_match())
	goto loop;
      do_allocation()
	btrfs_wait_block_group_cache_progress();
  loop:
      if (loop > LOOP_CACHING_NOWAIT && !ffe_ctl->retry_uncached &&
	  !ffe_ctl->cached) {
	 ffe_ctl->retry_uncached = true;
	 btrfs_wait_block_group_cache_progress();
      }

      release_block_group(block_group);
    }

    if (loop == LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
      go search again;

This simplifies the logic a lot, and makes sure that if we're hitting
uncached block groups we're always waiting on them at some point.

I ran this through 100 iterations of generic/475, as this particular
case was harder to hit than the previous one.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 61 +++++++++++++-----------------------------
 fs/btrfs/extent-tree.h | 13 +++------
 2 files changed, 22 insertions(+), 52 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3c62a7eaf24b..ba22790f95f7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3481,7 +3481,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
  * Helper function for find_free_extent().
  *
  * Return -ENOENT to inform caller that we need fallback to unclustered mode.
- * Return -EAGAIN to inform caller that we need to re-search this block group
  * Return >0 to inform caller that we find nothing
  * Return 0 means we have found a location and set ffe_ctl->found_offset.
  */
@@ -3562,14 +3561,6 @@ refill_cluster:
 			trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
 			return 0;
 		}
-	} else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
-		   !ffe_ctl->retry_clustered) {
-		spin_unlock(&last_ptr->refill_lock);
-
-		ffe_ctl->retry_clustered = true;
-		btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
-				ffe_ctl->empty_cluster + ffe_ctl->empty_size);
-		return -EAGAIN;
 	}
 	/*
 	 * At this point we either didn't find a cluster or we weren't able to
@@ -3584,7 +3575,6 @@ refill_cluster:
 /*
  * Return >0 to inform caller that we find nothing
  * Return 0 when we found an free extent and set ffe_ctrl->found_offset
- * Return -EAGAIN to inform caller that we need to re-search this block group
  */
 static int find_free_extent_unclustered(struct btrfs_block_group *bg,
 					struct find_free_extent_ctl *ffe_ctl)
@@ -3622,25 +3612,8 @@ static int find_free_extent_unclustered(struct btrfs_block_group *bg,
 	offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
 			ffe_ctl->num_bytes, ffe_ctl->empty_size,
 			&ffe_ctl->max_extent_size);
-
-	/*
-	 * If we didn't find a chunk, and we haven't failed on this block group
-	 * before, and this block group is in the middle of caching and we are
-	 * ok with waiting, then go ahead and wait for progress to be made, and
-	 * set @retry_unclustered to true.
-	 *
-	 * If @retry_unclustered is true then we've already waited on this
-	 * block group once and should move on to the next block group.
-	 */
-	if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
-	    ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
-		btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
-						      ffe_ctl->empty_size);
-		ffe_ctl->retry_unclustered = true;
-		return -EAGAIN;
-	} else if (!offset) {
+	if (!offset)
 		return 1;
-	}
 	ffe_ctl->found_offset = offset;
 	return 0;
 }
@@ -3654,7 +3627,7 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group,
 	/* We want to try and use the cluster allocator, so lets look there */
 	if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
 		ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
-		if (ret >= 0 || ret == -EAGAIN)
+		if (ret >= 0)
 			return ret;
 		/* ret == -ENOENT case falls through */
 	}
@@ -3872,8 +3845,7 @@ static void release_block_group(struct btrfs_block_group *block_group,
 {
 	switch (ffe_ctl->policy) {
 	case BTRFS_EXTENT_ALLOC_CLUSTERED:
-		ffe_ctl->retry_clustered = false;
-		ffe_ctl->retry_unclustered = false;
+		ffe_ctl->retry_uncached = false;
 		break;
 	case BTRFS_EXTENT_ALLOC_ZONED:
 		/* Nothing to do */
@@ -4220,9 +4192,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
 	ffe_ctl->orig_have_caching_bg = false;
 	ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
 	ffe_ctl->loop = 0;
-	/* For clustered allocation */
-	ffe_ctl->retry_clustered = false;
-	ffe_ctl->retry_unclustered = false;
+	ffe_ctl->retry_uncached = false;
 	ffe_ctl->cached = 0;
 	ffe_ctl->max_extent_size = 0;
 	ffe_ctl->total_free_space = 0;
@@ -4373,16 +4343,12 @@ have_block_group:
 
 		bg_ret = NULL;
 		ret = do_allocation(block_group, ffe_ctl, &bg_ret);
-		if (ret == 0) {
-			if (bg_ret && bg_ret != block_group) {
-				btrfs_release_block_group(block_group,
-							  ffe_ctl->delalloc);
-				block_group = bg_ret;
-			}
-		} else if (ret == -EAGAIN) {
-			goto have_block_group;
-		} else if (ret > 0) {
+		if (ret > 0)
 			goto loop;
+
+		if (bg_ret && bg_ret != block_group) {
+			btrfs_release_block_group(block_group, ffe_ctl->delalloc);
+			block_group = bg_ret;
 		}
 
 		/* Checks */
@@ -4423,6 +4389,15 @@ have_block_group:
 		btrfs_release_block_group(block_group, ffe_ctl->delalloc);
 		break;
 loop:
+		if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
+		    !ffe_ctl->retry_uncached) {
+			ffe_ctl->retry_uncached = true;
+			btrfs_wait_block_group_cache_progress(block_group,
+						ffe_ctl->num_bytes +
+						ffe_ctl->empty_cluster +
+						ffe_ctl->empty_size);
+			goto have_block_group;
+		}
 		release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
 		cond_resched();
 	}
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index b9e148adcd28..88c249c37516 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -48,16 +48,11 @@ struct find_free_extent_ctl {
 	int loop;
 
 	/*
-	 * Whether we're refilling a cluster, if true we need to re-search
-	 * current block group but don't try to refill the cluster again.
+	 * Set to true if we're retrying the allocation on this block group
+	 * after waiting for caching progress, this is so that we retry only
+	 * once before moving on to another block group.
 	 */
-	bool retry_clustered;
-
-	/*
-	 * Whether we're updating free space cache, if true we need to re-search
-	 * current block group but don't try updating free space cache again.
-	 */
-	bool retry_unclustered;
+	bool retry_uncached;
 
 	/* If current block group is cached */
 	int cached;

From e7f1326cc24e22b38afc3acd328480a1183f9e79 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Mon, 31 Jul 2023 11:13:00 -0400
Subject: [PATCH 1534/2047] btrfs: set page extent mapped after read_folio in
 relocate_one_page

One of the CI runs triggered the following panic

  assertion failed: PagePrivate(page) && page->private, in fs/btrfs/subpage.c:229
  ------------[ cut here ]------------
  kernel BUG at fs/btrfs/subpage.c:229!
  Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
  CPU: 0 PID: 923660 Comm: btrfs Not tainted 6.5.0-rc3+ #1
  pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
  pc : btrfs_subpage_assert+0xbc/0xf0
  lr : btrfs_subpage_assert+0xbc/0xf0
  sp : ffff800093213720
  x29: ffff800093213720 x28: ffff8000932138b4 x27: 000000000c280000
  x26: 00000001b5d00000 x25: 000000000c281000 x24: 000000000c281fff
  x23: 0000000000001000 x22: 0000000000000000 x21: ffffff42b95bf880
  x20: ffff42b9528e0000 x19: 0000000000001000 x18: ffffffffffffffff
  x17: 667274622f736620 x16: 6e69202c65746176 x15: 0000000000000028
  x14: 0000000000000003 x13: 00000000002672d7 x12: 0000000000000000
  x11: ffffcd3f0ccd9204 x10: ffffcd3f0554ae50 x9 : ffffcd3f0379528c
  x8 : ffff800093213428 x7 : 0000000000000000 x6 : ffffcd3f091771e8
  x5 : ffff42b97f333948 x4 : 0000000000000000 x3 : 0000000000000000
  x2 : 0000000000000000 x1 : ffff42b9556cde80 x0 : 000000000000004f
  Call trace:
   btrfs_subpage_assert+0xbc/0xf0
   btrfs_subpage_set_dirty+0x38/0xa0
   btrfs_page_set_dirty+0x58/0x88
   relocate_one_page+0x204/0x5f0
   relocate_file_extent_cluster+0x11c/0x180
   relocate_data_extent+0xd0/0xf8
   relocate_block_group+0x3d0/0x4e8
   btrfs_relocate_block_group+0x2d8/0x490
   btrfs_relocate_chunk+0x54/0x1a8
   btrfs_balance+0x7f4/0x1150
   btrfs_ioctl+0x10f0/0x20b8
   __arm64_sys_ioctl+0x120/0x11d8
   invoke_syscall.constprop.0+0x80/0xd8
   do_el0_svc+0x6c/0x158
   el0_svc+0x50/0x1b0
   el0t_64_sync_handler+0x120/0x130
   el0t_64_sync+0x194/0x198
  Code: 91098021 b0007fa0 91346000 97e9c6d2 (d4210000)

This is the same problem outlined in 17b17fcd6d44 ("btrfs:
set_page_extent_mapped after read_folio in btrfs_cont_expand") , and the
fix is the same.  I originally looked for the same pattern elsewhere in
our code, but mistakenly skipped over this code because I saw the page
cache readahead before we set_page_extent_mapped, not realizing that
this was only in the !page case, that we can still end up with a
!uptodate page and then do the btrfs_read_folio further down.

The fix here is the same as the above mentioned patch, move the
set_page_extent_mapped call to after the btrfs_read_folio() block to
make sure that we have the subpage blocksize stuff setup properly before
using the page.

CC: stable@vger.kernel.org # 6.1+
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/relocation.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 8ff0e9dcf85c..7408e48d45bc 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3006,9 +3006,6 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
 		if (!page)
 			return -ENOMEM;
 	}
-	ret = set_page_extent_mapped(page);
-	if (ret < 0)
-		goto release_page;
 
 	if (PageReadahead(page))
 		page_cache_async_readahead(inode->i_mapping, ra, NULL,
@@ -3024,6 +3021,15 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
 		}
 	}
 
+	/*
+	 * We could have lost page private when we dropped the lock to read the
+	 * page above, make sure we set_page_extent_mapped here so we have any
+	 * of the subpage blocksize stuff we need in place.
+	 */
+	ret = set_page_extent_mapped(page);
+	if (ret < 0)
+		goto release_page;
+
 	page_start = page_offset(page);
 	page_end = page_start + PAGE_SIZE - 1;
 

From 332581bde2a419d5f12a93a1cdc2856af649a3cc Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Fri, 21 Jul 2023 16:42:14 +0900
Subject: [PATCH 1535/2047] btrfs: zoned: do not zone finish data relocation
 block group

When multiple writes happen at once, we may need to sacrifice a currently
active block group to be zone finished for a new allocation. We choose a
block group with the least free space left, and zone finish it.

To do the finishing, we need to send IOs for already allocated region
and wait for them and on-going IOs. Otherwise, these IOs fail because the
zone is already finished at the time the IO reach a device.

However, if a block group dedicated to the data relocation is zone
finished, there is a chance that finishing it before an ongoing write IO
reaches the device. That is because there is timing gap between an
allocation is done (block_group->reservations == 0, as pre-allocation is
done) and an ordered extent is created when the relocation IO starts.
Thus, if we finish the zone between them, we can fail the IOs.

We cannot simply use "fs_info->data_reloc_bg == block_group->start" to
avoid the zone finishing. Because, the data_reloc_bg may already switch to
a new block group, while there are still ongoing write IOs to the old
data_reloc_bg.

So, this patch reworks the BLOCK_GROUP_FLAG_ZONED_DATA_RELOC bit to
indicate there is a data relocation allocation and/or ongoing write to the
block group. The bit is set on allocation and cleared in end_io function of
the last IO for the currently allocated region.

To change the timing of the bit setting also solves the issue that the bit
being left even after there is no IO going on. With the current code, if
the data_reloc_bg switches after the last IO to the current data_reloc_bg,
the bit is set at this timing and there is no one clearing that bit. As a
result, that block group is kept unallocatable for anything.

Fixes: 343d8a30851c ("btrfs: zoned: prevent allocation from previous data relocation BG")
Fixes: 74e91b12b115 ("btrfs: zoned: zone finish unused block group")
CC: stable@vger.kernel.org # 6.1+
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 43 ++++++++++++++++++++++--------------------
 fs/btrfs/zoned.c       | 16 +++++++++++++---
 2 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ba22790f95f7..e0508114f0bd 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3738,7 +3738,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	       fs_info->data_reloc_bg == 0);
 
 	if (block_group->ro ||
-	    test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
+	    (!ffe_ctl->for_data_reloc &&
+	     test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))) {
 		ret = 1;
 		goto out;
 	}
@@ -3781,8 +3782,26 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
 		fs_info->treelog_bg = block_group->start;
 
-	if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
-		fs_info->data_reloc_bg = block_group->start;
+	if (ffe_ctl->for_data_reloc) {
+		if (!fs_info->data_reloc_bg)
+			fs_info->data_reloc_bg = block_group->start;
+		/*
+		 * Do not allow allocations from this block group, unless it is
+		 * for data relocation. Compared to increasing the ->ro, setting
+		 * the ->zoned_data_reloc_ongoing flag still allows nocow
+		 * writers to come in. See btrfs_inc_nocow_writers().
+		 *
+		 * We need to disable an allocation to avoid an allocation of
+		 * regular (non-relocation data) extent. With mix of relocation
+		 * extents and regular extents, we can dispatch WRITE commands
+		 * (for relocation extents) and ZONE APPEND commands (for
+		 * regular extents) at the same time to the same zone, which
+		 * easily break the write pointer.
+		 *
+		 * Also, this flag avoids this block group to be zone finished.
+		 */
+		set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
+	}
 
 	ffe_ctl->found_offset = start + block_group->alloc_offset;
 	block_group->alloc_offset += num_bytes;
@@ -3800,24 +3819,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 out:
 	if (ret && ffe_ctl->for_treelog)
 		fs_info->treelog_bg = 0;
-	if (ret && ffe_ctl->for_data_reloc &&
-	    fs_info->data_reloc_bg == block_group->start) {
-		/*
-		 * Do not allow further allocations from this block group.
-		 * Compared to increasing the ->ro, setting the
-		 * ->zoned_data_reloc_ongoing flag still allows nocow
-		 *  writers to come in. See btrfs_inc_nocow_writers().
-		 *
-		 * We need to disable an allocation to avoid an allocation of
-		 * regular (non-relocation data) extent. With mix of relocation
-		 * extents and regular extents, we can dispatch WRITE commands
-		 * (for relocation extents) and ZONE APPEND commands (for
-		 * regular extents) at the same time to the same zone, which
-		 * easily break the write pointer.
-		 */
-		set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
+	if (ret && ffe_ctl->for_data_reloc)
 		fs_info->data_reloc_bg = 0;
-	}
 	spin_unlock(&fs_info->relocation_bg_lock);
 	spin_unlock(&fs_info->treelog_bg_lock);
 	spin_unlock(&block_group->lock);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 52f49b0d2dee..a2f8e7440f8c 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2091,6 +2091,10 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
 	 * and block_group->meta_write_pointer for metadata.
 	 */
 	if (!fully_written) {
+		if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
+			spin_unlock(&block_group->lock);
+			return -EAGAIN;
+		}
 		spin_unlock(&block_group->lock);
 
 		ret = btrfs_inc_block_group_ro(block_group, false);
@@ -2119,7 +2123,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
 			return 0;
 		}
 
-		if (block_group->reserved) {
+		if (block_group->reserved ||
+		    test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
+			     &block_group->runtime_flags)) {
 			spin_unlock(&block_group->lock);
 			btrfs_dec_block_group_ro(block_group);
 			return -EAGAIN;
@@ -2362,7 +2368,10 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
 
 	/* All relocation extents are written. */
 	if (block_group->start + block_group->alloc_offset == logical + length) {
-		/* Now, release this block group for further allocations. */
+		/*
+		 * Now, release this block group for further allocations and
+		 * zone finish.
+		 */
 		clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
 			  &block_group->runtime_flags);
 	}
@@ -2386,7 +2395,8 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
 
 		spin_lock(&block_group->lock);
 		if (block_group->reserved || block_group->alloc_offset == 0 ||
-		    (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
+		    (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) ||
+		    test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
 			spin_unlock(&block_group->lock);
 			continue;
 		}

From 953fa5ced510c4937075002bbfc6405cd500efef Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jul 2023 07:22:38 -0700
Subject: [PATCH 1536/2047] btrfs: fix error handling when in a COW window in
 run_delalloc_nocow

When run_delalloc_nocow has cow_start set to a value other than (u64)-1,
it has delayed COW writeback pending behind cur_offset.  When an error
occurs in such a window, the range going back to cow_start and not just
cur_offset needs to be unlocked, but only two error cases handle this
correctly  Move the code to handle unlock the COW range to the common
error handling label and document the logic.

To make things even more complicated, cow_file_range as called by
fallback_to_cow will unlock the range it is operating on when it fails as
well, so we need to reset cow_start right after caling fallback_to_cow
instead of only when it succeeded.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f2ab27084cc4..0d973a959559 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2029,11 +2029,8 @@ next_slot:
 		leaf = path->nodes[0];
 		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
 			ret = btrfs_next_leaf(root, path);
-			if (ret < 0) {
-				if (cow_start != (u64)-1)
-					cur_offset = cow_start;
+			if (ret < 0)
 				goto error;
-			}
 			if (ret > 0)
 				break;
 			leaf = path->nodes[0];
@@ -2096,13 +2093,10 @@ next_slot:
 
 		nocow_args.start = cur_offset;
 		ret = can_nocow_file_extent(path, &found_key, inode, &nocow_args);
-		if (ret < 0) {
-			if (cow_start != (u64)-1)
-				cur_offset = cow_start;
+		if (ret < 0)
 			goto error;
-		} else if (ret == 0) {
+		if (ret == 0)
 			goto out_check;
-		}
 
 		ret = 0;
 		bg = btrfs_inc_nocow_writers(fs_info, nocow_args.disk_bytenr);
@@ -2133,9 +2127,9 @@ out_check:
 		if (cow_start != (u64)-1) {
 			ret = fallback_to_cow(inode, locked_page,
 					      cow_start, found_key.offset - 1);
+			cow_start = (u64)-1;
 			if (ret)
 				goto error;
-			cow_start = (u64)-1;
 		}
 
 		nocow_end = cur_offset + nocow_args.num_bytes - 1;
@@ -2214,6 +2208,7 @@ out_check:
 	if (cow_start != (u64)-1) {
 		cur_offset = end;
 		ret = fallback_to_cow(inode, locked_page, cow_start, end);
+		cow_start = (u64)-1;
 		if (ret)
 			goto error;
 	}
@@ -2222,6 +2217,13 @@ error:
 	if (nocow)
 		btrfs_dec_nocow_writers(bg);
 
+	/*
+	 * If an error happened while a COW region is outstanding, cur_offset
+	 * needs to be reset to cow_start to ensure the COW region is unlocked
+	 * as well.
+	 */
+	if (cow_start != (u64)-1)
+		cur_offset = cow_start;
 	if (ret && cur_offset < end)
 		extent_clear_unlock_delalloc(inode, cur_offset, end,
 					     locked_page, EXTENT_LOCKED |

From 18f62b86c4ea943256f8962ecd0cb013c97b4e0f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jul 2023 07:22:39 -0700
Subject: [PATCH 1537/2047] btrfs: cleanup the COW fallback logic in
 run_delalloc_nocow

Use the block group pointer used to track the outstanding NOCOW writes as
a boolean to remove the duplicate nocow variable, and keep it contained
in the main loop to simplify the logic.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 47 ++++++++++++++++++++++-------------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0d973a959559..1fb1e4ac4a8c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1974,8 +1974,6 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 	int ret;
 	bool check_prev = true;
 	u64 ino = btrfs_ino(inode);
-	struct btrfs_block_group *bg;
-	bool nocow = false;
 	struct can_nocow_file_extent_args nocow_args = { 0 };
 
 	path = btrfs_alloc_path();
@@ -1993,6 +1991,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 	nocow_args.writeback_path = true;
 
 	while (1) {
+		struct btrfs_block_group *nocow_bg = NULL;
 		struct btrfs_ordered_extent *ordered;
 		struct btrfs_key found_key;
 		struct btrfs_file_extent_item *fi;
@@ -2003,8 +2002,6 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 		int extent_type;
 		bool is_prealloc;
 
-		nocow = false;
-
 		ret = btrfs_lookup_file_extent(NULL, root, path, ino,
 					       cur_offset, 0);
 		if (ret < 0)
@@ -2063,7 +2060,7 @@ next_slot:
 		if (found_key.offset > cur_offset) {
 			extent_end = found_key.offset;
 			extent_type = 0;
-			goto out_check;
+			goto must_cow;
 		}
 
 		/*
@@ -2096,18 +2093,19 @@ next_slot:
 		if (ret < 0)
 			goto error;
 		if (ret == 0)
-			goto out_check;
+			goto must_cow;
 
 		ret = 0;
-		bg = btrfs_inc_nocow_writers(fs_info, nocow_args.disk_bytenr);
-		if (bg)
-			nocow = true;
-out_check:
-		/*
-		 * If nocow is false then record the beginning of the range
-		 * that needs to be COWed
-		 */
-		if (!nocow) {
+		nocow_bg = btrfs_inc_nocow_writers(fs_info, nocow_args.disk_bytenr);
+		if (!nocow_bg) {
+must_cow:
+			/*
+			 * If we can't perform NOCOW writeback for the range,
+			 * then record the beginning of the range that needs to
+			 * be COWed.  It will be written out before the next
+			 * NOCOW range if we find one, or when exiting this
+			 * loop.
+			 */
 			if (cow_start == (u64)-1)
 				cow_start = cur_offset;
 			cur_offset = extent_end;
@@ -2128,8 +2126,10 @@ out_check:
 			ret = fallback_to_cow(inode, locked_page,
 					      cow_start, found_key.offset - 1);
 			cow_start = (u64)-1;
-			if (ret)
+			if (ret) {
+				btrfs_dec_nocow_writers(nocow_bg);
 				goto error;
+			}
 		}
 
 		nocow_end = cur_offset + nocow_args.num_bytes - 1;
@@ -2146,6 +2146,7 @@ out_check:
 					  ram_bytes, BTRFS_COMPRESS_NONE,
 					  BTRFS_ORDERED_PREALLOC);
 			if (IS_ERR(em)) {
+				btrfs_dec_nocow_writers(nocow_bg);
 				ret = PTR_ERR(em);
 				goto error;
 			}
@@ -2159,6 +2160,7 @@ out_check:
 				? (1 << BTRFS_ORDERED_PREALLOC)
 				: (1 << BTRFS_ORDERED_NOCOW),
 				BTRFS_COMPRESS_NONE);
+		btrfs_dec_nocow_writers(nocow_bg);
 		if (IS_ERR(ordered)) {
 			if (is_prealloc) {
 				btrfs_drop_extent_map_range(inode, cur_offset,
@@ -2168,11 +2170,6 @@ out_check:
 			goto error;
 		}
 
-		if (nocow) {
-			btrfs_dec_nocow_writers(bg);
-			nocow = false;
-		}
-
 		if (btrfs_is_data_reloc_root(root))
 			/*
 			 * Error handled later, as we must prevent
@@ -2213,10 +2210,10 @@ out_check:
 			goto error;
 	}
 
-error:
-	if (nocow)
-		btrfs_dec_nocow_writers(bg);
+	btrfs_free_path(path);
+	return 0;
 
+error:
 	/*
 	 * If an error happened while a COW region is outstanding, cur_offset
 	 * needs to be reset to cow_start to ensure the COW region is unlocked
@@ -2224,7 +2221,7 @@ error:
 	 */
 	if (cow_start != (u64)-1)
 		cur_offset = cow_start;
-	if (ret && cur_offset < end)
+	if (cur_offset < end)
 		extent_clear_unlock_delalloc(inode, cur_offset, end,
 					     locked_page, EXTENT_LOCKED |
 					     EXTENT_DELALLOC | EXTENT_DEFRAG |

From 38dc88890de6cae3db5a33ece287e56fc9cc4ce6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jul 2023 07:22:40 -0700
Subject: [PATCH 1538/2047] btrfs: consolidate the error handling in
 run_delalloc_nocow

Share the calls to extent_clear_unlock_delalloc for btrfs_path allocation
failure handling and the normal exit path.

This relies on btrfs_free_path ignoring a NULL pointer, and the
initialization of cur_offset to start at the beginning of the function.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1fb1e4ac4a8c..8d0dd266dd1b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1978,13 +1978,8 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 
 	path = btrfs_alloc_path();
 	if (!path) {
-		extent_clear_unlock_delalloc(inode, start, end, locked_page,
-					     EXTENT_LOCKED | EXTENT_DELALLOC |
-					     EXTENT_DO_ACCOUNTING |
-					     EXTENT_DEFRAG, PAGE_UNLOCK |
-					     PAGE_START_WRITEBACK |
-					     PAGE_END_WRITEBACK);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto error;
 	}
 
 	nocow_args.end = end;

From 76c5126e76696e61ced5d6e34441f857733741bf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jul 2023 07:22:41 -0700
Subject: [PATCH 1539/2047] btrfs: move the !zoned assert into run_delalloc_cow

Having the assert in the actual helper documents the pre-conditions
much better than having it in the caller, so move it.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8d0dd266dd1b..ca0f4781b0e5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1976,6 +1976,13 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 	u64 ino = btrfs_ino(inode);
 	struct can_nocow_file_extent_args nocow_args = { 0 };
 
+	/*
+	 * Normally on a zoned device we're only doing COW writes, but in case
+	 * of relocation on a zoned filesystem serializes I/O so that we're only
+	 * writing sequentially and can end up here as well.
+	 */
+	ASSERT(!btrfs_is_zoned(fs_info) || btrfs_is_data_reloc_root(root));
+
 	path = btrfs_alloc_path();
 	if (!path) {
 		ret = -ENOMEM;
@@ -2257,14 +2264,6 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
 		 start >= page_offset(locked_page) + PAGE_SIZE));
 
 	if (should_nocow(inode, start, end)) {
-		/*
-		 * Normally on a zoned device we're only doing COW writes, but
-		 * in case of relocation on a zoned filesystem we have taken
-		 * precaution, that we're only writing sequentially. It's safe
-		 * to use run_delalloc_nocow() here, like for  regular
-		 * preallocated inodes.
-		 */
-		ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
 		ret = run_delalloc_nocow(inode, locked_page, start, end);
 		goto out;
 	}

From 7f72f50547b7af4ddf985b07fc56600a4deba281 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 1 Aug 2023 19:02:28 +0800
Subject: [PATCH 1540/2047] btrfs: output extra debug info if we failed to find
 an inline backref

[BUG]
Syzbot reported several warning triggered inside
lookup_inline_extent_backref().

[CAUSE]
As usual, the reproducer doesn't reliably trigger locally here, but at
least we know the WARN_ON() is triggered when an inline backref can not
be found, and it can only be triggered when @insert is true. (I.e.
inserting a new inline backref, which means the backref should already
exist)

[ENHANCEMENT]
After the WARN_ON(), dump all the parameters and the extent tree
leaf to help debug.

Link: https://syzkaller.appspot.com/bug?extid=d6f9ff86c1d804ba2bc6
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e0508114f0bd..105f528382e7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -848,6 +848,11 @@ again:
 		err = -ENOENT;
 		goto out;
 	} else if (WARN_ON(ret)) {
+		btrfs_print_leaf(path->nodes[0]);
+		btrfs_err(fs_info,
+"extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
+			  bytenr, num_bytes, parent, root_objectid, owner,
+			  offset);
 		err = -EIO;
 		goto out;
 	}

From 182741d287fb1ea870ee6ef45aa1915a0b031233 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Fri, 11 Aug 2023 19:02:11 +0800
Subject: [PATCH 1541/2047] btrfs: remove v0 extent handling

The v0 extent item has been deprecated for a long time, and we don't have
any report from the community either.

So it's time to remove the v0 extent specific error handling, and just
treat them as regular extent tree corruption.

This patch would remove the btrfs_print_v0_err() helper, and enhance the
involved error handling to treat them just as any extent tree
corruption. No reports regarding v0 extents have been seen since the
graceful handling was added in 2018.

This involves:

- btrfs_backref_add_tree_node()
  This change is a little tricky, the new code is changed to only handle
  BTRFS_TREE_BLOCK_REF_KEY and BTRFS_SHARED_BLOCK_REF_KEY.

  But this is safe, as we have rejected any unknown inline refs through
  btrfs_get_extent_inline_ref_type().
  For keyed backrefs, we're safe to skip anything we don't know (that's
  if it can pass tree-checker in the first place).

- btrfs_lookup_extent_info()
- lookup_inline_extent_backref()
- run_delayed_extent_op()
- __btrfs_free_extent()
- add_tree_block()
  Regular error handling of unexpected extent tree item, and abort
  transaction (if we have a trans handle).

- remove_extent_data_ref()
  It's pretty much the same as the regular rejection of unknown backref
  key.
  But for this particular case, we can also remove a BUG_ON().

- extent_data_ref_count()
  We can remove the BTRFS_EXTENT_REF_V0_KEY BUG_ON(), as it would be
  rejected by the only caller.

- btrfs_print_leaf()
  Remove the handling for BTRFS_EXTENT_REF_V0_KEY.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/backref.c              | 29 +++++++++++----------------
 fs/btrfs/extent-tree.c          | 35 ++++++++++++++++++++-------------
 fs/btrfs/messages.c             |  6 ------
 fs/btrfs/messages.h             |  2 --
 fs/btrfs/print-tree.c           | 10 ++++------
 fs/btrfs/relocation.c           | 11 ++++++-----
 include/trace/events/btrfs.h    |  1 -
 include/uapi/linux/btrfs_tree.h |  6 +++++-
 8 files changed, 48 insertions(+), 52 deletions(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 79336fa853db..b7d54efb4728 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -3373,7 +3373,6 @@ int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
 				struct btrfs_key *node_key,
 				struct btrfs_backref_node *cur)
 {
-	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_backref_edge *edge;
 	struct btrfs_backref_node *exist;
 	int ret;
@@ -3462,25 +3461,21 @@ int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
 			ret = handle_direct_tree_backref(cache, &key, cur);
 			if (ret < 0)
 				goto out;
-			continue;
-		} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
-			ret = -EINVAL;
-			btrfs_print_v0_err(fs_info);
-			btrfs_handle_fs_error(fs_info, ret, NULL);
-			goto out;
-		} else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
-			continue;
+		} else if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
+			/*
+			 * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref
+			 * offset means the root objectid. We need to search
+			 * the tree to get its parent bytenr.
+			 */
+			ret = handle_indirect_tree_backref(cache, path, &key, node_key,
+							   cur);
+			if (ret < 0)
+				goto out;
 		}
-
 		/*
-		 * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset
-		 * means the root objectid. We need to search the tree to get
-		 * its parent bytenr.
+		 * Unrecognized tree backref items (if it can pass tree-checker)
+		 * would be ignored.
 		 */
-		ret = handle_indirect_tree_backref(cache, path, &key, node_key,
-						   cur);
-		if (ret < 0)
-			goto out;
 	}
 	ret = 0;
 	cur->checked = 1;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 105f528382e7..f356f08b55cb 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -166,8 +166,10 @@ search_again:
 			num_refs = btrfs_extent_refs(leaf, ei);
 			extent_flags = btrfs_extent_flags(leaf, ei);
 		} else {
-			ret = -EINVAL;
-			btrfs_print_v0_err(fs_info);
+			ret = -EUCLEAN;
+			btrfs_err(fs_info,
+			"unexpected extent item size, has %u expect >= %zu",
+				  item_size, sizeof(*ei));
 			if (trans)
 				btrfs_abort_transaction(trans, ret);
 			else
@@ -603,12 +605,12 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 		ref2 = btrfs_item_ptr(leaf, path->slots[0],
 				      struct btrfs_shared_data_ref);
 		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
-	} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
-		btrfs_print_v0_err(trans->fs_info);
-		btrfs_abort_transaction(trans, -EINVAL);
-		return -EINVAL;
 	} else {
-		BUG();
+		btrfs_err(trans->fs_info,
+			  "unrecognized backref key (%llu %u %llu)",
+			  key.objectid, key.type, key.offset);
+		btrfs_abort_transaction(trans, -EUCLEAN);
+		return -EUCLEAN;
 	}
 
 	BUG_ON(num_refs < refs_to_drop);
@@ -639,7 +641,6 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
 	leaf = path->nodes[0];
 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 
-	BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
 	if (iref) {
 		/*
 		 * If type is invalid, we should have bailed out earlier than
@@ -860,8 +861,10 @@ again:
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size(leaf, path->slots[0]);
 	if (unlikely(item_size < sizeof(*ei))) {
-		err = -EINVAL;
-		btrfs_print_v0_err(fs_info);
+		err = -EUCLEAN;
+		btrfs_err(fs_info,
+			  "unexpected extent item size, has %llu expect >= %zu",
+			  item_size, sizeof(*ei));
 		btrfs_abort_transaction(trans, err);
 		goto out;
 	}
@@ -1662,8 +1665,10 @@ again:
 	item_size = btrfs_item_size(leaf, path->slots[0]);
 
 	if (unlikely(item_size < sizeof(*ei))) {
-		err = -EINVAL;
-		btrfs_print_v0_err(fs_info);
+		err = -EUCLEAN;
+		btrfs_err(fs_info,
+			  "unexpected extent item size, has %u expect >= %zu",
+			  item_size, sizeof(*ei));
 		btrfs_abort_transaction(trans, err);
 		goto out;
 	}
@@ -3091,8 +3096,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size(leaf, extent_slot);
 	if (unlikely(item_size < sizeof(*ei))) {
-		ret = -EINVAL;
-		btrfs_print_v0_err(info);
+		ret = -EUCLEAN;
+		btrfs_err(trans->fs_info,
+			  "unexpected extent item size, has %u expect >= %zu",
+			  item_size, sizeof(*ei));
 		btrfs_abort_transaction(trans, ret);
 		goto out;
 	}
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index e3c9d2706341..7695decc7243 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -256,12 +256,6 @@ void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt,
 }
 #endif
 
-void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
-{
-	btrfs_err(fs_info,
-"Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel");
-}
-
 #if BITS_PER_LONG == 32
 void __cold btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info)
 {
diff --git a/fs/btrfs/messages.h b/fs/btrfs/messages.h
index deedc1a168e2..1ae6f8e23e07 100644
--- a/fs/btrfs/messages.h
+++ b/fs/btrfs/messages.h
@@ -181,8 +181,6 @@ do {								\
 #define ASSERT(expr)	(void)(expr)
 #endif
 
-void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info);
-
 __printf(5, 6)
 __cold
 void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index aa06d9ca911d..0c93439e929f 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -95,8 +95,10 @@ static void print_extent_item(const struct extent_buffer *eb, int slot, int type
 	int ref_index = 0;
 
 	if (unlikely(item_size < sizeof(*ei))) {
-		btrfs_print_v0_err(eb->fs_info);
-		btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
+		btrfs_err(eb->fs_info,
+			  "unexpected extent item size, has %u expect >= %zu",
+			  item_size, sizeof(*ei));
+		btrfs_handle_fs_error(eb->fs_info, -EUCLEAN, NULL);
 	}
 
 	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
@@ -291,10 +293,6 @@ void btrfs_print_leaf(const struct extent_buffer *l)
 			       btrfs_file_extent_num_bytes(l, fi),
 			       btrfs_file_extent_ram_bytes(l, fi));
 			break;
-		case BTRFS_EXTENT_REF_V0_KEY:
-			btrfs_print_v0_err(fs_info);
-			btrfs_handle_fs_error(fs_info, -EINVAL, NULL);
-			break;
 		case BTRFS_BLOCK_GROUP_ITEM_KEY:
 			bi = btrfs_item_ptr(l, i,
 					    struct btrfs_block_group_item);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 7408e48d45bc..9951a0caf5bb 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3256,12 +3256,13 @@ static int add_tree_block(struct reloc_control *rc,
 			if (type == BTRFS_TREE_BLOCK_REF_KEY)
 				owner = btrfs_extent_inline_ref_offset(eb, iref);
 		}
-	} else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
-		btrfs_print_v0_err(eb->fs_info);
-		btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
-		return -EINVAL;
 	} else {
-		BUG();
+		btrfs_print_leaf(eb);
+		btrfs_err(rc->block_group->fs_info,
+			  "unrecognized tree backref at tree block %llu slot %u",
+			  eb->start, path->slots[0]);
+		btrfs_release_path(path);
+		return -EUCLEAN;
 	}
 
 	btrfs_release_path(path);
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index a76a279c5f0f..b2db2c2f1c57 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -38,7 +38,6 @@ struct find_free_extent_ctl;
 	__print_symbolic(type,						\
 		{ BTRFS_TREE_BLOCK_REF_KEY, 	"TREE_BLOCK_REF" },	\
 		{ BTRFS_EXTENT_DATA_REF_KEY, 	"EXTENT_DATA_REF" },	\
-		{ BTRFS_EXTENT_REF_V0_KEY, 	"EXTENT_REF_V0" },	\
 		{ BTRFS_SHARED_BLOCK_REF_KEY, 	"SHARED_BLOCK_REF" },	\
 		{ BTRFS_SHARED_DATA_REF_KEY, 	"SHARED_DATA_REF" })
 
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index ab38d0f411fa..fc3c32186d7e 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -220,7 +220,11 @@
 
 #define BTRFS_EXTENT_DATA_REF_KEY	178
 
-#define BTRFS_EXTENT_REF_V0_KEY		180
+/*
+ * Obsolete key. Defintion removed in 6.6, value may be reused in the future.
+ *
+ * #define BTRFS_EXTENT_REF_V0_KEY	180
+ */
 
 #define BTRFS_SHARED_BLOCK_REF_KEY	182
 

From 4844c3664a72d36cc79752cb651c78860b14c240 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Mon, 31 Jul 2023 19:16:32 +0800
Subject: [PATCH 1542/2047] btrfs: add a helper to read the superblock
 metadata_uuid

In some cases, we need to read the FSID from the superblock when the
metadata_uuid is not set, and otherwise, read the metadata_uuid. So,
add a helper.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 8 ++++++++
 fs/btrfs/volumes.h | 1 +
 2 files changed, 9 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bfdfecee1afe..2669206e6f15 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -681,6 +681,14 @@ error_free_page:
 	return -EINVAL;
 }
 
+u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb)
+{
+	bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) &
+				  BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
+
+	return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
+}
+
 /*
  * Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices
  * being created with a disk that has already completed its fsid change. Such
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index a59898d51e9e..2128a032c3b7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -747,5 +747,6 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
 bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
 
 bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
+u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb);
 
 #endif

From 319baafcef2e075f515fce6a89e833f7aeae8484 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Mon, 31 Jul 2023 19:16:33 +0800
Subject: [PATCH 1543/2047] btrfs: simplify memcpy either of metadata_uuid or
 fsid

There is a helper which provides either metadata_uuid or fsid as per
METADATA_UUID flag. So use it.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2669206e6f15..733842136163 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -841,15 +841,8 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 		    found_transid > fs_devices->latest_generation) {
 			memcpy(fs_devices->fsid, disk_super->fsid,
 					BTRFS_FSID_SIZE);
-
-			if (has_metadata_uuid)
-				memcpy(fs_devices->metadata_uuid,
-				       disk_super->metadata_uuid,
-				       BTRFS_FSID_SIZE);
-			else
-				memcpy(fs_devices->metadata_uuid,
-				       disk_super->fsid, BTRFS_FSID_SIZE);
-
+			memcpy(fs_devices->metadata_uuid,
+			       btrfs_sb_fsid_ptr(disk_super), BTRFS_FSID_SIZE);
 			fs_devices->fsid_change = false;
 		}
 	}

From d167aa76dc0683828588c25767da07fb549e4f48 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Mon, 31 Jul 2023 19:16:34 +0800
Subject: [PATCH 1544/2047] btrfs: use the correct superblock to compare fsid
 in btrfs_validate_super

The function btrfs_validate_super() should verify the fsid in the provided
superblock argument. Because, all its callers expect it to do that.

Such as in the following stack:

   write_all_supers()
       sb = fs_info->super_for_commit;
       btrfs_validate_write_super(.., sb)
         btrfs_validate_super(.., sb, ..)

   scrub_one_super()
	btrfs_validate_super(.., sb, ..)

And
   check_dev_super()
	btrfs_validate_super(.., sb, ..)

However, it currently verifies the fs_info::super_copy::fsid instead,
which is not correct.  Fix this using the correct fsid in the superblock
argument.

CC: stable@vger.kernel.org # 5.4+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 48faf9c968d0..49f405495e34 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2384,11 +2384,10 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
 		ret = -EINVAL;
 	}
 
-	if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
-		   BTRFS_FSID_SIZE)) {
+	if (memcmp(fs_info->fs_devices->fsid, sb->fsid, BTRFS_FSID_SIZE) != 0) {
 		btrfs_err(fs_info,
 		"superblock fsid doesn't match fsid of fs_devices: %pU != %pU",
-			fs_info->super_copy->fsid, fs_info->fs_devices->fsid);
+			  sb->fsid, fs_info->fs_devices->fsid);
 		ret = -EINVAL;
 	}
 

From 6bfe3959b0e7a526f5c64747801a8613f002f05a Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Mon, 31 Jul 2023 19:16:35 +0800
Subject: [PATCH 1545/2047] btrfs: compare the correct fsid/metadata_uuid in
 btrfs_validate_super

The function btrfs_validate_super() should verify the metadata_uuid in
the provided superblock argument. Because, all its callers expect it to
do that.

Such as in the following stacks:

  write_all_supers()
   sb = fs_info->super_for_commit;
   btrfs_validate_write_super(.., sb)
     btrfs_validate_super(.., sb, ..)

  scrub_one_super()
	btrfs_validate_super(.., sb, ..)

And
   check_dev_super()
	btrfs_validate_super(.., sb, ..)

However, it currently verifies the fs_info::super_copy::metadata_uuid
instead.  Fix this using the correct metadata_uuid in the superblock
argument.

CC: stable@vger.kernel.org # 5.4+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 49f405495e34..32ec651c570f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2391,13 +2391,11 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
 		ret = -EINVAL;
 	}
 
-	if (btrfs_fs_incompat(fs_info, METADATA_UUID) &&
-	    memcmp(fs_info->fs_devices->metadata_uuid,
-		   fs_info->super_copy->metadata_uuid, BTRFS_FSID_SIZE)) {
+	if (memcmp(fs_info->fs_devices->metadata_uuid, btrfs_sb_fsid_ptr(sb),
+		   BTRFS_FSID_SIZE) != 0) {
 		btrfs_err(fs_info,
 "superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU",
-			fs_info->super_copy->metadata_uuid,
-			fs_info->fs_devices->metadata_uuid);
+			  btrfs_sb_fsid_ptr(sb), fs_info->fs_devices->metadata_uuid);
 		ret = -EINVAL;
 	}
 

From 67bc5ad04bea699371a2d52e89690e54963e1397 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Mon, 31 Jul 2023 19:16:36 +0800
Subject: [PATCH 1546/2047] btrfs: drop redundant check to use
 fs_devices::metadata_uuid

fs_devices::metadata_uuid value is already updated based on the
super_block::METADATA_UUID flag for either fsid or metadata_uuid as
appropriate. So, fs_devices::metadata_uuid can be used directly.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 32ec651c570f..0a96ea8c1d3a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -313,21 +313,16 @@ static bool check_tree_block_fsid(struct extent_buffer *eb)
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
 	u8 fsid[BTRFS_FSID_SIZE];
-	u8 *metadata_uuid;
 
 	read_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
 			   BTRFS_FSID_SIZE);
-	/*
-	 * Checking the incompat flag is only valid for the current fs. For
-	 * seed devices it's forbidden to have their uuid changed so reading
-	 * ->fsid in this case is fine
-	 */
-	if (btrfs_fs_incompat(fs_info, METADATA_UUID))
-		metadata_uuid = fs_devices->metadata_uuid;
-	else
-		metadata_uuid = fs_devices->fsid;
 
-	if (!memcmp(fsid, metadata_uuid, BTRFS_FSID_SIZE))
+	/*
+	 * alloc_fs_devices() copies the fsid into metadata_uuid if the
+	 * metadata_uuid is unset in the superblock, including for a seed device.
+	 * So, we can use fs_devices->metadata_uuid.
+	 */
+	if (memcmp(fsid, fs_info->fs_devices->metadata_uuid, BTRFS_FSID_SIZE) == 0)
 		return false;
 
 	list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list)

From 6b604c9a0cf1af7ed5c7b4012cb2fcd9ee988833 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Sun, 13 Aug 2023 16:03:28 +0100
Subject: [PATCH 1547/2047] btrfs: remove pointless empty list check when
 reading delayed dir indexes

At btrfs_readdir_delayed_dir_index(), called when reading a directory, we
have this check for an empty list to return immediately, but it's not
needed since list_for_each_entry_safe(), called immediately after, is
prepared to deal with an empty list, it simply does nothing. So remove
the empty list check.

Besides shorter source code, it also slightly reduces the binary text
size:

  Before this change:

    $ size fs/btrfs/btrfs.ko
       text	   data	    bss	    dec	    hex	filename
    1609408	 167269	  16864	1793541	 1b5e05	fs/btrfs/btrfs.ko

  After this change:

    $ size fs/btrfs/btrfs.ko
       text	   data	    bss	    dec	    hex	filename
    1609392	 167269	  16864	1793525	 1b5df5	fs/btrfs/btrfs.ko

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-inode.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6d51db066503..85dcf0024137 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1736,9 +1736,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
 	int over = 0;
 	unsigned char d_type;
 
-	if (list_empty(ins_list))
-		return 0;
-
 	/*
 	 * Changing the data of the delayed item is impossible. So
 	 * we needn't lock them. And we have held i_mutex of the

From 94628ad9440800ca0bd2436203a27c7576f769b7 Mon Sep 17 00:00:00 2001
From: Lee Trager <lee@trager.us>
Date: Thu, 10 Aug 2023 18:44:35 -0700
Subject: [PATCH 1548/2047] btrfs: copy dir permission and time when creating a
 stub subvolume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

btrfs supports creating nested subvolumes however snapshots are not
recursive.  When a snapshot is taken of a volume which contains a
subvolume the subvolume is replaced with a stub subvolume which has the
same name and uses inode number 2[1]. The stub subvolume kept the
directory name but did not set the time or permissions of the stub
subvolume. This resulted in all time information being the current time
and ownership defaulting to root. When subvolumes and snapshots are
created using unshare this results in a snapshot directory the user
created but has no permissions for.

Test case:

  [vmuser@archvm ~]# sudo -i
  [root@archvm ~]# mkdir -p /mnt/btrfs/test
  [root@archvm ~]# chown vmuser:users /mnt/btrfs/test/
  [root@archvm ~]# exit
  logout
  [vmuser@archvm ~]$ cd /mnt/btrfs/test
  [vmuser@archvm test]$ unshare --user --keep-caps --map-auto --map-root-user
  [root@archvm test]# btrfs subvolume create subvolume
  Create subvolume './subvolume'
  [root@archvm test]# btrfs subvolume create subvolume/subsubvolume
  Create subvolume 'subvolume/subsubvolume'
  [root@archvm test]# btrfs subvolume snapshot subvolume snapshot
  Create a snapshot of 'subvolume' in './snapshot'
  [root@archvm test]# exit
  logout
  [vmuser@archvm test]$ tree -ug
  [vmuser   users   ]  .
  ├── [vmuser   users   ]  snapshot
  │   └── [vmuser   users   ]  subsubvolume  <-- Without patch perm is root:root
  └── [vmuser   users   ]  subvolume
      └── [vmuser   users   ]  subsubvolume

  5 directories, 0 files

[1] https://btrfs.readthedocs.io/en/latest/btrfs-subvolume.html#nested-subvolumes

Signed-off-by: Lee Trager <lee@trager.us>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ca0f4781b0e5..e211e88c6545 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5585,11 +5585,11 @@ struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root
 	return btrfs_iget_path(s, ino, root, NULL);
 }
 
-static struct inode *new_simple_dir(struct super_block *s,
+static struct inode *new_simple_dir(struct inode *dir,
 				    struct btrfs_key *key,
 				    struct btrfs_root *root)
 {
-	struct inode *inode = new_inode(s);
+	struct inode *inode = new_inode(dir->i_sb);
 
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
@@ -5608,9 +5608,11 @@ static struct inode *new_simple_dir(struct super_block *s,
 	inode->i_fop = &simple_dir_operations;
 	inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
 	inode->i_mtime = current_time(inode);
-	inode->i_atime = inode->i_mtime;
-	inode->i_ctime = inode->i_mtime;
+	inode->i_atime = dir->i_atime;
+	inode->i_ctime = dir->i_ctime;
 	BTRFS_I(inode)->i_otime = inode->i_mtime;
+	inode->i_uid = dir->i_uid;
+	inode->i_gid = dir->i_gid;
 
 	return inode;
 }
@@ -5669,7 +5671,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 		if (ret != -ENOENT)
 			inode = ERR_PTR(ret);
 		else
-			inode = new_simple_dir(dir->i_sb, &location, root);
+			inode = new_simple_dir(dir, &location, root);
 	} else {
 		inode = btrfs_iget(dir->i_sb, location.objectid, sub_root);
 		btrfs_put_root(sub_root);

From 1dc4888e725dc748b82858984f2a5bd41efc5201 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Thu, 3 Aug 2023 14:33:29 +0800
Subject: [PATCH 1549/2047] btrfs: scrub: avoid unnecessary extent tree search
 preparing stripes

Since commit e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror()
to scrub_stripe infrastructure"), scrub no longer re-use the same path
for extent tree search.

This can lead to unnecessary extent tree search, especially for the new
stripe based scrub, as we have way more stripes to prepare.

This patch would re-introduce a shared path for extent tree search, and
properly release it when the block group is scrubbed.

This change alone can improve scrub performance slightly by reducing the
time spend preparing the stripe thus improving the queue depth.

Before (with regression):

 Device         r/s      rkB/s   rrqm/s  %rrqm r_await rareq-sz aqu-sz  %util
 nvme0n1p3 15578.00  993616.00     5.00   0.03    0.09    63.78   1.32 100.00

After (with this patch):

 nvme0n1p3 15875.00 1013328.00    12.00   0.08    0.08    63.83   1.35 100.00

Fixes: e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure")
CC: stable@vger.kernel.org # 6.4+
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 8381174bda15..6aabce141d65 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -175,6 +175,7 @@ struct scrub_ctx {
 	struct scrub_stripe	stripes[SCRUB_STRIPES_PER_SCTX];
 	struct scrub_stripe	*raid56_data_stripes;
 	struct btrfs_fs_info	*fs_info;
+	struct btrfs_path	extent_path;
 	int			first_free;
 	int			cur_stripe;
 	atomic_t		cancel_req;
@@ -339,6 +340,8 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
 	refcount_set(&sctx->refs, 1);
 	sctx->is_dev_replace = is_dev_replace;
 	sctx->fs_info = fs_info;
+	sctx->extent_path.search_commit_root = 1;
+	sctx->extent_path.skip_locking = 1;
 	for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
 		int ret;
 
@@ -1467,6 +1470,7 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
  * Return <0 for error.
  */
 static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
+					struct btrfs_path *extent_path,
 					struct btrfs_device *dev, u64 physical,
 					int mirror_num, u64 logical_start,
 					u32 logical_len,
@@ -1476,7 +1480,6 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
 	struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bg->start);
 	struct btrfs_root *csum_root = btrfs_csum_root(fs_info, bg->start);
 	const u64 logical_end = logical_start + logical_len;
-	struct btrfs_path path = { 0 };
 	u64 cur_logical = logical_start;
 	u64 stripe_end;
 	u64 extent_start;
@@ -1492,14 +1495,13 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
 	/* The range must be inside the bg. */
 	ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length);
 
-	path.search_commit_root = 1;
-	path.skip_locking = 1;
-
-	ret = find_first_extent_item(extent_root, &path, logical_start, logical_len);
+	ret = find_first_extent_item(extent_root, extent_path, logical_start,
+				     logical_len);
 	/* Either error or not found. */
 	if (ret)
 		goto out;
-	get_extent_info(&path, &extent_start, &extent_len, &extent_flags, &extent_gen);
+	get_extent_info(extent_path, &extent_start, &extent_len, &extent_flags,
+			&extent_gen);
 	if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
 		stripe->nr_meta_extents++;
 	if (extent_flags & BTRFS_EXTENT_FLAG_DATA)
@@ -1527,7 +1529,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
 
 	/* Fill the extent info for the remaining sectors. */
 	while (cur_logical <= stripe_end) {
-		ret = find_first_extent_item(extent_root, &path, cur_logical,
+		ret = find_first_extent_item(extent_root, extent_path, cur_logical,
 					     stripe_end - cur_logical + 1);
 		if (ret < 0)
 			goto out;
@@ -1535,7 +1537,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
 			ret = 0;
 			break;
 		}
-		get_extent_info(&path, &extent_start, &extent_len,
+		get_extent_info(extent_path, &extent_start, &extent_len,
 				&extent_flags, &extent_gen);
 		if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
 			stripe->nr_meta_extents++;
@@ -1575,7 +1577,6 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
 	}
 	set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state);
 out:
-	btrfs_release_path(&path);
 	return ret;
 }
 
@@ -1765,8 +1766,9 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *
 
 	/* We can queue one stripe using the remaining slot. */
 	scrub_reset_stripe(stripe);
-	ret = scrub_find_fill_first_stripe(bg, dev, physical, mirror_num,
-					   logical, length, stripe);
+	ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path, dev,
+					   physical, mirror_num, logical,
+					   length, stripe);
 	/* Either >0 as no more extents or <0 for error. */
 	if (ret)
 		return ret;
@@ -1784,6 +1786,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
 	struct btrfs_raid_bio *rbio;
 	struct btrfs_io_context *bioc = NULL;
+	struct btrfs_path extent_path = { 0 };
 	struct bio *bio;
 	struct scrub_stripe *stripe;
 	bool all_empty = true;
@@ -1794,6 +1797,14 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
 
 	ASSERT(sctx->raid56_data_stripes);
 
+	/*
+	 * For data stripe search, we cannot re-use the same extent path, as
+	 * the data stripe bytenr may be smaller than previous extent.  Thus we
+	 * have to use our own extent path.
+	 */
+	extent_path.search_commit_root = 1;
+	extent_path.skip_locking = 1;
+
 	for (int i = 0; i < data_stripes; i++) {
 		int stripe_index;
 		int rot;
@@ -1808,7 +1819,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
 
 		scrub_reset_stripe(stripe);
 		set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state);
-		ret = scrub_find_fill_first_stripe(bg,
+		ret = scrub_find_fill_first_stripe(bg, &extent_path,
 				map->stripes[stripe_index].dev, physical, 1,
 				full_stripe_start + btrfs_stripe_nr_to_offset(i),
 				BTRFS_STRIPE_LEN, stripe);
@@ -1936,6 +1947,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
 	bio_put(bio);
 	btrfs_bio_counter_dec(fs_info);
 
+	btrfs_release_path(&extent_path);
 out:
 	return ret;
 }
@@ -2103,6 +2115,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	u64 stripe_logical;
 	int stop_loop = 0;
 
+	/* Extent_path should be released by now. */
+	ASSERT(sctx->extent_path.nodes[0] == NULL);
+
 	scrub_blocked_if_needed(fs_info);
 
 	if (sctx->is_dev_replace &&
@@ -2221,6 +2236,8 @@ out:
 	ret2 = flush_scrub_stripes(sctx);
 	if (!ret)
 		ret = ret2;
+	btrfs_release_path(&sctx->extent_path);
+
 	if (sctx->raid56_data_stripes) {
 		for (int i = 0; i < nr_data_stripes(map); i++)
 			release_scrub_stripe(&sctx->raid56_data_stripes[i]);

From 3c771c194402ffe20d4de68d9fc21e703179a9ce Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Thu, 3 Aug 2023 14:33:30 +0800
Subject: [PATCH 1550/2047] btrfs: scrub: avoid unnecessary csum tree search
 preparing stripes

One of the bottleneck of the new scrub code is the extra csum tree
search.

The old code would only do the csum tree search for each scrub bio,
which can be as large as 512KiB, thus they can afford to allocate a new
path each time.

But the new scrub code is doing csum tree search for each stripe, which
is only 64KiB, this means we'd better re-use the same csum path during
each search.

This patch would introduce a per-sctx path for csum tree search, as we
don't need to re-allocate the path every time we need to do a csum tree
search.

With this change we can further improve the queue depth and improve the
scrub read performance:

Before (with regression and cached extent tree path):

 Device         r/s      rkB/s   rrqm/s  %rrqm r_await rareq-sz aqu-sz  %util
 nvme0n1p3 15875.00 1013328.00    12.00   0.08    0.08    63.83   1.35 100.00

After (with both cached extent/csum tree path):

 nvme0n1p3 17759.00 1133280.00    10.00   0.06    0.08    63.81   1.50 100.00

Fixes: e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure")
CC: stable@vger.kernel.org # 6.4+
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file-item.c | 34 ++++++++++++++++++++++------------
 fs/btrfs/file-item.h |  6 +++---
 fs/btrfs/raid56.c    |  4 ++--
 fs/btrfs/scrub.c     | 29 +++++++++++++++++++----------
 4 files changed, 46 insertions(+), 27 deletions(-)

diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 696bf695d8eb..1ce5dd154499 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -597,29 +597,37 @@ fail:
  * Each bit represents a sector. Thus caller should ensure @csum_buf passed
  * in is large enough to contain all csums.
  */
-int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
-			      u8 *csum_buf, unsigned long *csum_bitmap,
-			      bool search_commit)
+int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
+			      u64 start, u64 end, u8 *csum_buf,
+			      unsigned long *csum_bitmap)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_key key;
-	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	struct btrfs_csum_item *item;
 	const u64 orig_start = start;
+	bool free_path = false;
 	int ret;
 
 	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
 	       IS_ALIGNED(end + 1, fs_info->sectorsize));
 
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
+	if (!path) {
+		path = btrfs_alloc_path();
+		if (!path)
+			return -ENOMEM;
+		free_path = true;
+	}
 
-	if (search_commit) {
-		path->skip_locking = 1;
-		path->reada = READA_FORWARD;
-		path->search_commit_root = 1;
+	/* Check if we can reuse the previous path. */
+	if (path->nodes[0]) {
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
+		    key.type == BTRFS_EXTENT_CSUM_KEY &&
+		    key.offset <= start)
+			goto search_forward;
+		btrfs_release_path(path);
 	}
 
 	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -656,6 +664,7 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
 		}
 	}
 
+search_forward:
 	while (start <= end) {
 		u64 csum_end;
 
@@ -712,7 +721,8 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
 	}
 	ret = 0;
 fail:
-	btrfs_free_path(path);
+	if (free_path)
+		btrfs_free_path(path);
 	return ret;
 }
 
diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h
index 4ec669b69008..04bd2d34efb1 100644
--- a/fs/btrfs/file-item.h
+++ b/fs/btrfs/file-item.h
@@ -57,9 +57,9 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
 			    struct list_head *list, int search_commit,
 			    bool nowait);
-int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
-			      u8 *csum_buf, unsigned long *csum_bitmap,
-			      bool search_commit);
+int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
+			      u64 start, u64 end, u8 *csum_buf,
+			      unsigned long *csum_bitmap);
 void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
 				     const struct btrfs_path *path,
 				     struct btrfs_file_extent_item *fi,
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index f97fc62fbab2..3e014b9370a3 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -2105,8 +2105,8 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio)
 		goto error;
 	}
 
-	ret = btrfs_lookup_csums_bitmap(csum_root, start, start + len - 1,
-					rbio->csum_buf, rbio->csum_bitmap, false);
+	ret = btrfs_lookup_csums_bitmap(csum_root, NULL, start, start + len - 1,
+					rbio->csum_buf, rbio->csum_bitmap);
 	if (ret < 0)
 		goto error;
 	if (bitmap_empty(rbio->csum_bitmap, len >> fs_info->sectorsize_bits))
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6aabce141d65..d04fd3c6e927 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -176,6 +176,7 @@ struct scrub_ctx {
 	struct scrub_stripe	*raid56_data_stripes;
 	struct btrfs_fs_info	*fs_info;
 	struct btrfs_path	extent_path;
+	struct btrfs_path	csum_path;
 	int			first_free;
 	int			cur_stripe;
 	atomic_t		cancel_req;
@@ -342,6 +343,8 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
 	sctx->fs_info = fs_info;
 	sctx->extent_path.search_commit_root = 1;
 	sctx->extent_path.skip_locking = 1;
+	sctx->csum_path.search_commit_root = 1;
+	sctx->csum_path.skip_locking = 1;
 	for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
 		int ret;
 
@@ -1471,6 +1474,7 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
  */
 static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
 					struct btrfs_path *extent_path,
+					struct btrfs_path *csum_path,
 					struct btrfs_device *dev, u64 physical,
 					int mirror_num, u64 logical_start,
 					u32 logical_len,
@@ -1562,9 +1566,9 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
 		 */
 		ASSERT(BITS_PER_LONG >= BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);
 
-		ret = btrfs_lookup_csums_bitmap(csum_root, stripe->logical,
-						stripe_end, stripe->csums,
-						&csum_bitmap, true);
+		ret = btrfs_lookup_csums_bitmap(csum_root, csum_path,
+						stripe->logical, stripe_end,
+						stripe->csums, &csum_bitmap);
 		if (ret < 0)
 			goto out;
 		if (ret > 0)
@@ -1766,9 +1770,9 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *
 
 	/* We can queue one stripe using the remaining slot. */
 	scrub_reset_stripe(stripe);
-	ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path, dev,
-					   physical, mirror_num, logical,
-					   length, stripe);
+	ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path,
+					   &sctx->csum_path, dev, physical,
+					   mirror_num, logical, length, stripe);
 	/* Either >0 as no more extents or <0 for error. */
 	if (ret)
 		return ret;
@@ -1787,6 +1791,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
 	struct btrfs_raid_bio *rbio;
 	struct btrfs_io_context *bioc = NULL;
 	struct btrfs_path extent_path = { 0 };
+	struct btrfs_path csum_path = { 0 };
 	struct bio *bio;
 	struct scrub_stripe *stripe;
 	bool all_empty = true;
@@ -1798,12 +1803,14 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
 	ASSERT(sctx->raid56_data_stripes);
 
 	/*
-	 * For data stripe search, we cannot re-use the same extent path, as
-	 * the data stripe bytenr may be smaller than previous extent.  Thus we
-	 * have to use our own extent path.
+	 * For data stripe search, we cannot re-use the same extent/csum paths,
+	 * as the data stripe bytenr may be smaller than previous extent.  Thus
+	 * we have to use our own extent/csum paths.
 	 */
 	extent_path.search_commit_root = 1;
 	extent_path.skip_locking = 1;
+	csum_path.search_commit_root = 1;
+	csum_path.skip_locking = 1;
 
 	for (int i = 0; i < data_stripes; i++) {
 		int stripe_index;
@@ -1819,7 +1826,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
 
 		scrub_reset_stripe(stripe);
 		set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state);
-		ret = scrub_find_fill_first_stripe(bg, &extent_path,
+		ret = scrub_find_fill_first_stripe(bg, &extent_path, &csum_path,
 				map->stripes[stripe_index].dev, physical, 1,
 				full_stripe_start + btrfs_stripe_nr_to_offset(i),
 				BTRFS_STRIPE_LEN, stripe);
@@ -1948,6 +1955,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
 	btrfs_bio_counter_dec(fs_info);
 
 	btrfs_release_path(&extent_path);
+	btrfs_release_path(&csum_path);
 out:
 	return ret;
 }
@@ -2237,6 +2245,7 @@ out:
 	if (!ret)
 		ret = ret2;
 	btrfs_release_path(&sctx->extent_path);
+	btrfs_release_path(&sctx->csum_path);
 
 	if (sctx->raid56_data_stripes) {
 		for (int i = 0; i < nr_data_stripes(map); i++)

From ae76d8e3e1351aa1ba09cc68dab6866d356f2e17 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Thu, 3 Aug 2023 14:33:31 +0800
Subject: [PATCH 1551/2047] btrfs: scrub: fix grouping of read IO

[REGRESSION]
There are several regression reports about the scrub performance with
v6.4 kernel.

On a PCIe 3.0 device, the old v6.3 kernel can go 3GB/s scrub speed, but
v6.4 can only go 1GB/s, an obvious 66% performance drop.

[CAUSE]
Iostat shows a very different behavior between v6.3 and v6.4 kernel:

  Device         r/s      rkB/s   rrqm/s  %rrqm r_await rareq-sz aqu-sz  %util
  nvme0n1p3  9731.00 3425544.00 17237.00  63.92    2.18   352.02  21.18 100.00
  nvme0n1p3 15578.00  993616.00     5.00   0.03    0.09    63.78   1.32 100.00

The upper one is v6.3 while the lower one is v6.4.

There are several obvious differences:

- Very few read merges
  This turns out to be a behavior change that we no longer do bio
  plug/unplug.

- Very low aqu-sz
  This is due to the submit-and-wait behavior of flush_scrub_stripes(),
  and extra extent/csum tree search.

Both behaviors are not that obvious on SATA SSDs, as SATA SSDs have NCQ
to merge the reads, while SATA SSDs can not handle high queue depth well
either.

[FIX]
For now this patch focuses on the read speed fix. Dev-replace replace
speed needs more work.

For the read part, we go two directions to fix the problems:

- Re-introduce blk plug/unplug to merge read requests
  This is pretty simple, and the behavior is pretty easy to observe.

  This would enlarge the average read request size to 512K.

- Introduce multi-group reads and no longer wait for each group
  Instead of the old behavior, which submits 8 stripes and waits for
  them, here we would enlarge the total number of stripes to 16 * 8.
  Which is 8M per device, the same limit as the old scrub in-flight
  bios size limit.

  Now every time we fill a group (8 stripes), we submit them and
  continue to next stripes.

  Only when the full 16 * 8 stripes are all filled, we submit the
  remaining ones (the last group), and wait for all groups to finish.
  Then submit the repair writes and dev-replace writes.

  This should enlarge the queue depth.

This would greatly improve the merge rate (thus read block size) and
queue depth:

Before (with regression, and cached extent/csum path):

 Device         r/s      rkB/s   rrqm/s  %rrqm r_await rareq-sz aqu-sz  %util
 nvme0n1p3 20666.00 1318240.00    10.00   0.05    0.08    63.79   1.63 100.00

After (with all patches applied):

 nvme0n1p3  5165.00 2278304.00 30557.00  85.54    0.55   441.10   2.81 100.00

i.e. 1287 to 2224 MB/s.

CC: stable@vger.kernel.org # 6.4+
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 96 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 71 insertions(+), 25 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index d04fd3c6e927..11f943e86894 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -43,9 +43,20 @@ struct scrub_ctx;
 /*
  * The following value only influences the performance.
  *
- * This determines the batch size for stripe submitted in one go.
+ * This detemines how many stripes would be submitted in one go,
+ * which is 512KiB (BTRFS_STRIPE_LEN * SCRUB_STRIPES_PER_GROUP).
  */
-#define SCRUB_STRIPES_PER_SCTX	8	/* That would be 8 64K stripe per-device. */
+#define SCRUB_STRIPES_PER_GROUP		8
+
+/*
+ * How many groups we have for each sctx.
+ *
+ * This would be 8M per device, the same value as the old scrub in-flight bios
+ * size limit.
+ */
+#define SCRUB_GROUPS_PER_SCTX		16
+
+#define SCRUB_TOTAL_STRIPES		(SCRUB_GROUPS_PER_SCTX * SCRUB_STRIPES_PER_GROUP)
 
 /*
  * The following value times PAGE_SIZE needs to be large enough to match the
@@ -172,7 +183,7 @@ struct scrub_stripe {
 };
 
 struct scrub_ctx {
-	struct scrub_stripe	stripes[SCRUB_STRIPES_PER_SCTX];
+	struct scrub_stripe	stripes[SCRUB_TOTAL_STRIPES];
 	struct scrub_stripe	*raid56_data_stripes;
 	struct btrfs_fs_info	*fs_info;
 	struct btrfs_path	extent_path;
@@ -317,10 +328,10 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
 	if (!sctx)
 		return;
 
-	for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++)
+	for (i = 0; i < SCRUB_TOTAL_STRIPES; i++)
 		release_scrub_stripe(&sctx->stripes[i]);
 
-	kfree(sctx);
+	kvfree(sctx);
 }
 
 static void scrub_put_ctx(struct scrub_ctx *sctx)
@@ -335,7 +346,10 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
 	struct scrub_ctx *sctx;
 	int		i;
 
-	sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
+	/* Since sctx has inline 128 stripes, it can go beyond 64K easily.  Use
+	 * kvzalloc().
+	 */
+	sctx = kvzalloc(sizeof(*sctx), GFP_KERNEL);
 	if (!sctx)
 		goto nomem;
 	refcount_set(&sctx->refs, 1);
@@ -345,7 +359,7 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
 	sctx->extent_path.skip_locking = 1;
 	sctx->csum_path.search_commit_root = 1;
 	sctx->csum_path.skip_locking = 1;
-	for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
+	for (i = 0; i < SCRUB_TOTAL_STRIPES; i++) {
 		int ret;
 
 		ret = init_scrub_stripe(fs_info, &sctx->stripes[i]);
@@ -1658,6 +1672,28 @@ static bool stripe_has_metadata_error(struct scrub_stripe *stripe)
 	return false;
 }
 
+static void submit_initial_group_read(struct scrub_ctx *sctx,
+				      unsigned int first_slot,
+				      unsigned int nr_stripes)
+{
+	struct blk_plug plug;
+
+	ASSERT(first_slot < SCRUB_TOTAL_STRIPES);
+	ASSERT(first_slot + nr_stripes <= SCRUB_TOTAL_STRIPES);
+
+	scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
+			      btrfs_stripe_nr_to_offset(nr_stripes));
+	blk_start_plug(&plug);
+	for (int i = 0; i < nr_stripes; i++) {
+		struct scrub_stripe *stripe = &sctx->stripes[first_slot + i];
+
+		/* Those stripes should be initialized. */
+		ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state));
+		scrub_submit_initial_read(sctx, stripe);
+	}
+	blk_finish_plug(&plug);
+}
+
 static int flush_scrub_stripes(struct scrub_ctx *sctx)
 {
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
@@ -1670,11 +1706,11 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
 
 	ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &sctx->stripes[0].state));
 
-	scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
-			      btrfs_stripe_nr_to_offset(nr_stripes));
-	for (int i = 0; i < nr_stripes; i++) {
-		stripe = &sctx->stripes[i];
-		scrub_submit_initial_read(sctx, stripe);
+	/* Submit the stripes which are populated but not submitted. */
+	if (nr_stripes % SCRUB_STRIPES_PER_GROUP) {
+		const int first_slot = round_down(nr_stripes, SCRUB_STRIPES_PER_GROUP);
+
+		submit_initial_group_read(sctx, first_slot, nr_stripes - first_slot);
 	}
 
 	for (int i = 0; i < nr_stripes; i++) {
@@ -1754,21 +1790,19 @@ static void raid56_scrub_wait_endio(struct bio *bio)
 
 static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg,
 			      struct btrfs_device *dev, int mirror_num,
-			      u64 logical, u32 length, u64 physical)
+			      u64 logical, u32 length, u64 physical,
+			      u64 *found_logical_ret)
 {
 	struct scrub_stripe *stripe;
 	int ret;
 
-	/* No available slot, submit all stripes and wait for them. */
-	if (sctx->cur_stripe >= SCRUB_STRIPES_PER_SCTX) {
-		ret = flush_scrub_stripes(sctx);
-		if (ret < 0)
-			return ret;
-	}
+	/*
+	 * There should always be one slot left, as caller filling the last
+	 * slot should flush them all.
+	 */
+	ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES);
 
 	stripe = &sctx->stripes[sctx->cur_stripe];
-
-	/* We can queue one stripe using the remaining slot. */
 	scrub_reset_stripe(stripe);
 	ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path,
 					   &sctx->csum_path, dev, physical,
@@ -1776,7 +1810,20 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *
 	/* Either >0 as no more extents or <0 for error. */
 	if (ret)
 		return ret;
+	if (found_logical_ret)
+		*found_logical_ret = stripe->logical;
 	sctx->cur_stripe++;
+
+	/* We filled one group, submit it. */
+	if (sctx->cur_stripe % SCRUB_STRIPES_PER_GROUP == 0) {
+		const int first_slot = sctx->cur_stripe - SCRUB_STRIPES_PER_GROUP;
+
+		submit_initial_group_read(sctx, first_slot, SCRUB_STRIPES_PER_GROUP);
+	}
+
+	/* Last slot used, flush them all. */
+	if (sctx->cur_stripe == SCRUB_TOTAL_STRIPES)
+		return flush_scrub_stripes(sctx);
 	return 0;
 }
 
@@ -1985,6 +2032,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
 
 	/* Go through each extent items inside the logical range */
 	while (cur_logical < logical_end) {
+		u64 found_logical;
 		u64 cur_physical = physical + cur_logical - logical_start;
 
 		/* Canceled? */
@@ -2009,7 +2057,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
 
 		ret = queue_scrub_stripe(sctx, bg, device, mirror_num,
 					 cur_logical, logical_end - cur_logical,
-					 cur_physical);
+					 cur_physical, &found_logical);
 		if (ret > 0) {
 			/* No more extent, just update the accounting */
 			sctx->stat.last_physical = physical + logical_length;
@@ -2019,9 +2067,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
 		if (ret < 0)
 			break;
 
-		ASSERT(sctx->cur_stripe > 0);
-		cur_logical = sctx->stripes[sctx->cur_stripe - 1].logical
-			      + BTRFS_STRIPE_LEN;
+		cur_logical = found_logical + BTRFS_STRIPE_LEN;
 
 		/* Don't hold CPU for too long time */
 		cond_resched();

From 39dc7bd94d084dcdcd4c88be281d7d8c9b3b0d61 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Thu, 3 Aug 2023 14:33:32 +0800
Subject: [PATCH 1552/2047] btrfs: scrub: don't go ordered workqueue for
 dev-replace

The workqueue fs_info->scrub_worker would go ordered workqueue if it's a
device replace operation.

However the scrub is relying on multiple workers to do data csum
verification, and we always submit several read requests in a row.

Thus there is no need to use ordered workqueue just for dev-replace.
We have extra synchronization (the main thread will always
submit-and-wait for dev-replace writes) to handle it for zoned devices.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 11f943e86894..ff2d8b89057a 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2777,8 +2777,7 @@ static void scrub_workers_put(struct btrfs_fs_info *fs_info)
 /*
  * get a reference count on fs_info->scrub_workers. start worker if necessary
  */
-static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
-						int is_dev_replace)
+static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info)
 {
 	struct workqueue_struct *scrub_workers = NULL;
 	unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
@@ -2788,10 +2787,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
 	if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
 		return 0;
 
-	if (is_dev_replace)
-		scrub_workers = alloc_ordered_workqueue("btrfs-scrub", flags);
-	else
-		scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active);
+	scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active);
 	if (!scrub_workers)
 		return -ENOMEM;
 
@@ -2843,7 +2839,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	if (IS_ERR(sctx))
 		return PTR_ERR(sctx);
 
-	ret = scrub_workers_get(fs_info, is_dev_replace);
+	ret = scrub_workers_get(fs_info);
 	if (ret)
 		goto out_free_ctx;
 

From 4fe44f9d0472579f69b44001787f2316e4541217 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Thu, 3 Aug 2023 14:33:33 +0800
Subject: [PATCH 1553/2047] btrfs: scrub: move write back of repaired sectors
 to scrub_stripe_read_repair_worker()

Currently the scrub_stripe_read_repair_worker() only does reads to
rebuild the corrupted sectors, it doesn't do any writeback.

The design is mostly to put writeback into a more ordered manner, to
co-operate with dev-replace with zoned mode, which requires every write
to be submitted in their bytenr order.

However the writeback for repaired sectors into the original mirror
doesn't need such strong sync requirement, as it can only happen for
non-zoned devices.

This patch would move the writeback for repaired sectors into
scrub_stripe_read_repair_worker(), which removes two calls sites for
repaired sectors writeback. (one from flush_scrub_stripes(), one from
scrub_raid56_parity_stripe())

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 72 +++++++++++++++++-------------------------------
 1 file changed, 25 insertions(+), 47 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ff2d8b89057a..b877203f1dc5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -990,6 +990,9 @@ skip:
 	spin_unlock(&sctx->stat_lock);
 }
 
+static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *stripe,
+				unsigned long write_bitmap, bool dev_replace);
+
 /*
  * The main entrance for all read related scrub work, including:
  *
@@ -998,13 +1001,16 @@ skip:
  * - Go through the remaining mirrors and try to read as large blocksize as
  *   possible
  * - Go through all mirrors (including the failed mirror) sector-by-sector
+ * - Submit writeback for repaired sectors
  *
- * Writeback does not happen here, it needs extra synchronization.
+ * Writeback for dev-replace does not happen here, it needs extra
+ * synchronization for zoned devices.
  */
 static void scrub_stripe_read_repair_worker(struct work_struct *work)
 {
 	struct scrub_stripe *stripe = container_of(work, struct scrub_stripe, work);
-	struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+	struct scrub_ctx *sctx = stripe->sctx;
+	struct btrfs_fs_info *fs_info = sctx->fs_info;
 	int num_copies = btrfs_num_copies(fs_info, stripe->bg->start,
 					  stripe->bg->length);
 	int mirror;
@@ -1069,7 +1075,23 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
 			goto out;
 	}
 out:
-	scrub_stripe_report_errors(stripe->sctx, stripe);
+	/*
+	 * Submit the repaired sectors.  For zoned case, we cannot do repair
+	 * in-place, but queue the bg to be relocated.
+	 */
+	if (btrfs_is_zoned(fs_info)) {
+		if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
+			btrfs_repair_one_zone(fs_info, sctx->stripes[0].bg->start);
+	} else if (!sctx->readonly) {
+		unsigned long repaired;
+
+		bitmap_andnot(&repaired, &stripe->init_error_bitmap,
+			      &stripe->error_bitmap, stripe->nr_sectors);
+		scrub_write_sectors(sctx, stripe, repaired, false);
+		wait_scrub_stripe_io(stripe);
+	}
+
+	scrub_stripe_report_errors(sctx, stripe);
 	set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state);
 	wake_up(&stripe->repair_wait);
 }
@@ -1720,32 +1742,6 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
 			   test_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state));
 	}
 
-	/*
-	 * Submit the repaired sectors.  For zoned case, we cannot do repair
-	 * in-place, but queue the bg to be relocated.
-	 */
-	if (btrfs_is_zoned(fs_info)) {
-		for (int i = 0; i < nr_stripes; i++) {
-			stripe = &sctx->stripes[i];
-
-			if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) {
-				btrfs_repair_one_zone(fs_info,
-						      sctx->stripes[0].bg->start);
-				break;
-			}
-		}
-	} else if (!sctx->readonly) {
-		for (int i = 0; i < nr_stripes; i++) {
-			unsigned long repaired;
-
-			stripe = &sctx->stripes[i];
-
-			bitmap_andnot(&repaired, &stripe->init_error_bitmap,
-				      &stripe->error_bitmap, stripe->nr_sectors);
-			scrub_write_sectors(sctx, stripe, repaired, false);
-		}
-	}
-
 	/* Submit for dev-replace. */
 	if (sctx->is_dev_replace) {
 		/*
@@ -1918,24 +1914,6 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
 	/* For now, no zoned support for RAID56. */
 	ASSERT(!btrfs_is_zoned(sctx->fs_info));
 
-	/* Writeback for the repaired sectors. */
-	for (int i = 0; i < data_stripes; i++) {
-		unsigned long repaired;
-
-		stripe = &sctx->raid56_data_stripes[i];
-
-		bitmap_andnot(&repaired, &stripe->init_error_bitmap,
-			      &stripe->error_bitmap, stripe->nr_sectors);
-		scrub_write_sectors(sctx, stripe, repaired, false);
-	}
-
-	/* Wait for the above writebacks to finish. */
-	for (int i = 0; i < data_stripes; i++) {
-		stripe = &sctx->raid56_data_stripes[i];
-
-		wait_scrub_stripe_io(stripe);
-	}
-
 	/*
 	 * Now all data stripes are properly verified. Check if we have any
 	 * unrepaired, if so abort immediately or we could further corrupt the

From 89c3760428db0342edf2747240419c9b34db3485 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 17 Aug 2023 16:57:31 -0400
Subject: [PATCH 1554/2047] btrfs: tests: add extent_map tests for dropping
 with odd layouts

While investigating weird problems with the extent_map I wrote a self
test testing the various edge cases of btrfs_drop_extent_map_range.
This can split in different ways and behaves different in each case, so
test the various edge cases to make sure everything is functioning
properly.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tests/extent-map-tests.c | 218 ++++++++++++++++++++++++++++++
 1 file changed, 218 insertions(+)

diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index ed0f36ae5346..cf85c7795686 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include "btrfs-tests.h"
 #include "../ctree.h"
+#include "../btrfs_inode.h"
 #include "../volumes.h"
 #include "../disk-io.h"
 #include "../block-group.h"
@@ -442,6 +443,218 @@ static int test_case_4(struct btrfs_fs_info *fs_info,
 	return ret;
 }
 
+static int add_compressed_extent(struct extent_map_tree *em_tree,
+				 u64 start, u64 len, u64 block_start)
+{
+	struct extent_map *em;
+	int ret;
+
+	em = alloc_extent_map();
+	if (!em) {
+		test_std_err(TEST_ALLOC_EXTENT_MAP);
+		return -ENOMEM;
+	}
+
+	em->start = start;
+	em->len = len;
+	em->block_start = block_start;
+	em->block_len = SZ_4K;
+	set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+	write_lock(&em_tree->lock);
+	ret = add_extent_mapping(em_tree, em, 0);
+	write_unlock(&em_tree->lock);
+	free_extent_map(em);
+	if (ret < 0) {
+		test_err("cannot add extent map [%llu, %llu)", start, start + len);
+		return ret;
+	}
+
+	return 0;
+}
+
+struct extent_range {
+	u64 start;
+	u64 len;
+};
+
+/* The valid states of the tree after every drop, as described below. */
+struct extent_range valid_ranges[][7] = {
+	{
+	  { .start = 0,			.len = SZ_8K },		/* [0, 8K) */
+	  { .start = SZ_4K * 3,		.len = SZ_4K * 3},	/* [12k, 24k) */
+	  { .start = SZ_4K * 6,		.len = SZ_4K * 3},	/* [24k, 36k) */
+	  { .start = SZ_32K + SZ_4K,	.len = SZ_4K},		/* [36k, 40k) */
+	  { .start = SZ_4K * 10,	.len = SZ_4K * 6},	/* [40k, 64k) */
+	},
+	{
+	  { .start = 0,			.len = SZ_8K },		/* [0, 8K) */
+	  { .start = SZ_4K * 5,		.len = SZ_4K},		/* [20k, 24k) */
+	  { .start = SZ_4K * 6,		.len = SZ_4K * 3},	/* [24k, 36k) */
+	  { .start = SZ_32K + SZ_4K,	.len = SZ_4K},		/* [36k, 40k) */
+	  { .start = SZ_4K * 10,	.len = SZ_4K * 6},	/* [40k, 64k) */
+	},
+	{
+	  { .start = 0,			.len = SZ_8K },		/* [0, 8K) */
+	  { .start = SZ_4K * 5,		.len = SZ_4K},		/* [20k, 24k) */
+	  { .start = SZ_4K * 6,		.len = SZ_4K},		/* [24k, 28k) */
+	  { .start = SZ_32K,		.len = SZ_4K},		/* [32k, 36k) */
+	  { .start = SZ_32K + SZ_4K,	.len = SZ_4K},		/* [36k, 40k) */
+	  { .start = SZ_4K * 10,	.len = SZ_4K * 6},	/* [40k, 64k) */
+	},
+	{
+	  { .start = 0,			.len = SZ_8K},		/* [0, 8K) */
+	  { .start = SZ_4K * 5,		.len = SZ_4K},		/* [20k, 24k) */
+	  { .start = SZ_4K * 6,		.len = SZ_4K},		/* [24k, 28k) */
+	}
+};
+
+static int validate_range(struct extent_map_tree *em_tree, int index)
+{
+	struct rb_node *n;
+	int i;
+
+	for (i = 0, n = rb_first_cached(&em_tree->map);
+	     valid_ranges[index][i].len && n;
+	     i++, n = rb_next(n)) {
+		struct extent_map *entry = rb_entry(n, struct extent_map, rb_node);
+
+		if (entry->start != valid_ranges[index][i].start) {
+			test_err("mapping has start %llu expected %llu",
+				 entry->start, valid_ranges[index][i].start);
+			return -EINVAL;
+		}
+
+		if (entry->len != valid_ranges[index][i].len) {
+			test_err("mapping has len %llu expected %llu",
+				 entry->len, valid_ranges[index][i].len);
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * We exited because we don't have any more entries in the extent_map
+	 * but we still expect more valid entries.
+	 */
+	if (valid_ranges[index][i].len) {
+		test_err("missing an entry");
+		return -EINVAL;
+	}
+
+	/* We exited the loop but still have entries in the extent map. */
+	if (n) {
+		test_err("we have a left over entry in the extent map we didn't expect");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Test scenario:
+ *
+ * Test the various edge cases of btrfs_drop_extent_map_range, create the
+ * following ranges
+ *
+ * [0, 12k)[12k, 24k)[24k, 36k)[36k, 40k)[40k,64k)
+ *
+ * And then we'll drop:
+ *
+ * [8k, 12k) - test the single front split
+ * [12k, 20k) - test the single back split
+ * [28k, 32k) - test the double split
+ * [32k, 64k) - test whole em dropping
+ *
+ * They'll have the EXTENT_FLAG_COMPRESSED flag set to keep the em tree from
+ * merging the em's.
+ */
+static int test_case_5(void)
+{
+	struct extent_map_tree *em_tree;
+	struct inode *inode;
+	u64 start, end;
+	int ret;
+
+	test_msg("Running btrfs_drop_extent_map_range tests");
+
+	inode = btrfs_new_test_inode();
+	if (!inode) {
+		test_std_err(TEST_ALLOC_INODE);
+		return -ENOMEM;
+	}
+
+	em_tree = &BTRFS_I(inode)->extent_tree;
+
+	/* [0, 12k) */
+	ret = add_compressed_extent(em_tree, 0, SZ_4K * 3, 0);
+	if (ret) {
+		test_err("cannot add extent range [0, 12K)");
+		goto out;
+	}
+
+	/* [12k, 24k) */
+	ret = add_compressed_extent(em_tree, SZ_4K * 3, SZ_4K * 3, SZ_4K);
+	if (ret) {
+		test_err("cannot add extent range [12k, 24k)");
+		goto out;
+	}
+
+	/* [24k, 36k) */
+	ret = add_compressed_extent(em_tree, SZ_4K * 6, SZ_4K * 3, SZ_8K);
+	if (ret) {
+		test_err("cannot add extent range [12k, 24k)");
+		goto out;
+	}
+
+	/* [36k, 40k) */
+	ret = add_compressed_extent(em_tree, SZ_32K + SZ_4K, SZ_4K, SZ_4K * 3);
+	if (ret) {
+		test_err("cannot add extent range [12k, 24k)");
+		goto out;
+	}
+
+	/* [40k, 64k) */
+	ret = add_compressed_extent(em_tree, SZ_4K * 10, SZ_4K * 6, SZ_16K);
+	if (ret) {
+		test_err("cannot add extent range [12k, 24k)");
+		goto out;
+	}
+
+	/* Drop [8k, 12k) */
+	start = SZ_8K;
+	end = (3 * SZ_4K) - 1;
+	btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
+	ret = validate_range(&BTRFS_I(inode)->extent_tree, 0);
+	if (ret)
+		goto out;
+
+	/* Drop [12k, 20k) */
+	start = SZ_4K * 3;
+	end = SZ_16K + SZ_4K - 1;
+	btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
+	ret = validate_range(&BTRFS_I(inode)->extent_tree, 1);
+	if (ret)
+		goto out;
+
+	/* Drop [28k, 32k) */
+	start = SZ_32K - SZ_4K;
+	end = SZ_32K - 1;
+	btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
+	ret = validate_range(&BTRFS_I(inode)->extent_tree, 2);
+	if (ret)
+		goto out;
+
+	/* Drop [32k, 64k) */
+	start = SZ_32K;
+	end = SZ_64K - 1;
+	btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
+	ret = validate_range(&BTRFS_I(inode)->extent_tree, 3);
+	if (ret)
+		goto out;
+out:
+	iput(inode);
+	return ret;
+}
+
 struct rmap_test_vector {
 	u64 raid_type;
 	u64 physical_start;
@@ -619,6 +832,11 @@ int btrfs_test_extent_map(void)
 	if (ret)
 		goto out;
 	ret = test_case_4(fs_info, em_tree);
+	if (ret)
+		goto out;
+	ret = test_case_5();
+	if (ret)
+		goto out;
 
 	test_msg("running rmap tests");
 	for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) {

From f345dbdf2c9c4733d52b9389da6c02672f3b8672 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 17 Aug 2023 16:57:32 -0400
Subject: [PATCH 1555/2047] btrfs: tests: add a test for
 btrfs_add_extent_mapping

This helper is different from the normal add_extent_mapping in that it
will stuff an em into a gap that exists between overlapping em's in the
tree.  It appeared there was a bug so I wrote a self test to validate it
did the correct thing when it worked with two side by side ems.
Thankfully it is correct, but more testing is better.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tests/extent-map-tests.c | 56 +++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index cf85c7795686..31de4cd0666f 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -655,6 +655,59 @@ out:
 	return ret;
 }
 
+/*
+ * Test the btrfs_add_extent_mapping helper which will attempt to create an em
+ * for areas between two existing ems.  Validate it doesn't do this when there
+ * are two unmerged em's side by side.
+ */
+static int test_case_6(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree)
+{
+	struct extent_map *em = NULL;
+	int ret;
+
+	ret = add_compressed_extent(em_tree, 0, SZ_4K, 0);
+	if (ret)
+		goto out;
+
+	ret = add_compressed_extent(em_tree, SZ_4K, SZ_4K, 0);
+	if (ret)
+		goto out;
+
+	em = alloc_extent_map();
+	if (!em) {
+		test_std_err(TEST_ALLOC_EXTENT_MAP);
+		return -ENOMEM;
+	}
+
+	em->start = SZ_4K;
+	em->len = SZ_4K;
+	em->block_start = SZ_16K;
+	em->block_len = SZ_16K;
+	write_lock(&em_tree->lock);
+	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, 0, SZ_8K);
+	write_unlock(&em_tree->lock);
+
+	if (ret != 0) {
+		test_err("got an error when adding our em: %d", ret);
+		goto out;
+	}
+
+	ret = -EINVAL;
+	if (em->start != 0) {
+		test_err("unexpected em->start at %llu, wanted 0", em->start);
+		goto out;
+	}
+	if (em->len != SZ_4K) {
+		test_err("unexpected em->len %llu, expected 4K", em->len);
+		goto out;
+	}
+	ret = 0;
+out:
+	free_extent_map(em);
+	free_extent_map_tree(em_tree);
+	return ret;
+}
+
 struct rmap_test_vector {
 	u64 raid_type;
 	u64 physical_start;
@@ -835,6 +888,9 @@ int btrfs_test_extent_map(void)
 	if (ret)
 		goto out;
 	ret = test_case_5();
+	if (ret)
+		goto out;
+	ret = test_case_6(fs_info, em_tree);
 	if (ret)
 		goto out;
 

From 92e1229b204d66525ee95054188938142420324d Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 17 Aug 2023 16:57:33 -0400
Subject: [PATCH 1556/2047] btrfs: tests: test invalid splitting when skipping
 pinned drop extent_map

This reproduces the bug fixed by "btrfs: fix incorrect splitting in
btrfs_drop_extent_map_range", we were improperly calculating the range
for the split extent.  Add a test that exercises this scenario and
validates that we get the correct resulting extent_maps in our tree.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tests/extent-map-tests.c | 138 ++++++++++++++++++++++++++++++
 1 file changed, 138 insertions(+)

diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 31de4cd0666f..29bdd08b241f 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -708,6 +708,141 @@ out:
 	return ret;
 }
 
+/*
+ * Regression test for btrfs_drop_extent_map_range.  Calling with skip_pinned ==
+ * true would mess up the start/end calculations and subsequent splits would be
+ * incorrect.
+ */
+static int test_case_7(void)
+{
+	struct extent_map_tree *em_tree;
+	struct extent_map *em;
+	struct inode *inode;
+	int ret;
+
+	test_msg("Running btrfs_drop_extent_cache with pinned");
+
+	inode = btrfs_new_test_inode();
+	if (!inode) {
+		test_std_err(TEST_ALLOC_INODE);
+		return -ENOMEM;
+	}
+
+	em_tree = &BTRFS_I(inode)->extent_tree;
+
+	em = alloc_extent_map();
+	if (!em) {
+		test_std_err(TEST_ALLOC_EXTENT_MAP);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* [0, 16K), pinned */
+	em->start = 0;
+	em->len = SZ_16K;
+	em->block_start = 0;
+	em->block_len = SZ_4K;
+	set_bit(EXTENT_FLAG_PINNED, &em->flags);
+	write_lock(&em_tree->lock);
+	ret = add_extent_mapping(em_tree, em, 0);
+	write_unlock(&em_tree->lock);
+	if (ret < 0) {
+		test_err("couldn't add extent map");
+		goto out;
+	}
+	free_extent_map(em);
+
+	em = alloc_extent_map();
+	if (!em) {
+		test_std_err(TEST_ALLOC_EXTENT_MAP);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* [32K, 48K), not pinned */
+	em->start = SZ_32K;
+	em->len = SZ_16K;
+	em->block_start = SZ_32K;
+	em->block_len = SZ_16K;
+	write_lock(&em_tree->lock);
+	ret = add_extent_mapping(em_tree, em, 0);
+	write_unlock(&em_tree->lock);
+	if (ret < 0) {
+		test_err("couldn't add extent map");
+		goto out;
+	}
+	free_extent_map(em);
+
+	/*
+	 * Drop [0, 36K) This should skip the [0, 4K) extent and then split the
+	 * [32K, 48K) extent.
+	 */
+	btrfs_drop_extent_map_range(BTRFS_I(inode), 0, (36 * SZ_1K) - 1, true);
+
+	/* Make sure our extent maps look sane. */
+	ret = -EINVAL;
+
+	em = lookup_extent_mapping(em_tree, 0, SZ_16K);
+	if (!em) {
+		test_err("didn't find an em at 0 as expected");
+		goto out;
+	}
+
+	if (em->start != 0) {
+		test_err("em->start is %llu, expected 0", em->start);
+		goto out;
+	}
+
+	if (em->len != SZ_16K) {
+		test_err("em->len is %llu, expected 16K", em->len);
+		goto out;
+	}
+
+	free_extent_map(em);
+
+	read_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, SZ_16K, SZ_16K);
+	read_unlock(&em_tree->lock);
+	if (em) {
+		test_err("found an em when we weren't expecting one");
+		goto out;
+	}
+
+	read_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, SZ_32K, SZ_16K);
+	read_unlock(&em_tree->lock);
+	if (!em) {
+		test_err("didn't find an em at 32K as expected");
+		goto out;
+	}
+
+	if (em->start != (36 * SZ_1K)) {
+		test_err("em->start is %llu, expected 36K", em->start);
+		goto out;
+	}
+
+	if (em->len != (12 * SZ_1K)) {
+		test_err("em->len is %llu, expected 12K", em->len);
+		goto out;
+	}
+
+	free_extent_map(em);
+
+	read_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, 48 * SZ_1K, (u64)-1);
+	read_unlock(&em_tree->lock);
+	if (em) {
+		test_err("found an unexpected em above 48K");
+		goto out;
+	}
+
+	ret = 0;
+out:
+	free_extent_map(em);
+	iput(inode);
+	return ret;
+}
+
 struct rmap_test_vector {
 	u64 raid_type;
 	u64 physical_start;
@@ -891,6 +1026,9 @@ int btrfs_test_extent_map(void)
 	if (ret)
 		goto out;
 	ret = test_case_6(fs_info, em_tree);
+	if (ret)
+		goto out;
+	ret = test_case_7();
 	if (ret)
 		goto out;
 

From 9c53fb0ad1acaf227718ccae16e8fb8e01c05918 Mon Sep 17 00:00:00 2001
From: Eugene Shalygin <eugene.shalygin@gmail.com>
Date: Mon, 21 Aug 2023 13:52:35 +0200
Subject: [PATCH 1557/2047] hwmon: (asus-ec-sensosrs) fix mutex path for X670E
 Hero

A user reported that they observe race condition warning [1] and after
looking once again into the DSDT source it was found that wrong mutex
was used.

[1] https://github.com/zeule/asus-ec-sensors/issues/43

Fixes: 790dec13c012 ("hwmon: (asus-ec-sensors) add ROG Crosshair X670E Hero.")
Signed-off-by: Eugene Shalygin <eugene.shalygin@gmail.com>
Link: https://lore.kernel.org/r/20230821115418.25733-2-eugene.shalygin@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/asus-ec-sensors.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
index f52a539eb33e..51f9c2db403e 100644
--- a/drivers/hwmon/asus-ec-sensors.c
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -340,7 +340,7 @@ static const struct ec_board_info board_info_crosshair_x670e_hero = {
 	.sensors = SENSOR_TEMP_CPU | SENSOR_TEMP_CPU_PACKAGE |
 		SENSOR_TEMP_MB | SENSOR_TEMP_VRM |
 		SENSOR_SET_TEMP_WATER,
-	.mutex_path = ASUS_HW_ACCESS_MUTEX_RMTW_ASMX,
+	.mutex_path = ACPI_GLOBAL_LOCK_PSEUDO_PATH,
 	.family = family_amd_600_series,
 };
 

From 4a754aba0bd1ecc0529c7e0282a3947cff68cc5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Tue, 27 Jun 2023 08:49:48 +0200
Subject: [PATCH 1558/2047] hwmon: (pmbus) Update documentation to not use
 .probe_new() any more
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 03c835f498b5 ("i2c: Switch .probe() to not take an id
parameter") .probe() is the recommended callback to implement an i2c
driver (again). Reflect this in the documentation and don't mention
.probe_new() which will be dropped soon.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20230627064948.593804-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/pmbus.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/hwmon/pmbus.rst b/Documentation/hwmon/pmbus.rst
index 7ecfec6ca2db..eb1569bfa676 100644
--- a/Documentation/hwmon/pmbus.rst
+++ b/Documentation/hwmon/pmbus.rst
@@ -163,7 +163,7 @@ Emerson DS1200 power modules might look as follows::
 	.driver = {
 		   .name = "ds1200",
 		   },
-	.probe_new = ds1200_probe,
+	.probe = ds1200_probe,
 	.id_table = ds1200_id,
   };
 

From c15a8edd63319788356a9fc9a7fe60468a1476d7 Mon Sep 17 00:00:00 2001
From: Eddie James <eajames@linux.ibm.com>
Date: Wed, 28 Jun 2023 10:34:53 -0500
Subject: [PATCH 1559/2047] hwmon: (pmbus/acbel-fsg032) Add firmware version
 debugfs attribute

Like the IBM CFFPS driver, export the PSU's firmware version to a
debugfs attribute as reported in the manufacturer register.

Signed-off-by: Eddie James <eajames@linux.ibm.com>
Link: https://lore.kernel.org/r/20230628153453.122213-1-eajames@linux.ibm.com
[groeck: Dropped unused variable; changed buffer from char to u8]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/acbel-fsg032.c | 38 ++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/drivers/hwmon/pmbus/acbel-fsg032.c b/drivers/hwmon/pmbus/acbel-fsg032.c
index 0a0ef4ce3493..e0c55fd8f3a6 100644
--- a/drivers/hwmon/pmbus/acbel-fsg032.c
+++ b/drivers/hwmon/pmbus/acbel-fsg032.c
@@ -3,14 +3,51 @@
  * Copyright 2023 IBM Corp.
  */
 
+#include <linux/debugfs.h>
 #include <linux/device.h>
 #include <linux/fs.h>
 #include <linux/i2c.h>
+#include <linux/minmax.h>
 #include <linux/module.h>
 #include <linux/pmbus.h>
 #include <linux/hwmon-sysfs.h>
 #include "pmbus.h"
 
+#define ACBEL_MFR_FW_REVISION	0xd9
+
+static ssize_t acbel_fsg032_debugfs_read(struct file *file, char __user *buf, size_t count,
+					 loff_t *ppos)
+{
+	struct i2c_client *client = file->private_data;
+	u8 data[I2C_SMBUS_BLOCK_MAX + 2] = { 0 };
+	char out[8];
+	int rc;
+
+	rc = i2c_smbus_read_block_data(client, ACBEL_MFR_FW_REVISION, data);
+	if (rc < 0)
+		return rc;
+
+	rc = snprintf(out, sizeof(out), "%*phN\n", min(rc, 3), data);
+	return simple_read_from_buffer(buf, count, ppos, out, rc);
+}
+
+static const struct file_operations acbel_debugfs_ops = {
+	.llseek = noop_llseek,
+	.read = acbel_fsg032_debugfs_read,
+	.write = NULL,
+	.open = simple_open,
+};
+
+static void acbel_fsg032_init_debugfs(struct i2c_client *client)
+{
+	struct dentry *debugfs = pmbus_get_debugfs_dir(client);
+
+	if (!debugfs)
+		return;
+
+	debugfs_create_file("fw_version", 0444, debugfs, client, &acbel_debugfs_ops);
+}
+
 static const struct i2c_device_id acbel_fsg032_id[] = {
 	{ "acbel_fsg032" },
 	{}
@@ -59,6 +96,7 @@ static int acbel_fsg032_probe(struct i2c_client *client)
 	if (rc)
 		return rc;
 
+	acbel_fsg032_init_debugfs(client);
 	return 0;
 }
 

From 6698cbd624bf3ad4822453b0c3941bf0220f283f Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 8 Jul 2023 07:52:31 +0200
Subject: [PATCH 1560/2047] hwmon: (lan966x) Use the devm_clk_get_enabled()
 helper function

Use the devm_clk_get_enabled() helper function instead of hand-writing it.
It saves some line of codes.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/25f2ab4c61d4fc48e8200f8754bb446f2b89ea89.1688795527.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lan966x-hwmon.c | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

diff --git a/drivers/hwmon/lan966x-hwmon.c b/drivers/hwmon/lan966x-hwmon.c
index f8658359a098..7247c03e4f44 100644
--- a/drivers/hwmon/lan966x-hwmon.c
+++ b/drivers/hwmon/lan966x-hwmon.c
@@ -334,24 +334,6 @@ static struct regmap *lan966x_init_regmap(struct platform_device *pdev,
 	return devm_regmap_init_mmio(&pdev->dev, base, &regmap_config);
 }
 
-static void lan966x_clk_disable(void *data)
-{
-	struct lan966x_hwmon *hwmon = data;
-
-	clk_disable_unprepare(hwmon->clk);
-}
-
-static int lan966x_clk_enable(struct device *dev, struct lan966x_hwmon *hwmon)
-{
-	int ret;
-
-	ret = clk_prepare_enable(hwmon->clk);
-	if (ret)
-		return ret;
-
-	return devm_add_action_or_reset(dev, lan966x_clk_disable, hwmon);
-}
-
 static int lan966x_hwmon_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -363,15 +345,11 @@ static int lan966x_hwmon_probe(struct platform_device *pdev)
 	if (!hwmon)
 		return -ENOMEM;
 
-	hwmon->clk = devm_clk_get(dev, NULL);
+	hwmon->clk = devm_clk_get_enabled(dev, NULL);
 	if (IS_ERR(hwmon->clk))
 		return dev_err_probe(dev, PTR_ERR(hwmon->clk),
 				     "failed to get clock\n");
 
-	ret = lan966x_clk_enable(dev, hwmon);
-	if (ret)
-		return dev_err_probe(dev, ret, "failed to enable clock\n");
-
 	hwmon->clk_rate = clk_get_rate(hwmon->clk);
 
 	hwmon->regmap_pvt = lan966x_init_regmap(pdev, "pvt");

From 39f034386fc023780a505ed02ca40e57e4031e23 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 14 Jul 2023 11:46:04 -0600
Subject: [PATCH 1561/2047] hwmon: Explicitly include correct DT includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230714174607.4057185-1-robh@kernel.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ad7418.c           | 2 +-
 drivers/hwmon/ads7828.c          | 2 +-
 drivers/hwmon/adt7475.c          | 1 -
 drivers/hwmon/as370-hwmon.c      | 3 ++-
 drivers/hwmon/aspeed-pwm-tacho.c | 3 +--
 drivers/hwmon/g762.c             | 1 -
 drivers/hwmon/gxp-fan-ctrl.c     | 2 +-
 drivers/hwmon/ina2xx.c           | 1 -
 drivers/hwmon/lm63.c             | 2 +-
 drivers/hwmon/lm75.c             | 1 -
 drivers/hwmon/lm85.c             | 2 +-
 drivers/hwmon/lm90.c             | 2 +-
 drivers/hwmon/lochnagar-hwmon.c  | 1 -
 drivers/hwmon/ltq-cputemp.c      | 3 ++-
 drivers/hwmon/max31730.c         | 1 -
 drivers/hwmon/max6621.c          | 2 +-
 drivers/hwmon/max6697.c          | 1 -
 drivers/hwmon/mcp3021.c          | 1 -
 drivers/hwmon/nsa320-hwmon.c     | 2 --
 drivers/hwmon/pmbus/dps920ab.c   | 2 +-
 drivers/hwmon/pmbus/ibm-cffps.c  | 2 +-
 drivers/hwmon/pmbus/ir38064.c    | 4 ++--
 drivers/hwmon/pmbus/max20730.c   | 2 +-
 drivers/hwmon/pmbus/mp5023.c     | 2 +-
 drivers/hwmon/pmbus/mpq7932.c    | 2 +-
 drivers/hwmon/pmbus/q54sj108a2.c | 2 +-
 drivers/hwmon/pmbus/tps53679.c   | 2 +-
 drivers/hwmon/pmbus/ucd9000.c    | 2 +-
 drivers/hwmon/pmbus/ucd9200.c    | 2 +-
 drivers/hwmon/sbtsi_temp.c       | 1 -
 drivers/hwmon/scpi-hwmon.c       | 2 +-
 drivers/hwmon/tmp421.c           | 2 +-
 drivers/hwmon/tmp464.c           | 2 +-
 drivers/hwmon/tps23861.c         | 2 +-
 drivers/hwmon/ultra45_env.c      | 3 ++-
 drivers/hwmon/vexpress-hwmon.c   | 1 -
 drivers/hwmon/w83773g.c          | 2 +-
 37 files changed, 30 insertions(+), 40 deletions(-)

diff --git a/drivers/hwmon/ad7418.c b/drivers/hwmon/ad7418.c
index ffe81e445010..bcea66eac82b 100644
--- a/drivers/hwmon/ad7418.c
+++ b/drivers/hwmon/ad7418.c
@@ -16,7 +16,7 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
 
diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c
index 1932613ec095..e86894e35639 100644
--- a/drivers/hwmon/ads7828.c
+++ b/drivers/hwmon/ads7828.c
@@ -18,7 +18,7 @@
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_data/ads7828.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c
index c0ce88324ea6..5363254644da 100644
--- a/drivers/hwmon/adt7475.c
+++ b/drivers/hwmon/adt7475.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
diff --git a/drivers/hwmon/as370-hwmon.c b/drivers/hwmon/as370-hwmon.c
index fffbf385a57f..316454bd983d 100644
--- a/drivers/hwmon/as370-hwmon.c
+++ b/drivers/hwmon/as370-hwmon.c
@@ -11,7 +11,8 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 
 #define CTRL		0x0
 #define  PD		BIT(0)
diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c
index d11f674e3dc3..997df4b40509 100644
--- a/drivers/hwmon/aspeed-pwm-tacho.c
+++ b/drivers/hwmon/aspeed-pwm-tacho.c
@@ -12,8 +12,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
diff --git a/drivers/hwmon/g762.c b/drivers/hwmon/g762.c
index 1b6ff4712138..fad69ef56c75 100644
--- a/drivers/hwmon/g762.c
+++ b/drivers/hwmon/g762.c
@@ -39,7 +39,6 @@
 #include <linux/kernel.h>
 #include <linux/clk.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_data/g762.h>
 
 #define DRVNAME "g762"
diff --git a/drivers/hwmon/gxp-fan-ctrl.c b/drivers/hwmon/gxp-fan-ctrl.c
index 2e05bc2f627a..00e057050437 100644
--- a/drivers/hwmon/gxp-fan-ctrl.c
+++ b/drivers/hwmon/gxp-fan-ctrl.c
@@ -6,7 +6,7 @@
 #include <linux/hwmon.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #define OFS_FAN_INST	0 /* Is 0 because plreg base will be set at INST */
diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c
index cfd7efef5cdf..a47973e2d606 100644
--- a/drivers/hwmon/ina2xx.c
+++ b/drivers/hwmon/ina2xx.c
@@ -31,7 +31,6 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/jiffies.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/delay.h>
 #include <linux/util_macros.h>
diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c
index 6972454eb4e0..562c94c7d831 100644
--- a/drivers/hwmon/lm63.c
+++ b/drivers/hwmon/lm63.c
@@ -33,7 +33,7 @@
 #include <linux/hwmon.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
 
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index 72e634d1b857..bcfa8193e5e7 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -13,7 +13,6 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/util_macros.h>
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index 8540178f5b74..2a62ea7b25a9 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -12,7 +12,7 @@
  */
 
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/jiffies.h>
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index 90101c236f35..b25ae8b7ec90 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -106,7 +106,7 @@
 #include <linux/kstrtox.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
diff --git a/drivers/hwmon/lochnagar-hwmon.c b/drivers/hwmon/lochnagar-hwmon.c
index 6350904a8a8b..5202dddfd61e 100644
--- a/drivers/hwmon/lochnagar-hwmon.c
+++ b/drivers/hwmon/lochnagar-hwmon.c
@@ -16,7 +16,6 @@
 #include <linux/mfd/lochnagar2_regs.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
diff --git a/drivers/hwmon/ltq-cputemp.c b/drivers/hwmon/ltq-cputemp.c
index 08e09a82acab..f7e4a4ca5239 100644
--- a/drivers/hwmon/ltq-cputemp.c
+++ b/drivers/hwmon/ltq-cputemp.c
@@ -9,8 +9,9 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 
 #include <lantiq_soc.h>
 
diff --git a/drivers/hwmon/max31730.c b/drivers/hwmon/max31730.c
index b1300ca9db1f..7d237db6e57c 100644
--- a/drivers/hwmon/max31730.c
+++ b/drivers/hwmon/max31730.c
@@ -11,7 +11,6 @@
 #include <linux/init.h>
 #include <linux/hwmon.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/slab.h>
 
diff --git a/drivers/hwmon/max6621.c b/drivers/hwmon/max6621.c
index 7f709fd1af89..af7e62685898 100644
--- a/drivers/hwmon/max6621.c
+++ b/drivers/hwmon/max6621.c
@@ -12,7 +12,7 @@
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 
 #define MAX6621_DRV_NAME		"max6621"
diff --git a/drivers/hwmon/max6697.c b/drivers/hwmon/max6697.c
index 3a67778f111c..fe826fcf9990 100644
--- a/drivers/hwmon/max6697.c
+++ b/drivers/hwmon/max6697.c
@@ -15,7 +15,6 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 
 #include <linux/platform_data/max6697.h>
diff --git a/drivers/hwmon/mcp3021.c b/drivers/hwmon/mcp3021.c
index 127e15ff3e76..9814eaf24564 100644
--- a/drivers/hwmon/mcp3021.c
+++ b/drivers/hwmon/mcp3021.c
@@ -20,7 +20,6 @@
 #include <linux/err.h>
 #include <linux/device.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 /* Vdd / reference voltage in millivolt */
 #define MCP3021_VDD_REF_MAX	5500
diff --git a/drivers/hwmon/nsa320-hwmon.c b/drivers/hwmon/nsa320-hwmon.c
index ebe6b031e56f..e26334469549 100644
--- a/drivers/hwmon/nsa320-hwmon.c
+++ b/drivers/hwmon/nsa320-hwmon.c
@@ -20,8 +20,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 
 /* Tests for error return values rely upon this value being < 0x80 */
diff --git a/drivers/hwmon/pmbus/dps920ab.c b/drivers/hwmon/pmbus/dps920ab.c
index f7ff3e4650b7..04e0d598a6e5 100644
--- a/drivers/hwmon/pmbus/dps920ab.c
+++ b/drivers/hwmon/pmbus/dps920ab.c
@@ -9,7 +9,7 @@
 #include <linux/debugfs.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include "pmbus.h"
 
 struct dps920ab_data {
diff --git a/drivers/hwmon/pmbus/ibm-cffps.c b/drivers/hwmon/pmbus/ibm-cffps.c
index c791925b8907..5b11aacda4d2 100644
--- a/drivers/hwmon/pmbus/ibm-cffps.c
+++ b/drivers/hwmon/pmbus/ibm-cffps.c
@@ -13,7 +13,7 @@
 #include <linux/leds.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/pmbus.h>
 
 #include "pmbus.h"
diff --git a/drivers/hwmon/pmbus/ir38064.c b/drivers/hwmon/pmbus/ir38064.c
index 871c322d3d51..04185be3fdb6 100644
--- a/drivers/hwmon/pmbus/ir38064.c
+++ b/drivers/hwmon/pmbus/ir38064.c
@@ -6,7 +6,7 @@
  *
  * VOUT_MODE is not supported by the device. The driver fakes VOUT linear16
  * mode with exponent value -8 as direct mode with m=256/b=0/R=0.
- *          
+ *
  * The device supports VOUT_PEAK, IOUT_PEAK, and TEMPERATURE_PEAK, however
  * this driver does not currently support them.
  */
@@ -16,7 +16,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regulator/driver.h>
 #include "pmbus.h"
 
diff --git a/drivers/hwmon/pmbus/max20730.c b/drivers/hwmon/pmbus/max20730.c
index 7bcf27995033..6f99085c2c3b 100644
--- a/drivers/hwmon/pmbus/max20730.c
+++ b/drivers/hwmon/pmbus/max20730.c
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/pmbus.h>
 #include <linux/util_macros.h>
 #include "pmbus.h"
diff --git a/drivers/hwmon/pmbus/mp5023.c b/drivers/hwmon/pmbus/mp5023.c
index c4c4324d2b74..21acb7fd9a1a 100644
--- a/drivers/hwmon/pmbus/mp5023.c
+++ b/drivers/hwmon/pmbus/mp5023.c
@@ -5,7 +5,7 @@
 
 #include <linux/i2c.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include "pmbus.h"
 
 static struct pmbus_driver_info mp5023_info = {
diff --git a/drivers/hwmon/pmbus/mpq7932.c b/drivers/hwmon/pmbus/mpq7932.c
index 865d42edda1a..6c62f01da7c6 100644
--- a/drivers/hwmon/pmbus/mpq7932.c
+++ b/drivers/hwmon/pmbus/mpq7932.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/pmbus.h>
 #include "pmbus.h"
 
diff --git a/drivers/hwmon/pmbus/q54sj108a2.c b/drivers/hwmon/pmbus/q54sj108a2.c
index b830f3b02bcc..a235c1cdf4fe 100644
--- a/drivers/hwmon/pmbus/q54sj108a2.c
+++ b/drivers/hwmon/pmbus/q54sj108a2.c
@@ -10,7 +10,7 @@
 #include <linux/i2c.h>
 #include <linux/kstrtox.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include "pmbus.h"
 
 #define STORE_DEFAULT_ALL		0x11
diff --git a/drivers/hwmon/pmbus/tps53679.c b/drivers/hwmon/pmbus/tps53679.c
index ef99005a3af5..e77b12f342b4 100644
--- a/drivers/hwmon/pmbus/tps53679.c
+++ b/drivers/hwmon/pmbus/tps53679.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include "pmbus.h"
 
 enum chips {
diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
index c404d306e8f7..b1d1d4214e69 100644
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -10,7 +10,7 @@
 #include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/slab.h>
diff --git a/drivers/hwmon/pmbus/ucd9200.c b/drivers/hwmon/pmbus/ucd9200.c
index a82847945508..e4aad64b2d94 100644
--- a/drivers/hwmon/pmbus/ucd9200.c
+++ b/drivers/hwmon/pmbus/ucd9200.c
@@ -7,7 +7,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/slab.h>
diff --git a/drivers/hwmon/sbtsi_temp.c b/drivers/hwmon/sbtsi_temp.c
index b79cece4ac9a..dd85cf89f008 100644
--- a/drivers/hwmon/sbtsi_temp.c
+++ b/drivers/hwmon/sbtsi_temp.c
@@ -13,7 +13,6 @@
 #include <linux/hwmon.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 
 /*
diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c
index 121e5e9f487f..a0d220609565 100644
--- a/drivers/hwmon/scpi-hwmon.c
+++ b/drivers/hwmon/scpi-hwmon.c
@@ -8,7 +8,7 @@
 
 #include <linux/hwmon.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/scpi_protocol.h>
 #include <linux/slab.h>
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
index 3cde3916ab6d..10b66c9ce045 100644
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -20,7 +20,7 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/sysfs.h>
 
 /* Addresses to scan */
diff --git a/drivers/hwmon/tmp464.c b/drivers/hwmon/tmp464.c
index 4b79c3f4d9fe..f58ca4c6acb6 100644
--- a/drivers/hwmon/tmp464.c
+++ b/drivers/hwmon/tmp464.c
@@ -14,7 +14,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
 
diff --git a/drivers/hwmon/tps23861.c b/drivers/hwmon/tps23861.c
index 8fbbb29ae11d..d33ecbac00d6 100644
--- a/drivers/hwmon/tps23861.c
+++ b/drivers/hwmon/tps23861.c
@@ -14,7 +14,7 @@
 #include <linux/hwmon.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 
 #define TEMPERATURE			0x2c
diff --git a/drivers/hwmon/ultra45_env.c b/drivers/hwmon/ultra45_env.c
index b23cff2e3d5d..3b580f229887 100644
--- a/drivers/hwmon/ultra45_env.c
+++ b/drivers/hwmon/ultra45_env.c
@@ -9,7 +9,8 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
diff --git a/drivers/hwmon/vexpress-hwmon.c b/drivers/hwmon/vexpress-hwmon.c
index 2ac5fb96bba4..d82a3b454d0e 100644
--- a/drivers/hwmon/vexpress-hwmon.c
+++ b/drivers/hwmon/vexpress-hwmon.c
@@ -13,7 +13,6 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/vexpress.h>
 
diff --git a/drivers/hwmon/w83773g.c b/drivers/hwmon/w83773g.c
index 7f3615f5587c..045eea8378c2 100644
--- a/drivers/hwmon/w83773g.c
+++ b/drivers/hwmon/w83773g.c
@@ -12,7 +12,7 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 
 /* W83773 has 3 channels */

From 170fea88170796562a95e561a486f8403cfc747d Mon Sep 17 00:00:00 2001
From: Azeem Shaikh <azeemshaikh38@gmail.com>
Date: Wed, 12 Jul 2023 21:43:07 +0000
Subject: [PATCH 1562/2047] hwmon: (pmbus/max20730) Remove strlcpy occurences

strlcpy() reads the entire source buffer first.
This read may exceed the destination size limit.
This is both inefficient and can lead to linear read
overflows if a source string is not NUL-terminated [1].
In an effort to remove strlcpy() completely [2], replace
strlcpy() here with direct assignment.

strlcpy in this file is used to copy fixed-length strings which can be
completely avoided by direct assignment and is safe to do so. strlen()
is used to return the length of @tbuf.

[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy
[2] https://github.com/KSPP/linux/issues/89

Signed-off-by: Azeem Shaikh <azeemshaikh38@gmail.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20230712214307.2424810-1-azeemshaikh38@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/max20730.c | 64 ++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 31 deletions(-)

diff --git a/drivers/hwmon/pmbus/max20730.c b/drivers/hwmon/pmbus/max20730.c
index 6f99085c2c3b..6df4c5b75bdc 100644
--- a/drivers/hwmon/pmbus/max20730.c
+++ b/drivers/hwmon/pmbus/max20730.c
@@ -114,6 +114,7 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf,
 	const struct pmbus_driver_info *info;
 	const struct max20730_data *data;
 	char tbuf[DEBUG_FS_DATA_MAX] = { 0 };
+	char *result = tbuf;
 	u16 val;
 
 	info = pmbus_get_driver_info(psu->client);
@@ -148,13 +149,13 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf,
 			>> MAX20730_MFR_DEVSET1_TSTAT_BIT_POS;
 
 		if (val == 0)
-			len = strlcpy(tbuf, "2000\n", DEBUG_FS_DATA_MAX);
+			result = "2000\n";
 		else if (val == 1)
-			len = strlcpy(tbuf, "125\n", DEBUG_FS_DATA_MAX);
+			result = "125\n";
 		else if (val == 2)
-			len = strlcpy(tbuf, "62.5\n", DEBUG_FS_DATA_MAX);
+			result = "62.5\n";
 		else
-			len = strlcpy(tbuf, "32\n", DEBUG_FS_DATA_MAX);
+			result = "32\n";
 		break;
 	case MAX20730_DEBUGFS_INTERNAL_GAIN:
 		val = (data->mfr_devset1 & MAX20730_MFR_DEVSET1_RGAIN_MASK)
@@ -163,35 +164,35 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf,
 		if (data->id == max20734) {
 			/* AN6209 */
 			if (val == 0)
-				len = strlcpy(tbuf, "0.8\n", DEBUG_FS_DATA_MAX);
+				result = "0.8\n";
 			else if (val == 1)
-				len = strlcpy(tbuf, "3.2\n", DEBUG_FS_DATA_MAX);
+				result = "3.2\n";
 			else if (val == 2)
-				len = strlcpy(tbuf, "1.6\n", DEBUG_FS_DATA_MAX);
+				result = "1.6\n";
 			else
-				len = strlcpy(tbuf, "6.4\n", DEBUG_FS_DATA_MAX);
+				result = "6.4\n";
 		} else if (data->id == max20730 || data->id == max20710) {
 			/* AN6042 or AN6140 */
 			if (val == 0)
-				len = strlcpy(tbuf, "0.9\n", DEBUG_FS_DATA_MAX);
+				result = "0.9\n";
 			else if (val == 1)
-				len = strlcpy(tbuf, "3.6\n", DEBUG_FS_DATA_MAX);
+				result = "3.6\n";
 			else if (val == 2)
-				len = strlcpy(tbuf, "1.8\n", DEBUG_FS_DATA_MAX);
+				result = "1.8\n";
 			else
-				len = strlcpy(tbuf, "7.2\n", DEBUG_FS_DATA_MAX);
+				result = "7.2\n";
 		} else if (data->id == max20743) {
 			/* AN6042 */
 			if (val == 0)
-				len = strlcpy(tbuf, "0.45\n", DEBUG_FS_DATA_MAX);
+				result = "0.45\n";
 			else if (val == 1)
-				len = strlcpy(tbuf, "1.8\n", DEBUG_FS_DATA_MAX);
+				result = "1.8\n";
 			else if (val == 2)
-				len = strlcpy(tbuf, "0.9\n", DEBUG_FS_DATA_MAX);
+				result = "0.9\n";
 			else
-				len = strlcpy(tbuf, "3.6\n", DEBUG_FS_DATA_MAX);
+				result = "3.6\n";
 		} else {
-			len = strlcpy(tbuf, "Not supported\n", DEBUG_FS_DATA_MAX);
+			result = "Not supported\n";
 		}
 		break;
 	case MAX20730_DEBUGFS_BOOT_VOLTAGE:
@@ -199,26 +200,26 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf,
 			>> MAX20730_MFR_DEVSET1_VBOOT_BIT_POS;
 
 		if (val == 0)
-			len = strlcpy(tbuf, "0.6484\n", DEBUG_FS_DATA_MAX);
+			result = "0.6484\n";
 		else if (val == 1)
-			len = strlcpy(tbuf, "0.8984\n", DEBUG_FS_DATA_MAX);
+			result = "0.8984\n";
 		else if (val == 2)
-			len = strlcpy(tbuf, "1.0\n", DEBUG_FS_DATA_MAX);
+			result = "1.0\n";
 		else
-			len = strlcpy(tbuf, "Invalid\n", DEBUG_FS_DATA_MAX);
+			result = "Invalid\n";
 		break;
 	case MAX20730_DEBUGFS_OUT_V_RAMP_RATE:
 		val = (data->mfr_devset2 & MAX20730_MFR_DEVSET2_VRATE)
 			>> MAX20730_MFR_DEVSET2_VRATE_BIT_POS;
 
 		if (val == 0)
-			len = strlcpy(tbuf, "4\n", DEBUG_FS_DATA_MAX);
+			result = "4\n";
 		else if (val == 1)
-			len = strlcpy(tbuf, "2\n", DEBUG_FS_DATA_MAX);
+			result = "2\n";
 		else if (val == 2)
-			len = strlcpy(tbuf, "1\n", DEBUG_FS_DATA_MAX);
+			result = "1\n";
 		else
-			len = strlcpy(tbuf, "Invalid\n", DEBUG_FS_DATA_MAX);
+			result = "Invalid\n";
 		break;
 	case MAX20730_DEBUGFS_OC_PROTECT_MODE:
 		ret = (data->mfr_devset2 & MAX20730_MFR_DEVSET2_OCPM_MASK)
@@ -230,13 +231,13 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf,
 			>> MAX20730_MFR_DEVSET2_SS_BIT_POS;
 
 		if (val == 0)
-			len = strlcpy(tbuf, "0.75\n", DEBUG_FS_DATA_MAX);
+			result = "0.75\n";
 		else if (val == 1)
-			len = strlcpy(tbuf, "1.5\n", DEBUG_FS_DATA_MAX);
+			result = "1.5\n";
 		else if (val == 2)
-			len = strlcpy(tbuf, "3\n", DEBUG_FS_DATA_MAX);
+			result = "3\n";
 		else
-			len = strlcpy(tbuf, "6\n", DEBUG_FS_DATA_MAX);
+			result = "6\n";
 		break;
 	case MAX20730_DEBUGFS_IMAX:
 		ret = (data->mfr_devset2 & MAX20730_MFR_DEVSET2_IMAX_MASK)
@@ -287,10 +288,11 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf,
 				"%d.%d\n", ret / 10000, ret % 10000);
 		break;
 	default:
-		len = strlcpy(tbuf, "Invalid\n", DEBUG_FS_DATA_MAX);
+		result = "Invalid\n";
 	}
 
-	return simple_read_from_buffer(buf, count, ppos, tbuf, len);
+	len = strlen(result);
+	return simple_read_from_buffer(buf, count, ppos, result, len);
 }
 
 static const struct file_operations max20730_fops = {

From 46290c6bc0b102dc30250ded4f359174a384c957 Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Tue, 11 Jul 2023 17:21:43 +0200
Subject: [PATCH 1563/2047] hwmon: (dimmtemp) Support more than 32 DIMMs

This patch introduces support for handling more than 32 DIMMs by
utilizing bitmap operations. The changes ensure that the driver can
handle a higher number of DIMMs efficiently.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Link: https://lore.kernel.org/r/20230711152144.755177-1-Naresh.Solanki@9elements.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/peci/dimmtemp.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/hwmon/peci/dimmtemp.c b/drivers/hwmon/peci/dimmtemp.c
index ed968401f93c..ce89da3937a0 100644
--- a/drivers/hwmon/peci/dimmtemp.c
+++ b/drivers/hwmon/peci/dimmtemp.c
@@ -219,19 +219,21 @@ static int check_populated_dimms(struct peci_dimmtemp *priv)
 {
 	int chan_rank_max = priv->gen_info->chan_rank_max;
 	int dimm_idx_max = priv->gen_info->dimm_idx_max;
-	u32 chan_rank_empty = 0;
-	u32 dimm_mask = 0;
-	int chan_rank, dimm_idx, ret;
+	DECLARE_BITMAP(dimm_mask, DIMM_NUMS_MAX);
+	DECLARE_BITMAP(chan_rank_empty, CHAN_RANK_MAX);
+
+	int chan_rank, dimm_idx, ret, i;
 	u32 pcs;
 
-	BUILD_BUG_ON(BITS_PER_TYPE(chan_rank_empty) < CHAN_RANK_MAX);
-	BUILD_BUG_ON(BITS_PER_TYPE(dimm_mask) < DIMM_NUMS_MAX);
 	if (chan_rank_max * dimm_idx_max > DIMM_NUMS_MAX) {
 		WARN_ONCE(1, "Unsupported number of DIMMs - chan_rank_max: %d, dimm_idx_max: %d",
 			  chan_rank_max, dimm_idx_max);
 		return -EINVAL;
 	}
 
+	bitmap_zero(dimm_mask, DIMM_NUMS_MAX);
+	bitmap_zero(chan_rank_empty, CHAN_RANK_MAX);
+
 	for (chan_rank = 0; chan_rank < chan_rank_max; chan_rank++) {
 		ret = peci_pcs_read(priv->peci_dev, PECI_PCS_DDR_DIMM_TEMP, chan_rank, &pcs);
 		if (ret) {
@@ -242,7 +244,7 @@ static int check_populated_dimms(struct peci_dimmtemp *priv)
 			 * detection to be performed at a later point in time.
 			 */
 			if (ret == -EINVAL) {
-				chan_rank_empty |= BIT(chan_rank);
+				bitmap_set(chan_rank_empty, chan_rank, 1);
 				continue;
 			}
 
@@ -251,7 +253,7 @@ static int check_populated_dimms(struct peci_dimmtemp *priv)
 
 		for (dimm_idx = 0; dimm_idx < dimm_idx_max; dimm_idx++)
 			if (__dimm_temp(pcs, dimm_idx))
-				dimm_mask |= BIT(chan_rank * dimm_idx_max + dimm_idx);
+				bitmap_set(dimm_mask, chan_rank * dimm_idx_max + dimm_idx, 1);
 	}
 
 	/*
@@ -260,7 +262,7 @@ static int check_populated_dimms(struct peci_dimmtemp *priv)
 	 * host platform boot. Retrying a couple of times lets us make sure
 	 * that the state is persistent.
 	 */
-	if (chan_rank_empty == GENMASK(chan_rank_max - 1, 0)) {
+	if (bitmap_full(chan_rank_empty, chan_rank_max)) {
 		if (priv->no_dimm_retry_count < NO_DIMM_RETRY_COUNT_MAX) {
 			priv->no_dimm_retry_count++;
 
@@ -274,14 +276,16 @@ static int check_populated_dimms(struct peci_dimmtemp *priv)
 	 * It's possible that memory training is not done yet. In this case we
 	 * defer the detection to be performed at a later point in time.
 	 */
-	if (!dimm_mask) {
+	if (bitmap_empty(dimm_mask, DIMM_NUMS_MAX)) {
 		priv->no_dimm_retry_count = 0;
 		return -EAGAIN;
 	}
 
-	dev_dbg(priv->dev, "Scanned populated DIMMs: %#x\n", dimm_mask);
+	for_each_set_bit(i, dimm_mask, DIMM_NUMS_MAX) {
+		dev_dbg(priv->dev, "Found DIMM%#x\n", i);
+	}
 
-	bitmap_from_arr32(priv->dimm_mask, &dimm_mask, DIMM_NUMS_MAX);
+	bitmap_copy(priv->dimm_mask, dimm_mask, DIMM_NUMS_MAX);
 
 	return 0;
 }

From c0cf96d7e3f004fac3cc36434eeeb01fa5a38dae Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 17:43:06 +0800
Subject: [PATCH 1564/2047] hwmon: (bt1-pvt) Convert to
 devm_platform_ioremap_resource()

Use devm_platform_ioremap_resource() to simplify code.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Link: https://lore.kernel.org/r/20230704094306.21933-1-frank.li@vivo.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/bt1-pvt.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/hwmon/bt1-pvt.c b/drivers/hwmon/bt1-pvt.c
index 8d402a627306..b77ebac2e0ce 100644
--- a/drivers/hwmon/bt1-pvt.c
+++ b/drivers/hwmon/bt1-pvt.c
@@ -891,15 +891,8 @@ static struct pvt_hwmon *pvt_create_data(struct platform_device *pdev)
 static int pvt_request_regs(struct pvt_hwmon *pvt)
 {
 	struct platform_device *pdev = to_platform_device(pvt->dev);
-	struct resource *res;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(pvt->dev, "Couldn't find PVT memresource\n");
-		return -EINVAL;
-	}
-
-	pvt->regs = devm_ioremap_resource(pvt->dev, res);
+	pvt->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(pvt->regs))
 		return PTR_ERR(pvt->regs);
 

From 23299bba08dfb7037877edcc430c907f0522c856 Mon Sep 17 00:00:00 2001
From: Ahmad Khalifa <ahmad@khalifa.ws>
Date: Sat, 15 Jul 2023 20:52:44 +0100
Subject: [PATCH 1565/2047] hwmon: (nct6775) Change labels for nct6799

nct6799d-r and nct6796d-s are very similar and chip_id is only
different in the version nibblet.

Since both will be detected by the driver anyway due to the
chipid mask, they should be labeled together for dmesg msg.

Signed-off-by: Ahmad Khalifa <ahmad@khalifa.ws>
Link: https://lore.kernel.org/r/20230715195244.1334723-1-ahmad@khalifa.ws
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/nct6775.rst  | 11 ++++++++++-
 drivers/hwmon/nct6775-core.c     |  3 ++-
 drivers/hwmon/nct6775-platform.c |  2 +-
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/Documentation/hwmon/nct6775.rst b/Documentation/hwmon/nct6775.rst
index 5ba8276aad4b..9d7a10de61a7 100644
--- a/Documentation/hwmon/nct6775.rst
+++ b/Documentation/hwmon/nct6775.rst
@@ -80,7 +80,13 @@ Supported chips:
 
     Datasheet: Available from Nuvoton upon request
 
+  * Nuvoton NCT6796D-S/NCT6799D-R
 
+    Prefix: 'nct6799'
+
+    Addresses scanned: ISA address retrieved from Super I/O registers
+
+    Datasheet: Available from Nuvoton upon request
 
 Authors:
 
@@ -277,4 +283,7 @@ will not reflect a usable value. It often reports unreasonably high
 temperatures, and in some cases the reported temperature declines if the actual
 temperature increases (similar to the raw PECI temperature value - see PECI
 specification for details). CPUTIN should therefore be ignored on ASUS
-boards. The CPU temperature on ASUS boards is reported from PECI 0.
+boards. The CPU temperature on ASUS boards is reported from PECI 0 or TSI 0.
+
+NCT6796D-S and NCT6799D-R chips are very similar and their chip_id indicates
+they are different versions. This driver treats them the same way.
diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c
index 08ce4984151d..e0d1770806b7 100644
--- a/drivers/hwmon/nct6775-core.c
+++ b/drivers/hwmon/nct6775-core.c
@@ -33,7 +33,8 @@
  *                                           (0xd451)
  * nct6798d    14      7       7       2+6    0xd428 0xc1    0x5ca3
  *                                           (0xd429)
- * nct6799d    14      7       7       2+6    0xd802 0xc1    0x5ca3
+ * nct6796d-s  18      7       7       6+2    0xd801 0xc1    0x5ca3
+ * nct6799d-r  18      7       7       6+2    0xd802 0xc1    0x5ca3
  *
  * #temp lists the number of monitored temperature sources (first value) plus
  * the number of directly connectable temperature sensors (second value).
diff --git a/drivers/hwmon/nct6775-platform.c b/drivers/hwmon/nct6775-platform.c
index a409d7a0b813..81bf03dad6bb 100644
--- a/drivers/hwmon/nct6775-platform.c
+++ b/drivers/hwmon/nct6775-platform.c
@@ -35,7 +35,7 @@ static const char * const nct6775_sio_names[] __initconst = {
 	"NCT6796D",
 	"NCT6797D",
 	"NCT6798D",
-	"NCT6799D",
+	"NCT6796D-S/NCT6799D-R",
 };
 
 static unsigned short force_id;

From b3af14c4c2841343584b61c49b0cfb8808764239 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 16 Jul 2023 08:39:41 -0700
Subject: [PATCH 1566/2047] hwmon: Remove smm665 driver

SMM665 and related chips are power controller/sequencer chips from
Summit Microelectronics. The company was acquired by Qualcomm in 2012,
and support for the chip series stopped.

The chips are long since gone from active use, making the driver
unsupportable and just consuming space and compile time. Remove it.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/index.rst  |   1 -
 Documentation/hwmon/smm665.rst | 187 ---------
 MAINTAINERS                    |   7 -
 drivers/hwmon/Kconfig          |  14 -
 drivers/hwmon/Makefile         |   1 -
 drivers/hwmon/smm665.c         | 706 ---------------------------------
 6 files changed, 916 deletions(-)
 delete mode 100644 Documentation/hwmon/smm665.rst
 delete mode 100644 drivers/hwmon/smm665.c

diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 042e1cf9501b..d11924667f76 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -195,7 +195,6 @@ Hardware Monitoring Kernel Drivers
    shtc1
    sis5595
    sl28cpld
-   smm665
    smpro-hwmon
    smsc47b397
    smsc47m192
diff --git a/Documentation/hwmon/smm665.rst b/Documentation/hwmon/smm665.rst
deleted file mode 100644
index 481e69d8bf39..000000000000
--- a/Documentation/hwmon/smm665.rst
+++ /dev/null
@@ -1,187 +0,0 @@
-Kernel driver smm665
-====================
-
-Supported chips:
-
-  * Summit Microelectronics SMM465
-
-    Prefix: 'smm465'
-
-    Addresses scanned: -
-
-    Datasheet:
-
-      http://www.summitmicro.com/prod_select/summary/SMM465/SMM465DS.pdf
-
-  * Summit Microelectronics SMM665, SMM665B
-
-    Prefix: 'smm665'
-
-    Addresses scanned: -
-
-    Datasheet:
-
-      http://www.summitmicro.com/prod_select/summary/SMM665/SMM665B_2089_20.pdf
-
-  * Summit Microelectronics SMM665C
-
-    Prefix: 'smm665c'
-
-    Addresses scanned: -
-
-    Datasheet:
-
-      http://www.summitmicro.com/prod_select/summary/SMM665C/SMM665C_2125.pdf
-
-  * Summit Microelectronics SMM764
-
-    Prefix: 'smm764'
-
-    Addresses scanned: -
-
-    Datasheet:
-
-      http://www.summitmicro.com/prod_select/summary/SMM764/SMM764_2098.pdf
-
-  * Summit Microelectronics SMM766, SMM766B
-
-    Prefix: 'smm766'
-
-    Addresses scanned: -
-
-    Datasheets:
-
-      http://www.summitmicro.com/prod_select/summary/SMM766/SMM766_2086.pdf
-
-      http://www.summitmicro.com/prod_select/summary/SMM766B/SMM766B_2122.pdf
-
-Author: Guenter Roeck <linux@roeck-us.net>
-
-
-Module Parameters
------------------
-
-* vref: int
-    Default: 1250 (mV)
-
-    Reference voltage on VREF_ADC pin in mV. It should not be necessary to set
-    this parameter unless a non-default reference voltage is used.
-
-
-Description
------------
-
-[From datasheet] The SMM665 is an Active DC Output power supply Controller
-that monitors, margins and cascade sequences power. The part monitors six
-power supply channels as well as VDD, 12V input, two general-purpose analog
-inputs and an internal temperature sensor using a 10-bit ADC.
-
-Each monitored channel has its own high and low limits, plus a critical
-limit.
-
-Support for SMM465, SMM764, and SMM766 has been implemented but is untested.
-
-
-Usage Notes
------------
-
-This driver does not probe for devices, since there is no register which
-can be safely used to identify the chip. You will have to instantiate
-the devices explicitly. When instantiating the device, you have to specify
-its configuration register address.
-
-Example: the following will load the driver for an SMM665 at address 0x57
-on I2C bus #1::
-
-	$ modprobe smm665
-	$ echo smm665 0x57 > /sys/bus/i2c/devices/i2c-1/new_device
-
-
-Sysfs entries
--------------
-
-This driver uses the values in the datasheet to convert ADC register values
-into the values specified in the sysfs-interface document. All attributes are
-read only.
-
-Min, max, lcrit, and crit values are used by the chip to trigger external signals
-and/or other activity. Triggered signals can include HEALTHY, RST, Power Off,
-or Fault depending on the chip configuration. The driver reports values as lcrit
-or crit if exceeding the limits triggers RST, Power Off, or Fault, and as min or
-max otherwise. For details please see the SMM665 datasheet.
-
-For SMM465 and SMM764, values for Channel E and F are reported but undefined.
-
-======================= =======================================================
-in1_input		12V input voltage (mV)
-in2_input		3.3V (VDD) input voltage (mV)
-in3_input		Channel A voltage (mV)
-in4_input		Channel B voltage (mV)
-in5_input		Channel C voltage (mV)
-in6_input		Channel D voltage (mV)
-in7_input		Channel E voltage (mV)
-in8_input		Channel F voltage (mV)
-in9_input		AIN1 voltage (mV)
-in10_input		AIN2 voltage (mV)
-
-in1_min			12v input minimum voltage (mV)
-in2_min			3.3V (VDD) input minimum voltage (mV)
-in3_min			Channel A minimum voltage (mV)
-in4_min			Channel B minimum voltage (mV)
-in5_min			Channel C minimum voltage (mV)
-in6_min			Channel D minimum voltage (mV)
-in7_min			Channel E minimum voltage (mV)
-in8_min			Channel F minimum voltage (mV)
-in9_min			AIN1 minimum voltage (mV)
-in10_min		AIN2 minimum voltage (mV)
-
-in1_max			12v input maximum voltage (mV)
-in2_max			3.3V (VDD) input maximum voltage (mV)
-in3_max			Channel A maximum voltage (mV)
-in4_max			Channel B maximum voltage (mV)
-in5_max			Channel C maximum voltage (mV)
-in6_max			Channel D maximum voltage (mV)
-in7_max			Channel E maximum voltage (mV)
-in8_max			Channel F maximum voltage (mV)
-in9_max			AIN1 maximum voltage (mV)
-in10_max		AIN2 maximum voltage (mV)
-
-in1_lcrit		12v input critical minimum voltage (mV)
-in2_lcrit		3.3V (VDD) input critical minimum voltage (mV)
-in3_lcrit		Channel A critical minimum voltage (mV)
-in4_lcrit		Channel B critical minimum voltage (mV)
-in5_lcrit		Channel C critical minimum voltage (mV)
-in6_lcrit		Channel D critical minimum voltage (mV)
-in7_lcrit		Channel E critical minimum voltage (mV)
-in8_lcrit		Channel F critical minimum voltage (mV)
-in9_lcrit		AIN1 critical minimum voltage (mV)
-in10_lcrit		AIN2 critical minimum voltage (mV)
-
-in1_crit		12v input critical maximum voltage (mV)
-in2_crit		3.3V (VDD) input critical maximum voltage (mV)
-in3_crit		Channel A critical maximum voltage (mV)
-in4_crit		Channel B critical maximum voltage (mV)
-in5_crit		Channel C critical maximum voltage (mV)
-in6_crit		Channel D critical maximum voltage (mV)
-in7_crit		Channel E critical maximum voltage (mV)
-in8_crit		Channel F critical maximum voltage (mV)
-in9_crit		AIN1 critical maximum voltage (mV)
-in10_crit		AIN2 critical maximum voltage (mV)
-
-in1_crit_alarm		12v input critical alarm
-in2_crit_alarm		3.3V (VDD) input critical alarm
-in3_crit_alarm		Channel A critical alarm
-in4_crit_alarm		Channel B critical alarm
-in5_crit_alarm		Channel C critical alarm
-in6_crit_alarm		Channel D critical alarm
-in7_crit_alarm		Channel E critical alarm
-in8_crit_alarm		Channel F critical alarm
-in9_crit_alarm		AIN1 critical alarm
-in10_crit_alarm		AIN2 critical alarm
-
-temp1_input		Chip temperature
-temp1_min		Minimum chip temperature
-temp1_max		Maximum chip temperature
-temp1_crit		Critical chip temperature
-temp1_crit_alarm	Temperature critical alarm
-======================= =======================================================
diff --git a/MAINTAINERS b/MAINTAINERS
index 53b7ca804465..080e39424343 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19596,13 +19596,6 @@ M:	Nicolas Pitre <nico@fluxnic.net>
 S:	Odd Fixes
 F:	drivers/net/ethernet/smsc/smc91x.*
 
-SMM665 HARDWARE MONITOR DRIVER
-M:	Guenter Roeck <linux@roeck-us.net>
-L:	linux-hwmon@vger.kernel.org
-S:	Maintained
-F:	Documentation/hwmon/smm665.rst
-F:	drivers/hwmon/smm665.c
-
 SMSC EMC2103 HARDWARE MONITOR DRIVER
 M:	Steve Glendinning <steve.glendinning@shawell.net>
 L:	linux-hwmon@vger.kernel.org
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 307477b8a371..2913299c2c9e 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1951,20 +1951,6 @@ config SENSORS_SFCTEMP
 	  This driver can also be built as a module.  If so, the module
 	  will be called sfctemp.
 
-config SENSORS_SMM665
-	tristate "Summit Microelectronics SMM665"
-	depends on I2C
-	help
-	  If you say yes here you get support for the hardware monitoring
-	  features of the Summit Microelectronics SMM665/SMM665B Six-Channel
-	  Active DC Output Controller / Monitor.
-
-	  Other supported chips are SMM465, SMM665C, SMM764, and SMM766.
-	  Support for those chips is untested.
-
-	  This driver can also be built as a module. If so, the module will
-	  be called smm665.
-
 config SENSORS_ADC128D818
 	tristate "Texas Instruments ADC128D818"
 	depends on I2C
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 3f4b0fda0998..ff6bfd109c72 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -191,7 +191,6 @@ obj-$(CONFIG_SENSORS_SHT3x)	+= sht3x.o
 obj-$(CONFIG_SENSORS_SHT4x)	+= sht4x.o
 obj-$(CONFIG_SENSORS_SHTC1)	+= shtc1.o
 obj-$(CONFIG_SENSORS_SIS5595)	+= sis5595.o
-obj-$(CONFIG_SENSORS_SMM665)	+= smm665.o
 obj-$(CONFIG_SENSORS_SMPRO)	+= smpro-hwmon.o
 obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o
 obj-$(CONFIG_SENSORS_SMSC47M1)	+= smsc47m1.o
diff --git a/drivers/hwmon/smm665.c b/drivers/hwmon/smm665.c
deleted file mode 100644
index 026c76f8c22e..000000000000
--- a/drivers/hwmon/smm665.c
+++ /dev/null
@@ -1,706 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Driver for SMM665 Power Controller / Monitor
- *
- * Copyright (C) 2010 Ericsson AB.
- *
- * This driver should also work for SMM465, SMM764, and SMM766, but is untested
- * for those chips. Only monitoring functionality is implemented.
- *
- * Datasheets:
- * http://www.summitmicro.com/prod_select/summary/SMM665/SMM665B_2089_20.pdf
- * http://www.summitmicro.com/prod_select/summary/SMM766B/SMM766B_2122.pdf
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/i2c.h>
-#include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
-#include <linux/delay.h>
-#include <linux/jiffies.h>
-
-/* Internal reference voltage (VREF, x 1000 */
-#define SMM665_VREF_ADC_X1000	1250
-
-/* module parameters */
-static int vref = SMM665_VREF_ADC_X1000;
-module_param(vref, int, 0);
-MODULE_PARM_DESC(vref, "Reference voltage in mV");
-
-enum chips { smm465, smm665, smm665c, smm764, smm766 };
-
-/*
- * ADC channel addresses
- */
-#define	SMM665_MISC16_ADC_DATA_A	0x00
-#define	SMM665_MISC16_ADC_DATA_B	0x01
-#define	SMM665_MISC16_ADC_DATA_C	0x02
-#define	SMM665_MISC16_ADC_DATA_D	0x03
-#define	SMM665_MISC16_ADC_DATA_E	0x04
-#define	SMM665_MISC16_ADC_DATA_F	0x05
-#define	SMM665_MISC16_ADC_DATA_VDD	0x06
-#define	SMM665_MISC16_ADC_DATA_12V	0x07
-#define	SMM665_MISC16_ADC_DATA_INT_TEMP	0x08
-#define	SMM665_MISC16_ADC_DATA_AIN1	0x09
-#define	SMM665_MISC16_ADC_DATA_AIN2	0x0a
-
-/*
- * Command registers
- */
-#define	SMM665_MISC8_CMD_STS		0x80
-#define	SMM665_MISC8_STATUS1		0x81
-#define	SMM665_MISC8_STATUSS2		0x82
-#define	SMM665_MISC8_IO_POLARITY	0x83
-#define	SMM665_MISC8_PUP_POLARITY	0x84
-#define	SMM665_MISC8_ADOC_STATUS1	0x85
-#define	SMM665_MISC8_ADOC_STATUS2	0x86
-#define	SMM665_MISC8_WRITE_PROT		0x87
-#define	SMM665_MISC8_STS_TRACK		0x88
-
-/*
- * Configuration registers and register groups
- */
-#define SMM665_ADOC_ENABLE		0x0d
-#define SMM665_LIMIT_BASE		0x80	/* First limit register */
-
-/*
- * Limit register bit masks
- */
-#define SMM665_TRIGGER_RST		0x8000
-#define SMM665_TRIGGER_HEALTHY		0x4000
-#define SMM665_TRIGGER_POWEROFF		0x2000
-#define SMM665_TRIGGER_SHUTDOWN		0x1000
-#define SMM665_ADC_MASK			0x03ff
-
-#define smm665_is_critical(lim)	((lim) & (SMM665_TRIGGER_RST \
-					| SMM665_TRIGGER_POWEROFF \
-					| SMM665_TRIGGER_SHUTDOWN))
-/*
- * Fault register bit definitions
- * Values are merged from status registers 1/2,
- * with status register 1 providing the upper 8 bits.
- */
-#define SMM665_FAULT_A		0x0001
-#define SMM665_FAULT_B		0x0002
-#define SMM665_FAULT_C		0x0004
-#define SMM665_FAULT_D		0x0008
-#define SMM665_FAULT_E		0x0010
-#define SMM665_FAULT_F		0x0020
-#define SMM665_FAULT_VDD	0x0040
-#define SMM665_FAULT_12V	0x0080
-#define SMM665_FAULT_TEMP	0x0100
-#define SMM665_FAULT_AIN1	0x0200
-#define SMM665_FAULT_AIN2	0x0400
-
-/*
- * I2C Register addresses
- *
- * The configuration register needs to be the configured base register.
- * The command/status register address is derived from it.
- */
-#define SMM665_REGMASK		0x78
-#define SMM665_CMDREG_BASE	0x48
-#define SMM665_CONFREG_BASE	0x50
-
-/*
- *  Equations given by chip manufacturer to calculate voltage/temperature values
- *  vref = Reference voltage on VREF_ADC pin (module parameter)
- *  adc  = 10bit ADC value read back from registers
- */
-
-/* Voltage A-F and VDD */
-#define SMM665_VMON_ADC_TO_VOLTS(adc)  ((adc) * vref / 256)
-
-/* Voltage 12VIN */
-#define SMM665_12VIN_ADC_TO_VOLTS(adc) ((adc) * vref * 3 / 256)
-
-/* Voltage AIN1, AIN2 */
-#define SMM665_AIN_ADC_TO_VOLTS(adc)   ((adc) * vref / 512)
-
-/* Temp Sensor */
-#define SMM665_TEMP_ADC_TO_CELSIUS(adc) (((adc) <= 511) ?		   \
-					 ((int)(adc) * 1000 / 4) :	   \
-					 (((int)(adc) - 0x400) * 1000 / 4))
-
-#define SMM665_NUM_ADC		11
-
-/*
- * Chip dependent ADC conversion time, in uS
- */
-#define SMM665_ADC_WAIT_SMM665	70
-#define SMM665_ADC_WAIT_SMM766	185
-
-struct smm665_data {
-	enum chips type;
-	int conversion_time;		/* ADC conversion time */
-	struct i2c_client *client;
-	struct mutex update_lock;
-	bool valid;
-	unsigned long last_updated;	/* in jiffies */
-	u16 adc[SMM665_NUM_ADC];	/* adc values (raw) */
-	u16 faults;			/* fault status */
-	/* The following values are in mV */
-	int critical_min_limit[SMM665_NUM_ADC];
-	int alarm_min_limit[SMM665_NUM_ADC];
-	int critical_max_limit[SMM665_NUM_ADC];
-	int alarm_max_limit[SMM665_NUM_ADC];
-	struct i2c_client *cmdreg;
-};
-
-/*
- * smm665_read16()
- *
- * Read 16 bit value from <reg>, <reg+1>. Upper 8 bits are in <reg>.
- */
-static int smm665_read16(struct i2c_client *client, int reg)
-{
-	int rv, val;
-
-	rv = i2c_smbus_read_byte_data(client, reg);
-	if (rv < 0)
-		return rv;
-	val = rv << 8;
-	rv = i2c_smbus_read_byte_data(client, reg + 1);
-	if (rv < 0)
-		return rv;
-	val |= rv;
-	return val;
-}
-
-/*
- * Read adc value.
- */
-static int smm665_read_adc(struct smm665_data *data, int adc)
-{
-	struct i2c_client *client = data->cmdreg;
-	int rv;
-	int radc;
-
-	/*
-	 * Algorithm for reading ADC, per SMM665 datasheet
-	 *
-	 *  {[S][addr][W][Ack]} {[offset][Ack]} {[S][addr][R][Nack]}
-	 * [wait conversion time]
-	 *  {[S][addr][R][Ack]} {[datahi][Ack]} {[datalo][Ack][P]}
-	 *
-	 * To implement the first part of this exchange,
-	 * do a full read transaction and expect a failure/Nack.
-	 * This sets up the address pointer on the SMM665
-	 * and starts the ADC conversion.
-	 * Then do a two-byte read transaction.
-	 */
-	rv = i2c_smbus_read_byte_data(client, adc << 3);
-	if (rv != -ENXIO) {
-		/*
-		 * We expect ENXIO to reflect NACK
-		 * (per Documentation/i2c/fault-codes.rst).
-		 * Everything else is an error.
-		 */
-		dev_dbg(&client->dev,
-			"Unexpected return code %d when setting ADC index", rv);
-		return (rv < 0) ? rv : -EIO;
-	}
-
-	udelay(data->conversion_time);
-
-	/*
-	 * Now read two bytes.
-	 *
-	 * Neither i2c_smbus_read_byte() nor
-	 * i2c_smbus_read_block_data() worked here,
-	 * so use i2c_smbus_read_word_swapped() instead.
-	 * We could also try to use i2c_master_recv(),
-	 * but that is not always supported.
-	 */
-	rv = i2c_smbus_read_word_swapped(client, 0);
-	if (rv < 0) {
-		dev_dbg(&client->dev, "Failed to read ADC value: error %d", rv);
-		return rv;
-	}
-	/*
-	 * Validate/verify readback adc channel (in bit 11..14).
-	 */
-	radc = (rv >> 11) & 0x0f;
-	if (radc != adc) {
-		dev_dbg(&client->dev, "Unexpected RADC: Expected %d got %d",
-			adc, radc);
-		return -EIO;
-	}
-
-	return rv & SMM665_ADC_MASK;
-}
-
-static struct smm665_data *smm665_update_device(struct device *dev)
-{
-	struct smm665_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
-	struct smm665_data *ret = data;
-
-	mutex_lock(&data->update_lock);
-
-	if (time_after(jiffies, data->last_updated + HZ) || !data->valid) {
-		int i, val;
-
-		/*
-		 * read status registers
-		 */
-		val = smm665_read16(client, SMM665_MISC8_STATUS1);
-		if (unlikely(val < 0)) {
-			ret = ERR_PTR(val);
-			goto abort;
-		}
-		data->faults = val;
-
-		/* Read adc registers */
-		for (i = 0; i < SMM665_NUM_ADC; i++) {
-			val = smm665_read_adc(data, i);
-			if (unlikely(val < 0)) {
-				ret = ERR_PTR(val);
-				goto abort;
-			}
-			data->adc[i] = val;
-		}
-		data->last_updated = jiffies;
-		data->valid = true;
-	}
-abort:
-	mutex_unlock(&data->update_lock);
-	return ret;
-}
-
-/* Return converted value from given adc */
-static int smm665_convert(u16 adcval, int index)
-{
-	int val = 0;
-
-	switch (index) {
-	case SMM665_MISC16_ADC_DATA_12V:
-		val = SMM665_12VIN_ADC_TO_VOLTS(adcval & SMM665_ADC_MASK);
-		break;
-
-	case SMM665_MISC16_ADC_DATA_VDD:
-	case SMM665_MISC16_ADC_DATA_A:
-	case SMM665_MISC16_ADC_DATA_B:
-	case SMM665_MISC16_ADC_DATA_C:
-	case SMM665_MISC16_ADC_DATA_D:
-	case SMM665_MISC16_ADC_DATA_E:
-	case SMM665_MISC16_ADC_DATA_F:
-		val = SMM665_VMON_ADC_TO_VOLTS(adcval & SMM665_ADC_MASK);
-		break;
-
-	case SMM665_MISC16_ADC_DATA_AIN1:
-	case SMM665_MISC16_ADC_DATA_AIN2:
-		val = SMM665_AIN_ADC_TO_VOLTS(adcval & SMM665_ADC_MASK);
-		break;
-
-	case SMM665_MISC16_ADC_DATA_INT_TEMP:
-		val = SMM665_TEMP_ADC_TO_CELSIUS(adcval & SMM665_ADC_MASK);
-		break;
-
-	default:
-		/* If we get here, the developer messed up */
-		WARN_ON_ONCE(1);
-		break;
-	}
-
-	return val;
-}
-
-static int smm665_get_min(struct device *dev, int index)
-{
-	struct smm665_data *data = dev_get_drvdata(dev);
-
-	return data->alarm_min_limit[index];
-}
-
-static int smm665_get_max(struct device *dev, int index)
-{
-	struct smm665_data *data = dev_get_drvdata(dev);
-
-	return data->alarm_max_limit[index];
-}
-
-static int smm665_get_lcrit(struct device *dev, int index)
-{
-	struct smm665_data *data = dev_get_drvdata(dev);
-
-	return data->critical_min_limit[index];
-}
-
-static int smm665_get_crit(struct device *dev, int index)
-{
-	struct smm665_data *data = dev_get_drvdata(dev);
-
-	return data->critical_max_limit[index];
-}
-
-static ssize_t smm665_show_crit_alarm(struct device *dev,
-				      struct device_attribute *da, char *buf)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
-	struct smm665_data *data = smm665_update_device(dev);
-	int val = 0;
-
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	if (data->faults & (1 << attr->index))
-		val = 1;
-
-	return sysfs_emit(buf, "%d\n", val);
-}
-
-static ssize_t smm665_show_input(struct device *dev,
-				 struct device_attribute *da, char *buf)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
-	struct smm665_data *data = smm665_update_device(dev);
-	int adc = attr->index;
-	int val;
-
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	val = smm665_convert(data->adc[adc], adc);
-	return sysfs_emit(buf, "%d\n", val);
-}
-
-#define SMM665_SHOW(what) \
-static ssize_t smm665_show_##what(struct device *dev, \
-				    struct device_attribute *da, char *buf) \
-{ \
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da); \
-	const int val = smm665_get_##what(dev, attr->index); \
-	return snprintf(buf, PAGE_SIZE, "%d\n", val); \
-}
-
-SMM665_SHOW(min);
-SMM665_SHOW(max);
-SMM665_SHOW(lcrit);
-SMM665_SHOW(crit);
-
-/*
- * These macros are used below in constructing device attribute objects
- * for use with sysfs_create_group() to make a sysfs device file
- * for each register.
- */
-
-#define SMM665_ATTR(name, type, cmd_idx) \
-	static SENSOR_DEVICE_ATTR(name##_##type, S_IRUGO, \
-				  smm665_show_##type, NULL, cmd_idx)
-
-/* Construct a sensor_device_attribute structure for each register */
-
-/* Input voltages */
-SMM665_ATTR(in1, input, SMM665_MISC16_ADC_DATA_12V);
-SMM665_ATTR(in2, input, SMM665_MISC16_ADC_DATA_VDD);
-SMM665_ATTR(in3, input, SMM665_MISC16_ADC_DATA_A);
-SMM665_ATTR(in4, input, SMM665_MISC16_ADC_DATA_B);
-SMM665_ATTR(in5, input, SMM665_MISC16_ADC_DATA_C);
-SMM665_ATTR(in6, input, SMM665_MISC16_ADC_DATA_D);
-SMM665_ATTR(in7, input, SMM665_MISC16_ADC_DATA_E);
-SMM665_ATTR(in8, input, SMM665_MISC16_ADC_DATA_F);
-SMM665_ATTR(in9, input, SMM665_MISC16_ADC_DATA_AIN1);
-SMM665_ATTR(in10, input, SMM665_MISC16_ADC_DATA_AIN2);
-
-/* Input voltages min */
-SMM665_ATTR(in1, min, SMM665_MISC16_ADC_DATA_12V);
-SMM665_ATTR(in2, min, SMM665_MISC16_ADC_DATA_VDD);
-SMM665_ATTR(in3, min, SMM665_MISC16_ADC_DATA_A);
-SMM665_ATTR(in4, min, SMM665_MISC16_ADC_DATA_B);
-SMM665_ATTR(in5, min, SMM665_MISC16_ADC_DATA_C);
-SMM665_ATTR(in6, min, SMM665_MISC16_ADC_DATA_D);
-SMM665_ATTR(in7, min, SMM665_MISC16_ADC_DATA_E);
-SMM665_ATTR(in8, min, SMM665_MISC16_ADC_DATA_F);
-SMM665_ATTR(in9, min, SMM665_MISC16_ADC_DATA_AIN1);
-SMM665_ATTR(in10, min, SMM665_MISC16_ADC_DATA_AIN2);
-
-/* Input voltages max */
-SMM665_ATTR(in1, max, SMM665_MISC16_ADC_DATA_12V);
-SMM665_ATTR(in2, max, SMM665_MISC16_ADC_DATA_VDD);
-SMM665_ATTR(in3, max, SMM665_MISC16_ADC_DATA_A);
-SMM665_ATTR(in4, max, SMM665_MISC16_ADC_DATA_B);
-SMM665_ATTR(in5, max, SMM665_MISC16_ADC_DATA_C);
-SMM665_ATTR(in6, max, SMM665_MISC16_ADC_DATA_D);
-SMM665_ATTR(in7, max, SMM665_MISC16_ADC_DATA_E);
-SMM665_ATTR(in8, max, SMM665_MISC16_ADC_DATA_F);
-SMM665_ATTR(in9, max, SMM665_MISC16_ADC_DATA_AIN1);
-SMM665_ATTR(in10, max, SMM665_MISC16_ADC_DATA_AIN2);
-
-/* Input voltages lcrit */
-SMM665_ATTR(in1, lcrit, SMM665_MISC16_ADC_DATA_12V);
-SMM665_ATTR(in2, lcrit, SMM665_MISC16_ADC_DATA_VDD);
-SMM665_ATTR(in3, lcrit, SMM665_MISC16_ADC_DATA_A);
-SMM665_ATTR(in4, lcrit, SMM665_MISC16_ADC_DATA_B);
-SMM665_ATTR(in5, lcrit, SMM665_MISC16_ADC_DATA_C);
-SMM665_ATTR(in6, lcrit, SMM665_MISC16_ADC_DATA_D);
-SMM665_ATTR(in7, lcrit, SMM665_MISC16_ADC_DATA_E);
-SMM665_ATTR(in8, lcrit, SMM665_MISC16_ADC_DATA_F);
-SMM665_ATTR(in9, lcrit, SMM665_MISC16_ADC_DATA_AIN1);
-SMM665_ATTR(in10, lcrit, SMM665_MISC16_ADC_DATA_AIN2);
-
-/* Input voltages crit */
-SMM665_ATTR(in1, crit, SMM665_MISC16_ADC_DATA_12V);
-SMM665_ATTR(in2, crit, SMM665_MISC16_ADC_DATA_VDD);
-SMM665_ATTR(in3, crit, SMM665_MISC16_ADC_DATA_A);
-SMM665_ATTR(in4, crit, SMM665_MISC16_ADC_DATA_B);
-SMM665_ATTR(in5, crit, SMM665_MISC16_ADC_DATA_C);
-SMM665_ATTR(in6, crit, SMM665_MISC16_ADC_DATA_D);
-SMM665_ATTR(in7, crit, SMM665_MISC16_ADC_DATA_E);
-SMM665_ATTR(in8, crit, SMM665_MISC16_ADC_DATA_F);
-SMM665_ATTR(in9, crit, SMM665_MISC16_ADC_DATA_AIN1);
-SMM665_ATTR(in10, crit, SMM665_MISC16_ADC_DATA_AIN2);
-
-/* critical alarms */
-SMM665_ATTR(in1, crit_alarm, SMM665_FAULT_12V);
-SMM665_ATTR(in2, crit_alarm, SMM665_FAULT_VDD);
-SMM665_ATTR(in3, crit_alarm, SMM665_FAULT_A);
-SMM665_ATTR(in4, crit_alarm, SMM665_FAULT_B);
-SMM665_ATTR(in5, crit_alarm, SMM665_FAULT_C);
-SMM665_ATTR(in6, crit_alarm, SMM665_FAULT_D);
-SMM665_ATTR(in7, crit_alarm, SMM665_FAULT_E);
-SMM665_ATTR(in8, crit_alarm, SMM665_FAULT_F);
-SMM665_ATTR(in9, crit_alarm, SMM665_FAULT_AIN1);
-SMM665_ATTR(in10, crit_alarm, SMM665_FAULT_AIN2);
-
-/* Temperature */
-SMM665_ATTR(temp1, input, SMM665_MISC16_ADC_DATA_INT_TEMP);
-SMM665_ATTR(temp1, min, SMM665_MISC16_ADC_DATA_INT_TEMP);
-SMM665_ATTR(temp1, max, SMM665_MISC16_ADC_DATA_INT_TEMP);
-SMM665_ATTR(temp1, lcrit, SMM665_MISC16_ADC_DATA_INT_TEMP);
-SMM665_ATTR(temp1, crit, SMM665_MISC16_ADC_DATA_INT_TEMP);
-SMM665_ATTR(temp1, crit_alarm, SMM665_FAULT_TEMP);
-
-/*
- * Finally, construct an array of pointers to members of the above objects,
- * as required for sysfs_create_group()
- */
-static struct attribute *smm665_attrs[] = {
-	&sensor_dev_attr_in1_input.dev_attr.attr,
-	&sensor_dev_attr_in1_min.dev_attr.attr,
-	&sensor_dev_attr_in1_max.dev_attr.attr,
-	&sensor_dev_attr_in1_lcrit.dev_attr.attr,
-	&sensor_dev_attr_in1_crit.dev_attr.attr,
-	&sensor_dev_attr_in1_crit_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_in2_input.dev_attr.attr,
-	&sensor_dev_attr_in2_min.dev_attr.attr,
-	&sensor_dev_attr_in2_max.dev_attr.attr,
-	&sensor_dev_attr_in2_lcrit.dev_attr.attr,
-	&sensor_dev_attr_in2_crit.dev_attr.attr,
-	&sensor_dev_attr_in2_crit_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_in3_input.dev_attr.attr,
-	&sensor_dev_attr_in3_min.dev_attr.attr,
-	&sensor_dev_attr_in3_max.dev_attr.attr,
-	&sensor_dev_attr_in3_lcrit.dev_attr.attr,
-	&sensor_dev_attr_in3_crit.dev_attr.attr,
-	&sensor_dev_attr_in3_crit_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_in4_input.dev_attr.attr,
-	&sensor_dev_attr_in4_min.dev_attr.attr,
-	&sensor_dev_attr_in4_max.dev_attr.attr,
-	&sensor_dev_attr_in4_lcrit.dev_attr.attr,
-	&sensor_dev_attr_in4_crit.dev_attr.attr,
-	&sensor_dev_attr_in4_crit_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_in5_input.dev_attr.attr,
-	&sensor_dev_attr_in5_min.dev_attr.attr,
-	&sensor_dev_attr_in5_max.dev_attr.attr,
-	&sensor_dev_attr_in5_lcrit.dev_attr.attr,
-	&sensor_dev_attr_in5_crit.dev_attr.attr,
-	&sensor_dev_attr_in5_crit_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_in6_input.dev_attr.attr,
-	&sensor_dev_attr_in6_min.dev_attr.attr,
-	&sensor_dev_attr_in6_max.dev_attr.attr,
-	&sensor_dev_attr_in6_lcrit.dev_attr.attr,
-	&sensor_dev_attr_in6_crit.dev_attr.attr,
-	&sensor_dev_attr_in6_crit_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_in7_input.dev_attr.attr,
-	&sensor_dev_attr_in7_min.dev_attr.attr,
-	&sensor_dev_attr_in7_max.dev_attr.attr,
-	&sensor_dev_attr_in7_lcrit.dev_attr.attr,
-	&sensor_dev_attr_in7_crit.dev_attr.attr,
-	&sensor_dev_attr_in7_crit_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_in8_input.dev_attr.attr,
-	&sensor_dev_attr_in8_min.dev_attr.attr,
-	&sensor_dev_attr_in8_max.dev_attr.attr,
-	&sensor_dev_attr_in8_lcrit.dev_attr.attr,
-	&sensor_dev_attr_in8_crit.dev_attr.attr,
-	&sensor_dev_attr_in8_crit_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_in9_input.dev_attr.attr,
-	&sensor_dev_attr_in9_min.dev_attr.attr,
-	&sensor_dev_attr_in9_max.dev_attr.attr,
-	&sensor_dev_attr_in9_lcrit.dev_attr.attr,
-	&sensor_dev_attr_in9_crit.dev_attr.attr,
-	&sensor_dev_attr_in9_crit_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_in10_input.dev_attr.attr,
-	&sensor_dev_attr_in10_min.dev_attr.attr,
-	&sensor_dev_attr_in10_max.dev_attr.attr,
-	&sensor_dev_attr_in10_lcrit.dev_attr.attr,
-	&sensor_dev_attr_in10_crit.dev_attr.attr,
-	&sensor_dev_attr_in10_crit_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_min.dev_attr.attr,
-	&sensor_dev_attr_temp1_max.dev_attr.attr,
-	&sensor_dev_attr_temp1_lcrit.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
-
-	NULL,
-};
-
-ATTRIBUTE_GROUPS(smm665);
-
-static const struct i2c_device_id smm665_id[];
-
-static int smm665_probe(struct i2c_client *client)
-{
-	struct i2c_adapter *adapter = client->adapter;
-	struct smm665_data *data;
-	struct device *hwmon_dev;
-	int i, ret;
-
-	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA
-				     | I2C_FUNC_SMBUS_WORD_DATA))
-		return -ENODEV;
-
-	if (i2c_smbus_read_byte_data(client, SMM665_ADOC_ENABLE) < 0)
-		return -ENODEV;
-
-	data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	i2c_set_clientdata(client, data);
-	mutex_init(&data->update_lock);
-
-	data->client = client;
-	data->type = i2c_match_id(smm665_id, client)->driver_data;
-	data->cmdreg = i2c_new_dummy_device(adapter, (client->addr & ~SMM665_REGMASK)
-				     | SMM665_CMDREG_BASE);
-	if (IS_ERR(data->cmdreg))
-		return PTR_ERR(data->cmdreg);
-
-	switch (data->type) {
-	case smm465:
-	case smm665:
-		data->conversion_time = SMM665_ADC_WAIT_SMM665;
-		break;
-	case smm665c:
-	case smm764:
-	case smm766:
-		data->conversion_time = SMM665_ADC_WAIT_SMM766;
-		break;
-	}
-
-	ret = -ENODEV;
-	if (i2c_smbus_read_byte_data(data->cmdreg, SMM665_MISC8_CMD_STS) < 0)
-		goto out_unregister;
-
-	/*
-	 * Read limits.
-	 *
-	 * Limit registers start with register SMM665_LIMIT_BASE.
-	 * Each channel uses 8 registers, providing four limit values
-	 * per channel. Each limit value requires two registers, with the
-	 * high byte in the first register and the low byte in the second
-	 * register. The first two limits are under limit values, followed
-	 * by two over limit values.
-	 *
-	 * Limit register order matches the ADC register order, so we use
-	 * ADC register defines throughout the code to index limit registers.
-	 *
-	 * We save the first retrieved value both as "critical" and "alarm"
-	 * value. The second value overwrites either the critical or the
-	 * alarm value, depending on its configuration. This ensures that both
-	 * critical and alarm values are initialized, even if both registers are
-	 * configured as critical or non-critical.
-	 */
-	for (i = 0; i < SMM665_NUM_ADC; i++) {
-		int val;
-
-		val = smm665_read16(client, SMM665_LIMIT_BASE + i * 8);
-		if (unlikely(val < 0))
-			goto out_unregister;
-		data->critical_min_limit[i] = data->alarm_min_limit[i]
-		  = smm665_convert(val, i);
-		val = smm665_read16(client, SMM665_LIMIT_BASE + i * 8 + 2);
-		if (unlikely(val < 0))
-			goto out_unregister;
-		if (smm665_is_critical(val))
-			data->critical_min_limit[i] = smm665_convert(val, i);
-		else
-			data->alarm_min_limit[i] = smm665_convert(val, i);
-		val = smm665_read16(client, SMM665_LIMIT_BASE + i * 8 + 4);
-		if (unlikely(val < 0))
-			goto out_unregister;
-		data->critical_max_limit[i] = data->alarm_max_limit[i]
-		  = smm665_convert(val, i);
-		val = smm665_read16(client, SMM665_LIMIT_BASE + i * 8 + 6);
-		if (unlikely(val < 0))
-			goto out_unregister;
-		if (smm665_is_critical(val))
-			data->critical_max_limit[i] = smm665_convert(val, i);
-		else
-			data->alarm_max_limit[i] = smm665_convert(val, i);
-	}
-
-	hwmon_dev = devm_hwmon_device_register_with_groups(&client->dev,
-							   client->name, data,
-							   smm665_groups);
-	if (IS_ERR(hwmon_dev)) {
-		ret = PTR_ERR(hwmon_dev);
-		goto out_unregister;
-	}
-
-	return 0;
-
-out_unregister:
-	i2c_unregister_device(data->cmdreg);
-	return ret;
-}
-
-static void smm665_remove(struct i2c_client *client)
-{
-	struct smm665_data *data = i2c_get_clientdata(client);
-
-	i2c_unregister_device(data->cmdreg);
-}
-
-static const struct i2c_device_id smm665_id[] = {
-	{"smm465", smm465},
-	{"smm665", smm665},
-	{"smm665c", smm665c},
-	{"smm764", smm764},
-	{"smm766", smm766},
-	{}
-};
-
-MODULE_DEVICE_TABLE(i2c, smm665_id);
-
-/* This is the driver that will be inserted */
-static struct i2c_driver smm665_driver = {
-	.driver = {
-		   .name = "smm665",
-		   },
-	.probe = smm665_probe,
-	.remove = smm665_remove,
-	.id_table = smm665_id,
-};
-
-module_i2c_driver(smm665_driver);
-
-MODULE_AUTHOR("Guenter Roeck");
-MODULE_DESCRIPTION("SMM665 driver");
-MODULE_LICENSE("GPL");

From 3b7f4bde06daaff391a374fc27c8163b2847de34 Mon Sep 17 00:00:00 2001
From: Ahmad Khalifa <ahmad@khalifa.ws>
Date: Mon, 17 Jul 2023 21:10:51 +0100
Subject: [PATCH 1567/2047] hwmon: (nct6775) Increase and reorder ALARM/BEEP
 bits

* Increase available bits, IN: 16 to 24, FAN: 8 to 12,
  TEMP: 6 to 12
* Reorder alarm/beep definitions to match in order to allow
  additional inputs in the future
* Remove comments about 'unused' bits as probe() is a better
  reference

Testing note:
* Tested on nct6799 with IN/FAN/TEMP, and changing min/max/high/hyst,
  that triggers the corresponding alarms correctly. Good confirmation
  on the original mapping of the registers and masks.
  As to be expected, only 4 fans and 2 temps (fixed) have limits
  currently on nct6799 on my board.
* Trouble with testing intrusion alarms and beeps, no way to confirm
  those. As I understand now, intrusion/caseopen is probably not
  connected on my board.
  And I haven't seen a buzzer on a board in ages.

Signed-off-by: Ahmad Khalifa <ahmad@khalifa.ws>
Link: https://lore.kernel.org/r/20230717201050.1657809-1-ahmad@khalifa.ws
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/nct6775-core.c | 167 +++++++++++++++--------------------
 drivers/hwmon/nct6775.h      |  23 ++++-
 2 files changed, 92 insertions(+), 98 deletions(-)

diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c
index e0d1770806b7..fa0b7551ebf5 100644
--- a/drivers/hwmon/nct6775-core.c
+++ b/drivers/hwmon/nct6775-core.c
@@ -98,31 +98,23 @@ static const u16 NCT6775_REG_IN[] = {
 
 static const u16 NCT6775_REG_ALARM[NUM_REG_ALARM] = { 0x459, 0x45A, 0x45B };
 
-/* 0..15 voltages, 16..23 fans, 24..29 temperatures, 30..31 intrusion */
-
-static const s8 NCT6775_ALARM_BITS[] = {
-	0, 1, 2, 3, 8, 21, 20, 16,	/* in0.. in7 */
-	17, -1, -1, -1, -1, -1, -1,	/* in8..in14 */
-	-1,				/* unused */
-	6, 7, 11, -1, -1,		/* fan1..fan5 */
-	-1, -1, -1,			/* unused */
-	4, 5, 13, -1, -1, -1,		/* temp1..temp6 */
-	12, -1 };			/* intrusion0, intrusion1 */
+static const s8 NCT6775_ALARM_BITS[NUM_ALARM_BITS] = {
+	 0,  1,  2,  3,  8, 21, 20, 16, 17, -1, -1, -1,	  /* in0-in11     */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	 6,  7, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	 4,  5, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	12, -1,						  /* intr0-intr1  */
+};
 
 static const u16 NCT6775_REG_BEEP[NUM_REG_BEEP] = { 0x56, 0x57, 0x453, 0x4e };
 
-/*
- * 0..14 voltages, 15 global beep enable, 16..23 fans, 24..29 temperatures,
- * 30..31 intrusion
- */
-static const s8 NCT6775_BEEP_BITS[] = {
-	0, 1, 2, 3, 8, 9, 10, 16,	/* in0.. in7 */
-	17, -1, -1, -1, -1, -1, -1,	/* in8..in14 */
-	21,				/* global beep enable */
-	6, 7, 11, 28, -1,		/* fan1..fan5 */
-	-1, -1, -1,			/* unused */
-	4, 5, 13, -1, -1, -1,		/* temp1..temp6 */
-	12, -1 };			/* intrusion0, intrusion1 */
+static const s8 NCT6775_BEEP_BITS[NUM_BEEP_BITS] = {
+	 0,  1,  2,  3,  8,  9, 10, 16, 17, -1, -1, -1,	  /* in0-in11     */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	 6,  7, 11, 28, -1, -1, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	 4,  5, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	12, -1, 21					  /* intr0-intr1, beep_en */
+};
 
 /* DC or PWM output fan configuration */
 static const u8 NCT6775_REG_PWM_MODE[] = { 0x04, 0x04, 0x12 };
@@ -256,25 +248,23 @@ static const u16 NCT6775_REG_TSI_TEMP[] = { 0x669 };
 #define NCT6776_REG_FAN_STEP_UP_TIME NCT6775_REG_FAN_STEP_DOWN_TIME
 #define NCT6776_REG_FAN_STEP_DOWN_TIME NCT6775_REG_FAN_STEP_UP_TIME
 
-static const s8 NCT6776_ALARM_BITS[] = {
-	0, 1, 2, 3, 8, 21, 20, 16,	/* in0.. in7 */
-	17, -1, -1, -1, -1, -1, -1,	/* in8..in14 */
-	-1,				/* unused */
-	6, 7, 11, 10, 23,		/* fan1..fan5 */
-	-1, -1, -1,			/* unused */
-	4, 5, 13, -1, -1, -1,		/* temp1..temp6 */
-	12, 9 };			/* intrusion0, intrusion1 */
+static const s8 NCT6776_ALARM_BITS[NUM_ALARM_BITS] = {
+	 0,  1,  2,  3,  8, 21, 20, 16, 17, -1, -1, -1,	  /* in0-in11     */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	 6,  7, 11, 10, 23, -1, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	 4,  5, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	12,  9,						  /* intr0-intr1  */
+};
 
 static const u16 NCT6776_REG_BEEP[NUM_REG_BEEP] = { 0xb2, 0xb3, 0xb4, 0xb5 };
 
-static const s8 NCT6776_BEEP_BITS[] = {
-	0, 1, 2, 3, 4, 5, 6, 7,		/* in0.. in7 */
-	8, -1, -1, -1, -1, -1, -1,	/* in8..in14 */
-	24,				/* global beep enable */
-	25, 26, 27, 28, 29,		/* fan1..fan5 */
-	-1, -1, -1,			/* unused */
-	16, 17, 18, 19, 20, 21,		/* temp1..temp6 */
-	30, 31 };			/* intrusion0, intrusion1 */
+static const s8 NCT6776_BEEP_BITS[NUM_BEEP_BITS] = {
+	 0,  1,  2,  3,  4,  5,  6,  7,  8, -1, -1, -1,	  /* in0-in11     */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	25, 26, 27, 28, 29, -1, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	16, 17, 18, 19, 20, 21, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	30, 31, 24					  /* intr0-intr1, beep_en */
+};
 
 static const u16 NCT6776_REG_TOLERANCE_H[] = {
 	0x10c, 0x20c, 0x30c, 0x80c, 0x90c, 0xa0c, 0xb0c };
@@ -345,23 +335,21 @@ static const u16 NCT6779_REG_IN[] = {
 static const u16 NCT6779_REG_ALARM[NUM_REG_ALARM] = {
 	0x459, 0x45A, 0x45B, 0x568 };
 
-static const s8 NCT6779_ALARM_BITS[] = {
-	0, 1, 2, 3, 8, 21, 20, 16,	/* in0.. in7 */
-	17, 24, 25, 26, 27, 28, 29,	/* in8..in14 */
-	-1,				/* unused */
-	6, 7, 11, 10, 23,		/* fan1..fan5 */
-	-1, -1, -1,			/* unused */
-	4, 5, 13, -1, -1, -1,		/* temp1..temp6 */
-	12, 9 };			/* intrusion0, intrusion1 */
+static const s8 NCT6779_ALARM_BITS[NUM_ALARM_BITS] = {
+	 0,  1,  2,  3,  8, 21, 20, 16, 17, 24, 25, 26,	  /* in0-in11     */
+	27, 28, 29, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	 6,  7, 11, 10, 23, -1, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	 4,  5, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	12,  9,						  /* intr0-intr1  */
+};
 
-static const s8 NCT6779_BEEP_BITS[] = {
-	0, 1, 2, 3, 4, 5, 6, 7,		/* in0.. in7 */
-	8, 9, 10, 11, 12, 13, 14,	/* in8..in14 */
-	24,				/* global beep enable */
-	25, 26, 27, 28, 29,		/* fan1..fan5 */
-	-1, -1, -1,			/* unused */
-	16, 17, -1, -1, -1, -1,		/* temp1..temp6 */
-	30, 31 };			/* intrusion0, intrusion1 */
+static const s8 NCT6779_BEEP_BITS[NUM_BEEP_BITS] = {
+	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,	  /* in0-in11     */
+	12, 13, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	25, 26, 27, 28, 29, -1, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	16, 17, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	30, 31, 24					  /* intr0-intr1, beep_en */
+};
 
 static const u16 NCT6779_REG_FAN[] = {
 	0x4c0, 0x4c2, 0x4c4, 0x4c6, 0x4c8, 0x4ca, 0x4ce };
@@ -449,14 +437,13 @@ static const u16 NCT6791_REG_WEIGHT_DUTY_BASE[NUM_FAN] = { 0, 0x23e };
 static const u16 NCT6791_REG_ALARM[NUM_REG_ALARM] = {
 	0x459, 0x45A, 0x45B, 0x568, 0x45D };
 
-static const s8 NCT6791_ALARM_BITS[] = {
-	0, 1, 2, 3, 8, 21, 20, 16,	/* in0.. in7 */
-	17, 24, 25, 26, 27, 28, 29,	/* in8..in14 */
-	-1,				/* unused */
-	6, 7, 11, 10, 23, 33,		/* fan1..fan6 */
-	-1, -1,				/* unused */
-	4, 5, 13, -1, -1, -1,		/* temp1..temp6 */
-	12, 9 };			/* intrusion0, intrusion1 */
+static const s8 NCT6791_ALARM_BITS[NUM_ALARM_BITS] = {
+	 0,  1,  2,  3,  8, 21, 20, 16, 17, 24, 25, 26,	  /* in0-in11     */
+	27, 28, 29, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	 6,  7, 11, 10, 23, 33, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	 4,  5, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	12,  9,						  /* intr0-intr1  */
+};
 
 /* NCT6792/NCT6793 specific data */
 
@@ -764,27 +751,23 @@ static const u16 NCT6106_REG_AUTO_PWM[] = { 0x164, 0x174, 0x184 };
 static const u16 NCT6106_REG_ALARM[NUM_REG_ALARM] = {
 	0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d };
 
-static const s8 NCT6106_ALARM_BITS[] = {
-	0, 1, 2, 3, 4, 5, 7, 8,		/* in0.. in7 */
-	9, -1, -1, -1, -1, -1, -1,	/* in8..in14 */
-	-1,				/* unused */
-	32, 33, 34, -1, -1,		/* fan1..fan5 */
-	-1, -1, -1,			/* unused */
-	16, 17, 18, 19, 20, 21,		/* temp1..temp6 */
-	48, -1				/* intrusion0, intrusion1 */
+static const s8 NCT6106_ALARM_BITS[NUM_ALARM_BITS] = {
+	 0,  1,  2,  3,  4,  5,  7,  8,  9, -1, -1, -1,	  /* in0-in11     */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	32, 33, 34, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	16, 17, 18, 19, 20, 21, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	48, -1,						  /* intr0-intr1  */
 };
 
 static const u16 NCT6106_REG_BEEP[NUM_REG_BEEP] = {
 	0x3c0, 0x3c1, 0x3c2, 0x3c3, 0x3c4 };
 
-static const s8 NCT6106_BEEP_BITS[] = {
-	0, 1, 2, 3, 4, 5, 7, 8,		/* in0.. in7 */
-	9, 10, 11, 12, -1, -1, -1,	/* in8..in14 */
-	32,				/* global beep enable */
-	24, 25, 26, 27, 28,		/* fan1..fan5 */
-	-1, -1, -1,			/* unused */
-	16, 17, 18, 19, 20, 21,		/* temp1..temp6 */
-	34, -1				/* intrusion0, intrusion1 */
+static const s8 NCT6106_BEEP_BITS[NUM_BEEP_BITS] = {
+	 0,  1,  2,  3,  4,  5,  7,  8,  9, 10, 11, 12,	  /* in0-in11     */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	24, 25, 26, 27, 28, -1, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	16, 17, 18, 19, 20, 21, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	34, -1, 32					  /* intr0-intr1, beep_en */
 };
 
 static const u16 NCT6106_REG_TEMP_ALTERNATE[32] = {
@@ -844,24 +827,20 @@ static const u16 NCT6116_REG_AUTO_TEMP[] = {
 static const u16 NCT6116_REG_AUTO_PWM[] = {
 	0x164, 0x174, 0x184, 0x1d4, 0x1e4 };
 
-static const s8 NCT6116_ALARM_BITS[] = {
-	0, 1, 2, 3, 4, 5, 7, 8,		/* in0.. in7 */
-	9, -1, -1, -1, -1, -1, -1,	/* in8..in9 */
-	-1,				/* unused */
-	32, 33, 34, 35, 36,		/* fan1..fan5 */
-	-1, -1, -1,			/* unused */
-	16, 17, 18, -1, -1, -1,		/* temp1..temp6 */
-	48, -1				/* intrusion0, intrusion1 */
+static const s8 NCT6116_ALARM_BITS[NUM_ALARM_BITS] = {
+	 0,  1,  2,  3,  4,  5,  7,  8,  9, -1, -1, -1,	  /* in0-in11     */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	32, 33, 34, 35, 36, -1, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	16, 17, 18, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	48, -1,						  /* intr0-intr1  */
 };
 
-static const s8 NCT6116_BEEP_BITS[] = {
-	0, 1, 2, 3, 4, 5, 7, 8,		/* in0.. in7 */
-	9, 10, 11, 12, -1, -1, -1,	/* in8..in14 */
-	32,				/* global beep enable */
-	24, 25, 26, 27, 28,		/* fan1..fan5 */
-	-1, -1, -1,			/* unused */
-	16, 17, 18, -1, -1, -1,		/* temp1..temp6 */
-	34, -1				/* intrusion0, intrusion1 */
+static const s8 NCT6116_BEEP_BITS[NUM_BEEP_BITS] = {
+	 0,  1,  2,  3,  4,  5,  7,  8,  9, 10, 11, 12,	  /* in0-in11     */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	24, 25, 26, 27, 28, -1, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	16, 17, 18, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	34, -1, 32					  /* intr0-intr1, beep_en */
 };
 
 static const u16 NCT6116_REG_TSI_TEMP[] = { 0x59, 0x5b };
diff --git a/drivers/hwmon/nct6775.h b/drivers/hwmon/nct6775.h
index a84c6ce7275d..c752bc7bbe72 100644
--- a/drivers/hwmon/nct6775.h
+++ b/drivers/hwmon/nct6775.h
@@ -239,10 +239,25 @@ nct6775_add_attr_group(struct nct6775_data *data, const struct attribute_group *
 
 #define NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE	0x28
 
-#define FAN_ALARM_BASE		16
-#define TEMP_ALARM_BASE		24
-#define INTRUSION_ALARM_BASE	30
-#define BEEP_ENABLE_BASE	15
+/*
+ * ALARM_BITS and BEEP_BITS store bit-index for the mask of the registers
+ * loaded into data->alarm and data->beep.
+ *
+ * Every input register (IN/TEMP/FAN) must have a corresponding
+ *   ALARM/BEEP bit at the same index BITS[BASE + index]
+ * Set value to -1 to disable the visibility of that '*_alarm' attribute and
+ * to pad the bits until the next BASE
+ *
+ * Beep has an additional GLOBAL_BEEP_ENABLE bit
+ */
+#define VIN_ALARM_BASE		 0
+#define FAN_ALARM_BASE		24
+#define TEMP_ALARM_BASE		36
+#define INTRUSION_ALARM_BASE	48
+#define BEEP_ENABLE_BASE	50
+
+#define NUM_ALARM_BITS		(INTRUSION_ALARM_BASE + 4)
+#define NUM_BEEP_BITS		(BEEP_ENABLE_BASE + 1)
 
 /*
  * Not currently used:

From 49ffb5eefebccf498fb2a45b46ff58d0b255bf97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joaqu=C3=ADn=20Ignacio=20Aramend=C3=ADa?=
 <samsagax@gmail.com>
Date: Mon, 17 Jul 2023 19:25:16 -0300
Subject: [PATCH 1568/2047] hwmon: (oxp-sensors) Move board detection to the
 init function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move detection logic to the start of init() function so we won't
instantiate the driver if the board is not compatible.

Signed-off-by: Joaquín Ignacio Aramendía <samsagax@gmail.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20230717222526.229984-3-samsagax@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/oxp-sensors.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/hwmon/oxp-sensors.c b/drivers/hwmon/oxp-sensors.c
index 1e1cc67bcdea..ea9602063eab 100644
--- a/drivers/hwmon/oxp-sensors.c
+++ b/drivers/hwmon/oxp-sensors.c
@@ -434,23 +434,9 @@ static const struct hwmon_chip_info oxp_ec_chip_info = {
 /* Initialization logic */
 static int oxp_platform_probe(struct platform_device *pdev)
 {
-	const struct dmi_system_id *dmi_entry;
 	struct device *dev = &pdev->dev;
 	struct device *hwdev;
 
-	/*
-	 * Have to check for AMD processor here because DMI strings are the
-	 * same between Intel and AMD boards, the only way to tell them apart
-	 * is the CPU.
-	 * Intel boards seem to have different EC registers and values to
-	 * read/write.
-	 */
-	dmi_entry = dmi_first_match(dmi_table);
-	if (!dmi_entry || boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
-		return -ENODEV;
-
-	board = (enum oxp_board)(unsigned long)dmi_entry->driver_data;
-
 	hwdev = devm_hwmon_device_register_with_info(dev, "oxpec", NULL,
 						     &oxp_ec_chip_info, NULL);
 
@@ -469,6 +455,21 @@ static struct platform_device *oxp_platform_device;
 
 static int __init oxp_platform_init(void)
 {
+	const struct dmi_system_id *dmi_entry;
+
+	/*
+	 * Have to check for AMD processor here because DMI strings are the
+	 * same between Intel and AMD boards, the only way to tell them apart
+	 * is the CPU.
+	 * Intel boards seem to have different EC registers and values to
+	 * read/write.
+	 */
+	dmi_entry = dmi_first_match(dmi_table);
+	if (!dmi_entry || boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		return -ENODEV;
+
+	board = (enum oxp_board)(unsigned long)dmi_entry->driver_data;
+
 	oxp_platform_device =
 		platform_create_bundle(&oxp_platform_driver,
 				       oxp_platform_probe, NULL, 0, NULL, 0);

From 4018e0a9c00131d8514015749f45f0578cc59c64 Mon Sep 17 00:00:00 2001
From: Frank Crawford <frank@crawford.emu.id.au>
Date: Fri, 7 Jul 2023 22:29:50 +1000
Subject: [PATCH 1569/2047] hwmon: (it87) Split temperature sensor detection to
 separate function

The temperature sensor type will need to be used in multiple places, so
split it out into its own function.

Signed-off-by: Frank Crawford <frank@crawford.emu.id.au>
Link: https://lore.kernel.org/r/20230707123005.956415-2-frank@crawford.emu.id.au
[groeck: Dropped unnecessary 'type' variable in show_temp_type()]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/it87.c | 47 +++++++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index 5deff5e5f693..f9703c4e3a9d 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -1159,28 +1159,43 @@ static SENSOR_DEVICE_ATTR_2(temp4_input, S_IRUGO, show_temp, NULL, 3, 0);
 static SENSOR_DEVICE_ATTR_2(temp5_input, S_IRUGO, show_temp, NULL, 4, 0);
 static SENSOR_DEVICE_ATTR_2(temp6_input, S_IRUGO, show_temp, NULL, 5, 0);
 
-static ssize_t show_temp_type(struct device *dev, struct device_attribute *attr,
-			      char *buf)
+static int get_temp_type(struct it87_data *data, int index)
 {
-	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-	struct it87_data *data = it87_update_device(dev);
+	/*
+	 * 2 is deprecated;
+	 * 3 = thermal diode;
+	 * 4 = thermistor;
+	 * 5 = AMDTSI;
+	 * 6 = Intel PECI;
+	 * 0 = disabled
+	 */
 	u8 reg, extra;
-
-	if (IS_ERR(data))
-		return PTR_ERR(data);
+	int type = 0;
 
 	reg = data->sensor;	/* In case value is updated while used */
 	extra = data->extra;
 
-	if ((has_temp_peci(data, nr) && (reg >> 6 == nr + 1)) ||
-	    (has_temp_old_peci(data, nr) && (extra & 0x80)))
-		return sprintf(buf, "6\n");  /* Intel PECI */
-	if (reg & (1 << nr))
-		return sprintf(buf, "3\n");  /* thermal diode */
-	if (reg & (8 << nr))
-		return sprintf(buf, "4\n");  /* thermistor */
-	return sprintf(buf, "0\n");      /* disabled */
+	if ((has_temp_peci(data, index) && (reg >> 6 == index + 1)) ||
+	    (has_temp_old_peci(data, index) && (extra & 0x80)))
+		type = 6;	/* Intel PECI */
+	else if (reg & BIT(index))
+		type = 3;	/* thermal diode */
+	else if (reg & BIT(index + 3))
+		type = 4;	/* thermistor */
+
+	return type;
+}
+
+static ssize_t show_temp_type(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+	struct it87_data *data = it87_update_device(dev);
+
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	return sprintf(buf, "%d\n", get_temp_type(data, sensor_attr->index));
 }
 
 static ssize_t set_temp_type(struct device *dev, struct device_attribute *attr,

From 2f60e5932942d923258b37d24d147cb7d765df3d Mon Sep 17 00:00:00 2001
From: Frank Crawford <frank@crawford.emu.id.au>
Date: Fri, 7 Jul 2023 22:29:51 +1000
Subject: [PATCH 1570/2047] hwmon: (it87) Improve temperature reporting support

Add test if thermistor sensor type attribute should be visible, i.e.
test if the attribute is defined.

Signed-off-by: Frank Crawford <frank@crawford.emu.id.au>
Link: https://lore.kernel.org/r/20230707123005.956415-3-frank@crawford.emu.id.au
[groeck: Dropped unnecessary 'type' variable in it87_temp_is_visible()]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/it87.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index f9703c4e3a9d..36cc7a2b56b5 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -2328,6 +2328,12 @@ static umode_t it87_temp_is_visible(struct kobject *kobj,
 	if (!(data->has_temp & BIT(i)))
 		return 0;
 
+	if (a == 3) {
+		if (get_temp_type(data, i) == 0)
+			return 0;
+		return attr->mode;
+	}
+
 	if (a == 5 && !has_temp_offset(data))
 		return 0;
 

From 6593eac899202eedf328247d6f1818453c1b49ac Mon Sep 17 00:00:00 2001
From: Frank Crawford <frank@crawford.emu.id.au>
Date: Fri, 7 Jul 2023 22:29:52 +1000
Subject: [PATCH 1571/2047] hwmon: (it87) Add support to detect sensor type
 AMDTSI

Add test for sensor type AMDTSI which is available on certain recent
chipsets.

Signed-off-by: Frank Crawford <frank@crawford.emu.id.au>
Link: https://lore.kernel.org/r/20230707123005.956415-4-frank@crawford.emu.id.au
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/it87.c | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index 36cc7a2b56b5..fbe86cec6055 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -221,6 +221,10 @@ static bool fix_pwm_polarity;
  * Super-I/O configuration space.
  */
 #define IT87_REG_VID           0x0a
+
+/* Interface Selection register on other chips */
+#define IT87_REG_IFSEL         0x0a
+
 /*
  * The IT8705F and IT8712F earlier than revision 0x08 use register 0x0b
  * for fan divisors. Later IT8712F revisions must use 16-bit tachometer
@@ -1170,14 +1174,37 @@ static int get_temp_type(struct it87_data *data, int index)
 	 * 0 = disabled
 	 */
 	u8 reg, extra;
-	int type = 0;
+	int ttype, type = 0;
 
-	reg = data->sensor;	/* In case value is updated while used */
-	extra = data->extra;
+	/* Detect PECI vs. AMDTSI */
+	ttype = 6;
+	if ((has_temp_peci(data, index)) || data->type == it8721 ||
+	    data->type == it8720) {
+		extra = it87_read_value(data, IT87_REG_IFSEL);
+		if ((extra & 0x70) == 0x40)
+			ttype = 5;
+	}
+
+	reg = it87_read_value(data, IT87_REG_TEMP_ENABLE);
+
+	/* Per chip special detection */
+	switch (data->type) {
+	case it8622:
+		if (!(reg & 0xc0) && index == 3)
+			type = ttype;
+		break;
+	default:
+		break;
+	}
+
+	if (type || index >= 3)
+		return type;
+
+	extra = it87_read_value(data, IT87_REG_TEMP_EXTRA);
 
 	if ((has_temp_peci(data, index) && (reg >> 6 == index + 1)) ||
 	    (has_temp_old_peci(data, index) && (extra & 0x80)))
-		type = 6;	/* Intel PECI */
+		type = ttype;	/* Intel PECI or AMDTSI */
 	else if (reg & BIT(index))
 		type = 3;	/* thermal diode */
 	else if (reg & BIT(index + 3))

From b4389ee5274535fd22966a188a048624fe700b70 Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Fri, 14 Jul 2023 15:51:09 +0200
Subject: [PATCH 1572/2047] hwmon: (pmbus/mp2975) Fix whitespace error

Fix whitespace error reported by checkpatch.pl

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230714135124.2645339-1-Naresh.Solanki@9elements.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/mp2975.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c
index 2109b0458a8b..130cfde52e42 100644
--- a/drivers/hwmon/pmbus/mp2975.c
+++ b/drivers/hwmon/pmbus/mp2975.c
@@ -600,7 +600,7 @@ mp2975_vout_ov_scale_get(struct i2c_client *client, struct mp2975_data *data,
 	if (ret < 0)
 		return ret;
 	thres_dev = ret & MP2975_PRT_THRES_DIV_OV_EN ? MP2975_PROT_DEV_OV_ON :
-	                                               MP2975_PROT_DEV_OV_OFF;
+						       MP2975_PROT_DEV_OV_OFF;
 
 	/* Select the gain of remote sense amplifier. */
 	ret = i2c_smbus_read_word_data(client, PMBUS_VOUT_SCALE_LOOP);

From 99a368cfab265a4ecd5643feca5470c6d3c829f2 Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Fri, 14 Jul 2023 15:51:10 +0200
Subject: [PATCH 1573/2047] dt-bindings: trivial-devices: Add MPS MP2971 and
 MP2973

Add Monolithic Power Systems MP2971 & MP2973 to trivial devices.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230714135124.2645339-2-Naresh.Solanki@9elements.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/devicetree/bindings/trivial-devices.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index ba2bfb547909..6e24c4d25ec3 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -119,6 +119,10 @@ properties:
           - fsl,mpr121
             # Monolithic Power Systems Inc. multi-phase controller mp2888
           - mps,mp2888
+            # Monolithic Power Systems Inc. multi-phase controller mp2971
+          - mps,mp2971
+            # Monolithic Power Systems Inc. multi-phase controller mp2973
+          - mps,mp2973
             # Monolithic Power Systems Inc. multi-phase controller mp2975
           - mps,mp2975
             # Honeywell Humidicon HIH-6130 humidity/temperature sensor

From 1f6f34d08a95b953c58e1c7388a787e78d819c83 Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Fri, 14 Jul 2023 15:51:11 +0200
Subject: [PATCH 1574/2047] hwmon: (pmbus/mp2975) Prepare for MP2973 and MP2971

Add support for differntiating between the chips.
The following commits will make use of this mechanism.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230714135124.2645339-3-Naresh.Solanki@9elements.com
[groeck: double-cast of_device_get_match_data() to make gcc happy]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/mp2975.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c
index 130cfde52e42..047f040451ee 100644
--- a/drivers/hwmon/pmbus/mp2975.c
+++ b/drivers/hwmon/pmbus/mp2975.c
@@ -10,6 +10,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include "pmbus.h"
 
 /* Vendor specific registers. */
@@ -56,8 +57,13 @@
 				 PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT | \
 				 PMBUS_HAVE_POUT | PMBUS_PHASE_VIRTUAL)
 
+enum chips {
+	mp2975
+};
+
 struct mp2975_data {
 	struct pmbus_driver_info info;
+	enum chips chip_id;
 	int vout_scale;
 	int vid_step[MP2975_PAGE_NUM];
 	int vref[MP2975_PAGE_NUM];
@@ -68,6 +74,13 @@ struct mp2975_data {
 	int curr_sense_gain[MP2975_PAGE_NUM];
 };
 
+static const struct i2c_device_id mp2975_id[] = {
+	{"mp2975", mp2975},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, mp2975_id);
+
 #define to_mp2975_data(x)  container_of(x, struct mp2975_data, info)
 
 static int mp2975_read_byte_data(struct i2c_client *client, int page, int reg)
@@ -691,6 +704,11 @@ static int mp2975_probe(struct i2c_client *client)
 	if (!data)
 		return -ENOMEM;
 
+	if (client->dev.of_node)
+		data->chip_id = (enum chips)(unsigned long)of_device_get_match_data(&client->dev);
+	else
+		data->chip_id = i2c_match_id(mp2975_id, client)->driver_data;
+
 	memcpy(&data->info, &mp2975_info, sizeof(*info));
 	info = &data->info;
 
@@ -739,15 +757,8 @@ static int mp2975_probe(struct i2c_client *client)
 	return pmbus_do_probe(client, info);
 }
 
-static const struct i2c_device_id mp2975_id[] = {
-	{"mp2975", 0},
-	{}
-};
-
-MODULE_DEVICE_TABLE(i2c, mp2975_id);
-
 static const struct of_device_id __maybe_unused mp2975_of_match[] = {
-	{.compatible = "mps,mp2975"},
+	{.compatible = "mps,mp2975", .data = (void *)mp2975},
 	{}
 };
 MODULE_DEVICE_TABLE(of, mp2975_of_match);

From 1feb31e810b0634d962920b7c2ccb54d2817ba56 Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Fri, 14 Jul 2023 15:51:12 +0200
Subject: [PATCH 1575/2047] hwmon: (pmbus/mp2975) Simplify VOUT code

In order to upstream MP2973/MP2971 simplify the code by removing support
for various VOUT formats. The MP2973 and MP2971 supports all PMBUS
supported formats for VOUT, while the MP2975 only support DIRECT and
VID for VOUT.

In DIRECT mode all chips report the voltage in 1mV/LSB.

Configure the chip to use DIRECT format for VOUT and drop the code
conversion code for other formats. The to be added chips MP2973/MP2971
will be configured to also report VOUT in DIRECT format.

The maximum voltage that can be reported in DIRECT format is 32768mV.
This is sufficient as the maximum output voltage for VR12/VR13 is
3040 mV.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230714135124.2645339-4-Naresh.Solanki@9elements.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/mp2975.c | 60 ++++++------------------------------
 1 file changed, 10 insertions(+), 50 deletions(-)

diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c
index 047f040451ee..82f9d972762a 100644
--- a/drivers/hwmon/pmbus/mp2975.c
+++ b/drivers/hwmon/pmbus/mp2975.c
@@ -70,7 +70,6 @@ struct mp2975_data {
 	int vref_off[MP2975_PAGE_NUM];
 	int vout_max[MP2975_PAGE_NUM];
 	int vout_ov_fixed[MP2975_PAGE_NUM];
-	int vout_format[MP2975_PAGE_NUM];
 	int curr_sense_gain[MP2975_PAGE_NUM];
 };
 
@@ -83,22 +82,6 @@ MODULE_DEVICE_TABLE(i2c, mp2975_id);
 
 #define to_mp2975_data(x)  container_of(x, struct mp2975_data, info)
 
-static int mp2975_read_byte_data(struct i2c_client *client, int page, int reg)
-{
-	switch (reg) {
-	case PMBUS_VOUT_MODE:
-		/*
-		 * Enforce VOUT direct format, since device allows to set the
-		 * different formats for the different rails. Conversion from
-		 * VID to direct provided by driver internally, in case it is
-		 * necessary.
-		 */
-		return PB_VOUT_MODE_DIRECT;
-	default:
-		return -ENODATA;
-	}
-}
-
 static int
 mp2975_read_word_helper(struct i2c_client *client, int page, int phase, u8 reg,
 			u16 mask)
@@ -273,24 +256,6 @@ static int mp2975_read_word_data(struct i2c_client *client, int page,
 		ret = DIV_ROUND_CLOSEST(data->vref[page] * 10 - 50 *
 					(ret + 1) * data->vout_scale, 10);
 		break;
-	case PMBUS_READ_VOUT:
-		ret = mp2975_read_word_helper(client, page, phase, reg,
-					      GENMASK(11, 0));
-		if (ret < 0)
-			return ret;
-
-		/*
-		 * READ_VOUT can be provided in VID or direct format. The
-		 * format type is specified by bit 15 of the register
-		 * MP2975_MFR_DC_LOOP_CTRL. The driver enforces VOUT direct
-		 * format, since device allows to set the different formats for
-		 * the different rails and also all VOUT limits registers are
-		 * provided in a direct format. In case format is VID - convert
-		 * to direct.
-		 */
-		if (data->vout_format[page] == vid)
-			ret = mp2975_vid2direct(info->vrm_version[page], ret);
-		break;
 	case PMBUS_VIRT_READ_POUT_MAX:
 		ret = mp2975_read_word_helper(client, page, phase,
 					      MP2975_MFR_READ_POUT_PK,
@@ -578,20 +543,20 @@ mp2975_vout_max_get(struct i2c_client *client, struct mp2975_data *data,
 }
 
 static int
-mp2975_identify_vout_format(struct i2c_client *client,
-			    struct mp2975_data *data, int page)
+mp2975_set_vout_format(struct i2c_client *client,
+		       struct mp2975_data *data, int page)
 {
 	int ret;
 
 	ret = i2c_smbus_read_word_data(client, MP2975_MFR_DC_LOOP_CTRL);
 	if (ret < 0)
 		return ret;
-
-	if (ret & MP2975_VOUT_FORMAT)
-		data->vout_format[page] = vid;
-	else
-		data->vout_format[page] = direct;
-	return 0;
+	/* Enable DIRECT VOUT format 1mV/LSB */
+	if (ret & MP2975_VOUT_FORMAT) {
+		ret &= ~MP2975_VOUT_FORMAT;
+		ret = i2c_smbus_write_word_data(client, MP2975_MFR_DC_LOOP_CTRL, ret);
+	}
+	return ret;
 }
 
 static int
@@ -649,12 +614,8 @@ mp2975_vout_per_rail_config_get(struct i2c_client *client,
 		if (ret < 0)
 			return ret;
 
-		/*
-		 * Get VOUT format for READ_VOUT command : VID or direct.
-		 * Pages on same device can be configured with different
-		 * formats.
-		 */
-		ret = mp2975_identify_vout_format(client, data, i);
+		/* Set VOUT format for READ_VOUT command : direct. */
+		ret = mp2975_set_vout_format(client, data, i);
 		if (ret < 0)
 			return ret;
 
@@ -689,7 +650,6 @@ static struct pmbus_driver_info mp2975_info = {
 		PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT |
 		PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP | PMBUS_HAVE_POUT |
 		PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT | PMBUS_PHASE_VIRTUAL,
-	.read_byte_data = mp2975_read_byte_data,
 	.read_word_data = mp2975_read_word_data,
 };
 

From e2c90b481d1de049dba7fbacf176052c49e83a9c Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Fri, 14 Jul 2023 15:51:13 +0200
Subject: [PATCH 1576/2047] hwmon: (pmbus/mp2975) Make phase count variable

In order to add support for MP2973 and MP2971 replace hardcoded
phase count for both channels by a variable.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230714135124.2645339-5-Naresh.Solanki@9elements.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/mp2975.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c
index 82f9d972762a..6a97d46ea5e8 100644
--- a/drivers/hwmon/pmbus/mp2975.c
+++ b/drivers/hwmon/pmbus/mp2975.c
@@ -61,10 +61,15 @@ enum chips {
 	mp2975
 };
 
+static const int mp2975_max_phases[][MP2975_PAGE_NUM] = {
+	[mp2975] = { MP2975_MAX_PHASE_RAIL1, MP2975_MAX_PHASE_RAIL2 },
+};
+
 struct mp2975_data {
 	struct pmbus_driver_info info;
 	enum chips chip_id;
 	int vout_scale;
+	int max_phases[MP2975_PAGE_NUM];
 	int vid_step[MP2975_PAGE_NUM];
 	int vref[MP2975_PAGE_NUM];
 	int vref_off[MP2975_PAGE_NUM];
@@ -304,25 +309,25 @@ static int mp2975_read_word_data(struct i2c_client *client, int page,
 	return ret;
 }
 
-static int mp2975_identify_multiphase_rail2(struct i2c_client *client)
+static int mp2975_identify_multiphase_rail2(struct i2c_client *client,
+					    struct mp2975_data *data)
 {
 	int ret;
 
 	/*
-	 * Identify multiphase for rail 2 - could be from 0 to 4.
+	 * Identify multiphase for rail 2 - could be from 0 to data->max_phases[1].
 	 * In case phase number is zero – only page zero is supported
 	 */
 	ret = i2c_smbus_write_byte_data(client, PMBUS_PAGE, 2);
 	if (ret < 0)
 		return ret;
 
-	/* Identify multiphase for rail 2 - could be from 0 to 4. */
 	ret = i2c_smbus_read_word_data(client, MP2975_MFR_VR_MULTI_CONFIG_R2);
 	if (ret < 0)
 		return ret;
 
 	ret &= GENMASK(2, 0);
-	return (ret >= 4) ? 4 : ret;
+	return (ret >= data->max_phases[1]) ? data->max_phases[1] : ret;
 }
 
 static void mp2975_set_phase_rail1(struct pmbus_driver_info *info)
@@ -353,7 +358,7 @@ mp2975_identify_multiphase(struct i2c_client *client, struct mp2975_data *data,
 	if (ret < 0)
 		return ret;
 
-	/* Identify multiphase for rail 1 - could be from 1 to 8. */
+	/* Identify multiphase for rail 1 - could be from 1 to data->max_phases[0]. */
 	ret = i2c_smbus_read_word_data(client, MP2975_MFR_VR_MULTI_CONFIG_R1);
 	if (ret <= 0)
 		return ret;
@@ -361,19 +366,19 @@ mp2975_identify_multiphase(struct i2c_client *client, struct mp2975_data *data,
 	info->phases[0] = ret & GENMASK(3, 0);
 
 	/*
-	 * The device provides a total of 8 PWM pins, and can be configured
+	 * The device provides a total of $n PWM pins, and can be configured
 	 * to different phase count applications for rail 1 and rail 2.
-	 * Rail 1 can be set to 8 phases, while rail 2 can only be set to 4
-	 * phases at most. When rail 1’s phase count is configured as 0, rail
+	 * Rail 1 can be set to $n phases, while rail 2 can be set to less than
+	 * that. When rail 1’s phase count is configured as 0, rail
 	 * 1 operates with 1-phase DCM. When rail 2 phase count is configured
 	 * as 0, rail 2 is disabled.
 	 */
-	if (info->phases[0] > MP2975_MAX_PHASE_RAIL1)
+	if (info->phases[0] > data->max_phases[0])
 		return -EINVAL;
 
 	mp2975_set_phase_rail1(info);
-	num_phases2 = min(MP2975_MAX_PHASE_RAIL1 - info->phases[0],
-			  MP2975_MAX_PHASE_RAIL2);
+	num_phases2 = min(data->max_phases[0] - info->phases[0],
+			  data->max_phases[1]);
 	if (info->phases[1] && info->phases[1] <= num_phases2)
 		mp2975_set_phase_rail2(info, num_phases2);
 
@@ -669,11 +674,13 @@ static int mp2975_probe(struct i2c_client *client)
 	else
 		data->chip_id = i2c_match_id(mp2975_id, client)->driver_data;
 
-	memcpy(&data->info, &mp2975_info, sizeof(*info));
+	memcpy(data->max_phases, mp2975_max_phases[data->chip_id],
+	       sizeof(data->max_phases));
+
 	info = &data->info;
 
 	/* Identify multiphase configuration for rail 2. */
-	ret = mp2975_identify_multiphase_rail2(client);
+	ret = mp2975_identify_multiphase_rail2(client, data);
 	if (ret < 0)
 		return ret;
 

From 5239277ef41086cf4173c62de2fe3dda2cd9c777 Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Fri, 14 Jul 2023 15:51:14 +0200
Subject: [PATCH 1577/2047] hwmon: (pmbus/mp2975) Add support for MP2971 and
 MP2973

Add support for MP2971 and MP2973, the successor of MP2975.

The major differences are:
 - On MP2973 and MP2971 the Vref cannot be read and thus most of
   the OVP/current calculations won't work.
 - MP2973 and MP2971 also support LINEAR format for VOUT
 - MP2973 and MP2971 do not support OVP2
 - On MP2973 and MP2971 most registers are in LINEAR format
 - The IMVP9_EN bit has a different position
 - Per phase current sense haven't been implemented.

As on MP2975 most of the FAULT_LIMIT and WARN_LIMIT registers
have been redefined and doesn't provide the functionality as
defined in PMBUS spec.

Tested on MP2973 and MP2971.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230714135124.2645339-6-Naresh.Solanki@9elements.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/mp2975.c | 247 ++++++++++++++++++++++++++++++-----
 1 file changed, 214 insertions(+), 33 deletions(-)

diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c
index 6a97d46ea5e8..634369ebec43 100644
--- a/drivers/hwmon/pmbus/mp2975.c
+++ b/drivers/hwmon/pmbus/mp2975.c
@@ -35,6 +35,8 @@
 #define MP2975_MFR_OVP_TH_SET		0xe5
 #define MP2975_MFR_UVP_SET		0xe6
 
+#define MP2973_MFR_RESO_SET		0xc7
+
 #define MP2975_VOUT_FORMAT		BIT(15)
 #define MP2975_VID_STEP_SEL_R1		BIT(4)
 #define MP2975_IMVP9_EN_R1		BIT(13)
@@ -49,8 +51,32 @@
 #define MP2975_SENSE_AMPL_HALF		2
 #define MP2975_VIN_UV_LIMIT_UNIT	8
 
+#define MP2973_VOUT_FORMAT_R1		GENMASK(7, 6)
+#define MP2973_VOUT_FORMAT_R2		GENMASK(4, 3)
+#define MP2973_VOUT_FORMAT_DIRECT_R1	BIT(7)
+#define MP2973_VOUT_FORMAT_LINEAR_R1	BIT(6)
+#define MP2973_VOUT_FORMAT_DIRECT_R2	BIT(4)
+#define MP2973_VOUT_FORMAT_LINEAR_R2	BIT(3)
+
+#define MP2973_MFR_VR_MULTI_CONFIG_R1	0x0d
+#define MP2973_MFR_VR_MULTI_CONFIG_R2	0x1d
+#define MP2973_VID_STEP_SEL_R1		BIT(4)
+#define MP2973_IMVP9_EN_R1		BIT(14)
+#define MP2973_VID_STEP_SEL_R2		BIT(3)
+#define MP2973_IMVP9_EN_R2		BIT(13)
+
+#define MP2973_MFR_READ_IOUT_PK		0x90
+#define MP2973_MFR_READ_POUT_PK		0x91
+
 #define MP2975_MAX_PHASE_RAIL1	8
 #define MP2975_MAX_PHASE_RAIL2	4
+
+#define MP2973_MAX_PHASE_RAIL1	14
+#define MP2973_MAX_PHASE_RAIL2	6
+
+#define MP2971_MAX_PHASE_RAIL1	8
+#define MP2971_MAX_PHASE_RAIL2	3
+
 #define MP2975_PAGE_NUM		2
 
 #define MP2975_RAIL2_FUNC	(PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT | \
@@ -58,11 +84,13 @@
 				 PMBUS_HAVE_POUT | PMBUS_PHASE_VIRTUAL)
 
 enum chips {
-	mp2975
+	mp2971, mp2973, mp2975
 };
 
 static const int mp2975_max_phases[][MP2975_PAGE_NUM] = {
 	[mp2975] = { MP2975_MAX_PHASE_RAIL1, MP2975_MAX_PHASE_RAIL2 },
+	[mp2973] = { MP2973_MAX_PHASE_RAIL1, MP2973_MAX_PHASE_RAIL2 },
+	[mp2971] = { MP2971_MAX_PHASE_RAIL1, MP2971_MAX_PHASE_RAIL2 },
 };
 
 struct mp2975_data {
@@ -79,6 +107,8 @@ struct mp2975_data {
 };
 
 static const struct i2c_device_id mp2975_id[] = {
+	{"mp2971", mp2971},
+	{"mp2973", mp2973},
 	{"mp2975", mp2975},
 	{}
 };
@@ -215,6 +245,76 @@ mp2975_read_phases(struct i2c_client *client, struct mp2975_data *data,
 	return ret;
 }
 
+static int mp2973_read_word_data(struct i2c_client *client, int page,
+				 int phase, int reg)
+{
+	const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+	struct mp2975_data *data = to_mp2975_data(info);
+	int ret;
+
+	switch (reg) {
+	case PMBUS_OT_FAULT_LIMIT:
+		ret = mp2975_read_word_helper(client, page, phase, reg,
+					      GENMASK(7, 0));
+		break;
+	case PMBUS_VIN_OV_FAULT_LIMIT:
+		ret = mp2975_read_word_helper(client, page, phase, reg,
+					      GENMASK(7, 0));
+		if (ret < 0)
+			return ret;
+
+		ret = DIV_ROUND_CLOSEST(ret, MP2975_VIN_UV_LIMIT_UNIT);
+		break;
+	case PMBUS_VOUT_OV_FAULT_LIMIT:
+		/*
+		 * MP2971 and mp2973 only supports tracking (ovp1) mode.
+		 */
+		ret = mp2975_read_word_helper(client, page, phase,
+					      MP2975_MFR_OVP_TH_SET,
+					      GENMASK(2, 0));
+		if (ret < 0)
+			return ret;
+
+		ret = data->vout_max[page] + 50 * (ret + 1);
+		break;
+	case PMBUS_VOUT_UV_FAULT_LIMIT:
+		ret = mp2975_read_word_helper(client, page, phase, reg,
+					      GENMASK(8, 0));
+		if (ret < 0)
+			return ret;
+		ret = mp2975_vid2direct(info->vrm_version[page], ret);
+		break;
+	case PMBUS_VIRT_READ_POUT_MAX:
+		ret = pmbus_read_word_data(client, page, phase,
+					   MP2973_MFR_READ_POUT_PK);
+		break;
+	case PMBUS_VIRT_READ_IOUT_MAX:
+		ret = pmbus_read_word_data(client, page, phase,
+					   MP2973_MFR_READ_IOUT_PK);
+		break;
+	case PMBUS_UT_WARN_LIMIT:
+	case PMBUS_UT_FAULT_LIMIT:
+	case PMBUS_VIN_UV_WARN_LIMIT:
+	case PMBUS_VIN_UV_FAULT_LIMIT:
+	case PMBUS_VOUT_UV_WARN_LIMIT:
+	case PMBUS_VOUT_OV_WARN_LIMIT:
+	case PMBUS_VIN_OV_WARN_LIMIT:
+	case PMBUS_IIN_OC_FAULT_LIMIT:
+	case PMBUS_IOUT_OC_LV_FAULT_LIMIT:
+	case PMBUS_IOUT_OC_WARN_LIMIT:
+	case PMBUS_IOUT_OC_FAULT_LIMIT:
+	case PMBUS_IOUT_UC_FAULT_LIMIT:
+	case PMBUS_POUT_OP_FAULT_LIMIT:
+	case PMBUS_POUT_OP_WARN_LIMIT:
+	case PMBUS_PIN_OP_WARN_LIMIT:
+		return -ENXIO;
+	default:
+		return -ENODATA;
+	}
+
+	return ret;
+}
+
 static int mp2975_read_word_data(struct i2c_client *client, int page,
 				 int phase, int reg)
 {
@@ -434,6 +534,35 @@ mp2975_identify_rails_vid(struct i2c_client *client, struct mp2975_data *data,
 					  MP2975_MFR_VR_MULTI_CONFIG_R2, 1,
 					  MP2975_IMVP9_EN_R2,
 					  MP2975_VID_STEP_SEL_R2);
+
+	return ret;
+}
+
+static int
+mp2973_identify_rails_vid(struct i2c_client *client, struct mp2975_data *data,
+			  struct pmbus_driver_info *info)
+{
+	int ret;
+
+	ret = i2c_smbus_write_byte_data(client, PMBUS_PAGE, 2);
+	if (ret < 0)
+		return ret;
+
+	/* Identify VID mode for rail 1. */
+	ret = mp2975_identify_vid(client, data, info,
+				  MP2973_MFR_VR_MULTI_CONFIG_R1, 0,
+				  MP2973_IMVP9_EN_R1, MP2973_VID_STEP_SEL_R1);
+
+	if (ret < 0)
+		return ret;
+
+	/* Identify VID mode for rail 2, if connected. */
+	if (info->phases[1])
+		ret = mp2975_identify_vid(client, data, info,
+					  MP2973_MFR_VR_MULTI_CONFIG_R2, 1,
+					  MP2973_IMVP9_EN_R2,
+					  MP2973_VID_STEP_SEL_R2);
+
 	return ret;
 }
 
@@ -551,15 +680,32 @@ static int
 mp2975_set_vout_format(struct i2c_client *client,
 		       struct mp2975_data *data, int page)
 {
-	int ret;
+	int ret, i;
 
-	ret = i2c_smbus_read_word_data(client, MP2975_MFR_DC_LOOP_CTRL);
-	if (ret < 0)
-		return ret;
 	/* Enable DIRECT VOUT format 1mV/LSB */
-	if (ret & MP2975_VOUT_FORMAT) {
-		ret &= ~MP2975_VOUT_FORMAT;
-		ret = i2c_smbus_write_word_data(client, MP2975_MFR_DC_LOOP_CTRL, ret);
+	if (data->chip_id == mp2975) {
+		ret = i2c_smbus_read_word_data(client, MP2975_MFR_DC_LOOP_CTRL);
+		if (ret < 0)
+			return ret;
+		if (ret & MP2975_VOUT_FORMAT) {
+			ret &= ~MP2975_VOUT_FORMAT;
+			ret = i2c_smbus_write_word_data(client, MP2975_MFR_DC_LOOP_CTRL, ret);
+		}
+	} else {
+		ret = i2c_smbus_read_word_data(client, MP2973_MFR_RESO_SET);
+		if (ret < 0)
+			return ret;
+		i = ret;
+
+		if (page == 0) {
+			i &= ~MP2973_VOUT_FORMAT_R1;
+			i |= MP2973_VOUT_FORMAT_DIRECT_R1;
+		} else {
+			i &= ~MP2973_VOUT_FORMAT_R2;
+			i |= MP2973_VOUT_FORMAT_DIRECT_R2;
+		}
+		if (i != ret)
+			ret = i2c_smbus_write_word_data(client, MP2973_MFR_RESO_SET, i);
 	}
 	return ret;
 }
@@ -607,10 +753,10 @@ mp2975_vout_per_rail_config_get(struct i2c_client *client,
 	for (i = 0; i < data->info.pages; i++) {
 		ret = i2c_smbus_write_byte_data(client, PMBUS_PAGE, i);
 		if (ret < 0)
-			return ret;
+			continue;
 
-		/* Obtain voltage reference offsets. */
-		ret = mp2975_vref_offset_get(client, data, i);
+		/* Set VOUT format for READ_VOUT command : direct. */
+		ret = mp2975_set_vout_format(client, data, i);
 		if (ret < 0)
 			return ret;
 
@@ -619,8 +765,12 @@ mp2975_vout_per_rail_config_get(struct i2c_client *client,
 		if (ret < 0)
 			return ret;
 
-		/* Set VOUT format for READ_VOUT command : direct. */
-		ret = mp2975_set_vout_format(client, data, i);
+		/* Skip if reading Vref is unsupported */
+		if (data->chip_id != mp2975)
+			continue;
+
+		/* Obtain voltage reference offsets. */
+		ret = mp2975_vref_offset_get(client, data, i);
 		if (ret < 0)
 			return ret;
 
@@ -658,6 +808,23 @@ static struct pmbus_driver_info mp2975_info = {
 	.read_word_data = mp2975_read_word_data,
 };
 
+static struct pmbus_driver_info mp2973_info = {
+	.pages = 1,
+	.format[PSC_VOLTAGE_IN] = linear,
+	.format[PSC_VOLTAGE_OUT] = direct,
+	.format[PSC_TEMPERATURE] = linear,
+	.format[PSC_CURRENT_IN] = linear,
+	.format[PSC_CURRENT_OUT] = linear,
+	.format[PSC_POWER] = linear,
+	.m[PSC_VOLTAGE_OUT] = 1,
+	.R[PSC_VOLTAGE_OUT] = 3,
+	.func[0] = PMBUS_HAVE_VIN | PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT |
+		PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT |
+		PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP | PMBUS_HAVE_POUT |
+		PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT,
+	.read_word_data = mp2973_read_word_data,
+};
+
 static int mp2975_probe(struct i2c_client *client)
 {
 	struct pmbus_driver_info *info;
@@ -677,6 +844,11 @@ static int mp2975_probe(struct i2c_client *client)
 	memcpy(data->max_phases, mp2975_max_phases[data->chip_id],
 	       sizeof(data->max_phases));
 
+	if (data->chip_id == mp2975)
+		memcpy(&data->info, &mp2975_info, sizeof(*info));
+	else
+		memcpy(&data->info, &mp2973_info, sizeof(*info));
+
 	info = &data->info;
 
 	/* Identify multiphase configuration for rail 2. */
@@ -691,30 +863,37 @@ static int mp2975_probe(struct i2c_client *client)
 		data->info.func[1] = MP2975_RAIL2_FUNC;
 	}
 
-	/* Identify multiphase configuration. */
-	ret = mp2975_identify_multiphase(client, data, info);
-	if (ret)
-		return ret;
+	if (data->chip_id == mp2975) {
+		/* Identify multiphase configuration. */
+		ret = mp2975_identify_multiphase(client, data, info);
+		if (ret)
+			return ret;
 
-	/* Identify VID setting per rail. */
-	ret = mp2975_identify_rails_vid(client, data, info);
-	if (ret < 0)
-		return ret;
+		/* Identify VID setting per rail. */
+		ret = mp2975_identify_rails_vid(client, data, info);
+		if (ret < 0)
+			return ret;
 
-	/* Obtain current sense gain of power stage. */
-	ret = mp2975_current_sense_gain_get(client, data);
-	if (ret)
-		return ret;
+		/* Obtain current sense gain of power stage. */
+		ret = mp2975_current_sense_gain_get(client, data);
+		if (ret)
+			return ret;
 
-	/* Obtain voltage reference values. */
-	ret = mp2975_vref_get(client, data, info);
-	if (ret)
-		return ret;
+		/* Obtain voltage reference values. */
+		ret = mp2975_vref_get(client, data, info);
+		if (ret)
+			return ret;
 
-	/* Obtain vout over-voltage scales. */
-	ret = mp2975_vout_ov_scale_get(client, data, info);
-	if (ret < 0)
-		return ret;
+		/* Obtain vout over-voltage scales. */
+		ret = mp2975_vout_ov_scale_get(client, data, info);
+		if (ret < 0)
+			return ret;
+	} else {
+		/* Identify VID setting per rail. */
+		ret = mp2973_identify_rails_vid(client, data, info);
+		if (ret < 0)
+			return ret;
+	}
 
 	/* Obtain offsets, maximum and format for vout. */
 	ret = mp2975_vout_per_rail_config_get(client, data, info);
@@ -725,6 +904,8 @@ static int mp2975_probe(struct i2c_client *client)
 }
 
 static const struct of_device_id __maybe_unused mp2975_of_match[] = {
+	{.compatible = "mps,mp2971", .data = (void *)mp2971},
+	{.compatible = "mps,mp2973", .data = (void *)mp2973},
 	{.compatible = "mps,mp2975", .data = (void *)mp2975},
 	{}
 };

From 88fc1efcd45c3f681740659a1483978e7553ade7 Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Fri, 14 Jul 2023 15:51:15 +0200
Subject: [PATCH 1578/2047] hwmon: (pmbus/mp2975) Add regulator support

Add support to expose the PMBUS regulator.

Tested on MP2973 and MP2971.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230714135124.2645339-7-Naresh.Solanki@9elements.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/Kconfig  |  7 +++++++
 drivers/hwmon/pmbus/mp2975.c | 15 +++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 270b6336b76d..b4e93bd5835e 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -317,6 +317,13 @@ config SENSORS_MP2975
 	  This driver can also be built as a module. If so, the module will
 	  be called mp2975.
 
+config SENSORS_MP2975_REGULATOR
+	depends on SENSORS_MP2975 && REGULATOR
+	bool "Regulator support for MPS MP2975"
+	help
+	  If you say yes here you get regulator support for MPS MP2975
+	  Dual Loop Digital Multi-Phase Controller.
+
 config SENSORS_MP5023
 	tristate "MPS MP5023"
 	help
diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c
index 634369ebec43..5fb21b9f4ab3 100644
--- a/drivers/hwmon/pmbus/mp2975.c
+++ b/drivers/hwmon/pmbus/mp2975.c
@@ -115,6 +115,11 @@ static const struct i2c_device_id mp2975_id[] = {
 
 MODULE_DEVICE_TABLE(i2c, mp2975_id);
 
+static const struct regulator_desc __maybe_unused mp2975_reg_desc[] = {
+	PMBUS_REGULATOR("vout", 0),
+	PMBUS_REGULATOR("vout", 1),
+};
+
 #define to_mp2975_data(x)  container_of(x, struct mp2975_data, info)
 
 static int
@@ -806,6 +811,10 @@ static struct pmbus_driver_info mp2975_info = {
 		PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP | PMBUS_HAVE_POUT |
 		PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT | PMBUS_PHASE_VIRTUAL,
 	.read_word_data = mp2975_read_word_data,
+#if IS_ENABLED(CONFIG_SENSORS_MP2975_REGULATOR)
+	.num_regulators = 1,
+	.reg_desc = mp2975_reg_desc,
+#endif
 };
 
 static struct pmbus_driver_info mp2973_info = {
@@ -823,6 +832,10 @@ static struct pmbus_driver_info mp2973_info = {
 		PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP | PMBUS_HAVE_POUT |
 		PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT,
 	.read_word_data = mp2973_read_word_data,
+#if IS_ENABLED(CONFIG_SENSORS_MP2975_REGULATOR)
+	.num_regulators = 1,
+	.reg_desc = mp2975_reg_desc,
+#endif
 };
 
 static int mp2975_probe(struct i2c_client *client)
@@ -861,6 +874,8 @@ static int mp2975_probe(struct i2c_client *client)
 		data->info.pages = MP2975_PAGE_NUM;
 		data->info.phases[1] = ret;
 		data->info.func[1] = MP2975_RAIL2_FUNC;
+		if (IS_ENABLED(CONFIG_SENSORS_MP2975_REGULATOR))
+			data->info.num_regulators = MP2975_PAGE_NUM;
 	}
 
 	if (data->chip_id == mp2975) {

From 45f154dc966322948725af1004f53a5531f03b7f Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Fri, 14 Jul 2023 15:51:16 +0200
Subject: [PATCH 1579/2047] hwmon: (pmbus/mp2975) Add OCP limit

Add support for PMBUS_IOUT_OC_FAULT_LIMIT.
Add a helper function to convert the limit to LINEAR11 format
and read data->info.phases[0] on MP2971 and MP2973 as well.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230714135124.2645339-8-Naresh.Solanki@9elements.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/mp2975.c | 76 ++++++++++++++++++++++++++++++------
 1 file changed, 65 insertions(+), 11 deletions(-)

diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c
index 5fb21b9f4ab3..28f33f4618fa 100644
--- a/drivers/hwmon/pmbus/mp2975.c
+++ b/drivers/hwmon/pmbus/mp2975.c
@@ -65,6 +65,10 @@
 #define MP2973_VID_STEP_SEL_R2		BIT(3)
 #define MP2973_IMVP9_EN_R2		BIT(13)
 
+#define MP2973_MFR_OCP_TOTAL_SET	0x5f
+#define MP2973_OCP_TOTAL_CUR_MASK	GENMASK(6, 0)
+#define MP2973_MFR_OCP_LEVEL_RES	BIT(15)
+
 #define MP2973_MFR_READ_IOUT_PK		0x90
 #define MP2973_MFR_READ_POUT_PK		0x91
 
@@ -153,6 +157,41 @@ mp2975_vid2direct(int vrf, int val)
 	return 0;
 }
 
+#define MAX_LIN_MANTISSA	(1023 * 1000)
+#define MIN_LIN_MANTISSA	(511 * 1000)
+
+/* Converts a milli-unit DIRECT value to LINEAR11 format */
+static u16 mp2975_data2reg_linear11(s64 val)
+{
+	s16 exponent = 0, mantissa;
+	bool negative = false;
+
+	/* simple case */
+	if (val == 0)
+		return 0;
+
+	/* Reduce large mantissa until it fits into 10 bit */
+	while (val >= MAX_LIN_MANTISSA && exponent < 15) {
+		exponent++;
+		val >>= 1;
+	}
+	/* Increase small mantissa to improve precision */
+	while (val < MIN_LIN_MANTISSA && exponent > -15) {
+		exponent--;
+		val <<= 1;
+	}
+
+	/* Convert mantissa from milli-units to units */
+	mantissa = clamp_val(DIV_ROUND_CLOSEST_ULL(val, 1000), 0, 0x3ff);
+
+	/* restore sign */
+	if (negative)
+		mantissa = -mantissa;
+
+	/* Convert to 5 bit exponent, 11 bit mantissa */
+	return (mantissa & 0x7ff) | ((exponent << 11) & 0xf800);
+}
+
 static int
 mp2975_read_phase(struct i2c_client *client, struct mp2975_data *data,
 		  int page, int phase, u8 reg)
@@ -297,6 +336,20 @@ static int mp2973_read_word_data(struct i2c_client *client, int page,
 		ret = pmbus_read_word_data(client, page, phase,
 					   MP2973_MFR_READ_IOUT_PK);
 		break;
+	case PMBUS_IOUT_OC_FAULT_LIMIT:
+		ret = mp2975_read_word_helper(client, page, phase,
+					      MP2973_MFR_OCP_TOTAL_SET,
+					      GENMASK(15, 0));
+		if (ret < 0)
+			return ret;
+
+		if (ret & MP2973_MFR_OCP_LEVEL_RES)
+			ret = 2 * (ret & MP2973_OCP_TOTAL_CUR_MASK);
+		else
+			ret = ret & MP2973_OCP_TOTAL_CUR_MASK;
+
+		ret = mp2975_data2reg_linear11(ret * info->phases[page] * 1000);
+		break;
 	case PMBUS_UT_WARN_LIMIT:
 	case PMBUS_UT_FAULT_LIMIT:
 	case PMBUS_VIN_UV_WARN_LIMIT:
@@ -307,7 +360,6 @@ static int mp2973_read_word_data(struct i2c_client *client, int page,
 	case PMBUS_IIN_OC_FAULT_LIMIT:
 	case PMBUS_IOUT_OC_LV_FAULT_LIMIT:
 	case PMBUS_IOUT_OC_WARN_LIMIT:
-	case PMBUS_IOUT_OC_FAULT_LIMIT:
 	case PMBUS_IOUT_UC_FAULT_LIMIT:
 	case PMBUS_POUT_OP_FAULT_LIMIT:
 	case PMBUS_POUT_OP_WARN_LIMIT:
@@ -481,11 +533,13 @@ mp2975_identify_multiphase(struct i2c_client *client, struct mp2975_data *data,
 	if (info->phases[0] > data->max_phases[0])
 		return -EINVAL;
 
-	mp2975_set_phase_rail1(info);
-	num_phases2 = min(data->max_phases[0] - info->phases[0],
-			  data->max_phases[1]);
-	if (info->phases[1] && info->phases[1] <= num_phases2)
-		mp2975_set_phase_rail2(info, num_phases2);
+	if (data->chip_id == mp2975) {
+		mp2975_set_phase_rail1(info);
+		num_phases2 = min(data->max_phases[0] - info->phases[0],
+				  data->max_phases[1]);
+		if (info->phases[1] && info->phases[1] <= num_phases2)
+			mp2975_set_phase_rail2(info, num_phases2);
+	}
 
 	return 0;
 }
@@ -878,12 +932,12 @@ static int mp2975_probe(struct i2c_client *client)
 			data->info.num_regulators = MP2975_PAGE_NUM;
 	}
 
-	if (data->chip_id == mp2975) {
-		/* Identify multiphase configuration. */
-		ret = mp2975_identify_multiphase(client, data, info);
-		if (ret)
-			return ret;
+	/* Identify multiphase configuration. */
+	ret = mp2975_identify_multiphase(client, data, info);
+	if (ret)
+		return ret;
 
+	if (data->chip_id == mp2975) {
 		/* Identify VID setting per rail. */
 		ret = mp2975_identify_rails_vid(client, data, info);
 		if (ret < 0)

From acda945afb465e2a2c436623a35203d63994441f Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Mon, 31 Jul 2023 11:22:04 +0200
Subject: [PATCH 1580/2047] hwmon: (pmbus/mp2975) Fix PGOOD in READ_STATUS_WORD

MP2973 & MP2971 return PGOOD instead of PB_STATUS_POWER_GOOD_N.
Fix that in the read_word_data hook.

MP2975 should not be affected, but that has not been confirmed with
hardware.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230731092204.2933045-1-Naresh.Solanki@9elements.com
[groeck: Rephrased description to indicate that MP2975 is likely not affected]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/mp2975.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c
index 28f33f4618fa..26ba50633100 100644
--- a/drivers/hwmon/pmbus/mp2975.c
+++ b/drivers/hwmon/pmbus/mp2975.c
@@ -380,6 +380,11 @@ static int mp2975_read_word_data(struct i2c_client *client, int page,
 	int ret;
 
 	switch (reg) {
+	case PMBUS_STATUS_WORD:
+		/* MP2973 & MP2971 return PGOOD instead of PB_STATUS_POWER_GOOD_N. */
+		ret = pmbus_read_word_data(client, page, phase, reg);
+		ret ^= PB_STATUS_POWER_GOOD_N;
+		break;
 	case PMBUS_OT_FAULT_LIMIT:
 		ret = mp2975_read_word_helper(client, page, phase, reg,
 					      GENMASK(7, 0));

From a0ac418c6007c9e1694e21056964c923364f3175 Mon Sep 17 00:00:00 2001
From: JuenKit Yip <JuenKit_Yip@hotmail.com>
Date: Mon, 3 Jul 2023 22:08:16 +0800
Subject: [PATCH 1581/2047] hwmon: (sht3x) convert some of sysfs interface to
 hwmon

update_interval, temperature/humidity max/min and hyst
were moved to new hwmon interface, and only heater and
repeatability were reserved as non-stardard sysfs interface.

Signed-off-by: JuenKit Yip <JuenKit_Yip@hotmail.com>
Link: https://lore.kernel.org/r/DB4PR10MB626157BC697F2CD6100431359229A@DB4PR10MB6261.EURPRD10.PROD.OUTLOOK.COM
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/sht3x.c | 389 +++++++++++++++++++++++++++---------------
 1 file changed, 252 insertions(+), 137 deletions(-)

diff --git a/drivers/hwmon/sht3x.c b/drivers/hwmon/sht3x.c
index bf18630619e0..79657910b79e 100644
--- a/drivers/hwmon/sht3x.c
+++ b/drivers/hwmon/sht3x.c
@@ -147,8 +147,20 @@ static const u16 mode_to_update_interval[] = {
 	 100,
 };
 
+static const struct hwmon_channel_info * const sht3x_channel_info[] = {
+	HWMON_CHANNEL_INFO(chip, HWMON_C_UPDATE_INTERVAL),
+	HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MIN |
+				HWMON_T_MIN_HYST | HWMON_T_MAX |
+				HWMON_T_MAX_HYST | HWMON_T_ALARM),
+	HWMON_CHANNEL_INFO(humidity, HWMON_H_INPUT | HWMON_H_MIN |
+				HWMON_H_MIN_HYST | HWMON_H_MAX |
+				HWMON_H_MAX_HYST | HWMON_H_ALARM),
+	NULL,
+};
+
 struct sht3x_data {
 	struct i2c_client *client;
+	enum sht3x_chips chip_id;
 	struct mutex i2c_lock; /* lock for sending i2c commands */
 	struct mutex data_lock; /* lock for updating driver data */
 
@@ -276,27 +288,24 @@ out:
 	return data;
 }
 
-/* sysfs attributes */
-static ssize_t temp1_input_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static int temp1_input_read(struct device *dev)
 {
 	struct sht3x_data *data = sht3x_update_client(dev);
 
 	if (IS_ERR(data))
 		return PTR_ERR(data);
 
-	return sprintf(buf, "%d\n", data->temperature);
+	return data->temperature;
 }
 
-static ssize_t humidity1_input_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
+static int humidity1_input_read(struct device *dev)
 {
 	struct sht3x_data *data = sht3x_update_client(dev);
 
 	if (IS_ERR(data))
 		return PTR_ERR(data);
 
-	return sprintf(buf, "%u\n", data->humidity);
+	return data->humidity;
 }
 
 /*
@@ -332,33 +341,24 @@ static int limits_update(struct sht3x_data *data)
 	return ret;
 }
 
-static ssize_t temp1_limit_show(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
+static int temp1_limit_read(struct device *dev, int index)
 {
 	struct sht3x_data *data = dev_get_drvdata(dev);
-	u8 index = to_sensor_dev_attr(attr)->index;
-	int temperature_limit = data->temperature_limits[index];
 
-	return sysfs_emit(buf, "%d\n", temperature_limit);
+	return data->temperature_limits[index];
 }
 
-static ssize_t humidity1_limit_show(struct device *dev,
-				    struct device_attribute *attr,
-				    char *buf)
+static int humidity1_limit_read(struct device *dev, int index)
 {
 	struct sht3x_data *data = dev_get_drvdata(dev);
-	u8 index = to_sensor_dev_attr(attr)->index;
-	u32 humidity_limit = data->humidity_limits[index];
 
-	return sysfs_emit(buf, "%u\n", humidity_limit);
+	return data->humidity_limits[index];
 }
 
 /*
- * limit_store must only be called with data_lock held
+ * limit_write must only be called with data_lock held
  */
-static size_t limit_store(struct device *dev,
-			  size_t count,
+static size_t limit_write(struct device *dev,
 			  u8 index,
 			  int temperature,
 			  u32 humidity)
@@ -379,7 +379,7 @@ static size_t limit_store(struct device *dev,
 	 * ST = (T + 45) / 175 * 2^16
 	 * SRH = RH / 100 * 2^16
 	 * adapted for fixed point arithmetic and packed the same as
-	 * in limit_show()
+	 * in limit_read()
 	 */
 	raw = ((u32)(temperature + 45000) * 24543) >> (16 + 7);
 	raw |= ((humidity * 42950) >> 16) & 0xfe00;
@@ -400,50 +400,35 @@ static size_t limit_store(struct device *dev,
 
 	data->temperature_limits[index] = temperature;
 	data->humidity_limits[index] = humidity;
-	return count;
+
+	return 0;
 }
 
-static ssize_t temp1_limit_store(struct device *dev,
-				 struct device_attribute *attr,
-				 const char *buf,
-				 size_t count)
+static int temp1_limit_write(struct device *dev, int index, int val)
 {
 	int temperature;
 	int ret;
 	struct sht3x_data *data = dev_get_drvdata(dev);
-	u8 index = to_sensor_dev_attr(attr)->index;
 
-	ret = kstrtoint(buf, 0, &temperature);
-	if (ret)
-		return ret;
-
-	temperature = clamp_val(temperature, SHT3X_MIN_TEMPERATURE,
+	temperature = clamp_val(val, SHT3X_MIN_TEMPERATURE,
 				SHT3X_MAX_TEMPERATURE);
 	mutex_lock(&data->data_lock);
-	ret = limit_store(dev, count, index, temperature,
+	ret = limit_write(dev, index, temperature,
 			  data->humidity_limits[index]);
 	mutex_unlock(&data->data_lock);
 
 	return ret;
 }
 
-static ssize_t humidity1_limit_store(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf,
-				     size_t count)
+static int humidity1_limit_write(struct device *dev, int index, int val)
 {
 	u32 humidity;
 	int ret;
 	struct sht3x_data *data = dev_get_drvdata(dev);
-	u8 index = to_sensor_dev_attr(attr)->index;
 
-	ret = kstrtou32(buf, 0, &humidity);
-	if (ret)
-		return ret;
-
-	humidity = clamp_val(humidity, SHT3X_MIN_HUMIDITY, SHT3X_MAX_HUMIDITY);
+	humidity = clamp_val(val, SHT3X_MIN_HUMIDITY, SHT3X_MAX_HUMIDITY);
 	mutex_lock(&data->data_lock);
-	ret = limit_store(dev, count, index, data->temperature_limits[index],
+	ret = limit_write(dev, index, data->temperature_limits[index],
 			  humidity);
 	mutex_unlock(&data->data_lock);
 
@@ -474,7 +459,6 @@ static void sht3x_select_command(struct sht3x_data *data)
 }
 
 static int status_register_read(struct device *dev,
-				struct device_attribute *attr,
 				char *buffer, int length)
 {
 	int ret;
@@ -487,34 +471,30 @@ static int status_register_read(struct device *dev,
 	return ret;
 }
 
-static ssize_t temp1_alarm_show(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
+static int temp1_alarm_read(struct device *dev)
 {
 	char buffer[SHT3X_WORD_LEN + SHT3X_CRC8_LEN];
 	int ret;
 
-	ret = status_register_read(dev, attr, buffer,
+	ret = status_register_read(dev, buffer,
 				   SHT3X_WORD_LEN + SHT3X_CRC8_LEN);
 	if (ret)
 		return ret;
 
-	return sysfs_emit(buf, "%d\n", !!(buffer[0] & 0x04));
+	return !!(buffer[0] & 0x04);
 }
 
-static ssize_t humidity1_alarm_show(struct device *dev,
-				    struct device_attribute *attr,
-				    char *buf)
+static int humidity1_alarm_read(struct device *dev)
 {
 	char buffer[SHT3X_WORD_LEN + SHT3X_CRC8_LEN];
 	int ret;
 
-	ret = status_register_read(dev, attr, buffer,
+	ret = status_register_read(dev, buffer,
 				   SHT3X_WORD_LEN + SHT3X_CRC8_LEN);
 	if (ret)
 		return ret;
 
-	return sysfs_emit(buf, "%d\n", !!(buffer[0] & 0x08));
+	return !!(buffer[0] & 0x08);
 }
 
 static ssize_t heater_enable_show(struct device *dev,
@@ -524,7 +504,7 @@ static ssize_t heater_enable_show(struct device *dev,
 	char buffer[SHT3X_WORD_LEN + SHT3X_CRC8_LEN];
 	int ret;
 
-	ret = status_register_read(dev, attr, buffer,
+	ret = status_register_read(dev, buffer,
 				   SHT3X_WORD_LEN + SHT3X_CRC8_LEN);
 	if (ret)
 		return ret;
@@ -560,39 +540,28 @@ static ssize_t heater_enable_store(struct device *dev,
 	return ret;
 }
 
-static ssize_t update_interval_show(struct device *dev,
-				    struct device_attribute *attr,
-				    char *buf)
+static int update_interval_read(struct device *dev)
 {
 	struct sht3x_data *data = dev_get_drvdata(dev);
 
-	return sysfs_emit(buf, "%u\n",
-			 mode_to_update_interval[data->mode]);
+	return mode_to_update_interval[data->mode];
 }
 
-static ssize_t update_interval_store(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf,
-				     size_t count)
+static int update_interval_write(struct device *dev, int val)
 {
-	u16 update_interval;
 	u8 mode;
 	int ret;
 	const char *command;
 	struct sht3x_data *data = dev_get_drvdata(dev);
 	struct i2c_client *client = data->client;
 
-	ret = kstrtou16(buf, 0, &update_interval);
-	if (ret)
-		return ret;
-
-	mode = get_mode_from_update_interval(update_interval);
+	mode = get_mode_from_update_interval(val);
 
 	mutex_lock(&data->data_lock);
 	/* mode did not change */
 	if (mode == data->mode) {
 		mutex_unlock(&data->data_lock);
-		return count;
+		return 0;
 	}
 
 	mutex_lock(&data->i2c_lock);
@@ -634,7 +603,7 @@ out:
 	if (ret != SHT3X_CMD_LENGTH)
 		return ret < 0 ? ret : -EIO;
 
-	return count;
+	return 0;
 }
 
 static ssize_t repeatability_show(struct device *dev,
@@ -668,60 +637,219 @@ static ssize_t repeatability_store(struct device *dev,
 	return count;
 }
 
-static SENSOR_DEVICE_ATTR_RO(temp1_input, temp1_input, 0);
-static SENSOR_DEVICE_ATTR_RO(humidity1_input, humidity1_input, 0);
-static SENSOR_DEVICE_ATTR_RW(temp1_max, temp1_limit, limit_max);
-static SENSOR_DEVICE_ATTR_RW(humidity1_max, humidity1_limit, limit_max);
-static SENSOR_DEVICE_ATTR_RW(temp1_max_hyst, temp1_limit, limit_max_hyst);
-static SENSOR_DEVICE_ATTR_RW(humidity1_max_hyst, humidity1_limit,
-			     limit_max_hyst);
-static SENSOR_DEVICE_ATTR_RW(temp1_min, temp1_limit, limit_min);
-static SENSOR_DEVICE_ATTR_RW(humidity1_min, humidity1_limit, limit_min);
-static SENSOR_DEVICE_ATTR_RW(temp1_min_hyst, temp1_limit, limit_min_hyst);
-static SENSOR_DEVICE_ATTR_RW(humidity1_min_hyst, humidity1_limit,
-			     limit_min_hyst);
-static SENSOR_DEVICE_ATTR_RO(temp1_alarm, temp1_alarm, 0);
-static SENSOR_DEVICE_ATTR_RO(humidity1_alarm, humidity1_alarm, 0);
 static SENSOR_DEVICE_ATTR_RW(heater_enable, heater_enable, 0);
-static SENSOR_DEVICE_ATTR_RW(update_interval, update_interval, 0);
 static SENSOR_DEVICE_ATTR_RW(repeatability, repeatability, 0);
 
 static struct attribute *sht3x_attrs[] = {
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_humidity1_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_max.dev_attr.attr,
-	&sensor_dev_attr_temp1_max_hyst.dev_attr.attr,
-	&sensor_dev_attr_humidity1_max.dev_attr.attr,
-	&sensor_dev_attr_humidity1_max_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp1_min.dev_attr.attr,
-	&sensor_dev_attr_temp1_min_hyst.dev_attr.attr,
-	&sensor_dev_attr_humidity1_min.dev_attr.attr,
-	&sensor_dev_attr_humidity1_min_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp1_alarm.dev_attr.attr,
-	&sensor_dev_attr_humidity1_alarm.dev_attr.attr,
 	&sensor_dev_attr_heater_enable.dev_attr.attr,
-	&sensor_dev_attr_update_interval.dev_attr.attr,
-	&sensor_dev_attr_repeatability.dev_attr.attr,
-	NULL
-};
-
-static struct attribute *sts3x_attrs[] = {
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_max.dev_attr.attr,
-	&sensor_dev_attr_temp1_max_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp1_min.dev_attr.attr,
-	&sensor_dev_attr_temp1_min_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp1_alarm.dev_attr.attr,
-	&sensor_dev_attr_heater_enable.dev_attr.attr,
-	&sensor_dev_attr_update_interval.dev_attr.attr,
 	&sensor_dev_attr_repeatability.dev_attr.attr,
 	NULL
 };
 
 ATTRIBUTE_GROUPS(sht3x);
-ATTRIBUTE_GROUPS(sts3x);
 
-static const struct i2c_device_id sht3x_ids[];
+static umode_t sht3x_is_visible(const void *data, enum hwmon_sensor_types type,
+				u32 attr, int channel)
+{
+	const struct sht3x_data *chip_data = data;
+
+	switch (type) {
+	case hwmon_chip:
+		switch (attr) {
+		case hwmon_chip_update_interval:
+			return 0644;
+		default:
+			break;
+		}
+		break;
+	case hwmon_temp:
+		switch (attr) {
+		case hwmon_temp_input:
+		case hwmon_temp_alarm:
+			return 0444;
+		case hwmon_temp_max:
+		case hwmon_temp_max_hyst:
+		case hwmon_temp_min:
+		case hwmon_temp_min_hyst:
+			return 0644;
+		default:
+			break;
+		}
+		break;
+	case hwmon_humidity:
+		if (chip_data->chip_id == sts3x)
+			break;
+		switch (attr) {
+		case hwmon_humidity_input:
+		case hwmon_humidity_alarm:
+			return 0444;
+		case hwmon_humidity_max:
+		case hwmon_humidity_max_hyst:
+		case hwmon_humidity_min:
+		case hwmon_humidity_min_hyst:
+			return 0644;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int sht3x_read(struct device *dev, enum hwmon_sensor_types type,
+		      u32 attr, int channel, long *val)
+{
+	enum sht3x_limits index;
+
+	switch (type) {
+	case hwmon_chip:
+		switch (attr) {
+		case hwmon_chip_update_interval:
+			*val = update_interval_read(dev);
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+		break;
+	case hwmon_temp:
+		switch (attr) {
+		case hwmon_temp_input:
+			*val = temp1_input_read(dev);
+			break;
+		case hwmon_temp_alarm:
+			*val = temp1_alarm_read(dev);
+			break;
+		case hwmon_temp_max:
+			index = limit_max;
+			*val = temp1_limit_read(dev, index);
+			break;
+		case hwmon_temp_max_hyst:
+			index = limit_max_hyst;
+			*val = temp1_limit_read(dev, index);
+			break;
+		case hwmon_temp_min:
+			index = limit_min;
+			*val = temp1_limit_read(dev, index);
+			break;
+		case hwmon_temp_min_hyst:
+			index = limit_min_hyst;
+			*val = temp1_limit_read(dev, index);
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+		break;
+	case hwmon_humidity:
+		switch (attr) {
+		case hwmon_humidity_input:
+			*val = humidity1_input_read(dev);
+			break;
+		case hwmon_humidity_alarm:
+			*val = humidity1_alarm_read(dev);
+			break;
+		case hwmon_humidity_max:
+			index = limit_max;
+			*val = humidity1_limit_read(dev, index);
+			break;
+		case hwmon_humidity_max_hyst:
+			index = limit_max_hyst;
+			*val = humidity1_limit_read(dev, index);
+			break;
+		case hwmon_humidity_min:
+			index = limit_min;
+			*val = humidity1_limit_read(dev, index);
+			break;
+		case hwmon_humidity_min_hyst:
+			index = limit_min_hyst;
+			*val = humidity1_limit_read(dev, index);
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int sht3x_write(struct device *dev, enum hwmon_sensor_types type,
+		       u32 attr, int channel, long val)
+{
+	enum sht3x_limits index;
+
+	switch (type) {
+	case hwmon_chip:
+		switch (attr) {
+		case hwmon_chip_update_interval:
+			return update_interval_write(dev, val);
+		default:
+			return -EOPNOTSUPP;
+		}
+	case hwmon_temp:
+		switch (attr) {
+		case hwmon_temp_max:
+			index = limit_max;
+			break;
+		case hwmon_temp_max_hyst:
+			index = limit_max_hyst;
+			break;
+		case hwmon_temp_min:
+			index = limit_min;
+			break;
+		case hwmon_temp_min_hyst:
+			index = limit_min_hyst;
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+		return temp1_limit_write(dev, index, val);
+	case hwmon_humidity:
+		switch (attr) {
+		case hwmon_humidity_max:
+			index = limit_max;
+			break;
+		case hwmon_humidity_max_hyst:
+			index = limit_max_hyst;
+			break;
+		case hwmon_humidity_min:
+			index = limit_min;
+			break;
+		case hwmon_humidity_min_hyst:
+			index = limit_min_hyst;
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+		return humidity1_limit_write(dev, index, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct hwmon_ops sht3x_ops = {
+	.is_visible = sht3x_is_visible,
+	.read = sht3x_read,
+	.write = sht3x_write,
+};
+
+static const struct hwmon_chip_info sht3x_chip_info = {
+	.ops = &sht3x_ops,
+	.info = sht3x_channel_info,
+};
+
+/* device ID table */
+static const struct i2c_device_id sht3x_ids[] = {
+	{"sht3x", sht3x},
+	{"sts3x", sts3x},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, sht3x_ids);
 
 static int sht3x_probe(struct i2c_client *client)
 {
@@ -730,7 +858,6 @@ static int sht3x_probe(struct i2c_client *client)
 	struct device *hwmon_dev;
 	struct i2c_adapter *adap = client->adapter;
 	struct device *dev = &client->dev;
-	const struct attribute_group **attribute_groups;
 
 	/*
 	 * we require full i2c support since the sht3x uses multi-byte read and
@@ -753,6 +880,7 @@ static int sht3x_probe(struct i2c_client *client)
 	data->mode = 0;
 	data->last_update = jiffies - msecs_to_jiffies(3000);
 	data->client = client;
+	data->chip_id = i2c_match_id(sht3x_ids, client)->driver_data;
 	crc8_populate_msb(sht3x_crc8_table, SHT3X_CRC8_POLYNOMIAL);
 
 	sht3x_select_command(data);
@@ -771,15 +899,11 @@ static int sht3x_probe(struct i2c_client *client)
 	if (ret)
 		return ret;
 
-	if (i2c_match_id(sht3x_ids, client)->driver_data == sts3x)
-		attribute_groups = sts3x_groups;
-	else
-		attribute_groups = sht3x_groups;
-
-	hwmon_dev = devm_hwmon_device_register_with_groups(dev,
-							   client->name,
-							   data,
-							   attribute_groups);
+	hwmon_dev = devm_hwmon_device_register_with_info(dev,
+							 client->name,
+							 data,
+							 &sht3x_chip_info,
+							 sht3x_groups);
 
 	if (IS_ERR(hwmon_dev))
 		dev_dbg(dev, "unable to register hwmon device\n");
@@ -787,15 +911,6 @@ static int sht3x_probe(struct i2c_client *client)
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
-/* device ID table */
-static const struct i2c_device_id sht3x_ids[] = {
-	{"sht3x", sht3x},
-	{"sts3x", sts3x},
-	{}
-};
-
-MODULE_DEVICE_TABLE(i2c, sht3x_ids);
-
 static struct i2c_driver sht3x_i2c_driver = {
 	.driver.name = "sht3x",
 	.probe       = sht3x_probe,

From 4f65c15cf70eb22c074889af60b9d2bcffbb375a Mon Sep 17 00:00:00 2001
From: Ahmad Khalifa <ahmad@khalifa.ws>
Date: Wed, 19 Jul 2023 23:41:42 +0100
Subject: [PATCH 1582/2047] hwmon: (nct6775) Add support for 18 IN readings for
 nct6799

* Add additional VIN/IN_MIN/IN_MAX register values
* Separate ALARM/BEEP bits for nct6799
* Update scaling factors for nct6799

Registers/alarms match for NCT6796D-S and NCT6799D-R
Tested on NCT6799D-R for new IN/MIN/MAX and ALARMS

Signed-off-by: Ahmad Khalifa <ahmad@khalifa.ws>
Link: https://lore.kernel.org/r/20230719224142.411237-1-ahmad@khalifa.ws
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/nct6775-core.c | 51 +++++++++++++++++++++++++++++-------
 drivers/hwmon/nct6775.h      |  5 ++--
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c
index fa0b7551ebf5..33533d95cf48 100644
--- a/drivers/hwmon/nct6775-core.c
+++ b/drivers/hwmon/nct6775-core.c
@@ -80,14 +80,17 @@ static const char * const nct6775_device_names[] = {
 
 /* Common and NCT6775 specific data */
 
-/* Voltage min/max registers for nr=7..14 are in bank 5 */
+/*
+ * Voltage min/max registers for nr=7..14 are in bank 5
+ * min/max: 15-17 for NCT6799 only
+ */
 
 static const u16 NCT6775_REG_IN_MAX[] = {
 	0x2b, 0x2d, 0x2f, 0x31, 0x33, 0x35, 0x37, 0x554, 0x556, 0x558, 0x55a,
-	0x55c, 0x55e, 0x560, 0x562 };
+	0x55c, 0x55e, 0x560, 0x562, 0x564, 0x570, 0x572 };
 static const u16 NCT6775_REG_IN_MIN[] = {
 	0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x555, 0x557, 0x559, 0x55b,
-	0x55d, 0x55f, 0x561, 0x563 };
+	0x55d, 0x55f, 0x561, 0x563, 0x565, 0x571, 0x573 };
 static const u16 NCT6775_REG_IN[] = {
 	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x550, 0x551, 0x552
 };
@@ -256,7 +259,8 @@ static const s8 NCT6776_ALARM_BITS[NUM_ALARM_BITS] = {
 	12,  9,						  /* intr0-intr1  */
 };
 
-static const u16 NCT6776_REG_BEEP[NUM_REG_BEEP] = { 0xb2, 0xb3, 0xb4, 0xb5 };
+/* 0xbf: nct6799 only */
+static const u16 NCT6776_REG_BEEP[NUM_REG_BEEP] = { 0xb2, 0xb3, 0xb4, 0xb5, 0xbf };
 
 static const s8 NCT6776_BEEP_BITS[NUM_BEEP_BITS] = {
 	 0,  1,  2,  3,  4,  5,  6,  7,  8, -1, -1, -1,	  /* in0-in11     */
@@ -328,9 +332,16 @@ static const u16 NCT6776_REG_TSI_TEMP[] = {
 
 /* NCT6779 specific data */
 
+/*
+ * 15-17 for NCT6799 only, register labels are:
+ *      CPUVC,  VIN1,  AVSB,  3VCC,  VIN0,  VIN8,  VIN4, 3VSB
+ *       VBAT,   VTT,  VIN5,  VIN6,  VIN2,  VIN3,  VIN7, VIN9
+ *       VHIF, VIN10
+ */
 static const u16 NCT6779_REG_IN[] = {
 	0x480, 0x481, 0x482, 0x483, 0x484, 0x485, 0x486, 0x487,
-	0x488, 0x489, 0x48a, 0x48b, 0x48c, 0x48d, 0x48e };
+	0x488, 0x489, 0x48a, 0x48b, 0x48c, 0x48d, 0x48e, 0x48f,
+	0x470, 0x471};
 
 static const u16 NCT6779_REG_ALARM[NUM_REG_ALARM] = {
 	0x459, 0x45A, 0x45B, 0x568 };
@@ -644,6 +655,22 @@ static const char *const nct6798_temp_label[] = {
 #define NCT6798_TEMP_MASK	0xbfff0ffe
 #define NCT6798_VIRT_TEMP_MASK	0x80000c00
 
+static const s8 NCT6799_ALARM_BITS[NUM_ALARM_BITS] = {
+	 0,  1,  2,  3,  8, -1, 20, 16, 17, 24, 25, 26,	  /* in0-in11     */
+	27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	 6,  7, 11, 10, 23, 33, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	 4,  5, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	12,  9,						  /* intr0-intr1  */
+};
+
+static const s8 NCT6799_BEEP_BITS[NUM_BEEP_BITS] = {
+	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,	  /* in0-in11     */
+	12, 13, 14, 15, 34, 35, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
+	25, 26, 27, 28, 29, -1, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
+	16, 17, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	30, 31, 24					  /* intr0-intr1, beep_en */
+};
+
 static const char *const nct6799_temp_label[] = {
 	"",
 	"SYSTIN",
@@ -938,12 +965,12 @@ static const u16 scale_in[15] = {
 /*
  * NCT6798 scaling:
  *    CPUVC, IN1, AVSB, 3VCC, IN0, IN8, IN4, 3VSB, VBAT,  VTT,  IN5,  IN6, IN2,
- *      IN3, IN7
- * Additional scales to be added later: IN9 (800), VHIF (1600)
+ *      IN3, IN7,  IN9, VHIF, IN10
+ * 15-17 for NCT6799 only
  */
-static const u16 scale_in_6798[15] = {
+static const u16 scale_in_6798[NUM_IN] = {
 	800, 800, 1600, 1600, 800, 800, 800, 1600, 1600, 1600, 1600, 1600, 800,
-	800, 800
+	800, 800,  800, 1600, 800
 };
 
 static inline long in_from_reg(u8 reg, u8 nr, const u16 *scales)
@@ -3961,7 +3988,13 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
 		case nct6796:
 		case nct6797:
 		case nct6798:
+			data->REG_TSI_TEMP = NCT6796_REG_TSI_TEMP;
+			num_reg_tsi_temp = ARRAY_SIZE(NCT6796_REG_TSI_TEMP);
+			break;
 		case nct6799:
+			data->in_num = 18;
+			data->ALARM_BITS = NCT6799_ALARM_BITS;
+			data->BEEP_BITS = NCT6799_BEEP_BITS;
 			data->REG_TSI_TEMP = NCT6796_REG_TSI_TEMP;
 			num_reg_tsi_temp = ARRAY_SIZE(NCT6796_REG_TSI_TEMP);
 			break;
diff --git a/drivers/hwmon/nct6775.h b/drivers/hwmon/nct6775.h
index c752bc7bbe72..edcde39c4791 100644
--- a/drivers/hwmon/nct6775.h
+++ b/drivers/hwmon/nct6775.h
@@ -16,6 +16,7 @@ enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 };
 #define NUM_REG_BEEP	5	/* Max number of beep registers */
 
 #define NUM_FAN		7
+#define NUM_IN		18
 
 struct nct6775_data {
 	int addr;	/* IO base of hw monitor block */
@@ -97,7 +98,7 @@ struct nct6775_data {
 	/* Register values */
 	u8 bank;		/* current register bank */
 	u8 in_num;		/* number of in inputs we have */
-	u8 in[15][3];		/* [0]=in, [1]=in_max, [2]=in_min */
+	u8 in[NUM_IN][3];	/* [0]=in, [1]=in_max, [2]=in_min */
 	const u16 *scale_in;	/* internal scaling factors */
 	unsigned int rpm[NUM_FAN];
 	u16 fan_min[NUM_FAN];
@@ -166,7 +167,7 @@ struct nct6775_data {
 	u16 have_temp;
 	u16 have_temp_fixed;
 	u16 have_tsi_temp;
-	u16 have_in;
+	u32 have_in;
 
 	/* Remember extra register values over suspend/resume */
 	u8 vbat;

From 10a7a334d3a1f2d888a42a3c956813274ffb8a9c Mon Sep 17 00:00:00 2001
From: James Seo <james@equiv.tech>
Date: Sat, 22 Jul 2023 10:25:13 -0700
Subject: [PATCH 1583/2047] hwmon: (hp-wmi-sensors) Get WMI instance count from
 WMI driver core

Commit 2a2b13ae50cf ("platform/x86: wmi: Allow retrieving the number of
WMI object instances") means we no longer need to find this ourselves.

Signed-off-by: James Seo <james@equiv.tech>
Link: https://lore.kernel.org/r/20230722172513.9324-2-james@equiv.tech
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/hp-wmi-sensors.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/drivers/hwmon/hp-wmi-sensors.c b/drivers/hwmon/hp-wmi-sensors.c
index ebe2fb513480..3a99cc5f44b2 100644
--- a/drivers/hwmon/hp-wmi-sensors.c
+++ b/drivers/hwmon/hp-wmi-sensors.c
@@ -435,25 +435,11 @@ static union acpi_object *hp_wmi_get_wobj(const char *guid, u8 instance)
 /* hp_wmi_wobj_instance_count - find count of WMI object instances */
 static u8 hp_wmi_wobj_instance_count(const char *guid)
 {
-	u8 hi = HP_WMI_MAX_INSTANCES;
-	union acpi_object *wobj;
-	u8 lo = 0;
-	u8 mid;
+	int count;
 
-	while (lo < hi) {
-		mid = (lo + hi) / 2;
+	count = wmi_instance_count(guid);
 
-		wobj = hp_wmi_get_wobj(guid, mid);
-		if (!wobj) {
-			hi = mid;
-			continue;
-		}
-
-		lo = mid + 1;
-		kfree(wobj);
-	}
-
-	return lo;
+	return clamp(count, 0, (int)HP_WMI_MAX_INSTANCES);
 }
 
 static int check_wobj(const union acpi_object *wobj,

From 311cb3638e9c98e974dee03438c671b722154969 Mon Sep 17 00:00:00 2001
From: James Seo <james@equiv.tech>
Date: Tue, 25 Jul 2023 02:48:17 -0700
Subject: [PATCH 1584/2047] hwmon: (hp-wmi-sensors) Initialize pevents in
 hp_wmi_sensors_init()

The following warning is given by the Smatch static checker:

  drivers/hwmon/hp-wmi-sensors.c:1937 hp_wmi_sensors_init()
  error: uninitialized symbol 'pevents'.

If there are no instances of the HPBIOS_PlatformEvents WMI object
available, init_platform_events() never initializes this pointer,
which may then be passed to hp_wmi_debugfs_init() uninitialized.

The impact should be limited because hp_wmi_debugfs_init() uses this
pointer only if the count of HPBIOS_PlatformEvents instances is _not_
zero, while conversely, it will be uninitialized only if the count of
such instances _is_ zero. However, passing it uninitialized still
constitutes a bug.

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/linux-hwmon/f72c129b-8c57-406a-bf41-bd889b65ea0f@moroto.mountain/
Signed-off-by: James Seo <james@equiv.tech>
Link: https://lore.kernel.org/r/20230725094817.588640-1-james@equiv.tech
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/hp-wmi-sensors.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/hp-wmi-sensors.c b/drivers/hwmon/hp-wmi-sensors.c
index 3a99cc5f44b2..17ae62f88bbf 100644
--- a/drivers/hwmon/hp-wmi-sensors.c
+++ b/drivers/hwmon/hp-wmi-sensors.c
@@ -1913,7 +1913,7 @@ static bool add_event_handler(struct hp_wmi_sensors *state)
 static int hp_wmi_sensors_init(struct hp_wmi_sensors *state)
 {
 	struct hp_wmi_info *connected[HP_WMI_MAX_INSTANCES];
-	struct hp_wmi_platform_events *pevents;
+	struct hp_wmi_platform_events *pevents = NULL;
 	struct device *dev = &state->wdev->dev;
 	struct hp_wmi_info *info;
 	struct device *hwdev;

From 6ae8867ae31b202018b19959c32105213148878e Mon Sep 17 00:00:00 2001
From: Andre Werner <andre.werner@systec-electronic.com>
Date: Wed, 26 Jul 2023 07:16:34 +0200
Subject: [PATCH 1585/2047] dt-bindings: hwmon: Add description for new hwmon
 sensor hs3001

Add binding for Renesas HS3001 Temperature and Relative Humidity Sensor.

Signed-off-by: Andre Werner <andre.werner@systec-electronic.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230726051635.9778-1-andre.werner@systec-electronic.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/devicetree/bindings/trivial-devices.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 6e24c4d25ec3..40bc475ee7e1 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -319,6 +319,8 @@ properties:
           - plx,pex8648
             # Pulsedlight LIDAR range-finding sensor
           - pulsedlight,lidar-lite-v2
+            # Renesas HS3001 Temperature and Relative Humidity Sensors
+          - renesas,hs3001
             # Renesas ISL29501 time-of-flight sensor
           - renesas,isl29501
             # Rohm DH2228FV

From 43fbe66dc2164c03e7a58dfcf0ab737b2f12cc79 Mon Sep 17 00:00:00 2001
From: Andre Werner <andre.werner@systec-electronic.com>
Date: Tue, 25 Jul 2023 06:22:07 +0200
Subject: [PATCH 1586/2047] hwmon: Add driver for Renesas HS3001

Add base support for Renesas HS3001 temperature
and humidity sensors and its compatibles HS3002,
HS3003 and HS3004.

The sensor has a fix I2C address 0x44. The resolution
is fixed to 14bit (ref. Missing feature).

Missing feature:
- Accessing non-volatile memory: Custom board has no
  possibility to control voltage supply of sensor. Thus,
  we cannot send the necessary control commands within
  the first 10ms after power-on.

Signed-off-by: Andre Werner <andre.werner@systec-electronic.com>
Link: https://lore.kernel.org/r/20230725042207.22310-2-andre.werner@systec-electronic.com
[groeck: Cosmetic documentation fixup; added documentation to index;
 replaced probe_new with probe dropped unused variable]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/hs3001.rst |  37 +++++
 Documentation/hwmon/index.rst  |   1 +
 MAINTAINERS                    |   6 +
 drivers/hwmon/Kconfig          |  10 ++
 drivers/hwmon/Makefile         |   1 +
 drivers/hwmon/hs3001.c         | 242 +++++++++++++++++++++++++++++++++
 6 files changed, 297 insertions(+)
 create mode 100644 Documentation/hwmon/hs3001.rst
 create mode 100644 drivers/hwmon/hs3001.c

diff --git a/Documentation/hwmon/hs3001.rst b/Documentation/hwmon/hs3001.rst
new file mode 100644
index 000000000000..9f59dfc212d9
--- /dev/null
+++ b/Documentation/hwmon/hs3001.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Kernel driver HS3001
+====================
+
+Supported chips:
+
+  * Renesas HS3001, HS3002, HS3003, HS3004
+
+    Prefix: 'hs3001'
+
+    Addresses scanned: -
+
+    Datasheet: https://www.renesas.com/us/en/document/dst/hs300x-datasheet?r=417401
+
+Author:
+
+  - Andre Werner <andre.werner@systec-electronic.com>
+
+Description
+-----------
+
+This driver implements support for the Renesas HS3001 chips, a humidity
+and temperature family. Temperature is measured in degrees celsius, relative
+humidity is expressed as a percentage. In the sysfs interface, all values are
+scaled by 1000, i.e. the value for 31.5 degrees celsius is 31500.
+
+The device communicates with the I2C protocol. Sensors have the I2C
+address 0x44 by default.
+
+sysfs-Interface
+---------------
+
+=================== =================
+temp1_input:        temperature input
+humidity1_input:    humidity input
+=================== =================
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index d11924667f76..88dadea85cfc 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -78,6 +78,7 @@ Hardware Monitoring Kernel Drivers
    gxp-fan-ctrl
    hih6130
    hp-wmi-sensors
+   hs3001
    ibmaem
    ibm-cffps
    ibmpowernv
diff --git a/MAINTAINERS b/MAINTAINERS
index 080e39424343..2c163adcda2d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9497,6 +9497,12 @@ S:	Maintained
 W:	http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi
 F:	fs/hpfs/
 
+HS3001 Hardware Temperature and Humidity Sensor
+M:	Andre Werner <andre.werner@systec-electronic.com>
+L:	linux-hwmon@vger.kernel.org
+S:	Maintained
+F:	drivers/hwmon/hs3001.c
+
 HSI SUBSYSTEM
 M:	Sebastian Reichel <sre@kernel.org>
 S:	Maintained
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 2913299c2c9e..ec38c8892158 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -734,6 +734,16 @@ config SENSORS_HIH6130
 	  This driver can also be built as a module. If so, the module
 	  will be called hih6130.
 
+config SENSORS_HS3001
+	tristate "Renesas HS3001 humidity and temperature sensors"
+	depends on I2C
+	help
+	  If you say yes here you get support for the Renesas HS3001,
+	  to HS3004 humidity and temperature sensors.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called hs3001.
+
 config SENSORS_IBMAEM
 	tristate "IBM Active Energy Manager temperature/power sensors and control"
 	select IPMI_SI
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index ff6bfd109c72..4ac9452b5430 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -86,6 +86,7 @@ obj-$(CONFIG_SENSORS_GSC)	+= gsc-hwmon.o
 obj-$(CONFIG_SENSORS_GPIO_FAN)	+= gpio-fan.o
 obj-$(CONFIG_SENSORS_GXP_FAN_CTRL) += gxp-fan-ctrl.o
 obj-$(CONFIG_SENSORS_HIH6130)	+= hih6130.o
+obj-$(CONFIG_SENSORS_HS3001)	+= hs3001.o
 obj-$(CONFIG_SENSORS_ULTRA45)	+= ultra45_env.o
 obj-$(CONFIG_SENSORS_I5500)	+= i5500_temp.o
 obj-$(CONFIG_SENSORS_I5K_AMB)	+= i5k_amb.o
diff --git a/drivers/hwmon/hs3001.c b/drivers/hwmon/hs3001.c
new file mode 100644
index 000000000000..ac574e46d069
--- /dev/null
+++ b/drivers/hwmon/hs3001.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This is a non-complete driver implementation for the
+ * HS3001 humidity and temperature sensor and compatibles. It does not include
+ * the configuration possibilities, where it needs to be set to 'programming mode'
+ * during power-up.
+ *
+ *
+ * Copyright (C) 2023 SYS TEC electronic AG
+ * Author: Andre Werner <andre.werner@systec-electronic.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/hwmon.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+/* Measurement times */
+#define HS3001_WAKEUP_TIME	100	/* us */
+#define HS3001_8BIT_RESOLUTION	550	/* us */
+#define HS3001_10BIT_RESOLUTION	1310	/* us */
+#define HS3001_12BIT_RESOLUTION	4500	/* us */
+#define HS3001_14BIT_RESOLUTION	16900	/* us */
+
+#define HS3001_RESPONSE_LENGTH	4
+
+#define HS3001_FIXPOINT_ARITH	1000U
+
+#define HS3001_MASK_HUMIDITY_0X3FFF	GENMASK(13, 0)
+#define HS3001_MASK_STATUS_0XC0	GENMASK(7, 6)
+
+/* Definitions for Status Bits of A/D Data */
+#define HS3001_DATA_VALID	0x00	/* Valid Data */
+#define HS3001_DATA_STALE	0x01	/* Stale Data */
+
+struct hs3001_data {
+	struct i2c_client *client;
+	struct mutex i2c_lock; /* lock for sending i2c commands */
+	u32 wait_time;		/* in us */
+	int temperature;	/* in milli degree */
+	u32 humidity;		/* in milli % */
+};
+
+static int hs3001_extract_temperature(u16 raw)
+{
+	/* fixpoint arithmetic 1 digit */
+	u32 temp = (raw >> 2) * HS3001_FIXPOINT_ARITH * 165;
+
+	temp /= (1 << 14) - 1;
+
+	return (int)temp - 40 * HS3001_FIXPOINT_ARITH;
+}
+
+static u32 hs3001_extract_humidity(u16 raw)
+{
+	u32 hum = (raw & HS3001_MASK_HUMIDITY_0X3FFF) * HS3001_FIXPOINT_ARITH * 100;
+
+	return hum /= (1 << 14) - 1;
+}
+
+static int hs3001_data_fetch_command(struct i2c_client *client,
+				     struct hs3001_data *data)
+{
+	int ret;
+	u8 buf[HS3001_RESPONSE_LENGTH];
+	u8 hs3001_status;
+
+	ret = i2c_master_recv(client, buf, HS3001_RESPONSE_LENGTH);
+	if (ret != HS3001_RESPONSE_LENGTH) {
+		ret = ret < 0 ? ret : -EIO;
+		dev_dbg(&client->dev,
+			"Error in i2c communication. Error code: %d.\n", ret);
+		return ret;
+	}
+
+	hs3001_status = FIELD_GET(HS3001_MASK_STATUS_0XC0, buf[0]);
+	if (hs3001_status == HS3001_DATA_STALE) {
+		dev_dbg(&client->dev, "Sensor busy.\n");
+		return -EBUSY;
+	}
+	if (hs3001_status != HS3001_DATA_VALID) {
+		dev_dbg(&client->dev, "Data invalid.\n");
+		return -EIO;
+	}
+
+	data->humidity =
+		hs3001_extract_humidity(be16_to_cpup((__be16 *)&buf[0]));
+	data->temperature =
+		hs3001_extract_temperature(be16_to_cpup((__be16 *)&buf[2]));
+
+	return 0;
+}
+
+static umode_t hs3001_is_visible(const void *data, enum hwmon_sensor_types type,
+				 u32 attr, int channel)
+{
+	/* Both, humidity and temperature can only be read. */
+	return 0444;
+}
+
+static int hs3001_read(struct device *dev, enum hwmon_sensor_types type,
+		       u32 attr, int channel, long *val)
+{
+	struct hs3001_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
+	int ret;
+
+	mutex_lock(&data->i2c_lock);
+	ret = i2c_master_send(client, NULL, 0);
+	if (ret < 0) {
+		mutex_unlock(&data->i2c_lock);
+		return ret;
+	}
+
+	/*
+	 * Sensor needs some time to process measurement depending on
+	 * resolution (ref. datasheet)
+	 */
+	fsleep(data->wait_time);
+
+	ret = hs3001_data_fetch_command(client, data);
+	mutex_unlock(&data->i2c_lock);
+
+	if (ret < 0)
+		return ret;
+
+	switch (type) {
+	case hwmon_temp:
+		switch (attr) {
+		case hwmon_temp_input:
+			*val = data->temperature;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case hwmon_humidity:
+		switch (attr) {
+		case hwmon_humidity_input:
+			*val = data->humidity;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct hwmon_channel_info *hs3001_info[] = {
+	HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
+	HWMON_CHANNEL_INFO(humidity, HWMON_H_INPUT),
+	NULL
+};
+
+static const struct hwmon_ops hs3001_hwmon_ops = {
+	.is_visible = hs3001_is_visible,
+	.read = hs3001_read,
+};
+
+static const struct hwmon_chip_info hs3001_chip_info = {
+	.ops = &hs3001_hwmon_ops,
+	.info = hs3001_info,
+};
+
+/* device ID table */
+static const struct i2c_device_id hs3001_ids[] = {
+	{ "hs3001", 0 },
+	{ },
+};
+
+MODULE_DEVICE_TABLE(i2c, hs3001_ids);
+
+static const struct of_device_id hs3001_of_match[] = {
+	{.compatible = "renesas,hs3001"},
+	{ },
+};
+
+MODULE_DEVICE_TABLE(of, hs3001_of_match);
+
+static int hs3001_probe(struct i2c_client *client)
+{
+	struct hs3001_data *data;
+	struct device *hwmon_dev;
+	struct device *dev = &client->dev;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+		return -EOPNOTSUPP;
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->client = client;
+
+	/*
+	 * Measurement time = wake-up time + measurement time temperature
+	 * + measurement time humidity. This is currently static, because
+	 * enabling programming mode is not supported, yet.
+	 */
+	data->wait_time = (HS3001_WAKEUP_TIME + HS3001_14BIT_RESOLUTION +
+			   HS3001_14BIT_RESOLUTION);
+
+	mutex_init(&data->i2c_lock);
+
+	hwmon_dev = devm_hwmon_device_register_with_info(dev,
+							 client->name,
+							 data,
+							 &hs3001_chip_info,
+							 NULL);
+
+	if (IS_ERR(hwmon_dev))
+		return dev_err_probe(dev, PTR_ERR(hwmon_dev),
+				     "Unable to register hwmon device.\n");
+
+	return 0;
+}
+
+static struct i2c_driver hs3001_i2c_driver = {
+	.driver = {
+		   .name = "hs3001",
+		   .of_match_table = hs3001_of_match,
+	},
+	.probe = hs3001_probe,
+	.id_table = hs3001_ids,
+};
+
+module_i2c_driver(hs3001_i2c_driver);
+
+MODULE_AUTHOR("Andre Werner <andre.werner@systec-electronic.com>");
+MODULE_DESCRIPTION("HS3001 humidity and temperature sensor base driver");
+MODULE_LICENSE("GPL");

From b7f1f7b2523a6a4382f12fe953380b847b80e09d Mon Sep 17 00:00:00 2001
From: Ahmad Khalifa <ahmad@khalifa.ws>
Date: Wed, 2 Aug 2023 19:58:21 +0100
Subject: [PATCH 1587/2047] hwmon: (nct6775) Additional TEMP registers for
 nct6799

Additional TEMP registers for nct6798d, nct6799d-r and nct6796d-s
This allows the max/max_hyst/crit attributes to be shown/stored

* Increase NUM_TEMP from 10 to 12
* Separate TEMP/MON_TEMP/OVER/HYST/CRIT registers
* Rename "PECI Calibration" to include "TSI" too
* Update ALARM/BEEP bits for temps for 6799
* For 6799, keep temp_fixed_num at 6, but increase
  num_temp_alarms/num_temp_beeps to 7/8

Tested with NCT6799D-R showing additional sysfs attributes:
* temp3-temp8: max/max_hyst/beep/alarm
* temp3-temp6: crit/offset

Signed-off-by: Ahmad Khalifa <ahmad@khalifa.ws>
Link: https://lore.kernel.org/r/20230802185820.3642399-1-ahmad@khalifa.ws
[groeck: Addressed cosmetic checkpatch complaints]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/nct6775-core.c | 150 ++++++++++++++++++++++++++++-------
 drivers/hwmon/nct6775.h      |   2 +-
 2 files changed, 121 insertions(+), 31 deletions(-)

diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c
index 33533d95cf48..02a71244fc3b 100644
--- a/drivers/hwmon/nct6775-core.c
+++ b/drivers/hwmon/nct6775-core.c
@@ -617,6 +617,28 @@ static const char *const nct6796_temp_label[] = {
 
 static const u16 NCT6796_REG_TSI_TEMP[] = { 0x409, 0x40b };
 
+static const u16 NCT6798_REG_TEMP[] = {
+	0x27, 0x150, 0x670, 0x672, 0x674, 0x676, 0x678, 0x67a};
+
+static const u16 NCT6798_REG_TEMP_SOURCE[] = {
+	0x621, 0x622, 0xc26, 0xc27, 0xc28, 0xc29, 0xc2a, 0xc2b };
+
+static const u16 NCT6798_REG_TEMP_MON[] = {
+	0x73, 0x75, 0x77, 0x79, 0x7b, 0x7d, 0x4a0 };
+static const u16 NCT6798_REG_TEMP_OVER[] = {
+	0x39, 0x155, 0xc1a, 0xc1b, 0xc1c, 0xc1d, 0xc1e, 0xc1f };
+static const u16 NCT6798_REG_TEMP_HYST[] = {
+	0x3a, 0x153, 0xc20, 0xc21, 0xc22, 0xc23, 0xc24, 0xc25 };
+
+static const u16 NCT6798_REG_TEMP_CRIT[32] = {
+	0x135, 0x235, 0x335, 0x835, 0x935, 0xa35, 0xb35, 0 };
+
+static const u16 NCT6798_REG_TEMP_ALTERNATE[32] = {
+	0x490, 0x491, 0x492, 0x493, 0x494, 0x495, 0x496, 0,
+	0, 0, 0, 0, 0x4a2, 0, 0, 0,
+	0, 0x400, 0x401, 0x402, 0x404, 0x405, 0x406, 0x407,
+	0x408, 0x419, 0x41a, 0x4f4, 0x4f5 };
+
 static const char *const nct6798_temp_label[] = {
 	"",
 	"SYSTIN",
@@ -655,11 +677,14 @@ static const char *const nct6798_temp_label[] = {
 #define NCT6798_TEMP_MASK	0xbfff0ffe
 #define NCT6798_VIRT_TEMP_MASK	0x80000c00
 
+static const u16 NCT6799_REG_ALARM[NUM_REG_ALARM] = {
+	0x459, 0x45A, 0x45B, 0x568, 0x45D, 0xc01 };
+
 static const s8 NCT6799_ALARM_BITS[NUM_ALARM_BITS] = {
 	 0,  1,  2,  3,  8, -1, 20, 16, 17, 24, 25, 26,	  /* in0-in11     */
 	27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
 	 6,  7, 11, 10, 23, 33, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
-	 4,  5, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	 4,  5, 40, 41, 42, 43, 44, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
 	12,  9,						  /* intr0-intr1  */
 };
 
@@ -667,10 +692,11 @@ static const s8 NCT6799_BEEP_BITS[NUM_BEEP_BITS] = {
 	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,	  /* in0-in11     */
 	12, 13, 14, 15, 34, 35, -1, -1, -1, -1, -1, -1,	  /* in12-in23    */
 	25, 26, 27, 28, 29, -1, -1, -1, -1, -1, -1, -1,	  /* fan1-fan12   */
-	16, 17, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	  /* temp1-temp12 */
+	16, 17, 18, 19, 20, 21, 22, 23, -1, -1, -1, -1,	  /* temp1-temp12 */
 	30, 31, 24					  /* intr0-intr1, beep_en */
 };
 
+/* PECI Calibration only for NCT6799D, not NCT6796D-S */
 static const char *const nct6799_temp_label[] = {
 	"",
 	"SYSTIN",
@@ -700,8 +726,8 @@ static const char *const nct6799_temp_label[] = {
 	"Agent1 Dimm1",
 	"BYTE_TEMP0",
 	"BYTE_TEMP1",
-	"PECI Agent 0 Calibration",	/* undocumented */
-	"PECI Agent 1 Calibration",	/* undocumented */
+	"PECI/TSI Agent 0 Calibration",
+	"PECI/TSI Agent 1 Calibration",
 	"",
 	"Virtual_TEMP"
 };
@@ -3869,13 +3895,9 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
 	case nct6795:
 	case nct6796:
 	case nct6797:
-	case nct6798:
-	case nct6799:
 		data->in_num = 15;
 		data->pwm_num = (data->kind == nct6796 ||
-				 data->kind == nct6797 ||
-				 data->kind == nct6798 ||
-				 data->kind == nct6799) ? 7 : 6;
+				 data->kind == nct6797) ? 7 : 6;
 		data->auto_pwm_num = 4;
 		data->has_fan_div = false;
 		data->temp_fixed_num = 6;
@@ -3919,16 +3941,6 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
 			data->temp_mask = NCT6796_TEMP_MASK;
 			data->virt_temp_mask = NCT6796_VIRT_TEMP_MASK;
 			break;
-		case nct6798:
-			data->temp_label = nct6798_temp_label;
-			data->temp_mask = NCT6798_TEMP_MASK;
-			data->virt_temp_mask = NCT6798_VIRT_TEMP_MASK;
-			break;
-		case nct6799:
-			data->temp_label = nct6799_temp_label;
-			data->temp_mask = NCT6799_TEMP_MASK;
-			data->virt_temp_mask = NCT6799_VIRT_TEMP_MASK;
-			break;
 		}
 
 		data->REG_CONFIG = NCT6775_REG_CONFIG;
@@ -3987,14 +3999,6 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
 		case nct6795:
 		case nct6796:
 		case nct6797:
-		case nct6798:
-			data->REG_TSI_TEMP = NCT6796_REG_TSI_TEMP;
-			num_reg_tsi_temp = ARRAY_SIZE(NCT6796_REG_TSI_TEMP);
-			break;
-		case nct6799:
-			data->in_num = 18;
-			data->ALARM_BITS = NCT6799_ALARM_BITS;
-			data->BEEP_BITS = NCT6799_BEEP_BITS;
 			data->REG_TSI_TEMP = NCT6796_REG_TSI_TEMP;
 			num_reg_tsi_temp = ARRAY_SIZE(NCT6796_REG_TSI_TEMP);
 			break;
@@ -4003,9 +4007,6 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
 			break;
 		}
 
-		if (data->kind == nct6798 || data->kind == nct6799)
-			data->scale_in = scale_in_6798;
-
 		reg_temp = NCT6779_REG_TEMP;
 		num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP);
 		if (data->kind == nct6791) {
@@ -4021,6 +4022,95 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
 		reg_temp_alternate = NCT6779_REG_TEMP_ALTERNATE;
 		reg_temp_crit = NCT6779_REG_TEMP_CRIT;
 
+		break;
+	case nct6798:
+	case nct6799:
+		data->in_num = data->kind == nct6799 ? 18 : 15;
+		data->scale_in = scale_in_6798;
+		data->pwm_num = 7;
+		data->auto_pwm_num = 4;
+		data->has_fan_div = false;
+		data->temp_fixed_num = 6;
+		data->num_temp_alarms = 7;
+		data->num_temp_beeps = 8;
+
+		data->ALARM_BITS = NCT6799_ALARM_BITS;
+		data->BEEP_BITS = NCT6799_BEEP_BITS;
+
+		data->fan_from_reg = fan_from_reg_rpm;
+		data->fan_from_reg_min = fan_from_reg13;
+		data->target_temp_mask = 0xff;
+		data->tolerance_mask = 0x07;
+		data->speed_tolerance_limit = 63;
+
+		switch (data->kind) {
+		default:
+		case nct6798:
+			data->temp_label = nct6798_temp_label;
+			data->temp_mask = NCT6798_TEMP_MASK;
+			data->virt_temp_mask = NCT6798_VIRT_TEMP_MASK;
+			break;
+		case nct6799:
+			data->temp_label = nct6799_temp_label;
+			data->temp_mask = NCT6799_TEMP_MASK;
+			data->virt_temp_mask = NCT6799_VIRT_TEMP_MASK;
+			break;
+		}
+
+		data->REG_CONFIG = NCT6775_REG_CONFIG;
+		data->REG_VBAT = NCT6775_REG_VBAT;
+		data->REG_DIODE = NCT6775_REG_DIODE;
+		data->DIODE_MASK = NCT6775_DIODE_MASK;
+		data->REG_VIN = NCT6779_REG_IN;
+		data->REG_IN_MINMAX[0] = NCT6775_REG_IN_MIN;
+		data->REG_IN_MINMAX[1] = NCT6775_REG_IN_MAX;
+		data->REG_TARGET = NCT6775_REG_TARGET;
+		data->REG_FAN = NCT6779_REG_FAN;
+		data->REG_FAN_MODE = NCT6775_REG_FAN_MODE;
+		data->REG_FAN_MIN = NCT6776_REG_FAN_MIN;
+		data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES;
+		data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT;
+		data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME;
+		data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME;
+		data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME;
+		data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H;
+		data->REG_PWM[0] = NCT6775_REG_PWM;
+		data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT;
+		data->REG_PWM[2] = NCT6775_REG_FAN_STOP_OUTPUT;
+		data->REG_PWM[5] = NCT6791_REG_WEIGHT_DUTY_STEP;
+		data->REG_PWM[6] = NCT6791_REG_WEIGHT_DUTY_BASE;
+		data->REG_PWM_READ = NCT6775_REG_PWM_READ;
+		data->REG_PWM_MODE = NCT6776_REG_PWM_MODE;
+		data->PWM_MODE_MASK = NCT6776_PWM_MODE_MASK;
+		data->REG_AUTO_TEMP = NCT6775_REG_AUTO_TEMP;
+		data->REG_AUTO_PWM = NCT6775_REG_AUTO_PWM;
+		data->REG_CRITICAL_TEMP = NCT6775_REG_CRITICAL_TEMP;
+		data->REG_CRITICAL_TEMP_TOLERANCE = NCT6775_REG_CRITICAL_TEMP_TOLERANCE;
+		data->REG_CRITICAL_PWM_ENABLE = NCT6779_REG_CRITICAL_PWM_ENABLE;
+		data->CRITICAL_PWM_ENABLE_MASK = NCT6779_CRITICAL_PWM_ENABLE_MASK;
+		data->REG_CRITICAL_PWM = NCT6779_REG_CRITICAL_PWM;
+		data->REG_TEMP_OFFSET = NCT6779_REG_TEMP_OFFSET;
+		data->REG_TEMP_SOURCE = NCT6798_REG_TEMP_SOURCE;
+		data->REG_TEMP_SEL = NCT6775_REG_TEMP_SEL;
+		data->REG_WEIGHT_TEMP_SEL = NCT6791_REG_WEIGHT_TEMP_SEL;
+		data->REG_WEIGHT_TEMP[0] = NCT6791_REG_WEIGHT_TEMP_STEP;
+		data->REG_WEIGHT_TEMP[1] = NCT6791_REG_WEIGHT_TEMP_STEP_TOL;
+		data->REG_WEIGHT_TEMP[2] = NCT6791_REG_WEIGHT_TEMP_BASE;
+		data->REG_ALARM = NCT6799_REG_ALARM;
+		data->REG_BEEP = NCT6792_REG_BEEP;
+		data->REG_TSI_TEMP = NCT6796_REG_TSI_TEMP;
+		num_reg_tsi_temp = ARRAY_SIZE(NCT6796_REG_TSI_TEMP);
+
+		reg_temp = NCT6798_REG_TEMP;
+		num_reg_temp = ARRAY_SIZE(NCT6798_REG_TEMP);
+		reg_temp_mon = NCT6798_REG_TEMP_MON;
+		num_reg_temp_mon = ARRAY_SIZE(NCT6798_REG_TEMP_MON);
+		reg_temp_over = NCT6798_REG_TEMP_OVER;
+		reg_temp_hyst = NCT6798_REG_TEMP_HYST;
+		reg_temp_config = NCT6779_REG_TEMP_CONFIG;
+		reg_temp_alternate = NCT6798_REG_TEMP_ALTERNATE;
+		reg_temp_crit = NCT6798_REG_TEMP_CRIT;
+
 		break;
 	default:
 		return -ENODEV;
diff --git a/drivers/hwmon/nct6775.h b/drivers/hwmon/nct6775.h
index edcde39c4791..296eff99d003 100644
--- a/drivers/hwmon/nct6775.h
+++ b/drivers/hwmon/nct6775.h
@@ -8,7 +8,7 @@ enum kinds { nct6106, nct6116, nct6775, nct6776, nct6779, nct6791, nct6792,
 	     nct6793, nct6795, nct6796, nct6797, nct6798, nct6799 };
 enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 };
 
-#define NUM_TEMP	10	/* Max number of temp attribute sets w/ limits*/
+#define NUM_TEMP	12	/* Max number of temp attribute sets w/ limits*/
 #define NUM_TEMP_FIXED	6	/* Max number of fixed temp attribute sets */
 #define NUM_TSI_TEMP	8	/* Max number of TSI temp register pairs */
 

From f11e27383c1223620264044442b8abb23bdb1c9f Mon Sep 17 00:00:00 2001
From: Naresh Solanki <naresh.solanki@9elements.com>
Date: Thu, 3 Aug 2023 16:44:00 +0200
Subject: [PATCH 1588/2047] hwmon: (max6639) Add compatible string

Use maxim,max6639 as compatible string for the driver.

Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230803144401.1151065-2-Naresh.Solanki@9elements.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/max6639.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/hwmon/max6639.c b/drivers/hwmon/max6639.c
index caf527154fca..aa7f21ab2395 100644
--- a/drivers/hwmon/max6639.c
+++ b/drivers/hwmon/max6639.c
@@ -618,11 +618,17 @@ MODULE_DEVICE_TABLE(i2c, max6639_id);
 
 static DEFINE_SIMPLE_DEV_PM_OPS(max6639_pm_ops, max6639_suspend, max6639_resume);
 
+static const struct of_device_id max6639_of_match[] = {
+	{ .compatible = "maxim,max6639", },
+	{ },
+};
+
 static struct i2c_driver max6639_driver = {
 	.class = I2C_CLASS_HWMON,
 	.driver = {
 		   .name = "max6639",
 		   .pm = pm_sleep_ptr(&max6639_pm_ops),
+		   .of_match_table = max6639_of_match,
 		   },
 	.probe = max6639_probe,
 	.id_table = max6639_id,

From 3fc59546a6412374d5dc4d0e127ead6b85ed7f6e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:43 +0200
Subject: [PATCH 1589/2047] hwmon: (adt7475) fix Wvoid-pointer-to-enum-cast
 warning

'chip' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  adt7475.c:1655:10: error: cast to smaller integer type 'enum chips' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/adt7475.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c
index 5363254644da..03acadc3a6cb 100644
--- a/drivers/hwmon/adt7475.c
+++ b/drivers/hwmon/adt7475.c
@@ -1652,7 +1652,7 @@ static int adt7475_probe(struct i2c_client *client)
 	i2c_set_clientdata(client, data);
 
 	if (client->dev.of_node)
-		chip = (enum chips)of_device_get_match_data(&client->dev);
+		chip = (uintptr_t)of_device_get_match_data(&client->dev);
 	else
 		chip = id->driver_data;
 

From c8b73e42f5bbc9ac134192d57bcc19ff225c95ad Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:44 +0200
Subject: [PATCH 1590/2047] hwmon: (ad7418) fix Wvoid-pointer-to-enum-cast
 warning

'type' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  ad7418.c:256:16: error: cast to smaller integer type 'enum chips' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ad7418.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/ad7418.c b/drivers/hwmon/ad7418.c
index bcea66eac82b..4829f83ff52e 100644
--- a/drivers/hwmon/ad7418.c
+++ b/drivers/hwmon/ad7418.c
@@ -253,7 +253,7 @@ static int ad7418_probe(struct i2c_client *client)
 	mutex_init(&data->lock);
 	data->client = client;
 	if (dev->of_node)
-		data->type = (enum chips)of_device_get_match_data(dev);
+		data->type = (uintptr_t)of_device_get_match_data(dev);
 	else
 		data->type = i2c_match_id(ad7418_id, client)->driver_data;
 

From 3ff0befaefeffd92c828b961ba3a582490bab777 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:45 +0200
Subject: [PATCH 1591/2047] hwmon: (ads7828) fix Wvoid-pointer-to-enum-cast
 warning

'chip' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  ads7828.c:142:10: error: cast to smaller integer type 'enum ads7828_chips' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-3-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ads7828.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c
index e86894e35639..809e830f52a6 100644
--- a/drivers/hwmon/ads7828.c
+++ b/drivers/hwmon/ads7828.c
@@ -139,8 +139,7 @@ static int ads7828_probe(struct i2c_client *client)
 	}
 
 	if (client->dev.of_node)
-		chip = (enum ads7828_chips)
-			of_device_get_match_data(&client->dev);
+		chip = (uintptr_t)of_device_get_match_data(&client->dev);
 	else
 		chip = i2c_match_id(ads7828_device_ids, client)->driver_data;
 

From 4a2a41dfff69ddc2bf9f1ac76fd7e98ab982f105 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:46 +0200
Subject: [PATCH 1592/2047] hwmon: (ina2xx) fix Wvoid-pointer-to-enum-cast
 warning

'chip' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  ina2xx.c:627:10: error: cast to smaller integer type 'enum ina2xx_ids' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-4-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ina2xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c
index a47973e2d606..d8415d1f21fc 100644
--- a/drivers/hwmon/ina2xx.c
+++ b/drivers/hwmon/ina2xx.c
@@ -624,7 +624,7 @@ static int ina2xx_probe(struct i2c_client *client)
 	enum ina2xx_ids chip;
 
 	if (client->dev.of_node)
-		chip = (enum ina2xx_ids)of_device_get_match_data(&client->dev);
+		chip = (uintptr_t)of_device_get_match_data(&client->dev);
 	else
 		chip = i2c_match_id(ina2xx_id, client)->driver_data;
 

From d96e79d00ffbdac475a46e7e25d201b94e8dcea2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:47 +0200
Subject: [PATCH 1593/2047] hwmon: (lm63) fix Wvoid-pointer-to-enum-cast
 warning

'kind' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  lm63.c:1108:16: error: cast to smaller integer type 'enum chips' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-5-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm63.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c
index 562c94c7d831..0878a044dd8e 100644
--- a/drivers/hwmon/lm63.c
+++ b/drivers/hwmon/lm63.c
@@ -1105,7 +1105,7 @@ static int lm63_probe(struct i2c_client *client)
 
 	/* Set the device type */
 	if (client->dev.of_node)
-		data->kind = (enum chips)of_device_get_match_data(&client->dev);
+		data->kind = (uintptr_t)of_device_get_match_data(&client->dev);
 	else
 		data->kind = i2c_match_id(lm63_id, client)->driver_data;
 	if (data->kind == lm64)

From c7e07faa48512f4beafef54b144a1e6df25c727f Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:48 +0200
Subject: [PATCH 1594/2047] hwmon: (lm75) fix Wvoid-pointer-to-enum-cast
 warning

'kind' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  lm75.c:581:10: error: cast to smaller integer type 'enum lm75_type' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-6-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm75.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index bcfa8193e5e7..5b2ea05c951e 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -578,7 +578,7 @@ static int lm75_probe(struct i2c_client *client)
 	enum lm75_type kind;
 
 	if (client->dev.of_node)
-		kind = (enum lm75_type)of_device_get_match_data(&client->dev);
+		kind = (uintptr_t)of_device_get_match_data(&client->dev);
 	else
 		kind = i2c_match_id(lm75_ids, client)->driver_data;
 

From 8d84910a27ddd5c4a83df712424b7cab4ed95d3d Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:49 +0200
Subject: [PATCH 1595/2047] hwmon: (lm85) fix Wvoid-pointer-to-enum-cast
 warning

'type' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  lm85.c:1562:16: error: cast to smaller integer type 'enum chips' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-7-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm85.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index 2a62ea7b25a9..68c210002357 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -1559,7 +1559,7 @@ static int lm85_probe(struct i2c_client *client)
 
 	data->client = client;
 	if (client->dev.of_node)
-		data->type = (enum chips)of_device_get_match_data(&client->dev);
+		data->type = (uintptr_t)of_device_get_match_data(&client->dev);
 	else
 		data->type = i2c_match_id(lm85_id, client)->driver_data;
 	mutex_init(&data->update_lock);

From 1ef2ebf2631221cf6ee975be81e95cb512f43346 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:50 +0200
Subject: [PATCH 1596/2047] hwmon: (lm90) fix Wvoid-pointer-to-enum-cast
 warning

'kind' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  lm90.c:2768:16: error: cast to smaller integer type 'enum chips' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-8-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm90.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index b25ae8b7ec90..e0d7454a301c 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -2765,7 +2765,7 @@ static int lm90_probe(struct i2c_client *client)
 
 	/* Set the device type */
 	if (client->dev.of_node)
-		data->kind = (enum chips)of_device_get_match_data(&client->dev);
+		data->kind = (uintptr_t)of_device_get_match_data(&client->dev);
 	else
 		data->kind = i2c_match_id(lm90_id, client)->driver_data;
 

From 026738ecd077c83737ec87ecda2fb350c2bcebae Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:51 +0200
Subject: [PATCH 1597/2047] hwmon: (max20730) fix Wvoid-pointer-to-enum-cast
 warning

'chip_id' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  max20730.c:719:13: error: cast to smaller integer type 'enum chips' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-9-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/max20730.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/max20730.c b/drivers/hwmon/pmbus/max20730.c
index 6df4c5b75bdc..d56ec24764fd 100644
--- a/drivers/hwmon/pmbus/max20730.c
+++ b/drivers/hwmon/pmbus/max20730.c
@@ -716,7 +716,7 @@ static int max20730_probe(struct i2c_client *client)
 	}
 
 	if (client->dev.of_node)
-		chip_id = (enum chips)of_device_get_match_data(dev);
+		chip_id = (uintptr_t)of_device_get_match_data(dev);
 	else
 		chip_id = i2c_match_id(max20730_id, client)->driver_data;
 

From d6f09471b331e1f8bf5eb8f1ecde4f62ddee8352 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:52 +0200
Subject: [PATCH 1598/2047] hwmon: (max6697) fix Wvoid-pointer-to-enum-cast
 warning

'type' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  max6697.c:705:16: error: cast to smaller integer type 'enum chips' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-10-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/max6697.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/max6697.c b/drivers/hwmon/max6697.c
index fe826fcf9990..7d10dd434f2e 100644
--- a/drivers/hwmon/max6697.c
+++ b/drivers/hwmon/max6697.c
@@ -702,7 +702,7 @@ static int max6697_probe(struct i2c_client *client)
 		return -ENOMEM;
 
 	if (client->dev.of_node)
-		data->type = (enum chips)of_device_get_match_data(&client->dev);
+		data->type = (uintptr_t)of_device_get_match_data(&client->dev);
 	else
 		data->type = i2c_match_id(max6697_id, client)->driver_data;
 	data->chip = &max6697_chip_data[data->type];

From 58aec51df895c456b6057531e9ee959131b7efda Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:53 +0200
Subject: [PATCH 1599/2047] hwmon: (tmp513) fix Wvoid-pointer-to-enum-cast
 warning

'id' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  tmp513.c:724:14: error: cast to smaller integer type 'enum tmp51x_ids' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-11-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/tmp513.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/tmp513.c b/drivers/hwmon/tmp513.c
index bff10f4b56e1..7db5d0fc24a4 100644
--- a/drivers/hwmon/tmp513.c
+++ b/drivers/hwmon/tmp513.c
@@ -721,7 +721,7 @@ static int tmp51x_probe(struct i2c_client *client)
 		return -ENOMEM;
 
 	if (client->dev.of_node)
-		data->id = (enum tmp51x_ids)device_get_match_data(&client->dev);
+		data->id = (uintptr_t)device_get_match_data(&client->dev);
 	else
 		data->id = i2c_match_id(tmp51x_id, client)->driver_data;
 

From 45f980cae3224022f9a1827d57b6732302c0734a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:54 +0200
Subject: [PATCH 1600/2047] hwmon: (pmbus/ibm-cffps) fix
 Wvoid-pointer-to-enum-cast warning

'vs' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  ibm-cffps.c:492:8: error: cast to smaller integer type 'enum versions' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-12-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/ibm-cffps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/ibm-cffps.c b/drivers/hwmon/pmbus/ibm-cffps.c
index 5b11aacda4d2..1ba4c5e95820 100644
--- a/drivers/hwmon/pmbus/ibm-cffps.c
+++ b/drivers/hwmon/pmbus/ibm-cffps.c
@@ -489,7 +489,7 @@ static int ibm_cffps_probe(struct i2c_client *client)
 	const struct i2c_device_id *id;
 
 	if (md) {
-		vs = (enum versions)md;
+		vs = (uintptr_t)md;
 	} else {
 		id = i2c_match_id(ibm_cffps_id, client);
 		if (id)

From 30572c7b086745c3411c1a580319a99eda7a7a56 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:55 +0200
Subject: [PATCH 1601/2047] hwmon: (pmbus/tps53679) fix
 Wvoid-pointer-to-enum-cast warning

'chip_id' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  tps53679.c:238:13: error: cast to smaller integer type 'enum chips' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-13-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/tps53679.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/tps53679.c b/drivers/hwmon/pmbus/tps53679.c
index e77b12f342b4..5c9466244d70 100644
--- a/drivers/hwmon/pmbus/tps53679.c
+++ b/drivers/hwmon/pmbus/tps53679.c
@@ -235,7 +235,7 @@ static int tps53679_probe(struct i2c_client *client)
 	enum chips chip_id;
 
 	if (dev->of_node)
-		chip_id = (enum chips)of_device_get_match_data(dev);
+		chip_id = (uintptr_t)of_device_get_match_data(dev);
 	else
 		chip_id = i2c_match_id(tps53679_id, client)->driver_data;
 

From d29b763c0a047c8b31d0e63575b2a2c4c498214a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:56 +0200
Subject: [PATCH 1602/2047] hwmon: (pmbus/ucd9000) fix
 Wvoid-pointer-to-enum-cast warning

'chip' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  ucd9000.c:591:10: error: cast to smaller integer type 'enum chips' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-14-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/ucd9000.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
index b1d1d4214e69..8d9d422450e5 100644
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -588,7 +588,7 @@ static int ucd9000_probe(struct i2c_client *client)
 	}
 
 	if (client->dev.of_node)
-		chip = (enum chips)of_device_get_match_data(&client->dev);
+		chip = (uintptr_t)of_device_get_match_data(&client->dev);
 	else
 		chip = mid->driver_data;
 

From 1030892c4427e503b32ec46f88179e9138cdbf4e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 10 Aug 2023 11:31:57 +0200
Subject: [PATCH 1603/2047] hwmon: (pmbus/ucd9200) fix
 Wvoid-pointer-to-enum-cast warning

'chip' is an enum, thus cast of pointer on 64-bit compile test with W=1
causes:

  ucd9200.c:106:10: error: cast to smaller integer type 'enum chips' from 'const void *' [-Werror,-Wvoid-pointer-to-enum-cast]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230810093157.94244-15-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/ucd9200.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/ucd9200.c b/drivers/hwmon/pmbus/ucd9200.c
index e4aad64b2d94..7920d1c06df0 100644
--- a/drivers/hwmon/pmbus/ucd9200.c
+++ b/drivers/hwmon/pmbus/ucd9200.c
@@ -103,7 +103,7 @@ static int ucd9200_probe(struct i2c_client *client)
 	}
 
 	if (client->dev.of_node)
-		chip = (enum chips)of_device_get_match_data(&client->dev);
+		chip = (uintptr_t)of_device_get_match_data(&client->dev);
 	else
 		chip = mid->driver_data;
 

From 7d9be29d8382030266c88a732a82d3bbc6fb0c5f Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Thu, 10 Aug 2023 14:26:35 +0800
Subject: [PATCH 1604/2047] hwmon: (nsa320-hwmon) Remove redundant
 of_match_ptr()

The driver depends on CONFIG_OF, it is not necessary to use
of_match_ptr() here.

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Link: https://lore.kernel.org/r/20230810062635.1947552-1-ruanjinjie@huawei.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/nsa320-hwmon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/nsa320-hwmon.c b/drivers/hwmon/nsa320-hwmon.c
index e26334469549..18076ba7fc14 100644
--- a/drivers/hwmon/nsa320-hwmon.c
+++ b/drivers/hwmon/nsa320-hwmon.c
@@ -191,7 +191,7 @@ static struct platform_driver nsa320_hwmon_driver = {
 	.probe = nsa320_hwmon_probe,
 	.driver = {
 		.name = "nsa320-hwmon",
-		.of_match_table = of_match_ptr(of_nsa320_hwmon_match),
+		.of_match_table = of_nsa320_hwmon_match,
 	},
 };
 

From 14cf45f2f2d56adc061f7b8c9104b56db8d8cb60 Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Thu, 17 Aug 2023 11:25:24 +0200
Subject: [PATCH 1605/2047] hwmon: pmbus: Reduce clear fault page invocations

Observing I2C traffic revealed consecutive transmission of CLEAR_FAULT
commands. While this doesn't cause issues, it extends driver probe time.

Avoid invoking pmbus_clear_fault_page for virtual registers, as they're
managed by the driver, not the chip.

TEST: Verified using an I2C bus analyzer that only one CLEAR_FAULT
      command is send instead 5 in a row.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230817092527.808631-1-Naresh.Solanki@9elements.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pmbus_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index 69a4e62b6c8d..cbfabdd69056 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -561,7 +561,8 @@ static bool pmbus_check_register(struct i2c_client *client,
 		rv = pmbus_check_status_cml(client);
 	if (rv < 0 && (data->flags & PMBUS_READ_STATUS_AFTER_FAILED_CHECK))
 		data->read_status(client, -1);
-	pmbus_clear_fault_page(client, -1);
+	if (reg < PMBUS_VIRT_BASE)
+		pmbus_clear_fault_page(client, -1);
 	return rv >= 0;
 }
 

From 3fd2188e588ffd8d45df5ac92ab1089328211a86 Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Thu, 17 Aug 2023 11:25:25 +0200
Subject: [PATCH 1606/2047] hwmon: pmbus: Drop unnecessary clear fault page

The pmbus_check_byte_register function already calls clear
fault page, so there's no need to do it again in
pmbus_identify_common.

TEST: Verified using an I2C bus analyser to confirm that only
      one clear fault page is being send instead of two in a row.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230817092527.808631-2-Naresh.Solanki@9elements.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pmbus_core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index cbfabdd69056..1363d9f89181 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -2541,7 +2541,6 @@ static int pmbus_identify_common(struct i2c_client *client,
 		}
 	}
 
-	pmbus_clear_fault_page(client, page);
 	return 0;
 }
 

From e7593bda6a2e2e46521567f0ddea940f4acda0db Mon Sep 17 00:00:00 2001
From: Patrick Rudolph <patrick.rudolph@9elements.com>
Date: Thu, 17 Aug 2023 11:25:26 +0200
Subject: [PATCH 1607/2047] hwmon: pmbus: Fix -EIO seen on pli1209

After doing performance optimizations the pli1209 driver failed to
probe with a probabilty of 2%. It wasn't able to read the PMBUS_OPERATION
register due to an -EIO error.

An investigation showed that the PLI1209 takes 230 usec to execute the
CLEAR_FAULTS command. During that time it's busy and NACKs all requests
on the SMBUS interface. It also NACKs reads on PMBUS_STATUS_BYTE
making it impossible to poll the BUSY flag.

Add a custom write_data function to just wait for not BUSY unconditionally
after sending a CLEAR_FAULTS command.

TEST: Verified using an I2C bus analyser that no more NACKs are seen after
      sending a CLEAR_FAULTS command.

Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
Signed-off-by: Naresh Solanki <Naresh.Solanki@9elements.com>
Link: https://lore.kernel.org/r/20230817092527.808631-3-Naresh.Solanki@9elements.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pli1209bc.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/hwmon/pmbus/pli1209bc.c b/drivers/hwmon/pmbus/pli1209bc.c
index 7d8bd3167b21..c95433790b11 100644
--- a/drivers/hwmon/pmbus/pli1209bc.c
+++ b/drivers/hwmon/pmbus/pli1209bc.c
@@ -5,6 +5,7 @@
  * Copyright (c) 2022 9elements GmbH
  */
 
+#include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
 #include <linux/pmbus.h>
@@ -53,6 +54,30 @@ static int pli1209bc_read_word_data(struct i2c_client *client, int page,
 	}
 }
 
+static int pli1209bc_write_byte(struct i2c_client *client, int page, u8 reg)
+{
+	int ret;
+
+	switch (reg) {
+	case PMBUS_CLEAR_FAULTS:
+		ret = pmbus_write_byte(client, page, reg);
+		/*
+		 * PLI1209 takes 230 usec to execute the CLEAR_FAULTS command.
+		 * During that time it's busy and NACKs all requests on the
+		 * SMBUS interface. It also NACKs reads on PMBUS_STATUS_BYTE
+		 * making it impossible to poll the BUSY flag.
+		 *
+		 * Just wait for not BUSY unconditionally.
+		 */
+		usleep_range(250, 300);
+		break;
+	default:
+		ret = -ENODATA;
+		break;
+	}
+	return ret;
+}
+
 #if IS_ENABLED(CONFIG_SENSORS_PLI1209BC_REGULATOR)
 static const struct regulator_desc pli1209bc_reg_desc = {
 	.name = "vout2",
@@ -102,6 +127,7 @@ static struct pmbus_driver_info pli1209bc_info = {
 	    | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP
 	    | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_STATUS_INPUT,
 	.read_word_data = pli1209bc_read_word_data,
+	.write_byte = pli1209bc_write_byte,
 #if IS_ENABLED(CONFIG_SENSORS_PLI1209BC_REGULATOR)
 	.num_regulators = 1,
 	.reg_desc = &pli1209bc_reg_desc,

From 08e6c5038fee470d9f240c12dcf69927e204a56b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:32 +0800
Subject: [PATCH 1608/2047] spi: orion: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-2-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-orion.c | 86 ++++++++++++++++++++---------------------
 1 file changed, 43 insertions(+), 43 deletions(-)

diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index faf2764b91a8..1f10f5c8e34d 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c
@@ -90,7 +90,7 @@ struct orion_child_options {
 };
 
 struct orion_spi {
-	struct spi_master	*master;
+	struct spi_controller	*host;
 	void __iomem		*base;
 	struct clk              *clk;
 	struct clk              *axi_clk;
@@ -141,7 +141,7 @@ static int orion_spi_baudrate_set(struct spi_device *spi, unsigned int speed)
 	struct orion_spi *orion_spi;
 	const struct orion_spi_dev *devdata;
 
-	orion_spi = spi_master_get_devdata(spi->master);
+	orion_spi = spi_controller_get_devdata(spi->controller);
 	devdata = orion_spi->devdata;
 
 	tclk_hz = clk_get_rate(orion_spi->clk);
@@ -235,7 +235,7 @@ orion_spi_mode_set(struct spi_device *spi)
 	u32 reg;
 	struct orion_spi *orion_spi;
 
-	orion_spi = spi_master_get_devdata(spi->master);
+	orion_spi = spi_controller_get_devdata(spi->controller);
 
 	reg = readl(spi_reg(orion_spi, ORION_SPI_IF_CONFIG_REG));
 	reg &= ~ORION_SPI_MODE_MASK;
@@ -257,7 +257,7 @@ orion_spi_50mhz_ac_timing_erratum(struct spi_device *spi, unsigned int speed)
 	u32 reg;
 	struct orion_spi *orion_spi;
 
-	orion_spi = spi_master_get_devdata(spi->master);
+	orion_spi = spi_controller_get_devdata(spi->controller);
 
 	/*
 	 * Erratum description: (Erratum NO. FE-9144572) The device
@@ -297,7 +297,7 @@ orion_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t)
 	unsigned int bits_per_word = spi->bits_per_word;
 	int	rc;
 
-	orion_spi = spi_master_get_devdata(spi->master);
+	orion_spi = spi_controller_get_devdata(spi->controller);
 
 	if ((t != NULL) && t->speed_hz)
 		speed = t->speed_hz;
@@ -330,7 +330,7 @@ static void orion_spi_set_cs(struct spi_device *spi, bool enable)
 	void __iomem *ctrl_reg;
 	u32 val;
 
-	orion_spi = spi_master_get_devdata(spi->master);
+	orion_spi = spi_controller_get_devdata(spi->controller);
 	ctrl_reg = spi_reg(orion_spi, ORION_SPI_IF_CTRL_REG);
 
 	val = readl(ctrl_reg);
@@ -388,7 +388,7 @@ orion_spi_write_read_8bit(struct spi_device *spi,
 
 	cs_single_byte = spi->mode & SPI_CS_WORD;
 
-	orion_spi = spi_master_get_devdata(spi->master);
+	orion_spi = spi_controller_get_devdata(spi->controller);
 
 	if (cs_single_byte)
 		orion_spi_set_cs(spi, 0);
@@ -439,7 +439,7 @@ orion_spi_write_read_16bit(struct spi_device *spi,
 		return -1;
 	}
 
-	orion_spi = spi_master_get_devdata(spi->master);
+	orion_spi = spi_controller_get_devdata(spi->controller);
 	tx_reg = spi_reg(orion_spi, ORION_SPI_DATA_OUT_REG);
 	rx_reg = spi_reg(orion_spi, ORION_SPI_DATA_IN_REG);
 	int_reg = spi_reg(orion_spi, ORION_SPI_INT_CAUSE_REG);
@@ -475,7 +475,7 @@ orion_spi_write_read(struct spi_device *spi, struct spi_transfer *xfer)
 	word_len = spi->bits_per_word;
 	count = xfer->len;
 
-	orion_spi = spi_master_get_devdata(spi->master);
+	orion_spi = spi_controller_get_devdata(spi->controller);
 
 	/*
 	 * Use SPI direct write mode if base address is available
@@ -528,7 +528,7 @@ out:
 	return xfer->len - count;
 }
 
-static int orion_spi_transfer_one(struct spi_master *master,
+static int orion_spi_transfer_one(struct spi_controller *host,
 					struct spi_device *spi,
 					struct spi_transfer *t)
 {
@@ -548,7 +548,7 @@ static int orion_spi_setup(struct spi_device *spi)
 {
 	int ret;
 #ifdef CONFIG_PM
-	struct orion_spi *orion_spi = spi_master_get_devdata(spi->master);
+	struct orion_spi *orion_spi = spi_controller_get_devdata(spi->controller);
 	struct device *dev = orion_spi->dev;
 
 	orion_spi_runtime_resume(dev);
@@ -644,44 +644,44 @@ MODULE_DEVICE_TABLE(of, orion_spi_of_match_table);
 static int orion_spi_probe(struct platform_device *pdev)
 {
 	const struct orion_spi_dev *devdata;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct orion_spi *spi;
 	struct resource *r;
 	unsigned long tclk_hz;
 	int status = 0;
 	struct device_node *np;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*spi));
-	if (master == NULL) {
-		dev_dbg(&pdev->dev, "master allocation failed\n");
+	host = spi_alloc_host(&pdev->dev, sizeof(*spi));
+	if (host == NULL) {
+		dev_dbg(&pdev->dev, "host allocation failed\n");
 		return -ENOMEM;
 	}
 
 	if (pdev->id != -1)
-		master->bus_num = pdev->id;
+		host->bus_num = pdev->id;
 	if (pdev->dev.of_node) {
 		u32 cell_index;
 
 		if (!of_property_read_u32(pdev->dev.of_node, "cell-index",
 					  &cell_index))
-			master->bus_num = cell_index;
+			host->bus_num = cell_index;
 	}
 
 	/* we support all 4 SPI modes and LSB first option */
-	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_LSB_FIRST | SPI_CS_WORD;
-	master->set_cs = orion_spi_set_cs;
-	master->transfer_one = orion_spi_transfer_one;
-	master->num_chipselect = ORION_NUM_CHIPSELECTS;
-	master->setup = orion_spi_setup;
-	master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
-	master->auto_runtime_pm = true;
-	master->use_gpio_descriptors = true;
-	master->flags = SPI_CONTROLLER_GPIO_SS;
+	host->mode_bits = SPI_CPHA | SPI_CPOL | SPI_LSB_FIRST | SPI_CS_WORD;
+	host->set_cs = orion_spi_set_cs;
+	host->transfer_one = orion_spi_transfer_one;
+	host->num_chipselect = ORION_NUM_CHIPSELECTS;
+	host->setup = orion_spi_setup;
+	host->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
+	host->auto_runtime_pm = true;
+	host->use_gpio_descriptors = true;
+	host->flags = SPI_CONTROLLER_GPIO_SS;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
-	spi = spi_master_get_devdata(master);
-	spi->master = master;
+	spi = spi_controller_get_devdata(host);
+	spi->host = host;
 	spi->dev = &pdev->dev;
 
 	devdata = device_get_match_data(&pdev->dev);
@@ -718,14 +718,14 @@ static int orion_spi_probe(struct platform_device *pdev)
 	 */
 	if (of_device_is_compatible(pdev->dev.of_node,
 					"marvell,armada-370-spi"))
-		master->max_speed_hz = min(devdata->max_hz,
+		host->max_speed_hz = min(devdata->max_hz,
 				DIV_ROUND_UP(tclk_hz, devdata->min_divisor));
 	else if (devdata->min_divisor)
-		master->max_speed_hz =
+		host->max_speed_hz =
 			DIV_ROUND_UP(tclk_hz, devdata->min_divisor);
 	else
-		master->max_speed_hz = devdata->max_hz;
-	master->min_speed_hz = DIV_ROUND_UP(tclk_hz, devdata->max_divisor);
+		host->max_speed_hz = devdata->max_hz;
+	host->min_speed_hz = DIV_ROUND_UP(tclk_hz, devdata->max_divisor);
 
 	spi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &r);
 	if (IS_ERR(spi->base)) {
@@ -784,8 +784,8 @@ static int orion_spi_probe(struct platform_device *pdev)
 	if (status < 0)
 		goto out_rel_pm;
 
-	master->dev.of_node = pdev->dev.of_node;
-	status = spi_register_master(master);
+	host->dev.of_node = pdev->dev.of_node;
+	status = spi_register_controller(host);
 	if (status < 0)
 		goto out_rel_pm;
 
@@ -798,21 +798,21 @@ out_rel_axi_clk:
 out_rel_clk:
 	clk_disable_unprepare(spi->clk);
 out:
-	spi_master_put(master);
+	spi_controller_put(host);
 	return status;
 }
 
 
 static void orion_spi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct orion_spi *spi = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct orion_spi *spi = spi_controller_get_devdata(host);
 
 	pm_runtime_get_sync(&pdev->dev);
 	clk_disable_unprepare(spi->axi_clk);
 	clk_disable_unprepare(spi->clk);
 
-	spi_unregister_master(master);
+	spi_unregister_controller(host);
 	pm_runtime_disable(&pdev->dev);
 }
 
@@ -821,8 +821,8 @@ MODULE_ALIAS("platform:" DRIVER_NAME);
 #ifdef CONFIG_PM
 static int orion_spi_runtime_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct orion_spi *spi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct orion_spi *spi = spi_controller_get_devdata(host);
 
 	clk_disable_unprepare(spi->axi_clk);
 	clk_disable_unprepare(spi->clk);
@@ -831,8 +831,8 @@ static int orion_spi_runtime_suspend(struct device *dev)
 
 static int orion_spi_runtime_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct orion_spi *spi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct orion_spi *spi = spi_controller_get_devdata(host);
 
 	if (!IS_ERR(spi->axi_clk))
 		clk_prepare_enable(spi->axi_clk);

From f9977bb164a26d3bec31cdaf04865accf97e4353 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:33 +0800
Subject: [PATCH 1609/2047] spi: mchp-pci1xxxx: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-3-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pci1xxxx.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
index d23c42839da1..3638e974f5d4 100644
--- a/drivers/spi/spi-pci1xxxx.c
+++ b/drivers/spi/spi-pci1xxxx.c
@@ -65,7 +65,7 @@ struct pci1xxxx_spi_internal {
 	bool spi_xfer_in_progress;
 	int irq;
 	struct completion spi_xfer_done;
-	struct spi_master *spi_host;
+	struct spi_controller *spi_host;
 	struct pci1xxxx_spi *parent;
 	struct {
 		unsigned int dev_sel : 3;
@@ -250,7 +250,7 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
 	struct pci1xxxx_spi_internal *spi_sub_ptr;
 	struct device *dev = &pdev->dev;
 	struct pci1xxxx_spi *spi_bus;
-	struct spi_master *spi_host;
+	struct spi_controller *spi_host;
 	u32 regval;
 	int ret;
 
@@ -276,7 +276,7 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
 						      sizeof(struct pci1xxxx_spi_internal),
 						      GFP_KERNEL);
 		spi_sub_ptr = spi_bus->spi_int[iter];
-		spi_sub_ptr->spi_host = devm_spi_alloc_master(dev, sizeof(struct spi_master));
+		spi_sub_ptr->spi_host = devm_spi_alloc_host(dev, sizeof(struct spi_controller));
 		if (!spi_sub_ptr->spi_host)
 			return -ENOMEM;
 
@@ -366,8 +366,8 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
 		spi_host->max_speed_hz = PCI1XXXX_SPI_MAX_CLOCK_HZ;
 		spi_host->min_speed_hz = PCI1XXXX_SPI_MIN_CLOCK_HZ;
 		spi_host->flags = SPI_CONTROLLER_MUST_TX;
-		spi_master_set_devdata(spi_host, spi_sub_ptr);
-		ret = devm_spi_register_master(dev, spi_host);
+		spi_controller_set_devdata(spi_host, spi_sub_ptr);
+		ret = devm_spi_register_controller(dev, spi_host);
 		if (ret)
 			goto error;
 	}
@@ -415,7 +415,7 @@ static int pci1xxxx_spi_resume(struct device *dev)
 
 	for (iter = 0; iter < spi_ptr->total_hw_instances; iter++) {
 		spi_sub_ptr = spi_ptr->spi_int[iter];
-		spi_master_resume(spi_sub_ptr->spi_host);
+		spi_controller_resume(spi_sub_ptr->spi_host);
 		writel(regval, spi_ptr->reg_base +
 		       SPI_MST_EVENT_MASK_REG_OFFSET(iter));
 
@@ -441,7 +441,7 @@ static int pci1xxxx_spi_suspend(struct device *dev)
 
 		/* Store existing config before suspend */
 		store_restore_config(spi_ptr, spi_sub_ptr, iter, 1);
-		spi_master_suspend(spi_sub_ptr->spi_host);
+		spi_controller_suspend(spi_sub_ptr->spi_host);
 		writel(reg1, spi_ptr->reg_base +
 		       SPI_MST_EVENT_MASK_REG_OFFSET(iter));
 	}

From cc64ab491b4236cbeb8aabf2cd41db4499a0745b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:34 +0800
Subject: [PATCH 1610/2047] spi: pic32-sqi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-4-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pic32-sqi.c | 84 ++++++++++++++++++-------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/drivers/spi/spi-pic32-sqi.c b/drivers/spi/spi-pic32-sqi.c
index 5cbebcf26a2a..883354d0ff52 100644
--- a/drivers/spi/spi-pic32-sqi.c
+++ b/drivers/spi/spi-pic32-sqi.c
@@ -139,7 +139,7 @@ struct pic32_sqi {
 	void __iomem		*regs;
 	struct clk		*sys_clk;
 	struct clk		*base_clk; /* drives spi clock */
-	struct spi_master	*master;
+	struct spi_controller	*host;
 	int			irq;
 	struct completion	xfer_done;
 	struct ring_desc	*ring;
@@ -316,9 +316,9 @@ static int pic32_sqi_one_transfer(struct pic32_sqi *sqi,
 	return 0;
 }
 
-static int pic32_sqi_prepare_hardware(struct spi_master *master)
+static int pic32_sqi_prepare_hardware(struct spi_controller *host)
 {
-	struct pic32_sqi *sqi = spi_master_get_devdata(master);
+	struct pic32_sqi *sqi = spi_controller_get_devdata(host);
 
 	/* enable spi interface */
 	pic32_setbits(sqi->regs + PESQI_CONF_REG, PESQI_EN);
@@ -328,7 +328,7 @@ static int pic32_sqi_prepare_hardware(struct spi_master *master)
 	return 0;
 }
 
-static bool pic32_sqi_can_dma(struct spi_master *master,
+static bool pic32_sqi_can_dma(struct spi_controller *host,
 			      struct spi_device *spi,
 			      struct spi_transfer *x)
 {
@@ -336,7 +336,7 @@ static bool pic32_sqi_can_dma(struct spi_master *master,
 	return true;
 }
 
-static int pic32_sqi_one_message(struct spi_master *master,
+static int pic32_sqi_one_message(struct spi_controller *host,
 				 struct spi_message *msg)
 {
 	struct spi_device *spi = msg->spi;
@@ -347,7 +347,7 @@ static int pic32_sqi_one_message(struct spi_master *master,
 	unsigned long timeout;
 	u32 val;
 
-	sqi = spi_master_get_devdata(master);
+	sqi = spi_controller_get_devdata(host);
 
 	reinit_completion(&sqi->xfer_done);
 	msg->actual_length = 0;
@@ -412,7 +412,7 @@ static int pic32_sqi_one_message(struct spi_master *master,
 	/* wait for xfer completion */
 	timeout = wait_for_completion_timeout(&sqi->xfer_done, 5 * HZ);
 	if (timeout == 0) {
-		dev_err(&sqi->master->dev, "wait timedout/interrupted\n");
+		dev_err(&sqi->host->dev, "wait timedout/interrupted\n");
 		ret = -ETIMEDOUT;
 		msg->status = ret;
 	} else {
@@ -434,14 +434,14 @@ xfer_out:
 		/* release ring descr */
 		ring_desc_put(sqi, rdesc);
 	}
-	spi_finalize_current_message(spi->master);
+	spi_finalize_current_message(spi->controller);
 
 	return ret;
 }
 
-static int pic32_sqi_unprepare_hardware(struct spi_master *master)
+static int pic32_sqi_unprepare_hardware(struct spi_controller *host)
 {
-	struct pic32_sqi *sqi = spi_master_get_devdata(master);
+	struct pic32_sqi *sqi = spi_controller_get_devdata(host);
 
 	/* disable clk */
 	pic32_clrbits(sqi->regs + PESQI_CLK_CTRL_REG, PESQI_CLK_EN);
@@ -458,18 +458,18 @@ static int ring_desc_ring_alloc(struct pic32_sqi *sqi)
 	int i;
 
 	/* allocate coherent DMAable memory for hardware buffer descriptors. */
-	sqi->bd = dma_alloc_coherent(&sqi->master->dev,
+	sqi->bd = dma_alloc_coherent(&sqi->host->dev,
 				     sizeof(*bd) * PESQI_BD_COUNT,
 				     &sqi->bd_dma, GFP_KERNEL);
 	if (!sqi->bd) {
-		dev_err(&sqi->master->dev, "failed allocating dma buffer\n");
+		dev_err(&sqi->host->dev, "failed allocating dma buffer\n");
 		return -ENOMEM;
 	}
 
 	/* allocate software ring descriptors */
 	sqi->ring = kcalloc(PESQI_BD_COUNT, sizeof(*rdesc), GFP_KERNEL);
 	if (!sqi->ring) {
-		dma_free_coherent(&sqi->master->dev,
+		dma_free_coherent(&sqi->host->dev,
 				  sizeof(*bd) * PESQI_BD_COUNT,
 				  sqi->bd, sqi->bd_dma);
 		return -ENOMEM;
@@ -498,7 +498,7 @@ static int ring_desc_ring_alloc(struct pic32_sqi *sqi)
 
 static void ring_desc_ring_free(struct pic32_sqi *sqi)
 {
-	dma_free_coherent(&sqi->master->dev,
+	dma_free_coherent(&sqi->host->dev,
 			  sizeof(struct buf_desc) * PESQI_BD_COUNT,
 			  sqi->bd, sqi->bd_dma);
 	kfree(sqi->ring);
@@ -568,28 +568,28 @@ static void pic32_sqi_hw_init(struct pic32_sqi *sqi)
 
 static int pic32_sqi_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct pic32_sqi *sqi;
 	int ret;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*sqi));
-	if (!master)
+	host = spi_alloc_host(&pdev->dev, sizeof(*sqi));
+	if (!host)
 		return -ENOMEM;
 
-	sqi = spi_master_get_devdata(master);
-	sqi->master = master;
+	sqi = spi_controller_get_devdata(host);
+	sqi->host = host;
 
 	sqi->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(sqi->regs)) {
 		ret = PTR_ERR(sqi->regs);
-		goto err_free_master;
+		goto err_free_host;
 	}
 
 	/* irq */
 	sqi->irq = platform_get_irq(pdev, 0);
 	if (sqi->irq < 0) {
 		ret = sqi->irq;
-		goto err_free_master;
+		goto err_free_host;
 	}
 
 	/* clocks */
@@ -597,27 +597,27 @@ static int pic32_sqi_probe(struct platform_device *pdev)
 	if (IS_ERR(sqi->sys_clk)) {
 		ret = PTR_ERR(sqi->sys_clk);
 		dev_err(&pdev->dev, "no sys_clk ?\n");
-		goto err_free_master;
+		goto err_free_host;
 	}
 
 	sqi->base_clk = devm_clk_get(&pdev->dev, "spi_ck");
 	if (IS_ERR(sqi->base_clk)) {
 		ret = PTR_ERR(sqi->base_clk);
 		dev_err(&pdev->dev, "no base clk ?\n");
-		goto err_free_master;
+		goto err_free_host;
 	}
 
 	ret = clk_prepare_enable(sqi->sys_clk);
 	if (ret) {
 		dev_err(&pdev->dev, "sys clk enable failed\n");
-		goto err_free_master;
+		goto err_free_host;
 	}
 
 	ret = clk_prepare_enable(sqi->base_clk);
 	if (ret) {
 		dev_err(&pdev->dev, "base clk enable failed\n");
 		clk_disable_unprepare(sqi->sys_clk);
-		goto err_free_master;
+		goto err_free_host;
 	}
 
 	init_completion(&sqi->xfer_done);
@@ -640,24 +640,24 @@ static int pic32_sqi_probe(struct platform_device *pdev)
 		goto err_free_ring;
 	}
 
-	/* register master */
-	master->num_chipselect	= 2;
-	master->max_speed_hz	= clk_get_rate(sqi->base_clk);
-	master->dma_alignment	= 32;
-	master->max_dma_len	= PESQI_BD_BUF_LEN_MAX;
-	master->dev.of_node	= pdev->dev.of_node;
-	master->mode_bits	= SPI_MODE_3 | SPI_MODE_0 | SPI_TX_DUAL |
+	/* register host */
+	host->num_chipselect	= 2;
+	host->max_speed_hz	= clk_get_rate(sqi->base_clk);
+	host->dma_alignment	= 32;
+	host->max_dma_len	= PESQI_BD_BUF_LEN_MAX;
+	host->dev.of_node	= pdev->dev.of_node;
+	host->mode_bits		= SPI_MODE_3 | SPI_MODE_0 | SPI_TX_DUAL |
 				  SPI_RX_DUAL | SPI_TX_QUAD | SPI_RX_QUAD;
-	master->flags		= SPI_CONTROLLER_HALF_DUPLEX;
-	master->can_dma		= pic32_sqi_can_dma;
-	master->bits_per_word_mask	= SPI_BPW_RANGE_MASK(8, 32);
-	master->transfer_one_message	= pic32_sqi_one_message;
-	master->prepare_transfer_hardware	= pic32_sqi_prepare_hardware;
-	master->unprepare_transfer_hardware	= pic32_sqi_unprepare_hardware;
+	host->flags		= SPI_CONTROLLER_HALF_DUPLEX;
+	host->can_dma		= pic32_sqi_can_dma;
+	host->bits_per_word_mask	= SPI_BPW_RANGE_MASK(8, 32);
+	host->transfer_one_message	= pic32_sqi_one_message;
+	host->prepare_transfer_hardware	= pic32_sqi_prepare_hardware;
+	host->unprepare_transfer_hardware	= pic32_sqi_unprepare_hardware;
 
-	ret = devm_spi_register_master(&pdev->dev, master);
+	ret = devm_spi_register_controller(&pdev->dev, host);
 	if (ret) {
-		dev_err(&master->dev, "failed registering spi master\n");
+		dev_err(&host->dev, "failed registering spi host\n");
 		free_irq(sqi->irq, sqi);
 		goto err_free_ring;
 	}
@@ -673,8 +673,8 @@ err_disable_clk:
 	clk_disable_unprepare(sqi->base_clk);
 	clk_disable_unprepare(sqi->sys_clk);
 
-err_free_master:
-	spi_master_put(master);
+err_free_host:
+	spi_controller_put(host);
 	return ret;
 }
 

From 0273727c3bae812e12d9bc32830af9d7b3767d3b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:35 +0800
Subject: [PATCH 1611/2047] spi: pic32: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-5-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pic32.c | 152 ++++++++++++++++++++--------------------
 1 file changed, 76 insertions(+), 76 deletions(-)

diff --git a/drivers/spi/spi-pic32.c b/drivers/spi/spi-pic32.c
index e9b4c9cb97fb..52b788dac10a 100644
--- a/drivers/spi/spi-pic32.c
+++ b/drivers/spi/spi-pic32.c
@@ -100,7 +100,7 @@ struct pic32_spi {
 	int			tx_irq;
 	u32			fifo_n_byte; /* FIFO depth in bytes */
 	struct clk		*clk;
-	struct spi_master	*master;
+	struct spi_controller	*host;
 	/* Current controller setting */
 	u32			speed_hz; /* spi-clk rate */
 	u32			mode;
@@ -224,9 +224,9 @@ static void pic32_err_stop(struct pic32_spi *pic32s, const char *msg)
 	disable_irq_nosync(pic32s->tx_irq);
 
 	/* Show err message and abort xfer with err */
-	dev_err(&pic32s->master->dev, "%s\n", msg);
-	if (pic32s->master->cur_msg)
-		pic32s->master->cur_msg->status = -EIO;
+	dev_err(&pic32s->host->dev, "%s\n", msg);
+	if (pic32s->host->cur_msg)
+		pic32s->host->cur_msg->status = -EIO;
 	complete(&pic32s->xfer_done);
 }
 
@@ -250,7 +250,7 @@ static irqreturn_t pic32_spi_fault_irq(int irq, void *dev_id)
 		return IRQ_HANDLED;
 	}
 
-	if (!pic32s->master->cur_msg) {
+	if (!pic32s->host->cur_msg) {
 		pic32_err_stop(pic32s, "err_irq: no mesg");
 		return IRQ_NONE;
 	}
@@ -300,16 +300,16 @@ static void pic32_spi_dma_rx_notify(void *data)
 static int pic32_spi_dma_transfer(struct pic32_spi *pic32s,
 				  struct spi_transfer *xfer)
 {
-	struct spi_master *master = pic32s->master;
+	struct spi_controller *host = pic32s->host;
 	struct dma_async_tx_descriptor *desc_rx;
 	struct dma_async_tx_descriptor *desc_tx;
 	dma_cookie_t cookie;
 	int ret;
 
-	if (!master->dma_rx || !master->dma_tx)
+	if (!host->dma_rx || !host->dma_tx)
 		return -ENODEV;
 
-	desc_rx = dmaengine_prep_slave_sg(master->dma_rx,
+	desc_rx = dmaengine_prep_slave_sg(host->dma_rx,
 					  xfer->rx_sg.sgl,
 					  xfer->rx_sg.nents,
 					  DMA_DEV_TO_MEM,
@@ -319,7 +319,7 @@ static int pic32_spi_dma_transfer(struct pic32_spi *pic32s,
 		goto err_dma;
 	}
 
-	desc_tx = dmaengine_prep_slave_sg(master->dma_tx,
+	desc_tx = dmaengine_prep_slave_sg(host->dma_tx,
 					  xfer->tx_sg.sgl,
 					  xfer->tx_sg.nents,
 					  DMA_MEM_TO_DEV,
@@ -343,13 +343,13 @@ static int pic32_spi_dma_transfer(struct pic32_spi *pic32s,
 	if (ret)
 		goto err_dma_tx;
 
-	dma_async_issue_pending(master->dma_rx);
-	dma_async_issue_pending(master->dma_tx);
+	dma_async_issue_pending(host->dma_rx);
+	dma_async_issue_pending(host->dma_tx);
 
 	return 0;
 
 err_dma_tx:
-	dmaengine_terminate_all(master->dma_rx);
+	dmaengine_terminate_all(host->dma_rx);
 err_dma:
 	return ret;
 }
@@ -357,7 +357,7 @@ err_dma:
 static int pic32_spi_dma_config(struct pic32_spi *pic32s, u32 dma_width)
 {
 	int buf_offset = offsetof(struct pic32_spi_regs, buf);
-	struct spi_master *master = pic32s->master;
+	struct spi_controller *host = pic32s->host;
 	struct dma_slave_config cfg;
 	int ret;
 
@@ -371,16 +371,16 @@ static int pic32_spi_dma_config(struct pic32_spi *pic32s, u32 dma_width)
 	cfg.dst_addr_width = dma_width;
 	/* tx channel */
 	cfg.direction = DMA_MEM_TO_DEV;
-	ret = dmaengine_slave_config(master->dma_tx, &cfg);
+	ret = dmaengine_slave_config(host->dma_tx, &cfg);
 	if (ret) {
-		dev_err(&master->dev, "tx channel setup failed\n");
+		dev_err(&host->dev, "tx channel setup failed\n");
 		return ret;
 	}
 	/* rx channel */
 	cfg.direction = DMA_DEV_TO_MEM;
-	ret = dmaengine_slave_config(master->dma_rx, &cfg);
+	ret = dmaengine_slave_config(host->dma_rx, &cfg);
 	if (ret)
-		dev_err(&master->dev, "rx channel setup failed\n");
+		dev_err(&host->dev, "rx channel setup failed\n");
 
 	return ret;
 }
@@ -430,19 +430,19 @@ static int pic32_spi_set_word_size(struct pic32_spi *pic32s, u8 bits_per_word)
 	return 0;
 }
 
-static int pic32_spi_prepare_hardware(struct spi_master *master)
+static int pic32_spi_prepare_hardware(struct spi_controller *host)
 {
-	struct pic32_spi *pic32s = spi_master_get_devdata(master);
+	struct pic32_spi *pic32s = spi_controller_get_devdata(host);
 
 	pic32_spi_enable(pic32s);
 
 	return 0;
 }
 
-static int pic32_spi_prepare_message(struct spi_master *master,
+static int pic32_spi_prepare_message(struct spi_controller *host,
 				     struct spi_message *msg)
 {
-	struct pic32_spi *pic32s = spi_master_get_devdata(master);
+	struct pic32_spi *pic32s = spi_controller_get_devdata(host);
 	struct spi_device *spi = msg->spi;
 	u32 val;
 
@@ -481,18 +481,18 @@ static int pic32_spi_prepare_message(struct spi_master *master,
 	return 0;
 }
 
-static bool pic32_spi_can_dma(struct spi_master *master,
+static bool pic32_spi_can_dma(struct spi_controller *host,
 			      struct spi_device *spi,
 			      struct spi_transfer *xfer)
 {
-	struct pic32_spi *pic32s = spi_master_get_devdata(master);
+	struct pic32_spi *pic32s = spi_controller_get_devdata(host);
 
 	/* skip using DMA on small size transfer to avoid overhead.*/
 	return (xfer->len >= PIC32_DMA_LEN_MIN) &&
 	       test_bit(PIC32F_DMA_PREP, &pic32s->flags);
 }
 
-static int pic32_spi_one_transfer(struct spi_master *master,
+static int pic32_spi_one_transfer(struct spi_controller *host,
 				  struct spi_device *spi,
 				  struct spi_transfer *transfer)
 {
@@ -501,7 +501,7 @@ static int pic32_spi_one_transfer(struct spi_master *master,
 	unsigned long timeout;
 	int ret;
 
-	pic32s = spi_master_get_devdata(master);
+	pic32s = spi_controller_get_devdata(host);
 
 	/* handle transfer specific word size change */
 	if (transfer->bits_per_word &&
@@ -549,8 +549,8 @@ static int pic32_spi_one_transfer(struct spi_master *master,
 	if (timeout == 0) {
 		dev_err(&spi->dev, "wait error/timedout\n");
 		if (dma_issued) {
-			dmaengine_terminate_all(master->dma_rx);
-			dmaengine_terminate_all(master->dma_tx);
+			dmaengine_terminate_all(host->dma_rx);
+			dmaengine_terminate_all(host->dma_tx);
 		}
 		ret = -ETIMEDOUT;
 	} else {
@@ -560,16 +560,16 @@ static int pic32_spi_one_transfer(struct spi_master *master,
 	return ret;
 }
 
-static int pic32_spi_unprepare_message(struct spi_master *master,
+static int pic32_spi_unprepare_message(struct spi_controller *host,
 				       struct spi_message *msg)
 {
 	/* nothing to do */
 	return 0;
 }
 
-static int pic32_spi_unprepare_hardware(struct spi_master *master)
+static int pic32_spi_unprepare_hardware(struct spi_controller *host)
 {
-	struct pic32_spi *pic32s = spi_master_get_devdata(master);
+	struct pic32_spi *pic32s = spi_controller_get_devdata(host);
 
 	pic32_spi_disable(pic32s);
 
@@ -605,28 +605,28 @@ static void pic32_spi_cleanup(struct spi_device *spi)
 
 static int pic32_spi_dma_prep(struct pic32_spi *pic32s, struct device *dev)
 {
-	struct spi_master *master = pic32s->master;
+	struct spi_controller *host = pic32s->host;
 	int ret = 0;
 
-	master->dma_rx = dma_request_chan(dev, "spi-rx");
-	if (IS_ERR(master->dma_rx)) {
-		if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER)
+	host->dma_rx = dma_request_chan(dev, "spi-rx");
+	if (IS_ERR(host->dma_rx)) {
+		if (PTR_ERR(host->dma_rx) == -EPROBE_DEFER)
 			ret = -EPROBE_DEFER;
 		else
 			dev_warn(dev, "RX channel not found.\n");
 
-		master->dma_rx = NULL;
+		host->dma_rx = NULL;
 		goto out_err;
 	}
 
-	master->dma_tx = dma_request_chan(dev, "spi-tx");
-	if (IS_ERR(master->dma_tx)) {
-		if (PTR_ERR(master->dma_tx) == -EPROBE_DEFER)
+	host->dma_tx = dma_request_chan(dev, "spi-tx");
+	if (IS_ERR(host->dma_tx)) {
+		if (PTR_ERR(host->dma_tx) == -EPROBE_DEFER)
 			ret = -EPROBE_DEFER;
 		else
 			dev_warn(dev, "TX channel not found.\n");
 
-		master->dma_tx = NULL;
+		host->dma_tx = NULL;
 		goto out_err;
 	}
 
@@ -639,14 +639,14 @@ static int pic32_spi_dma_prep(struct pic32_spi *pic32s, struct device *dev)
 	return 0;
 
 out_err:
-	if (master->dma_rx) {
-		dma_release_channel(master->dma_rx);
-		master->dma_rx = NULL;
+	if (host->dma_rx) {
+		dma_release_channel(host->dma_rx);
+		host->dma_rx = NULL;
 	}
 
-	if (master->dma_tx) {
-		dma_release_channel(master->dma_tx);
-		master->dma_tx = NULL;
+	if (host->dma_tx) {
+		dma_release_channel(host->dma_tx);
+		host->dma_tx = NULL;
 	}
 
 	return ret;
@@ -658,11 +658,11 @@ static void pic32_spi_dma_unprep(struct pic32_spi *pic32s)
 		return;
 
 	clear_bit(PIC32F_DMA_PREP, &pic32s->flags);
-	if (pic32s->master->dma_rx)
-		dma_release_channel(pic32s->master->dma_rx);
+	if (pic32s->host->dma_rx)
+		dma_release_channel(pic32s->host->dma_rx);
 
-	if (pic32s->master->dma_tx)
-		dma_release_channel(pic32s->master->dma_tx);
+	if (pic32s->host->dma_tx)
+		dma_release_channel(pic32s->host->dma_tx);
 }
 
 static void pic32_spi_hw_init(struct pic32_spi *pic32s)
@@ -680,7 +680,7 @@ static void pic32_spi_hw_init(struct pic32_spi *pic32s)
 	/* disable framing mode */
 	ctrl &= ~CTRL_FRMEN;
 
-	/* enable master mode while disabled */
+	/* enable host mode while disabled */
 	ctrl |= CTRL_MSTEN;
 
 	/* set tx fifo threshold interrupt */
@@ -752,36 +752,36 @@ err_unmap_mem:
 
 static int pic32_spi_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct pic32_spi *pic32s;
 	int ret;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(*pic32s));
-	if (!master)
+	host = spi_alloc_host(&pdev->dev, sizeof(*pic32s));
+	if (!host)
 		return -ENOMEM;
 
-	pic32s = spi_master_get_devdata(master);
-	pic32s->master = master;
+	pic32s = spi_controller_get_devdata(host);
+	pic32s->host = host;
 
 	ret = pic32_spi_hw_probe(pdev, pic32s);
 	if (ret)
-		goto err_master;
+		goto err_host;
 
-	master->dev.of_node	= pdev->dev.of_node;
-	master->mode_bits	= SPI_MODE_3 | SPI_MODE_0 | SPI_CS_HIGH;
-	master->num_chipselect	= 1; /* single chip-select */
-	master->max_speed_hz	= clk_get_rate(pic32s->clk);
-	master->setup		= pic32_spi_setup;
-	master->cleanup		= pic32_spi_cleanup;
-	master->flags		= SPI_CONTROLLER_MUST_TX | SPI_CONTROLLER_MUST_RX;
-	master->bits_per_word_mask	= SPI_BPW_MASK(8) | SPI_BPW_MASK(16) |
+	host->dev.of_node	= pdev->dev.of_node;
+	host->mode_bits	= SPI_MODE_3 | SPI_MODE_0 | SPI_CS_HIGH;
+	host->num_chipselect	= 1; /* single chip-select */
+	host->max_speed_hz	= clk_get_rate(pic32s->clk);
+	host->setup		= pic32_spi_setup;
+	host->cleanup		= pic32_spi_cleanup;
+	host->flags		= SPI_CONTROLLER_MUST_TX | SPI_CONTROLLER_MUST_RX;
+	host->bits_per_word_mask	= SPI_BPW_MASK(8) | SPI_BPW_MASK(16) |
 					  SPI_BPW_MASK(32);
-	master->transfer_one		= pic32_spi_one_transfer;
-	master->prepare_message		= pic32_spi_prepare_message;
-	master->unprepare_message	= pic32_spi_unprepare_message;
-	master->prepare_transfer_hardware	= pic32_spi_prepare_hardware;
-	master->unprepare_transfer_hardware	= pic32_spi_unprepare_hardware;
-	master->use_gpio_descriptors = true;
+	host->transfer_one		= pic32_spi_one_transfer;
+	host->prepare_message		= pic32_spi_prepare_message;
+	host->unprepare_message	= pic32_spi_unprepare_message;
+	host->prepare_transfer_hardware	= pic32_spi_prepare_hardware;
+	host->unprepare_transfer_hardware	= pic32_spi_unprepare_hardware;
+	host->use_gpio_descriptors = true;
 
 	/* optional DMA support */
 	ret = pic32_spi_dma_prep(pic32s, &pdev->dev);
@@ -789,7 +789,7 @@ static int pic32_spi_probe(struct platform_device *pdev)
 		goto err_bailout;
 
 	if (test_bit(PIC32F_DMA_PREP, &pic32s->flags))
-		master->can_dma	= pic32_spi_can_dma;
+		host->can_dma	= pic32_spi_can_dma;
 
 	init_completion(&pic32s->xfer_done);
 	pic32s->mode = -1;
@@ -824,10 +824,10 @@ static int pic32_spi_probe(struct platform_device *pdev)
 		goto err_bailout;
 	}
 
-	/* register master */
-	ret = devm_spi_register_master(&pdev->dev, master);
+	/* register host */
+	ret = devm_spi_register_controller(&pdev->dev, host);
 	if (ret) {
-		dev_err(&master->dev, "failed registering spi master\n");
+		dev_err(&host->dev, "failed registering spi host\n");
 		goto err_bailout;
 	}
 
@@ -838,8 +838,8 @@ static int pic32_spi_probe(struct platform_device *pdev)
 err_bailout:
 	pic32_spi_dma_unprep(pic32s);
 	clk_disable_unprepare(pic32s->clk);
-err_master:
-	spi_master_put(master);
+err_host:
+	spi_controller_put(host);
 	return ret;
 }
 

From c97a43a54600fd977acf654802ccf3c94f5f592a Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:36 +0800
Subject: [PATCH 1612/2047] spi: spl022: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-6-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pl022.c | 94 ++++++++++++++++++++---------------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index 1af75eff26b6..bb347b6bb6f3 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -336,8 +336,8 @@ struct vendor_data {
  * @phybase: the physical memory where the SSP device resides
  * @virtbase: the virtual memory where the SSP is mapped
  * @clk: outgoing clock "SPICLK" for the SPI bus
- * @master: SPI framework hookup
- * @master_info: controller-specific data from machine setup
+ * @host: SPI framework hookup
+ * @host_info: controller-specific data from machine setup
  * @pump_transfers: Tasklet used in Interrupt Transfer mode
  * @cur_msg: Pointer to current spi_message being processed
  * @cur_transfer: Pointer to current spi_transfer
@@ -370,8 +370,8 @@ struct pl022 {
 	resource_size_t			phybase;
 	void __iomem			*virtbase;
 	struct clk			*clk;
-	struct spi_master		*master;
-	struct pl022_ssp_controller	*master_info;
+	struct spi_controller		*host;
+	struct pl022_ssp_controller	*host_info;
 	/* Message per-transfer pump */
 	struct tasklet_struct		pump_transfers;
 	struct spi_message		*cur_msg;
@@ -500,7 +500,7 @@ static void giveback(struct pl022 *pl022)
 		 * could invalidate the cs_control() callback...
 		 */
 		/* get a pointer to the next message, if any */
-		next_msg = spi_get_next_queued_message(pl022->master);
+		next_msg = spi_get_next_queued_message(pl022->host);
 
 		/*
 		 * see if the next and current messages point
@@ -523,7 +523,7 @@ static void giveback(struct pl022 *pl022)
 	writew((readw(SSP_CR1(pl022->virtbase)) &
 		(~SSP_CR1_MASK_SSE)), SSP_CR1(pl022->virtbase));
 
-	spi_finalize_current_message(pl022->master);
+	spi_finalize_current_message(pl022->host);
 }
 
 /**
@@ -1110,16 +1110,16 @@ static int pl022_dma_probe(struct pl022 *pl022)
 	 * of them.
 	 */
 	pl022->dma_rx_channel = dma_request_channel(mask,
-					    pl022->master_info->dma_filter,
-					    pl022->master_info->dma_rx_param);
+					    pl022->host_info->dma_filter,
+					    pl022->host_info->dma_rx_param);
 	if (!pl022->dma_rx_channel) {
 		dev_dbg(&pl022->adev->dev, "no RX DMA channel!\n");
 		goto err_no_rxchan;
 	}
 
 	pl022->dma_tx_channel = dma_request_channel(mask,
-					    pl022->master_info->dma_filter,
-					    pl022->master_info->dma_tx_param);
+					    pl022->host_info->dma_filter,
+					    pl022->host_info->dma_tx_param);
 	if (!pl022->dma_tx_channel) {
 		dev_dbg(&pl022->adev->dev, "no TX DMA channel!\n");
 		goto err_no_txchan;
@@ -1573,10 +1573,10 @@ out:
 	return;
 }
 
-static int pl022_transfer_one_message(struct spi_master *master,
+static int pl022_transfer_one_message(struct spi_controller *host,
 				      struct spi_message *msg)
 {
-	struct pl022 *pl022 = spi_master_get_devdata(master);
+	struct pl022 *pl022 = spi_controller_get_devdata(host);
 
 	/* Initial message state */
 	pl022->cur_msg = msg;
@@ -1602,9 +1602,9 @@ static int pl022_transfer_one_message(struct spi_master *master,
 	return 0;
 }
 
-static int pl022_unprepare_transfer_hardware(struct spi_master *master)
+static int pl022_unprepare_transfer_hardware(struct spi_controller *host)
 {
-	struct pl022 *pl022 = spi_master_get_devdata(master);
+	struct pl022 *pl022 = spi_controller_get_devdata(host);
 
 	/* nothing more to do - disable spi/ssp and power off */
 	writew((readw(SSP_CR1(pl022->virtbase)) &
@@ -1826,10 +1826,10 @@ static const struct pl022_config_chip pl022_default_chip_info = {
 };
 
 /**
- * pl022_setup - setup function registered to SPI master framework
+ * pl022_setup - setup function registered to SPI host framework
  * @spi: spi device which is requesting setup
  *
- * This function is registered to the SPI framework for this SPI master
+ * This function is registered to the SPI framework for this SPI host
  * controller. If it is the first time when setup is called by this device,
  * this function will initialize the runtime state for this chip and save
  * the same in the device structure. Else it will update the runtime info
@@ -1844,7 +1844,7 @@ static int pl022_setup(struct spi_device *spi)
 	struct chip_data *chip;
 	struct ssp_clock_params clk_freq = { .cpsdvsr = 0, .scr = 0};
 	int status = 0;
-	struct pl022 *pl022 = spi_master_get_devdata(spi->master);
+	struct pl022 *pl022 = spi_controller_get_devdata(spi->controller);
 	unsigned int bits = spi->bits_per_word;
 	u32 tmp;
 	struct device_node *np = spi->dev.of_node;
@@ -1964,7 +1964,7 @@ static int pl022_setup(struct spi_device *spi)
 	chip->dmacr = 0;
 	chip->cpsr = 0;
 	if ((chip_info->com_mode == DMA_TRANSFER)
-	    && ((pl022->master_info)->enable_dma)) {
+	    && ((pl022->host_info)->enable_dma)) {
 		chip->enable_dma = true;
 		dev_dbg(&spi->dev, "DMA mode set in controller state\n");
 		SSP_WRITE_BITS(chip->dmacr, SSP_DMA_ENABLED,
@@ -2061,10 +2061,10 @@ static int pl022_setup(struct spi_device *spi)
 }
 
 /**
- * pl022_cleanup - cleanup function registered to SPI master framework
+ * pl022_cleanup - cleanup function registered to SPI host framework
  * @spi: spi device which is requesting cleanup
  *
- * This function is registered to the SPI framework for this SPI master
+ * This function is registered to the SPI framework for this SPI host
  * controller. It will free the runtime state of chip.
  */
 static void pl022_cleanup(struct spi_device *spi)
@@ -2103,7 +2103,7 @@ static int pl022_probe(struct amba_device *adev, const struct amba_id *id)
 	struct device *dev = &adev->dev;
 	struct pl022_ssp_controller *platform_info =
 			dev_get_platdata(&adev->dev);
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct pl022 *pl022 = NULL;	/*Data for this driver */
 	int status = 0;
 
@@ -2117,16 +2117,16 @@ static int pl022_probe(struct amba_device *adev, const struct amba_id *id)
 		return -ENODEV;
 	}
 
-	/* Allocate master with space for data */
-	master = spi_alloc_master(dev, sizeof(struct pl022));
-	if (master == NULL) {
-		dev_err(&adev->dev, "probe - cannot alloc SPI master\n");
+	/* Allocate host with space for data */
+	host = spi_alloc_host(dev, sizeof(struct pl022));
+	if (host == NULL) {
+		dev_err(&adev->dev, "probe - cannot alloc SPI host\n");
 		return -ENOMEM;
 	}
 
-	pl022 = spi_master_get_devdata(master);
-	pl022->master = master;
-	pl022->master_info = platform_info;
+	pl022 = spi_controller_get_devdata(host);
+	pl022->host = host;
+	pl022->host_info = platform_info;
 	pl022->adev = adev;
 	pl022->vendor = id->data;
 
@@ -2134,25 +2134,25 @@ static int pl022_probe(struct amba_device *adev, const struct amba_id *id)
 	 * Bus Number Which has been Assigned to this SSP controller
 	 * on this board
 	 */
-	master->bus_num = platform_info->bus_id;
-	master->cleanup = pl022_cleanup;
-	master->setup = pl022_setup;
-	master->auto_runtime_pm = true;
-	master->transfer_one_message = pl022_transfer_one_message;
-	master->unprepare_transfer_hardware = pl022_unprepare_transfer_hardware;
-	master->rt = platform_info->rt;
-	master->dev.of_node = dev->of_node;
-	master->use_gpio_descriptors = true;
+	host->bus_num = platform_info->bus_id;
+	host->cleanup = pl022_cleanup;
+	host->setup = pl022_setup;
+	host->auto_runtime_pm = true;
+	host->transfer_one_message = pl022_transfer_one_message;
+	host->unprepare_transfer_hardware = pl022_unprepare_transfer_hardware;
+	host->rt = platform_info->rt;
+	host->dev.of_node = dev->of_node;
+	host->use_gpio_descriptors = true;
 
 	/*
 	 * Supports mode 0-3, loopback, and active low CS. Transfers are
 	 * always MS bit first on the original pl022.
 	 */
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
 	if (pl022->vendor->extended_cr)
-		master->mode_bits |= SPI_LSB_FIRST;
+		host->mode_bits |= SPI_LSB_FIRST;
 
-	dev_dbg(&adev->dev, "BUSNO: %d\n", master->bus_num);
+	dev_dbg(&adev->dev, "BUSNO: %d\n", host->bus_num);
 
 	status = amba_request_regions(adev, NULL);
 	if (status)
@@ -2215,10 +2215,10 @@ static int pl022_probe(struct amba_device *adev, const struct amba_id *id)
 
 	/* Register with the SPI framework */
 	amba_set_drvdata(adev, pl022);
-	status = devm_spi_register_master(&adev->dev, master);
+	status = devm_spi_register_controller(&adev->dev, host);
 	if (status != 0) {
 		dev_err_probe(&adev->dev, status,
-			      "problem registering spi master\n");
+			      "problem registering spi host\n");
 		goto err_spi_register;
 	}
 	dev_dbg(dev, "probe succeeded\n");
@@ -2246,7 +2246,7 @@ static int pl022_probe(struct amba_device *adev, const struct amba_id *id)
  err_no_ioremap:
 	amba_release_regions(adev);
  err_no_ioregion:
-	spi_master_put(master);
+	spi_controller_put(host);
 	return status;
 }
 
@@ -2265,7 +2265,7 @@ pl022_remove(struct amba_device *adev)
 	pm_runtime_get_noresume(&adev->dev);
 
 	load_ssp_default_config(pl022);
-	if (pl022->master_info->enable_dma)
+	if (pl022->host_info->enable_dma)
 		pl022_dma_remove(pl022);
 
 	clk_disable_unprepare(pl022->clk);
@@ -2279,13 +2279,13 @@ static int pl022_suspend(struct device *dev)
 	struct pl022 *pl022 = dev_get_drvdata(dev);
 	int ret;
 
-	ret = spi_master_suspend(pl022->master);
+	ret = spi_controller_suspend(pl022->host);
 	if (ret)
 		return ret;
 
 	ret = pm_runtime_force_suspend(dev);
 	if (ret) {
-		spi_master_resume(pl022->master);
+		spi_controller_resume(pl022->host);
 		return ret;
 	}
 
@@ -2305,7 +2305,7 @@ static int pl022_resume(struct device *dev)
 		dev_err(dev, "problem resuming\n");
 
 	/* Start the queue running */
-	ret = spi_master_resume(pl022->master);
+	ret = spi_controller_resume(pl022->host);
 	if (!ret)
 		dev_dbg(dev, "resumed\n");
 

From 71345830575984c01c4c2b655ef3a7877100fba3 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:37 +0800
Subject: [PATCH 1613/2047] spi: ppc4xx: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-7-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ppc4xx.c | 44 ++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c
index d725e915025d..03aab661be9d 100644
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -126,7 +126,7 @@ struct ppc4xx_spi {
 	unsigned char *rx;
 
 	struct spi_ppc4xx_regs __iomem *regs; /* pointer to the registers */
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct device *dev;
 };
 
@@ -143,7 +143,7 @@ static int spi_ppc4xx_txrx(struct spi_device *spi, struct spi_transfer *t)
 	dev_dbg(&spi->dev, "txrx: tx %p, rx %p, len %d\n",
 		t->tx_buf, t->rx_buf, t->len);
 
-	hw = spi_master_get_devdata(spi->master);
+	hw = spi_controller_get_devdata(spi->controller);
 
 	hw->tx = t->tx_buf;
 	hw->rx = t->rx_buf;
@@ -161,7 +161,7 @@ static int spi_ppc4xx_txrx(struct spi_device *spi, struct spi_transfer *t)
 
 static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t)
 {
-	struct ppc4xx_spi *hw = spi_master_get_devdata(spi->master);
+	struct ppc4xx_spi *hw = spi_controller_get_devdata(spi->controller);
 	struct spi_ppc4xx_cs *cs = spi->controller_state;
 	int scr;
 	u8 cdm = 0;
@@ -340,7 +340,7 @@ static void spi_ppc4xx_enable(struct ppc4xx_spi *hw)
 static int spi_ppc4xx_of_probe(struct platform_device *op)
 {
 	struct ppc4xx_spi *hw;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct spi_bitbang *bbp;
 	struct resource resource;
 	struct device_node *np = op->dev.of_node;
@@ -349,20 +349,20 @@ static int spi_ppc4xx_of_probe(struct platform_device *op)
 	int ret;
 	const unsigned int *clk;
 
-	master = spi_alloc_master(dev, sizeof(*hw));
-	if (master == NULL)
+	host = spi_alloc_host(dev, sizeof(*hw));
+	if (host == NULL)
 		return -ENOMEM;
-	master->dev.of_node = np;
-	platform_set_drvdata(op, master);
-	hw = spi_master_get_devdata(master);
-	hw->master = master;
+	host->dev.of_node = np;
+	platform_set_drvdata(op, host);
+	hw = spi_controller_get_devdata(host);
+	hw->host = host;
 	hw->dev = dev;
 
 	init_completion(&hw->done);
 
 	/* Setup the state for the bitbang driver */
 	bbp = &hw->bitbang;
-	bbp->master = hw->master;
+	bbp->master = hw->host;
 	bbp->setup_transfer = spi_ppc4xx_setupxfer;
 	bbp->txrx_bufs = spi_ppc4xx_txrx;
 	bbp->use_dma = 0;
@@ -385,7 +385,7 @@ static int spi_ppc4xx_of_probe(struct platform_device *op)
 	if (opbnp == NULL) {
 		dev_err(dev, "OPB: cannot find node\n");
 		ret = -ENODEV;
-		goto free_master;
+		goto free_host;
 	}
 	/* Get the clock (Hz) for the OPB */
 	clk = of_get_property(opbnp, "clock-frequency", NULL);
@@ -393,7 +393,7 @@ static int spi_ppc4xx_of_probe(struct platform_device *op)
 		dev_err(dev, "OPB: no clock-frequency property set\n");
 		of_node_put(opbnp);
 		ret = -ENODEV;
-		goto free_master;
+		goto free_host;
 	}
 	hw->opb_freq = *clk;
 	hw->opb_freq >>= 2;
@@ -402,7 +402,7 @@ static int spi_ppc4xx_of_probe(struct platform_device *op)
 	ret = of_address_to_resource(np, 0, &resource);
 	if (ret) {
 		dev_err(dev, "error while parsing device node resource\n");
-		goto free_master;
+		goto free_host;
 	}
 	hw->mapbase = resource.start;
 	hw->mapsize = resource_size(&resource);
@@ -411,7 +411,7 @@ static int spi_ppc4xx_of_probe(struct platform_device *op)
 	if (hw->mapsize < sizeof(struct spi_ppc4xx_regs)) {
 		dev_err(dev, "too small to map registers\n");
 		ret = -EINVAL;
-		goto free_master;
+		goto free_host;
 	}
 
 	/* Request IRQ */
@@ -420,7 +420,7 @@ static int spi_ppc4xx_of_probe(struct platform_device *op)
 			  0, "spi_ppc4xx_of", (void *)hw);
 	if (ret) {
 		dev_err(dev, "unable to allocate interrupt\n");
-		goto free_master;
+		goto free_host;
 	}
 
 	if (!request_mem_region(hw->mapbase, hw->mapsize, DRIVER_NAME)) {
@@ -443,7 +443,7 @@ static int spi_ppc4xx_of_probe(struct platform_device *op)
 	dev->dma_mask = 0;
 	ret = spi_bitbang_start(bbp);
 	if (ret) {
-		dev_err(dev, "failed to register SPI master\n");
+		dev_err(dev, "failed to register SPI host\n");
 		goto unmap_regs;
 	}
 
@@ -457,8 +457,8 @@ map_io_error:
 	release_mem_region(hw->mapbase, hw->mapsize);
 request_mem_error:
 	free_irq(hw->irqnum, hw);
-free_master:
-	spi_master_put(master);
+free_host:
+	spi_controller_put(host);
 
 	dev_err(dev, "initialization failed\n");
 	return ret;
@@ -466,14 +466,14 @@ free_master:
 
 static void spi_ppc4xx_of_remove(struct platform_device *op)
 {
-	struct spi_master *master = platform_get_drvdata(op);
-	struct ppc4xx_spi *hw = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(op);
+	struct ppc4xx_spi *hw = spi_controller_get_devdata(host);
 
 	spi_bitbang_stop(&hw->bitbang);
 	release_mem_region(hw->mapbase, hw->mapsize);
 	free_irq(hw->irqnum, hw);
 	iounmap(hw->regs);
-	spi_master_put(master);
+	spi_controller_put(host);
 }
 
 static const struct of_device_id spi_ppc4xx_of_match[] = {

From 60ba4431c8e8c137f92d050b9f0bfc8e8c26a364 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:38 +0800
Subject: [PATCH 1614/2047] spi: pxa2xx: switch to use modern name

Change legacy name master/slave to modern name host/target or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-8-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c       | 24 ++++++++++++------------
 include/linux/spi/pxa2xx_spi.h |  4 ++--
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 21f156587669..f2a856f6a99e 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1102,7 +1102,7 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 		}
 	}
 
-	if (spi_controller_is_slave(controller)) {
+	if (spi_controller_is_target(controller)) {
 		while (drv_data->write(drv_data))
 			;
 		if (drv_data->gpiod_ready) {
@@ -1121,7 +1121,7 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 	return 1;
 }
 
-static int pxa2xx_spi_slave_abort(struct spi_controller *controller)
+static int pxa2xx_spi_target_abort(struct spi_controller *controller)
 {
 	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
@@ -1199,7 +1199,7 @@ static int setup(struct spi_device *spi)
 		break;
 	default:
 		tx_hi_thres = 0;
-		if (spi_controller_is_slave(drv_data->controller)) {
+		if (spi_controller_is_target(drv_data->controller)) {
 			tx_thres = 1;
 			rx_thres = 2;
 		} else {
@@ -1248,7 +1248,7 @@ static int setup(struct spi_device *spi)
 	}
 
 	chip->cr1 = 0;
-	if (spi_controller_is_slave(drv_data->controller)) {
+	if (spi_controller_is_target(drv_data->controller)) {
 		chip->cr1 |= SSCR1_SCFR;
 		chip->cr1 |= SSCR1_SCLKDIR;
 		chip->cr1 |= SSCR1_SFRMDIR;
@@ -1395,7 +1395,7 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev)
 	else
 		ssp->port_id = uid;
 
-	pdata->is_slave = device_property_read_bool(dev, "spi-slave");
+	pdata->is_target = device_property_read_bool(dev, "spi-slave");
 	pdata->num_chipselect = 1;
 	pdata->enable_dma = true;
 	pdata->dma_burst_size = 1;
@@ -1461,10 +1461,10 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	if (platform_info->is_slave)
-		controller = devm_spi_alloc_slave(dev, sizeof(*drv_data));
+	if (platform_info->is_target)
+		controller = devm_spi_alloc_target(dev, sizeof(*drv_data));
 	else
-		controller = devm_spi_alloc_master(dev, sizeof(*drv_data));
+		controller = devm_spi_alloc_host(dev, sizeof(*drv_data));
 
 	if (!controller) {
 		dev_err(&pdev->dev, "cannot alloc spi_controller\n");
@@ -1487,7 +1487,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	controller->setup = setup;
 	controller->set_cs = pxa2xx_spi_set_cs;
 	controller->transfer_one = pxa2xx_spi_transfer_one;
-	controller->slave_abort = pxa2xx_spi_slave_abort;
+	controller->target_abort = pxa2xx_spi_target_abort;
 	controller->handle_err = pxa2xx_spi_handle_err;
 	controller->unprepare_transfer_hardware = pxa2xx_spi_unprepare_transfer;
 	controller->fw_translate_cs = pxa2xx_spi_fw_translate_cs;
@@ -1579,7 +1579,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		break;
 	default:
 
-		if (spi_controller_is_slave(controller)) {
+		if (spi_controller_is_target(controller)) {
 			tmp = SSCR1_SCFR |
 			      SSCR1_SCLKDIR |
 			      SSCR1_SFRMDIR |
@@ -1592,7 +1592,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		}
 		pxa2xx_spi_write(drv_data, SSCR1, tmp);
 		tmp = SSCR0_Motorola | SSCR0_DataSize(8);
-		if (!spi_controller_is_slave(controller))
+		if (!spi_controller_is_target(controller))
 			tmp |= SSCR0_SCR(2);
 		pxa2xx_spi_write(drv_data, SSCR0, tmp);
 		break;
@@ -1620,7 +1620,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	controller->num_chipselect = platform_info->num_chipselect;
 	controller->use_gpio_descriptors = true;
 
-	if (platform_info->is_slave) {
+	if (platform_info->is_target) {
 		drv_data->gpiod_ready = devm_gpiod_get_optional(dev,
 						"ready", GPIOD_OUT_LOW);
 		if (IS_ERR(drv_data->gpiod_ready)) {
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 4658e7801b42..0916cb9bcb0a 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -19,7 +19,7 @@ struct pxa2xx_spi_controller {
 	u16 num_chipselect;
 	u8 enable_dma;
 	u8 dma_burst_size;
-	bool is_slave;
+	bool is_target;
 
 	/* DMA engine specific config */
 	bool (*dma_filter)(struct dma_chan *chan, void *param);
@@ -31,7 +31,7 @@ struct pxa2xx_spi_controller {
 };
 
 /*
- * The controller specific data for SPI slave devices
+ * The controller specific data for SPI target devices
  * (resides in spi_board_info.controller_data),
  * copied to spi_device.platform_data ... mostly for
  * DMA tuning.

From 8d3ad99ad8f7370a0f9c8bdb741747fdd70e4e49 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:39 +0800
Subject: [PATCH 1615/2047] spi: spi-qcom-qspi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-9-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-qcom-qspi.c | 80 ++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/spi/spi-qcom-qspi.c b/drivers/spi/spi-qcom-qspi.c
index b9d5641b3365..49b775134485 100644
--- a/drivers/spi/spi-qcom-qspi.c
+++ b/drivers/spi/spi-qcom-qspi.c
@@ -247,11 +247,11 @@ static void qcom_qspi_pio_xfer(struct qcom_qspi *ctrl)
 	qcom_qspi_pio_xfer_ctrl(ctrl);
 }
 
-static void qcom_qspi_handle_err(struct spi_master *master,
+static void qcom_qspi_handle_err(struct spi_controller *host,
 				 struct spi_message *msg)
 {
 	u32 int_status;
-	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
+	struct qcom_qspi *ctrl = spi_controller_get_devdata(host);
 	unsigned long flags;
 	int i;
 
@@ -411,11 +411,11 @@ static bool qcom_qspi_can_dma(struct spi_controller *ctlr,
 	return xfer->len > QSPI_MAX_BYTES_FIFO;
 }
 
-static int qcom_qspi_transfer_one(struct spi_master *master,
+static int qcom_qspi_transfer_one(struct spi_controller *host,
 				  struct spi_device *slv,
 				  struct spi_transfer *xfer)
 {
-	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
+	struct qcom_qspi *ctrl = spi_controller_get_devdata(host);
 	int ret;
 	unsigned long speed_hz;
 	unsigned long flags;
@@ -443,7 +443,7 @@ static int qcom_qspi_transfer_one(struct spi_master *master,
 		ctrl->xfer.tx_buf = xfer->tx_buf;
 	}
 	ctrl->xfer.is_last = list_is_last(&xfer->transfer_list,
-					  &master->cur_msg->transfers);
+					  &host->cur_msg->transfers);
 	ctrl->xfer.rem_bytes = xfer->len;
 
 	if (xfer->rx_sg.nents || xfer->tx_sg.nents) {
@@ -481,7 +481,7 @@ exit:
 	return 1;
 }
 
-static int qcom_qspi_prepare_message(struct spi_master *master,
+static int qcom_qspi_prepare_message(struct spi_controller *host,
 				     struct spi_message *message)
 {
 	u32 mstr_cfg;
@@ -490,7 +490,7 @@ static int qcom_qspi_prepare_message(struct spi_master *master,
 	int tx_data_delay = 1;
 	unsigned long flags;
 
-	ctrl = spi_master_get_devdata(master);
+	ctrl = spi_controller_get_devdata(host);
 	spin_lock_irqsave(&ctrl->lock, flags);
 
 	mstr_cfg = readl(ctrl->base + MSTR_CONFIG);
@@ -694,18 +694,18 @@ static int qcom_qspi_probe(struct platform_device *pdev)
 {
 	int ret;
 	struct device *dev;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct qcom_qspi *ctrl;
 
 	dev = &pdev->dev;
 
-	master = devm_spi_alloc_master(dev, sizeof(*ctrl));
-	if (!master)
+	host = devm_spi_alloc_host(dev, sizeof(*ctrl));
+	if (!host)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
-	ctrl = spi_master_get_devdata(master);
+	ctrl = spi_controller_get_devdata(host);
 
 	spin_lock_init(&ctrl->lock);
 	ctrl->dev = dev;
@@ -758,23 +758,23 @@ static int qcom_qspi_probe(struct platform_device *pdev)
 	if (ret)
 		return dev_err_probe(dev, ret, "could not set DMA mask\n");
 
-	master->max_speed_hz = 300000000;
-	master->max_dma_len = 65536; /* as per HPG */
-	master->dma_alignment = QSPI_ALIGN_REQ;
-	master->num_chipselect = QSPI_NUM_CS;
-	master->bus_num = -1;
-	master->dev.of_node = pdev->dev.of_node;
-	master->mode_bits = SPI_MODE_0 |
-			    SPI_TX_DUAL | SPI_RX_DUAL |
-			    SPI_TX_QUAD | SPI_RX_QUAD;
-	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
-	master->prepare_message = qcom_qspi_prepare_message;
-	master->transfer_one = qcom_qspi_transfer_one;
-	master->handle_err = qcom_qspi_handle_err;
+	host->max_speed_hz = 300000000;
+	host->max_dma_len = 65536; /* as per HPG */
+	host->dma_alignment = QSPI_ALIGN_REQ;
+	host->num_chipselect = QSPI_NUM_CS;
+	host->bus_num = -1;
+	host->dev.of_node = pdev->dev.of_node;
+	host->mode_bits = SPI_MODE_0 |
+			  SPI_TX_DUAL | SPI_RX_DUAL |
+			  SPI_TX_QUAD | SPI_RX_QUAD;
+	host->flags = SPI_CONTROLLER_HALF_DUPLEX;
+	host->prepare_message = qcom_qspi_prepare_message;
+	host->transfer_one = qcom_qspi_transfer_one;
+	host->handle_err = qcom_qspi_handle_err;
 	if (of_property_read_bool(pdev->dev.of_node, "iommus"))
-		master->can_dma = qcom_qspi_can_dma;
-	master->auto_runtime_pm = true;
-	master->mem_ops = &qcom_qspi_mem_ops;
+		host->can_dma = qcom_qspi_can_dma;
+	host->auto_runtime_pm = true;
+	host->mem_ops = &qcom_qspi_mem_ops;
 
 	ret = devm_pm_opp_set_clkname(&pdev->dev, "core");
 	if (ret)
@@ -794,7 +794,7 @@ static int qcom_qspi_probe(struct platform_device *pdev)
 	pm_runtime_set_autosuspend_delay(dev, 250);
 	pm_runtime_enable(dev);
 
-	ret = spi_register_master(master);
+	ret = spi_register_controller(host);
 	if (!ret)
 		return 0;
 
@@ -805,18 +805,18 @@ static int qcom_qspi_probe(struct platform_device *pdev)
 
 static void qcom_qspi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
+	struct spi_controller *host = platform_get_drvdata(pdev);
 
 	/* Unregister _before_ disabling pm_runtime() so we stop transfers */
-	spi_unregister_master(master);
+	spi_unregister_controller(host);
 
 	pm_runtime_disable(&pdev->dev);
 }
 
 static int __maybe_unused qcom_qspi_runtime_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct qcom_qspi *ctrl = spi_controller_get_devdata(host);
 	int ret;
 
 	/* Drop the performance state vote */
@@ -837,8 +837,8 @@ static int __maybe_unused qcom_qspi_runtime_suspend(struct device *dev)
 
 static int __maybe_unused qcom_qspi_runtime_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct qcom_qspi *ctrl = spi_controller_get_devdata(host);
 	int ret;
 
 	pinctrl_pm_select_default_state(dev);
@@ -859,30 +859,30 @@ static int __maybe_unused qcom_qspi_runtime_resume(struct device *dev)
 
 static int __maybe_unused qcom_qspi_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
+	struct spi_controller *host = dev_get_drvdata(dev);
 	int ret;
 
-	ret = spi_master_suspend(master);
+	ret = spi_controller_suspend(host);
 	if (ret)
 		return ret;
 
 	ret = pm_runtime_force_suspend(dev);
 	if (ret)
-		spi_master_resume(master);
+		spi_controller_resume(host);
 
 	return ret;
 }
 
 static int __maybe_unused qcom_qspi_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
+	struct spi_controller *host = dev_get_drvdata(dev);
 	int ret;
 
 	ret = pm_runtime_force_resume(dev);
 	if (ret)
 		return ret;
 
-	ret = spi_master_resume(master);
+	ret = spi_controller_resume(host);
 	if (ret)
 		pm_runtime_force_suspend(dev);
 

From 597442ff4f6226206b7cc28b86eb2be0ae9c6418 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:40 +0800
Subject: [PATCH 1616/2047] spi: qup: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-10-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-qup.c | 166 +++++++++++++++++++++---------------------
 1 file changed, 83 insertions(+), 83 deletions(-)

diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index fd16acb1f578..4b6f6b25219b 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -385,20 +385,20 @@ static void spi_qup_write(struct spi_qup *controller)
 	} while (remainder);
 }
 
-static int spi_qup_prep_sg(struct spi_master *master, struct scatterlist *sgl,
+static int spi_qup_prep_sg(struct spi_controller *host, struct scatterlist *sgl,
 			   unsigned int nents, enum dma_transfer_direction dir,
 			   dma_async_tx_callback callback)
 {
-	struct spi_qup *qup = spi_master_get_devdata(master);
+	struct spi_qup *qup = spi_controller_get_devdata(host);
 	unsigned long flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
 	struct dma_async_tx_descriptor *desc;
 	struct dma_chan *chan;
 	dma_cookie_t cookie;
 
 	if (dir == DMA_MEM_TO_DEV)
-		chan = master->dma_tx;
+		chan = host->dma_tx;
 	else
-		chan = master->dma_rx;
+		chan = host->dma_rx;
 
 	desc = dmaengine_prep_slave_sg(chan, sgl, nents, dir, flags);
 	if (IS_ERR_OR_NULL(desc))
@@ -412,13 +412,13 @@ static int spi_qup_prep_sg(struct spi_master *master, struct scatterlist *sgl,
 	return dma_submit_error(cookie);
 }
 
-static void spi_qup_dma_terminate(struct spi_master *master,
+static void spi_qup_dma_terminate(struct spi_controller *host,
 				  struct spi_transfer *xfer)
 {
 	if (xfer->tx_buf)
-		dmaengine_terminate_all(master->dma_tx);
+		dmaengine_terminate_all(host->dma_tx);
 	if (xfer->rx_buf)
-		dmaengine_terminate_all(master->dma_rx);
+		dmaengine_terminate_all(host->dma_rx);
 }
 
 static u32 spi_qup_sgl_get_nents_len(struct scatterlist *sgl, u32 max,
@@ -445,8 +445,8 @@ static int spi_qup_do_dma(struct spi_device *spi, struct spi_transfer *xfer,
 			  unsigned long timeout)
 {
 	dma_async_tx_callback rx_done = NULL, tx_done = NULL;
-	struct spi_master *master = spi->master;
-	struct spi_qup *qup = spi_master_get_devdata(master);
+	struct spi_controller *host = spi->controller;
+	struct spi_qup *qup = spi_controller_get_devdata(host);
 	struct scatterlist *tx_sgl, *rx_sgl;
 	int ret;
 
@@ -481,20 +481,20 @@ static int spi_qup_do_dma(struct spi_device *spi, struct spi_transfer *xfer,
 			return ret;
 		}
 		if (rx_sgl) {
-			ret = spi_qup_prep_sg(master, rx_sgl, rx_nents,
+			ret = spi_qup_prep_sg(host, rx_sgl, rx_nents,
 					      DMA_DEV_TO_MEM, rx_done);
 			if (ret)
 				return ret;
-			dma_async_issue_pending(master->dma_rx);
+			dma_async_issue_pending(host->dma_rx);
 		}
 
 		if (tx_sgl) {
-			ret = spi_qup_prep_sg(master, tx_sgl, tx_nents,
+			ret = spi_qup_prep_sg(host, tx_sgl, tx_nents,
 					      DMA_MEM_TO_DEV, tx_done);
 			if (ret)
 				return ret;
 
-			dma_async_issue_pending(master->dma_tx);
+			dma_async_issue_pending(host->dma_tx);
 		}
 
 		if (!wait_for_completion_timeout(&qup->done, timeout))
@@ -513,8 +513,8 @@ static int spi_qup_do_dma(struct spi_device *spi, struct spi_transfer *xfer,
 static int spi_qup_do_pio(struct spi_device *spi, struct spi_transfer *xfer,
 			  unsigned long timeout)
 {
-	struct spi_master *master = spi->master;
-	struct spi_qup *qup = spi_master_get_devdata(master);
+	struct spi_controller *host = spi->controller;
+	struct spi_qup *qup = spi_controller_get_devdata(host);
 	int ret, n_words, iterations, offset = 0;
 
 	n_words = qup->n_words;
@@ -658,7 +658,7 @@ static irqreturn_t spi_qup_qup_irq(int irq, void *dev_id)
 /* set clock freq ... bits per word, determine mode */
 static int spi_qup_io_prep(struct spi_device *spi, struct spi_transfer *xfer)
 {
-	struct spi_qup *controller = spi_master_get_devdata(spi->master);
+	struct spi_qup *controller = spi_controller_get_devdata(spi->controller);
 	int ret;
 
 	if (spi->mode & SPI_LOOP && xfer->len > controller->in_fifo_sz) {
@@ -679,9 +679,9 @@ static int spi_qup_io_prep(struct spi_device *spi, struct spi_transfer *xfer)
 
 	if (controller->n_words <= (controller->in_fifo_sz / sizeof(u32)))
 		controller->mode = QUP_IO_M_MODE_FIFO;
-	else if (spi->master->can_dma &&
-		 spi->master->can_dma(spi->master, spi, xfer) &&
-		 spi->master->cur_msg_mapped)
+	else if (spi->controller->can_dma &&
+		 spi->controller->can_dma(spi->controller, spi, xfer) &&
+		 spi->controller->cur_msg_mapped)
 		controller->mode = QUP_IO_M_MODE_BAM;
 	else
 		controller->mode = QUP_IO_M_MODE_BLOCK;
@@ -692,7 +692,7 @@ static int spi_qup_io_prep(struct spi_device *spi, struct spi_transfer *xfer)
 /* prep qup for another spi transaction of specific type */
 static int spi_qup_io_config(struct spi_device *spi, struct spi_transfer *xfer)
 {
-	struct spi_qup *controller = spi_master_get_devdata(spi->master);
+	struct spi_qup *controller = spi_controller_get_devdata(spi->controller);
 	u32 config, iomode, control;
 	unsigned long flags;
 
@@ -840,11 +840,11 @@ static int spi_qup_io_config(struct spi_device *spi, struct spi_transfer *xfer)
 	return 0;
 }
 
-static int spi_qup_transfer_one(struct spi_master *master,
+static int spi_qup_transfer_one(struct spi_controller *host,
 			      struct spi_device *spi,
 			      struct spi_transfer *xfer)
 {
-	struct spi_qup *controller = spi_master_get_devdata(master);
+	struct spi_qup *controller = spi_controller_get_devdata(host);
 	unsigned long timeout, flags;
 	int ret;
 
@@ -878,21 +878,21 @@ static int spi_qup_transfer_one(struct spi_master *master,
 	spin_unlock_irqrestore(&controller->lock, flags);
 
 	if (ret && spi_qup_is_dma_xfer(controller->mode))
-		spi_qup_dma_terminate(master, xfer);
+		spi_qup_dma_terminate(host, xfer);
 
 	return ret;
 }
 
-static bool spi_qup_can_dma(struct spi_master *master, struct spi_device *spi,
+static bool spi_qup_can_dma(struct spi_controller *host, struct spi_device *spi,
 			    struct spi_transfer *xfer)
 {
-	struct spi_qup *qup = spi_master_get_devdata(master);
+	struct spi_qup *qup = spi_controller_get_devdata(host);
 	size_t dma_align = dma_get_cache_alignment();
 	int n_words;
 
 	if (xfer->rx_buf) {
 		if (!IS_ALIGNED((size_t)xfer->rx_buf, dma_align) ||
-		    IS_ERR_OR_NULL(master->dma_rx))
+		    IS_ERR_OR_NULL(host->dma_rx))
 			return false;
 		if (qup->qup_v1 && (xfer->len % qup->in_blk_sz))
 			return false;
@@ -900,7 +900,7 @@ static bool spi_qup_can_dma(struct spi_master *master, struct spi_device *spi,
 
 	if (xfer->tx_buf) {
 		if (!IS_ALIGNED((size_t)xfer->tx_buf, dma_align) ||
-		    IS_ERR_OR_NULL(master->dma_tx))
+		    IS_ERR_OR_NULL(host->dma_tx))
 			return false;
 		if (qup->qup_v1 && (xfer->len % qup->out_blk_sz))
 			return false;
@@ -913,30 +913,30 @@ static bool spi_qup_can_dma(struct spi_master *master, struct spi_device *spi,
 	return true;
 }
 
-static void spi_qup_release_dma(struct spi_master *master)
+static void spi_qup_release_dma(struct spi_controller *host)
 {
-	if (!IS_ERR_OR_NULL(master->dma_rx))
-		dma_release_channel(master->dma_rx);
-	if (!IS_ERR_OR_NULL(master->dma_tx))
-		dma_release_channel(master->dma_tx);
+	if (!IS_ERR_OR_NULL(host->dma_rx))
+		dma_release_channel(host->dma_rx);
+	if (!IS_ERR_OR_NULL(host->dma_tx))
+		dma_release_channel(host->dma_tx);
 }
 
-static int spi_qup_init_dma(struct spi_master *master, resource_size_t base)
+static int spi_qup_init_dma(struct spi_controller *host, resource_size_t base)
 {
-	struct spi_qup *spi = spi_master_get_devdata(master);
+	struct spi_qup *spi = spi_controller_get_devdata(host);
 	struct dma_slave_config *rx_conf = &spi->rx_conf,
 				*tx_conf = &spi->tx_conf;
 	struct device *dev = spi->dev;
 	int ret;
 
 	/* allocate dma resources, if available */
-	master->dma_rx = dma_request_chan(dev, "rx");
-	if (IS_ERR(master->dma_rx))
-		return PTR_ERR(master->dma_rx);
+	host->dma_rx = dma_request_chan(dev, "rx");
+	if (IS_ERR(host->dma_rx))
+		return PTR_ERR(host->dma_rx);
 
-	master->dma_tx = dma_request_chan(dev, "tx");
-	if (IS_ERR(master->dma_tx)) {
-		ret = PTR_ERR(master->dma_tx);
+	host->dma_tx = dma_request_chan(dev, "tx");
+	if (IS_ERR(host->dma_tx)) {
+		ret = PTR_ERR(host->dma_tx);
 		goto err_tx;
 	}
 
@@ -951,13 +951,13 @@ static int spi_qup_init_dma(struct spi_master *master, resource_size_t base)
 	tx_conf->dst_addr = base + QUP_OUTPUT_FIFO;
 	tx_conf->dst_maxburst = spi->out_blk_sz;
 
-	ret = dmaengine_slave_config(master->dma_rx, rx_conf);
+	ret = dmaengine_slave_config(host->dma_rx, rx_conf);
 	if (ret) {
 		dev_err(dev, "failed to configure RX channel\n");
 		goto err;
 	}
 
-	ret = dmaengine_slave_config(master->dma_tx, tx_conf);
+	ret = dmaengine_slave_config(host->dma_tx, tx_conf);
 	if (ret) {
 		dev_err(dev, "failed to configure TX channel\n");
 		goto err;
@@ -966,9 +966,9 @@ static int spi_qup_init_dma(struct spi_master *master, resource_size_t base)
 	return 0;
 
 err:
-	dma_release_channel(master->dma_tx);
+	dma_release_channel(host->dma_tx);
 err_tx:
-	dma_release_channel(master->dma_rx);
+	dma_release_channel(host->dma_rx);
 	return ret;
 }
 
@@ -978,7 +978,7 @@ static void spi_qup_set_cs(struct spi_device *spi, bool val)
 	u32 spi_ioc;
 	u32 spi_ioc_orig;
 
-	controller = spi_master_get_devdata(spi->master);
+	controller = spi_controller_get_devdata(spi->controller);
 	spi_ioc = readl_relaxed(controller->base + SPI_IO_CONTROL);
 	spi_ioc_orig = spi_ioc;
 	if (!val)
@@ -992,7 +992,7 @@ static void spi_qup_set_cs(struct spi_device *spi, bool val)
 
 static int spi_qup_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct clk *iclk, *cclk;
 	struct spi_qup *controller;
 	struct resource *res;
@@ -1027,34 +1027,34 @@ static int spi_qup_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	master = spi_alloc_master(dev, sizeof(struct spi_qup));
-	if (!master) {
-		dev_err(dev, "cannot allocate master\n");
+	host = spi_alloc_host(dev, sizeof(struct spi_qup));
+	if (!host) {
+		dev_err(dev, "cannot allocate host\n");
 		return -ENOMEM;
 	}
 
 	/* use num-cs unless not present or out of range */
 	if (of_property_read_u32(dev->of_node, "num-cs", &num_cs) ||
 	    num_cs > SPI_NUM_CHIPSELECTS)
-		master->num_chipselect = SPI_NUM_CHIPSELECTS;
+		host->num_chipselect = SPI_NUM_CHIPSELECTS;
 	else
-		master->num_chipselect = num_cs;
+		host->num_chipselect = num_cs;
 
-	master->use_gpio_descriptors = true;
-	master->max_native_cs = SPI_NUM_CHIPSELECTS;
-	master->bus_num = pdev->id;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
-	master->max_speed_hz = max_freq;
-	master->transfer_one = spi_qup_transfer_one;
-	master->dev.of_node = pdev->dev.of_node;
-	master->auto_runtime_pm = true;
-	master->dma_alignment = dma_get_cache_alignment();
-	master->max_dma_len = SPI_MAX_XFER;
+	host->use_gpio_descriptors = true;
+	host->max_native_cs = SPI_NUM_CHIPSELECTS;
+	host->bus_num = pdev->id;
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
+	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
+	host->max_speed_hz = max_freq;
+	host->transfer_one = spi_qup_transfer_one;
+	host->dev.of_node = pdev->dev.of_node;
+	host->auto_runtime_pm = true;
+	host->dma_alignment = dma_get_cache_alignment();
+	host->max_dma_len = SPI_MAX_XFER;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
-	controller = spi_master_get_devdata(master);
+	controller = spi_controller_get_devdata(host);
 
 	controller->dev = dev;
 	controller->base = base;
@@ -1062,16 +1062,16 @@ static int spi_qup_probe(struct platform_device *pdev)
 	controller->cclk = cclk;
 	controller->irq = irq;
 
-	ret = spi_qup_init_dma(master, res->start);
+	ret = spi_qup_init_dma(host, res->start);
 	if (ret == -EPROBE_DEFER)
 		goto error;
 	else if (!ret)
-		master->can_dma = spi_qup_can_dma;
+		host->can_dma = spi_qup_can_dma;
 
 	controller->qup_v1 = (uintptr_t)of_device_get_match_data(dev);
 
 	if (!controller->qup_v1)
-		master->set_cs = spi_qup_set_cs;
+		host->set_cs = spi_qup_set_cs;
 
 	spin_lock_init(&controller->lock);
 	init_completion(&controller->done);
@@ -1149,7 +1149,7 @@ static int spi_qup_probe(struct platform_device *pdev)
 	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
 
-	ret = devm_spi_register_master(dev, master);
+	ret = devm_spi_register_controller(dev, host);
 	if (ret)
 		goto disable_pm;
 
@@ -1161,17 +1161,17 @@ error_clk:
 	clk_disable_unprepare(cclk);
 	clk_disable_unprepare(iclk);
 error_dma:
-	spi_qup_release_dma(master);
+	spi_qup_release_dma(host);
 error:
-	spi_master_put(master);
+	spi_controller_put(host);
 	return ret;
 }
 
 #ifdef CONFIG_PM
 static int spi_qup_pm_suspend_runtime(struct device *device)
 {
-	struct spi_master *master = dev_get_drvdata(device);
-	struct spi_qup *controller = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(device);
+	struct spi_qup *controller = spi_controller_get_devdata(host);
 	u32 config;
 
 	/* Enable clocks auto gaiting */
@@ -1187,8 +1187,8 @@ static int spi_qup_pm_suspend_runtime(struct device *device)
 
 static int spi_qup_pm_resume_runtime(struct device *device)
 {
-	struct spi_master *master = dev_get_drvdata(device);
-	struct spi_qup *controller = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(device);
+	struct spi_qup *controller = spi_controller_get_devdata(host);
 	u32 config;
 	int ret;
 
@@ -1213,8 +1213,8 @@ static int spi_qup_pm_resume_runtime(struct device *device)
 #ifdef CONFIG_PM_SLEEP
 static int spi_qup_suspend(struct device *device)
 {
-	struct spi_master *master = dev_get_drvdata(device);
-	struct spi_qup *controller = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(device);
+	struct spi_qup *controller = spi_controller_get_devdata(host);
 	int ret;
 
 	if (pm_runtime_suspended(device)) {
@@ -1222,7 +1222,7 @@ static int spi_qup_suspend(struct device *device)
 		if (ret)
 			return ret;
 	}
-	ret = spi_master_suspend(master);
+	ret = spi_controller_suspend(host);
 	if (ret)
 		return ret;
 
@@ -1237,8 +1237,8 @@ static int spi_qup_suspend(struct device *device)
 
 static int spi_qup_resume(struct device *device)
 {
-	struct spi_master *master = dev_get_drvdata(device);
-	struct spi_qup *controller = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(device);
+	struct spi_qup *controller = spi_controller_get_devdata(host);
 	int ret;
 
 	ret = clk_prepare_enable(controller->iclk);
@@ -1255,7 +1255,7 @@ static int spi_qup_resume(struct device *device)
 	if (ret)
 		goto disable_clk;
 
-	ret = spi_master_resume(master);
+	ret = spi_controller_resume(host);
 	if (ret)
 		goto disable_clk;
 
@@ -1270,8 +1270,8 @@ disable_clk:
 
 static void spi_qup_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = dev_get_drvdata(&pdev->dev);
-	struct spi_qup *controller = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(&pdev->dev);
+	struct spi_qup *controller = spi_controller_get_devdata(host);
 	int ret;
 
 	ret = pm_runtime_get_sync(&pdev->dev);
@@ -1289,7 +1289,7 @@ static void spi_qup_remove(struct platform_device *pdev)
 			 ERR_PTR(ret));
 	}
 
-	spi_qup_release_dma(master);
+	spi_qup_release_dma(host);
 
 	pm_runtime_put_noidle(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);

From e6302d00d985d49f891d4b007afdae824d873edf Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:41 +0800
Subject: [PATCH 1617/2047] spi: rb4xx: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-11-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rb4xx.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/spi/spi-rb4xx.c b/drivers/spi/spi-rb4xx.c
index c817889a7797..225f75550780 100644
--- a/drivers/spi/spi-rb4xx.c
+++ b/drivers/spi/spi-rb4xx.c
@@ -80,7 +80,7 @@ static void do_spi_byte_two(struct rb4xx_spi *rbspi, u32 spi_ioc, u8 byte)
 
 static void rb4xx_set_cs(struct spi_device *spi, bool enable)
 {
-	struct rb4xx_spi *rbspi = spi_master_get_devdata(spi->master);
+	struct rb4xx_spi *rbspi = spi_controller_get_devdata(spi->controller);
 
 	/*
 	 * Setting CS is done along with bitbanging the actual values,
@@ -92,10 +92,10 @@ static void rb4xx_set_cs(struct spi_device *spi, bool enable)
 			    AR71XX_SPI_IOC_CS0 | AR71XX_SPI_IOC_CS1);
 }
 
-static int rb4xx_transfer_one(struct spi_master *master,
+static int rb4xx_transfer_one(struct spi_controller *host,
 			      struct spi_device *spi, struct spi_transfer *t)
 {
-	struct rb4xx_spi *rbspi = spi_master_get_devdata(master);
+	struct rb4xx_spi *rbspi = spi_controller_get_devdata(host);
 	int i;
 	u32 spi_ioc;
 	u8 *rx_buf;
@@ -126,14 +126,14 @@ static int rb4xx_transfer_one(struct spi_master *master,
 			continue;
 		rx_buf[i] = rb4xx_read(rbspi, AR71XX_SPI_REG_RDS);
 	}
-	spi_finalize_current_transfer(master);
+	spi_finalize_current_transfer(host);
 
 	return 0;
 }
 
 static int rb4xx_spi_probe(struct platform_device *pdev)
 {
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct clk *ahb_clk;
 	struct rb4xx_spi *rbspi;
 	int err;
@@ -143,31 +143,31 @@ static int rb4xx_spi_probe(struct platform_device *pdev)
 	if (IS_ERR(spi_base))
 		return PTR_ERR(spi_base);
 
-	master = devm_spi_alloc_master(&pdev->dev, sizeof(*rbspi));
-	if (!master)
+	host = devm_spi_alloc_host(&pdev->dev, sizeof(*rbspi));
+	if (!host)
 		return -ENOMEM;
 
 	ahb_clk = devm_clk_get(&pdev->dev, "ahb");
 	if (IS_ERR(ahb_clk))
 		return PTR_ERR(ahb_clk);
 
-	master->dev.of_node = pdev->dev.of_node;
-	master->bus_num = 0;
-	master->num_chipselect = 3;
-	master->mode_bits = SPI_TX_DUAL;
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->flags = SPI_CONTROLLER_MUST_TX;
-	master->transfer_one = rb4xx_transfer_one;
-	master->set_cs = rb4xx_set_cs;
+	host->dev.of_node = pdev->dev.of_node;
+	host->bus_num = 0;
+	host->num_chipselect = 3;
+	host->mode_bits = SPI_TX_DUAL;
+	host->bits_per_word_mask = SPI_BPW_MASK(8);
+	host->flags = SPI_CONTROLLER_MUST_TX;
+	host->transfer_one = rb4xx_transfer_one;
+	host->set_cs = rb4xx_set_cs;
 
-	rbspi = spi_master_get_devdata(master);
+	rbspi = spi_controller_get_devdata(host);
 	rbspi->base = spi_base;
 	rbspi->clk = ahb_clk;
 	platform_set_drvdata(pdev, rbspi);
 
-	err = devm_spi_register_master(&pdev->dev, master);
+	err = devm_spi_register_controller(&pdev->dev, host);
 	if (err) {
-		dev_err(&pdev->dev, "failed to register SPI master\n");
+		dev_err(&pdev->dev, "failed to register SPI host\n");
 		return err;
 	}
 

From c84036243c6bc0e1b5846e379f5d4b3e4658688e Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:42 +0800
Subject: [PATCH 1618/2047] spi: realtek-rtl: switch to use
 devm_spi_alloc_host()

Switch to use modern name function devm_spi_alloc_host().

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-12-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-realtek-rtl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-realtek-rtl.c b/drivers/spi/spi-realtek-rtl.c
index 866b0477dbd7..0b0123e20b54 100644
--- a/drivers/spi/spi-realtek-rtl.c
+++ b/drivers/spi/spi-realtek-rtl.c
@@ -153,7 +153,7 @@ static int realtek_rtl_spi_probe(struct platform_device *pdev)
 	struct rtspi *rtspi;
 	int err;
 
-	ctrl = devm_spi_alloc_master(&pdev->dev, sizeof(*rtspi));
+	ctrl = devm_spi_alloc_host(&pdev->dev, sizeof(*rtspi));
 	if (!ctrl) {
 		dev_err(&pdev->dev, "Error allocating SPI controller\n");
 		return -ENOMEM;

From 7d5db7251997da069ca22c837a53ca8a84b4036e Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:43 +0800
Subject: [PATCH 1619/2047] spi: rockchip-sfc: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-13-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rockchip-sfc.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c
index bd550e76ab3d..0d7fadcd4ed3 100644
--- a/drivers/spi/spi-rockchip-sfc.c
+++ b/drivers/spi/spi-rockchip-sfc.c
@@ -487,7 +487,7 @@ static int rockchip_sfc_xfer_done(struct rockchip_sfc *sfc, u32 timeout_us)
 
 static int rockchip_sfc_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op)
 {
-	struct rockchip_sfc *sfc = spi_master_get_devdata(mem->spi->master);
+	struct rockchip_sfc *sfc = spi_controller_get_devdata(mem->spi->controller);
 	u32 len = op->data.nbytes;
 	int ret;
 
@@ -523,7 +523,7 @@ static int rockchip_sfc_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op
 
 static int rockchip_sfc_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
 {
-	struct rockchip_sfc *sfc = spi_master_get_devdata(mem->spi->master);
+	struct rockchip_sfc *sfc = spi_controller_get_devdata(mem->spi->controller);
 
 	op->data.nbytes = min(op->data.nbytes, sfc->max_iosize);
 
@@ -557,22 +557,22 @@ static irqreturn_t rockchip_sfc_irq_handler(int irq, void *dev_id)
 static int rockchip_sfc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct rockchip_sfc *sfc;
 	int ret;
 
-	master = devm_spi_alloc_master(&pdev->dev, sizeof(*sfc));
-	if (!master)
+	host = devm_spi_alloc_host(&pdev->dev, sizeof(*sfc));
+	if (!host)
 		return -ENOMEM;
 
-	master->flags = SPI_CONTROLLER_HALF_DUPLEX;
-	master->mem_ops = &rockchip_sfc_mem_ops;
-	master->dev.of_node = pdev->dev.of_node;
-	master->mode_bits = SPI_TX_QUAD | SPI_TX_DUAL | SPI_RX_QUAD | SPI_RX_DUAL;
-	master->max_speed_hz = SFC_MAX_SPEED;
-	master->num_chipselect = SFC_MAX_CHIPSELECT_NUM;
+	host->flags = SPI_CONTROLLER_HALF_DUPLEX;
+	host->mem_ops = &rockchip_sfc_mem_ops;
+	host->dev.of_node = pdev->dev.of_node;
+	host->mode_bits = SPI_TX_QUAD | SPI_TX_DUAL | SPI_RX_QUAD | SPI_RX_DUAL;
+	host->max_speed_hz = SFC_MAX_SPEED;
+	host->num_chipselect = SFC_MAX_CHIPSELECT_NUM;
 
-	sfc = spi_master_get_devdata(master);
+	sfc = spi_controller_get_devdata(host);
 	sfc->dev = dev;
 
 	sfc->regbase = devm_platform_ioremap_resource(pdev, 0);
@@ -640,7 +640,7 @@ static int rockchip_sfc_probe(struct platform_device *pdev)
 	sfc->max_iosize = rockchip_sfc_get_max_iosize(sfc);
 	sfc->version = rockchip_sfc_get_version(sfc);
 
-	ret = spi_register_master(master);
+	ret = spi_register_controller(host);
 	if (ret)
 		goto err_irq;
 
@@ -656,10 +656,10 @@ err_hclk:
 
 static void rockchip_sfc_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
+	struct spi_controller *host = platform_get_drvdata(pdev);
 	struct rockchip_sfc *sfc = platform_get_drvdata(pdev);
 
-	spi_unregister_master(master);
+	spi_unregister_controller(host);
 
 	clk_disable_unprepare(sfc->clk);
 	clk_disable_unprepare(sfc->hclk);

From 1a3ccff3bc39acc04e69e3a65833d474471598ec Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:44 +0800
Subject: [PATCH 1620/2047] spi: rockchip: switch to use modern name

Change legacy name master/slave to modern name host/target or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Heiko Stuebner <heiko@sntech.e>
Link: https://lore.kernel.org/r/20230818093154.1183529-14-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rockchip.c | 58 +++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index a37943847e81..5b010094dace 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -104,8 +104,8 @@
 #define CR0_XFM_RO					0x2
 
 #define CR0_OPM_OFFSET				20
-#define CR0_OPM_MASTER				0x0
-#define CR0_OPM_SLAVE				0x1
+#define CR0_OPM_HOST				0x0
+#define CR0_OPM_TARGET				0x1
 
 #define CR0_SOI_OFFSET				23
 
@@ -125,7 +125,7 @@
 #define SR_TF_EMPTY					(1 << 2)
 #define SR_RF_EMPTY					(1 << 3)
 #define SR_RF_FULL					(1 << 4)
-#define SR_SLAVE_TX_BUSY				(1 << 5)
+#define SR_TARGET_TX_BUSY				(1 << 5)
 
 /* Bit fields in ISR, IMR, ISR, RISR, 5bit */
 #define INT_MASK					0x1f
@@ -151,7 +151,7 @@
 #define RXDMA					(1 << 0)
 #define TXDMA					(1 << 1)
 
-/* sclk_out: spi master internal logic in rk3x can support 50Mhz */
+/* sclk_out: spi host internal logic in rk3x can support 50Mhz */
 #define MAX_SCLK_OUT				50000000U
 
 /*
@@ -194,8 +194,8 @@ struct rockchip_spi {
 
 	bool cs_asserted[ROCKCHIP_SPI_MAX_CS_NUM];
 
-	bool slave_abort;
-	bool cs_inactive; /* spi slave tansmition stop when cs inactive */
+	bool target_abort;
+	bool cs_inactive; /* spi target tansmition stop when cs inactive */
 	bool cs_high_supported; /* native CS supports active-high polarity */
 
 	struct spi_transfer *xfer; /* Store xfer temporarily */
@@ -206,13 +206,13 @@ static inline void spi_enable_chip(struct rockchip_spi *rs, bool enable)
 	writel_relaxed((enable ? 1U : 0U), rs->regs + ROCKCHIP_SPI_SSIENR);
 }
 
-static inline void wait_for_tx_idle(struct rockchip_spi *rs, bool slave_mode)
+static inline void wait_for_tx_idle(struct rockchip_spi *rs, bool target_mode)
 {
 	unsigned long timeout = jiffies + msecs_to_jiffies(5);
 
 	do {
-		if (slave_mode) {
-			if (!(readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_SLAVE_TX_BUSY) &&
+		if (target_mode) {
+			if (!(readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_TARGET_TX_BUSY) &&
 			    !((readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY)))
 				return;
 		} else {
@@ -351,9 +351,9 @@ static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
 	struct spi_controller *ctlr = dev_id;
 	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 
-	/* When int_cs_inactive comes, spi slave abort */
+	/* When int_cs_inactive comes, spi target abort */
 	if (rs->cs_inactive && readl_relaxed(rs->regs + ROCKCHIP_SPI_IMR) & INT_CS_INACTIVE) {
-		ctlr->slave_abort(ctlr);
+		ctlr->target_abort(ctlr);
 		writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
 		writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR);
 
@@ -405,7 +405,7 @@ static void rockchip_spi_dma_rxcb(void *data)
 	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 	int state = atomic_fetch_andnot(RXDMA, &rs->state);
 
-	if (state & TXDMA && !rs->slave_abort)
+	if (state & TXDMA && !rs->target_abort)
 		return;
 
 	if (rs->cs_inactive)
@@ -421,11 +421,11 @@ static void rockchip_spi_dma_txcb(void *data)
 	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 	int state = atomic_fetch_andnot(TXDMA, &rs->state);
 
-	if (state & RXDMA && !rs->slave_abort)
+	if (state & RXDMA && !rs->target_abort)
 		return;
 
 	/* Wait until the FIFO data completely. */
-	wait_for_tx_idle(rs, ctlr->slave);
+	wait_for_tx_idle(rs, ctlr->target);
 
 	spi_enable_chip(rs, false);
 	spi_finalize_current_transfer(ctlr);
@@ -525,7 +525,7 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
 
 static int rockchip_spi_config(struct rockchip_spi *rs,
 		struct spi_device *spi, struct spi_transfer *xfer,
-		bool use_dma, bool slave_mode)
+		bool use_dma, bool target_mode)
 {
 	u32 cr0 = CR0_FRF_SPI  << CR0_FRF_OFFSET
 		| CR0_BHT_8BIT << CR0_BHT_OFFSET
@@ -534,9 +534,9 @@ static int rockchip_spi_config(struct rockchip_spi *rs,
 	u32 cr1;
 	u32 dmacr = 0;
 
-	if (slave_mode)
-		cr0 |= CR0_OPM_SLAVE << CR0_OPM_OFFSET;
-	rs->slave_abort = false;
+	if (target_mode)
+		cr0 |= CR0_OPM_TARGET << CR0_OPM_OFFSET;
+	rs->target_abort = false;
 
 	cr0 |= rs->rsd << CR0_RSD_OFFSET;
 	cr0 |= (spi->mode & 0x3U) << CR0_SCPH_OFFSET;
@@ -614,7 +614,7 @@ static size_t rockchip_spi_max_transfer_size(struct spi_device *spi)
 	return ROCKCHIP_SPI_MAX_TRANLEN;
 }
 
-static int rockchip_spi_slave_abort(struct spi_controller *ctlr)
+static int rockchip_spi_target_abort(struct spi_controller *ctlr)
 {
 	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 	u32 rx_fifo_left;
@@ -659,7 +659,7 @@ out:
 		dmaengine_terminate_sync(ctlr->dma_tx);
 	atomic_set(&rs->state, 0);
 	spi_enable_chip(rs, false);
-	rs->slave_abort = true;
+	rs->target_abort = true;
 	spi_finalize_current_transfer(ctlr);
 
 	return 0;
@@ -697,7 +697,7 @@ static int rockchip_spi_transfer_one(
 	rs->xfer = xfer;
 	use_dma = ctlr->can_dma ? ctlr->can_dma(ctlr, spi, xfer) : false;
 
-	ret = rockchip_spi_config(rs, spi, xfer, use_dma, ctlr->slave);
+	ret = rockchip_spi_config(rs, spi, xfer, use_dma, ctlr->target);
 	if (ret)
 		return ret;
 
@@ -757,15 +757,15 @@ static int rockchip_spi_probe(struct platform_device *pdev)
 	struct resource *mem;
 	struct device_node *np = pdev->dev.of_node;
 	u32 rsd_nsecs, num_cs;
-	bool slave_mode;
+	bool target_mode;
 
-	slave_mode = of_property_read_bool(np, "spi-slave");
+	target_mode = of_property_read_bool(np, "spi-slave");
 
-	if (slave_mode)
-		ctlr = spi_alloc_slave(&pdev->dev,
+	if (target_mode)
+		ctlr = spi_alloc_target(&pdev->dev,
 				sizeof(struct rockchip_spi));
 	else
-		ctlr = spi_alloc_master(&pdev->dev,
+		ctlr = spi_alloc_host(&pdev->dev,
 				sizeof(struct rockchip_spi));
 
 	if (!ctlr)
@@ -854,9 +854,9 @@ static int rockchip_spi_probe(struct platform_device *pdev)
 	ctlr->auto_runtime_pm = true;
 	ctlr->bus_num = pdev->id;
 	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP | SPI_LSB_FIRST;
-	if (slave_mode) {
+	if (target_mode) {
 		ctlr->mode_bits |= SPI_NO_CS;
-		ctlr->slave_abort = rockchip_spi_slave_abort;
+		ctlr->target_abort = rockchip_spi_target_abort;
 	} else {
 		ctlr->flags = SPI_CONTROLLER_GPIO_SS;
 		ctlr->max_native_cs = ROCKCHIP_SPI_MAX_CS_NUM;
@@ -911,7 +911,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
 	case ROCKCHIP_SPI_VER2_TYPE2:
 		rs->cs_high_supported = true;
 		ctlr->mode_bits |= SPI_CS_HIGH;
-		if (ctlr->can_dma && slave_mode)
+		if (ctlr->can_dma && target_mode)
 			rs->cs_inactive = true;
 		else
 			rs->cs_inactive = false;

From 1405efe76f2a242832a53b56deda15f17d51219b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:45 +0800
Subject: [PATCH 1621/2047] spi: rspi: switch to use spi_alloc_host()

Switch to use modern name function spi_alloc_host().

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230818093154.1183529-15-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rspi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 83bb16995e06..8e81f1a8623f 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -1294,7 +1294,7 @@ static int rspi_probe(struct platform_device *pdev)
 	const struct spi_ops *ops;
 	unsigned long clksrc;
 
-	ctlr = spi_alloc_master(&pdev->dev, sizeof(struct rspi_data));
+	ctlr = spi_alloc_host(&pdev->dev, sizeof(struct rspi_data));
 	if (ctlr == NULL)
 		return -ENOMEM;
 

From 6536da62e4378caafa309ae372ca88f065f3e47c Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:46 +0800
Subject: [PATCH 1622/2047] spi: rzv2m-csi: switch to use devm_spi_alloc_host()

Switch to use modern name function devm_spi_alloc_host().

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230818093154.1183529-16-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rzv2m-csi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-rzv2m-csi.c b/drivers/spi/spi-rzv2m-csi.c
index d098aefa370d..d0f51b17aa7c 100644
--- a/drivers/spi/spi-rzv2m-csi.c
+++ b/drivers/spi/spi-rzv2m-csi.c
@@ -555,7 +555,7 @@ static int rzv2m_csi_probe(struct platform_device *pdev)
 	int irq;
 	int ret;
 
-	controller = devm_spi_alloc_master(dev, sizeof(*csi));
+	controller = devm_spi_alloc_host(dev, sizeof(*csi));
 	if (!controller)
 		return -ENOMEM;
 

From f4bc49eda254c6f49c6679149d31e93271d6ee74 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:47 +0800
Subject: [PATCH 1623/2047] spi: s3c64xx: switch to use modern name

Change legacy name master/slave to modern name host/target or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-17-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-s3c64xx.c | 126 +++++++++++++++++++-------------------
 1 file changed, 63 insertions(+), 63 deletions(-)

diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index bf7cb18be9dc..0e48ffd499b9 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -163,9 +163,9 @@ struct s3c64xx_spi_port_config {
  * struct s3c64xx_spi_driver_data - Runtime info holder for SPI driver.
  * @clk: Pointer to the spi clock.
  * @src_clk: Pointer to the clock used to generate SPI signals.
- * @ioclk: Pointer to the i/o clock between master and slave
+ * @ioclk: Pointer to the i/o clock between host and target
  * @pdev: Pointer to device's platform device data
- * @master: Pointer to the SPI Protocol master.
+ * @host: Pointer to the SPI Protocol host.
  * @cntrlr_info: Platform specific data for the controller this driver manages.
  * @lock: Controller specific lock.
  * @state: Set of FLAGS to indicate status.
@@ -186,7 +186,7 @@ struct s3c64xx_spi_driver_data {
 	struct clk                      *src_clk;
 	struct clk                      *ioclk;
 	struct platform_device          *pdev;
-	struct spi_master               *master;
+	struct spi_controller           *host;
 	struct s3c64xx_spi_info         *cntrlr_info;
 	spinlock_t                      lock;
 	unsigned long                   sfr_start;
@@ -329,7 +329,7 @@ static int prepare_dma(struct s3c64xx_spi_dma_data *dma,
 static void s3c64xx_spi_set_cs(struct spi_device *spi, bool enable)
 {
 	struct s3c64xx_spi_driver_data *sdd =
-					spi_master_get_devdata(spi->master);
+					spi_controller_get_devdata(spi->controller);
 
 	if (sdd->cntrlr_info->no_cs)
 		return;
@@ -351,9 +351,9 @@ static void s3c64xx_spi_set_cs(struct spi_device *spi, bool enable)
 	}
 }
 
-static int s3c64xx_spi_prepare_transfer(struct spi_master *spi)
+static int s3c64xx_spi_prepare_transfer(struct spi_controller *spi)
 {
-	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(spi);
+	struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(spi);
 
 	if (is_polling(sdd))
 		return 0;
@@ -381,9 +381,9 @@ static int s3c64xx_spi_prepare_transfer(struct spi_master *spi)
 	return 0;
 }
 
-static int s3c64xx_spi_unprepare_transfer(struct spi_master *spi)
+static int s3c64xx_spi_unprepare_transfer(struct spi_controller *spi)
 {
-	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(spi);
+	struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(spi);
 
 	if (is_polling(sdd))
 		return 0;
@@ -399,11 +399,11 @@ static int s3c64xx_spi_unprepare_transfer(struct spi_master *spi)
 	return 0;
 }
 
-static bool s3c64xx_spi_can_dma(struct spi_master *master,
+static bool s3c64xx_spi_can_dma(struct spi_controller *host,
 				struct spi_device *spi,
 				struct spi_transfer *xfer)
 {
-	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master);
+	struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
 
 	if (sdd->rx_dma.ch && sdd->tx_dma.ch) {
 		return xfer->len > (FIFO_LVL_MASK(sdd) >> 1) + 1;
@@ -712,10 +712,10 @@ static int s3c64xx_spi_config(struct s3c64xx_spi_driver_data *sdd)
 
 #define XFER_DMAADDR_INVALID DMA_BIT_MASK(32)
 
-static int s3c64xx_spi_prepare_message(struct spi_master *master,
+static int s3c64xx_spi_prepare_message(struct spi_controller *host,
 				       struct spi_message *msg)
 {
-	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master);
+	struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
 	struct spi_device *spi = msg->spi;
 	struct s3c64xx_spi_csinfo *cs = spi->controller_data;
 
@@ -736,11 +736,11 @@ static size_t s3c64xx_spi_max_transfer_size(struct spi_device *spi)
 	return ctlr->can_dma ? S3C64XX_SPI_PACKET_CNT_MASK : SIZE_MAX;
 }
 
-static int s3c64xx_spi_transfer_one(struct spi_master *master,
+static int s3c64xx_spi_transfer_one(struct spi_controller *host,
 				    struct spi_device *spi,
 				    struct spi_transfer *xfer)
 {
-	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master);
+	struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
 	const unsigned int fifo_len = (FIFO_LVL_MASK(sdd) >> 1) + 1;
 	const void *tx_buf = NULL;
 	void *rx_buf = NULL;
@@ -890,15 +890,15 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master,
 	return status;
 }
 
-static struct s3c64xx_spi_csinfo *s3c64xx_get_slave_ctrldata(
+static struct s3c64xx_spi_csinfo *s3c64xx_get_target_ctrldata(
 				struct spi_device *spi)
 {
 	struct s3c64xx_spi_csinfo *cs;
-	struct device_node *slave_np, *data_np = NULL;
+	struct device_node *target_np, *data_np = NULL;
 	u32 fb_delay = 0;
 
-	slave_np = spi->dev.of_node;
-	if (!slave_np) {
+	target_np = spi->dev.of_node;
+	if (!target_np) {
 		dev_err(&spi->dev, "device node not found\n");
 		return ERR_PTR(-EINVAL);
 	}
@@ -907,7 +907,7 @@ static struct s3c64xx_spi_csinfo *s3c64xx_get_slave_ctrldata(
 	if (!cs)
 		return ERR_PTR(-ENOMEM);
 
-	data_np = of_get_child_by_name(slave_np, "controller-data");
+	data_np = of_get_child_by_name(target_np, "controller-data");
 	if (!data_np) {
 		dev_info(&spi->dev, "feedback delay set to default (0)\n");
 		return cs;
@@ -932,9 +932,9 @@ static int s3c64xx_spi_setup(struct spi_device *spi)
 	int err;
 	int div;
 
-	sdd = spi_master_get_devdata(spi->master);
+	sdd = spi_controller_get_devdata(spi->controller);
 	if (spi->dev.of_node) {
-		cs = s3c64xx_get_slave_ctrldata(spi);
+		cs = s3c64xx_get_target_ctrldata(spi);
 		spi->controller_data = cs;
 	}
 
@@ -1022,7 +1022,7 @@ static void s3c64xx_spi_cleanup(struct spi_device *spi)
 static irqreturn_t s3c64xx_spi_irq(int irq, void *data)
 {
 	struct s3c64xx_spi_driver_data *sdd = data;
-	struct spi_master *spi = sdd->master;
+	struct spi_controller *spi = sdd->host;
 	unsigned int val, clr = 0;
 
 	val = readl(sdd->regs + S3C64XX_SPI_STATUS);
@@ -1151,7 +1151,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
 	struct resource	*mem_res;
 	struct s3c64xx_spi_driver_data *sdd;
 	struct s3c64xx_spi_info *sci = dev_get_platdata(&pdev->dev);
-	struct spi_master *master;
+	struct spi_controller *host;
 	int ret, irq;
 	char clk_name[16];
 
@@ -1169,16 +1169,16 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	master = devm_spi_alloc_master(&pdev->dev, sizeof(*sdd));
-	if (!master)
+	host = devm_spi_alloc_host(&pdev->dev, sizeof(*sdd));
+	if (!host)
 		return dev_err_probe(&pdev->dev, -ENOMEM,
-				     "Unable to allocate SPI Master\n");
+				     "Unable to allocate SPI Host\n");
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
-	sdd = spi_master_get_devdata(master);
+	sdd = spi_controller_get_devdata(host);
 	sdd->port_conf = s3c64xx_spi_get_port_config(pdev);
-	sdd->master = master;
+	sdd->host = host;
 	sdd->cntrlr_info = sci;
 	sdd->pdev = pdev;
 	if (pdev->dev.of_node) {
@@ -1196,27 +1196,27 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
 	sdd->tx_dma.direction = DMA_MEM_TO_DEV;
 	sdd->rx_dma.direction = DMA_DEV_TO_MEM;
 
-	master->dev.of_node = pdev->dev.of_node;
-	master->bus_num = sdd->port_id;
-	master->setup = s3c64xx_spi_setup;
-	master->cleanup = s3c64xx_spi_cleanup;
-	master->prepare_transfer_hardware = s3c64xx_spi_prepare_transfer;
-	master->unprepare_transfer_hardware = s3c64xx_spi_unprepare_transfer;
-	master->prepare_message = s3c64xx_spi_prepare_message;
-	master->transfer_one = s3c64xx_spi_transfer_one;
-	master->max_transfer_size = s3c64xx_spi_max_transfer_size;
-	master->num_chipselect = sci->num_cs;
-	master->use_gpio_descriptors = true;
-	master->dma_alignment = 8;
-	master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) |
-					SPI_BPW_MASK(8);
+	host->dev.of_node = pdev->dev.of_node;
+	host->bus_num = sdd->port_id;
+	host->setup = s3c64xx_spi_setup;
+	host->cleanup = s3c64xx_spi_cleanup;
+	host->prepare_transfer_hardware = s3c64xx_spi_prepare_transfer;
+	host->unprepare_transfer_hardware = s3c64xx_spi_unprepare_transfer;
+	host->prepare_message = s3c64xx_spi_prepare_message;
+	host->transfer_one = s3c64xx_spi_transfer_one;
+	host->max_transfer_size = s3c64xx_spi_max_transfer_size;
+	host->num_chipselect = sci->num_cs;
+	host->use_gpio_descriptors = true;
+	host->dma_alignment = 8;
+	host->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) |
+				   SPI_BPW_MASK(8);
 	/* the spi->mode bits understood by this driver: */
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	host->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 	if (sdd->port_conf->has_loopback)
-		master->mode_bits |= SPI_LOOP;
-	master->auto_runtime_pm = true;
+		host->mode_bits |= SPI_LOOP;
+	host->auto_runtime_pm = true;
 	if (!is_polling(sdd))
-		master->can_dma = s3c64xx_spi_can_dma;
+		host->can_dma = s3c64xx_spi_can_dma;
 
 	sdd->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &mem_res);
 	if (IS_ERR(sdd->regs))
@@ -1271,14 +1271,14 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
 	       S3C64XX_SPI_INT_TX_OVERRUN_EN | S3C64XX_SPI_INT_TX_UNDERRUN_EN,
 	       sdd->regs + S3C64XX_SPI_INT_EN);
 
-	ret = devm_spi_register_master(&pdev->dev, master);
+	ret = devm_spi_register_controller(&pdev->dev, host);
 	if (ret != 0) {
-		dev_err(&pdev->dev, "cannot register SPI master: %d\n", ret);
+		dev_err(&pdev->dev, "cannot register SPI host: %d\n", ret);
 		goto err_pm_put;
 	}
 
-	dev_dbg(&pdev->dev, "Samsung SoC SPI Driver loaded for Bus SPI-%d with %d Slaves attached\n",
-					sdd->port_id, master->num_chipselect);
+	dev_dbg(&pdev->dev, "Samsung SoC SPI Driver loaded for Bus SPI-%d with %d Targets attached\n",
+					sdd->port_id, host->num_chipselect);
 	dev_dbg(&pdev->dev, "\tIOmem=[%pR]\tFIFO %dbytes\n",
 					mem_res, (FIFO_LVL_MASK(sdd) >> 1) + 1);
 
@@ -1297,8 +1297,8 @@ err_pm_put:
 
 static void s3c64xx_spi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
 
 	pm_runtime_get_sync(&pdev->dev);
 
@@ -1317,10 +1317,10 @@ static void s3c64xx_spi_remove(struct platform_device *pdev)
 #ifdef CONFIG_PM_SLEEP
 static int s3c64xx_spi_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
 
-	int ret = spi_master_suspend(master);
+	int ret = spi_controller_suspend(host);
 	if (ret)
 		return ret;
 
@@ -1335,8 +1335,8 @@ static int s3c64xx_spi_suspend(struct device *dev)
 
 static int s3c64xx_spi_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
 	struct s3c64xx_spi_info *sci = sdd->cntrlr_info;
 	int ret;
 
@@ -1347,15 +1347,15 @@ static int s3c64xx_spi_resume(struct device *dev)
 	if (ret < 0)
 		return ret;
 
-	return spi_master_resume(master);
+	return spi_controller_resume(host);
 }
 #endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_PM
 static int s3c64xx_spi_runtime_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
 
 	clk_disable_unprepare(sdd->clk);
 	clk_disable_unprepare(sdd->src_clk);
@@ -1366,8 +1366,8 @@ static int s3c64xx_spi_runtime_suspend(struct device *dev)
 
 static int s3c64xx_spi_runtime_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
 	int ret;
 
 	if (sdd->port_conf->clk_ioclk) {

From 82a779231805c388e09921f4a28d9daebf95aeda Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:48 +0800
Subject: [PATCH 1624/2047] spi: sc18is602: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-18-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sc18is602.c | 48 ++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index 9f98bf350697..eecf9ea95ae3 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -30,7 +30,7 @@ enum chips { sc18is602, sc18is602b, sc18is603 };
 #define SC18IS602_MODE_CLOCK_DIV_128	0x3
 
 struct sc18is602 {
-	struct spi_master	*master;
+	struct spi_controller	*host;
 	struct device		*dev;
 	u8			ctrl;
 	u32			freq;
@@ -179,10 +179,10 @@ static int sc18is602_check_transfer(struct spi_device *spi,
 	return 0;
 }
 
-static int sc18is602_transfer_one(struct spi_master *master,
+static int sc18is602_transfer_one(struct spi_controller *host,
 				  struct spi_message *m)
 {
-	struct sc18is602 *hw = spi_master_get_devdata(master);
+	struct sc18is602 *hw = spi_controller_get_devdata(host);
 	struct spi_device *spi = m->spi;
 	struct spi_transfer *t;
 	int status = 0;
@@ -213,7 +213,7 @@ static int sc18is602_transfer_one(struct spi_master *master,
 		spi_transfer_delay_exec(t);
 	}
 	m->status = status;
-	spi_finalize_current_message(master);
+	spi_finalize_current_message(host);
 
 	return status;
 }
@@ -225,7 +225,7 @@ static size_t sc18is602_max_transfer_size(struct spi_device *spi)
 
 static int sc18is602_setup(struct spi_device *spi)
 {
-	struct sc18is602 *hw = spi_master_get_devdata(spi->master);
+	struct sc18is602 *hw = spi_controller_get_devdata(spi->controller);
 
 	/* SC18IS602 does not support CS2 */
 	if (hw->id == sc18is602 && (spi_get_chipselect(spi, 0) == 2))
@@ -241,17 +241,17 @@ static int sc18is602_probe(struct i2c_client *client)
 	struct device_node *np = dev->of_node;
 	struct sc18is602_platform_data *pdata = dev_get_platdata(dev);
 	struct sc18is602 *hw;
-	struct spi_master *master;
+	struct spi_controller *host;
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C |
 				     I2C_FUNC_SMBUS_WRITE_BYTE_DATA))
 		return -EINVAL;
 
-	master = devm_spi_alloc_master(dev, sizeof(struct sc18is602));
-	if (!master)
+	host = devm_spi_alloc_host(dev, sizeof(struct sc18is602));
+	if (!host)
 		return -ENOMEM;
 
-	hw = spi_master_get_devdata(master);
+	hw = spi_controller_get_devdata(host);
 	i2c_set_clientdata(client, hw);
 
 	/* assert reset and then release */
@@ -260,7 +260,7 @@ static int sc18is602_probe(struct i2c_client *client)
 		return PTR_ERR(hw->reset);
 	gpiod_set_value_cansleep(hw->reset, 0);
 
-	hw->master = master;
+	hw->host = host;
 	hw->client = client;
 	hw->dev = dev;
 	hw->ctrl = 0xff;
@@ -273,11 +273,11 @@ static int sc18is602_probe(struct i2c_client *client)
 	switch (hw->id) {
 	case sc18is602:
 	case sc18is602b:
-		master->num_chipselect = 4;
+		host->num_chipselect = 4;
 		hw->freq = SC18IS602_CLOCK;
 		break;
 	case sc18is603:
-		master->num_chipselect = 2;
+		host->num_chipselect = 2;
 		if (pdata) {
 			hw->freq = pdata->clock_frequency;
 		} else {
@@ -292,18 +292,18 @@ static int sc18is602_probe(struct i2c_client *client)
 			hw->freq = SC18IS602_CLOCK;
 		break;
 	}
-	master->bus_num = np ? -1 : client->adapter->nr;
-	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_LSB_FIRST;
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->setup = sc18is602_setup;
-	master->transfer_one_message = sc18is602_transfer_one;
-	master->max_transfer_size = sc18is602_max_transfer_size;
-	master->max_message_size = sc18is602_max_transfer_size;
-	master->dev.of_node = np;
-	master->min_speed_hz = hw->freq / 128;
-	master->max_speed_hz = hw->freq / 4;
+	host->bus_num = np ? -1 : client->adapter->nr;
+	host->mode_bits = SPI_CPHA | SPI_CPOL | SPI_LSB_FIRST;
+	host->bits_per_word_mask = SPI_BPW_MASK(8);
+	host->setup = sc18is602_setup;
+	host->transfer_one_message = sc18is602_transfer_one;
+	host->max_transfer_size = sc18is602_max_transfer_size;
+	host->max_message_size = sc18is602_max_transfer_size;
+	host->dev.of_node = np;
+	host->min_speed_hz = hw->freq / 128;
+	host->max_speed_hz = hw->freq / 4;
 
-	return devm_spi_register_master(dev, master);
+	return devm_spi_register_controller(dev, host);
 }
 
 static const struct i2c_device_id sc18is602_id[] = {
@@ -342,6 +342,6 @@ static struct i2c_driver sc18is602_driver = {
 
 module_i2c_driver(sc18is602_driver);
 
-MODULE_DESCRIPTION("SC18IS602/603 SPI Master Driver");
+MODULE_DESCRIPTION("SC18IS602/603 SPI Host Driver");
 MODULE_AUTHOR("Guenter Roeck");
 MODULE_LICENSE("GPL");

From 0c8e5afc879f5b693dd2db68a7db3ffb3dc6823b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:49 +0800
Subject: [PATCH 1625/2047] spi: sh-hspi: switch to use modern name

Change legacy name master to modern name host.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230818093154.1183529-19-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sh-hspi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-sh-hspi.c b/drivers/spi/spi-sh-hspi.c
index d6ffeae66ed3..5d63aa1d28e2 100644
--- a/drivers/spi/spi-sh-hspi.c
+++ b/drivers/spi/spi-sh-hspi.c
@@ -82,7 +82,7 @@ static int hspi_status_check_timeout(struct hspi_priv *hspi, u32 mask, u32 val)
 }
 
 /*
- *		spi master function
+ *		spi host function
  */
 
 #define hspi_hw_cs_enable(hspi)		hspi_hw_cs_ctrl(hspi, 0)
@@ -224,7 +224,7 @@ static int hspi_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	ctlr = spi_alloc_master(&pdev->dev, sizeof(*hspi));
+	ctlr = spi_alloc_host(&pdev->dev, sizeof(*hspi));
 	if (!ctlr)
 		return -ENOMEM;
 

From 1cb3ebc417fe6cc5bea1b37682c0e860ea9ffb2d Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:50 +0800
Subject: [PATCH 1626/2047] spi: sh-msiof: switch to use modern name

Change legacy name master/slave to modern name host/target.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230818093154.1183529-20-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sh-msiof.c   | 50 ++++++++++++++++++------------------
 include/linux/spi/sh_msiof.h |  4 +--
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 5c75b5c58d97..fb452bc78372 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -54,7 +54,7 @@ struct sh_msiof_spi_priv {
 	dma_addr_t rx_dma_addr;
 	bool native_cs_inited;
 	bool native_cs_high;
-	bool slave_aborted;
+	bool target_aborted;
 };
 
 #define MAX_SS	3	/* Maximum number of native chip selects */
@@ -361,7 +361,7 @@ static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p, u32 ss,
 	tmp |= !cs_high << SIMDR1_SYNCAC_SHIFT;
 	tmp |= lsb_first << SIMDR1_BITLSB_SHIFT;
 	tmp |= sh_msiof_spi_get_dtdl_and_syncdl(p);
-	if (spi_controller_is_slave(p->ctlr)) {
+	if (spi_controller_is_target(p->ctlr)) {
 		sh_msiof_write(p, SITMDR1, tmp | SITMDR1_PCON);
 	} else {
 		sh_msiof_write(p, SITMDR1,
@@ -553,7 +553,7 @@ static int sh_msiof_spi_setup(struct spi_device *spi)
 		spi_controller_get_devdata(spi->controller);
 	u32 clr, set, tmp;
 
-	if (spi_get_csgpiod(spi, 0) || spi_controller_is_slave(p->ctlr))
+	if (spi_get_csgpiod(spi, 0) || spi_controller_is_target(p->ctlr))
 		return 0;
 
 	if (p->native_cs_inited &&
@@ -602,11 +602,11 @@ static int sh_msiof_prepare_message(struct spi_controller *ctlr,
 
 static int sh_msiof_spi_start(struct sh_msiof_spi_priv *p, void *rx_buf)
 {
-	bool slave = spi_controller_is_slave(p->ctlr);
+	bool target = spi_controller_is_target(p->ctlr);
 	int ret = 0;
 
 	/* setup clock and rx/tx signals */
-	if (!slave)
+	if (!target)
 		ret = sh_msiof_modify_ctr_wait(p, 0, SICTR_TSCKE);
 	if (rx_buf && !ret)
 		ret = sh_msiof_modify_ctr_wait(p, 0, SICTR_RXE);
@@ -614,7 +614,7 @@ static int sh_msiof_spi_start(struct sh_msiof_spi_priv *p, void *rx_buf)
 		ret = sh_msiof_modify_ctr_wait(p, 0, SICTR_TXE);
 
 	/* start by setting frame bit */
-	if (!ret && !slave)
+	if (!ret && !target)
 		ret = sh_msiof_modify_ctr_wait(p, 0, SICTR_TFSE);
 
 	return ret;
@@ -622,27 +622,27 @@ static int sh_msiof_spi_start(struct sh_msiof_spi_priv *p, void *rx_buf)
 
 static int sh_msiof_spi_stop(struct sh_msiof_spi_priv *p, void *rx_buf)
 {
-	bool slave = spi_controller_is_slave(p->ctlr);
+	bool target = spi_controller_is_target(p->ctlr);
 	int ret = 0;
 
 	/* shut down frame, rx/tx and clock signals */
-	if (!slave)
+	if (!target)
 		ret = sh_msiof_modify_ctr_wait(p, SICTR_TFSE, 0);
 	if (!ret)
 		ret = sh_msiof_modify_ctr_wait(p, SICTR_TXE, 0);
 	if (rx_buf && !ret)
 		ret = sh_msiof_modify_ctr_wait(p, SICTR_RXE, 0);
-	if (!ret && !slave)
+	if (!ret && !target)
 		ret = sh_msiof_modify_ctr_wait(p, SICTR_TSCKE, 0);
 
 	return ret;
 }
 
-static int sh_msiof_slave_abort(struct spi_controller *ctlr)
+static int sh_msiof_target_abort(struct spi_controller *ctlr)
 {
 	struct sh_msiof_spi_priv *p = spi_controller_get_devdata(ctlr);
 
-	p->slave_aborted = true;
+	p->target_aborted = true;
 	complete(&p->done);
 	complete(&p->done_txdma);
 	return 0;
@@ -651,9 +651,9 @@ static int sh_msiof_slave_abort(struct spi_controller *ctlr)
 static int sh_msiof_wait_for_completion(struct sh_msiof_spi_priv *p,
 					struct completion *x)
 {
-	if (spi_controller_is_slave(p->ctlr)) {
+	if (spi_controller_is_target(p->ctlr)) {
 		if (wait_for_completion_interruptible(x) ||
-		    p->slave_aborted) {
+		    p->target_aborted) {
 			dev_dbg(&p->pdev->dev, "interrupted\n");
 			return -EINTR;
 		}
@@ -699,7 +699,7 @@ static int sh_msiof_spi_txrx_once(struct sh_msiof_spi_priv *p,
 		tx_fifo(p, tx_buf, words, fifo_shift);
 
 	reinit_completion(&p->done);
-	p->slave_aborted = false;
+	p->target_aborted = false;
 
 	ret = sh_msiof_spi_start(p, rx_buf);
 	if (ret) {
@@ -796,7 +796,7 @@ static int sh_msiof_dma_once(struct sh_msiof_spi_priv *p, const void *tx,
 	reinit_completion(&p->done);
 	if (tx)
 		reinit_completion(&p->done_txdma);
-	p->slave_aborted = false;
+	p->target_aborted = false;
 
 	/* Now start DMA */
 	if (rx)
@@ -925,7 +925,7 @@ static int sh_msiof_transfer_one(struct spi_controller *ctlr,
 	sh_msiof_spi_reset_regs(p);
 
 	/* setup clocks (clock already enabled in chipselect()) */
-	if (!spi_controller_is_slave(p->ctlr))
+	if (!spi_controller_is_target(p->ctlr))
 		sh_msiof_spi_set_clk_regs(p, t);
 
 	while (ctlr->dma_tx && len > 15) {
@@ -1101,11 +1101,11 @@ static struct sh_msiof_spi_info *sh_msiof_spi_parse_dt(struct device *dev)
 	if (!info)
 		return NULL;
 
-	info->mode = of_property_read_bool(np, "spi-slave") ? MSIOF_SPI_SLAVE
-							    : MSIOF_SPI_MASTER;
+	info->mode = of_property_read_bool(np, "spi-slave") ? MSIOF_SPI_TARGET
+							    : MSIOF_SPI_HOST;
 
 	/* Parse the MSIOF properties */
-	if (info->mode == MSIOF_SPI_MASTER)
+	if (info->mode == MSIOF_SPI_HOST)
 		of_property_read_u32(np, "num-cs", &num_cs);
 	of_property_read_u32(np, "renesas,tx-fifo-size",
 					&info->tx_fifo_override);
@@ -1279,12 +1279,12 @@ static int sh_msiof_spi_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	if (info->mode == MSIOF_SPI_SLAVE)
-		ctlr = spi_alloc_slave(&pdev->dev,
-				       sizeof(struct sh_msiof_spi_priv));
+	if (info->mode == MSIOF_SPI_TARGET)
+		ctlr = spi_alloc_target(&pdev->dev,
+				        sizeof(struct sh_msiof_spi_priv));
 	else
-		ctlr = spi_alloc_master(&pdev->dev,
-					sizeof(struct sh_msiof_spi_priv));
+		ctlr = spi_alloc_host(&pdev->dev,
+				      sizeof(struct sh_msiof_spi_priv));
 	if (ctlr == NULL)
 		return -ENOMEM;
 
@@ -1347,7 +1347,7 @@ static int sh_msiof_spi_probe(struct platform_device *pdev)
 	ctlr->dev.of_node = pdev->dev.of_node;
 	ctlr->setup = sh_msiof_spi_setup;
 	ctlr->prepare_message = sh_msiof_prepare_message;
-	ctlr->slave_abort = sh_msiof_slave_abort;
+	ctlr->target_abort = sh_msiof_target_abort;
 	ctlr->bits_per_word_mask = chipdata->bits_per_word_mask;
 	ctlr->auto_runtime_pm = true;
 	ctlr->transfer_one = sh_msiof_transfer_one;
diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h
index dc2a0cbd210d..f950d280461b 100644
--- a/include/linux/spi/sh_msiof.h
+++ b/include/linux/spi/sh_msiof.h
@@ -3,8 +3,8 @@
 #define __SPI_SH_MSIOF_H__
 
 enum {
-	MSIOF_SPI_MASTER,
-	MSIOF_SPI_SLAVE,
+	MSIOF_SPI_HOST,
+	MSIOF_SPI_TARGET,
 };
 
 struct sh_msiof_spi_info {

From 91a940bb1075337184c3aa537c6e5b9429380400 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:51 +0800
Subject: [PATCH 1627/2047] spi: sh-sci: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230818093154.1183529-21-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sh-sci.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/spi/spi-sh-sci.c b/drivers/spi/spi-sh-sci.c
index 92ca3f2d61ba..148d615d2f38 100644
--- a/drivers/spi/spi-sh-sci.c
+++ b/drivers/spi/spi-sh-sci.c
@@ -56,17 +56,17 @@ static inline void setbits(struct sh_sci_spi *sp, int bits, int on)
 
 static inline void setsck(struct spi_device *dev, int on)
 {
-	setbits(spi_master_get_devdata(dev->master), PIN_SCK, on);
+	setbits(spi_controller_get_devdata(dev->controller), PIN_SCK, on);
 }
 
 static inline void setmosi(struct spi_device *dev, int on)
 {
-	setbits(spi_master_get_devdata(dev->master), PIN_TXD, on);
+	setbits(spi_controller_get_devdata(dev->controller), PIN_TXD, on);
 }
 
 static inline u32 getmiso(struct spi_device *dev)
 {
-	struct sh_sci_spi *sp = spi_master_get_devdata(dev->master);
+	struct sh_sci_spi *sp = spi_controller_get_devdata(dev->controller);
 
 	return (ioread8(SCSPTR(sp)) & PIN_RXD) ? 1 : 0;
 }
@@ -105,7 +105,7 @@ static u32 sh_sci_spi_txrx_mode3(struct spi_device *spi,
 
 static void sh_sci_spi_chipselect(struct spi_device *dev, int value)
 {
-	struct sh_sci_spi *sp = spi_master_get_devdata(dev->master);
+	struct sh_sci_spi *sp = spi_controller_get_devdata(dev->controller);
 
 	if (sp->info->chip_select)
 		(sp->info->chip_select)(sp->info, spi_get_chipselect(dev, 0), value);
@@ -114,18 +114,18 @@ static void sh_sci_spi_chipselect(struct spi_device *dev, int value)
 static int sh_sci_spi_probe(struct platform_device *dev)
 {
 	struct resource	*r;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct sh_sci_spi *sp;
 	int ret;
 
-	master = spi_alloc_master(&dev->dev, sizeof(struct sh_sci_spi));
-	if (master == NULL) {
-		dev_err(&dev->dev, "failed to allocate spi master\n");
+	host = spi_alloc_host(&dev->dev, sizeof(struct sh_sci_spi));
+	if (host == NULL) {
+		dev_err(&dev->dev, "failed to allocate spi host\n");
 		ret = -ENOMEM;
 		goto err0;
 	}
 
-	sp = spi_master_get_devdata(master);
+	sp = spi_controller_get_devdata(host);
 
 	platform_set_drvdata(dev, sp);
 	sp->info = dev_get_platdata(&dev->dev);
@@ -136,7 +136,7 @@ static int sh_sci_spi_probe(struct platform_device *dev)
 	}
 
 	/* setup spi bitbang adaptor */
-	sp->bitbang.master = master;
+	sp->bitbang.master = host;
 	sp->bitbang.master->bus_num = sp->info->bus_num;
 	sp->bitbang.master->num_chipselect = sp->info->num_chipselect;
 	sp->bitbang.chipselect = sh_sci_spi_chipselect;
@@ -166,7 +166,7 @@ static int sh_sci_spi_probe(struct platform_device *dev)
 	setbits(sp, PIN_INIT, 0);
 	iounmap(sp->membase);
  err1:
-	spi_master_put(sp->bitbang.master);
+	spi_controller_put(sp->bitbang.master);
  err0:
 	return ret;
 }
@@ -178,7 +178,7 @@ static void sh_sci_spi_remove(struct platform_device *dev)
 	spi_bitbang_stop(&sp->bitbang);
 	setbits(sp, PIN_INIT, 0);
 	iounmap(sp->membase);
-	spi_master_put(sp->bitbang.master);
+	spi_controller_put(sp->bitbang.master);
 }
 
 static struct platform_driver sh_sci_spi_drv = {

From 0ec6a15091a17dc5af7a4b23a2d02e554cbe3549 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:52 +0800
Subject: [PATCH 1628/2047] spi: sh: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230818093154.1183529-22-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sh.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/spi/spi-sh.c b/drivers/spi/spi-sh.c
index d358a2a9c3f5..4b873d9a7602 100644
--- a/drivers/spi/spi-sh.c
+++ b/drivers/spi/spi-sh.c
@@ -72,7 +72,7 @@
 struct spi_sh_data {
 	void __iomem *addr;
 	int irq;
-	struct spi_master *master;
+	struct spi_controller *host;
 	unsigned long cr1;
 	wait_queue_head_t wait;
 	int width;
@@ -327,7 +327,7 @@ static int spi_sh_transfer_one_message(struct spi_controller *ctlr,
 
 static int spi_sh_setup(struct spi_device *spi)
 {
-	struct spi_sh_data *ss = spi_master_get_devdata(spi->master);
+	struct spi_sh_data *ss = spi_controller_get_devdata(spi->controller);
 
 	pr_debug("%s: enter\n", __func__);
 
@@ -346,7 +346,7 @@ static int spi_sh_setup(struct spi_device *spi)
 
 static void spi_sh_cleanup(struct spi_device *spi)
 {
-	struct spi_sh_data *ss = spi_master_get_devdata(spi->master);
+	struct spi_sh_data *ss = spi_controller_get_devdata(spi->controller);
 
 	pr_debug("%s: enter\n", __func__);
 
@@ -381,14 +381,14 @@ static void spi_sh_remove(struct platform_device *pdev)
 {
 	struct spi_sh_data *ss = platform_get_drvdata(pdev);
 
-	spi_unregister_master(ss->master);
+	spi_unregister_controller(ss->host);
 	free_irq(ss->irq, ss);
 }
 
 static int spi_sh_probe(struct platform_device *pdev)
 {
 	struct resource *res;
-	struct spi_master *master;
+	struct spi_controller *host;
 	struct spi_sh_data *ss;
 	int ret, irq;
 
@@ -403,13 +403,13 @@ static int spi_sh_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	master = devm_spi_alloc_master(&pdev->dev, sizeof(struct spi_sh_data));
-	if (master == NULL) {
-		dev_err(&pdev->dev, "spi_alloc_master error.\n");
+	host = devm_spi_alloc_host(&pdev->dev, sizeof(struct spi_sh_data));
+	if (host == NULL) {
+		dev_err(&pdev->dev, "devm_spi_alloc_host error.\n");
 		return -ENOMEM;
 	}
 
-	ss = spi_master_get_devdata(master);
+	ss = spi_controller_get_devdata(host);
 	platform_set_drvdata(pdev, ss);
 
 	switch (res->flags & IORESOURCE_MEM_TYPE_MASK) {
@@ -424,7 +424,7 @@ static int spi_sh_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 	ss->irq = irq;
-	ss->master = master;
+	ss->host = host;
 	ss->addr = devm_ioremap(&pdev->dev, res->start, resource_size(res));
 	if (ss->addr == NULL) {
 		dev_err(&pdev->dev, "ioremap error.\n");
@@ -438,15 +438,15 @@ static int spi_sh_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	master->num_chipselect = 2;
-	master->bus_num = pdev->id;
-	master->setup = spi_sh_setup;
-	master->transfer_one_message = spi_sh_transfer_one_message;
-	master->cleanup = spi_sh_cleanup;
+	host->num_chipselect = 2;
+	host->bus_num = pdev->id;
+	host->setup = spi_sh_setup;
+	host->transfer_one_message = spi_sh_transfer_one_message;
+	host->cleanup = spi_sh_cleanup;
 
-	ret = spi_register_master(master);
+	ret = spi_register_controller(host);
 	if (ret < 0) {
-		printk(KERN_ERR "spi_register_master error.\n");
+		printk(KERN_ERR "spi_register_controller error.\n");
 		goto error3;
 	}
 

From 8d9ae783fb706ae98db18d050201d96a9634bd45 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:53 +0800
Subject: [PATCH 1629/2047] spi: sifive: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-23-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sifive.c | 80 ++++++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/spi/spi-sifive.c b/drivers/spi/spi-sifive.c
index 2f77dae85386..cfd17bbb2202 100644
--- a/drivers/spi/spi-sifive.c
+++ b/drivers/spi/spi-sifive.c
@@ -128,9 +128,9 @@ static void sifive_spi_init(struct sifive_spi *spi)
 }
 
 static int
-sifive_spi_prepare_message(struct spi_master *master, struct spi_message *msg)
+sifive_spi_prepare_message(struct spi_controller *host, struct spi_message *msg)
 {
-	struct sifive_spi *spi = spi_master_get_devdata(master);
+	struct sifive_spi *spi = spi_controller_get_devdata(host);
 	struct spi_device *device = msg->spi;
 
 	/* Update the chip select polarity */
@@ -152,7 +152,7 @@ sifive_spi_prepare_message(struct spi_master *master, struct spi_message *msg)
 
 static void sifive_spi_set_cs(struct spi_device *device, bool is_high)
 {
-	struct sifive_spi *spi = spi_master_get_devdata(device->master);
+	struct sifive_spi *spi = spi_controller_get_devdata(device->controller);
 
 	/* Reverse polarity is handled by SCMR/CPOL. Not inverted CS. */
 	if (device->mode & SPI_CS_HIGH)
@@ -252,10 +252,10 @@ static void sifive_spi_rx(struct sifive_spi *spi, u8 *rx_ptr)
 }
 
 static int
-sifive_spi_transfer_one(struct spi_master *master, struct spi_device *device,
+sifive_spi_transfer_one(struct spi_controller *host, struct spi_device *device,
 			struct spi_transfer *t)
 {
-	struct sifive_spi *spi = spi_master_get_devdata(master);
+	struct sifive_spi *spi = spi_controller_get_devdata(host);
 	int poll = sifive_spi_prep_transfer(spi, device, t);
 	const u8 *tx_ptr = t->tx_buf;
 	u8 *rx_ptr = t->rx_buf;
@@ -294,35 +294,35 @@ static int sifive_spi_probe(struct platform_device *pdev)
 	struct sifive_spi *spi;
 	int ret, irq, num_cs;
 	u32 cs_bits, max_bits_per_word;
-	struct spi_master *master;
+	struct spi_controller *host;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(struct sifive_spi));
-	if (!master) {
+	host = spi_alloc_host(&pdev->dev, sizeof(struct sifive_spi));
+	if (!host) {
 		dev_err(&pdev->dev, "out of memory\n");
 		return -ENOMEM;
 	}
 
-	spi = spi_master_get_devdata(master);
+	spi = spi_controller_get_devdata(host);
 	init_completion(&spi->done);
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, host);
 
 	spi->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(spi->regs)) {
 		ret = PTR_ERR(spi->regs);
-		goto put_master;
+		goto put_host;
 	}
 
 	spi->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(spi->clk)) {
 		dev_err(&pdev->dev, "Unable to find bus clock\n");
 		ret = PTR_ERR(spi->clk);
-		goto put_master;
+		goto put_host;
 	}
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
 		ret = irq;
-		goto put_master;
+		goto put_host;
 	}
 
 	/* Optional parameters */
@@ -339,14 +339,14 @@ static int sifive_spi_probe(struct platform_device *pdev)
 	if (!ret && max_bits_per_word < 8) {
 		dev_err(&pdev->dev, "Only 8bit SPI words supported by the driver\n");
 		ret = -EINVAL;
-		goto put_master;
+		goto put_host;
 	}
 
 	/* Spin up the bus clock before hitting registers */
 	ret = clk_prepare_enable(spi->clk);
 	if (ret) {
 		dev_err(&pdev->dev, "Unable to enable bus clock\n");
-		goto put_master;
+		goto put_host;
 	}
 
 	/* probe the number of CS lines */
@@ -362,30 +362,30 @@ static int sifive_spi_probe(struct platform_device *pdev)
 
 	num_cs = ilog2(cs_bits) + 1;
 	if (num_cs > SIFIVE_SPI_MAX_CS) {
-		dev_err(&pdev->dev, "Invalid number of spi slaves\n");
+		dev_err(&pdev->dev, "Invalid number of spi targets\n");
 		ret = -EINVAL;
 		goto disable_clk;
 	}
 
-	/* Define our master */
-	master->dev.of_node = pdev->dev.of_node;
-	master->bus_num = pdev->id;
-	master->num_chipselect = num_cs;
-	master->mode_bits = SPI_CPHA | SPI_CPOL
+	/* Define our host */
+	host->dev.of_node = pdev->dev.of_node;
+	host->bus_num = pdev->id;
+	host->num_chipselect = num_cs;
+	host->mode_bits = SPI_CPHA | SPI_CPOL
 			  | SPI_CS_HIGH | SPI_LSB_FIRST
 			  | SPI_TX_DUAL | SPI_TX_QUAD
 			  | SPI_RX_DUAL | SPI_RX_QUAD;
 	/* TODO: add driver support for bits_per_word < 8
 	 * we need to "left-align" the bits (unless SPI_LSB_FIRST)
 	 */
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->flags = SPI_CONTROLLER_MUST_TX | SPI_CONTROLLER_GPIO_SS;
-	master->prepare_message = sifive_spi_prepare_message;
-	master->set_cs = sifive_spi_set_cs;
-	master->transfer_one = sifive_spi_transfer_one;
+	host->bits_per_word_mask = SPI_BPW_MASK(8);
+	host->flags = SPI_CONTROLLER_MUST_TX | SPI_CONTROLLER_GPIO_SS;
+	host->prepare_message = sifive_spi_prepare_message;
+	host->set_cs = sifive_spi_set_cs;
+	host->transfer_one = sifive_spi_transfer_one;
 
 	pdev->dev.dma_mask = NULL;
-	/* Configure the SPI master hardware */
+	/* Configure the SPI host hardware */
 	sifive_spi_init(spi);
 
 	/* Register for SPI Interrupt */
@@ -397,11 +397,11 @@ static int sifive_spi_probe(struct platform_device *pdev)
 	}
 
 	dev_info(&pdev->dev, "mapped; irq=%d, cs=%d\n",
-		 irq, master->num_chipselect);
+		 irq, host->num_chipselect);
 
-	ret = devm_spi_register_master(&pdev->dev, master);
+	ret = devm_spi_register_controller(&pdev->dev, host);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "spi_register_master failed\n");
+		dev_err(&pdev->dev, "spi_register_host failed\n");
 		goto disable_clk;
 	}
 
@@ -409,16 +409,16 @@ static int sifive_spi_probe(struct platform_device *pdev)
 
 disable_clk:
 	clk_disable_unprepare(spi->clk);
-put_master:
-	spi_master_put(master);
+put_host:
+	spi_controller_put(host);
 
 	return ret;
 }
 
 static void sifive_spi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct sifive_spi *spi = spi_master_get_devdata(master);
+	struct spi_controller *host = platform_get_drvdata(pdev);
+	struct sifive_spi *spi = spi_controller_get_devdata(host);
 
 	/* Disable all the interrupts just in case */
 	sifive_spi_write(spi, SIFIVE_SPI_REG_IE, 0);
@@ -427,11 +427,11 @@ static void sifive_spi_remove(struct platform_device *pdev)
 
 static int sifive_spi_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct sifive_spi *spi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct sifive_spi *spi = spi_controller_get_devdata(host);
 	int ret;
 
-	ret = spi_master_suspend(master);
+	ret = spi_controller_suspend(host);
 	if (ret)
 		return ret;
 
@@ -445,14 +445,14 @@ static int sifive_spi_suspend(struct device *dev)
 
 static int sifive_spi_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct sifive_spi *spi = spi_master_get_devdata(master);
+	struct spi_controller *host = dev_get_drvdata(dev);
+	struct sifive_spi *spi = spi_controller_get_devdata(host);
 	int ret;
 
 	ret = clk_prepare_enable(spi->clk);
 	if (ret)
 		return ret;
-	ret = spi_master_resume(master);
+	ret = spi_controller_resume(host);
 	if (ret)
 		clk_disable_unprepare(spi->clk);
 

From 557efcf4c64f27f1b69af464246bf90b4d52e5be Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Aug 2023 17:31:54 +0800
Subject: [PATCH 1630/2047] spi: spi-sn-f-ospi: switch to use modern name

Change legacy name master to modern name host or controller.

No functional changed.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20230818093154.1183529-24-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sn-f-ospi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-sn-f-ospi.c b/drivers/spi/spi-sn-f-ospi.c
index 85e4a01bc8b0..a7c3b3923b4a 100644
--- a/drivers/spi/spi-sn-f-ospi.c
+++ b/drivers/spi/spi-sn-f-ospi.c
@@ -501,7 +501,7 @@ out:
 
 static int f_ospi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
 {
-	struct f_ospi *ospi = spi_controller_get_devdata(mem->spi->master);
+	struct f_ospi *ospi = spi_controller_get_devdata(mem->spi->controller);
 	int err = 0;
 
 	switch (op->data.dir) {
@@ -606,7 +606,7 @@ static int f_ospi_probe(struct platform_device *pdev)
 	u32 num_cs = OSPI_NUM_CS;
 	int ret;
 
-	ctlr = spi_alloc_master(dev, sizeof(*ospi));
+	ctlr = spi_alloc_host(dev, sizeof(*ospi));
 	if (!ctlr)
 		return -ENOMEM;
 

From 17a7ca35890b411414a71fbeef13cb65fe9511df Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Thu, 17 Aug 2023 10:52:28 +0200
Subject: [PATCH 1631/2047] gpiolib: rename the gpio_device notifier

Change the generic "notifier" name to "line_state_notifier" in order to
reflect its purpose in preparation for adding a second notifier which
will be used to notify wait queues about device unregistering.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Kent Gibson <warthog618@gmail.com>
---
 drivers/gpio/gpiolib-cdev.c | 16 ++++++++--------
 drivers/gpio/gpiolib.c      |  6 +++---
 drivers/gpio/gpiolib.h      |  6 +++---
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 0a33971c964c..9ee8604f32e1 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -230,7 +230,7 @@ static long linehandle_set_config(struct linehandle_state *lh,
 				return ret;
 		}
 
-		blocking_notifier_call_chain(&desc->gdev->notifier,
+		blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
 					     GPIO_V2_LINE_CHANGED_CONFIG,
 					     desc);
 	}
@@ -414,7 +414,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
 				goto out_free_lh;
 		}
 
-		blocking_notifier_call_chain(&desc->gdev->notifier,
+		blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
 					     GPIO_V2_LINE_CHANGED_REQUESTED, desc);
 
 		dev_dbg(&gdev->dev, "registered chardev handle for line %d\n",
@@ -1407,7 +1407,7 @@ static long linereq_set_config_unlocked(struct linereq *lr,
 
 		WRITE_ONCE(line->edflags, edflags);
 
-		blocking_notifier_call_chain(&desc->gdev->notifier,
+		blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
 					     GPIO_V2_LINE_CHANGED_CONFIG,
 					     desc);
 	}
@@ -1720,7 +1720,7 @@ static int linereq_create(struct gpio_device *gdev, void __user *ip)
 
 		lr->lines[i].edflags = edflags;
 
-		blocking_notifier_call_chain(&desc->gdev->notifier,
+		blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
 					     GPIO_V2_LINE_CHANGED_REQUESTED, desc);
 
 		dev_dbg(&gdev->dev, "registered chardev handle for line %d\n",
@@ -2117,7 +2117,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
 	if (ret)
 		goto out_free_le;
 
-	blocking_notifier_call_chain(&desc->gdev->notifier,
+	blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
 				     GPIO_V2_LINE_CHANGED_REQUESTED, desc);
 
 	irq = gpiod_to_irq(desc);
@@ -2671,7 +2671,7 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file)
 	cdev->gdev = gpio_device_get(gdev);
 
 	cdev->lineinfo_changed_nb.notifier_call = lineinfo_changed_notify;
-	ret = blocking_notifier_chain_register(&gdev->notifier,
+	ret = blocking_notifier_chain_register(&gdev->line_state_notifier,
 					       &cdev->lineinfo_changed_nb);
 	if (ret)
 		goto out_free_bitmap;
@@ -2687,7 +2687,7 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file)
 	return ret;
 
 out_unregister_notifier:
-	blocking_notifier_chain_unregister(&gdev->notifier,
+	blocking_notifier_chain_unregister(&gdev->line_state_notifier,
 					   &cdev->lineinfo_changed_nb);
 out_free_bitmap:
 	gpio_device_put(gdev);
@@ -2711,7 +2711,7 @@ static int gpio_chrdev_release(struct inode *inode, struct file *file)
 	struct gpio_device *gdev = cdev->gdev;
 
 	bitmap_free(cdev->watched_lines);
-	blocking_notifier_chain_unregister(&gdev->notifier,
+	blocking_notifier_chain_unregister(&gdev->line_state_notifier,
 					   &cdev->lineinfo_changed_nb);
 	gpio_device_put(gdev);
 	kfree(cdev);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index edab00c9cb3c..38640df1d798 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -847,7 +847,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 
 	spin_unlock_irqrestore(&gpio_lock, flags);
 
-	BLOCKING_INIT_NOTIFIER_HEAD(&gdev->notifier);
+	BLOCKING_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier);
 	init_rwsem(&gdev->sem);
 
 #ifdef CONFIG_PINCTRL
@@ -2177,7 +2177,7 @@ static bool gpiod_free_commit(struct gpio_desc *desc)
 	}
 
 	spin_unlock_irqrestore(&gpio_lock, flags);
-	blocking_notifier_call_chain(&desc->gdev->notifier,
+	blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
 				     GPIOLINE_CHANGED_RELEASED, desc);
 
 	return ret;
@@ -4007,7 +4007,7 @@ static struct gpio_desc *gpiod_find_and_request(struct device *consumer,
 		return ERR_PTR(ret);
 	}
 
-	blocking_notifier_call_chain(&desc->gdev->notifier,
+	blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
 				     GPIOLINE_CHANGED_REQUESTED, desc);
 
 	return desc;
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 08e8e8274883..11a5e02f317d 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -39,8 +39,8 @@
  * or name of the IP component in a System on Chip.
  * @data: per-instance data assigned by the driver
  * @list: links gpio_device:s together for traversal
- * @notifier: used to notify subscribers about lines being requested, released
- *            or reconfigured
+ * @line_state_notifier: used to notify subscribers about lines being
+ *                       requested, released or reconfigured
  * @sem: protects the structure from a NULL-pointer dereference of @chip by
  *       user-space operations when the device gets unregistered during
  *       a hot-unplug event
@@ -64,7 +64,7 @@ struct gpio_device {
 	const char		*label;
 	void			*data;
 	struct list_head        list;
-	struct blocking_notifier_head notifier;
+	struct blocking_notifier_head line_state_notifier;
 	struct rw_semaphore	sem;
 
 #ifdef CONFIG_PINCTRL

From e82bbd6761f7e313141c4033ebb79e3d397b6a9c Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Thu, 17 Aug 2023 13:42:34 +0200
Subject: [PATCH 1632/2047] gpio: cdev: open-code to_gpio_chardev_data()

This function is a wrapper around container_of(). It's used only once and
we will have a second notifier soon, so instead of having two flavors of
this helper, let's just open-code where needed.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Kent Gibson <warthog618@gmail.com>
---
 drivers/gpio/gpiolib-cdev.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 9ee8604f32e1..91bdd3b928b5 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -2491,16 +2491,11 @@ static long gpio_ioctl_compat(struct file *file, unsigned int cmd,
 }
 #endif
 
-static struct gpio_chardev_data *
-to_gpio_chardev_data(struct notifier_block *nb)
-{
-	return container_of(nb, struct gpio_chardev_data, lineinfo_changed_nb);
-}
-
 static int lineinfo_changed_notify(struct notifier_block *nb,
 				   unsigned long action, void *data)
 {
-	struct gpio_chardev_data *cdev = to_gpio_chardev_data(nb);
+	struct gpio_chardev_data *cdev =
+		container_of(nb, struct gpio_chardev_data, lineinfo_changed_nb);
 	struct gpio_v2_line_info_changed chg;
 	struct gpio_desc *desc = data;
 	int ret;

From a067419ba77da830939852758702388f0fba09a1 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Thu, 17 Aug 2023 13:59:05 +0200
Subject: [PATCH 1633/2047] gpiolib: add a second blocking notifier to struct
 gpio_device

Add a new blocking notifier to struct gpio_device and use it to notify
subscribers about the GPIO device being unregistered from the device
model.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Kent Gibson <warthog618@gmail.com>
---
 drivers/gpio/gpiolib-cdev.c | 1 +
 drivers/gpio/gpiolib.c      | 1 +
 drivers/gpio/gpiolib.h      | 3 +++
 3 files changed, 5 insertions(+)

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 91bdd3b928b5..fec446510028 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -2748,4 +2748,5 @@ int gpiolib_cdev_register(struct gpio_device *gdev, dev_t devt)
 void gpiolib_cdev_unregister(struct gpio_device *gdev)
 {
 	cdev_device_del(&gdev->chrdev, &gdev->dev);
+	blocking_notifier_call_chain(&gdev->device_notifier, 0, NULL);
 }
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 38640df1d798..8f74e0bb2a5c 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -848,6 +848,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 	spin_unlock_irqrestore(&gpio_lock, flags);
 
 	BLOCKING_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier);
+	BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier);
 	init_rwsem(&gdev->sem);
 
 #ifdef CONFIG_PINCTRL
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 11a5e02f317d..e860ed434bc2 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -41,6 +41,8 @@
  * @list: links gpio_device:s together for traversal
  * @line_state_notifier: used to notify subscribers about lines being
  *                       requested, released or reconfigured
+ * @device_notifier: used to notify character device wait queues about the GPIO
+ *                   device being unregistered
  * @sem: protects the structure from a NULL-pointer dereference of @chip by
  *       user-space operations when the device gets unregistered during
  *       a hot-unplug event
@@ -65,6 +67,7 @@ struct gpio_device {
 	void			*data;
 	struct list_head        list;
 	struct blocking_notifier_head line_state_notifier;
+	struct blocking_notifier_head device_notifier;
 	struct rw_semaphore	sem;
 
 #ifdef CONFIG_PINCTRL

From d2e2586a3292e05d4872e9111e8b005bc4ff4d09 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Thu, 17 Aug 2023 14:42:24 +0200
Subject: [PATCH 1634/2047] gpio: cdev: wake up chardev poll() on device unbind

Add a notifier block to the gpio_chardev_data structure and register it
with the gpio_device's device notifier. Upon reception of an event, wake
up the wait queue so that the user-space be forced out of poll() and need
to go into a new system call which will then fail due to the chip being
gone.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Kent Gibson <warthog618@gmail.com>
---
 drivers/gpio/gpiolib-cdev.c | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index fec446510028..bc1542544e49 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -2320,6 +2320,7 @@ struct gpio_chardev_data {
 	wait_queue_head_t wait;
 	DECLARE_KFIFO(events, struct gpio_v2_line_info_changed, 32);
 	struct notifier_block lineinfo_changed_nb;
+	struct notifier_block device_unregistered_nb;
 	unsigned long *watched_lines;
 #ifdef CONFIG_GPIO_CDEV_V1
 	atomic_t watch_abi_version;
@@ -2517,6 +2518,18 @@ static int lineinfo_changed_notify(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+static int gpio_device_unregistered_notify(struct notifier_block *nb,
+					   unsigned long action, void *data)
+{
+	struct gpio_chardev_data *cdev = container_of(nb,
+						      struct gpio_chardev_data,
+						      device_unregistered_nb);
+
+	wake_up_poll(&cdev->wait, EPOLLIN | EPOLLERR);
+
+	return NOTIFY_OK;
+}
+
 static __poll_t lineinfo_watch_poll_unlocked(struct file *file,
 					     struct poll_table_struct *pollt)
 {
@@ -2671,17 +2684,27 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file)
 	if (ret)
 		goto out_free_bitmap;
 
+	cdev->device_unregistered_nb.notifier_call =
+					gpio_device_unregistered_notify;
+	ret = blocking_notifier_chain_register(&gdev->device_notifier,
+					       &cdev->device_unregistered_nb);
+	if (ret)
+		goto out_unregister_line_notifier;
+
 	file->private_data = cdev;
 
 	ret = nonseekable_open(inode, file);
 	if (ret)
-		goto out_unregister_notifier;
+		goto out_unregister_device_notifier;
 
 	up_read(&gdev->sem);
 
 	return ret;
 
-out_unregister_notifier:
+out_unregister_device_notifier:
+	blocking_notifier_chain_unregister(&gdev->device_notifier,
+					   &cdev->device_unregistered_nb);
+out_unregister_line_notifier:
 	blocking_notifier_chain_unregister(&gdev->line_state_notifier,
 					   &cdev->lineinfo_changed_nb);
 out_free_bitmap:
@@ -2706,6 +2729,8 @@ static int gpio_chrdev_release(struct inode *inode, struct file *file)
 	struct gpio_device *gdev = cdev->gdev;
 
 	bitmap_free(cdev->watched_lines);
+	blocking_notifier_chain_unregister(&gdev->device_notifier,
+					   &cdev->device_unregistered_nb);
 	blocking_notifier_chain_unregister(&gdev->line_state_notifier,
 					   &cdev->lineinfo_changed_nb);
 	gpio_device_put(gdev);

From a0dda508bd66b9eeeedc6fe0597aa8613f4b3c5d Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Thu, 17 Aug 2023 15:18:58 +0200
Subject: [PATCH 1635/2047] gpio: cdev: wake up linereq poll() on device unbind

Add a notifier block to the linereq structure and register it with the
gpio_device's device notifier. Upon reception of an event, wake up the
wait queue so that the user-space be forced out of poll() and need to go
into a new system call which will then fail due to the chip being gone.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Kent Gibson <warthog618@gmail.com>
---
 drivers/gpio/gpiolib-cdev.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index bc1542544e49..7d0c83127ccb 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -555,6 +555,7 @@ struct line {
  * @label: consumer label used to tag GPIO descriptors
  * @num_lines: the number of lines in the lines array
  * @wait: wait queue that handles blocking reads of events
+ * @device_unregistered_nb: notifier block for receiving gdev unregister events
  * @event_buffer_size: the number of elements allocated in @events
  * @events: KFIFO for the GPIO events
  * @seqno: the sequence number for edge events generated on all lines in
@@ -569,6 +570,7 @@ struct linereq {
 	const char *label;
 	u32 num_lines;
 	wait_queue_head_t wait;
+	struct notifier_block device_unregistered_nb;
 	u32 event_buffer_size;
 	DECLARE_KFIFO_PTR(events, struct gpio_v2_line_event);
 	atomic_t seqno;
@@ -610,6 +612,17 @@ struct linereq {
 	 GPIO_V2_LINE_FLAG_EVENT_CLOCK_HTE | \
 	 GPIO_V2_LINE_EDGE_FLAGS)
 
+static int linereq_unregistered_notify(struct notifier_block *nb,
+				       unsigned long action, void *data)
+{
+	struct linereq *lr = container_of(nb, struct linereq,
+					  device_unregistered_nb);
+
+	wake_up_poll(&lr->wait, EPOLLIN | EPOLLERR);
+
+	return NOTIFY_OK;
+}
+
 static void linereq_put_event(struct linereq *lr,
 			      struct gpio_v2_line_event *le)
 {
@@ -1567,6 +1580,10 @@ static void linereq_free(struct linereq *lr)
 {
 	unsigned int i;
 
+	if (lr->device_unregistered_nb.notifier_call)
+		blocking_notifier_chain_unregister(&lr->gdev->device_notifier,
+						   &lr->device_unregistered_nb);
+
 	for (i = 0; i < lr->num_lines; i++) {
 		if (lr->lines[i].desc) {
 			edge_detector_stop(&lr->lines[i]);
@@ -1727,6 +1744,12 @@ static int linereq_create(struct gpio_device *gdev, void __user *ip)
 			offset);
 	}
 
+	lr->device_unregistered_nb.notifier_call = linereq_unregistered_notify;
+	ret = blocking_notifier_chain_register(&gdev->device_notifier,
+					       &lr->device_unregistered_nb);
+	if (ret)
+		goto out_free_linereq;
+
 	fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
 	if (fd < 0) {
 		ret = fd;

From 91043f559313fb04ac18253fcf18561a40c1b045 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Thu, 17 Aug 2023 15:28:31 +0200
Subject: [PATCH 1636/2047] gpio: cdev: wake up lineevent poll() on device
 unbind

Add a notifier block to the lineevent_state structure and register it
with the gpio_device's device notifier. Upon reception of an event, wake
up the wait queue so that the user-space be forced out of poll() and
need to go into a new system call which will then fail due to the chip
being gone.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Kent Gibson <warthog618@gmail.com>
---
 drivers/gpio/gpiolib-cdev.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 7d0c83127ccb..35dcaf78aed6 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -1802,6 +1802,7 @@ out_free_linereq:
  * @eflags: the event flags this line was requested with
  * @irq: the interrupt that trigger in response to events on this GPIO
  * @wait: wait queue that handles blocking reads of events
+ * @device_unregistered_nb: notifier block for receiving gdev unregister events
  * @events: KFIFO for the GPIO events
  * @timestamp: cache for the timestamp storing it between hardirq
  * and IRQ thread, used to bring the timestamp close to the actual
@@ -1814,6 +1815,7 @@ struct lineevent_state {
 	u32 eflags;
 	int irq;
 	wait_queue_head_t wait;
+	struct notifier_block device_unregistered_nb;
 	DECLARE_KFIFO(events, struct gpioevent_data, 16);
 	u64 timestamp;
 };
@@ -1847,6 +1849,17 @@ static __poll_t lineevent_poll(struct file *file,
 	return call_poll_locked(file, wait, le->gdev, lineevent_poll_unlocked);
 }
 
+static int lineevent_unregistered_notify(struct notifier_block *nb,
+					 unsigned long action, void *data)
+{
+	struct lineevent_state *le = container_of(nb, struct lineevent_state,
+						  device_unregistered_nb);
+
+	wake_up_poll(&le->wait, EPOLLIN | EPOLLERR);
+
+	return NOTIFY_OK;
+}
+
 struct compat_gpioeevent_data {
 	compat_u64	timestamp;
 	u32		id;
@@ -1932,6 +1945,9 @@ static ssize_t lineevent_read(struct file *file, char __user *buf,
 
 static void lineevent_free(struct lineevent_state *le)
 {
+	if (le->device_unregistered_nb.notifier_call)
+		blocking_notifier_chain_unregister(&le->gdev->device_notifier,
+						   &le->device_unregistered_nb);
 	if (le->irq)
 		free_irq(le->irq, le);
 	if (le->desc)
@@ -2160,6 +2176,12 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
 	INIT_KFIFO(le->events);
 	init_waitqueue_head(&le->wait);
 
+	le->device_unregistered_nb.notifier_call = lineevent_unregistered_notify;
+	ret = blocking_notifier_chain_register(&gdev->device_notifier,
+					       &le->device_unregistered_nb);
+	if (ret)
+		goto out_free_le;
+
 	/* Request a thread to read the events */
 	ret = request_threaded_irq(irq,
 				   lineevent_irq_handler,

From 187b4c0d34e34cc4a275fd98393a3bc21a460a66 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Mon, 31 Jul 2023 11:00:37 +0800
Subject: [PATCH 1637/2047] xen: Fix one kernel-doc comment

Use colon to separate parameter name from their specific meaning.
silence the warning:

drivers/xen/grant-table.c:1051: warning: Function parameter or member 'nr_pages' not described in 'gnttab_free_pages'

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=6030
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Acked-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20230731030037.123946-1-yang.lee@linux.alibaba.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/grant-table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index f13c3b76ad1e..35659bf70746 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1044,7 +1044,7 @@ EXPORT_SYMBOL_GPL(gnttab_pages_clear_private);
 
 /**
  * gnttab_free_pages - free pages allocated by gnttab_alloc_pages()
- * @nr_pages; number of pages to free
+ * @nr_pages: number of pages to free
  * @pages: the pages
  */
 void gnttab_free_pages(int nr_pages, struct page **pages)

From 25e324bc9cf2ee956eec1db384c39c1a17b7c44a Mon Sep 17 00:00:00 2001
From: Rae Moar <rmoar@google.com>
Date: Thu, 17 Aug 2023 19:14:51 +0000
Subject: [PATCH 1638/2047] kunit: fix struct kunit_attr header

Add parameter descriptions to struct kunit_attr header for the
parameters attr_default and print.

Fixes: 39e92cb1e4a1 ("kunit: Add test attributes API structure")

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202308180127.VD7YRPGa-lkp@intel.com/

Signed-off-by: Rae Moar <rmoar@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 lib/kunit/attributes.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/kunit/attributes.c b/lib/kunit/attributes.c
index 5e3034b6be99..1b512f7e1838 100644
--- a/lib/kunit/attributes.c
+++ b/lib/kunit/attributes.c
@@ -30,6 +30,8 @@ enum print_ops {
  * attribute value
  * @filter: function to indicate whether a given attribute value passes a
  * filter
+ * @attr_default: default attribute value used during filtering
+ * @print: value of enum print_ops to indicate when to print attribute
  */
 struct kunit_attr {
 	const char *name;

From f19df6e4de64b7fc6d71f192aa9ff3b701e4bade Mon Sep 17 00:00:00 2001
From: David Michael <fedora.dm0@gmail.com>
Date: Tue, 15 Aug 2023 21:42:41 -0400
Subject: [PATCH 1639/2047] drm/panfrost: Skip speed binning on EOPNOTSUPP

Encountered on an ARM Mali-T760 MP4, attempting to read the nvmem
variable can also return EOPNOTSUPP instead of ENOENT when speed
binning is unsupported.

Cc: <stable@vger.kernel.org>
Fixes: 7d690f936e9b ("drm/panfrost: Add basic support for speed binning")
Signed-off-by: David Michael <fedora.dm0@gmail.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/87msyryd7y.fsf@gmail.com
---
 drivers/gpu/drm/panfrost/panfrost_devfreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
index 58dfb15a8757..e78de99e9933 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -96,7 +96,7 @@ static int panfrost_read_speedbin(struct device *dev)
 		 * keep going without it; any other error means that we are
 		 * supposed to read the bin value, but we failed doing so.
 		 */
-		if (ret != -ENOENT) {
+		if (ret != -ENOENT && ret != -EOPNOTSUPP) {
 			DRM_DEV_ERROR(dev, "Cannot read speed-bin (%d).", ret);
 			return ret;
 		}

From c2489bb7e6be2e8cdced12c16c42fa128403ac03 Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Fri, 18 Aug 2023 10:26:45 +0800
Subject: [PATCH 1640/2047] tracing: Introduce pipe_cpumask to avoid race on
 trace_pipes

There is race issue when concurrently splice_read main trace_pipe and
per_cpu trace_pipes which will result in data read out being different
from what actually writen.

As suggested by Steven:
  > I believe we should add a ref count to trace_pipe and the per_cpu
  > trace_pipes, where if they are opened, nothing else can read it.
  >
  > Opening trace_pipe locks all per_cpu ref counts, if any of them are
  > open, then the trace_pipe open will fail (and releases any ref counts
  > it had taken).
  >
  > Opening a per_cpu trace_pipe will up the ref count for just that
  > CPU buffer. This will allow multiple tasks to read different per_cpu
  > trace_pipe files, but will prevent the main trace_pipe file from
  > being opened.

But because we only need to know whether per_cpu trace_pipe is open or
not, using a cpumask instead of using ref count may be easier.

After this patch, users will find that:
 - Main trace_pipe can be opened by only one user, and if it is
   opened, all per_cpu trace_pipes cannot be opened;
 - Per_cpu trace_pipes can be opened by multiple users, but each per_cpu
   trace_pipe can only be opened by one user. And if one of them is
   opened, main trace_pipe cannot be opened.

Link: https://lore.kernel.org/linux-trace-kernel/20230818022645.1948314-1-zhengyejian1@huawei.com

Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 55 ++++++++++++++++++++++++++++++++++++++------
 kernel/trace/trace.h |  2 ++
 2 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 29a2e4d7886d..8e64aaad5361 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6718,10 +6718,36 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
 
 #endif
 
+static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
+{
+	if (cpu == RING_BUFFER_ALL_CPUS) {
+		if (cpumask_empty(tr->pipe_cpumask)) {
+			cpumask_setall(tr->pipe_cpumask);
+			return 0;
+		}
+	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
+		cpumask_set_cpu(cpu, tr->pipe_cpumask);
+		return 0;
+	}
+	return -EBUSY;
+}
+
+static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
+{
+	if (cpu == RING_BUFFER_ALL_CPUS) {
+		WARN_ON(!cpumask_full(tr->pipe_cpumask));
+		cpumask_clear(tr->pipe_cpumask);
+	} else {
+		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
+		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
+	}
+}
+
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
 	struct trace_array *tr = inode->i_private;
 	struct trace_iterator *iter;
+	int cpu;
 	int ret;
 
 	ret = tracing_check_open_get_tr(tr);
@@ -6729,13 +6755,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 		return ret;
 
 	mutex_lock(&trace_types_lock);
+	cpu = tracing_get_cpu(inode);
+	ret = open_pipe_on_cpu(tr, cpu);
+	if (ret)
+		goto fail_pipe_on_cpu;
 
 	/* create a buffer to store the information to pass to userspace */
 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
 	if (!iter) {
 		ret = -ENOMEM;
-		__trace_array_put(tr);
-		goto out;
+		goto fail_alloc_iter;
 	}
 
 	trace_seq_init(&iter->seq);
@@ -6758,7 +6787,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 
 	iter->tr = tr;
 	iter->array_buffer = &tr->array_buffer;
-	iter->cpu_file = tracing_get_cpu(inode);
+	iter->cpu_file = cpu;
 	mutex_init(&iter->mutex);
 	filp->private_data = iter;
 
@@ -6768,12 +6797,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	nonseekable_open(inode, filp);
 
 	tr->trace_ref++;
-out:
+
 	mutex_unlock(&trace_types_lock);
 	return ret;
 
 fail:
 	kfree(iter);
+fail_alloc_iter:
+	close_pipe_on_cpu(tr, cpu);
+fail_pipe_on_cpu:
 	__trace_array_put(tr);
 	mutex_unlock(&trace_types_lock);
 	return ret;
@@ -6790,7 +6822,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 
 	if (iter->trace->pipe_close)
 		iter->trace->pipe_close(iter);
-
+	close_pipe_on_cpu(tr, iter->cpu_file);
 	mutex_unlock(&trace_types_lock);
 
 	free_cpumask_var(iter->started);
@@ -9454,6 +9486,9 @@ static struct trace_array *trace_array_create(const char *name)
 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
 		goto out_free_tr;
 
+	if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
+		goto out_free_tr;
+
 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
 
 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
@@ -9495,6 +9530,7 @@ static struct trace_array *trace_array_create(const char *name)
  out_free_tr:
 	ftrace_free_ftrace_ops(tr);
 	free_trace_buffers(tr);
+	free_cpumask_var(tr->pipe_cpumask);
 	free_cpumask_var(tr->tracing_cpumask);
 	kfree(tr->name);
 	kfree(tr);
@@ -9597,6 +9633,7 @@ static int __remove_instance(struct trace_array *tr)
 	}
 	kfree(tr->topts);
 
+	free_cpumask_var(tr->pipe_cpumask);
 	free_cpumask_var(tr->tracing_cpumask);
 	kfree(tr->name);
 	kfree(tr);
@@ -10394,12 +10431,14 @@ __init static int tracer_alloc_buffers(void)
 	if (trace_create_savedcmd() < 0)
 		goto out_free_temp_buffer;
 
+	if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
+		goto out_free_savedcmd;
+
 	/* TODO: make the number of buffers hot pluggable with CPUS */
 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
-		goto out_free_savedcmd;
+		goto out_free_pipe_cpumask;
 	}
-
 	if (global_trace.buffer_disabled)
 		tracing_off();
 
@@ -10452,6 +10491,8 @@ __init static int tracer_alloc_buffers(void)
 
 	return 0;
 
+out_free_pipe_cpumask:
+	free_cpumask_var(global_trace.pipe_cpumask);
 out_free_savedcmd:
 	free_saved_cmdlines_buffer(savedcmd);
 out_free_temp_buffer:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 95956f75bea5..73eaec158473 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -377,6 +377,8 @@ struct trace_array {
 	struct list_head	events;
 	struct trace_event_file *trace_marker_file;
 	cpumask_var_t		tracing_cpumask; /* only trace on set CPUs */
+	/* one per_cpu trace_pipe can be opened by only one user */
+	cpumask_var_t		pipe_cpumask;
 	int			ref;
 	int			trace_ref;
 #ifdef CONFIG_FUNCTION_TRACER

From 5522d9f7b2e6d21dfa361cc498719b7f5e736b57 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 21 Aug 2023 15:13:22 +0100
Subject: [PATCH 1641/2047] libfs: Convert simple_write_begin and
 simple_write_end to use a folio

Remove a number of implicit calls to compound_head() and various calls
to compatibility functions.  This is not sufficient to enable support
for large folios; generic_perform_write() must be converted first.

Signed-off-by: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Message-Id: <20230821141322.2535459-1-willy@infradead.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/libfs.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index 0ea7fedb0b7a..d80374ede638 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -548,21 +548,20 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len,
 			struct page **pagep, void **fsdata)
 {
-	struct page *page;
-	pgoff_t index;
+	struct folio *folio;
 
-	index = pos >> PAGE_SHIFT;
+	folio = __filemap_get_folio(mapping, pos / PAGE_SIZE, FGP_WRITEBEGIN,
+			mapping_gfp_mask(mapping));
+	if (IS_ERR(folio))
+		return PTR_ERR(folio);
 
-	page = grab_cache_page_write_begin(mapping, index);
-	if (!page)
-		return -ENOMEM;
+	*pagep = &folio->page;
 
-	*pagep = page;
+	if (!folio_test_uptodate(folio) && (len != folio_size(folio))) {
+		size_t from = offset_in_folio(folio, pos);
 
-	if (!PageUptodate(page) && (len != PAGE_SIZE)) {
-		unsigned from = pos & (PAGE_SIZE - 1);
-
-		zero_user_segments(page, 0, from, from + len, PAGE_SIZE);
+		folio_zero_segments(folio, 0, from,
+				from + len, folio_size(folio));
 	}
 	return 0;
 }
@@ -594,17 +593,18 @@ static int simple_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata)
 {
-	struct inode *inode = page->mapping->host;
+	struct folio *folio = page_folio(page);
+	struct inode *inode = folio->mapping->host;
 	loff_t last_pos = pos + copied;
 
-	/* zero the stale part of the page if we did a short copy */
-	if (!PageUptodate(page)) {
+	/* zero the stale part of the folio if we did a short copy */
+	if (!folio_test_uptodate(folio)) {
 		if (copied < len) {
-			unsigned from = pos & (PAGE_SIZE - 1);
+			size_t from = offset_in_folio(folio, pos);
 
-			zero_user(page, from + copied, len - copied);
+			folio_zero_range(folio, from + copied, len - copied);
 		}
-		SetPageUptodate(page);
+		folio_mark_uptodate(folio);
 	}
 	/*
 	 * No need to use i_size_read() here, the i_size
@@ -613,9 +613,9 @@ static int simple_write_end(struct file *file, struct address_space *mapping,
 	if (last_pos > inode->i_size)
 		i_size_write(inode, last_pos);
 
-	set_page_dirty(page);
-	unlock_page(page);
-	put_page(page);
+	folio_mark_dirty(folio);
+	folio_unlock(folio);
+	folio_put(folio);
 
 	return copied;
 }

From 781ca6027ed7b83b97184a0606b703794da1e4be Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 21 Aug 2023 15:15:41 +0100
Subject: [PATCH 1642/2047] splice: Convert page_cache_pipe_buf_confirm() to
 use a folio

Convert buf->page to a folio once instead of five times.  There's only
one uptodate bit per folio, not per page, so we lose nothing here.

Signed-off-by: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Message-Id: <20230821141541.2535953-1-willy@infradead.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/splice.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index 378fedb392ae..fa4f51055e9f 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -120,17 +120,17 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
 static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
 				       struct pipe_buffer *buf)
 {
-	struct page *page = buf->page;
+	struct folio *folio = page_folio(buf->page);
 	int err;
 
-	if (!PageUptodate(page)) {
-		lock_page(page);
+	if (!folio_test_uptodate(folio)) {
+		folio_lock(folio);
 
 		/*
-		 * Page got truncated/unhashed. This will cause a 0-byte
+		 * Folio got truncated/unhashed. This will cause a 0-byte
 		 * splice, if this is the first page.
 		 */
-		if (!page->mapping) {
+		if (!folio->mapping) {
 			err = -ENODATA;
 			goto error;
 		}
@@ -138,20 +138,18 @@ static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
 		/*
 		 * Uh oh, read-error from disk.
 		 */
-		if (!PageUptodate(page)) {
+		if (!folio_test_uptodate(folio)) {
 			err = -EIO;
 			goto error;
 		}
 
-		/*
-		 * Page is ok afterall, we are done.
-		 */
-		unlock_page(page);
+		/* Folio is ok after all, we are done */
+		folio_unlock(folio);
 	}
 
 	return 0;
 error:
-	unlock_page(page);
+	folio_unlock(folio);
 	return err;
 }
 

From a370167fe526123637965f60859a9f1f3e1a58b7 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 17 Aug 2023 17:13:31 +0300
Subject: [PATCH 1643/2047] io_uring: rename kiocb_end_write() local helper

This helper does not take a kiocb as input and we want to create a
common helper by that name that takes a kiocb as input.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Message-Id: <20230817141337.1025891-2-amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 io_uring/rw.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/io_uring/rw.c b/io_uring/rw.c
index 1bce2208b65c..749ebd565839 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -220,7 +220,7 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
 }
 #endif
 
-static void kiocb_end_write(struct io_kiocb *req)
+static void io_req_end_write(struct io_kiocb *req)
 {
 	/*
 	 * Tell lockdep we inherited freeze protection from submission
@@ -243,7 +243,7 @@ static void io_req_io_end(struct io_kiocb *req)
 	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 
 	if (rw->kiocb.ki_flags & IOCB_WRITE) {
-		kiocb_end_write(req);
+		io_req_end_write(req);
 		fsnotify_modify(req->file);
 	} else {
 		fsnotify_access(req->file);
@@ -313,7 +313,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
 	struct io_kiocb *req = cmd_to_io_kiocb(rw);
 
 	if (kiocb->ki_flags & IOCB_WRITE)
-		kiocb_end_write(req);
+		io_req_end_write(req);
 	if (unlikely(res != req->cqe.res)) {
 		if (res == -EAGAIN && io_rw_should_reissue(req)) {
 			req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
@@ -961,7 +961,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
 				io->bytes_done += ret2;
 
 			if (kiocb->ki_flags & IOCB_WRITE)
-				kiocb_end_write(req);
+				io_req_end_write(req);
 			return ret ? ret : -EAGAIN;
 		}
 done:
@@ -972,7 +972,7 @@ copy_iov:
 		ret = io_setup_async_rw(req, iovec, s, false);
 		if (!ret) {
 			if (kiocb->ki_flags & IOCB_WRITE)
-				kiocb_end_write(req);
+				io_req_end_write(req);
 			return -EAGAIN;
 		}
 		return ret;

From f6c05b9e5d9bb8f54c6d00b8070fa93619f62a5e Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 17 Aug 2023 17:13:32 +0300
Subject: [PATCH 1644/2047] fs: add kerneldoc to file_{start,end}_write()
 helpers

and use sb_end_write() instead of open coded version.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Message-Id: <20230817141337.1025891-3-amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 57e2a0a9eea1..660ff7178caa 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2539,6 +2539,13 @@ static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
 	return (inode->i_mode ^ mode) & S_IFMT;
 }
 
+/**
+ * file_start_write - get write access to a superblock for regular file io
+ * @file: the file we want to write to
+ *
+ * This is a variant of sb_start_write() which is a noop on non-regualr file.
+ * Should be matched with a call to file_end_write().
+ */
 static inline void file_start_write(struct file *file)
 {
 	if (!S_ISREG(file_inode(file)->i_mode))
@@ -2553,11 +2560,17 @@ static inline bool file_start_write_trylock(struct file *file)
 	return sb_start_write_trylock(file_inode(file)->i_sb);
 }
 
+/**
+ * file_end_write - drop write access to a superblock of a regular file
+ * @file: the file we wrote to
+ *
+ * Should be matched with a call to file_start_write().
+ */
 static inline void file_end_write(struct file *file)
 {
 	if (!S_ISREG(file_inode(file)->i_mode))
 		return;
-	__sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+	sb_end_write(file_inode(file)->i_sb);
 }
 
 /*

From ed0360bbab72b829437b67ebb2f9cfac19f59dfe Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 17 Aug 2023 17:13:33 +0300
Subject: [PATCH 1645/2047] fs: create kiocb_{start,end}_write() helpers

aio, io_uring, cachefiles and overlayfs, all open code an ugly variant
of file_{start,end}_write() to silence lockdep warnings.

Create helpers for this lockdep dance so we can use the helpers in all
the callers.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Message-Id: <20230817141337.1025891-4-amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 660ff7178caa..1eac9545a7b4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2573,6 +2573,42 @@ static inline void file_end_write(struct file *file)
 	sb_end_write(file_inode(file)->i_sb);
 }
 
+/**
+ * kiocb_start_write - get write access to a superblock for async file io
+ * @iocb: the io context we want to submit the write with
+ *
+ * This is a variant of sb_start_write() for async io submission.
+ * Should be matched with a call to kiocb_end_write().
+ */
+static inline void kiocb_start_write(struct kiocb *iocb)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	sb_start_write(inode->i_sb);
+	/*
+	 * Fool lockdep by telling it the lock got released so that it
+	 * doesn't complain about the held lock when we return to userspace.
+	 */
+	__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+}
+
+/**
+ * kiocb_end_write - drop write access to a superblock after async file io
+ * @iocb: the io context we sumbitted the write with
+ *
+ * Should be matched with a call to kiocb_start_write().
+ */
+static inline void kiocb_end_write(struct kiocb *iocb)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	/*
+	 * Tell lockdep we inherited freeze protection from submission thread.
+	 */
+	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+	sb_end_write(inode->i_sb);
+}
+
 /*
  * This is used for regular files where some users -- especially the
  * currently executed binary in a process, previously handled via

From e484fd73f4bdcb00c2188100c2d84e9f3f5c9f7d Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 17 Aug 2023 17:13:34 +0300
Subject: [PATCH 1646/2047] io_uring: use kiocb_{start,end}_write() helpers

Use helpers instead of the open coded dance to silence lockdep warnings.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Message-Id: <20230817141337.1025891-5-amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 io_uring/rw.c | 23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/io_uring/rw.c b/io_uring/rw.c
index 749ebd565839..9581b90cb459 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -222,15 +222,10 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
 
 static void io_req_end_write(struct io_kiocb *req)
 {
-	/*
-	 * Tell lockdep we inherited freeze protection from submission
-	 * thread.
-	 */
 	if (req->flags & REQ_F_ISREG) {
-		struct super_block *sb = file_inode(req->file)->i_sb;
+		struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 
-		__sb_writers_acquired(sb, SB_FREEZE_WRITE);
-		sb_end_write(sb);
+		kiocb_end_write(&rw->kiocb);
 	}
 }
 
@@ -902,18 +897,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
 		return ret;
 	}
 
-	/*
-	 * Open-code file_start_write here to grab freeze protection,
-	 * which will be released by another thread in
-	 * io_complete_rw().  Fool lockdep by telling it the lock got
-	 * released so that it doesn't complain about the held lock when
-	 * we return to userspace.
-	 */
-	if (req->flags & REQ_F_ISREG) {
-		sb_start_write(file_inode(req->file)->i_sb);
-		__sb_writers_release(file_inode(req->file)->i_sb,
-					SB_FREEZE_WRITE);
-	}
+	if (req->flags & REQ_F_ISREG)
+		kiocb_start_write(kiocb);
 	kiocb->ki_flags |= IOCB_WRITE;
 
 	if (likely(req->file->f_op->write_iter))

From 8c3cfa80fd1ee9cd4b249de197f5ca9bbfae0daf Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 17 Aug 2023 17:13:35 +0300
Subject: [PATCH 1647/2047] aio: use kiocb_{start,end}_write() helpers

Use helpers instead of the open coded dance to silence lockdep warnings.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Message-Id: <20230817141337.1025891-6-amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/aio.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 77e33619de40..b3174da80ff6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1447,13 +1447,8 @@ static void aio_complete_rw(struct kiocb *kiocb, long res)
 	if (kiocb->ki_flags & IOCB_WRITE) {
 		struct inode *inode = file_inode(kiocb->ki_filp);
 
-		/*
-		 * Tell lockdep we inherited freeze protection from submission
-		 * thread.
-		 */
 		if (S_ISREG(inode->i_mode))
-			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
-		file_end_write(kiocb->ki_filp);
+			kiocb_end_write(kiocb);
 	}
 
 	iocb->ki_res.res = res;
@@ -1581,17 +1576,8 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
 		return ret;
 	ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
 	if (!ret) {
-		/*
-		 * Open-code file_start_write here to grab freeze protection,
-		 * which will be released by another thread in
-		 * aio_complete_rw().  Fool lockdep by telling it the lock got
-		 * released so that it doesn't complain about the held lock when
-		 * we return to userspace.
-		 */
-		if (S_ISREG(file_inode(file)->i_mode)) {
-			sb_start_write(file_inode(file)->i_sb);
-			__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
-		}
+		if (S_ISREG(file_inode(file)->i_mode))
+			kiocb_start_write(req);
 		req->ki_flags |= IOCB_WRITE;
 		aio_rw_done(req, call_write_iter(file, req, &iter));
 	}

From 8f7371268a4ba83e6bf3b73cf7484a2581f56d7b Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 17 Aug 2023 17:13:36 +0300
Subject: [PATCH 1648/2047] ovl: use kiocb_{start,end}_write() helpers

Use helpers instead of the open coded dance to silence lockdep warnings.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Message-Id: <20230817141337.1025891-7-amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/overlayfs/file.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 21245b00722a..e4cc401d334d 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -290,10 +290,7 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
 	if (iocb->ki_flags & IOCB_WRITE) {
 		struct inode *inode = file_inode(orig_iocb->ki_filp);
 
-		/* Actually acquired in ovl_write_iter() */
-		__sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
-				      SB_FREEZE_WRITE);
-		file_end_write(iocb->ki_filp);
+		kiocb_end_write(iocb);
 		ovl_copyattr(inode);
 	}
 
@@ -409,10 +406,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 		if (!aio_req)
 			goto out;
 
-		file_start_write(real.file);
-		/* Pacify lockdep, same trick as done in aio_write() */
-		__sb_writers_release(file_inode(real.file)->i_sb,
-				     SB_FREEZE_WRITE);
 		aio_req->fd = real;
 		real.flags = 0;
 		aio_req->orig_iocb = iocb;
@@ -420,6 +413,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 		aio_req->iocb.ki_flags = ifl;
 		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
 		refcount_set(&aio_req->ref, 2);
+		kiocb_start_write(&aio_req->iocb);
 		ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
 		ovl_aio_put(aio_req);
 		if (ret != -EIOCBQUEUED)

From e6fa4c728fb671765291cca3a905986612c06b6e Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 17 Aug 2023 17:13:37 +0300
Subject: [PATCH 1649/2047] cachefiles: use kiocb_{start,end}_write() helpers

Use helpers instead of the open coded dance to silence lockdep warnings.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Message-Id: <20230817141337.1025891-8-amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/cachefiles/io.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 175a25fcade8..009d23cd435b 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -259,9 +259,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
 
 	_enter("%ld", ret);
 
-	/* Tell lockdep we inherited freeze protection from submission thread */
-	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
-	__sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
+	kiocb_end_write(iocb);
 
 	if (ret < 0)
 		trace_cachefiles_io_error(object, inode, ret,
@@ -286,7 +284,6 @@ int __cachefiles_write(struct cachefiles_object *object,
 {
 	struct cachefiles_cache *cache;
 	struct cachefiles_kiocb *ki;
-	struct inode *inode;
 	unsigned int old_nofs;
 	ssize_t ret;
 	size_t len = iov_iter_count(iter);
@@ -322,19 +319,12 @@ int __cachefiles_write(struct cachefiles_object *object,
 		ki->iocb.ki_complete = cachefiles_write_complete;
 	atomic_long_add(ki->b_writing, &cache->b_writing);
 
-	/* Open-code file_start_write here to grab freeze protection, which
-	 * will be released by another thread in aio_complete_rw().  Fool
-	 * lockdep by telling it the lock got released so that it doesn't
-	 * complain about the held lock when we return to userspace.
-	 */
-	inode = file_inode(file);
-	__sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
-	__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+	kiocb_start_write(&ki->iocb);
 
 	get_file(ki->iocb.ki_filp);
 	cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
 
-	trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
+	trace_cachefiles_write(object, file_inode(file), ki->iocb.ki_pos, len);
 	old_nofs = memalloc_nofs_save();
 	ret = cachefiles_inject_write_error();
 	if (ret == 0)

From c37ce23591b14545f7c8b09e167b995f8acdda71 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 8 Aug 2023 09:46:03 +0200
Subject: [PATCH 1650/2047] efi/arm64: Move EFI runtime call setup/teardown
 helpers out of line

Only the arch_efi_call_virt() macro that some architectures override
needs to be a macro, given that it is variadic and encapsulates calls
via function pointers that have different prototypes.

The associated setup and teardown code are not special in this regard,
and don't need to be instantiated at each call site. So turn them into
ordinary C functions and move them out of line.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/include/asm/efi.h | 18 +++---------------
 arch/arm64/kernel/efi.c      | 16 +++++++++++++++-
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 4cf2cb053bc8..f482b994c608 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -30,28 +30,16 @@ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
 int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md,
 				bool has_bti);
 
-#define arch_efi_call_virt_setup()					\
-({									\
-	efi_virtmap_load();						\
-	__efi_fpsimd_begin();						\
-	raw_spin_lock(&efi_rt_lock);					\
-})
-
 #undef arch_efi_call_virt
 #define arch_efi_call_virt(p, f, args...)				\
 	__efi_rt_asm_wrapper((p)->f, #f, args)
 
-#define arch_efi_call_virt_teardown()					\
-({									\
-	raw_spin_unlock(&efi_rt_lock);					\
-	__efi_fpsimd_end();						\
-	efi_virtmap_unload();						\
-})
-
-extern raw_spinlock_t efi_rt_lock;
 extern u64 *efi_rt_stack_top;
 efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
 
+void arch_efi_call_virt_setup(void);
+void arch_efi_call_virt_teardown(void);
+
 /*
  * efi_rt_stack_top[-1] contains the value the stack pointer had before
  * switching to the EFI runtime stack.
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index baab8dd3ead3..49efbdbd6f7a 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -158,7 +158,21 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
 	return s;
 }
 
-DEFINE_RAW_SPINLOCK(efi_rt_lock);
+static DEFINE_RAW_SPINLOCK(efi_rt_lock);
+
+void arch_efi_call_virt_setup(void)
+{
+	efi_virtmap_load();
+	__efi_fpsimd_begin();
+	raw_spin_lock(&efi_rt_lock);
+}
+
+void arch_efi_call_virt_teardown(void)
+{
+	raw_spin_unlock(&efi_rt_lock);
+	__efi_fpsimd_end();
+	efi_virtmap_unload();
+}
 
 asmlinkage u64 *efi_rt_stack_top __ro_after_init;
 

From d8ea2ffd017d0608635c0ec7bc9ba39edbbd2482 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 8 Aug 2023 09:51:58 +0200
Subject: [PATCH 1651/2047] efi/riscv: Move EFI runtime call setup/teardown
 helpers out of line

Only the arch_efi_call_virt() macro that some architectures override
needs to be a macro, given that it is variadic and encapsulates calls
via function pointers that have different prototypes.

The associated setup and teardown code are not special in this regard,
and don't need to be instantiated at each call site. So turn them into
ordinary C functions and move them out of line.

Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/riscv/include/asm/efi.h         | 10 ++--------
 drivers/firmware/efi/riscv-runtime.c | 15 +++++++++++++--
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
index 29e9a0d84b16..8a6a128ec57f 100644
--- a/arch/riscv/include/asm/efi.h
+++ b/arch/riscv/include/asm/efi.h
@@ -21,12 +21,6 @@ extern void efi_init(void);
 int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
 int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md, bool);
 
-#define arch_efi_call_virt_setup()      ({		\
-		sync_kernel_mappings(efi_mm.pgd);	\
-		efi_virtmap_load();			\
-	})
-#define arch_efi_call_virt_teardown()   efi_virtmap_unload()
-
 #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
 
 /* Load initrd anywhere in system RAM */
@@ -46,8 +40,8 @@ static inline unsigned long efi_get_kimg_min_align(void)
 
 #define EFI_KIMG_PREFERRED_ADDRESS	efi_get_kimg_min_align()
 
-void efi_virtmap_load(void);
-void efi_virtmap_unload(void);
+void arch_efi_call_virt_setup(void);
+void arch_efi_call_virt_teardown(void);
 
 unsigned long stext_offset(void);
 
diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c
index d0daacd2c903..09525fb5c240 100644
--- a/drivers/firmware/efi/riscv-runtime.c
+++ b/drivers/firmware/efi/riscv-runtime.c
@@ -130,14 +130,25 @@ static int __init riscv_enable_runtime_services(void)
 }
 early_initcall(riscv_enable_runtime_services);
 
-void efi_virtmap_load(void)
+static void efi_virtmap_load(void)
 {
 	preempt_disable();
 	switch_mm(current->active_mm, &efi_mm, NULL);
 }
 
-void efi_virtmap_unload(void)
+static void efi_virtmap_unload(void)
 {
 	switch_mm(&efi_mm, current->active_mm, NULL);
 	preempt_enable();
 }
+
+void arch_efi_call_virt_setup(void)
+{
+	sync_kernel_mappings(efi_mm.pgd);
+	efi_virtmap_load();
+}
+
+void arch_efi_call_virt_teardown(void)
+{
+	efi_virtmap_unload();
+}

From c7c7bce093c883739d6735d68604055131acfbea Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sun, 2 Jul 2023 15:15:18 +0200
Subject: [PATCH 1652/2047] efi/runtime-wrappers: Use type safe encapsulation
 of call arguments

The current code that marshalls the EFI runtime call arguments to hand
them off to a async helper does so in a type unsafe and slightly messy
manner - everything is cast to void* except for some integral types that
are passed by reference and dereferenced on the receiver end.

Let's clean this up a bit, and record the arguments of each runtime
service invocation exactly as they are issued, in a manner that permits
the compiler to check the types of the arguments at both ends.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/runtime-wrappers.c | 196 ++++++++++++++++--------
 include/linux/efi.h                     |  18 +--
 2 files changed, 138 insertions(+), 76 deletions(-)

diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index a400c4312c82..66066e058757 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -44,20 +44,90 @@
 #define __efi_call_virt(f, args...) \
 	__efi_call_virt_pointer(efi.runtime, f, args)
 
+union efi_rts_args {
+	struct {
+		efi_time_t 	*time;
+		efi_time_cap_t	*capabilities;
+	} GET_TIME;
+
+	struct {
+		efi_time_t	*time;
+	} SET_TIME;
+
+	struct {
+		efi_bool_t	*enabled;
+		efi_bool_t	*pending;
+		efi_time_t	*time;
+	} GET_WAKEUP_TIME;
+
+	struct {
+		efi_bool_t	enable;
+		efi_time_t	*time;
+	} SET_WAKEUP_TIME;
+
+	struct {
+		efi_char16_t	*name;
+		efi_guid_t	*vendor;
+		u32		*attr;
+		unsigned long	*data_size;
+		void		*data;
+	} GET_VARIABLE;
+
+	struct {
+		unsigned long	*name_size;
+		efi_char16_t	*name;
+		efi_guid_t 	*vendor;
+	} GET_NEXT_VARIABLE;
+
+	struct {
+		efi_char16_t	*name;
+		efi_guid_t	*vendor;
+		u32		attr;
+		unsigned long	data_size;
+		void		*data;
+	} SET_VARIABLE;
+
+	struct {
+		u32		attr;
+		u64		*storage_space;
+		u64		*remaining_space;
+		u64		*max_variable_size;
+	} QUERY_VARIABLE_INFO;
+
+	struct {
+		u32		*high_count;
+	} GET_NEXT_HIGH_MONO_COUNT;
+
+	struct {
+		efi_capsule_header_t **capsules;
+		unsigned long	count;
+		unsigned long	sg_list;
+	} UPDATE_CAPSULE;
+
+	struct {
+		efi_capsule_header_t **capsules;
+		unsigned long	count;
+		u64		*max_size;
+		int		*reset_type;
+	} QUERY_CAPSULE_CAPS;
+};
+
 struct efi_runtime_work efi_rts_work;
 
 /*
- * efi_queue_work:	Queue efi_runtime_service() and wait until it's done
- * @rts:		efi_runtime_service() function identifier
- * @rts_arg<1-5>:	efi_runtime_service() function arguments
+ * efi_queue_work:	Queue EFI runtime service call and wait for completion
+ * @_rts:		EFI runtime service function identifier
+ * @_args:		Arguments to pass to the EFI runtime service
  *
  * Accesses to efi_runtime_services() are serialized by a binary
  * semaphore (efi_runtime_lock) and caller waits until the work is
  * finished, hence _only_ one work is queued at a time and the caller
  * thread waits for completion.
  */
-#define efi_queue_work(_rts, _arg1, _arg2, _arg3, _arg4, _arg5)		\
+#define efi_queue_work(_rts, _args...)					\
 ({									\
+	efi_rts_work.efi_rts_id = EFI_ ## _rts;				\
+	efi_rts_work.args = &(union efi_rts_args){ ._rts = { _args }};	\
 	efi_rts_work.status = EFI_ABORTED;				\
 									\
 	if (!efi_enabled(EFI_RUNTIME_SERVICES)) {			\
@@ -68,12 +138,6 @@ struct efi_runtime_work efi_rts_work;
 									\
 	init_completion(&efi_rts_work.efi_rts_comp);			\
 	INIT_WORK(&efi_rts_work.work, efi_call_rts);			\
-	efi_rts_work.arg1 = _arg1;					\
-	efi_rts_work.arg2 = _arg2;					\
-	efi_rts_work.arg3 = _arg3;					\
-	efi_rts_work.arg4 = _arg4;					\
-	efi_rts_work.arg5 = _arg5;					\
-	efi_rts_work.efi_rts_id = _rts;					\
 									\
 	/*								\
 	 * queue_work() returns 0 if work was already on queue,         \
@@ -170,73 +234,78 @@ extern struct semaphore __efi_uv_runtime_lock __alias(efi_runtime_lock);
 /*
  * Calls the appropriate efi_runtime_service() with the appropriate
  * arguments.
- *
- * Semantics followed by efi_call_rts() to understand efi_runtime_work:
- * 1. If argument was a pointer, recast it from void pointer to original
- * pointer type.
- * 2. If argument was a value, recast it from void pointer to original
- * pointer type and dereference it.
  */
 static void efi_call_rts(struct work_struct *work)
 {
-	void *arg1, *arg2, *arg3, *arg4, *arg5;
+	const union efi_rts_args *args = efi_rts_work.args;
 	efi_status_t status = EFI_NOT_FOUND;
 
-	arg1 = efi_rts_work.arg1;
-	arg2 = efi_rts_work.arg2;
-	arg3 = efi_rts_work.arg3;
-	arg4 = efi_rts_work.arg4;
-	arg5 = efi_rts_work.arg5;
-
 	switch (efi_rts_work.efi_rts_id) {
 	case EFI_GET_TIME:
-		status = efi_call_virt(get_time, (efi_time_t *)arg1,
-				       (efi_time_cap_t *)arg2);
+		status = efi_call_virt(get_time,
+				       args->GET_TIME.time,
+				       args->GET_TIME.capabilities);
 		break;
 	case EFI_SET_TIME:
-		status = efi_call_virt(set_time, (efi_time_t *)arg1);
+		status = efi_call_virt(set_time,
+				       args->SET_TIME.time);
 		break;
 	case EFI_GET_WAKEUP_TIME:
-		status = efi_call_virt(get_wakeup_time, (efi_bool_t *)arg1,
-				       (efi_bool_t *)arg2, (efi_time_t *)arg3);
+		status = efi_call_virt(get_wakeup_time,
+				       args->GET_WAKEUP_TIME.enabled,
+				       args->GET_WAKEUP_TIME.pending,
+				       args->GET_WAKEUP_TIME.time);
 		break;
 	case EFI_SET_WAKEUP_TIME:
-		status = efi_call_virt(set_wakeup_time, *(efi_bool_t *)arg1,
-				       (efi_time_t *)arg2);
+		status = efi_call_virt(set_wakeup_time,
+				       args->SET_WAKEUP_TIME.enable,
+				       args->SET_WAKEUP_TIME.time);
 		break;
 	case EFI_GET_VARIABLE:
-		status = efi_call_virt(get_variable, (efi_char16_t *)arg1,
-				       (efi_guid_t *)arg2, (u32 *)arg3,
-				       (unsigned long *)arg4, (void *)arg5);
+		status = efi_call_virt(get_variable,
+				       args->GET_VARIABLE.name,
+				       args->GET_VARIABLE.vendor,
+				       args->GET_VARIABLE.attr,
+				       args->GET_VARIABLE.data_size,
+				       args->GET_VARIABLE.data);
 		break;
 	case EFI_GET_NEXT_VARIABLE:
-		status = efi_call_virt(get_next_variable, (unsigned long *)arg1,
-				       (efi_char16_t *)arg2,
-				       (efi_guid_t *)arg3);
+		status = efi_call_virt(get_next_variable,
+				       args->GET_NEXT_VARIABLE.name_size,
+				       args->GET_NEXT_VARIABLE.name,
+				       args->GET_NEXT_VARIABLE.vendor);
 		break;
 	case EFI_SET_VARIABLE:
-		status = efi_call_virt(set_variable, (efi_char16_t *)arg1,
-				       (efi_guid_t *)arg2, *(u32 *)arg3,
-				       *(unsigned long *)arg4, (void *)arg5);
+		status = efi_call_virt(set_variable,
+				       args->SET_VARIABLE.name,
+				       args->SET_VARIABLE.vendor,
+				       args->SET_VARIABLE.attr,
+				       args->SET_VARIABLE.data_size,
+				       args->SET_VARIABLE.data);
 		break;
 	case EFI_QUERY_VARIABLE_INFO:
-		status = efi_call_virt(query_variable_info, *(u32 *)arg1,
-				       (u64 *)arg2, (u64 *)arg3, (u64 *)arg4);
+		status = efi_call_virt(query_variable_info,
+				       args->QUERY_VARIABLE_INFO.attr,
+				       args->QUERY_VARIABLE_INFO.storage_space,
+				       args->QUERY_VARIABLE_INFO.remaining_space,
+				       args->QUERY_VARIABLE_INFO.max_variable_size);
 		break;
 	case EFI_GET_NEXT_HIGH_MONO_COUNT:
-		status = efi_call_virt(get_next_high_mono_count, (u32 *)arg1);
+		status = efi_call_virt(get_next_high_mono_count,
+				       args->GET_NEXT_HIGH_MONO_COUNT.high_count);
 		break;
 	case EFI_UPDATE_CAPSULE:
 		status = efi_call_virt(update_capsule,
-				       (efi_capsule_header_t **)arg1,
-				       *(unsigned long *)arg2,
-				       *(unsigned long *)arg3);
+				       args->UPDATE_CAPSULE.capsules,
+				       args->UPDATE_CAPSULE.count,
+				       args->UPDATE_CAPSULE.sg_list);
 		break;
 	case EFI_QUERY_CAPSULE_CAPS:
 		status = efi_call_virt(query_capsule_caps,
-				       (efi_capsule_header_t **)arg1,
-				       *(unsigned long *)arg2, (u64 *)arg3,
-				       (int *)arg4);
+				       args->QUERY_CAPSULE_CAPS.capsules,
+				       args->QUERY_CAPSULE_CAPS.count,
+				       args->QUERY_CAPSULE_CAPS.max_size,
+				       args->QUERY_CAPSULE_CAPS.reset_type);
 		break;
 	default:
 		/*
@@ -256,7 +325,7 @@ static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(EFI_GET_TIME, tm, tc, NULL, NULL, NULL);
+	status = efi_queue_work(GET_TIME, tm, tc);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -267,7 +336,7 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(EFI_SET_TIME, tm, NULL, NULL, NULL, NULL);
+	status = efi_queue_work(SET_TIME, tm);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -280,8 +349,7 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(EFI_GET_WAKEUP_TIME, enabled, pending, tm, NULL,
-				NULL);
+	status = efi_queue_work(GET_WAKEUP_TIME, enabled, pending, tm);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -292,8 +360,7 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(EFI_SET_WAKEUP_TIME, &enabled, tm, NULL, NULL,
-				NULL);
+	status = efi_queue_work(SET_WAKEUP_TIME, enabled, tm);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -308,7 +375,7 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(EFI_GET_VARIABLE, name, vendor, attr, data_size,
+	status = efi_queue_work(GET_VARIABLE, name, vendor, attr, data_size,
 				data);
 	up(&efi_runtime_lock);
 	return status;
@@ -322,8 +389,7 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(EFI_GET_NEXT_VARIABLE, name_size, name, vendor,
-				NULL, NULL);
+	status = efi_queue_work(GET_NEXT_VARIABLE, name_size, name, vendor);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -338,7 +404,7 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(EFI_SET_VARIABLE, name, vendor, &attr, &data_size,
+	status = efi_queue_work(SET_VARIABLE, name, vendor, attr, data_size,
 				data);
 	up(&efi_runtime_lock);
 	return status;
@@ -373,8 +439,8 @@ static efi_status_t virt_efi_query_variable_info(u32 attr,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(EFI_QUERY_VARIABLE_INFO, &attr, storage_space,
-				remaining_space, max_variable_size, NULL);
+	status = efi_queue_work(QUERY_VARIABLE_INFO, attr, storage_space,
+				remaining_space, max_variable_size);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -405,8 +471,7 @@ static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(EFI_GET_NEXT_HIGH_MONO_COUNT, count, NULL, NULL,
-				NULL, NULL);
+	status = efi_queue_work(GET_NEXT_HIGH_MONO_COUNT, count);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -437,8 +502,7 @@ static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(EFI_UPDATE_CAPSULE, capsules, &count, &sg_list,
-				NULL, NULL);
+	status = efi_queue_work(UPDATE_CAPSULE, capsules, count, sg_list);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -455,8 +519,8 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(EFI_QUERY_CAPSULE_CAPS, capsules, &count,
-				max_size, reset_type, NULL);
+	status = efi_queue_work(QUERY_CAPSULE_CAPS, capsules, count,
+				max_size, reset_type);
 	up(&efi_runtime_lock);
 	return status;
 }
diff --git a/include/linux/efi.h b/include/linux/efi.h
index e9004358f7bd..9d09dba116ce 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1264,23 +1264,21 @@ enum efi_rts_ids {
 	EFI_QUERY_CAPSULE_CAPS,
 };
 
+union efi_rts_args;
+
 /*
  * efi_runtime_work:	Details of EFI Runtime Service work
- * @arg<1-5>:		EFI Runtime Service function arguments
+ * @args:		Pointer to union describing the arguments
  * @status:		Status of executing EFI Runtime Service
  * @efi_rts_id:		EFI Runtime Service function identifier
  * @efi_rts_comp:	Struct used for handling completions
  */
 struct efi_runtime_work {
-	void *arg1;
-	void *arg2;
-	void *arg3;
-	void *arg4;
-	void *arg5;
-	efi_status_t status;
-	struct work_struct work;
-	enum efi_rts_ids efi_rts_id;
-	struct completion efi_rts_comp;
+	union efi_rts_args	*args;
+	efi_status_t		status;
+	struct work_struct	work;
+	enum efi_rts_ids	efi_rts_id;
+	struct completion	efi_rts_comp;
 };
 
 extern struct efi_runtime_work efi_rts_work;

From c99ba6e54652652a30a48e3f789d503d75646520 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sun, 2 Jul 2023 15:57:13 +0200
Subject: [PATCH 1653/2047] efi/runtime-wrapper: Move workqueue manipulation
 out of line

efi_queue_work() is a macro that implements the non-trivial manipulation
of the EFI runtime workqueue and completion data structure, most of
which is generic, and could be shared between all the users of the
macro. So move it out of the macro and into a new helper function.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/runtime-wrappers.c | 61 +++++++++++++------------
 1 file changed, 33 insertions(+), 28 deletions(-)

diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 66066e058757..ee5c9a3e5060 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -125,34 +125,8 @@ struct efi_runtime_work efi_rts_work;
  * thread waits for completion.
  */
 #define efi_queue_work(_rts, _args...)					\
-({									\
-	efi_rts_work.efi_rts_id = EFI_ ## _rts;				\
-	efi_rts_work.args = &(union efi_rts_args){ ._rts = { _args }};	\
-	efi_rts_work.status = EFI_ABORTED;				\
-									\
-	if (!efi_enabled(EFI_RUNTIME_SERVICES)) {			\
-		pr_warn_once("EFI Runtime Services are disabled!\n");	\
-		efi_rts_work.status = EFI_DEVICE_ERROR;			\
-		goto exit;						\
-	}								\
-									\
-	init_completion(&efi_rts_work.efi_rts_comp);			\
-	INIT_WORK(&efi_rts_work.work, efi_call_rts);			\
-									\
-	/*								\
-	 * queue_work() returns 0 if work was already on queue,         \
-	 * _ideally_ this should never happen.                          \
-	 */								\
-	if (queue_work(efi_rts_wq, &efi_rts_work.work))			\
-		wait_for_completion(&efi_rts_work.efi_rts_comp);	\
-	else								\
-		pr_err("Failed to queue work to efi_rts_wq.\n");	\
-									\
-	WARN_ON_ONCE(efi_rts_work.status == EFI_ABORTED);		\
-exit:									\
-	efi_rts_work.efi_rts_id = EFI_NONE;				\
-	efi_rts_work.status;						\
-})
+	__efi_queue_work(EFI_ ## _rts,					\
+			 &(union efi_rts_args){ ._rts = { _args }})
 
 #ifndef arch_efi_save_flags
 #define arch_efi_save_flags(state_flags)	local_save_flags(state_flags)
@@ -319,6 +293,37 @@ static void efi_call_rts(struct work_struct *work)
 	complete(&efi_rts_work.efi_rts_comp);
 }
 
+static efi_status_t __efi_queue_work(enum efi_rts_ids id,
+				     union efi_rts_args *args)
+{
+	efi_rts_work.efi_rts_id = id;
+	efi_rts_work.args = args;
+	efi_rts_work.status = EFI_ABORTED;
+
+	if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
+		pr_warn_once("EFI Runtime Services are disabled!\n");
+		efi_rts_work.status = EFI_DEVICE_ERROR;
+		goto exit;
+	}
+
+	init_completion(&efi_rts_work.efi_rts_comp);
+	INIT_WORK(&efi_rts_work.work, efi_call_rts);
+
+	/*
+	 * queue_work() returns 0 if work was already on queue,
+	 * _ideally_ this should never happen.
+	 */
+	if (queue_work(efi_rts_wq, &efi_rts_work.work))
+		wait_for_completion(&efi_rts_work.efi_rts_comp);
+	else
+		pr_err("Failed to queue work to efi_rts_wq.\n");
+
+	WARN_ON_ONCE(efi_rts_work.status == EFI_ABORTED);
+exit:
+	efi_rts_work.efi_rts_id = EFI_NONE;
+	return efi_rts_work.status;
+}
+
 static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 {
 	efi_status_t status;

From 5e87491415217d6bec0bcae08a3156622be2b177 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 18 Aug 2023 16:00:50 +0200
Subject: [PATCH 1654/2047] super: wait for nascent superblocks

Recent patches experiment with making it possible to allocate a new
superblock before opening the relevant block device. Naturally this has
intricate side-effects that we get to learn about while developing this.

Superblock allocators such as sget{_fc}() return with s_umount of the
new superblock held and lock ordering currently requires that block
level locks such as bdev_lock and open_mutex rank above s_umount.

Before aca740cecbe5 ("fs: open block device after superblock creation")
ordering was guaranteed to be correct as block devices were opened prior
to superblock allocation and thus s_umount wasn't held. But now s_umount
must be dropped before opening block devices to avoid locking
violations.

This has consequences. The main one being that iterators over
@super_blocks and @fs_supers that grab a temporary reference to the
superblock can now also grab s_umount before the caller has managed to
open block devices and called fill_super(). So whereas before such
iterators or concurrent mounts would have simply slept on s_umount until
SB_BORN was set or the superblock was discard due to initalization
failure they can now needlessly spin through sget{_fc}().

If the caller is sleeping on bdev_lock or open_mutex one caller waiting
on SB_BORN will always spin somewhere and potentially this can go on for
quite a while.

It should be possible to drop s_umount while allowing iterators to wait
on a nascent superblock to either be born or discarded. This patch
implements a wait_var_event() mechanism allowing iterators to sleep
until they are woken when the superblock is born or discarded.

This also allows us to avoid relooping through @fs_supers and
@super_blocks if a superblock isn't yet born or dying.

Link: aca740cecbe5 ("fs: open block device after superblock creation")
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230818-vfs-super-fixes-v3-v3-3-9f0b1876e46b@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/super.c         | 204 +++++++++++++++++++++++++++++++++------------
 include/linux/fs.h |   1 +
 2 files changed, 154 insertions(+), 51 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index ba5d813c5804..e2630fe4928a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -50,7 +50,7 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
 	"sb_internal",
 };
 
-static inline void super_lock(struct super_block *sb, bool excl)
+static inline void __super_lock(struct super_block *sb, bool excl)
 {
 	if (excl)
 		down_write(&sb->s_umount);
@@ -66,14 +66,9 @@ static inline void super_unlock(struct super_block *sb, bool excl)
 		up_read(&sb->s_umount);
 }
 
-static inline void super_lock_excl(struct super_block *sb)
+static inline void __super_lock_excl(struct super_block *sb)
 {
-	super_lock(sb, true);
-}
-
-static inline void super_lock_shared(struct super_block *sb)
-{
-	super_lock(sb, false);
+	__super_lock(sb, true);
 }
 
 static inline void super_unlock_excl(struct super_block *sb)
@@ -86,6 +81,99 @@ static inline void super_unlock_shared(struct super_block *sb)
 	super_unlock(sb, false);
 }
 
+static inline bool wait_born(struct super_block *sb)
+{
+	unsigned int flags;
+
+	/*
+	 * Pairs with smp_store_release() in super_wake() and ensures
+	 * that we see SB_BORN or SB_DYING after we're woken.
+	 */
+	flags = smp_load_acquire(&sb->s_flags);
+	return flags & (SB_BORN | SB_DYING);
+}
+
+/**
+ * super_lock - wait for superblock to become ready and lock it
+ * @sb: superblock to wait for
+ * @excl: whether exclusive access is required
+ *
+ * If the superblock has neither passed through vfs_get_tree() or
+ * generic_shutdown_super() yet wait for it to happen. Either superblock
+ * creation will succeed and SB_BORN is set by vfs_get_tree() or we're
+ * woken and we'll see SB_DYING.
+ *
+ * The caller must have acquired a temporary reference on @sb->s_count.
+ *
+ * Return: This returns true if SB_BORN was set, false if SB_DYING was
+ *         set. The function acquires s_umount and returns with it held.
+ */
+static __must_check bool super_lock(struct super_block *sb, bool excl)
+{
+
+	lockdep_assert_not_held(&sb->s_umount);
+
+relock:
+	__super_lock(sb, excl);
+
+	/*
+	 * Has gone through generic_shutdown_super() in the meantime.
+	 * @sb->s_root is NULL and @sb->s_active is 0. No one needs to
+	 * grab a reference to this. Tell them so.
+	 */
+	if (sb->s_flags & SB_DYING)
+		return false;
+
+	/* Has called ->get_tree() successfully. */
+	if (sb->s_flags & SB_BORN)
+		return true;
+
+	super_unlock(sb, excl);
+
+	/* wait until the superblock is ready or dying */
+	wait_var_event(&sb->s_flags, wait_born(sb));
+
+	/*
+	 * Neither SB_BORN nor SB_DYING are ever unset so we never loop.
+	 * Just reacquire @sb->s_umount for the caller.
+	 */
+	goto relock;
+}
+
+/* wait and acquire read-side of @sb->s_umount */
+static inline bool super_lock_shared(struct super_block *sb)
+{
+	return super_lock(sb, false);
+}
+
+/* wait and acquire write-side of @sb->s_umount */
+static inline bool super_lock_excl(struct super_block *sb)
+{
+	return super_lock(sb, true);
+}
+
+/* wake waiters */
+#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING)
+static void super_wake(struct super_block *sb, unsigned int flag)
+{
+	WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS));
+	WARN_ON_ONCE(hweight32(flag & SUPER_WAKE_FLAGS) > 1);
+
+	/*
+	 * Pairs with smp_load_acquire() in super_lock() to make sure
+	 * all initializations in the superblock are seen by the user
+	 * seeing SB_BORN sent.
+	 */
+	smp_store_release(&sb->s_flags, sb->s_flags | flag);
+	/*
+	 * Pairs with the barrier in prepare_to_wait_event() to make sure
+	 * ___wait_var_event() either sees SB_BORN set or
+	 * waitqueue_active() check in wake_up_var() sees the waiter.
+	 */
+	smp_mb();
+	wake_up_var(&sb->s_flags);
+}
+
 /*
  * One thing we have to be careful of with a per-sb shrinker is that we don't
  * drop the last active reference to the superblock from within the shrinker.
@@ -393,7 +481,7 @@ EXPORT_SYMBOL(deactivate_locked_super);
 void deactivate_super(struct super_block *s)
 {
 	if (!atomic_add_unless(&s->s_active, -1, 1)) {
-		super_lock_excl(s);
+		__super_lock_excl(s);
 		deactivate_locked_super(s);
 	}
 }
@@ -415,10 +503,12 @@ EXPORT_SYMBOL(deactivate_super);
  */
 static int grab_super(struct super_block *s) __releases(sb_lock)
 {
+	bool born;
+
 	s->s_count++;
 	spin_unlock(&sb_lock);
-	super_lock_excl(s);
-	if ((s->s_flags & SB_BORN) && atomic_inc_not_zero(&s->s_active)) {
+	born = super_lock_excl(s);
+	if (born && atomic_inc_not_zero(&s->s_active)) {
 		put_super(s);
 		return 1;
 	}
@@ -447,8 +537,8 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
 bool super_trylock_shared(struct super_block *sb)
 {
 	if (down_read_trylock(&sb->s_umount)) {
-		if (!hlist_unhashed(&sb->s_instances) &&
-		    sb->s_root && (sb->s_flags & SB_BORN))
+		if (!(sb->s_flags & SB_DYING) && sb->s_root &&
+		    (sb->s_flags & SB_BORN))
 			return true;
 		super_unlock_shared(sb);
 	}
@@ -475,7 +565,7 @@ bool super_trylock_shared(struct super_block *sb)
 void retire_super(struct super_block *sb)
 {
 	WARN_ON(!sb->s_bdev);
-	super_lock_excl(sb);
+	__super_lock_excl(sb);
 	if (sb->s_iflags & SB_I_PERSB_BDI) {
 		bdi_unregister(sb->s_bdi);
 		sb->s_iflags &= ~SB_I_PERSB_BDI;
@@ -557,6 +647,13 @@ void generic_shutdown_super(struct super_block *sb)
 	/* should be initialized for __put_super_and_need_restart() */
 	hlist_del_init(&sb->s_instances);
 	spin_unlock(&sb_lock);
+	/*
+	 * Broadcast to everyone that grabbed a temporary reference to this
+	 * superblock before we removed it from @fs_supers that the superblock
+	 * is dying. Every walker of @fs_supers outside of sget{_fc}() will now
+	 * discard this superblock and treat it as dead.
+	 */
+	super_wake(sb, SB_DYING);
 	super_unlock_excl(sb);
 	if (sb->s_bdi != &noop_backing_dev_info) {
 		if (sb->s_iflags & SB_I_PERSB_BDI)
@@ -631,6 +728,11 @@ retry:
 	s->s_type = fc->fs_type;
 	s->s_iflags |= fc->s_iflags;
 	strscpy(s->s_id, s->s_type->name, sizeof(s->s_id));
+	/*
+	 * Make the superblock visible on @super_blocks and @fs_supers.
+	 * It's in a nascent state and users should wait on SB_BORN or
+	 * SB_DYING to be set.
+	 */
 	list_add_tail(&s->s_list, &super_blocks);
 	hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
 	spin_unlock(&sb_lock);
@@ -740,7 +842,8 @@ static void __iterate_supers(void (*f)(struct super_block *))
 
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (hlist_unhashed(&sb->s_instances))
+		/* Pairs with memory marrier in super_wake(). */
+		if (smp_load_acquire(&sb->s_flags) & SB_DYING)
 			continue;
 		sb->s_count++;
 		spin_unlock(&sb_lock);
@@ -770,13 +873,13 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (hlist_unhashed(&sb->s_instances))
-			continue;
+		bool born;
+
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 
-		super_lock_shared(sb);
-		if (sb->s_root && (sb->s_flags & SB_BORN))
+		born = super_lock_shared(sb);
+		if (born && sb->s_root)
 			f(sb, arg);
 		super_unlock_shared(sb);
 
@@ -806,11 +909,13 @@ void iterate_supers_type(struct file_system_type *type,
 
 	spin_lock(&sb_lock);
 	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
+		bool born;
+
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 
-		super_lock_shared(sb);
-		if (sb->s_root && (sb->s_flags & SB_BORN))
+		born = super_lock_shared(sb);
+		if (born && sb->s_root)
 			f(sb, arg);
 		super_unlock_shared(sb);
 
@@ -841,14 +946,11 @@ struct super_block *get_active_super(struct block_device *bdev)
 	if (!bdev)
 		return NULL;
 
-restart:
 	spin_lock(&sb_lock);
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (hlist_unhashed(&sb->s_instances))
-			continue;
 		if (sb->s_bdev == bdev) {
 			if (!grab_super(sb))
-				goto restart;
+				return NULL;
 			super_unlock_excl(sb);
 			return sb;
 		}
@@ -862,22 +964,21 @@ struct super_block *user_get_super(dev_t dev, bool excl)
 	struct super_block *sb;
 
 	spin_lock(&sb_lock);
-rescan:
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (hlist_unhashed(&sb->s_instances))
-			continue;
 		if (sb->s_dev ==  dev) {
+			bool born;
+
 			sb->s_count++;
 			spin_unlock(&sb_lock);
-			super_lock(sb, excl);
 			/* still alive? */
-			if (sb->s_root && (sb->s_flags & SB_BORN))
+			born = super_lock(sb, excl);
+			if (born && sb->s_root)
 				return sb;
 			super_unlock(sb, excl);
 			/* nope, got unmounted */
 			spin_lock(&sb_lock);
 			__put_super(sb);
-			goto rescan;
+			break;
 		}
 	}
 	spin_unlock(&sb_lock);
@@ -921,7 +1022,7 @@ int reconfigure_super(struct fs_context *fc)
 		if (!hlist_empty(&sb->s_pins)) {
 			super_unlock_excl(sb);
 			group_pin_kill(&sb->s_pins);
-			super_lock_excl(sb);
+			__super_lock_excl(sb);
 			if (!sb->s_root)
 				return 0;
 			if (sb->s_writers.frozen != SB_UNFROZEN)
@@ -984,9 +1085,9 @@ cancel_readonly:
 
 static void do_emergency_remount_callback(struct super_block *sb)
 {
-	super_lock_excl(sb);
-	if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
-	    !sb_rdonly(sb)) {
+	bool born = super_lock_excl(sb);
+
+	if (born && sb->s_root && sb->s_bdev && !sb_rdonly(sb)) {
 		struct fs_context *fc;
 
 		fc = fs_context_for_reconfigure(sb->s_root,
@@ -1020,8 +1121,9 @@ void emergency_remount(void)
 
 static void do_thaw_all_callback(struct super_block *sb)
 {
-	super_lock_excl(sb);
-	if (sb->s_root && sb->s_flags & SB_BORN) {
+	bool born = super_lock_excl(sb);
+
+	if (born && sb->s_root) {
 		emergency_thaw_bdev(sb);
 		thaw_super_locked(sb);
 	} else {
@@ -1212,9 +1314,9 @@ EXPORT_SYMBOL(get_tree_keyed);
  */
 static bool super_lock_shared_active(struct super_block *sb)
 {
-	super_lock_shared(sb);
-	if (!sb->s_root ||
-	    (sb->s_flags & (SB_ACTIVE | SB_BORN)) != (SB_ACTIVE | SB_BORN)) {
+	bool born = super_lock_shared(sb);
+
+	if (!born || !sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
 		super_unlock_shared(sb);
 		return false;
 	}
@@ -1374,7 +1476,7 @@ int get_tree_bdev(struct fs_context *fc,
 		 */
 		super_unlock_excl(s);
 		error = setup_bdev_super(s, fc->sb_flags, fc);
-		super_lock_excl(s);
+		__super_lock_excl(s);
 		if (!error)
 			error = fill_super(s, fc);
 		if (error) {
@@ -1426,7 +1528,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
 		 */
 		super_unlock_excl(s);
 		error = setup_bdev_super(s, flags, NULL);
-		super_lock_excl(s);
+		__super_lock_excl(s);
 		if (!error)
 			error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
 		if (error) {
@@ -1566,13 +1668,13 @@ int vfs_get_tree(struct fs_context *fc)
 	WARN_ON(!sb->s_bdi);
 
 	/*
-	 * Write barrier is for super_cache_count(). We place it before setting
-	 * SB_BORN as the data dependency between the two functions is the
-	 * superblock structure contents that we just set up, not the SB_BORN
-	 * flag.
+	 * super_wake() contains a memory barrier which also care of
+	 * ordering for super_cache_count(). We place it before setting
+	 * SB_BORN as the data dependency between the two functions is
+	 * the superblock structure contents that we just set up, not
+	 * the SB_BORN flag.
 	 */
-	smp_wmb();
-	sb->s_flags |= SB_BORN;
+	super_wake(sb, SB_BORN);
 
 	error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL);
 	if (unlikely(error)) {
@@ -1715,7 +1817,7 @@ int freeze_super(struct super_block *sb)
 	int ret;
 
 	atomic_inc(&sb->s_active);
-	super_lock_excl(sb);
+	__super_lock_excl(sb);
 	if (sb->s_writers.frozen != SB_UNFROZEN) {
 		deactivate_locked_super(sb);
 		return -EBUSY;
@@ -1737,7 +1839,7 @@ int freeze_super(struct super_block *sb)
 	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
 	super_unlock_excl(sb);
 	sb_wait_write(sb, SB_FREEZE_WRITE);
-	super_lock_excl(sb);
+	__super_lock_excl(sb);
 
 	/* Now we go and block page faults... */
 	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
@@ -1820,7 +1922,7 @@ out:
  */
 int thaw_super(struct super_block *sb)
 {
-	super_lock_excl(sb);
+	__super_lock_excl(sb);
 	return thaw_super_locked(sb);
 }
 EXPORT_SYMBOL(thaw_super);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 14b5777a24a0..173672645156 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1095,6 +1095,7 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_LAZYTIME     BIT(25)	/* Update the on-disk [acm]times lazily */
 
 /* These sb flags are internal to the kernel */
+#define SB_DYING        BIT(24)
 #define SB_SUBMOUNT     BIT(26)
 #define SB_FORCE        BIT(27)
 #define SB_NOSEC        BIT(28)

From 2c18a63b760a0f68f14cb8bb4c3840bb0b63b73e Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 18 Aug 2023 16:00:51 +0200
Subject: [PATCH 1655/2047] super: wait until we passed kill super

Recent rework moved block device closing out of sb->put_super() and into
sb->kill_sb() to avoid deadlocks as s_umount is held in put_super() and
blkdev_put() can end up taking s_umount again.

That means we need to move the removal of the superblock from @fs_supers
out of generic_shutdown_super() and into deactivate_locked_super() to
ensure that concurrent mounters don't fail to open block devices that
are still in use because blkdev_put() in sb->kill_sb() hasn't been
called yet.

We can now do this as we can make iterators through @fs_super and
@super_blocks wait without holding s_umount. Concurrent mounts will wait
until a dying superblock is fully dead so until sb->kill_sb() has been
called and SB_DEAD been set. Concurrent iterators can already discard
any SB_DYING superblock.

Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230818-vfs-super-fixes-v3-v3-4-9f0b1876e46b@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/super.c         | 71 +++++++++++++++++++++++++++++++++++++++++-----
 include/linux/fs.h |  1 +
 2 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index e2630fe4928a..2f604a6494fb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -153,7 +153,7 @@ static inline bool super_lock_excl(struct super_block *sb)
 }
 
 /* wake waiters */
-#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING)
+#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD)
 static void super_wake(struct super_block *sb, unsigned int flag)
 {
 	WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS));
@@ -461,6 +461,25 @@ void deactivate_locked_super(struct super_block *s)
 		list_lru_destroy(&s->s_dentry_lru);
 		list_lru_destroy(&s->s_inode_lru);
 
+		/*
+		 * Remove it from @fs_supers so it isn't found by new
+		 * sget{_fc}() walkers anymore. Any concurrent mounter still
+		 * managing to grab a temporary reference is guaranteed to
+		 * already see SB_DYING and will wait until we notify them about
+		 * SB_DEAD.
+		 */
+		spin_lock(&sb_lock);
+		hlist_del_init(&s->s_instances);
+		spin_unlock(&sb_lock);
+
+		/*
+		 * Let concurrent mounts know that this thing is really dead.
+		 * We don't need @sb->s_umount here as every concurrent caller
+		 * will see SB_DYING and either discard the superblock or wait
+		 * for SB_DEAD.
+		 */
+		super_wake(s, SB_DEAD);
+
 		put_filesystem(fs);
 		put_super(s);
 	} else {
@@ -517,6 +536,45 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
 	return 0;
 }
 
+static inline bool wait_dead(struct super_block *sb)
+{
+	unsigned int flags;
+
+	/*
+	 * Pairs with memory barrier in super_wake() and ensures
+	 * that we see SB_DEAD after we're woken.
+	 */
+	flags = smp_load_acquire(&sb->s_flags);
+	return flags & SB_DEAD;
+}
+
+/**
+ * grab_super_dead - acquire an active reference to a superblock
+ * @sb: superblock to acquire
+ *
+ * Acquire a temporary reference on a superblock and try to trade it for
+ * an active reference. This is used in sget{_fc}() to wait for a
+ * superblock to either become SB_BORN or for it to pass through
+ * sb->kill() and be marked as SB_DEAD.
+ *
+ * Return: This returns true if an active reference could be acquired,
+ *         false if not.
+ */
+static bool grab_super_dead(struct super_block *sb)
+{
+
+	sb->s_count++;
+	if (grab_super(sb)) {
+		put_super(sb);
+		lockdep_assert_held(&sb->s_umount);
+		return true;
+	}
+	wait_var_event(&sb->s_flags, wait_dead(sb));
+	put_super(sb);
+	lockdep_assert_not_held(&sb->s_umount);
+	return false;
+}
+
 /*
  *	super_trylock_shared - try to grab ->s_umount shared
  *	@sb: reference we are trying to grab
@@ -643,15 +701,14 @@ void generic_shutdown_super(struct super_block *sb)
 			spin_unlock(&sb->s_inode_list_lock);
 		}
 	}
-	spin_lock(&sb_lock);
-	/* should be initialized for __put_super_and_need_restart() */
-	hlist_del_init(&sb->s_instances);
-	spin_unlock(&sb_lock);
 	/*
 	 * Broadcast to everyone that grabbed a temporary reference to this
 	 * superblock before we removed it from @fs_supers that the superblock
 	 * is dying. Every walker of @fs_supers outside of sget{_fc}() will now
 	 * discard this superblock and treat it as dead.
+	 *
+	 * We leave the superblock on @fs_supers so it can be found by
+	 * sget{_fc}() until we passed sb->kill_sb().
 	 */
 	super_wake(sb, SB_DYING);
 	super_unlock_excl(sb);
@@ -746,7 +803,7 @@ share_extant_sb:
 		destroy_unused_super(s);
 		return ERR_PTR(-EBUSY);
 	}
-	if (!grab_super(old))
+	if (!grab_super_dead(old))
 		goto retry;
 	destroy_unused_super(s);
 	return old;
@@ -790,7 +847,7 @@ retry:
 				destroy_unused_super(s);
 				return ERR_PTR(-EBUSY);
 			}
-			if (!grab_super(old))
+			if (!grab_super_dead(old))
 				goto retry;
 			destroy_unused_super(s);
 			return old;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 173672645156..a63da68305e9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1095,6 +1095,7 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_LAZYTIME     BIT(25)	/* Update the on-disk [acm]times lazily */
 
 /* These sb flags are internal to the kernel */
+#define SB_DEAD         BIT(21)
 #define SB_DYING        BIT(24)
 #define SB_SUBMOUNT     BIT(26)
 #define SB_FORCE        BIT(27)

From 2e99b73afde18853754c5fae8e8d1a66fe5e3f64 Mon Sep 17 00:00:00 2001
From: Bibo Mao <maobibo@loongson.cn>
Date: Fri, 11 Aug 2023 17:58:04 +0800
Subject: [PATCH 1656/2047] irqchip/loongson-eiointc: Fix return value checking
 of eiointc_index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Return value of function eiointc_index is int, however it is converted
into uint32_t and then compared smaller than zero, this will cause logic
problem.

Fixes: dd281e1a1a93 ("irqchip: Add Loongson Extended I/O interrupt controller support")
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230811095805.2974722-2-maobibo@loongson.cn
---
 drivers/irqchip/irq-loongson-eiointc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index 92d8aa28bdf5..1623cd779175 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -144,7 +144,7 @@ static int eiointc_router_init(unsigned int cpu)
 	int i, bit;
 	uint32_t data;
 	uint32_t node = cpu_to_eio_node(cpu);
-	uint32_t index = eiointc_index(node);
+	int index = eiointc_index(node);
 
 	if (index < 0) {
 		pr_err("Error: invalid nodemap!\n");

From 9bdd26b6477f798e292d0c196b9a8e5b77ce5e1d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 10 Aug 2023 14:33:54 +0200
Subject: [PATCH 1657/2047] irqchip/xtensa-pic: Include header for
 xtensa_pic_init_legacy()

The declaration for this function is not included, which leads to a harmless warning:

drivers/irqchip/irq-xtensa-pic.c:91:12: error: no previous prototype for 'xtensa_pic_init_legacy' [-Werror=missing-prototypes]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230810123404.1222332-1-arnd@kernel.org
---
 drivers/irqchip/irq-xtensa-pic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/irqchip/irq-xtensa-pic.c b/drivers/irqchip/irq-xtensa-pic.c
index ab12328be5ee..0c18d1f1e264 100644
--- a/drivers/irqchip/irq-xtensa-pic.c
+++ b/drivers/irqchip/irq-xtensa-pic.c
@@ -16,6 +16,7 @@
 #include <linux/irqdomain.h>
 #include <linux/irq.h>
 #include <linux/irqchip.h>
+#include <linux/irqchip/xtensa-pic.h>
 #include <linux/of.h>
 
 unsigned int cached_irq_mask;

From 90e921d768677172cbb4be1d7597d556ae018020 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 10 Aug 2023 14:33:55 +0200
Subject: [PATCH 1658/2047] irqchip/mips-gic: Mark gic_irq_domain_free() static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This function is only used locally and should be static to avoid a warning:

drivers/irqchip/irq-mips-gic.c:560:6: error: no previous prototype for 'gic_irq_domain_free' [-Werror=missing-prototypes]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230810123404.1222332-2-arnd@kernel.org
---
 drivers/irqchip/irq-mips-gic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 6d5ecc10a870..76253e864f23 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -557,7 +557,7 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
 	return gic_irq_domain_map(d, virq, hwirq);
 }
 
-void gic_irq_domain_free(struct irq_domain *d, unsigned int virq,
+static void gic_irq_domain_free(struct irq_domain *d, unsigned int virq,
 			 unsigned int nr_irqs)
 {
 }

From 424a4aec7a2392d248e2f260ba571a01e24b8ec2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 10 Aug 2023 14:33:56 +0200
Subject: [PATCH 1659/2047] irqchipr/i8259: Mark i8259_of_init() static

i8259_of_init() is only used as an initcall and does not need to be global,
so mark it static to avoid:

drivers/irqchip/irq-i8259.c:343:12: warning: no previous prototype for 'i8259_of_init' [-Wmissing-prototypes]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230810123404.1222332-3-arnd@kernel.org
---
 drivers/irqchip/irq-i8259.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-i8259.c b/drivers/irqchip/irq-i8259.c
index b70ce0d3c092..115bdcffab24 100644
--- a/drivers/irqchip/irq-i8259.c
+++ b/drivers/irqchip/irq-i8259.c
@@ -340,7 +340,7 @@ static void i8259_irq_dispatch(struct irq_desc *desc)
 	generic_handle_domain_irq(domain, hwirq);
 }
 
-int __init i8259_of_init(struct device_node *node, struct device_node *parent)
+static int __init i8259_of_init(struct device_node *node, struct device_node *parent)
 {
 	struct irq_domain *domain;
 	unsigned int parent_irq;

From 2f54db449b6185024ef884fd3083802e1eae6921 Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Thu, 3 Aug 2023 16:34:51 +0800
Subject: [PATCH 1660/2047] irqchip/imx-mu-msi: Do not check for 0 return after
 calling platform_get_irq()

It is not possible for platform_get_irq() to return 0. Use the
return value from platform_get_irq().

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230803083452.4085650-2-ruanjinjie@huawei.com
---
 drivers/irqchip/irq-imx-mu-msi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-imx-mu-msi.c b/drivers/irqchip/irq-imx-mu-msi.c
index 229039eda1b1..90d41c1407ac 100644
--- a/drivers/irqchip/irq-imx-mu-msi.c
+++ b/drivers/irqchip/irq-imx-mu-msi.c
@@ -339,8 +339,8 @@ static int __init imx_mu_of_init(struct device_node *dn,
 	msi_data->msiir_addr = res->start + msi_data->cfg->xTR;
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq <= 0)
-		return -ENODEV;
+	if (irq < 0)
+		return irq;
 
 	platform_set_drvdata(pdev, msi_data);
 

From d0afed833970808f141ed52fcc591998087f3a26 Mon Sep 17 00:00:00 2001
From: Ruan Jinjie <ruanjinjie@huawei.com>
Date: Thu, 3 Aug 2023 16:34:52 +0800
Subject: [PATCH 1661/2047] irqchip/irq-pruss-intc: Do not check for 0 return
 after calling platform_get_irq()

It is not possible for platform_get_irq() to return 0. Use the
return value from platform_get_irq().

Signed-off-by: Ruan Jinjie <ruanjinjie@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230803083452.4085650-3-ruanjinjie@huawei.com
---
 drivers/irqchip/irq-pruss-intc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-pruss-intc.c b/drivers/irqchip/irq-pruss-intc.c
index fa8d89b02ec0..ee69e8d83b9a 100644
--- a/drivers/irqchip/irq-pruss-intc.c
+++ b/drivers/irqchip/irq-pruss-intc.c
@@ -565,8 +565,8 @@ static int pruss_intc_probe(struct platform_device *pdev)
 			continue;
 
 		irq = platform_get_irq_byname(pdev, irq_names[i]);
-		if (irq <= 0) {
-			ret = (irq == 0) ? -EINVAL : irq;
+		if (irq < 0) {
+			ret = irq;
 			goto fail_irq;
 		}
 

From 70befedaf5b020a08d397e2d6a9c306f0f611cfb Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 26 Jul 2023 11:07:41 +0800
Subject: [PATCH 1662/2047] irqchip/orion: Use of_address_count() helper

After commit 16988c742968 ("of/address: introduce of_address_count() helper"),
Use of_address_count() to instead of open-coding it, it's no functional change.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230726030741.1136244-1-yangyingliang@huawei.com
---
 drivers/irqchip/irq-orion.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-orion.c b/drivers/irqchip/irq-orion.c
index 17c2c7a07f10..4e4e874e09a8 100644
--- a/drivers/irqchip/irq-orion.c
+++ b/drivers/irqchip/irq-orion.c
@@ -57,8 +57,7 @@ static int __init orion_irq_init(struct device_node *np,
 	struct resource r;
 
 	/* count number of irq chips by valid reg addresses */
-	while (of_address_to_resource(np, num_chips, &r) == 0)
-		num_chips++;
+	num_chips = of_address_count(np);
 
 	orion_irq_domain = irq_domain_add_linear(np,
 				num_chips * ORION_IRQS_PER_CHIP,

From ee07675046389046d501319b08158b20a8902a5e Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 14 Jul 2023 11:46:43 -0600
Subject: [PATCH 1663/2047] irqchip: Explicitly include correct DT includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230714174645.4058547-1-robh@kernel.org
---
 drivers/irqchip/irq-bcm6345-l1.c            | 1 -
 drivers/irqchip/irq-bcm7038-l1.c            | 1 -
 drivers/irqchip/irq-brcmstb-l2.c            | 1 -
 drivers/irqchip/irq-gic-pm.c                | 2 +-
 drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c | 2 --
 drivers/irqchip/irq-imx-intmux.c            | 3 ++-
 drivers/irqchip/irq-imx-irqsteer.c          | 3 ++-
 drivers/irqchip/irq-keystone.c              | 2 +-
 drivers/irqchip/irq-loongson-htvec.c        | 1 -
 drivers/irqchip/irq-loongson-pch-pic.c      | 2 +-
 drivers/irqchip/irq-madera.c                | 4 +---
 drivers/irqchip/irq-pruss-intc.c            | 2 +-
 drivers/irqchip/irq-qcom-mpm.c              | 2 +-
 drivers/irqchip/irq-renesas-intc-irqpin.c   | 1 -
 drivers/irqchip/irq-st.c                    | 2 +-
 drivers/irqchip/irq-stm32-exti.c            | 3 ++-
 drivers/irqchip/irq-sunxi-nmi.c             | 1 -
 drivers/irqchip/irq-tb10x.c                 | 1 -
 drivers/irqchip/irq-ti-sci-inta.c           | 4 ++--
 drivers/irqchip/irq-ti-sci-intr.c           | 4 ++--
 drivers/irqchip/irq-uniphier-aidet.c        | 1 -
 drivers/irqchip/irqchip.c                   | 2 +-
 drivers/irqchip/qcom-pdc.c                  | 1 -
 23 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c
index fa113cb2529a..b521897277a8 100644
--- a/drivers/irqchip/irq-bcm6345-l1.c
+++ b/drivers/irqchip/irq-bcm6345-l1.c
@@ -60,7 +60,6 @@
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c
index a62b96237b82..24ca1d656adc 100644
--- a/drivers/irqchip/irq-bcm7038-l1.c
+++ b/drivers/irqchip/irq-bcm7038-l1.c
@@ -20,7 +20,6 @@
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
index 091b0fe7e324..5559c943f03f 100644
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -15,7 +15,6 @@
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/io.h>
diff --git a/drivers/irqchip/irq-gic-pm.c b/drivers/irqchip/irq-gic-pm.c
index 3989d16f997b..a275a8071a25 100644
--- a/drivers/irqchip/irq-gic-pm.c
+++ b/drivers/irqchip/irq-gic-pm.c
@@ -4,7 +4,7 @@
  */
 #include <linux/module.h>
 #include <linux/clk.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/irqchip/arm-gic.h>
 #include <linux/platform_device.h>
diff --git a/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c b/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c
index 634263dfd7b5..8e87fc35f8aa 100644
--- a/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c
@@ -9,8 +9,6 @@
 
 #include <linux/acpi.h>
 #include <linux/acpi_iort.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
 #include <linux/irq.h>
 #include <linux/msi.h>
 #include <linux/of.h>
diff --git a/drivers/irqchip/irq-imx-intmux.c b/drivers/irqchip/irq-imx-intmux.c
index 80aaea82468a..6d9a08238c9d 100644
--- a/drivers/irqchip/irq-imx-intmux.c
+++ b/drivers/irqchip/irq-imx-intmux.c
@@ -50,8 +50,9 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/pm_runtime.h>
 
diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c
index 96230a04ec23..bd9543314539 100644
--- a/drivers/irqchip/irq-imx-irqsteer.c
+++ b/drivers/irqchip/irq-imx-irqsteer.c
@@ -10,8 +10,9 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
 #include <linux/kernel.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/spinlock.h>
 
diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c
index ba9792e60329..a36396db4b08 100644
--- a/drivers/irqchip/irq-keystone.c
+++ b/drivers/irqchip/irq-keystone.c
@@ -15,7 +15,7 @@
 #include <linux/irqdomain.h>
 #include <linux/irqchip.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 
diff --git a/drivers/irqchip/irq-loongson-htvec.c b/drivers/irqchip/irq-loongson-htvec.c
index fc8bf1f5d41b..0bff728b25e3 100644
--- a/drivers/irqchip/irq-loongson-htvec.c
+++ b/drivers/irqchip/irq-loongson-htvec.c
@@ -15,7 +15,6 @@
 #include <linux/platform_device.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/syscore_ops.h>
 
 /* Registers */
diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c
index 93a71f66efeb..63db8e2172e0 100644
--- a/drivers/irqchip/irq-loongson-pch-pic.c
+++ b/drivers/irqchip/irq-loongson-pch-pic.c
@@ -12,9 +12,9 @@
 #include <linux/irqdomain.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/syscore_ops.h>
 
 /* Registers */
diff --git a/drivers/irqchip/irq-madera.c b/drivers/irqchip/irq-madera.c
index 8b81271c823c..3eb1f8cdf674 100644
--- a/drivers/irqchip/irq-madera.c
+++ b/drivers/irqchip/irq-madera.c
@@ -10,12 +10,10 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/irqchip/irq-madera.h>
 #include <linux/mfd/madera/core.h>
 #include <linux/mfd/madera/pdata.h>
diff --git a/drivers/irqchip/irq-pruss-intc.c b/drivers/irqchip/irq-pruss-intc.c
index ee69e8d83b9a..0f64ecb9b1f4 100644
--- a/drivers/irqchip/irq-pruss-intc.c
+++ b/drivers/irqchip/irq-pruss-intc.c
@@ -17,7 +17,7 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 
 /*
diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c
index d30614661eea..7124565234a5 100644
--- a/drivers/irqchip/irq-qcom-mpm.c
+++ b/drivers/irqchip/irq-qcom-mpm.c
@@ -14,7 +14,7 @@
 #include <linux/mailbox_client.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/slab.h>
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index 26e4c17a7bf2..fa19585f3dee 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -17,7 +17,6 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/pm_runtime.h>
 
 #define INTC_IRQPIN_MAX 8 /* maximum 8 interrupts per driver instance */
diff --git a/drivers/irqchip/irq-st.c b/drivers/irqchip/irq-st.c
index 819a12297b58..de71bb350d57 100644
--- a/drivers/irqchip/irq-st.c
+++ b/drivers/irqchip/irq-st.c
@@ -10,7 +10,7 @@
 #include <dt-bindings/interrupt-controller/irq-st.h>
 #include <linux/err.h>
 #include <linux/mfd/syscon.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index b5fa76ce5046..d8ba5fba7450 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -14,10 +14,11 @@
 #include <linux/irqchip.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/syscore_ops.h>
 
 #include <dt-bindings/interrupt-controller/arm-gic.h>
diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index 21d49791f855..e760b1278143 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c
@@ -19,7 +19,6 @@
 #include <linux/irqdomain.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/irqchip.h>
 #include <linux/irqchip/chained_irq.h>
 
diff --git a/drivers/irqchip/irq-tb10x.c b/drivers/irqchip/irq-tb10x.c
index 8a0e69298e83..680586354d12 100644
--- a/drivers/irqchip/irq-tb10x.c
+++ b/drivers/irqchip/irq-tb10x.c
@@ -13,7 +13,6 @@
 #include <linux/irqchip.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/bitops.h>
diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c
index 7133f9fa6fd9..b83f5cbab123 100644
--- a/drivers/irqchip/irq-ti-sci-inta.c
+++ b/drivers/irqchip/irq-ti-sci-inta.c
@@ -15,9 +15,9 @@
 #include <linux/msi.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/soc/ti/ti_sci_inta_msi.h>
 #include <linux/soc/ti/ti_sci_protocol.h>
diff --git a/drivers/irqchip/irq-ti-sci-intr.c b/drivers/irqchip/irq-ti-sci-intr.c
index 1186f1e431a3..c027cd9e4a69 100644
--- a/drivers/irqchip/irq-ti-sci-intr.c
+++ b/drivers/irqchip/irq-ti-sci-intr.c
@@ -12,9 +12,9 @@
 #include <linux/io.h>
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
-#include <linux/of_platform.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
 #include <linux/soc/ti/ti_sci_protocol.h>
 
 /**
diff --git a/drivers/irqchip/irq-uniphier-aidet.c b/drivers/irqchip/irq-uniphier-aidet.c
index 716b1bb88bf2..601f9343d5b3 100644
--- a/drivers/irqchip/irq-uniphier-aidet.c
+++ b/drivers/irqchip/irq-uniphier-aidet.c
@@ -12,7 +12,6 @@
 #include <linux/irqdomain.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c
index 7899607fbee8..1eeb0d0156ce 100644
--- a/drivers/irqchip/irqchip.c
+++ b/drivers/irqchip/irqchip.c
@@ -10,7 +10,7 @@
 
 #include <linux/acpi.h>
 #include <linux/init.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/irqchip.h>
 #include <linux/platform_device.h>
diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index d96916cf6a41..a32c0d28d038 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/soc/qcom/irq.h>
 #include <linux/spinlock.h>

From 32b5f8a6f5de384291ea4083541ad10725c7a054 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 18:02:52 +0800
Subject: [PATCH 1664/2047] irqchip/ls-scfg-msi: Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230704100253.36045-1-frank.li@vivo.com
---
 drivers/irqchip/irq-ls-scfg-msi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c
index f5ba3f9f8415..f31a262fe438 100644
--- a/drivers/irqchip/irq-ls-scfg-msi.c
+++ b/drivers/irqchip/irq-ls-scfg-msi.c
@@ -349,8 +349,7 @@ static int ls_scfg_msi_probe(struct platform_device *pdev)
 
 	msi_data->cfg = (struct ls_scfg_msi_cfg *) match->data;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	msi_data->regs = devm_ioremap_resource(&pdev->dev, res);
+	msi_data->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(msi_data->regs)) {
 		dev_err(&pdev->dev, "failed to initialize 'regs'\n");
 		return PTR_ERR(msi_data->regs);

From 69da32b5438a9f3dc5b26eee0a94372b232a603c Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 4 Jul 2023 18:02:53 +0800
Subject: [PATCH 1665/2047] irqchip/irq-mvebu-sei: Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230704100253.36045-2-frank.li@vivo.com
---
 drivers/irqchip/irq-mvebu-sei.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
index 4ecef6d83777..a48dbe91b036 100644
--- a/drivers/irqchip/irq-mvebu-sei.c
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -377,8 +377,7 @@ static int mvebu_sei_probe(struct platform_device *pdev)
 	mutex_init(&sei->cp_msi_lock);
 	raw_spin_lock_init(&sei->mask_lock);
 
-	sei->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	sei->base = devm_ioremap_resource(sei->dev, sei->res);
+	sei->base = devm_platform_get_and_ioremap_resource(pdev, 0, &sei->res);
 	if (IS_ERR(sei->base))
 		return PTR_ERR(sei->base);
 

From 2b709c6760ecf6d953ce15b2301ae0c24793cbe1 Mon Sep 17 00:00:00 2001
From: Huqiang Qin <huqiang.qin@amlogic.com>
Date: Wed, 28 Jun 2023 17:15:32 +0800
Subject: [PATCH 1666/2047] dt-bindings: interrupt-controller: Add support for
 Amlogic-C3 SoCs

Update dt-binding document for GPIO interrupt controller of Amlogic-C3 SoCs

Signed-off-by: Huqiang Qin <huqiang.qin@amlogic.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230628091533.3884385-2-huqiang.qin@amlogic.com
---
 .../bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml
index e84e4f33b358..3d06db98e978 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml
@@ -35,6 +35,7 @@ properties:
               - amlogic,meson-sm1-gpio-intc
               - amlogic,meson-a1-gpio-intc
               - amlogic,meson-s4-gpio-intc
+              - amlogic,c3-gpio-intc
           - const: amlogic,meson-gpio-intc
 
   reg:

From 19b5a44bee16518104e8a159ab9a60788292fbd4 Mon Sep 17 00:00:00 2001
From: Huqiang Qin <huqiang.qin@amlogic.com>
Date: Wed, 28 Jun 2023 17:15:33 +0800
Subject: [PATCH 1667/2047] irqchip: Add support for Amlogic-C3 SoCs

The Amlogic-C3 SoCs support 12 GPIO IRQ lines compared with previous
serial chips and have something different, details are as below.

IRQ Number:
- 54     1 pins on bank TESTN
- 53:40 14 pins on bank X
- 39:33  7 pins on bank D
- 32:27  6 pins on bank A
- 26:22  5 pins on bank E
- 21:15  7 pins on bank C
- 14:0  15 pins on bank B

Signed-off-by: Huqiang Qin <huqiang.qin@amlogic.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230628091533.3884385-3-huqiang.qin@amlogic.com
---
 drivers/irqchip/irq-meson-gpio.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index 7da18ef95211..f88df39f4129 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -150,6 +150,10 @@ static const struct meson_gpio_irq_params s4_params = {
 	INIT_MESON_S4_COMMON_DATA(82)
 };
 
+static const struct meson_gpio_irq_params c3_params = {
+	INIT_MESON_S4_COMMON_DATA(55)
+};
+
 static const struct of_device_id meson_irq_gpio_matches[] __maybe_unused = {
 	{ .compatible = "amlogic,meson8-gpio-intc", .data = &meson8_params },
 	{ .compatible = "amlogic,meson8b-gpio-intc", .data = &meson8b_params },
@@ -160,6 +164,7 @@ static const struct of_device_id meson_irq_gpio_matches[] __maybe_unused = {
 	{ .compatible = "amlogic,meson-sm1-gpio-intc", .data = &sm1_params },
 	{ .compatible = "amlogic,meson-a1-gpio-intc", .data = &a1_params },
 	{ .compatible = "amlogic,meson-s4-gpio-intc", .data = &s4_params },
+	{ .compatible = "amlogic,c3-gpio-intc", .data = &c3_params },
 	{ }
 };
 

From 3ac9b733723e4a09ad9125ceb51898d904cd5169 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 18 Aug 2023 14:40:01 -0500
Subject: [PATCH 1668/2047] ACPI: Adjust #ifdef for *_lps0_dev use

The `#ifdef` for acpi_register_lps0_dev() currently is guarded against
`CONFIG_X86`, but actually the functions contained in the block are
specifically sleep related functions.
Adjust the guard to also check for `CONFIG_SUSPEND`.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 641dc4843987..6b4966e5d7b1 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1100,7 +1100,7 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state,
 
 acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state,
 					   u32 val_a, u32 val_b);
-#ifdef CONFIG_X86
+#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86)
 struct acpi_s2idle_dev_ops {
 	struct list_head list_node;
 	void (*prepare)(void);
@@ -1109,7 +1109,7 @@ struct acpi_s2idle_dev_ops {
 };
 int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg);
 void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg);
-#endif /* CONFIG_X86 */
+#endif /* CONFIG_SUSPEND && CONFIG_X86 */
 #ifndef CONFIG_IA64
 void arch_reserve_mem_area(acpi_physical_address addr, size_t size);
 #else

From 3c6b1212d20bbbffcad5709ab0f2d5ed9b5859a8 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 18 Aug 2023 14:40:02 -0500
Subject: [PATCH 1669/2047] ACPI: x86: s2idle: Post-increment variables when
 getting constraints

When code uses a pre-increment it makes the reader question "why".
In the constraint fetching code there is no reason for the variables
to be pre-incremented so adjust to post-increment.
No intended functional changes.

Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Suggested-by: Bjorn Helgaas <helgaas@kernel.org>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index ce62e61a9605..7711dde68947 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -123,13 +123,13 @@ static void lpi_device_get_constraints_amd(void)
 			acpi_handle_debug(lps0_device_handle,
 					  "LPI: constraints list begin:\n");
 
-			for (j = 0; j < package->package.count; ++j) {
+			for (j = 0; j < package->package.count; j++) {
 				union acpi_object *info_obj = &package->package.elements[j];
 				struct lpi_device_constraint_amd dev_info = {};
 				struct lpi_constraints *list;
 				acpi_status status;
 
-				for (k = 0; k < info_obj->package.count; ++k) {
+				for (k = 0; k < info_obj->package.count; k++) {
 					union acpi_object *obj = &info_obj->package.elements[k];
 
 					list = &lpi_constraints_table[lpi_constraints_table_size];
@@ -214,7 +214,7 @@ static void lpi_device_get_constraints(void)
 		if (!package)
 			continue;
 
-		for (j = 0; j < package->package.count; ++j) {
+		for (j = 0; j < package->package.count; j++) {
 			union acpi_object *element =
 					&(package->package.elements[j]);
 
@@ -246,7 +246,7 @@ static void lpi_device_get_constraints(void)
 
 		constraint->min_dstate = -1;
 
-		for (j = 0; j < package_count; ++j) {
+		for (j = 0; j < package_count; j++) {
 			union acpi_object *info_obj = &info.package[j];
 			union acpi_object *cnstr_pkg;
 			union acpi_object *obj;

From 883cf0d4cf288313b71146ddebdf5d647b76c78b Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 18 Aug 2023 14:40:03 -0500
Subject: [PATCH 1670/2047] ACPI: x86: s2idle: Catch multiple ACPI_TYPE_PACKAGE
 objects

If a badly constructed firmware includes multiple `ACPI_TYPE_PACKAGE`
objects while evaluating the AMD LPS0 _DSM, there will be a memory
leak.  Explicitly guard against this.

Suggested-by: Bjorn Helgaas <helgaas@kernel.org>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 7711dde68947..508decbac298 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -113,6 +113,12 @@ static void lpi_device_get_constraints_amd(void)
 		union acpi_object *package = &out_obj->package.elements[i];
 
 		if (package->type == ACPI_TYPE_PACKAGE) {
+			if (lpi_constraints_table) {
+				acpi_handle_err(lps0_device_handle,
+						"Duplicate constraints list\n");
+				goto free_acpi_buffer;
+			}
+
 			lpi_constraints_table = kcalloc(package->package.count,
 							sizeof(*lpi_constraints_table),
 							GFP_KERNEL);

From 9cc8cd086f05d9a01026c65c98da88561e9c619e Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 18 Aug 2023 14:40:04 -0500
Subject: [PATCH 1671/2047] ACPI: x86: s2idle: Fix a logic error parsing AMD
 constraints table

The constraints table should be resetting the `list` object
after running through all of `info_obj` iterations.

This adjusts whitespace as well as less code will now be included
with each loop. This fixes a functional problem is fixed where a
badly formed package in the inner loop may have incorrect data.

Fixes: 146f1ed852a8 ("ACPI: PM: s2idle: Add AMD support to handle _DSM")
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 39 ++++++++++++++++-----------------------
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 508decbac298..60835953ebfc 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -135,12 +135,11 @@ static void lpi_device_get_constraints_amd(void)
 				struct lpi_constraints *list;
 				acpi_status status;
 
+				list = &lpi_constraints_table[lpi_constraints_table_size];
+
 				for (k = 0; k < info_obj->package.count; k++) {
 					union acpi_object *obj = &info_obj->package.elements[k];
 
-					list = &lpi_constraints_table[lpi_constraints_table_size];
-					list->min_dstate = -1;
-
 					switch (k) {
 					case 0:
 						dev_info.enabled = obj->integer.value;
@@ -155,27 +154,21 @@ static void lpi_device_get_constraints_amd(void)
 						dev_info.min_dstate = obj->integer.value;
 						break;
 					}
-
-					if (!dev_info.enabled || !dev_info.name ||
-					    !dev_info.min_dstate)
-						continue;
-
-					status = acpi_get_handle(NULL, dev_info.name,
-								 &list->handle);
-					if (ACPI_FAILURE(status))
-						continue;
-
-					acpi_handle_debug(lps0_device_handle,
-							  "Name:%s\n", dev_info.name);
-
-					list->min_dstate = dev_info.min_dstate;
-
-					if (list->min_dstate < 0) {
-						acpi_handle_debug(lps0_device_handle,
-								  "Incomplete constraint defined\n");
-						continue;
-					}
 				}
+
+				if (!dev_info.enabled || !dev_info.name ||
+				    !dev_info.min_dstate)
+					continue;
+
+				status = acpi_get_handle(NULL, dev_info.name, &list->handle);
+				if (ACPI_FAILURE(status))
+					continue;
+
+				acpi_handle_debug(lps0_device_handle,
+						  "Name:%s\n", dev_info.name);
+
+				list->min_dstate = dev_info.min_dstate;
+
 				lpi_constraints_table_size++;
 			}
 		}

From a879058d01e2f6e324cc98ff2ffbe4f574c100a6 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 18 Aug 2023 14:40:05 -0500
Subject: [PATCH 1672/2047] ACPI: x86: s2idle: Add more debugging for AMD
 constraints parsing

While parsing the constraints show all the entries for the table
to aid with debugging other problems later.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 60835953ebfc..87563337a478 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -156,6 +156,13 @@ static void lpi_device_get_constraints_amd(void)
 					}
 				}
 
+				acpi_handle_debug(lps0_device_handle,
+						  "Name:%s, Enabled: %d, States: %d, MinDstate: %d\n",
+						  dev_info.name,
+						  dev_info.enabled,
+						  dev_info.function_states,
+						  dev_info.min_dstate);
+
 				if (!dev_info.enabled || !dev_info.name ||
 				    !dev_info.min_dstate)
 					continue;
@@ -164,9 +171,6 @@ static void lpi_device_get_constraints_amd(void)
 				if (ACPI_FAILURE(status))
 					continue;
 
-				acpi_handle_debug(lps0_device_handle,
-						  "Name:%s\n", dev_info.name);
-
 				list->min_dstate = dev_info.min_dstate;
 
 				lpi_constraints_table_size++;

From 41233988112f0f5fb015172a8db68a3c052aedda Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Aug 2023 14:40:06 -0500
Subject: [PATCH 1673/2047] ACPI: x86: s2idle: Add for_each_lpi_constraint()
 helper

We have one existing and one coming user of this macro.
Introduce a helper.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 87563337a478..1aa3cd5677bd 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -94,6 +94,11 @@ static struct lpi_constraints *lpi_constraints_table;
 static int lpi_constraints_table_size;
 static int rev_id;
 
+#define for_each_lpi_constraint(entry)						\
+	for (int i = 0;								\
+	     entry = &lpi_constraints_table[i], i < lpi_constraints_table_size;	\
+	     i++)
+
 static void lpi_device_get_constraints_amd(void)
 {
 	union acpi_object *out_obj;
@@ -296,30 +301,29 @@ free_acpi_buffer:
 
 static void lpi_check_constraints(void)
 {
-	int i;
+	struct lpi_constraints *entry;
 
-	for (i = 0; i < lpi_constraints_table_size; ++i) {
-		acpi_handle handle = lpi_constraints_table[i].handle;
-		struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
+	for_each_lpi_constraint(entry) {
+		struct acpi_device *adev = acpi_fetch_acpi_dev(entry->handle);
 
 		if (!adev)
 			continue;
 
-		acpi_handle_debug(handle,
+		acpi_handle_debug(entry->handle,
 			"LPI: required min power state:%s current power state:%s\n",
-			acpi_power_state_string(lpi_constraints_table[i].min_dstate),
+			acpi_power_state_string(entry->min_dstate),
 			acpi_power_state_string(adev->power.state));
 
 		if (!adev->flags.power_manageable) {
-			acpi_handle_info(handle, "LPI: Device not power manageable\n");
-			lpi_constraints_table[i].handle = NULL;
+			acpi_handle_info(entry->handle, "LPI: Device not power manageable\n");
+			entry->handle = NULL;
 			continue;
 		}
 
-		if (adev->power.state < lpi_constraints_table[i].min_dstate)
-			acpi_handle_info(handle,
+		if (adev->power.state < entry->min_dstate)
+			acpi_handle_info(entry->handle,
 				"LPI: Constraint not met; min power state:%s current power state:%s\n",
-				acpi_power_state_string(lpi_constraints_table[i].min_dstate),
+				acpi_power_state_string(entry->min_dstate),
 				acpi_power_state_string(adev->power.state));
 	}
 }

From 1c2a66d47de36608551d73562d01bb4afcf1d61a Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 18 Aug 2023 14:40:07 -0500
Subject: [PATCH 1674/2047] ACPI: x86: s2idle: Add a function to get LPS0
 constraint for a device

Other parts of the kernel may use LPS0 constraints information to make
decisions on what power state to put a device into.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
[ rjw: Rewrite kerneldoc, rearrange if () statement, edit subject and
  changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 24 ++++++++++++++++++++++++
 include/linux/acpi.h      |  6 ++++++
 2 files changed, 30 insertions(+)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 1aa3cd5677bd..08f7c6708206 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -299,6 +299,30 @@ free_acpi_buffer:
 	ACPI_FREE(out_obj);
 }
 
+/**
+ * acpi_get_lps0_constraint - Get the LPS0 constraint for a device.
+ * @adev: Device to get the constraint for.
+ *
+ * The LPS0 constraint is the shallowest (minimum) power state in which the
+ * device can be so as to allow the platform as a whole to achieve additional
+ * energy conservation by utilizing a system-wide low-power state.
+ *
+ * Returns:
+ *  - ACPI power state value of the constraint for @adev on success.
+ *  - Otherwise, ACPI_STATE_UNKNOWN.
+ */
+int acpi_get_lps0_constraint(struct acpi_device *adev)
+{
+	struct lpi_constraints *entry;
+
+	for_each_lpi_constraint(entry) {
+		if (adev->handle == entry->handle)
+			return entry->min_dstate;
+	}
+
+	return ACPI_STATE_UNKNOWN;
+}
+
 static void lpi_check_constraints(void)
 {
 	struct lpi_constraints *entry;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6b4966e5d7b1..9e2a15dd33e4 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1109,6 +1109,12 @@ struct acpi_s2idle_dev_ops {
 };
 int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg);
 void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg);
+int acpi_get_lps0_constraint(struct acpi_device *adev);
+#else /* CONFIG_SUSPEND && CONFIG_X86 */
+static inline int acpi_get_lps0_constraint(struct device *dev)
+{
+	return ACPI_STATE_UNKNOWN;
+}
 #endif /* CONFIG_SUSPEND && CONFIG_X86 */
 #ifndef CONFIG_IA64
 void arch_reserve_mem_area(acpi_physical_address addr, size_t size);

From 2ddd3cac1fa988323684cee567356e970e6750bd Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Thu, 17 Aug 2023 21:13:32 -0700
Subject: [PATCH 1675/2047] nsproxy: Convert nsproxy.count to refcount_t

atomic_t variables are currently used to implement reference counters
with the following properties:
 - counter is initialized to 1 using atomic_set()
 - a resource is freed upon counter reaching zero
 - once counter reaches zero, its further
   increments aren't allowed
 - counter schema uses basic atomic operations
   (set, inc, inc_not_zero, dec_and_test, etc.)

Such atomic variables should be converted to a newly provided
refcount_t type and API that prevents accidental counter overflows and
underflows. This is important since overflows and underflows can lead
to use-after-free situation and be exploitable.

The variable nsproxy.count is used as pure reference counter. Convert it
to refcount_t and fix up the operations.

**Important note for maintainers:

Some functions from refcount_t API defined in refcount.h have different
memory ordering guarantees than their atomic counterparts. Please check
Documentation/core-api/refcount-vs-atomic.rst for more information.

Normally the differences should not matter since refcount_t provides
enough guarantees to satisfy the refcounting use cases, but in some
rare cases it might matter. Please double check that you don't have
some undocumented memory guarantees for this variable usage.

For the nsproxy.count it might make a difference in following places:
 - put_nsproxy() and switch_task_namespaces(): decrement in
   refcount_dec_and_test() only provides RELEASE ordering and ACQUIRE
   ordering on success vs. fully ordered atomic counterpart

Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Reviewed-by: David Windsor <dwindsor@gmail.com>
Reviewed-by: Hans Liljestrand <ishkamiel@gmail.com>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Link: https://lore.kernel.org/r/20230818041327.gonna.210-kees@kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/nsproxy.h | 7 +++----
 kernel/nsproxy.c        | 4 ++--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index fee881cded01..771cb0285872 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -29,7 +29,7 @@ struct fs_struct;
  * nsproxy is copied.
  */
 struct nsproxy {
-	atomic_t count;
+	refcount_t count;
 	struct uts_namespace *uts_ns;
 	struct ipc_namespace *ipc_ns;
 	struct mnt_namespace *mnt_ns;
@@ -102,14 +102,13 @@ int __init nsproxy_cache_init(void);
 
 static inline void put_nsproxy(struct nsproxy *ns)
 {
-	if (atomic_dec_and_test(&ns->count)) {
+	if (refcount_dec_and_test(&ns->count))
 		free_nsproxy(ns);
-	}
 }
 
 static inline void get_nsproxy(struct nsproxy *ns)
 {
-	atomic_inc(&ns->count);
+	refcount_inc(&ns->count);
 }
 
 #endif
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 80d9c6d77a45..15781acaac1c 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -30,7 +30,7 @@
 static struct kmem_cache *nsproxy_cachep;
 
 struct nsproxy init_nsproxy = {
-	.count			= ATOMIC_INIT(1),
+	.count			= REFCOUNT_INIT(1),
 	.uts_ns			= &init_uts_ns,
 #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
 	.ipc_ns			= &init_ipc_ns,
@@ -55,7 +55,7 @@ static inline struct nsproxy *create_nsproxy(void)
 
 	nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
 	if (nsproxy)
-		atomic_set(&nsproxy->count, 1);
+		refcount_set(&nsproxy->count, 1);
 	return nsproxy;
 }
 

From ed9571647eff3dd7a53068f83338530959c7ace4 Mon Sep 17 00:00:00 2001
From: Liao Chang <liaochang1@huawei.com>
Date: Fri, 18 Aug 2023 09:50:00 +0000
Subject: [PATCH 1676/2047] cpufreq: stats: Improve the performance of
 cpufreq_stats_create_table()

In the worst case, the freq_table of policy data is not sorted and
contains duplicate frequencies, this means that it needs to iterate
through the entire freq_table of policy to ensure each frequency is
unique in the freq_table of stats data, this has a time complexity of
O(N^2), where N is the number of frequencies in the freq_table of
policy.

However, if the policy.freq_table is already sorted and contains no
duplicate frequencies, it can reduce the time complexity of creating
stats.freq_table to O(N), the 'freq_table_sorted' field of policy data
can be used to indicate whether the policy.freq_table is sorted.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Dhruva Gole <d-gole@ti.com>
[ rjw: Fix typo in changelog, remove redundant parens ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_stats.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 55c7ffd37d1c..a33df3c66c88 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -243,7 +243,8 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 
 	/* Find valid-unique entries */
 	cpufreq_for_each_valid_entry(pos, policy->freq_table)
-		if (freq_table_get_index(stats, pos->frequency) == -1)
+		if (policy->freq_table_sorted != CPUFREQ_TABLE_UNSORTED ||
+		    freq_table_get_index(stats, pos->frequency) == -1)
 			stats->freq_table[i++] = pos->frequency;
 
 	stats->state_num = i;

From d51847acb018d83186e4af67bc93f9a00a8644f7 Mon Sep 17 00:00:00 2001
From: Doug Smythies <dsmythies@telus.net>
Date: Sun, 20 Aug 2023 13:46:49 -0700
Subject: [PATCH 1677/2047] cpufreq: intel_pstate: set stale CPU frequency to
 minimum

The intel_pstate CPU frequency scaling driver does not
use policy->cur and it is 0.
When the CPU frequency is outdated arch_freq_get_on_cpu()
will default to the nominal clock frequency when its call to
cpufreq_quick_getpolicy_cur returns the never updated 0.
Thus, the listed frequency might be outside of currently
set limits. Some users are complaining about the high
reported frequency, albeit stale, when their system is
idle and/or it is above the reduced maximum they have set.

This patch will maintain policy_cur for the intel_pstate
driver at the current minimum CPU frequency.

Reported-by: Yang Jie <yang.jie@linux.intel.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217597
Signed-off-by: Doug Smythies <dsmythies@telus.net>
[ rjw: White space damage fixes and comment adjustment ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 8ca2bce4341a..dc50c9fb488d 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2609,6 +2609,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 			intel_pstate_clear_update_util_hook(policy->cpu);
 		intel_pstate_hwp_set(policy->cpu);
 	}
+	/*
+	 * policy->cur is never updated with the intel_pstate driver, but it
+	 * is used as a stale frequency value. So, keep it within limits.
+	 */
+	policy->cur = policy->min;
 
 	mutex_unlock(&intel_pstate_limits_lock);
 

From d74943a2f3cdade34e471b36f55f7979be656867 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Thu, 3 Aug 2023 16:32:02 +0200
Subject: [PATCH 1678/2047] mm/gup: reintroduce FOLL_NUMA as
 FOLL_HONOR_NUMA_FAULT

Unfortunately commit 474098edac26 ("mm/gup: replace FOLL_NUMA by
gup_can_follow_protnone()") missed that follow_page() and
follow_trans_huge_pmd() never implicitly set FOLL_NUMA because they really
don't want to fail on PROT_NONE-mapped pages -- either due to NUMA hinting
or due to inaccessible (PROT_NONE) VMAs.

As spelled out in commit 0b9d705297b2 ("mm: numa: Support NUMA hinting
page faults from gup/gup_fast"): "Other follow_page callers like KSM
should not use FOLL_NUMA, or they would fail to get the pages if they use
follow_page instead of get_user_pages."

liubo reported [1] that smaps_rollup results are imprecise, because they
miss accounting of pages that are mapped PROT_NONE.  Further, it's easy to
reproduce that KSM no longer works on inaccessible VMAs on x86-64, because
pte_protnone()/pmd_protnone() also indictaes "true" in inaccessible VMAs,
and follow_page() refuses to return such pages right now.

As KVM really depends on these NUMA hinting faults, removing the
pte_protnone()/pmd_protnone() handling in GUP code completely is not
really an option.

To fix the issues at hand, let's revive FOLL_NUMA as FOLL_HONOR_NUMA_FAULT
to restore the original behavior for now and add better comments.

Set FOLL_HONOR_NUMA_FAULT independent of FOLL_FORCE in
is_valid_gup_args(), to add that flag for all external GUP users.

Note that there are three GUP-internal __get_user_pages() users that don't
end up calling is_valid_gup_args() and consequently won't get
FOLL_HONOR_NUMA_FAULT set.

1) get_dump_page(): we really don't want to handle NUMA hinting
   faults. It specifies FOLL_FORCE and wouldn't have honored NUMA
   hinting faults already.
2) populate_vma_page_range(): we really don't want to handle NUMA hinting
   faults. It specifies FOLL_FORCE on accessible VMAs, so it wouldn't have
   honored NUMA hinting faults already.
3) faultin_vma_page_range(): we similarly don't want to handle NUMA
   hinting faults.

To make the combination of FOLL_FORCE and FOLL_HONOR_NUMA_FAULT work in
inaccessible VMAs properly, we have to perform VMA accessibility checks in
gup_can_follow_protnone().

As GUP-fast should reject such pages either way in
pte_access_permitted()/pmd_access_permitted() -- for example on x86-64 and
arm64 that both implement pte_protnone() -- let's just always fallback to
ordinary GUP when stumbling over pte_protnone()/pmd_protnone().

As Linus notes [2], honoring NUMA faults might only make sense for
selected GUP users.

So we should really see if we can instead let relevant GUP callers specify
it manually, and not trigger NUMA hinting faults from GUP as default.
Prepare for that by making FOLL_HONOR_NUMA_FAULT an external GUP flag and
adding appropriate documenation.

While at it, remove a stale comment from follow_trans_huge_pmd(): That
comment for pmd_protnone() was added in commit 2b4847e73004 ("mm: numa:
serialise parallel get_user_page against THP migration"), which noted:

	THP does not unmap pages due to a lack of support for migration
	entries at a PMD level.  This allows races with get_user_pages

Nowadays, we do have PMD migration entries, so the comment no longer
applies.  Let's drop it.

[1] https://lore.kernel.org/r/20230726073409.631838-1-liubo254@huawei.com
[2] https://lore.kernel.org/r/CAHk-=wgRiP_9X0rRdZKT8nhemZGNateMtb366t37d8-x7VRs=g@mail.gmail.com

Link: https://lkml.kernel.org/r/20230803143208.383663-2-david@redhat.com
Fixes: 474098edac26 ("mm/gup: replace FOLL_NUMA by gup_can_follow_protnone()")
Signed-off-by: David Hildenbrand <david@redhat.com>
Reported-by: liubo <liubo254@huawei.com>
Closes: https://lore.kernel.org/r/20230726073409.631838-1-liubo254@huawei.com
Reported-by: Peter Xu <peterx@redhat.com>
Closes: https://lore.kernel.org/all/ZMKJjDaqZ7FW0jfe@x1n/
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h       | 21 +++++++++++++++------
 include/linux/mm_types.h |  9 +++++++++
 mm/gup.c                 | 30 ++++++++++++++++++++++++------
 mm/huge_memory.c         |  3 +--
 4 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 406ab9ea818f..34f9dba17c1a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3421,15 +3421,24 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
  * Indicates whether GUP can follow a PROT_NONE mapped page, or whether
  * a (NUMA hinting) fault is required.
  */
-static inline bool gup_can_follow_protnone(unsigned int flags)
+static inline bool gup_can_follow_protnone(struct vm_area_struct *vma,
+					   unsigned int flags)
 {
 	/*
-	 * FOLL_FORCE has to be able to make progress even if the VMA is
-	 * inaccessible. Further, FOLL_FORCE access usually does not represent
-	 * application behaviour and we should avoid triggering NUMA hinting
-	 * faults.
+	 * If callers don't want to honor NUMA hinting faults, no need to
+	 * determine if we would actually have to trigger a NUMA hinting fault.
 	 */
-	return flags & FOLL_FORCE;
+	if (!(flags & FOLL_HONOR_NUMA_FAULT))
+		return true;
+
+	/*
+	 * NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs.
+	 *
+	 * Requiring a fault here even for inaccessible VMAs would mean that
+	 * FOLL_FORCE cannot make any progress, because handle_mm_fault()
+	 * refuses to process NUMA hinting faults in inaccessible VMAs.
+	 */
+	return !vma_is_accessible(vma);
 }
 
 typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5e74ce4a28cd..7d30dc4ff0ff 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1286,6 +1286,15 @@ enum {
 	FOLL_PCI_P2PDMA = 1 << 10,
 	/* allow interrupts from generic signals */
 	FOLL_INTERRUPTIBLE = 1 << 11,
+	/*
+	 * Always honor (trigger) NUMA hinting faults.
+	 *
+	 * FOLL_WRITE implicitly honors NUMA hinting faults because a
+	 * PROT_NONE-mapped page is not writable (exceptions with FOLL_FORCE
+	 * apply). get_user_pages_fast_only() always implicitly honors NUMA
+	 * hinting faults.
+	 */
+	FOLL_HONOR_NUMA_FAULT = 1 << 12,
 
 	/* See also internal only FOLL flags in mm/internal.h */
 };
diff --git a/mm/gup.c b/mm/gup.c
index 76d222ccc3ff..6e2f9e9d6537 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -597,7 +597,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 	pte = ptep_get(ptep);
 	if (!pte_present(pte))
 		goto no_page;
-	if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+	if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags))
 		goto no_page;
 
 	page = vm_normal_page(vma, address, pte);
@@ -714,7 +714,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
 	if (likely(!pmd_trans_huge(pmdval)))
 		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 
-	if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
+	if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
 		return no_page_table(vma, flags);
 
 	ptl = pmd_lock(mm, pmd);
@@ -851,6 +851,10 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 	if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
 		return NULL;
 
+	/*
+	 * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect
+	 * to fail on PROT_NONE-mapped pages.
+	 */
 	page = follow_page_mask(vma, address, foll_flags, &ctx);
 	if (ctx.pgmap)
 		put_dev_pagemap(ctx.pgmap);
@@ -2227,6 +2231,13 @@ static bool is_valid_gup_args(struct page **pages, int *locked,
 		gup_flags |= FOLL_UNLOCKABLE;
 	}
 
+	/*
+	 * For now, always trigger NUMA hinting faults. Some GUP users like
+	 * KVM require the hint to be as the calling context of GUP is
+	 * functionally similar to a memory reference from task context.
+	 */
+	gup_flags |= FOLL_HONOR_NUMA_FAULT;
+
 	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
 	if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
 			 (FOLL_PIN | FOLL_GET)))
@@ -2551,7 +2562,14 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
 		struct page *page;
 		struct folio *folio;
 
-		if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+		/*
+		 * Always fallback to ordinary GUP on PROT_NONE-mapped pages:
+		 * pte_access_permitted() better should reject these pages
+		 * either way: otherwise, GUP-fast might succeed in
+		 * cases where ordinary GUP would fail due to VMA access
+		 * permissions.
+		 */
+		if (pte_protnone(pte))
 			goto pte_unmap;
 
 		if (!pte_access_permitted(pte, flags & FOLL_WRITE))
@@ -2970,8 +2988,8 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
 
 		if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
 			     pmd_devmap(pmd))) {
-			if (pmd_protnone(pmd) &&
-			    !gup_can_follow_protnone(flags))
+			/* See gup_pte_range() */
+			if (pmd_protnone(pmd))
 				return 0;
 
 			if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
@@ -3151,7 +3169,7 @@ static int internal_get_user_pages_fast(unsigned long start,
 	if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
 				       FOLL_FORCE | FOLL_PIN | FOLL_GET |
 				       FOLL_FAST_ONLY | FOLL_NOFAULT |
-				       FOLL_PCI_P2PDMA)))
+				       FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT)))
 		return -EINVAL;
 
 	if (gup_flags & FOLL_PIN)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index eb3678360b97..f15d557e5708 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1467,8 +1467,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
 		return ERR_PTR(-EFAULT);
 
-	/* Full NUMA hinting faults to serialise migration in fault paths */
-	if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags))
+	if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags))
 		return NULL;
 
 	if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page))

From 8b9c1cc0418a43196477083e7082568e7a4c9418 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Thu, 3 Aug 2023 16:32:03 +0200
Subject: [PATCH 1679/2047] smaps: use vm_normal_page_pmd() instead of
 follow_trans_huge_pmd()

We shouldn't be using a GUP-internal helper if it can be avoided.

Similar to smaps_pte_entry() that uses vm_normal_page(), let's use
vm_normal_page_pmd() that similarly refuses to return the huge zeropage.

In contrast to follow_trans_huge_pmd(), vm_normal_page_pmd():

(1) Will always return the head page, not a tail page of a THP.

 If we'd ever call smaps_account with a tail page while setting "compound
 = true", we could be in trouble, because smaps_account() would look at
 the memmap of unrelated pages.

 If we're unlucky, that memmap does not exist at all. Before we removed
 PG_doublemap, we could have triggered something similar as in
 commit 24d7275ce279 ("fs/proc: task_mmu.c: don't read mapcount for
 migration entry").

 This can theoretically happen ever since commit ff9f47f6f00c ("mm: proc:
 smaps_rollup: do not stall write attempts on mmap_lock"):

  (a) We're in show_smaps_rollup() and processed a VMA
  (b) We release the mmap lock in show_smaps_rollup() because it is
      contended
  (c) We merged that VMA with another VMA
  (d) We collapsed a THP in that merged VMA at that position

 If the end address of the original VMA falls into the middle of a THP
 area, we would call smap_gather_stats() with a start address that falls
 into a PMD-mapped THP. It's probably very rare to trigger when not
 really forced.

(2) Will succeed on a is_pci_p2pdma_page(), like vm_normal_page()

 Treat such PMDs here just like smaps_pte_entry() would treat such PTEs.
 If such pages would be anonymous, we most certainly would want to
 account them.

(3) Will skip over pmd_devmap(), like vm_normal_page() for pte_devmap()

 As noted in vm_normal_page(), that is only for handling legacy ZONE_DEVICE
 pages. So just like smaps_pte_entry(), we'll now also ignore such PMD
 entries.

 Especially, follow_pmd_mask() never ends up calling
 follow_trans_huge_pmd() on pmd_devmap(). Instead it calls
 follow_devmap_pmd() -- which will fail if neither FOLL_GET nor FOLL_PIN
 is set.

 So skipping pmd_devmap() pages seems to be the right thing to do.

(4) Will properly handle VM_MIXEDMAP/VM_PFNMAP, like vm_normal_page()

 We won't be returning a memmap that should be ignored by core-mm, or
 worse, a memmap that does not even exist. Note that while
 walk_page_range() will skip VM_PFNMAP mappings, walk_page_vma() won't.

 Most probably this case doesn't currently really happen on the PMD level,
 otherwise we'd already be able to trigger kernel crashes when reading
 smaps / smaps_rollup.

So most probably only (1) is relevant in practice as of now, but could only
cause trouble in extreme corner cases.

Let's move follow_trans_huge_pmd() to mm/internal.h to discourage future
reuse in wrong context.

Link: https://lkml.kernel.org/r/20230803143208.383663-3-david@redhat.com
Fixes: ff9f47f6f00c ("mm: proc: smaps_rollup: do not stall write attempts on mmap_lock")
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: liubo <liubo254@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/proc/task_mmu.c      | 3 +--
 include/linux/huge_mm.h | 3 ---
 mm/internal.h           | 7 +++++++
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 507cd4e59d07..fc744964816e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -587,8 +587,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 	bool migration = false;
 
 	if (pmd_present(*pmd)) {
-		/* FOLL_DUMP will return -EFAULT on huge zero page */
-		page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+		page = vm_normal_page_pmd(vma, addr, *pmd);
 	} else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
 		swp_entry_t entry = pmd_to_swp_entry(*pmd);
 
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 20284387b841..e718dbe928ba 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -25,9 +25,6 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 #endif
 
 vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
-struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
-				   unsigned long addr, pmd_t *pmd,
-				   unsigned int flags);
 bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			   pmd_t *pmd, unsigned long addr, unsigned long next);
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
diff --git a/mm/internal.h b/mm/internal.h
index a7d9e980429a..45383527e8b4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -924,6 +924,13 @@ int migrate_device_coherent_page(struct page *page);
 struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
 int __must_check try_grab_page(struct page *page, unsigned int flags);
 
+/*
+ * mm/huge_memory.c
+ */
+struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
+				   unsigned long addr, pmd_t *pmd,
+				   unsigned int flags);
+
 enum {
 	/* mark page accessed */
 	FOLL_TOUCH = 1 << 16,

From 49b0638502da097c15d46cd4e871dbaa022caf7c Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Fri, 4 Aug 2023 08:27:19 -0700
Subject: [PATCH 1680/2047] mm: enable page walking API to lock vmas during the
 walk

walk_page_range() and friends often operate under write-locked mmap_lock.
With introduction of vma locks, the vmas have to be locked as well during
such walks to prevent concurrent page faults in these areas.  Add an
additional member to mm_walk_ops to indicate locking requirements for the
walk.

The change ensures that page walks which prevent concurrent page faults
by write-locking mmap_lock, operate correctly after introduction of
per-vma locks.  With per-vma locks page faults can be handled under vma
lock without taking mmap_lock at all, so write locking mmap_lock would
not stop them.  The change ensures vmas are properly locked during such
walks.

A sample issue this solves is do_mbind() performing queue_pages_range()
to queue pages for migration.  Without this change a concurrent page
can be faulted into the area and be left out of migration.

Link: https://lkml.kernel.org/r/20230804152724.3090321-2-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Suggested-by: Linus Torvalds <torvalds@linuxfoundation.org>
Suggested-by: Jann Horn <jannh@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michel Lespinasse <michel@lespinasse.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/powerpc/mm/book3s64/subpage_prot.c |  1 +
 arch/riscv/mm/pageattr.c                |  1 +
 arch/s390/mm/gmap.c                     |  5 ++++
 fs/proc/task_mmu.c                      |  5 ++++
 include/linux/pagewalk.h                | 11 ++++++++
 mm/damon/vaddr.c                        |  2 ++
 mm/hmm.c                                |  1 +
 mm/ksm.c                                | 25 ++++++++++-------
 mm/madvise.c                            |  3 +++
 mm/memcontrol.c                         |  2 ++
 mm/memory-failure.c                     |  1 +
 mm/mempolicy.c                          | 22 +++++++++------
 mm/migrate_device.c                     |  1 +
 mm/mincore.c                            |  1 +
 mm/mlock.c                              |  1 +
 mm/mprotect.c                           |  1 +
 mm/pagewalk.c                           | 36 ++++++++++++++++++++++---
 mm/vmscan.c                             |  1 +
 18 files changed, 100 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 0dc85556dec5..ec98e526167e 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -145,6 +145,7 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops subpage_walk_ops = {
 	.pmd_entry	= subpage_walk_pmd_entry,
+	.walk_lock	= PGWALK_WRLOCK_VERIFY,
 };
 
 static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index ea3d61de065b..161d0b34c2cb 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -102,6 +102,7 @@ static const struct mm_walk_ops pageattr_ops = {
 	.pmd_entry = pageattr_pmd_entry,
 	.pte_entry = pageattr_pte_entry,
 	.pte_hole = pageattr_pte_hole,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 9c8af31be970..906a7bfc2a78 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2514,6 +2514,7 @@ static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops thp_split_walk_ops = {
 	.pmd_entry	= thp_split_walk_pmd_entry,
+	.walk_lock	= PGWALK_WRLOCK_VERIFY,
 };
 
 static inline void thp_split_mm(struct mm_struct *mm)
@@ -2565,6 +2566,7 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
 
 static const struct mm_walk_ops zap_zero_walk_ops = {
 	.pmd_entry	= __zap_zero_pages,
+	.walk_lock	= PGWALK_WRLOCK,
 };
 
 /*
@@ -2655,6 +2657,7 @@ static const struct mm_walk_ops enable_skey_walk_ops = {
 	.hugetlb_entry		= __s390_enable_skey_hugetlb,
 	.pte_entry		= __s390_enable_skey_pte,
 	.pmd_entry		= __s390_enable_skey_pmd,
+	.walk_lock		= PGWALK_WRLOCK,
 };
 
 int s390_enable_skey(void)
@@ -2692,6 +2695,7 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
 
 static const struct mm_walk_ops reset_cmma_walk_ops = {
 	.pte_entry		= __s390_reset_cmma,
+	.walk_lock		= PGWALK_WRLOCK,
 };
 
 void s390_reset_cmma(struct mm_struct *mm)
@@ -2728,6 +2732,7 @@ static int s390_gather_pages(pte_t *ptep, unsigned long addr,
 
 static const struct mm_walk_ops gather_pages_ops = {
 	.pte_entry = s390_gather_pages,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fc744964816e..fafff1bd34cd 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -757,12 +757,14 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops smaps_walk_ops = {
 	.pmd_entry		= smaps_pte_range,
 	.hugetlb_entry		= smaps_hugetlb_range,
+	.walk_lock		= PGWALK_RDLOCK,
 };
 
 static const struct mm_walk_ops smaps_shmem_walk_ops = {
 	.pmd_entry		= smaps_pte_range,
 	.hugetlb_entry		= smaps_hugetlb_range,
 	.pte_hole		= smaps_pte_hole,
+	.walk_lock		= PGWALK_RDLOCK,
 };
 
 /*
@@ -1244,6 +1246,7 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
 static const struct mm_walk_ops clear_refs_walk_ops = {
 	.pmd_entry		= clear_refs_pte_range,
 	.test_walk		= clear_refs_test_walk,
+	.walk_lock		= PGWALK_WRLOCK,
 };
 
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
@@ -1621,6 +1624,7 @@ static const struct mm_walk_ops pagemap_ops = {
 	.pmd_entry	= pagemap_pmd_range,
 	.pte_hole	= pagemap_pte_hole,
 	.hugetlb_entry	= pagemap_hugetlb_range,
+	.walk_lock	= PGWALK_RDLOCK,
 };
 
 /*
@@ -1934,6 +1938,7 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops show_numa_ops = {
 	.hugetlb_entry = gather_hugetlb_stats,
 	.pmd_entry = gather_pte_stats,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index 27a6df448ee5..27cd1e59ccf7 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -6,6 +6,16 @@
 
 struct mm_walk;
 
+/* Locking requirement during a page walk. */
+enum page_walk_lock {
+	/* mmap_lock should be locked for read to stabilize the vma tree */
+	PGWALK_RDLOCK = 0,
+	/* vma will be write-locked during the walk */
+	PGWALK_WRLOCK = 1,
+	/* vma is expected to be already write-locked during the walk */
+	PGWALK_WRLOCK_VERIFY = 2,
+};
+
 /**
  * struct mm_walk_ops - callbacks for walk_page_range
  * @pgd_entry:		if set, called for each non-empty PGD (top-level) entry
@@ -66,6 +76,7 @@ struct mm_walk_ops {
 	int (*pre_vma)(unsigned long start, unsigned long end,
 		       struct mm_walk *walk);
 	void (*post_vma)(struct mm_walk *walk);
+	enum page_walk_lock walk_lock;
 };
 
 /*
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 2fcc9731528a..e0e59d420fca 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -386,6 +386,7 @@ out:
 static const struct mm_walk_ops damon_mkold_ops = {
 	.pmd_entry = damon_mkold_pmd_entry,
 	.hugetlb_entry = damon_mkold_hugetlb_entry,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
@@ -525,6 +526,7 @@ out:
 static const struct mm_walk_ops damon_young_ops = {
 	.pmd_entry = damon_young_pmd_entry,
 	.hugetlb_entry = damon_young_hugetlb_entry,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
diff --git a/mm/hmm.c b/mm/hmm.c
index 855e25e59d8f..277ddcab4947 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -562,6 +562,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
 	.pte_hole	= hmm_vma_walk_hole,
 	.hugetlb_entry	= hmm_vma_walk_hugetlb_entry,
 	.test_walk	= hmm_vma_walk_test,
+	.walk_lock	= PGWALK_RDLOCK,
 };
 
 /**
diff --git a/mm/ksm.c b/mm/ksm.c
index d20d7662419b..d7b5b95e936e 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -455,6 +455,12 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
 
 static const struct mm_walk_ops break_ksm_ops = {
 	.pmd_entry = break_ksm_pmd_entry,
+	.walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops break_ksm_lock_vma_ops = {
+	.pmd_entry = break_ksm_pmd_entry,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 /*
@@ -470,16 +476,17 @@ static const struct mm_walk_ops break_ksm_ops = {
  * of the process that owns 'vma'.  We also do not want to enforce
  * protection keys here anyway.
  */
-static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
 {
 	vm_fault_t ret = 0;
+	const struct mm_walk_ops *ops = lock_vma ?
+				&break_ksm_lock_vma_ops : &break_ksm_ops;
 
 	do {
 		int ksm_page;
 
 		cond_resched();
-		ksm_page = walk_page_range_vma(vma, addr, addr + 1,
-					       &break_ksm_ops, NULL);
+		ksm_page = walk_page_range_vma(vma, addr, addr + 1, ops, NULL);
 		if (WARN_ON_ONCE(ksm_page < 0))
 			return ksm_page;
 		if (!ksm_page)
@@ -565,7 +572,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
 	mmap_read_lock(mm);
 	vma = find_mergeable_vma(mm, addr);
 	if (vma)
-		break_ksm(vma, addr);
+		break_ksm(vma, addr, false);
 	mmap_read_unlock(mm);
 }
 
@@ -871,7 +878,7 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
  * in cmp_and_merge_page on one of the rmap_items we would be removing.
  */
 static int unmerge_ksm_pages(struct vm_area_struct *vma,
-			     unsigned long start, unsigned long end)
+			     unsigned long start, unsigned long end, bool lock_vma)
 {
 	unsigned long addr;
 	int err = 0;
@@ -882,7 +889,7 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
 		if (signal_pending(current))
 			err = -ERESTARTSYS;
 		else
-			err = break_ksm(vma, addr);
+			err = break_ksm(vma, addr, lock_vma);
 	}
 	return err;
 }
@@ -1029,7 +1036,7 @@ static int unmerge_and_remove_all_rmap_items(void)
 			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
 				continue;
 			err = unmerge_ksm_pages(vma,
-						vma->vm_start, vma->vm_end);
+						vma->vm_start, vma->vm_end, false);
 			if (err)
 				goto error;
 		}
@@ -2530,7 +2537,7 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
 		return 0;
 
 	if (vma->anon_vma) {
-		err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
+		err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true);
 		if (err)
 			return err;
 	}
@@ -2668,7 +2675,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 			return 0;		/* just ignore the advice */
 
 		if (vma->anon_vma) {
-			err = unmerge_ksm_pages(vma, start, end);
+			err = unmerge_ksm_pages(vma, start, end, true);
 			if (err)
 				return err;
 		}
diff --git a/mm/madvise.c b/mm/madvise.c
index 886f06066622..bfe0e06427bd 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -233,6 +233,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 
 static const struct mm_walk_ops swapin_walk_ops = {
 	.pmd_entry		= swapin_walk_pmd_entry,
+	.walk_lock		= PGWALK_RDLOCK,
 };
 
 static void shmem_swapin_range(struct vm_area_struct *vma,
@@ -534,6 +535,7 @@ regular_folio:
 
 static const struct mm_walk_ops cold_walk_ops = {
 	.pmd_entry = madvise_cold_or_pageout_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void madvise_cold_page_range(struct mmu_gather *tlb,
@@ -757,6 +759,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops madvise_free_walk_ops = {
 	.pmd_entry		= madvise_free_pte_range,
+	.walk_lock		= PGWALK_RDLOCK,
 };
 
 static int madvise_free_single_vma(struct vm_area_struct *vma,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e8ca4bdcb03c..315fd5f45e3c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6024,6 +6024,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 
 static const struct mm_walk_ops precharge_walk_ops = {
 	.pmd_entry	= mem_cgroup_count_precharge_pte_range,
+	.walk_lock	= PGWALK_RDLOCK,
 };
 
 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
@@ -6303,6 +6304,7 @@ put:			/* get_mctgt_type() gets & locks the page */
 
 static const struct mm_walk_ops charge_walk_ops = {
 	.pmd_entry	= mem_cgroup_move_charge_pte_range,
+	.walk_lock	= PGWALK_RDLOCK,
 };
 
 static void mem_cgroup_move_charge(void)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9a285038d765..139b31fdb678 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -831,6 +831,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
 static const struct mm_walk_ops hwp_walk_ops = {
 	.pmd_entry = hwpoison_pte_range,
 	.hugetlb_entry = hwpoison_hugetlb_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c53f8beeb507..ec2eaceffd74 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -718,6 +718,14 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
 	.hugetlb_entry		= queue_folios_hugetlb,
 	.pmd_entry		= queue_folios_pte_range,
 	.test_walk		= queue_pages_test_walk,
+	.walk_lock		= PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = {
+	.hugetlb_entry		= queue_folios_hugetlb,
+	.pmd_entry		= queue_folios_pte_range,
+	.test_walk		= queue_pages_test_walk,
+	.walk_lock		= PGWALK_WRLOCK,
 };
 
 /*
@@ -738,7 +746,7 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		struct list_head *pagelist, bool lock_vma)
 {
 	int err;
 	struct queue_pages qp = {
@@ -749,8 +757,10 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		.end = end,
 		.first = NULL,
 	};
+	const struct mm_walk_ops *ops = lock_vma ?
+			&queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
 
-	err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
+	err = walk_page_range(mm, start, end, ops, &qp);
 
 	if (!qp.first)
 		/* whole range in hole */
@@ -1078,7 +1088,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 	vma = find_vma(mm, 0);
 	VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
 	queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
-			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+			flags | MPOL_MF_DISCONTIG_OK, &pagelist, false);
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, alloc_migration_target, NULL,
@@ -1321,12 +1331,8 @@ static long do_mbind(unsigned long start, unsigned long len,
 	 * Lock the VMAs before scanning for pages to migrate, to ensure we don't
 	 * miss a concurrently inserted page.
 	 */
-	vma_iter_init(&vmi, mm, start);
-	for_each_vma_range(vmi, vma, end)
-		vma_start_write(vma);
-
 	ret = queue_pages_range(mm, start, end, nmask,
-			  flags | MPOL_MF_INVERT, &pagelist);
+			  flags | MPOL_MF_INVERT, &pagelist, true);
 
 	if (ret < 0) {
 		err = ret;
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 8365158460ed..d5f492356e3e 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -279,6 +279,7 @@ next:
 static const struct mm_walk_ops migrate_vma_walk_ops = {
 	.pmd_entry		= migrate_vma_collect_pmd,
 	.pte_hole		= migrate_vma_collect_hole,
+	.walk_lock		= PGWALK_RDLOCK,
 };
 
 /*
diff --git a/mm/mincore.c b/mm/mincore.c
index b7f7a516b26c..dad3622cc963 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -176,6 +176,7 @@ static const struct mm_walk_ops mincore_walk_ops = {
 	.pmd_entry		= mincore_pte_range,
 	.pte_hole		= mincore_unmapped_range,
 	.hugetlb_entry		= mincore_hugetlb,
+	.walk_lock		= PGWALK_RDLOCK,
 };
 
 /*
diff --git a/mm/mlock.c b/mm/mlock.c
index 0a0c996c5c21..479e09d0994c 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -371,6 +371,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
 {
 	static const struct mm_walk_ops mlock_walk_ops = {
 		.pmd_entry = mlock_pte_range,
+		.walk_lock = PGWALK_WRLOCK_VERIFY,
 	};
 
 	/*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 6f658d483704..3aef1340533a 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -568,6 +568,7 @@ static const struct mm_walk_ops prot_none_walk_ops = {
 	.pte_entry		= prot_none_pte_entry,
 	.hugetlb_entry		= prot_none_hugetlb_entry,
 	.test_walk		= prot_none_test,
+	.walk_lock		= PGWALK_WRLOCK,
 };
 
 int
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 2022333805d3..9b2d23fbf4d3 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -400,6 +400,33 @@ static int __walk_page_range(unsigned long start, unsigned long end,
 	return err;
 }
 
+static inline void process_mm_walk_lock(struct mm_struct *mm,
+					enum page_walk_lock walk_lock)
+{
+	if (walk_lock == PGWALK_RDLOCK)
+		mmap_assert_locked(mm);
+	else
+		mmap_assert_write_locked(mm);
+}
+
+static inline void process_vma_walk_lock(struct vm_area_struct *vma,
+					 enum page_walk_lock walk_lock)
+{
+#ifdef CONFIG_PER_VMA_LOCK
+	switch (walk_lock) {
+	case PGWALK_WRLOCK:
+		vma_start_write(vma);
+		break;
+	case PGWALK_WRLOCK_VERIFY:
+		vma_assert_write_locked(vma);
+		break;
+	case PGWALK_RDLOCK:
+		/* PGWALK_RDLOCK is handled by process_mm_walk_lock */
+		break;
+	}
+#endif
+}
+
 /**
  * walk_page_range - walk page table with caller specific callbacks
  * @mm:		mm_struct representing the target process of page table walk
@@ -459,7 +486,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
 	if (!walk.mm)
 		return -EINVAL;
 
-	mmap_assert_locked(walk.mm);
+	process_mm_walk_lock(walk.mm, ops->walk_lock);
 
 	vma = find_vma(walk.mm, start);
 	do {
@@ -474,6 +501,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
 			if (ops->pte_hole)
 				err = ops->pte_hole(start, next, -1, &walk);
 		} else { /* inside vma */
+			process_vma_walk_lock(vma, ops->walk_lock);
 			walk.vma = vma;
 			next = min(end, vma->vm_end);
 			vma = find_vma(mm, vma->vm_end);
@@ -549,7 +577,8 @@ int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
 	if (start < vma->vm_start || end > vma->vm_end)
 		return -EINVAL;
 
-	mmap_assert_locked(walk.mm);
+	process_mm_walk_lock(walk.mm, ops->walk_lock);
+	process_vma_walk_lock(vma, ops->walk_lock);
 	return __walk_page_range(start, end, &walk);
 }
 
@@ -566,7 +595,8 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
 	if (!walk.mm)
 		return -EINVAL;
 
-	mmap_assert_locked(walk.mm);
+	process_mm_walk_lock(walk.mm, ops->walk_lock);
+	process_vma_walk_lock(vma, ops->walk_lock);
 	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1080209a568b..3555927df9b5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4284,6 +4284,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
 	static const struct mm_walk_ops mm_walk_ops = {
 		.test_walk = should_skip_vma,
 		.p4d_entry = walk_pud_range,
+		.walk_lock = PGWALK_RDLOCK,
 	};
 
 	int err;

From 60439471f30be00dc4f6648322ab1a3a3f89777f Mon Sep 17 00:00:00 2001
From: Lucas Karpinski <lkarpins@redhat.com>
Date: Fri, 4 Aug 2023 15:35:29 -0400
Subject: [PATCH 1681/2047] selftests: cgroup: fix test_kmem_basic less than
 error

test_kmem_basic creates 100,000 negative dentries, with each one mapping
to a slab object.  After memory.high is set, these are reclaimed through
the shrink_slab function call which reclaims all 100,000 entries.  The
test passes the majority of the time because when slab1 or current is
calculated, it is often above 0, however, 0 is also an acceptable value.

Link: https://lkml.kernel.org/r/7d6gcuyzdjcice6qbphrmpmv5skr5jtglg375unnjxqhstvhxc@qkn6dw6bao6v
Signed-off-by: Lucas Karpinski <lkarpins@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_kmem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 1b2cec9d18a4..ed2e50bb1e76 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -75,11 +75,11 @@ static int test_kmem_basic(const char *root)
 	sleep(1);
 
 	slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
-	if (slab1 <= 0)
+	if (slab1 < 0)
 		goto cleanup;
 
 	current = cg_read_long(cg, "memory.current");
-	if (current <= 0)
+	if (current < 0)
 		goto cleanup;
 
 	if (slab1 < slab0 / 2 && current < slab0 / 2)

From 5805192c7b7257d290474cb1a3897d0567281bbc Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Sat, 5 Aug 2023 12:12:56 +0200
Subject: [PATCH 1682/2047] mm/gup: handle cont-PTE hugetlb pages correctly in
 gup_must_unshare() via GUP-fast

In contrast to most other GUP code, GUP-fast common page table walking
code like gup_pte_range() also handles hugetlb pages.  But in contrast to
other hugetlb page table walking code, it does not look at the hugetlb PTE
abstraction whereby we have only a single logical hugetlb PTE per hugetlb
page, even when using multiple cont-PTEs underneath -- which is for
example what huge_ptep_get() abstracts.

So when we have a hugetlb page that is mapped via cont-PTEs, GUP-fast
might stumble over a PTE that does not map the head page of a hugetlb page
-- not the first "head" PTE of such a cont mapping.

Logically, the whole hugetlb page is mapped (entire_mapcount == 1), but we
might end up calling gup_must_unshare() with a tail page of a hugetlb
page.

We only maintain a single PageAnonExclusive flag per hugetlb page (as
hugetlb pages cannot get partially COW-shared), stored for the head page.
That flag is clear for all tail pages.

So when gup_must_unshare() ends up calling PageAnonExclusive() with a tail
page of a hugetlb page:

1) With CONFIG_DEBUG_VM_PGFLAGS

Stumbles over the:

	VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);

For example, when executing the COW selftests with 64k hugetlb pages on
arm64:

  [   61.082187] page:00000000829819ff refcount:3 mapcount:1 mapping:0000000000000000 index:0x1 pfn:0x11ee11
  [   61.082842] head:0000000080f79bf7 order:4 entire_mapcount:1 nr_pages_mapped:0 pincount:2
  [   61.083384] anon flags: 0x17ffff80003000e(referenced|uptodate|dirty|head|mappedtodisk|node=0|zone=2|lastcpupid=0xfffff)
  [   61.084101] page_type: 0xffffffff()
  [   61.084332] raw: 017ffff800000000 fffffc00037b8401 0000000000000402 0000000200000000
  [   61.084840] raw: 0000000000000010 0000000000000000 00000000ffffffff 0000000000000000
  [   61.085359] head: 017ffff80003000e ffffd9e95b09b788 ffffd9e95b09b788 ffff0007ff63cf71
  [   61.085885] head: 0000000000000000 0000000000000002 00000003ffffffff 0000000000000000
  [   61.086415] page dumped because: VM_BUG_ON_PAGE(PageHuge(page) && !PageHead(page))
  [   61.086914] ------------[ cut here ]------------
  [   61.087220] kernel BUG at include/linux/page-flags.h:990!
  [   61.087591] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
  [   61.087999] Modules linked in: ...
  [   61.089404] CPU: 0 PID: 4612 Comm: cow Kdump: loaded Not tainted 6.5.0-rc4+ #3
  [   61.089917] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
  [   61.090409] pstate: 604000c5 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
  [   61.090897] pc : gup_must_unshare.part.0+0x64/0x98
  [   61.091242] lr : gup_must_unshare.part.0+0x64/0x98
  [   61.091592] sp : ffff8000825eb940
  [   61.091826] x29: ffff8000825eb940 x28: 0000000000000000 x27: fffffc00037b8440
  [   61.092329] x26: 0400000000000001 x25: 0000000000080101 x24: 0000000000080000
  [   61.092835] x23: 0000000000080100 x22: ffff0000cffb9588 x21: ffff0000c8ec6b58
  [   61.093341] x20: 0000ffffad6b1000 x19: fffffc00037b8440 x18: ffffffffffffffff
  [   61.093850] x17: 2864616548656761 x16: 5021202626202965 x15: 6761702865677548
  [   61.094358] x14: 6567615028454741 x13: 2929656761702864 x12: 6165486567615021
  [   61.094858] x11: 00000000ffff7fff x10: 00000000ffff7fff x9 : ffffd9e958b7a1c0
  [   61.095359] x8 : 00000000000bffe8 x7 : c0000000ffff7fff x6 : 00000000002bffa8
  [   61.095873] x5 : ffff0008bb19e708 x4 : 0000000000000000 x3 : 0000000000000000
  [   61.096380] x2 : 0000000000000000 x1 : ffff0000cf6636c0 x0 : 0000000000000046
  [   61.096894] Call trace:
  [   61.097080]  gup_must_unshare.part.0+0x64/0x98
  [   61.097392]  gup_pte_range+0x3a8/0x3f0
  [   61.097662]  gup_pgd_range+0x1ec/0x280
  [   61.097942]  lockless_pages_from_mm+0x64/0x1a0
  [   61.098258]  internal_get_user_pages_fast+0xe4/0x1d0
  [   61.098612]  pin_user_pages_fast+0x58/0x78
  [   61.098917]  pin_longterm_test_start+0xf4/0x2b8
  [   61.099243]  gup_test_ioctl+0x170/0x3b0
  [   61.099528]  __arm64_sys_ioctl+0xa8/0xf0
  [   61.099822]  invoke_syscall.constprop.0+0x7c/0xd0
  [   61.100160]  el0_svc_common.constprop.0+0xe8/0x100
  [   61.100500]  do_el0_svc+0x38/0xa0
  [   61.100736]  el0_svc+0x3c/0x198
  [   61.100971]  el0t_64_sync_handler+0x134/0x150
  [   61.101280]  el0t_64_sync+0x17c/0x180
  [   61.101543] Code: aa1303e0 f00074c1 912b0021 97fffeb2 (d4210000)

2) Without CONFIG_DEBUG_VM_PGFLAGS

Always detects "not exclusive" for passed tail pages and refuses to PIN
the tail pages R/O, as gup_must_unshare() == true.  GUP-fast will fallback
to ordinary GUP.  As ordinary GUP properly considers the logical hugetlb
PTE abstraction in hugetlb_follow_page_mask(), pinning the page will
succeed when looking at the PageAnonExclusive on the head page only.

So the only real effect of this is that with cont-PTE hugetlb pages, we'll
always fallback from GUP-fast to ordinary GUP when not working on the head
page, which ends up checking the head page and do the right thing.

Consequently, the cow selftests pass with cont-PTE hugetlb pages as well
without CONFIG_DEBUG_VM_PGFLAGS.

Note that this only applies to anon hugetlb pages that are mapped using
cont-PTEs: for example 64k hugetlb pages on a 4k arm64 kernel.

... and only when R/O-pinning (FOLL_PIN) such pages that are mapped into
the page table R/O using GUP-fast.

On production kernels (and even most debug kernels, that don't set
CONFIG_DEBUG_VM_PGFLAGS) this patch should theoretically not be required
to be backported.  But of course, it does not hurt.

Link: https://lkml.kernel.org/r/20230805101256.87306-1-david@redhat.com
Fixes: a7f226604170 ("mm/gup: trigger FAULT_FLAG_UNSHARE when R/O-pinning a possibly shared anonymous page")
Signed-off-by: David Hildenbrand <david@redhat.com>
Reported-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Tested-by: Ryan Roberts <ryan.roberts@arm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/internal.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/mm/internal.h b/mm/internal.h
index 45383527e8b4..8ed127c1c808 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1004,6 +1004,16 @@ static inline bool gup_must_unshare(struct vm_area_struct *vma,
 	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
 		smp_rmb();
 
+	/*
+	 * During GUP-fast we might not get called on the head page for a
+	 * hugetlb page that is mapped using cont-PTE, because GUP-fast does
+	 * not work with the abstracted hugetlb PTEs that always point at the
+	 * head page. For hugetlb, PageAnonExclusive only applies on the head
+	 * page (as it cannot be partially COW-shared), so lookup the head page.
+	 */
+	if (unlikely(!PageHead(page) && PageHuge(page)))
+		page = compound_head(page);
+
 	/*
 	 * Note that PageKsm() pages cannot be exclusive, and consequently,
 	 * cannot get pinned.

From f83913f8c5b882a312e72b7669762f8a5c9385e4 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Date: Sat, 5 Aug 2023 22:20:38 +0900
Subject: [PATCH 1683/2047] nilfs2: fix general protection fault in
 nilfs_lookup_dirty_data_buffers()

A syzbot stress test reported that create_empty_buffers() called from
nilfs_lookup_dirty_data_buffers() can cause a general protection fault.

Analysis using its reproducer revealed that the back reference "mapping"
from a page/folio has been changed to NULL after dirty page/folio gang
lookup in nilfs_lookup_dirty_data_buffers().

Fix this issue by excluding pages/folios from being collected if, after
acquiring a lock on each page/folio, its back reference "mapping" differs
from the pointer to the address space struct that held the page/folio.

Link: https://lkml.kernel.org/r/20230805132038.6435-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Reported-by: syzbot+0ad741797f4565e7e2d2@syzkaller.appspotmail.com
Closes: https://lkml.kernel.org/r/0000000000002930a705fc32b231@google.com
Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/nilfs2/segment.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 581691e4be49..7ec16879756e 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -725,6 +725,11 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
 		struct folio *folio = fbatch.folios[i];
 
 		folio_lock(folio);
+		if (unlikely(folio->mapping != mapping)) {
+			/* Exclude folios removed from the address space */
+			folio_unlock(folio);
+			continue;
+		}
 		head = folio_buffers(folio);
 		if (!head) {
 			create_empty_buffers(&folio->page, i_blocksize(inode), 0);

From 1738b949625c7e17a454b25de33f1f415da3db69 Mon Sep 17 00:00:00 2001
From: Ayush Jain <ayush.jain3@amd.com>
Date: Tue, 8 Aug 2023 07:43:47 -0500
Subject: [PATCH 1684/2047] selftests/mm: FOLL_LONGTERM need to be updated to
 0x100

After commit 2c2241081f7d ("mm/gup: move private gup FOLL_ flags to
internal.h") FOLL_LONGTERM flag value got updated from 0x10000 to 0x100 at
include/linux/mm_types.h.

As hmm.hmm_device_private.hmm_gup_test uses FOLL_LONGTERM Updating same
here as well.

Before this change test goes in an infinite assert loop in
hmm.hmm_device_private.hmm_gup_test
==========================================================
 RUN           hmm.hmm_device_private.hmm_gup_test ...
hmm-tests.c:1962:hmm_gup_test:Expected HMM_DMIRROR_PROT_WRITE..
..(2) == m[2] (34)
hmm-tests.c:157:hmm_gup_test:Expected ret (-1) == 0 (0)
hmm-tests.c:157:hmm_gup_test:Expected ret (-1) == 0 (0)
...
==========================================================

 Call Trace:
 <TASK>
 ? sched_clock+0xd/0x20
 ? __lock_acquire.constprop.0+0x120/0x6c0
 ? ktime_get+0x2c/0xd0
 ? sched_clock+0xd/0x20
 ? local_clock+0x12/0xd0
 ? lock_release+0x26e/0x3b0
 pin_user_pages_fast+0x4c/0x70
 gup_test_ioctl+0x4ff/0xbb0
 ? gup_test_ioctl+0x68c/0xbb0
 __x64_sys_ioctl+0x99/0xd0
 do_syscall_64+0x60/0x90
 ? syscall_exit_to_user_mode+0x2a/0x50
 ? do_syscall_64+0x6d/0x90
 ? syscall_exit_to_user_mode+0x2a/0x50
 ? do_syscall_64+0x6d/0x90
 ? irqentry_exit_to_user_mode+0xd/0x20
 ? irqentry_exit+0x3f/0x50
 ? exc_page_fault+0x96/0x200
 entry_SYSCALL_64_after_hwframe+0x72/0xdc
 RIP: 0033:0x7f6aaa31aaff

After this change test is able to pass successfully.

Link: https://lkml.kernel.org/r/20230808124347.79163-1-ayush.jain3@amd.com
Fixes: 2c2241081f7d ("mm/gup: move private gup FOLL_ flags to internal.h")
Signed-off-by: Ayush Jain <ayush.jain3@amd.com>
Reviewed-by: Raghavendra K T <raghavendra.kt@amd.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/mm/hmm-tests.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
index 4adaad1b822f..20294553a5dd 100644
--- a/tools/testing/selftests/mm/hmm-tests.c
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -57,9 +57,14 @@ enum {
 
 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
 /* Just the flags we need, copied from mm.h: */
-#define FOLL_WRITE	0x01	/* check pte is writable */
-#define FOLL_LONGTERM   0x10000 /* mapping lifetime is indefinite */
 
+#ifndef FOLL_WRITE
+#define FOLL_WRITE	0x01	/* check pte is writable */
+#endif
+
+#ifndef FOLL_LONGTERM
+#define FOLL_LONGTERM   0x100 /* mapping lifetime is indefinite */
+#endif
 FIXTURE(hmm)
 {
 	int		fd;

From a50420c79731fc5cf27ad43719c1091e842a2606 Mon Sep 17 00:00:00 2001
From: Alexandre Ghiti <alexghiti@rivosinc.com>
Date: Wed, 9 Aug 2023 18:46:33 +0200
Subject: [PATCH 1685/2047] mm: add a call to flush_cache_vmap() in vmap_pfn()

flush_cache_vmap() must be called after new vmalloc mappings are installed
in the page table in order to allow architectures to make sure the new
mapping is visible.

It could lead to a panic since on some architectures (like powerpc),
the page table walker could see the wrong pte value and trigger a
spurious page fault that can not be resolved (see commit f1cb8f9beba8
("powerpc/64s/radix: avoid ptesync after set_pte and
ptep_set_access_flags")).

But actually the patch is aiming at riscv: the riscv specification
allows the caching of invalid entries in the TLB, and since we recently
removed the vmalloc page fault handling, we now need to emit a tlb
shootdown whenever a new vmalloc mapping is emitted
(https://lore.kernel.org/linux-riscv/20230725132246.817726-1-alexghiti@rivosinc.com/).
That's a temporary solution, there are ways to avoid that :)

Link: https://lkml.kernel.org/r/20230809164633.1556126-1-alexghiti@rivosinc.com
Fixes: 3e9a9e256b1e ("mm: add a vmap_pfn function")
Reported-by: Dylan Jhong <dylan@andestech.com>
Closes: https://lore.kernel.org/linux-riscv/ZMytNY2J8iyjbPPy@atctrx.andestech.com/
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Palmer Dabbelt <palmer@rivosinc.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Reviewed-by: Dylan Jhong <dylan@andestech.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmalloc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 93cf99aba335..228a4a5312f2 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2979,6 +2979,10 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
 		free_vm_area(area);
 		return NULL;
 	}
+
+	flush_cache_vmap((unsigned long)area->addr,
+			 (unsigned long)area->addr + count * PAGE_SIZE);
+
 	return area->addr;
 }
 EXPORT_SYMBOL_GPL(vmap_pfn);

From d59070d1076ec5114edb67c87658aeb1d691d381 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 11 Aug 2023 15:10:13 +0200
Subject: [PATCH 1686/2047] radix tree: remove unused variable

Recent versions of clang warn about an unused variable, though older
versions saw the 'slot++' as a use and did not warn:

radix-tree.c:1136:50: error: parameter 'slot' set but not used [-Werror,-Wunused-but-set-parameter]

It's clearly not needed any more, so just remove it.

Link: https://lkml.kernel.org/r/20230811131023.2226509-1-arnd@kernel.org
Fixes: 3a08cd52c37c7 ("radix tree: Remove multiorder support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Peng Zhang <zhangpeng.00@bytedance.com>
Cc: Rong Tao <rongtao@cestc.cn>
Cc: Tom Rix <trix@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/radix-tree.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 1a31065b2036..976b9bd02a1b 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1136,7 +1136,6 @@ static void set_iter_tags(struct radix_tree_iter *iter,
 void __rcu **radix_tree_iter_resume(void __rcu **slot,
 					struct radix_tree_iter *iter)
 {
-	slot++;
 	iter->index = __radix_tree_iter_add(iter, 1);
 	iter->next_index = iter->index;
 	iter->tags = 0;

From e2c1ab070fdc81010ec44634838d24fce9ff9e53 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Tue, 27 Jun 2023 19:28:08 +0800
Subject: [PATCH 1687/2047] mm: memory-failure: fix unexpected return value in
 soft_offline_page()

When page_handle_poison() fails to handle the hugepage or free page in
retry path, soft_offline_page() will return 0 while -EBUSY is expected in
this case.

Consequently the user will think soft_offline_page succeeds while it in
fact failed.  So the user will not try again later in this case.

Link: https://lkml.kernel.org/r/20230627112808.1275241-1-linmiaohe@huawei.com
Fixes: b94e02822deb ("mm,hwpoison: try to narrow window race for free pages")
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memory-failure.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 139b31fdb678..fe121fdb05f7 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2741,10 +2741,13 @@ retry:
 	if (ret > 0) {
 		ret = soft_offline_in_use_page(page);
 	} else if (ret == 0) {
-		if (!page_handle_poison(page, true, false) && try_again) {
-			try_again = false;
-			flags &= ~MF_COUNT_INCREASED;
-			goto retry;
+		if (!page_handle_poison(page, true, false)) {
+			if (try_again) {
+				try_again = false;
+				flags &= ~MF_COUNT_INCREASED;
+				goto retry;
+			}
+			ret = -EBUSY;
 		}
 	}
 

From 6867c7a3320669cbe44b905a3eb35db725c6d470 Mon Sep 17 00:00:00 2001
From: "T.J. Mercier" <tjmercier@google.com>
Date: Mon, 14 Aug 2023 15:16:36 +0000
Subject: [PATCH 1688/2047] mm: multi-gen LRU: don't spin during memcg release

When a memcg is in the process of being released mem_cgroup_tryget will
fail because its reference count has already reached 0.  This can happen
during reclaim if the memcg has already been offlined, and we reclaim all
remaining pages attributed to the offlined memcg.  shrink_many attempts to
skip the empty memcg in this case, and continue reclaiming from the
remaining memcgs in the old generation.  If there is only one memcg
remaining, or if all remaining memcgs are in the process of being released
then shrink_many will spin until all memcgs have finished being released.
The release occurs through a workqueue, so it can take a while before
kswapd is able to make any further progress.

This fix results in reductions in kswapd activity and direct reclaim in
a test where 28 apps (working set size > total memory) are repeatedly
launched in a random sequence:

                                       A          B      delta   ratio(%)
           allocstall_movable       5962       3539      -2423     -40.64
            allocstall_normal       2661       2417       -244      -9.17
kswapd_high_wmark_hit_quickly      53152       7594     -45558     -85.71
                   pageoutrun      57365      11750     -45615     -79.52

Link: https://lkml.kernel.org/r/20230814151636.1639123-1-tjmercier@google.com
Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists")
Signed-off-by: T.J. Mercier <tjmercier@google.com>
Acked-by: Yu Zhao <yuzhao@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmscan.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3555927df9b5..2fe4a11d63f4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4854,16 +4854,17 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg)
 
 		spin_lock_irq(&pgdat->memcg_lru.lock);
 
-		VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
+		if (hlist_nulls_unhashed(&lruvec->lrugen.list))
+			goto unlock;
 
 		gen = lruvec->lrugen.gen;
 
-		hlist_nulls_del_rcu(&lruvec->lrugen.list);
+		hlist_nulls_del_init_rcu(&lruvec->lrugen.list);
 		pgdat->memcg_lru.nr_memcgs[gen]--;
 
 		if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
 			WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
-
+unlock:
 		spin_unlock_irq(&pgdat->memcg_lru.lock);
 	}
 }
@@ -5435,8 +5436,10 @@ restart:
 	rcu_read_lock();
 
 	hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
-		if (op)
+		if (op) {
 			lru_gen_rotate_memcg(lruvec, op);
+			op = 0;
+		}
 
 		mem_cgroup_put(memcg);
 
@@ -5444,7 +5447,7 @@ restart:
 		memcg = lruvec_memcg(lruvec);
 
 		if (!mem_cgroup_tryget(memcg)) {
-			op = 0;
+			lru_gen_release_memcg(memcg);
 			memcg = NULL;
 			continue;
 		}

From d0c89af3130eb4ff962266bb7597690a696f1cbc Mon Sep 17 00:00:00 2001
From: "Bao D. Nguyen" <quic_nguyenb@quicinc.com>
Date: Tue, 15 Aug 2023 18:38:29 -0700
Subject: [PATCH 1689/2047] scsi: ufs: mcq: Fix the search/wrap around logic

The search and wrap around logic in the ufshcd_mcq_sqe_search() function
does not work correctly when the hwq's queue depth is not a power of two
number. Correct it so that any queue depth with a positive integer value
within the supported range would work.

Signed-off-by: "Bao D. Nguyen" <quic_nguyenb@quicinc.com>
Link: https://lore.kernel.org/r/ff49c15be205135ed3ec186f3086694c02867dbd.1692149603.git.quic_nguyenb@quicinc.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Fixes: 8d7290348992 ("scsi: ufs: mcq: Add supporting functions for MCQ abort")
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufs-mcq.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index 6fb0e007af63..386674ead7f0 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -580,7 +580,6 @@ static bool ufshcd_mcq_sqe_search(struct ufs_hba *hba,
 {
 	struct ufshcd_lrb *lrbp = &hba->lrb[task_tag];
 	struct utp_transfer_req_desc *utrd;
-	u32 mask = hwq->max_entries - 1;
 	__le64  cmd_desc_base_addr;
 	bool ret = false;
 	u64 addr, match;
@@ -608,7 +607,10 @@ static bool ufshcd_mcq_sqe_search(struct ufs_hba *hba,
 			ret = true;
 			goto out;
 		}
-		sq_head_slot = (sq_head_slot + 1) & mask;
+
+		sq_head_slot++;
+		if (sq_head_slot == hwq->max_entries)
+			sq_head_slot = 0;
 	}
 
 out:

From c422fbd5cb58c9a078172ae1e9750971b738a197 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Mon, 21 Aug 2023 14:11:21 +0200
Subject: [PATCH 1690/2047] scsi: ufs: ufs-qcom: Clear qunipro_g4_sel for HW
 major version > 5

The qunipro_g4_sel clear is also needed for new platforms with major
version > 5. Fix the version check to take this into account.

Fixes: 9c02aa24bf40 ("scsi: ufs: ufs-qcom: Clear qunipro_g4_sel for HW version major 5")
Acked-by: Manivannan Sadhasivam <mani@kernel.org>
Reviewed-by: Nitin Rawat <quic_nitirawa@quicinc.com>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20230821-topic-sm8x50-upstream-ufs-major-5-plus-v2-1-f42a4b712e58@linaro.org
Reviewed-by: "Bao D. Nguyen" <quic_nguyenb@quicinc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/host/ufs-qcom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index 8d6fd4c3324f..c1557d21b027 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -321,7 +321,7 @@ static void ufs_qcom_select_unipro_mode(struct ufs_qcom_host *host)
 		   ufs_qcom_cap_qunipro(host) ? QUNIPRO_SEL : 0,
 		   REG_UFS_CFG1);
 
-	if (host->hw_ver.major == 0x05)
+	if (host->hw_ver.major >= 0x05)
 		ufshcd_rmwl(host->hba, QUNIPRO_G4_SEL, 0, REG_UFS_CFG0);
 
 	/* make sure above configuration is applied before we return */

From e332938e6fc8117fb9bb1374339cea879b3972d9 Mon Sep 17 00:00:00 2001
From: "GONG, Ruiqi" <gongruiqi1@huawei.com>
Date: Sun, 20 Aug 2023 19:15:09 +0800
Subject: [PATCH 1691/2047] samples: ftrace: Replace bti assembly with hint for
 older compiler

When cross-building the arm64 kernel with allmodconfig using GCC 9.4,
the following error occurs on multiple files under samples/ftrace/:

/tmp/ccPC1ODs.s: Assembler messages:
/tmp/ccPC1ODs.s:8: Error: selected processor does not support `bti c'

Fix this issue by replacing `bti c` with `hint 34`, which is compatible
for the older compiler.

Link: https://lore.kernel.org/linux-trace-kernel/20230820111509.1470826-1-gongruiqi@huaweicloud.com

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Florent Revest <revest@chromium.org>
Fixes: 8c3526fb86060cb5 ("arm64: ftrace: Add direct call trampoline samples support")
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: GONG, Ruiqi <gongruiqi1@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 samples/ftrace/ftrace-direct-modify.c       | 4 ++--
 samples/ftrace/ftrace-direct-multi-modify.c | 4 ++--
 samples/ftrace/ftrace-direct-multi.c        | 2 +-
 samples/ftrace/ftrace-direct-too.c          | 2 +-
 samples/ftrace/ftrace-direct.c              | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c
index e5ed08098ff3..e2a6a69352df 100644
--- a/samples/ftrace/ftrace-direct-modify.c
+++ b/samples/ftrace/ftrace-direct-modify.c
@@ -105,7 +105,7 @@ asm (
 "	.type		my_tramp1, @function\n"
 "	.globl		my_tramp1\n"
 "   my_tramp1:"
-"	bti	c\n"
+"	hint	34\n" // bti	c
 "	sub	sp, sp, #16\n"
 "	stp	x9, x30, [sp]\n"
 "	bl	my_direct_func1\n"
@@ -117,7 +117,7 @@ asm (
 "	.type		my_tramp2, @function\n"
 "	.globl		my_tramp2\n"
 "   my_tramp2:"
-"	bti	c\n"
+"	hint	34\n" // bti	c
 "	sub	sp, sp, #16\n"
 "	stp	x9, x30, [sp]\n"
 "	bl	my_direct_func2\n"
diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c
index 292cff2b3f5d..2e349834d63c 100644
--- a/samples/ftrace/ftrace-direct-multi-modify.c
+++ b/samples/ftrace/ftrace-direct-multi-modify.c
@@ -112,7 +112,7 @@ asm (
 "	.type		my_tramp1, @function\n"
 "	.globl		my_tramp1\n"
 "   my_tramp1:"
-"	bti	c\n"
+"	hint	34\n" // bti	c
 "	sub	sp, sp, #32\n"
 "	stp	x9, x30, [sp]\n"
 "	str	x0, [sp, #16]\n"
@@ -127,7 +127,7 @@ asm (
 "	.type		my_tramp2, @function\n"
 "	.globl		my_tramp2\n"
 "   my_tramp2:"
-"	bti	c\n"
+"	hint	34\n" // bti	c
 "	sub	sp, sp, #32\n"
 "	stp	x9, x30, [sp]\n"
 "	str	x0, [sp, #16]\n"
diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c
index b4391e08c913..9243dbfe4d0c 100644
--- a/samples/ftrace/ftrace-direct-multi.c
+++ b/samples/ftrace/ftrace-direct-multi.c
@@ -75,7 +75,7 @@ asm (
 "	.type		my_tramp, @function\n"
 "	.globl		my_tramp\n"
 "   my_tramp:"
-"	bti	c\n"
+"	hint	34\n" // bti	c
 "	sub	sp, sp, #32\n"
 "	stp	x9, x30, [sp]\n"
 "	str	x0, [sp, #16]\n"
diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c
index e9804c5307c0..e39c3563ae4e 100644
--- a/samples/ftrace/ftrace-direct-too.c
+++ b/samples/ftrace/ftrace-direct-too.c
@@ -81,7 +81,7 @@ asm (
 "	.type		my_tramp, @function\n"
 "	.globl		my_tramp\n"
 "   my_tramp:"
-"	bti	c\n"
+"	hint	34\n" // bti	c
 "	sub	sp, sp, #48\n"
 "	stp	x9, x30, [sp]\n"
 "	stp	x0, x1, [sp, #16]\n"
diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c
index 20f4a7caa810..32c477da1e9a 100644
--- a/samples/ftrace/ftrace-direct.c
+++ b/samples/ftrace/ftrace-direct.c
@@ -72,7 +72,7 @@ asm (
 "	.type		my_tramp, @function\n"
 "	.globl		my_tramp\n"
 "   my_tramp:"
-"	bti	c\n"
+"	hint	34\n" // bti	c
 "	sub	sp, sp, #32\n"
 "	stp	x9, x30, [sp]\n"
 "	str	x0, [sp, #16]\n"

From 442466e04f5f1b4616d3f023ff19166b82e19989 Mon Sep 17 00:00:00 2001
From: Petr Pavlu <petr.pavlu@suse.com>
Date: Wed, 7 Jun 2023 14:36:24 +0200
Subject: [PATCH 1692/2047] xen/xenbus: Avoid a lockdep warning when adding a
 watch

The following lockdep warning appears during boot on a Xen dom0 system:

[   96.388794] ======================================================
[   96.388797] WARNING: possible circular locking dependency detected
[   96.388799] 6.4.0-rc5-default+ #8 Tainted: G            EL
[   96.388803] ------------------------------------------------------
[   96.388804] xenconsoled/1330 is trying to acquire lock:
[   96.388808] ffffffff82acdd10 (xs_watch_rwsem){++++}-{3:3}, at: register_xenbus_watch+0x45/0x140
[   96.388847]
               but task is already holding lock:
[   96.388849] ffff888100c92068 (&u->msgbuffer_mutex){+.+.}-{3:3}, at: xenbus_file_write+0x2c/0x600
[   96.388862]
               which lock already depends on the new lock.

[   96.388864]
               the existing dependency chain (in reverse order) is:
[   96.388866]
               -> #2 (&u->msgbuffer_mutex){+.+.}-{3:3}:
[   96.388874]        __mutex_lock+0x85/0xb30
[   96.388885]        xenbus_dev_queue_reply+0x48/0x2b0
[   96.388890]        xenbus_thread+0x1d7/0x950
[   96.388897]        kthread+0xe7/0x120
[   96.388905]        ret_from_fork+0x2c/0x50
[   96.388914]
               -> #1 (xs_response_mutex){+.+.}-{3:3}:
[   96.388923]        __mutex_lock+0x85/0xb30
[   96.388930]        xenbus_backend_ioctl+0x56/0x1c0
[   96.388935]        __x64_sys_ioctl+0x90/0xd0
[   96.388942]        do_syscall_64+0x5c/0x90
[   96.388950]        entry_SYSCALL_64_after_hwframe+0x72/0xdc
[   96.388957]
               -> #0 (xs_watch_rwsem){++++}-{3:3}:
[   96.388965]        __lock_acquire+0x1538/0x2260
[   96.388972]        lock_acquire+0xc6/0x2b0
[   96.388976]        down_read+0x2d/0x160
[   96.388983]        register_xenbus_watch+0x45/0x140
[   96.388990]        xenbus_file_write+0x53d/0x600
[   96.388994]        vfs_write+0xe4/0x490
[   96.389003]        ksys_write+0xb8/0xf0
[   96.389011]        do_syscall_64+0x5c/0x90
[   96.389017]        entry_SYSCALL_64_after_hwframe+0x72/0xdc
[   96.389023]
               other info that might help us debug this:

[   96.389025] Chain exists of:
                 xs_watch_rwsem --> xs_response_mutex --> &u->msgbuffer_mutex

[   96.413429]  Possible unsafe locking scenario:

[   96.413430]        CPU0                    CPU1
[   96.413430]        ----                    ----
[   96.413431]   lock(&u->msgbuffer_mutex);
[   96.413432]                                lock(xs_response_mutex);
[   96.413433]                                lock(&u->msgbuffer_mutex);
[   96.413434]   rlock(xs_watch_rwsem);
[   96.413436]
                *** DEADLOCK ***

[   96.413436] 1 lock held by xenconsoled/1330:
[   96.413438]  #0: ffff888100c92068 (&u->msgbuffer_mutex){+.+.}-{3:3}, at: xenbus_file_write+0x2c/0x600
[   96.413446]

An ioctl call IOCTL_XENBUS_BACKEND_SETUP (record #1 in the report)
results in calling xenbus_alloc() -> xs_suspend() which introduces
ordering xs_watch_rwsem --> xs_response_mutex. The xenbus_thread()
operation (record #2) creates xs_response_mutex --> &u->msgbuffer_mutex.
An XS_WATCH write to the xenbus file then results in a complain about
the opposite lock order &u->msgbuffer_mutex --> xs_watch_rwsem.

The dependency xs_watch_rwsem --> xs_response_mutex is spurious. Avoid
it and the warning by changing the ordering in xs_suspend(), first
acquire xs_response_mutex and then xs_watch_rwsem. Reverse also the
unlocking order in xs_suspend_cancel() for consistency, but keep
xs_resume() as is because it needs to have xs_watch_rwsem unlocked only
after exiting xs suspend and re-adding all watches.

Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20230607123624.15739-1-petr.pavlu@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xenbus/xenbus_xs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 12e02eb01f59..028a182bcc9e 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -840,8 +840,8 @@ void xs_suspend(void)
 {
 	xs_suspend_enter();
 
-	down_write(&xs_watch_rwsem);
 	mutex_lock(&xs_response_mutex);
+	down_write(&xs_watch_rwsem);
 }
 
 void xs_resume(void)
@@ -866,8 +866,8 @@ void xs_resume(void)
 
 void xs_suspend_cancel(void)
 {
-	mutex_unlock(&xs_response_mutex);
 	up_write(&xs_watch_rwsem);
+	mutex_unlock(&xs_response_mutex);
 
 	xs_suspend_exit();
 }

From 9ce4ed5b4db13633f9934a4eac02c4cde04d5c63 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Mon, 21 Aug 2023 16:18:27 +0200
Subject: [PATCH 1693/2047] gpiolib: provide and use gpiod_line_state_notify()

Wrap the calls to blocking_notifier_call_chain() for the line state
notifier with a helper that allows us to use fewer lines of code and
simpler syntax.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpiolib-cdev.c | 17 +++++------------
 drivers/gpio/gpiolib.c      | 12 ++++++++----
 drivers/gpio/gpiolib.h      |  1 +
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 35dcaf78aed6..e39d344feb28 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -230,9 +230,7 @@ static long linehandle_set_config(struct linehandle_state *lh,
 				return ret;
 		}
 
-		blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
-					     GPIO_V2_LINE_CHANGED_CONFIG,
-					     desc);
+		gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_CONFIG);
 	}
 	return 0;
 }
@@ -414,8 +412,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
 				goto out_free_lh;
 		}
 
-		blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
-					     GPIO_V2_LINE_CHANGED_REQUESTED, desc);
+		gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_REQUESTED);
 
 		dev_dbg(&gdev->dev, "registered chardev handle for line %d\n",
 			offset);
@@ -1420,9 +1417,7 @@ static long linereq_set_config_unlocked(struct linereq *lr,
 
 		WRITE_ONCE(line->edflags, edflags);
 
-		blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
-					     GPIO_V2_LINE_CHANGED_CONFIG,
-					     desc);
+		gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_CONFIG);
 	}
 	return 0;
 }
@@ -1737,8 +1732,7 @@ static int linereq_create(struct gpio_device *gdev, void __user *ip)
 
 		lr->lines[i].edflags = edflags;
 
-		blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
-					     GPIO_V2_LINE_CHANGED_REQUESTED, desc);
+		gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_REQUESTED);
 
 		dev_dbg(&gdev->dev, "registered chardev handle for line %d\n",
 			offset);
@@ -2156,8 +2150,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
 	if (ret)
 		goto out_free_le;
 
-	blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
-				     GPIO_V2_LINE_CHANGED_REQUESTED, desc);
+	gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_REQUESTED);
 
 	irq = gpiod_to_irq(desc);
 	if (irq <= 0) {
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 8f74e0bb2a5c..810bc09467a9 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2178,8 +2178,7 @@ static bool gpiod_free_commit(struct gpio_desc *desc)
 	}
 
 	spin_unlock_irqrestore(&gpio_lock, flags);
-	blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
-				     GPIOLINE_CHANGED_RELEASED, desc);
+	gpiod_line_state_notify(desc, GPIOLINE_CHANGED_RELEASED);
 
 	return ret;
 }
@@ -3741,6 +3740,12 @@ int gpiod_set_array_value_cansleep(unsigned int array_size,
 }
 EXPORT_SYMBOL_GPL(gpiod_set_array_value_cansleep);
 
+void gpiod_line_state_notify(struct gpio_desc *desc, unsigned long action)
+{
+	blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
+				     action, desc);
+}
+
 /**
  * gpiod_add_lookup_table() - register GPIO device consumers
  * @table: table of consumers to register
@@ -4008,8 +4013,7 @@ static struct gpio_desc *gpiod_find_and_request(struct device *consumer,
 		return ERR_PTR(ret);
 	}
 
-	blocking_notifier_call_chain(&desc->gdev->line_state_notifier,
-				     GPIOLINE_CHANGED_REQUESTED, desc);
+	gpiod_line_state_notify(desc, GPIOLINE_CHANGED_REQUESTED);
 
 	return desc;
 }
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index e860ed434bc2..a0a67569300b 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -147,6 +147,7 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep,
 extern spinlock_t gpio_lock;
 extern struct list_head gpio_devices;
 
+void gpiod_line_state_notify(struct gpio_desc *desc, unsigned long action);
 
 /**
  * struct gpio_desc - Opaque descriptor for a GPIO

From 5ad1ab30ac0809d2963ddcf39ac34317a24a2f17 Mon Sep 17 00:00:00 2001
From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Date: Fri, 18 Aug 2023 10:14:36 +0530
Subject: [PATCH 1694/2047] drm/display/dp: Fix the DP DSC Receiver cap size

DP DSC Receiver Capabilities are exposed via DPCD 60h-6Fh.
Fix the DSC RECEIVER CAP SIZE accordingly.

Fixes: ffddc4363c28 ("drm/dp: Add DP DSC DPCD receiver capability size define and missing SHIFT")
Cc: Anusha Srivatsa <anusha.srivatsa@intel.com>
Cc: Manasi Navare <manasi.d.navare@intel.com>
Cc: <stable@vger.kernel.org> # v5.0+

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230818044436.177806-1-ankit.k.nautiyal@intel.com
---
 include/drm/display/drm_dp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h
index 02f2ac4dd2df..e69cece404b3 100644
--- a/include/drm/display/drm_dp.h
+++ b/include/drm/display/drm_dp.h
@@ -1537,7 +1537,7 @@ enum drm_dp_phy {
 
 #define DP_BRANCH_OUI_HEADER_SIZE	0xc
 #define DP_RECEIVER_CAP_SIZE		0xf
-#define DP_DSC_RECEIVER_CAP_SIZE        0xf
+#define DP_DSC_RECEIVER_CAP_SIZE        0x10 /* DSC Capabilities 0x60 through 0x6F */
 #define EDP_PSR_RECEIVER_CAP_SIZE	2
 #define EDP_DISPLAY_CTL_CAP_SIZE	3
 #define DP_LTTPR_COMMON_CAP_SIZE	8

From e38abdab441c0dcfb8f28dd0daf1dfa44b494492 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sun, 2 Jul 2023 18:40:39 +0200
Subject: [PATCH 1695/2047] efi/runtime-wrappers: Remove duplicated macro for
 service returning void

__efi_call_virt() exists as an alternative for efi_call_virt() for the
sole reason that ResetSystem() returns void, and so we cannot use a call
to it in the RHS of an assignment.

Given that there is only a single user, let's drop the macro, and expand
it into the caller. That way, the remaining macro can be tightened
somewhat in terms of type safety too.

Note that the use of typeof() on the runtime service invocation does not
result in an actual call being made, but it does require a few pointer
types to be fixed up and converted into the proper function pointer
prototypes.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/include/asm/uv/bios.h          |  4 +++-
 drivers/acpi/prmt.c                     |  2 +-
 drivers/firmware/efi/runtime-wrappers.c |  9 ++++++---
 include/linux/efi.h                     | 23 ++++-------------------
 4 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 1b6455f881f9..6989b824fd32 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -10,6 +10,7 @@
  * Copyright (c) Russ Anderson <rja@sgi.com>
  */
 
+#include <linux/efi.h>
 #include <linux/rtc.h>
 
 /*
@@ -115,7 +116,8 @@ struct uv_arch_type_entry {
 struct uv_systab {
 	char signature[4];	/* must be UV_SYSTAB_SIG */
 	u32 revision;		/* distinguish different firmware revs */
-	u64 function;		/* BIOS runtime callback function ptr */
+	u64 (__efiapi *function)(enum uv_bios_cmd, ...);
+				/* BIOS runtime callback function ptr */
 	u32 size;		/* systab size (starting with _VERSION_UV4) */
 	struct {
 		u32 type:8;	/* type of entry */
diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c
index 3d4c4620f9f9..71b9adaaf33b 100644
--- a/drivers/acpi/prmt.c
+++ b/drivers/acpi/prmt.c
@@ -53,7 +53,7 @@ static LIST_HEAD(prm_module_list);
 
 struct prm_handler_info {
 	guid_t guid;
-	void *handler_addr;
+	efi_status_t (__efiapi *handler_addr)(u64, void *);
 	u64 static_data_buffer_addr;
 	u64 acpi_param_buffer_addr;
 
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index ee5c9a3e5060..c5e0c73cc000 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -41,8 +41,6 @@
  */
 #define efi_call_virt(f, args...)   \
 	efi_call_virt_pointer(efi.runtime, f, args)
-#define __efi_call_virt(f, args...) \
-	__efi_call_virt_pointer(efi.runtime, f, args)
 
 union efi_rts_args {
 	struct {
@@ -491,8 +489,13 @@ static void virt_efi_reset_system(int reset_type,
 			"could not get exclusive access to the firmware\n");
 		return;
 	}
+
+	arch_efi_call_virt_setup();
 	efi_rts_work.efi_rts_id = EFI_RESET_SYSTEM;
-	__efi_call_virt(reset_system, reset_type, status, data_size, data);
+	arch_efi_call_virt(efi.runtime, reset_system, reset_type, status,
+			   data_size, data);
+	arch_efi_call_virt_teardown();
+
 	up(&efi_runtime_lock);
 }
 
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 9d09dba116ce..28ac4e4a725d 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1170,8 +1170,7 @@ static inline void efi_check_for_embedded_firmwares(void) { }
 #define arch_efi_call_virt(p, f, args...)	((p)->f(args))
 
 /*
- * Arch code can implement the following three template macros, avoiding
- * reptition for the void/non-void return cases of {__,}efi_call_virt():
+ * Arch code must implement the following three routines:
  *
  *  * arch_efi_call_virt_setup()
  *
@@ -1180,9 +1179,8 @@ static inline void efi_check_for_embedded_firmwares(void) { }
  *
  *  * arch_efi_call_virt()
  *
- *    Performs the call. The last expression in the macro must be the call
- *    itself, allowing the logic to be shared by the void and non-void
- *    cases.
+ *    Performs the call. This routine takes a variable number of arguments so
+ *    it must be implemented as a variadic preprocessor macro.
  *
  *  * arch_efi_call_virt_teardown()
  *
@@ -1191,7 +1189,7 @@ static inline void efi_check_for_embedded_firmwares(void) { }
 
 #define efi_call_virt_pointer(p, f, args...)				\
 ({									\
-	efi_status_t __s;						\
+	typeof((p)->f(args)) __s;					\
 	unsigned long __flags;						\
 									\
 	arch_efi_call_virt_setup();					\
@@ -1205,19 +1203,6 @@ static inline void efi_check_for_embedded_firmwares(void) { }
 	__s;								\
 })
 
-#define __efi_call_virt_pointer(p, f, args...)				\
-({									\
-	unsigned long __flags;						\
-									\
-	arch_efi_call_virt_setup();					\
-									\
-	__flags = efi_call_virt_save_flags();				\
-	arch_efi_call_virt(p, f, args);					\
-	efi_call_virt_check_flags(__flags, __stringify(f));		\
-									\
-	arch_efi_call_virt_teardown();					\
-})
-
 #define EFI_RANDOM_SEED_SIZE		32U // BLAKE2S_HASH_SIZE
 
 struct linux_efi_random_seed {

From 3c17ae4161093fa2263b70064e34a033dc16fef5 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 8 Aug 2023 12:47:51 +0200
Subject: [PATCH 1696/2047] efi/runtime-wrappers: Don't duplicate
 setup/teardown code

Avoid duplicating the EFI arch setup and teardown routine calls numerous
times in efi_call_rts(). Instead, expand the efi_call_virt_pointer()
macro into efi_call_rts(), taking the pre and post parts out of the
switch.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/runtime-wrappers.c | 26 +++++++++++++++++--------
 include/linux/efi.h                     |  6 ++++--
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index c5e0c73cc000..afe9248cc5bc 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -40,7 +40,7 @@
  * code doesn't get too cluttered:
  */
 #define efi_call_virt(f, args...)   \
-	efi_call_virt_pointer(efi.runtime, f, args)
+	arch_efi_call_virt(efi.runtime, f, args)
 
 union efi_rts_args {
 	struct {
@@ -139,7 +139,7 @@ unsigned long efi_call_virt_save_flags(void)
 	return flags;
 }
 
-void efi_call_virt_check_flags(unsigned long flags, const char *call)
+void efi_call_virt_check_flags(unsigned long flags, const void *caller)
 {
 	unsigned long cur_flags, mismatch;
 
@@ -150,8 +150,8 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call)
 		return;
 
 	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_NOW_UNRELIABLE);
-	pr_err_ratelimited(FW_BUG "IRQ flags corrupted (0x%08lx=>0x%08lx) by EFI %s\n",
-			   flags, cur_flags, call);
+	pr_err_ratelimited(FW_BUG "IRQ flags corrupted (0x%08lx=>0x%08lx) by EFI call from %pS\n",
+			   flags, cur_flags, caller ?: __builtin_return_address(0));
 	arch_efi_restore_flags(flags);
 }
 
@@ -211,6 +211,10 @@ static void efi_call_rts(struct work_struct *work)
 {
 	const union efi_rts_args *args = efi_rts_work.args;
 	efi_status_t status = EFI_NOT_FOUND;
+	unsigned long flags;
+
+	arch_efi_call_virt_setup();
+	flags = efi_call_virt_save_flags();
 
 	switch (efi_rts_work.efi_rts_id) {
 	case EFI_GET_TIME:
@@ -287,6 +291,10 @@ static void efi_call_rts(struct work_struct *work)
 		 */
 		pr_err("Requested executing invalid EFI Runtime Service.\n");
 	}
+
+	efi_call_virt_check_flags(flags, efi_rts_work.caller);
+	arch_efi_call_virt_teardown();
+
 	efi_rts_work.status = status;
 	complete(&efi_rts_work.efi_rts_comp);
 }
@@ -296,6 +304,7 @@ static efi_status_t __efi_queue_work(enum efi_rts_ids id,
 {
 	efi_rts_work.efi_rts_id = id;
 	efi_rts_work.args = args;
+	efi_rts_work.caller = __builtin_return_address(0);
 	efi_rts_work.status = EFI_ABORTED;
 
 	if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
@@ -423,8 +432,8 @@ virt_efi_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
 	if (down_trylock(&efi_runtime_lock))
 		return EFI_NOT_READY;
 
-	status = efi_call_virt(set_variable, name, vendor, attr, data_size,
-			       data);
+	status = efi_call_virt_pointer(efi.runtime, set_variable, name, vendor,
+				       attr, data_size, data);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -462,8 +471,9 @@ virt_efi_query_variable_info_nonblocking(u32 attr,
 	if (down_trylock(&efi_runtime_lock))
 		return EFI_NOT_READY;
 
-	status = efi_call_virt(query_variable_info, attr, storage_space,
-			       remaining_space, max_variable_size);
+	status = efi_call_virt_pointer(efi.runtime, query_variable_info, attr,
+				       storage_space, remaining_space,
+				       max_variable_size);
 	up(&efi_runtime_lock);
 	return status;
 }
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 28ac4e4a725d..e0b346b014df 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1129,7 +1129,7 @@ extern bool efi_runtime_disabled(void);
 static inline bool efi_runtime_disabled(void) { return true; }
 #endif
 
-extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
+extern void efi_call_virt_check_flags(unsigned long flags, const void *caller);
 extern unsigned long efi_call_virt_save_flags(void);
 
 enum efi_secureboot_mode {
@@ -1196,7 +1196,7 @@ static inline void efi_check_for_embedded_firmwares(void) { }
 									\
 	__flags = efi_call_virt_save_flags();				\
 	__s = arch_efi_call_virt(p, f, args);				\
-	efi_call_virt_check_flags(__flags, __stringify(f));		\
+	efi_call_virt_check_flags(__flags, NULL);			\
 									\
 	arch_efi_call_virt_teardown();					\
 									\
@@ -1257,6 +1257,7 @@ union efi_rts_args;
  * @status:		Status of executing EFI Runtime Service
  * @efi_rts_id:		EFI Runtime Service function identifier
  * @efi_rts_comp:	Struct used for handling completions
+ * @caller:		The caller of the runtime service
  */
 struct efi_runtime_work {
 	union efi_rts_args	*args;
@@ -1264,6 +1265,7 @@ struct efi_runtime_work {
 	struct work_struct	work;
 	enum efi_rts_ids	efi_rts_id;
 	struct completion	efi_rts_comp;
+	const void		*caller;
 };
 
 extern struct efi_runtime_work efi_rts_work;

From 5894cf571e14fb393a4d0a82538de032127b9d8b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sun, 2 Jul 2023 11:57:34 +0200
Subject: [PATCH 1697/2047] acpi/prmt: Use EFI runtime sandbox to invoke PRM
 handlers

Instead of bypassing the kernel's adaptation layer for performing EFI
runtime calls, wire up ACPI PRM handling into it. This means these calls
can no longer occur concurrently with EFI runtime calls, and will be
made from the EFI runtime workqueue. It also means any page faults
occurring during PRM handling will be identified correctly as
originating in firmware code.

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/acpi/Kconfig                    |  2 +-
 drivers/acpi/prmt.c                     |  6 ++---
 drivers/firmware/efi/runtime-wrappers.c | 31 +++++++++++++++++++++++++
 include/linux/efi.h                     |  5 ++++
 4 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 00dd309b6682..cee82b473dc5 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -581,7 +581,7 @@ config ACPI_VIOT
 
 config ACPI_PRMT
 	bool "Platform Runtime Mechanism Support"
-	depends on EFI && (X86_64 || ARM64)
+	depends on EFI_RUNTIME_WRAPPERS && (X86_64 || ARM64)
 	default y
 	help
 	  Platform Runtime Mechanism (PRM) is a firmware interface exposing a
diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c
index 71b9adaaf33b..7020584096bf 100644
--- a/drivers/acpi/prmt.c
+++ b/drivers/acpi/prmt.c
@@ -260,9 +260,9 @@ static acpi_status acpi_platformrt_space_handler(u32 function,
 		context.static_data_buffer = handler->static_data_buffer_addr;
 		context.mmio_ranges = module->mmio_info;
 
-		status = efi_call_virt_pointer(handler, handler_addr,
-					       handler->acpi_param_buffer_addr,
-					       &context);
+		status = efi_call_acpi_prm_handler(handler->handler_addr,
+						   handler->acpi_param_buffer_addr,
+						   &context);
 		if (status == EFI_SUCCESS) {
 			buffer->prm_status = PRM_HANDLER_SUCCESS;
 		} else {
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index afe9248cc5bc..e89484af9740 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -108,6 +108,12 @@ union efi_rts_args {
 		u64		*max_size;
 		int		*reset_type;
 	} QUERY_CAPSULE_CAPS;
+
+	struct {
+		efi_status_t	(__efiapi *acpi_prm_handler)(u64, void *);
+		u64		param_buffer_addr;
+		void		*context;
+	} ACPI_PRM_HANDLER;
 };
 
 struct efi_runtime_work efi_rts_work;
@@ -283,6 +289,13 @@ static void efi_call_rts(struct work_struct *work)
 				       args->QUERY_CAPSULE_CAPS.max_size,
 				       args->QUERY_CAPSULE_CAPS.reset_type);
 		break;
+	case EFI_ACPI_PRM_HANDLER:
+#ifdef CONFIG_ACPI_PRMT
+		status = arch_efi_call_virt(args, ACPI_PRM_HANDLER.acpi_prm_handler,
+					    args->ACPI_PRM_HANDLER.param_buffer_addr,
+					    args->ACPI_PRM_HANDLER.context);
+		break;
+#endif
 	default:
 		/*
 		 * Ideally, we should never reach here because a caller of this
@@ -560,3 +573,21 @@ void efi_native_runtime_setup(void)
 	efi.update_capsule = virt_efi_update_capsule;
 	efi.query_capsule_caps = virt_efi_query_capsule_caps;
 }
+
+#ifdef CONFIG_ACPI_PRMT
+
+efi_status_t
+efi_call_acpi_prm_handler(efi_status_t (__efiapi *handler_addr)(u64, void *),
+			  u64 param_buffer_addr, void *context)
+{
+	efi_status_t status;
+
+	if (down_interruptible(&efi_runtime_lock))
+		return EFI_ABORTED;
+	status = efi_queue_work(ACPI_PRM_HANDLER, handler_addr,
+				param_buffer_addr, context);
+	up(&efi_runtime_lock);
+	return status;
+}
+
+#endif
diff --git a/include/linux/efi.h b/include/linux/efi.h
index e0b346b014df..5a1e39df8b26 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1228,6 +1228,10 @@ extern int efi_tpm_final_log_size;
 
 extern unsigned long rci2_table_phys;
 
+efi_status_t
+efi_call_acpi_prm_handler(efi_status_t (__efiapi *handler_addr)(u64, void *),
+			  u64 param_buffer_addr, void *context);
+
 /*
  * efi_runtime_service() function identifiers.
  * "NONE" is used by efi_recover_from_page_fault() to check if the page
@@ -1247,6 +1251,7 @@ enum efi_rts_ids {
 	EFI_RESET_SYSTEM,
 	EFI_UPDATE_CAPSULE,
 	EFI_QUERY_CAPSULE_CAPS,
+	EFI_ACPI_PRM_HANDLER,
 };
 
 union efi_rts_args;

From a14198dfe990d58474cea64dab8b95b810a68b70 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 8 Aug 2023 14:49:12 +0200
Subject: [PATCH 1698/2047] efi/runtime-wrappers: Clean up white space and add
 __init annotation

Some cosmetic changes as well as a missing __init annotation.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/runtime-wrappers.c | 41 ++++++++++++-------------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index e89484af9740..5d56bc40a79d 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -436,9 +436,8 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
 }
 
 static efi_status_t
-virt_efi_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
-				  u32 attr, unsigned long data_size,
-				  void *data)
+virt_efi_set_variable_nb(efi_char16_t *name, efi_guid_t *vendor, u32 attr,
+			 unsigned long data_size, void *data)
 {
 	efi_status_t status;
 
@@ -471,10 +470,8 @@ static efi_status_t virt_efi_query_variable_info(u32 attr,
 }
 
 static efi_status_t
-virt_efi_query_variable_info_nonblocking(u32 attr,
-					 u64 *storage_space,
-					 u64 *remaining_space,
-					 u64 *max_variable_size)
+virt_efi_query_variable_info_nb(u32 attr, u64 *storage_space,
+				u64 *remaining_space, u64 *max_variable_size)
 {
 	efi_status_t status;
 
@@ -556,22 +553,22 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
 	return status;
 }
 
-void efi_native_runtime_setup(void)
+void __init efi_native_runtime_setup(void)
 {
-	efi.get_time = virt_efi_get_time;
-	efi.set_time = virt_efi_set_time;
-	efi.get_wakeup_time = virt_efi_get_wakeup_time;
-	efi.set_wakeup_time = virt_efi_set_wakeup_time;
-	efi.get_variable = virt_efi_get_variable;
-	efi.get_next_variable = virt_efi_get_next_variable;
-	efi.set_variable = virt_efi_set_variable;
-	efi.set_variable_nonblocking = virt_efi_set_variable_nonblocking;
-	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
-	efi.reset_system = virt_efi_reset_system;
-	efi.query_variable_info = virt_efi_query_variable_info;
-	efi.query_variable_info_nonblocking = virt_efi_query_variable_info_nonblocking;
-	efi.update_capsule = virt_efi_update_capsule;
-	efi.query_capsule_caps = virt_efi_query_capsule_caps;
+	efi.get_time			    = virt_efi_get_time;
+	efi.set_time			    = virt_efi_set_time;
+	efi.get_wakeup_time		    = virt_efi_get_wakeup_time;
+	efi.set_wakeup_time		    = virt_efi_set_wakeup_time;
+	efi.get_variable		    = virt_efi_get_variable;
+	efi.get_next_variable		    = virt_efi_get_next_variable;
+	efi.set_variable		    = virt_efi_set_variable;
+	efi.set_variable_nonblocking	    = virt_efi_set_variable_nb;
+	efi.get_next_high_mono_count	    = virt_efi_get_next_high_mono_count;
+	efi.reset_system 		    = virt_efi_reset_system;
+	efi.query_variable_info		    = virt_efi_query_variable_info;
+	efi.query_variable_info_nonblocking = virt_efi_query_variable_info_nb;
+	efi.update_capsule		    = virt_efi_update_capsule;
+	efi.query_capsule_caps		    = virt_efi_query_capsule_caps;
 }
 
 #ifdef CONFIG_ACPI_PRMT

From 572a3d1e5d3a3e335b92e2c28a63c0b27944480c Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Mon, 21 Aug 2023 10:39:20 -0700
Subject: [PATCH 1699/2047] tmpfs,xattr: GFP_KERNEL_ACCOUNT for simple xattrs

It is particularly important for the userns mount case (when a sensible
nr_inodes maximum may not be enforced) that tmpfs user xattrs be subject
to memory cgroup limiting.  Leave temporary buffer allocations as is,
but change the persistent simple xattr allocations from GFP_KERNEL to
GFP_KERNEL_ACCOUNT.  This limits kernfs's cgroupfs too, but that's good.

(I had intended to send this change earlier, but had been confused by
shmem_alloc_inode() using GFP_KERNEL, and thought a discussion would be
needed to change that too: no, I was forgetting the SLAB_ACCOUNT on that
kmem_cache, which implicitly adds __GFP_ACCOUNT to all its allocations.)

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <f6953e5a-4183-8314-38f2-40be60998615@google.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xattr.c | 4 ++--
 mm/shmem.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/xattr.c b/fs/xattr.c
index 2d607542281b..efd4736bc94b 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1093,7 +1093,7 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
 	if (len < sizeof(*new_xattr))
 		return NULL;
 
-	new_xattr = kvmalloc(len, GFP_KERNEL);
+	new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT);
 	if (!new_xattr)
 		return NULL;
 
@@ -1217,7 +1217,7 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
 		if (!new_xattr)
 			return ERR_PTR(-ENOMEM);
 
-		new_xattr->name = kstrdup(name, GFP_KERNEL);
+		new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
 		if (!new_xattr->name) {
 			simple_xattr_free(new_xattr);
 			return ERR_PTR(-ENOMEM);
diff --git a/mm/shmem.c b/mm/shmem.c
index b782edeb69aa..11298c797cdc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3616,7 +3616,7 @@ static int shmem_initxattrs(struct inode *inode,
 
 		len = strlen(xattr->name) + 1;
 		new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
-					  GFP_KERNEL);
+					  GFP_KERNEL_ACCOUNT);
 		if (!new_xattr->name) {
 			kvfree(new_xattr);
 			break;

From bb5ad5ef7493ed21abc2c8b890204c8f79df86a8 Mon Sep 17 00:00:00 2001
From: Haibo Chen <haibo.chen@nxp.com>
Date: Sat, 6 May 2023 16:59:27 +0800
Subject: [PATCH 1700/2047] gpio: vf610: switch to dynamic allocat GPIO base

gpiolib want to get completely rid of static gpiobase allocation,
so switch to dynamic allocat GPIO base, also can avoid warning
message:

[    1.529974] gpio gpiochip0: Static allocation of GPIO base
is deprecated, use dynamic allocation.

Signed-off-by: Haibo Chen <haibo.chen@nxp.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-vf610.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c
index 54e7c51f48c8..dbc7ba0ee72c 100644
--- a/drivers/gpio/gpio-vf610.c
+++ b/drivers/gpio/gpio-vf610.c
@@ -258,7 +258,6 @@ static void vf610_gpio_disable_clk(void *data)
 static int vf610_gpio_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *np = dev->of_node;
 	struct vf610_gpio_port *port;
 	struct gpio_chip *gc;
 	struct gpio_irq_chip *girq;
@@ -318,7 +317,7 @@ static int vf610_gpio_probe(struct platform_device *pdev)
 	gc->parent = dev;
 	gc->label = dev_name(dev);
 	gc->ngpio = VF610_GPIO_PER_PORT;
-	gc->base = of_alias_get_id(np, "gpio") * VF610_GPIO_PER_PORT;
+	gc->base = -1;
 
 	gc->request = gpiochip_generic_request;
 	gc->free = gpiochip_generic_free;

From db3b16dcc7e4f5bfbe35806a54fb15dc858d19f8 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Sat, 19 Aug 2023 19:04:43 +0100
Subject: [PATCH 1701/2047] gpio: pcf857x: Extend match data support for OF
 tables

The driver has OF match table, but still it uses an ID lookup table for
retrieving match data. Currently, the driver is working on the
assumption that an I2C device registered via OF will always match a
legacy I2C device ID. Extend match data support for OF tables by using
i2c_get_match_data() instead of the ID lookup for both OF/ID matches by
making similar OF/ID tables.

Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-pcf857x.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c
index c4c785548408..53b69abe6787 100644
--- a/drivers/gpio/gpio-pcf857x.c
+++ b/drivers/gpio/gpio-pcf857x.c
@@ -36,19 +36,19 @@ static const struct i2c_device_id pcf857x_id[] = {
 MODULE_DEVICE_TABLE(i2c, pcf857x_id);
 
 static const struct of_device_id pcf857x_of_table[] = {
-	{ .compatible = "nxp,pcf8574" },
-	{ .compatible = "nxp,pcf8574a" },
-	{ .compatible = "nxp,pca8574" },
-	{ .compatible = "nxp,pca9670" },
-	{ .compatible = "nxp,pca9672" },
-	{ .compatible = "nxp,pca9674" },
-	{ .compatible = "nxp,pcf8575" },
-	{ .compatible = "nxp,pca8575" },
-	{ .compatible = "nxp,pca9671" },
-	{ .compatible = "nxp,pca9673" },
-	{ .compatible = "nxp,pca9675" },
-	{ .compatible = "maxim,max7328" },
-	{ .compatible = "maxim,max7329" },
+	{ .compatible = "nxp,pcf8574", (void *)8 },
+	{ .compatible = "nxp,pcf8574a", (void *)8 },
+	{ .compatible = "nxp,pca8574", (void *)8 },
+	{ .compatible = "nxp,pca9670", (void *)8 },
+	{ .compatible = "nxp,pca9672", (void *)8 },
+	{ .compatible = "nxp,pca9674", (void *)8 },
+	{ .compatible = "nxp,pcf8575", (void *)16 },
+	{ .compatible = "nxp,pca8575", (void *)16 },
+	{ .compatible = "nxp,pca9671", (void *)16 },
+	{ .compatible = "nxp,pca9673", (void *)16 },
+	{ .compatible = "nxp,pca9675", (void *)16 },
+	{ .compatible = "maxim,max7328", (void *)8 },
+	{ .compatible = "maxim,max7329", (void *)8 },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, pcf857x_of_table);
@@ -272,7 +272,6 @@ static const struct irq_chip pcf857x_irq_chip = {
 
 static int pcf857x_probe(struct i2c_client *client)
 {
-	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct pcf857x *gpio;
 	unsigned int n_latch = 0;
 	int status;
@@ -296,7 +295,7 @@ static int pcf857x_probe(struct i2c_client *client)
 	gpio->chip.set_multiple		= pcf857x_set_multiple;
 	gpio->chip.direction_input	= pcf857x_input;
 	gpio->chip.direction_output	= pcf857x_output;
-	gpio->chip.ngpio		= id->driver_data;
+	gpio->chip.ngpio		= (uintptr_t)i2c_get_match_data(client);
 
 	/* NOTE:  the OnSemi jlc1562b is also largely compatible with
 	 * these parts, notably for output.  It has a low-resolution

From f8941e6c4c712948663ec5d7bbb546f1a0f4e3f6 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 22 Aug 2023 15:15:07 +0530
Subject: [PATCH 1702/2047] xen: privcmd: Add support for irqfd

Xen provides support for injecting interrupts to the guests via the
HYPERVISOR_dm_op() hypercall. The same is used by the Virtio based
device backend implementations, in an inefficient manner currently.

Generally, the Virtio backends are implemented to work with the Eventfd
based mechanism. In order to make such backends work with Xen, another
software layer needs to poll the Eventfds and raise an interrupt to the
guest using the Xen based mechanism. This results in an extra context
switch.

This is not a new problem in Linux though. It is present with other
hypervisors like KVM, etc. as well. The generic solution implemented in
the kernel for them is to provide an IOCTL call to pass the interrupt
details and eventfd, which lets the kernel take care of polling the
eventfd and raising of the interrupt, instead of handling this in user
space (which involves an extra context switch).

This patch adds support to inject a specific interrupt to guest using
the eventfd mechanism, by preventing the extra context switch.

Inspired by existing implementations for KVM, etc..

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/8e724ac1f50c2bc1eb8da9b3ff6166f1372570aa.1692697321.git.viresh.kumar@linaro.org
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/Kconfig        |   7 +
 drivers/xen/privcmd.c      | 282 ++++++++++++++++++++++++++++++++++++-
 include/uapi/xen/privcmd.h |  14 ++
 3 files changed, 301 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index d5d7c402b651..d43153fec18e 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -269,6 +269,13 @@ config XEN_PRIVCMD
 	  disaggregated Xen setups this driver might be needed for other
 	  domains, too.
 
+config XEN_PRIVCMD_IRQFD
+	bool "Xen irqfd support"
+	depends on XEN_PRIVCMD && XEN_VIRTIO && EVENTFD
+	help
+	  Using the irqfd mechanism a virtio backend running in a daemon can
+	  speed up interrupt injection into a guest.
+
 config XEN_ACPI_PROCESSOR
 	tristate "Xen ACPI processor"
 	depends on XEN && XEN_PV_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index f447cd37cc4c..f00ad5f5f1d4 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -9,11 +9,16 @@
 
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
+#include <linux/eventfd.h>
+#include <linux/file.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/workqueue.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
@@ -833,6 +838,263 @@ out:
 	return rc;
 }
 
+#ifdef CONFIG_XEN_PRIVCMD_IRQFD
+/* Irqfd support */
+static struct workqueue_struct *irqfd_cleanup_wq;
+static DEFINE_MUTEX(irqfds_lock);
+static LIST_HEAD(irqfds_list);
+
+struct privcmd_kernel_irqfd {
+	struct xen_dm_op_buf xbufs;
+	domid_t dom;
+	bool error;
+	struct eventfd_ctx *eventfd;
+	struct work_struct shutdown;
+	wait_queue_entry_t wait;
+	struct list_head list;
+	poll_table pt;
+};
+
+static void irqfd_deactivate(struct privcmd_kernel_irqfd *kirqfd)
+{
+	lockdep_assert_held(&irqfds_lock);
+
+	list_del_init(&kirqfd->list);
+	queue_work(irqfd_cleanup_wq, &kirqfd->shutdown);
+}
+
+static void irqfd_shutdown(struct work_struct *work)
+{
+	struct privcmd_kernel_irqfd *kirqfd =
+		container_of(work, struct privcmd_kernel_irqfd, shutdown);
+	u64 cnt;
+
+	eventfd_ctx_remove_wait_queue(kirqfd->eventfd, &kirqfd->wait, &cnt);
+	eventfd_ctx_put(kirqfd->eventfd);
+	kfree(kirqfd);
+}
+
+static void irqfd_inject(struct privcmd_kernel_irqfd *kirqfd)
+{
+	u64 cnt;
+	long rc;
+
+	eventfd_ctx_do_read(kirqfd->eventfd, &cnt);
+
+	xen_preemptible_hcall_begin();
+	rc = HYPERVISOR_dm_op(kirqfd->dom, 1, &kirqfd->xbufs);
+	xen_preemptible_hcall_end();
+
+	/* Don't repeat the error message for consecutive failures */
+	if (rc && !kirqfd->error) {
+		pr_err("Failed to configure irq for guest domain: %d\n",
+		       kirqfd->dom);
+	}
+
+	kirqfd->error = rc;
+}
+
+static int
+irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
+{
+	struct privcmd_kernel_irqfd *kirqfd =
+		container_of(wait, struct privcmd_kernel_irqfd, wait);
+	__poll_t flags = key_to_poll(key);
+
+	if (flags & EPOLLIN)
+		irqfd_inject(kirqfd);
+
+	if (flags & EPOLLHUP) {
+		mutex_lock(&irqfds_lock);
+		irqfd_deactivate(kirqfd);
+		mutex_unlock(&irqfds_lock);
+	}
+
+	return 0;
+}
+
+static void
+irqfd_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
+{
+	struct privcmd_kernel_irqfd *kirqfd =
+		container_of(pt, struct privcmd_kernel_irqfd, pt);
+
+	add_wait_queue_priority(wqh, &kirqfd->wait);
+}
+
+static int privcmd_irqfd_assign(struct privcmd_irqfd *irqfd)
+{
+	struct privcmd_kernel_irqfd *kirqfd, *tmp;
+	__poll_t events;
+	struct fd f;
+	void *dm_op;
+	int ret;
+
+	kirqfd = kzalloc(sizeof(*kirqfd) + irqfd->size, GFP_KERNEL);
+	if (!kirqfd)
+		return -ENOMEM;
+	dm_op = kirqfd + 1;
+
+	if (copy_from_user(dm_op, irqfd->dm_op, irqfd->size)) {
+		ret = -EFAULT;
+		goto error_kfree;
+	}
+
+	kirqfd->xbufs.size = irqfd->size;
+	set_xen_guest_handle(kirqfd->xbufs.h, dm_op);
+	kirqfd->dom = irqfd->dom;
+	INIT_WORK(&kirqfd->shutdown, irqfd_shutdown);
+
+	f = fdget(irqfd->fd);
+	if (!f.file) {
+		ret = -EBADF;
+		goto error_kfree;
+	}
+
+	kirqfd->eventfd = eventfd_ctx_fileget(f.file);
+	if (IS_ERR(kirqfd->eventfd)) {
+		ret = PTR_ERR(kirqfd->eventfd);
+		goto error_fd_put;
+	}
+
+	/*
+	 * Install our own custom wake-up handling so we are notified via a
+	 * callback whenever someone signals the underlying eventfd.
+	 */
+	init_waitqueue_func_entry(&kirqfd->wait, irqfd_wakeup);
+	init_poll_funcptr(&kirqfd->pt, irqfd_poll_func);
+
+	mutex_lock(&irqfds_lock);
+
+	list_for_each_entry(tmp, &irqfds_list, list) {
+		if (kirqfd->eventfd == tmp->eventfd) {
+			ret = -EBUSY;
+			mutex_unlock(&irqfds_lock);
+			goto error_eventfd;
+		}
+	}
+
+	list_add_tail(&kirqfd->list, &irqfds_list);
+	mutex_unlock(&irqfds_lock);
+
+	/*
+	 * Check if there was an event already pending on the eventfd before we
+	 * registered, and trigger it as if we didn't miss it.
+	 */
+	events = vfs_poll(f.file, &kirqfd->pt);
+	if (events & EPOLLIN)
+		irqfd_inject(kirqfd);
+
+	/*
+	 * Do not drop the file until the kirqfd is fully initialized, otherwise
+	 * we might race against the EPOLLHUP.
+	 */
+	fdput(f);
+	return 0;
+
+error_eventfd:
+	eventfd_ctx_put(kirqfd->eventfd);
+
+error_fd_put:
+	fdput(f);
+
+error_kfree:
+	kfree(kirqfd);
+	return ret;
+}
+
+static int privcmd_irqfd_deassign(struct privcmd_irqfd *irqfd)
+{
+	struct privcmd_kernel_irqfd *kirqfd;
+	struct eventfd_ctx *eventfd;
+
+	eventfd = eventfd_ctx_fdget(irqfd->fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	mutex_lock(&irqfds_lock);
+
+	list_for_each_entry(kirqfd, &irqfds_list, list) {
+		if (kirqfd->eventfd == eventfd) {
+			irqfd_deactivate(kirqfd);
+			break;
+		}
+	}
+
+	mutex_unlock(&irqfds_lock);
+
+	eventfd_ctx_put(eventfd);
+
+	/*
+	 * Block until we know all outstanding shutdown jobs have completed so
+	 * that we guarantee there will not be any more interrupts once this
+	 * deassign function returns.
+	 */
+	flush_workqueue(irqfd_cleanup_wq);
+
+	return 0;
+}
+
+static long privcmd_ioctl_irqfd(struct file *file, void __user *udata)
+{
+	struct privcmd_data *data = file->private_data;
+	struct privcmd_irqfd irqfd;
+
+	if (copy_from_user(&irqfd, udata, sizeof(irqfd)))
+		return -EFAULT;
+
+	/* No other flags should be set */
+	if (irqfd.flags & ~PRIVCMD_IRQFD_FLAG_DEASSIGN)
+		return -EINVAL;
+
+	/* If restriction is in place, check the domid matches */
+	if (data->domid != DOMID_INVALID && data->domid != irqfd.dom)
+		return -EPERM;
+
+	if (irqfd.flags & PRIVCMD_IRQFD_FLAG_DEASSIGN)
+		return privcmd_irqfd_deassign(&irqfd);
+
+	return privcmd_irqfd_assign(&irqfd);
+}
+
+static int privcmd_irqfd_init(void)
+{
+	irqfd_cleanup_wq = alloc_workqueue("privcmd-irqfd-cleanup", 0, 0);
+	if (!irqfd_cleanup_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void privcmd_irqfd_exit(void)
+{
+	struct privcmd_kernel_irqfd *kirqfd, *tmp;
+
+	mutex_lock(&irqfds_lock);
+
+	list_for_each_entry_safe(kirqfd, tmp, &irqfds_list, list)
+		irqfd_deactivate(kirqfd);
+
+	mutex_unlock(&irqfds_lock);
+
+	destroy_workqueue(irqfd_cleanup_wq);
+}
+#else
+static inline long privcmd_ioctl_irqfd(struct file *file, void __user *udata)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int privcmd_irqfd_init(void)
+{
+	return 0;
+}
+
+static inline void privcmd_irqfd_exit(void)
+{
+}
+#endif /* CONFIG_XEN_PRIVCMD_IRQFD */
+
 static long privcmd_ioctl(struct file *file,
 			  unsigned int cmd, unsigned long data)
 {
@@ -868,6 +1130,10 @@ static long privcmd_ioctl(struct file *file,
 		ret = privcmd_ioctl_mmap_resource(file, udata);
 		break;
 
+	case IOCTL_PRIVCMD_IRQFD:
+		ret = privcmd_ioctl_irqfd(file, udata);
+		break;
+
 	default:
 		break;
 	}
@@ -992,15 +1258,27 @@ static int __init privcmd_init(void)
 	err = misc_register(&xen_privcmdbuf_dev);
 	if (err != 0) {
 		pr_err("Could not register Xen hypercall-buf device\n");
-		misc_deregister(&privcmd_dev);
-		return err;
+		goto err_privcmdbuf;
+	}
+
+	err = privcmd_irqfd_init();
+	if (err != 0) {
+		pr_err("irqfd init failed\n");
+		goto err_irqfd;
 	}
 
 	return 0;
+
+err_irqfd:
+	misc_deregister(&xen_privcmdbuf_dev);
+err_privcmdbuf:
+	misc_deregister(&privcmd_dev);
+	return err;
 }
 
 static void __exit privcmd_exit(void)
 {
+	privcmd_irqfd_exit();
 	misc_deregister(&privcmd_dev);
 	misc_deregister(&xen_privcmdbuf_dev);
 }
diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h
index d2029556083e..375718ba4ab6 100644
--- a/include/uapi/xen/privcmd.h
+++ b/include/uapi/xen/privcmd.h
@@ -98,6 +98,18 @@ struct privcmd_mmap_resource {
 	__u64 addr;
 };
 
+/* For privcmd_irqfd::flags */
+#define PRIVCMD_IRQFD_FLAG_DEASSIGN (1 << 0)
+
+struct privcmd_irqfd {
+	void __user *dm_op;
+	__u32 size; /* Size of structure pointed by dm_op */
+	__u32 fd;
+	__u32 flags;
+	domid_t dom;
+	__u8 pad[2];
+};
+
 /*
  * @cmd: IOCTL_PRIVCMD_HYPERCALL
  * @arg: &privcmd_hypercall_t
@@ -125,5 +137,7 @@ struct privcmd_mmap_resource {
 	_IOC(_IOC_NONE, 'P', 6, sizeof(domid_t))
 #define IOCTL_PRIVCMD_MMAP_RESOURCE				\
 	_IOC(_IOC_NONE, 'P', 7, sizeof(struct privcmd_mmap_resource))
+#define IOCTL_PRIVCMD_IRQFD					\
+	_IOC(_IOC_NONE, 'P', 8, sizeof(struct privcmd_irqfd))
 
 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */

From 051178c366bbc1bf8b4aba5ca5519d7da453c95f Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 22 Aug 2023 13:32:50 +0200
Subject: [PATCH 1703/2047] super: use higher-level helper for {freeze,thaw}

It's not necessary to use low-level locking helpers here. Use the
higher-level locking helpers and log if the superblock is dying. Since
the caller is assumed to already hold an active reference it isn't
possible to observe a dying superblock.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/super.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 2f604a6494fb..a284052ebab2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1873,8 +1873,13 @@ int freeze_super(struct super_block *sb)
 {
 	int ret;
 
+	/* Since the caller must already have an active reference... */
 	atomic_inc(&sb->s_active);
-	__super_lock_excl(sb);
+
+	/* ...@sb definitely can't be dying. */
+	if (!super_lock_excl(sb))
+		WARN(1, "Dying superblock while freezing!");
+
 	if (sb->s_writers.frozen != SB_UNFROZEN) {
 		deactivate_locked_super(sb);
 		return -EBUSY;
@@ -1896,7 +1901,10 @@ int freeze_super(struct super_block *sb)
 	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
 	super_unlock_excl(sb);
 	sb_wait_write(sb, SB_FREEZE_WRITE);
-	__super_lock_excl(sb);
+
+	/* We're still holding an active reference. */
+	if (!super_lock_excl(sb))
+		WARN(1, "Dying superblock while freezing!");
 
 	/* Now we go and block page faults... */
 	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
@@ -1979,7 +1987,8 @@ out:
  */
 int thaw_super(struct super_block *sb)
 {
-	__super_lock_excl(sb);
+	if (!super_lock_excl(sb))
+		WARN(1, "Dying superblock while thawing!");
 	return thaw_super_locked(sb);
 }
 EXPORT_SYMBOL(thaw_super);

From c02d35d89b317994bd713ba82e160c5e7f22d9c8 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Sat, 19 Aug 2023 01:26:07 +0900
Subject: [PATCH 1704/2047] btrfs: zoned: skip splitting and logical rewriting
 on pre-alloc write

When doing a relocation, there is a chance that at the time of
btrfs_reloc_clone_csums(), there is no checksum for the corresponding
region.

In this case, btrfs_finish_ordered_zoned()'s sum points to an invalid item
and so ordered_extent's logical is set to some invalid value. Then,
btrfs_lookup_block_group() in btrfs_zone_finish_endio() failed to find a
block group and will hit an assert or a null pointer dereference as
following.

This can be reprodcued by running btrfs/028 several times (e.g, 4 to 16
times) with a null_blk setup. The device's zone size and capacity is set to
32 MB and the storage size is set to 5 GB on my setup.

    KASAN: null-ptr-deref in range [0x0000000000000088-0x000000000000008f]
    CPU: 6 PID: 3105720 Comm: kworker/u16:13 Tainted: G        W          6.5.0-rc6-kts+ #1
    Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015
    Workqueue: btrfs-endio-write btrfs_work_helper [btrfs]
    RIP: 0010:btrfs_zone_finish_endio.part.0+0x34/0x160 [btrfs]
    Code: 41 54 49 89 fc 55 48 89 f5 53 e8 57 7d fc ff 48 8d b8 88 00 00 00 48 89 c3 48 b8 00 00 00 00 00
    > 3c 02 00 0f 85 02 01 00 00 f6 83 88 00 00 00 01 0f 84 a8 00 00
    RSP: 0018:ffff88833cf87b08 EFLAGS: 00010206
    RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000
    RDX: 0000000000000011 RSI: 0000000000000004 RDI: 0000000000000088
    RBP: 0000000000000002 R08: 0000000000000001 R09: ffffed102877b827
    R10: ffff888143bdc13b R11: ffff888125b1cbc0 R12: ffff888143bdc000
    R13: 0000000000007000 R14: ffff888125b1cba8 R15: 0000000000000000
    FS:  0000000000000000(0000) GS:ffff88881e500000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 00007f3ed85223d5 CR3: 00000001519b4005 CR4: 00000000001706e0
    Call Trace:
     <TASK>
     ? die_addr+0x3c/0xa0
     ? exc_general_protection+0x148/0x220
     ? asm_exc_general_protection+0x22/0x30
     ? btrfs_zone_finish_endio.part.0+0x34/0x160 [btrfs]
     ? btrfs_zone_finish_endio.part.0+0x19/0x160 [btrfs]
     btrfs_finish_one_ordered+0x7b8/0x1de0 [btrfs]
     ? rcu_is_watching+0x11/0xb0
     ? lock_release+0x47a/0x620
     ? btrfs_finish_ordered_zoned+0x59b/0x800 [btrfs]
     ? __pfx_btrfs_finish_one_ordered+0x10/0x10 [btrfs]
     ? btrfs_finish_ordered_zoned+0x358/0x800 [btrfs]
     ? __smp_call_single_queue+0x124/0x350
     ? rcu_is_watching+0x11/0xb0
     btrfs_work_helper+0x19f/0xc60 [btrfs]
     ? __pfx_try_to_wake_up+0x10/0x10
     ? _raw_spin_unlock_irq+0x24/0x50
     ? rcu_is_watching+0x11/0xb0
     process_one_work+0x8c1/0x1430
     ? __pfx_lock_acquire+0x10/0x10
     ? __pfx_process_one_work+0x10/0x10
     ? __pfx_do_raw_spin_lock+0x10/0x10
     ? _raw_spin_lock_irq+0x52/0x60
     worker_thread+0x100/0x12c0
     ? __kthread_parkme+0xc1/0x1f0
     ? __pfx_worker_thread+0x10/0x10
     kthread+0x2ea/0x3c0
     ? __pfx_kthread+0x10/0x10
     ret_from_fork+0x30/0x70
     ? __pfx_kthread+0x10/0x10
     ret_from_fork_asm+0x1b/0x30
     </TASK>

On the zoned mode, writing to pre-allocated region means data relocation
write. Such write always uses WRITE command so there is no need of splitting
and rewriting logical address. Thus, we can just skip the function for the
case.

Fixes: cbfce4c7fbde ("btrfs: optimize the logical to physical mapping for zoned writes")
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index a2f8e7440f8c..09bc325d075d 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1700,10 +1700,21 @@ void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
 {
 	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	struct btrfs_ordered_sum *sum =
-		list_first_entry(&ordered->list, typeof(*sum), list);
-	u64 logical = sum->logical;
-	u64 len = sum->len;
+	struct btrfs_ordered_sum *sum;
+	u64 logical, len;
+
+	/*
+	 * Write to pre-allocated region is for the data relocation, and so
+	 * it should use WRITE operation. No split/rewrite are necessary.
+	 */
+	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
+		return;
+
+	ASSERT(!list_empty(&ordered->list));
+	/* The ordered->list can be empty in the above pre-alloc case. */
+	sum = list_first_entry(&ordered->list, struct btrfs_ordered_sum, list);
+	logical = sum->logical;
+	len = sum->len;
 
 	while (len < ordered->disk_num_bytes) {
 		sum = list_next_entry(sum, list);

From a3bf4c36e3bd57c53f3054e47dddecb47a1fbda5 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 14 Jul 2023 19:34:40 +0100
Subject: [PATCH 1705/2047] affs: remove writepage implementation

If the filesystem implements migrate_folio and writepages, there is
no need for a writepage implementation.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/affs/file.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/affs/file.c b/fs/affs/file.c
index e43f2f007ac1..472e2bdd5349 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -15,6 +15,7 @@
 
 #include <linux/uio.h>
 #include <linux/blkdev.h>
+#include <linux/mpage.h>
 #include "affs.h"
 
 static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
@@ -370,9 +371,10 @@ err_alloc:
 	return -ENOSPC;
 }
 
-static int affs_writepage(struct page *page, struct writeback_control *wbc)
+static int affs_writepages(struct address_space *mapping,
+			   struct writeback_control *wbc)
 {
-	return block_write_full_page(page, affs_get_block, wbc);
+	return mpage_writepages(mapping, wbc, affs_get_block);
 }
 
 static int affs_read_folio(struct file *file, struct folio *folio)
@@ -456,10 +458,11 @@ const struct address_space_operations affs_aops = {
 	.dirty_folio	= block_dirty_folio,
 	.invalidate_folio = block_invalidate_folio,
 	.read_folio = affs_read_folio,
-	.writepage = affs_writepage,
+	.writepages = affs_writepages,
 	.write_begin = affs_write_begin,
 	.write_end = affs_write_end,
 	.direct_IO = affs_direct_IO,
+	.migrate_folio = buffer_migrate_folio,
 	.bmap = _affs_bmap
 };
 
@@ -835,9 +838,10 @@ const struct address_space_operations affs_aops_ofs = {
 	.dirty_folio	= block_dirty_folio,
 	.invalidate_folio = block_invalidate_folio,
 	.read_folio = affs_read_folio_ofs,
-	//.writepage = affs_writepage_ofs,
+	//.writepages = affs_writepages_ofs,
 	.write_begin = affs_write_begin_ofs,
-	.write_end = affs_write_end_ofs
+	.write_end = affs_write_end_ofs,
+	.migrate_folio = filemap_migrate_folio,
 };
 
 /* Free any preallocated blocks. */

From 4d4f1468a002b76fb4796985a11671d50c88e520 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 17 Aug 2023 15:12:17 +0300
Subject: [PATCH 1706/2047] affs: rename local toupper() to fn() to avoid
 confusion

A compiler may see the collision with the toupper() defined in ctype.h:

 fs/affs/namei.c:159:19: warning: unused variable 'toupper' [-Wunused-variable]
   159 |         toupper_t toupper = affs_get_toupper(sb);

To prevent this from happening, rename toupper local variable to fn.

Initially this had been introduced by 24579a881513 ("v2.4.3.5 -> v2.4.3.6")
in the history.git by history group.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/affs/namei.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index d12ccfd2a83d..2fe4a5832fcf 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -43,7 +43,7 @@ affs_get_toupper(struct super_block *sb)
  * Note: the dentry argument is the parent dentry.
  */
 static inline int
-__affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr, toupper_t toupper, bool notruncate)
+__affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr, toupper_t fn, bool notruncate)
 {
 	const u8 *name = qstr->name;
 	unsigned long hash;
@@ -57,7 +57,7 @@ __affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr, toupper_t tou
 	hash = init_name_hash(dentry);
 	len = min(qstr->len, AFFSNAMEMAX);
 	for (; len > 0; name++, len--)
-		hash = partial_name_hash(toupper(*name), hash);
+		hash = partial_name_hash(fn(*name), hash);
 	qstr->hash = end_name_hash(hash);
 
 	return 0;
@@ -80,7 +80,7 @@ affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
 }
 
 static inline int __affs_compare_dentry(unsigned int len,
-		const char *str, const struct qstr *name, toupper_t toupper,
+		const char *str, const struct qstr *name, toupper_t fn,
 		bool notruncate)
 {
 	const u8 *aname = str;
@@ -106,7 +106,7 @@ static inline int __affs_compare_dentry(unsigned int len,
 		return 1;
 
 	for (; len > 0; len--)
-		if (toupper(*aname++) != toupper(*bname++))
+		if (fn(*aname++) != fn(*bname++))
 			return 1;
 
 	return 0;
@@ -135,7 +135,7 @@ affs_intl_compare_dentry(const struct dentry *dentry,
  */
 
 static inline int
-affs_match(struct dentry *dentry, const u8 *name2, toupper_t toupper)
+affs_match(struct dentry *dentry, const u8 *name2, toupper_t fn)
 {
 	const u8 *name = dentry->d_name.name;
 	int len = dentry->d_name.len;
@@ -148,7 +148,7 @@ affs_match(struct dentry *dentry, const u8 *name2, toupper_t toupper)
 		return 0;
 
 	for (name2++; len > 0; len--)
-		if (toupper(*name++) != toupper(*name2++))
+		if (fn(*name++) != fn(*name2++))
 			return 0;
 	return 1;
 }
@@ -156,12 +156,12 @@ affs_match(struct dentry *dentry, const u8 *name2, toupper_t toupper)
 int
 affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len)
 {
-	toupper_t toupper = affs_get_toupper(sb);
+	toupper_t fn = affs_get_toupper(sb);
 	u32 hash;
 
 	hash = len = min(len, AFFSNAMEMAX);
 	for (; len > 0; len--)
-		hash = (hash * 13 + toupper(*name++)) & 0x7ff;
+		hash = (hash * 13 + fn(*name++)) & 0x7ff;
 
 	return hash % AFFS_SB(sb)->s_hashsize;
 }
@@ -171,7 +171,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
 {
 	struct super_block *sb = dir->i_sb;
 	struct buffer_head *bh;
-	toupper_t toupper = affs_get_toupper(sb);
+	toupper_t fn = affs_get_toupper(sb);
 	u32 key;
 
 	pr_debug("%s(\"%pd\")\n", __func__, dentry);
@@ -189,7 +189,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
 		bh = affs_bread(sb, key);
 		if (!bh)
 			return ERR_PTR(-EIO);
-		if (affs_match(dentry, AFFS_TAIL(sb, bh)->name, toupper))
+		if (affs_match(dentry, AFFS_TAIL(sb, bh)->name, fn))
 			return bh;
 		key = be32_to_cpu(AFFS_TAIL(sb, bh)->hash_chain);
 	}

From 60ea3db33fbddf559e18567ca8897f6bb9f25290 Mon Sep 17 00:00:00 2001
From: Jinjie Ruan <ruanjinjie@huawei.com>
Date: Tue, 22 Aug 2023 20:46:42 +0800
Subject: [PATCH 1707/2047] spi: at91-usart: Use PTR_ERR_OR_ZERO() to simplify
 code

Return PTR_ERR_OR_ZERO() instead of return 0 or PTR_ERR() to
simplify code.

Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Link: https://lore.kernel.org/r/20230822124643.987079-1-ruanjinjie@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-at91-usart.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/spi/spi-at91-usart.c b/drivers/spi/spi-at91-usart.c
index 75d9bc606442..b11d0f993cc7 100644
--- a/drivers/spi/spi-at91-usart.c
+++ b/drivers/spi/spi-at91-usart.c
@@ -485,10 +485,7 @@ static int at91_usart_gpio_setup(struct platform_device *pdev)
 
 	cs_gpios = devm_gpiod_get_array_optional(&pdev->dev, "cs", GPIOD_OUT_LOW);
 
-	if (IS_ERR(cs_gpios))
-		return PTR_ERR(cs_gpios);
-
-	return 0;
+	return PTR_ERR_OR_ZERO(cs_gpios);
 }
 
 static int at91_usart_spi_probe(struct platform_device *pdev)

From 2872144aec04baa7e43ecd2a60f7f0be3aa843fd Mon Sep 17 00:00:00 2001
From: Anshuman Gupta <anshuman.gupta@intel.com>
Date: Wed, 16 Aug 2023 18:22:16 +0530
Subject: [PATCH 1708/2047] drm/i915/dgfx: Enable d3cold at s2idle

System wide suspend already has support for lmem save/restore during
suspend therefore enabling d3cold for s2idle and keepng it disable for
runtime PM.(Refer below commit for d3cold runtime PM disable justification)
'commit 66eb93e71a7a ("drm/i915/dgfx: Keep PCI autosuspend control
'on' by default on all dGPU")'

It will reduce the DG2 Card power consumption to ~0 Watt
for s2idle power KPI.

v2:
- Added "Cc: stable@vger.kernel.org".

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8755
Cc: stable@vger.kernel.org
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Tested-by: Aaron Ma <aaron.ma@canonical.com>
Tested-by: Jianshui Yu <Jianshui.yu@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230816125216.1722002-1-anshuman.gupta@intel.com
(cherry picked from commit 2643e6d1f2a5e51877be24042d53cf956589be10)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/i915_driver.c | 33 ++++++++++++++++--------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 0ad0c5885ec2..7d8671fdf447 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -443,7 +443,6 @@ static int i915_pcode_init(struct drm_i915_private *i915)
 static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 {
 	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
-	struct pci_dev *root_pdev;
 	int ret;
 
 	if (i915_inject_probe_failure(dev_priv))
@@ -557,15 +556,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 
 	intel_bw_init_hw(dev_priv);
 
-	/*
-	 * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
-	 * This should be totally removed when we handle the pci states properly
-	 * on runtime PM and on s2idle cases.
-	 */
-	root_pdev = pcie_find_root_port(pdev);
-	if (root_pdev)
-		pci_d3cold_disable(root_pdev);
-
 	return 0;
 
 err_opregion:
@@ -591,7 +581,6 @@ err_perf:
 static void i915_driver_hw_remove(struct drm_i915_private *dev_priv)
 {
 	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
-	struct pci_dev *root_pdev;
 
 	i915_perf_fini(dev_priv);
 
@@ -599,10 +588,6 @@ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv)
 
 	if (pdev->msi_enabled)
 		pci_disable_msi(pdev);
-
-	root_pdev = pcie_find_root_port(pdev);
-	if (root_pdev)
-		pci_d3cold_enable(root_pdev);
 }
 
 /**
@@ -1517,6 +1502,8 @@ static int intel_runtime_suspend(struct device *kdev)
 {
 	struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
 	struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+	struct pci_dev *root_pdev;
 	struct intel_gt *gt;
 	int ret, i;
 
@@ -1568,6 +1555,15 @@ static int intel_runtime_suspend(struct device *kdev)
 		drm_err(&dev_priv->drm,
 			"Unclaimed access detected prior to suspending\n");
 
+	/*
+	 * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
+	 * This should be totally removed when we handle the pci states properly
+	 * on runtime PM.
+	 */
+	root_pdev = pcie_find_root_port(pdev);
+	if (root_pdev)
+		pci_d3cold_disable(root_pdev);
+
 	rpm->suspended = true;
 
 	/*
@@ -1606,6 +1602,8 @@ static int intel_runtime_resume(struct device *kdev)
 {
 	struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
 	struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+	struct pci_dev *root_pdev;
 	struct intel_gt *gt;
 	int ret, i;
 
@@ -1619,6 +1617,11 @@ static int intel_runtime_resume(struct device *kdev)
 
 	intel_opregion_notify_adapter(dev_priv, PCI_D0);
 	rpm->suspended = false;
+
+	root_pdev = pcie_find_root_port(pdev);
+	if (root_pdev)
+		pci_d3cold_enable(root_pdev);
+
 	if (intel_uncore_unclaimed_mmio(&dev_priv->uncore))
 		drm_dbg(&dev_priv->drm,
 			"Unclaimed access during suspend, bios?\n");

From ed7c6a2ba6a682f191f289de01e8eb0a1366c3eb Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Tue, 22 Aug 2023 12:03:43 +0200
Subject: [PATCH 1709/2047] dt-bindings: regulator: qcom,rpmh-regulator: allow
 i, j, l, m & n as RPMh resource name suffix

Add "i", "j", "l", "m" and "n" to the allowed subffix list as they can be
used as RPMh resource name suffixes on new platforms.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Acked-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Link: https://lore.kernel.org/r/20230822-topic-sm8x50-upstream-rpmh-regulator-suffix-v2-1-136b315085a4@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/regulator/qcom,rpmh-regulator.yaml      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
index e758093365bc..127a6f39b7f0 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
@@ -92,7 +92,7 @@ properties:
         RPMh resource name suffix used for the regulators found
         on this PMIC.
     $ref: /schemas/types.yaml#/definitions/string
-    enum: [a, b, c, d, e, f, g, h, k]
+    enum: [a, b, c, d, e, f, g, h, i, j, k, l, m, n]
 
   qcom,always-wait-for-ack:
     description: |

From f96801f0cfcefc0a16b146596577c53c75ee9773 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Wed, 19 Jul 2023 09:16:36 +0800
Subject: [PATCH 1710/2047] thermal/of: Fix potential uninitialized value
 access

If of_parse_phandle_with_args() called from __thermal_of_bind() or
__thermal_of_unbind() fails, cooling_spec.np will not be initialized,
so move the of_node_put() calls below the respective return value checks
to avoid dereferencing an uninitialized pointer.

Fixes: 3fd6d6e2b4e8 ("thermal/of: Rework the thermal device tree initialization")
Signed-off-by: Peng Fan <peng.fan@nxp.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_of.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
index 330690a3a208..4ca905723429 100644
--- a/drivers/thermal/thermal_of.c
+++ b/drivers/thermal/thermal_of.c
@@ -291,13 +291,13 @@ static int __thermal_of_unbind(struct device_node *map_np, int index, int trip_i
 	ret = of_parse_phandle_with_args(map_np, "cooling-device", "#cooling-cells",
 					 index, &cooling_spec);
 
-	of_node_put(cooling_spec.np);
-
 	if (ret < 0) {
 		pr_err("Invalid cooling-device entry\n");
 		return ret;
 	}
 
+	of_node_put(cooling_spec.np);
+
 	if (cooling_spec.args_count < 2) {
 		pr_err("wrong reference to cooling device, missing limits\n");
 		return -EINVAL;
@@ -324,13 +324,13 @@ static int __thermal_of_bind(struct device_node *map_np, int index, int trip_id,
 	ret = of_parse_phandle_with_args(map_np, "cooling-device", "#cooling-cells",
 					 index, &cooling_spec);
 
-	of_node_put(cooling_spec.np);
-
 	if (ret < 0) {
 		pr_err("Invalid cooling-device entry\n");
 		return ret;
 	}
 
+	of_node_put(cooling_spec.np);
+
 	if (cooling_spec.args_count < 2) {
 		pr_err("wrong reference to cooling device, missing limits\n");
 		return -EINVAL;

From e26a99dd1522caefcde023aabc0f79d7e6a016bf Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 21 Jul 2023 20:12:16 +0800
Subject: [PATCH 1711/2047] PM: runtime: Remove unsued extern declaration of
 pm_runtime_update_max_time_suspended()

Commit 76e267d822f2 ("PM / Runtime: Remove device fields related to
suspend time, v2") left behind this.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_runtime.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 9a8151a2bdea..7c9b35448563 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -85,8 +85,6 @@ extern void pm_runtime_irq_safe(struct device *dev);
 extern void __pm_runtime_use_autosuspend(struct device *dev, bool use);
 extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
 extern u64 pm_runtime_autosuspend_expiration(struct device *dev);
-extern void pm_runtime_update_max_time_suspended(struct device *dev,
-						 s64 delta_ns);
 extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
 extern void pm_runtime_get_suppliers(struct device *dev);
 extern void pm_runtime_put_suppliers(struct device *dev);

From 5f55836ab41671e005038255fdd60482718d2ca2 Mon Sep 17 00:00:00 2001
From: Clive Lin <clive.lin@mediatek.com>
Date: Mon, 31 Jul 2023 14:03:54 +0800
Subject: [PATCH 1712/2047] PM: QoS: Add check to make sure CPU latency is
 non-negative

CPU latency should never be negative, which will be incorrectly high
when converted to unsigned data type.

Commit 8d36694245f2 ("PM: QoS: Add check to make sure CPU freq is
non-negative") makes sure CPU frequency is non-negative to fix incorrect
behavior in freqency QoS.

Add an analogous check to make sure CPU latency is non-negative so as to
prevent this problem from happening in CPU latency QoS.

Signed-off-by: Clive Lin <clive.lin@mediatek.com>
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/qos.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 782d3b41c1f3..4244b069442e 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -220,6 +220,11 @@ static struct pm_qos_constraints cpu_latency_constraints = {
 	.type = PM_QOS_MIN,
 };
 
+static inline bool cpu_latency_qos_value_invalid(s32 value)
+{
+	return value < 0 && value != PM_QOS_DEFAULT_VALUE;
+}
+
 /**
  * cpu_latency_qos_limit - Return current system-wide CPU latency QoS limit.
  */
@@ -263,7 +268,7 @@ static void cpu_latency_qos_apply(struct pm_qos_request *req,
  */
 void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value)
 {
-	if (!req)
+	if (!req || cpu_latency_qos_value_invalid(value))
 		return;
 
 	if (cpu_latency_qos_request_active(req)) {
@@ -289,7 +294,7 @@ EXPORT_SYMBOL_GPL(cpu_latency_qos_add_request);
  */
 void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value)
 {
-	if (!req)
+	if (!req || cpu_latency_qos_value_invalid(new_value))
 		return;
 
 	if (!cpu_latency_qos_request_active(req)) {

From 1a6e1004f3225bebbe27e1575c22484a56e96f56 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 4 Aug 2023 17:21:01 +0800
Subject: [PATCH 1713/2047] thermal: intel: int340x: simplify the code with
 module_platform_driver()

The init/exit() of the driver only calls platform_driver_{un}register(),
so it can be simpilfied by using module_platform_driver().

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../thermal/intel/int340x_thermal/int3401_thermal.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/thermal/intel/int340x_thermal/int3401_thermal.c b/drivers/thermal/intel/int340x_thermal/int3401_thermal.c
index 217786fba185..c93a28eec4db 100644
--- a/drivers/thermal/intel/int340x_thermal/int3401_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3401_thermal.c
@@ -70,18 +70,7 @@ static struct platform_driver int3401_driver = {
 	},
 };
 
-static int __init proc_thermal_init(void)
-{
-	return platform_driver_register(&int3401_driver);
-}
-
-static void __exit proc_thermal_exit(void)
-{
-	platform_driver_unregister(&int3401_driver);
-}
-
-module_init(proc_thermal_init);
-module_exit(proc_thermal_exit);
+module_platform_driver(int3401_driver);
 
 MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
 MODULE_DESCRIPTION("Processor Thermal Reporting Device Driver");

From 10bb4e4ab7dd3898f413b5b4a3b81f6e9b1f6bf5 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 10 Aug 2023 18:21:19 +0200
Subject: [PATCH 1714/2047] PM: sleep: Add helpers to allow a device to remain
 powered-on

On some platforms a device and its corresponding PM domain, may need to
remain powered-on during system wide suspend, to support various use cases.
For example, when the console_suspend_enabled flag is unset for a serial
controller, the corresponding device may need to remain powered on. Other
use cases exists too.

In fact, we already have the mechanism in the PM core to deal with these
kind of use cases. However, the current naming of the corresponding
functions/flags clearly suggests these should be use for system wakeup.
See device_wakeup_path(), device_set_wakeup_path and
dev->power.wakeup_path.

As a way to extend the use of the existing mechanism, let's introduce two
new helpers functions, device_awake_path() and device_set_awake_path(). At
this point, let them act as wrappers of the existing functions. Ideally,
when all users have been converted to use the new helpers, we may decide to
drop the old ones and rename the flag.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_wakeup.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 77f4849e3418..6eb9adaef52b 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -194,6 +194,16 @@ static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec,
 
 #endif /* !CONFIG_PM_SLEEP */
 
+static inline bool device_awake_path(struct device *dev)
+{
+	return device_wakeup_path(dev);
+}
+
+static inline void device_set_awake_path(struct device *dev)
+{
+	device_set_wakeup_path(dev);
+}
+
 static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
 {
 	return pm_wakeup_ws_event(ws, msec, false);

From ee188ee6ed3821ec20c1d3e8b7aacd844924b10b Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Sun, 13 Aug 2023 10:52:20 +0800
Subject: [PATCH 1715/2047] thermal: intel: intel_soc_dts_iosf: Remove
 redundant check

Remove the redundant check in remove_dts_thermal_zone() because all of
its existing callers pass a valid pointer as the argument.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/intel/intel_soc_dts_iosf.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index 7a66d0f077b0..d00def3c4703 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -212,11 +212,9 @@ static int soc_dts_enable(int id)
 
 static void remove_dts_thermal_zone(struct intel_soc_dts_sensor_entry *dts)
 {
-	if (dts) {
-		iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
-			       SOC_DTS_OFFSET_ENABLE, dts->store_status);
-		thermal_zone_device_unregister(dts->tzone);
-	}
+	iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
+		       SOC_DTS_OFFSET_ENABLE, dts->store_status);
+	thermal_zone_device_unregister(dts->tzone);
 }
 
 static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,

From a436ae9434ec491eefb4ed4c77bdb9c762925976 Mon Sep 17 00:00:00 2001
From: Liao Chang <liaochang1@huawei.com>
Date: Wed, 16 Aug 2023 01:58:53 +0000
Subject: [PATCH 1716/2047] cpufreq: Use clamp() helper macro to improve the
 code readability

The valid values of policy.{min, max} should be between 'min' and 'max',
so use clamp() helper macro to makes cpufreq_verify_within_limits() easier
to follow.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 172ff51c1b2a..9bf94ae08158 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -19,6 +19,7 @@
 #include <linux/pm_qos.h>
 #include <linux/spinlock.h>
 #include <linux/sysfs.h>
+#include <linux/minmax.h>
 
 /*********************************************************************
  *                        CPUFREQ INTERFACE                          *
@@ -467,17 +468,8 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy_data *poli
 						unsigned int min,
 						unsigned int max)
 {
-	if (policy->min < min)
-		policy->min = min;
-	if (policy->max < min)
-		policy->max = min;
-	if (policy->min > max)
-		policy->min = max;
-	if (policy->max > max)
-		policy->max = max;
-	if (policy->min > policy->max)
-		policy->min = policy->max;
-	return;
+	policy->max = clamp(policy->max, min, max);
+	policy->min = clamp(policy->min, min, policy->max);
 }
 
 static inline void

From 8d6e5e8268e89979d86501dbb8385ce2e6154de1 Mon Sep 17 00:00:00 2001
From: Swapnil Sapkal <swapnil.sapkal@amd.com>
Date: Fri, 18 Aug 2023 11:44:51 +0000
Subject: [PATCH 1717/2047] cpufreq: amd-pstate-ut: Remove module parameter
 access

In amd-pstate-ut, shared memory-based systems call
get_shared_mem() as part of amd_pstate_ut_check_enabled()
function. This function was written when CONFIG_X86_AMD_PSTATE
was tristate config and amd_pstate can be built as a module.

Currently CONFIG_X86_AMD_PSTATE is a boolean config and module
parameter shared_mem is removed. But amd-pstate-ut code still
accesses this module parameter. Remove those accesses.

Fixes: 456ca88d8a52 ("cpufreq: amd-pstate: change amd-pstate driver to be built-in type")
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Meng Li <li.meng@amd.com>
Reviewed-by: Wyes Karny <wyes.karny@amd.com>
Suggested-by: Wyes Karny <wyes.karny@amd.com>
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
[ rjw: Subject edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/amd-pstate-ut.c | 22 ++--------------------
 1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
index 7f3fe2048981..cf07ee77d3cc 100644
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -64,27 +64,9 @@ static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = {
 static bool get_shared_mem(void)
 {
 	bool result = false;
-	char path[] = "/sys/module/amd_pstate/parameters/shared_mem";
-	char buf[5] = {0};
-	struct file *filp = NULL;
-	loff_t pos = 0;
-	ssize_t ret;
 
-	if (!boot_cpu_has(X86_FEATURE_CPPC)) {
-		filp = filp_open(path, O_RDONLY, 0);
-		if (IS_ERR(filp))
-			pr_err("%s unable to open %s file!\n", __func__, path);
-		else {
-			ret = kernel_read(filp, &buf, sizeof(buf), &pos);
-			if (ret < 0)
-				pr_err("%s read %s file fail ret=%ld!\n",
-					__func__, path, (long)ret);
-			filp_close(filp, NULL);
-		}
-
-		if ('Y' == *buf)
-			result = true;
-	}
+	if (!boot_cpu_has(X86_FEATURE_CPPC))
+		result = true;
 
 	return result;
 }

From 60dd283804479c4a52f995b713f448e2cd65b8c8 Mon Sep 17 00:00:00 2001
From: Swapnil Sapkal <swapnil.sapkal@amd.com>
Date: Fri, 18 Aug 2023 11:44:52 +0000
Subject: [PATCH 1718/2047] cpufreq: amd-pstate-ut: Fix kernel panic when
 loading the driver

After loading the amd-pstate-ut driver, amd_pstate_ut_check_perf()
and amd_pstate_ut_check_freq() use cpufreq_cpu_get() to get the policy
of the CPU and mark it as busy.

In these functions, cpufreq_cpu_put() should be used to release the
policy, but it is not, so any other entity trying to access the policy
is blocked indefinitely.

One such scenario is when amd_pstate mode is changed, leading to the
following splat:

[ 1332.103727] INFO: task bash:2929 blocked for more than 120 seconds.
[ 1332.110001]       Not tainted 6.5.0-rc2-amd-pstate-ut #5
[ 1332.115315] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1332.123140] task:bash            state:D stack:0     pid:2929  ppid:2873   flags:0x00004006
[ 1332.123143] Call Trace:
[ 1332.123145]  <TASK>
[ 1332.123148]  __schedule+0x3c1/0x16a0
[ 1332.123154]  ? _raw_read_lock_irqsave+0x2d/0x70
[ 1332.123157]  schedule+0x6f/0x110
[ 1332.123160]  schedule_timeout+0x14f/0x160
[ 1332.123162]  ? preempt_count_add+0x86/0xd0
[ 1332.123165]  __wait_for_common+0x92/0x190
[ 1332.123168]  ? __pfx_schedule_timeout+0x10/0x10
[ 1332.123170]  wait_for_completion+0x28/0x30
[ 1332.123173]  cpufreq_policy_put_kobj+0x4d/0x90
[ 1332.123177]  cpufreq_policy_free+0x157/0x1d0
[ 1332.123178]  ? preempt_count_add+0x58/0xd0
[ 1332.123180]  cpufreq_remove_dev+0xb6/0x100
[ 1332.123182]  subsys_interface_unregister+0x114/0x120
[ 1332.123185]  ? preempt_count_add+0x58/0xd0
[ 1332.123187]  ? __pfx_amd_pstate_change_driver_mode+0x10/0x10
[ 1332.123190]  cpufreq_unregister_driver+0x3b/0xd0
[ 1332.123192]  amd_pstate_change_driver_mode+0x1e/0x50
[ 1332.123194]  store_status+0xe9/0x180
[ 1332.123197]  dev_attr_store+0x1b/0x30
[ 1332.123199]  sysfs_kf_write+0x42/0x50
[ 1332.123202]  kernfs_fop_write_iter+0x143/0x1d0
[ 1332.123204]  vfs_write+0x2df/0x400
[ 1332.123208]  ksys_write+0x6b/0xf0
[ 1332.123210]  __x64_sys_write+0x1d/0x30
[ 1332.123213]  do_syscall_64+0x60/0x90
[ 1332.123216]  ? fpregs_assert_state_consistent+0x2e/0x50
[ 1332.123219]  ? exit_to_user_mode_prepare+0x49/0x1a0
[ 1332.123223]  ? irqentry_exit_to_user_mode+0xd/0x20
[ 1332.123225]  ? irqentry_exit+0x3f/0x50
[ 1332.123226]  ? exc_page_fault+0x8e/0x190
[ 1332.123228]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
[ 1332.123232] RIP: 0033:0x7fa74c514a37
[ 1332.123234] RSP: 002b:00007ffe31dd0788 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 1332.123238] RAX: ffffffffffffffda RBX: 0000000000000008 RCX: 00007fa74c514a37
[ 1332.123239] RDX: 0000000000000008 RSI: 000055e27c447aa0 RDI: 0000000000000001
[ 1332.123241] RBP: 000055e27c447aa0 R08: 00007fa74c5d1460 R09: 000000007fffffff
[ 1332.123242] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000008
[ 1332.123244] R13: 00007fa74c61a780 R14: 00007fa74c616600 R15: 00007fa74c615a00
[ 1332.123247]  </TASK>

Fix this by calling cpufreq_cpu_put() wherever necessary.

Fixes: 14eb1c96e3a3 ("cpufreq: amd-pstate: Add test module for amd-pstate driver")
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Meng Li <li.meng@amd.com>
Reviewed-by: Wyes Karny <wyes.karny@amd.com>
Suggested-by: Wyes Karny <wyes.karny@amd.com>
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/amd-pstate-ut.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
index cf07ee77d3cc..502d494499ae 100644
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -140,7 +140,7 @@ static void amd_pstate_ut_check_perf(u32 index)
 			if (ret) {
 				amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 				pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret);
-				return;
+				goto skip_test;
 			}
 
 			nominal_perf = cppc_perf.nominal_perf;
@@ -151,7 +151,7 @@ static void amd_pstate_ut_check_perf(u32 index)
 			if (ret) {
 				amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 				pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret);
-				return;
+				goto skip_test;
 			}
 
 			nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
@@ -169,7 +169,7 @@ static void amd_pstate_ut_check_perf(u32 index)
 				nominal_perf, cpudata->nominal_perf,
 				lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf,
 				lowest_perf, cpudata->lowest_perf);
-			return;
+			goto skip_test;
 		}
 
 		if (!((highest_perf >= nominal_perf) &&
@@ -180,11 +180,15 @@ static void amd_pstate_ut_check_perf(u32 index)
 			pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
 				__func__, cpu, highest_perf, nominal_perf,
 				lowest_nonlinear_perf, lowest_perf);
-			return;
+			goto skip_test;
 		}
+		cpufreq_cpu_put(policy);
 	}
 
 	amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+	return;
+skip_test:
+	cpufreq_cpu_put(policy);
 }
 
 /*
@@ -212,14 +216,14 @@ static void amd_pstate_ut_check_freq(u32 index)
 			pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
 				__func__, cpu, cpudata->max_freq, cpudata->nominal_freq,
 				cpudata->lowest_nonlinear_freq, cpudata->min_freq);
-			return;
+			goto skip_test;
 		}
 
 		if (cpudata->min_freq != policy->min) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d cpudata_min_freq=%d policy_min=%d, they should be equal!\n",
 				__func__, cpu, cpudata->min_freq, policy->min);
-			return;
+			goto skip_test;
 		}
 
 		if (cpudata->boost_supported) {
@@ -231,16 +235,20 @@ static void amd_pstate_ut_check_freq(u32 index)
 				pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
 					__func__, cpu, policy->max, cpudata->max_freq,
 					cpudata->nominal_freq);
-				return;
+				goto skip_test;
 			}
 		} else {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d must support boost!\n", __func__, cpu);
-			return;
+			goto skip_test;
 		}
+		cpufreq_cpu_put(policy);
 	}
 
 	amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+	return;
+skip_test:
+	cpufreq_cpu_put(policy);
 }
 
 static int __init amd_pstate_ut_init(void)

From e3f9324b231aba1dc707572bfe80be210c2d4cbd Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Wed, 16 Aug 2023 15:54:48 +0000
Subject: [PATCH 1719/2047] RISC-V: Remove ptrace support for vectors

We've found two bugs here: NT_RISCV_VECTOR steps on NT_RISCV_CSR (which
is only for embedded), and we don't have vlenb in the core dumps.  Given
that we've have a pair of bugs croup up as part of the GDB review we've
probably got other issues, so let's just cut this for 6.5 and get it
right.

Fixes: 0c59922c769a ("riscv: Add ptrace vector support")
Reviewed-by: Maciej W. Rozycki <macro@orcam.me.uk>
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
Link: https://lore.kernel.org/r/20230816155450.26200-2-andy.chiu@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/ptrace.c | 69 --------------------------------------
 include/uapi/linux/elf.h   |  1 -
 2 files changed, 70 deletions(-)

diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 1d572cf3140f..487303e3ef22 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -25,9 +25,6 @@ enum riscv_regset {
 #ifdef CONFIG_FPU
 	REGSET_F,
 #endif
-#ifdef CONFIG_RISCV_ISA_V
-	REGSET_V,
-#endif
 };
 
 static int riscv_gpr_get(struct task_struct *target,
@@ -84,61 +81,6 @@ static int riscv_fpr_set(struct task_struct *target,
 }
 #endif
 
-#ifdef CONFIG_RISCV_ISA_V
-static int riscv_vr_get(struct task_struct *target,
-			const struct user_regset *regset,
-			struct membuf to)
-{
-	struct __riscv_v_ext_state *vstate = &target->thread.vstate;
-
-	if (!riscv_v_vstate_query(task_pt_regs(target)))
-		return -EINVAL;
-
-	/*
-	 * Ensure the vector registers have been saved to the memory before
-	 * copying them to membuf.
-	 */
-	if (target == current)
-		riscv_v_vstate_save(current, task_pt_regs(current));
-
-	/* Copy vector header from vstate. */
-	membuf_write(&to, vstate, offsetof(struct __riscv_v_ext_state, datap));
-	membuf_zero(&to, sizeof(vstate->datap));
-
-	/* Copy all the vector registers from vstate. */
-	return membuf_write(&to, vstate->datap, riscv_v_vsize);
-}
-
-static int riscv_vr_set(struct task_struct *target,
-			const struct user_regset *regset,
-			unsigned int pos, unsigned int count,
-			const void *kbuf, const void __user *ubuf)
-{
-	int ret, size;
-	struct __riscv_v_ext_state *vstate = &target->thread.vstate;
-
-	if (!riscv_v_vstate_query(task_pt_regs(target)))
-		return -EINVAL;
-
-	/* Copy rest of the vstate except datap */
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate, 0,
-				 offsetof(struct __riscv_v_ext_state, datap));
-	if (unlikely(ret))
-		return ret;
-
-	/* Skip copy datap. */
-	size = sizeof(vstate->datap);
-	count -= size;
-	ubuf += size;
-
-	/* Copy all the vector registers. */
-	pos = 0;
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate->datap,
-				 0, riscv_v_vsize);
-	return ret;
-}
-#endif
-
 static const struct user_regset riscv_user_regset[] = {
 	[REGSET_X] = {
 		.core_note_type = NT_PRSTATUS,
@@ -158,17 +100,6 @@ static const struct user_regset riscv_user_regset[] = {
 		.set = riscv_fpr_set,
 	},
 #endif
-#ifdef CONFIG_RISCV_ISA_V
-	[REGSET_V] = {
-		.core_note_type = NT_RISCV_VECTOR,
-		.align = 16,
-		.n = ((32 * RISCV_MAX_VLENB) +
-		      sizeof(struct __riscv_v_ext_state)) / sizeof(__u32),
-		.size = sizeof(__u32),
-		.regset_get = riscv_vr_get,
-		.set = riscv_vr_set,
-	},
-#endif
 };
 
 static const struct user_regset_view riscv_user_native_view = {
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 0c8cf359ea5b..e0e159138331 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -443,7 +443,6 @@ typedef struct elf64_shdr {
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
 #define NT_MIPS_FP_MODE	0x801		/* MIPS floating-point mode */
 #define NT_MIPS_MSA	0x802		/* MIPS SIMD registers */
-#define NT_RISCV_VECTOR	0x900		/* RISC-V vector registers */
 #define NT_LOONGARCH_CPUCFG	0xa00	/* LoongArch CPU config registers */
 #define NT_LOONGARCH_CSR	0xa01	/* LoongArch control and status registers */
 #define NT_LOONGARCH_LSX	0xa02	/* LoongArch Loongson SIMD Extension registers */

From c35f3aa34509085bfc9800c86bc9998f8954933d Mon Sep 17 00:00:00 2001
From: Andy Chiu <andy.chiu@sifive.com>
Date: Wed, 16 Aug 2023 15:54:49 +0000
Subject: [PATCH 1720/2047] RISC-V: vector: export VLENB csr in
 __sc_riscv_v_state

VLENB is critical for callers of ptrace to reconstruct Vector register
files from the register dump of NT_RISCV_VECTOR. Also, future systems
may will have a writable VLENB, so add it now to potentially save future
compatibility issue.

Fixes: 0c59922c769a ("riscv: Add ptrace vector support")
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
Link: https://lore.kernel.org/r/20230816155450.26200-3-andy.chiu@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/vector.h      | 3 ++-
 arch/riscv/include/uapi/asm/ptrace.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 3d78930cab51..c5ee07b3df07 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -70,8 +70,9 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
 		"csrr	%1, " __stringify(CSR_VTYPE) "\n\t"
 		"csrr	%2, " __stringify(CSR_VL) "\n\t"
 		"csrr	%3, " __stringify(CSR_VCSR) "\n\t"
+		"csrr	%4, " __stringify(CSR_VLENB) "\n\t"
 		: "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
-		  "=r" (dest->vcsr) : :);
+		  "=r" (dest->vcsr), "=r" (dest->vlenb) : :);
 }
 
 static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h
index e17c550986a6..283800130614 100644
--- a/arch/riscv/include/uapi/asm/ptrace.h
+++ b/arch/riscv/include/uapi/asm/ptrace.h
@@ -97,6 +97,7 @@ struct __riscv_v_ext_state {
 	unsigned long vl;
 	unsigned long vtype;
 	unsigned long vcsr;
+	unsigned long vlenb;
 	void *datap;
 	/*
 	 * In signal handler, datap will be set a correct user stack offset

From e0d25c591ac676ece0e1ad6bbd72a159b9355598 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 21 Jun 2023 15:31:56 +0300
Subject: [PATCH 1721/2047] drm/i915: fix Sphinx indentation warning

Fix Sphinx warning about unexpected indent.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230621123156.14907-2-jani.nikula@intel.com
(cherry picked from commit 175b036472f678948b03baabce4a008b7ba91ce7)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_huc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index ddd146265beb..fa70defcb5b2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -26,6 +26,7 @@
  * The kernel driver is only responsible for loading the HuC firmware and
  * triggering its security authentication. This is done differently depending
  * on the platform:
+ *
  * - older platforms (from Gen9 to most Gen12s): the load is performed via DMA
  *   and the authentication via GuC
  * - DG2: load and authentication are both performed via GSC.
@@ -33,6 +34,7 @@
  *   not-DG2 older platforms), while the authentication is done in 2-steps,
  *   a first auth for clear-media workloads via GuC and a second one for all
  *   workloads via GSC.
+ *
  * On platforms where the GuC does the authentication, to correctly do so the
  * HuC binary must be loaded before the GuC one.
  * Loading the HuC is optional; however, not using the HuC might negatively

From 66fbfb35da47f391bdadf9fa7ceb88af4faa9022 Mon Sep 17 00:00:00 2001
From: Andrey Skvortsov <andrej.skvortzov@gmail.com>
Date: Sat, 5 Aug 2023 11:48:47 +0300
Subject: [PATCH 1722/2047] clk: Fix slab-out-of-bounds error in
 devm_clk_release()

Problem can be reproduced by unloading snd_soc_simple_card, because in
devm_get_clk_from_child() devres data is allocated as `struct clk`, but
devm_clk_release() expects devres data to be `struct devm_clk_state`.

KASAN report:
 ==================================================================
 BUG: KASAN: slab-out-of-bounds in devm_clk_release+0x20/0x54
 Read of size 8 at addr ffffff800ee09688 by task (udev-worker)/287

 Call trace:
  dump_backtrace+0xe8/0x11c
  show_stack+0x1c/0x30
  dump_stack_lvl+0x60/0x78
  print_report+0x150/0x450
  kasan_report+0xa8/0xf0
  __asan_load8+0x78/0xa0
  devm_clk_release+0x20/0x54
  release_nodes+0x84/0x120
  devres_release_all+0x144/0x210
  device_unbind_cleanup+0x1c/0xac
  really_probe+0x2f0/0x5b0
  __driver_probe_device+0xc0/0x1f0
  driver_probe_device+0x68/0x120
  __driver_attach+0x140/0x294
  bus_for_each_dev+0xec/0x160
  driver_attach+0x38/0x44
  bus_add_driver+0x24c/0x300
  driver_register+0xf0/0x210
  __platform_driver_register+0x48/0x54
  asoc_simple_card_init+0x24/0x1000 [snd_soc_simple_card]
  do_one_initcall+0xac/0x340
  do_init_module+0xd0/0x300
  load_module+0x2ba4/0x3100
  __do_sys_init_module+0x2c8/0x300
  __arm64_sys_init_module+0x48/0x5c
  invoke_syscall+0x64/0x190
  el0_svc_common.constprop.0+0x124/0x154
  do_el0_svc+0x44/0xdc
  el0_svc+0x14/0x50
  el0t_64_sync_handler+0xec/0x11c
  el0t_64_sync+0x14c/0x150

 Allocated by task 287:
  kasan_save_stack+0x38/0x60
  kasan_set_track+0x28/0x40
  kasan_save_alloc_info+0x20/0x30
  __kasan_kmalloc+0xac/0xb0
  __kmalloc_node_track_caller+0x6c/0x1c4
  __devres_alloc_node+0x44/0xb4
  devm_get_clk_from_child+0x44/0xa0
  asoc_simple_parse_clk+0x1b8/0x1dc [snd_soc_simple_card_utils]
  simple_parse_node.isra.0+0x1ec/0x230 [snd_soc_simple_card]
  simple_dai_link_of+0x1bc/0x334 [snd_soc_simple_card]
  __simple_for_each_link+0x2ec/0x320 [snd_soc_simple_card]
  asoc_simple_probe+0x468/0x4dc [snd_soc_simple_card]
  platform_probe+0x90/0xf0
  really_probe+0x118/0x5b0
  __driver_probe_device+0xc0/0x1f0
  driver_probe_device+0x68/0x120
  __driver_attach+0x140/0x294
  bus_for_each_dev+0xec/0x160
  driver_attach+0x38/0x44
  bus_add_driver+0x24c/0x300
  driver_register+0xf0/0x210
  __platform_driver_register+0x48/0x54
  asoc_simple_card_init+0x24/0x1000 [snd_soc_simple_card]
  do_one_initcall+0xac/0x340
  do_init_module+0xd0/0x300
  load_module+0x2ba4/0x3100
  __do_sys_init_module+0x2c8/0x300
  __arm64_sys_init_module+0x48/0x5c
  invoke_syscall+0x64/0x190
  el0_svc_common.constprop.0+0x124/0x154
  do_el0_svc+0x44/0xdc
  el0_svc+0x14/0x50
  el0t_64_sync_handler+0xec/0x11c
  el0t_64_sync+0x14c/0x150

 The buggy address belongs to the object at ffffff800ee09600
  which belongs to the cache kmalloc-256 of size 256
 The buggy address is located 136 bytes inside of
  256-byte region [ffffff800ee09600, ffffff800ee09700)

 The buggy address belongs to the physical page:
 page:000000002d97303b refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x4ee08
 head:000000002d97303b order:1 compound_mapcount:0 compound_pincount:0
 flags: 0x10200(slab|head|zone=0)
 raw: 0000000000010200 0000000000000000 dead000000000122 ffffff8002c02480
 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
 page dumped because: kasan: bad access detected

 Memory state around the buggy address:
  ffffff800ee09580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
  ffffff800ee09600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 >ffffff800ee09680: 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                       ^
  ffffff800ee09700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
  ffffff800ee09780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ==================================================================

Fixes: abae8e57e49a ("clk: generalize devm_clk_get() a bit")
Signed-off-by: Andrey Skvortsov <andrej.skvortzov@gmail.com>
Link: https://lore.kernel.org/r/20230805084847.3110586-1-andrej.skvortzov@gmail.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/clk-devres.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
index 4fb4fd4b06bd..737aa70e2cb3 100644
--- a/drivers/clk/clk-devres.c
+++ b/drivers/clk/clk-devres.c
@@ -205,18 +205,19 @@ EXPORT_SYMBOL(devm_clk_put);
 struct clk *devm_get_clk_from_child(struct device *dev,
 				    struct device_node *np, const char *con_id)
 {
-	struct clk **ptr, *clk;
+	struct devm_clk_state *state;
+	struct clk *clk;
 
-	ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
-	if (!ptr)
+	state = devres_alloc(devm_clk_release, sizeof(*state), GFP_KERNEL);
+	if (!state)
 		return ERR_PTR(-ENOMEM);
 
 	clk = of_clk_get_by_name(np, con_id);
 	if (!IS_ERR(clk)) {
-		*ptr = clk;
-		devres_add(dev, ptr);
+		state->clk = clk;
+		devres_add(dev, state);
 	} else {
-		devres_free(ptr);
+		devres_free(state);
 	}
 
 	return clk;

From 67eb617a8e1e46ee726d0b2689dd459aab4e5e18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Mon, 3 Jul 2023 10:11:08 +0200
Subject: [PATCH 1723/2047] selftests/nolibc: simplify call to ioperm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 53fcfafa8c5c ("tools/nolibc/unistd: add syscall()") nolibc
has support for syscall(2).
Use it to get rid of some ifdef-ery.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 486334981e60..c02d89953679 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -1051,11 +1051,7 @@ int main(int argc, char **argv, char **envp)
 		 * exit with status code 2N+1 when N is written to 0x501. We
 		 * hard-code the syscall here as it's arch-dependent.
 		 */
-#if defined(_NOLIBC_SYS_H)
-		else if (my_syscall3(__NR_ioperm, 0x501, 1, 1) == 0)
-#else
-		else if (ioperm(0x501, 1, 1) == 0)
-#endif
+		else if (syscall(__NR_ioperm, 0x501, 1, 1) == 0)
 			__asm__ volatile ("outb %%al, %%dx" :: "d"(0x501), "a"(0));
 		/* if it does nothing, fall back to the regular panic */
 #endif

From f134c7066c9064b4c9756dd86b2ace6b5cb89d20 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 22:50:34 +0800
Subject: [PATCH 1724/2047] tools/nolibc: arch-*.h: fix up code indent errors

More than 8 whitespaces of the code indent are replaced with "tab +
whitespaces" to fix up such errors reported by scripts/checkpatch.pl:

    ERROR: code indent should use tabs where possible
    #64: FILE: tools/include/nolibc/arch-mips.h:64:
    +^I                                                                      \$

    ERROR: code indent should use tabs where possible
    #72: FILE: tools/include/nolibc/arch-mips.h:72:
    +^I          "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \$

This command is used:

    $ sed -i -e '/^\t*        /{s/        /\t/g}' tools/include/nolibc/arch-*.h

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-aarch64.h | 14 +++++++-------
 tools/include/nolibc/arch-arm.h     | 14 +++++++-------
 tools/include/nolibc/arch-i386.h    | 12 ++++++------
 tools/include/nolibc/arch-mips.h    | 24 ++++++++++++------------
 tools/include/nolibc/arch-x86_64.h  | 14 +++++++-------
 5 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
index 11f294a406b7..c911f61365d1 100644
--- a/tools/include/nolibc/arch-aarch64.h
+++ b/tools/include/nolibc/arch-aarch64.h
@@ -56,7 +56,7 @@ struct sys_stat_struct {
 ({                                                                            \
 	register long _num  __asm__ ("x8") = (num);                           \
 	register long _arg1 __asm__ ("x0");                                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"svc #0\n"                                                    \
 		: "=r"(_arg1)                                                 \
@@ -70,7 +70,7 @@ struct sys_stat_struct {
 ({                                                                            \
 	register long _num  __asm__ ("x8") = (num);                           \
 	register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"svc #0\n"                                                    \
 		: "=r"(_arg1)                                                 \
@@ -86,7 +86,7 @@ struct sys_stat_struct {
 	register long _num  __asm__ ("x8") = (num);                           \
 	register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
 	register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"svc #0\n"                                                    \
 		: "=r"(_arg1)                                                 \
@@ -103,7 +103,7 @@ struct sys_stat_struct {
 	register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
 	register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
 	register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"svc #0\n"                                                    \
 		: "=r"(_arg1)                                                 \
@@ -121,7 +121,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
 	register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("x3") = (long)(arg4);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"svc #0\n"                                                    \
 		: "=r"(_arg1)                                                 \
@@ -140,7 +140,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("x3") = (long)(arg4);                    \
 	register long _arg5 __asm__ ("x4") = (long)(arg5);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"svc #0\n"                                                    \
 		: "=r" (_arg1)                                                \
@@ -160,7 +160,7 @@ struct sys_stat_struct {
 	register long _arg4 __asm__ ("x3") = (long)(arg4);                    \
 	register long _arg5 __asm__ ("x4") = (long)(arg5);                    \
 	register long _arg6 __asm__ ("x5") = (long)(arg6);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"svc #0\n"                                                    \
 		: "=r" (_arg1)                                                \
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index ca4c66987497..d5887fd9bc5f 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -90,7 +90,7 @@ struct sys_stat_struct {
 ({                                                                            \
 	register long _num  __asm__(_NOLIBC_SYSCALL_REG) = (num);             \
 	register long _arg1 __asm__ ("r0");                                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
@@ -107,7 +107,7 @@ struct sys_stat_struct {
 ({                                                                            \
 	register long _num  __asm__(_NOLIBC_SYSCALL_REG) = (num);             \
 	register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
@@ -125,7 +125,7 @@ struct sys_stat_struct {
 	register long _num  __asm__(_NOLIBC_SYSCALL_REG) = (num);             \
 	register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
 	register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
@@ -144,7 +144,7 @@ struct sys_stat_struct {
 	register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
 	register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
 	register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
@@ -164,7 +164,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
 	register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("r3") = (long)(arg4);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
@@ -185,7 +185,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("r3") = (long)(arg4);                    \
 	register long _arg5 __asm__ ("r4") = (long)(arg5);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
@@ -207,7 +207,7 @@ struct sys_stat_struct {
 	register long _arg4 __asm__ ("r3") = (long)(arg4);                    \
 	register long _arg5 __asm__ ("r4") = (long)(arg5);                    \
 	register long _arg6 __asm__ ("r5") = (long)(arg6);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
index 3d672d925e9e..c11a53acf159 100644
--- a/tools/include/nolibc/arch-i386.h
+++ b/tools/include/nolibc/arch-i386.h
@@ -57,7 +57,7 @@ struct sys_stat_struct {
 ({                                                                            \
 	long _ret;                                                            \
 	register long _num __asm__ ("eax") = (num);                           \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"int $0x80\n"                                                 \
 		: "=a" (_ret)                                                 \
@@ -72,7 +72,7 @@ struct sys_stat_struct {
 	long _ret;                                                            \
 	register long _num __asm__ ("eax") = (num);                           \
 	register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"int $0x80\n"                                                 \
 		: "=a" (_ret)                                                 \
@@ -89,7 +89,7 @@ struct sys_stat_struct {
 	register long _num __asm__ ("eax") = (num);                           \
 	register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
 	register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"int $0x80\n"                                                 \
 		: "=a" (_ret)                                                 \
@@ -107,7 +107,7 @@ struct sys_stat_struct {
 	register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
 	register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
 	register long _arg3 __asm__ ("edx") = (long)(arg3);                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"int $0x80\n"                                                 \
 		: "=a" (_ret)                                                 \
@@ -126,7 +126,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
 	register long _arg3 __asm__ ("edx") = (long)(arg3);                   \
 	register long _arg4 __asm__ ("esi") = (long)(arg4);                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"int $0x80\n"                                                 \
 		: "=a" (_ret)                                                 \
@@ -146,7 +146,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("edx") = (long)(arg3);                   \
 	register long _arg4 __asm__ ("esi") = (long)(arg4);                   \
 	register long _arg5 __asm__ ("edi") = (long)(arg5);                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"int $0x80\n"                                                 \
 		: "=a" (_ret)                                                 \
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index db24e0837a39..55cd376a98e2 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -61,7 +61,7 @@ struct sys_stat_struct {
 ({                                                                            \
 	register long _num __asm__ ("v0") = (num);                            \
 	register long _arg4 __asm__ ("a3");                                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"addiu $sp, $sp, -32\n"                                       \
 		"syscall\n"                                                   \
@@ -69,7 +69,7 @@ struct sys_stat_struct {
 		: "=r"(_num), "=r"(_arg4)                                     \
 		: "r"(_num)                                                   \
 		: "memory", "cc", "at", "v1", "hi", "lo",                     \
-	          "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+		  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
 	);                                                                    \
 	_arg4 ? -_num : _num;                                                 \
 })
@@ -79,7 +79,7 @@ struct sys_stat_struct {
 	register long _num __asm__ ("v0") = (num);                            \
 	register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
 	register long _arg4 __asm__ ("a3");                                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"addiu $sp, $sp, -32\n"                                       \
 		"syscall\n"                                                   \
@@ -88,7 +88,7 @@ struct sys_stat_struct {
 		: "0"(_num),                                                  \
 		  "r"(_arg1)                                                  \
 		: "memory", "cc", "at", "v1", "hi", "lo",                     \
-	          "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+		  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
 	);                                                                    \
 	_arg4 ? -_num : _num;                                                 \
 })
@@ -99,7 +99,7 @@ struct sys_stat_struct {
 	register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
 	register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
 	register long _arg4 __asm__ ("a3");                                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"addiu $sp, $sp, -32\n"                                       \
 		"syscall\n"                                                   \
@@ -108,7 +108,7 @@ struct sys_stat_struct {
 		: "0"(_num),                                                  \
 		  "r"(_arg1), "r"(_arg2)                                      \
 		: "memory", "cc", "at", "v1", "hi", "lo",                     \
-	          "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+		  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
 	);                                                                    \
 	_arg4 ? -_num : _num;                                                 \
 })
@@ -120,7 +120,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
 	register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("a3");                                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"addiu $sp, $sp, -32\n"                                       \
 		"syscall\n"                                                   \
@@ -129,7 +129,7 @@ struct sys_stat_struct {
 		: "0"(_num),                                                  \
 		  "r"(_arg1), "r"(_arg2), "r"(_arg3)                          \
 		: "memory", "cc", "at", "v1", "hi", "lo",                     \
-	          "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+		  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
 	);                                                                    \
 	_arg4 ? -_num : _num;                                                 \
 })
@@ -141,7 +141,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
 	register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"addiu $sp, $sp, -32\n"                                       \
 		"syscall\n"                                                   \
@@ -150,7 +150,7 @@ struct sys_stat_struct {
 		: "0"(_num),                                                  \
 		  "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4)              \
 		: "memory", "cc", "at", "v1", "hi", "lo",                     \
-	          "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+		  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
 	);                                                                    \
 	_arg4 ? -_num : _num;                                                 \
 })
@@ -163,7 +163,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
 	register long _arg5 = (long)(arg5);                                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"addiu $sp, $sp, -32\n"                                       \
 		"sw %7, 16($sp)\n"                                            \
@@ -173,7 +173,7 @@ struct sys_stat_struct {
 		: "0"(_num),                                                  \
 		  "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5)  \
 		: "memory", "cc", "at", "v1", "hi", "lo",                     \
-	          "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+		  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
 	);                                                                    \
 	_arg4 ? -_num : _num;                                                 \
 })
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
index 6fc4d8392742..1ae73d83aad1 100644
--- a/tools/include/nolibc/arch-x86_64.h
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -59,7 +59,7 @@ struct sys_stat_struct {
 ({                                                                            \
 	long _ret;                                                            \
 	register long _num  __asm__ ("rax") = (num);                          \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
@@ -74,7 +74,7 @@ struct sys_stat_struct {
 	long _ret;                                                            \
 	register long _num  __asm__ ("rax") = (num);                          \
 	register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
@@ -91,7 +91,7 @@ struct sys_stat_struct {
 	register long _num  __asm__ ("rax") = (num);                          \
 	register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
 	register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
@@ -109,7 +109,7 @@ struct sys_stat_struct {
 	register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
 	register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
 	register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
@@ -128,7 +128,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
 	register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
 	register long _arg4 __asm__ ("r10") = (long)(arg4);                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
@@ -148,7 +148,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
 	register long _arg4 __asm__ ("r10") = (long)(arg4);                   \
 	register long _arg5 __asm__ ("r8")  = (long)(arg5);                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
@@ -169,7 +169,7 @@ struct sys_stat_struct {
 	register long _arg4 __asm__ ("r10") = (long)(arg4);                   \
 	register long _arg5 __asm__ ("r8")  = (long)(arg5);                   \
 	register long _arg6 __asm__ ("r9")  = (long)(arg6);                   \
-	                                                                      \
+									      \
 	__asm__  volatile (                                                   \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \

From f09f1912e4cd26949c565fbc0216e3c1b55e0fb4 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 22:52:09 +0800
Subject: [PATCH 1725/2047] toolc/nolibc: arch-*.h: clean up whitespaces after
 __asm__

replace "__asm__  volatile" with "__asm__ volatile" and insert necessary
whitespace before "\" to make sure the lines are aligned.

    $ sed -i -e 's/__asm__  volatile ( /__asm__ volatile (  /g' tools/include/nolibc/*.h

Note, arch-s390.h uses post-tab instead of post-whitespaces, must avoid
insert whitespace just before the tabs:

    $ sed -i -e 's/__asm__  volatile (\t/__asm__ volatile (\t/g' tools/include/nolibc/arch-*.h

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-aarch64.h   | 14 +++++++-------
 tools/include/nolibc/arch-arm.h       | 14 +++++++-------
 tools/include/nolibc/arch-i386.h      | 12 ++++++------
 tools/include/nolibc/arch-loongarch.h | 14 +++++++-------
 tools/include/nolibc/arch-mips.h      | 12 ++++++------
 tools/include/nolibc/arch-riscv.h     | 14 +++++++-------
 tools/include/nolibc/arch-s390.h      | 14 +++++++-------
 tools/include/nolibc/arch-x86_64.h    | 14 +++++++-------
 8 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
index c911f61365d1..6227b77a4a09 100644
--- a/tools/include/nolibc/arch-aarch64.h
+++ b/tools/include/nolibc/arch-aarch64.h
@@ -57,7 +57,7 @@ struct sys_stat_struct {
 	register long _num  __asm__ ("x8") = (num);                           \
 	register long _arg1 __asm__ ("x0");                                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"svc #0\n"                                                    \
 		: "=r"(_arg1)                                                 \
 		: "r"(_num)                                                   \
@@ -71,7 +71,7 @@ struct sys_stat_struct {
 	register long _num  __asm__ ("x8") = (num);                           \
 	register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"svc #0\n"                                                    \
 		: "=r"(_arg1)                                                 \
 		: "r"(_arg1),                                                 \
@@ -87,7 +87,7 @@ struct sys_stat_struct {
 	register long _arg1 __asm__ ("x0") = (long)(arg1);                    \
 	register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"svc #0\n"                                                    \
 		: "=r"(_arg1)                                                 \
 		: "r"(_arg1), "r"(_arg2),                                     \
@@ -104,7 +104,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("x1") = (long)(arg2);                    \
 	register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"svc #0\n"                                                    \
 		: "=r"(_arg1)                                                 \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
@@ -122,7 +122,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("x2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("x3") = (long)(arg4);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"svc #0\n"                                                    \
 		: "=r"(_arg1)                                                 \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
@@ -141,7 +141,7 @@ struct sys_stat_struct {
 	register long _arg4 __asm__ ("x3") = (long)(arg4);                    \
 	register long _arg5 __asm__ ("x4") = (long)(arg5);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"svc #0\n"                                                    \
 		: "=r" (_arg1)                                                \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
@@ -161,7 +161,7 @@ struct sys_stat_struct {
 	register long _arg5 __asm__ ("x4") = (long)(arg5);                    \
 	register long _arg6 __asm__ ("x5") = (long)(arg6);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"svc #0\n"                                                    \
 		: "=r" (_arg1)                                                \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index d5887fd9bc5f..4d4887a5f04b 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -91,7 +91,7 @@ struct sys_stat_struct {
 	register long _num  __asm__(_NOLIBC_SYSCALL_REG) = (num);             \
 	register long _arg1 __asm__ ("r0");                                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
 		_NOLIBC_THUMB_RESTORE_R7                                      \
@@ -108,7 +108,7 @@ struct sys_stat_struct {
 	register long _num  __asm__(_NOLIBC_SYSCALL_REG) = (num);             \
 	register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
 		_NOLIBC_THUMB_RESTORE_R7                                      \
@@ -126,7 +126,7 @@ struct sys_stat_struct {
 	register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
 	register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
 		_NOLIBC_THUMB_RESTORE_R7                                      \
@@ -145,7 +145,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
 	register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
 		_NOLIBC_THUMB_RESTORE_R7                                      \
@@ -165,7 +165,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("r3") = (long)(arg4);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
 		_NOLIBC_THUMB_RESTORE_R7                                      \
@@ -186,7 +186,7 @@ struct sys_stat_struct {
 	register long _arg4 __asm__ ("r3") = (long)(arg4);                    \
 	register long _arg5 __asm__ ("r4") = (long)(arg5);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
 		_NOLIBC_THUMB_RESTORE_R7                                      \
@@ -208,7 +208,7 @@ struct sys_stat_struct {
 	register long _arg5 __asm__ ("r4") = (long)(arg5);                    \
 	register long _arg6 __asm__ ("r5") = (long)(arg6);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		_NOLIBC_THUMB_SET_R7                                          \
 		"svc #0\n"                                                    \
 		_NOLIBC_THUMB_RESTORE_R7                                      \
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
index c11a53acf159..4c6b7c04e2e7 100644
--- a/tools/include/nolibc/arch-i386.h
+++ b/tools/include/nolibc/arch-i386.h
@@ -58,7 +58,7 @@ struct sys_stat_struct {
 	long _ret;                                                            \
 	register long _num __asm__ ("eax") = (num);                           \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"int $0x80\n"                                                 \
 		: "=a" (_ret)                                                 \
 		: "0"(_num)                                                   \
@@ -73,7 +73,7 @@ struct sys_stat_struct {
 	register long _num __asm__ ("eax") = (num);                           \
 	register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"int $0x80\n"                                                 \
 		: "=a" (_ret)                                                 \
 		: "r"(_arg1),                                                 \
@@ -90,7 +90,7 @@ struct sys_stat_struct {
 	register long _arg1 __asm__ ("ebx") = (long)(arg1);                   \
 	register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"int $0x80\n"                                                 \
 		: "=a" (_ret)                                                 \
 		: "r"(_arg1), "r"(_arg2),                                     \
@@ -108,7 +108,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("ecx") = (long)(arg2);                   \
 	register long _arg3 __asm__ ("edx") = (long)(arg3);                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"int $0x80\n"                                                 \
 		: "=a" (_ret)                                                 \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
@@ -127,7 +127,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("edx") = (long)(arg3);                   \
 	register long _arg4 __asm__ ("esi") = (long)(arg4);                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"int $0x80\n"                                                 \
 		: "=a" (_ret)                                                 \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
@@ -147,7 +147,7 @@ struct sys_stat_struct {
 	register long _arg4 __asm__ ("esi") = (long)(arg4);                   \
 	register long _arg5 __asm__ ("edi") = (long)(arg5);                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"int $0x80\n"                                                 \
 		: "=a" (_ret)                                                 \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
index ad3f266e7093..8df42268e578 100644
--- a/tools/include/nolibc/arch-loongarch.h
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -28,7 +28,7 @@
 	register long _num  __asm__ ("a7") = (num);                           \
 	register long _arg1 __asm__ ("a0");                                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall 0\n"                                                 \
 		: "=r"(_arg1)                                                 \
 		: "r"(_num)                                                   \
@@ -43,7 +43,7 @@
 	register long _num  __asm__ ("a7") = (num);                           \
 	register long _arg1 __asm__ ("a0") = (long)(arg1);		      \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall 0\n"                                                 \
 		: "+r"(_arg1)                                                 \
 		: "r"(_num)                                                   \
@@ -59,7 +59,7 @@
 	register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
 	register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall 0\n"                                                 \
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2),                                                 \
@@ -77,7 +77,7 @@
 	register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
 	register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall 0\n"                                                 \
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2), "r"(_arg3),                                     \
@@ -96,7 +96,7 @@
 	register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall 0\n"                                                 \
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2), "r"(_arg3), "r"(_arg4),                         \
@@ -116,7 +116,7 @@
 	register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
 	register long _arg5 __asm__ ("a4") = (long)(arg5);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall 0\n"                                                 \
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5),             \
@@ -137,7 +137,7 @@
 	register long _arg5 __asm__ ("a4") = (long)(arg5);                    \
 	register long _arg6 __asm__ ("a5") = (long)(arg6);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall 0\n"                                                 \
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 55cd376a98e2..22aacc07b1fc 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -62,7 +62,7 @@ struct sys_stat_struct {
 	register long _num __asm__ ("v0") = (num);                            \
 	register long _arg4 __asm__ ("a3");                                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"addiu $sp, $sp, -32\n"                                       \
 		"syscall\n"                                                   \
 		"addiu $sp, $sp, 32\n"                                        \
@@ -80,7 +80,7 @@ struct sys_stat_struct {
 	register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
 	register long _arg4 __asm__ ("a3");                                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"addiu $sp, $sp, -32\n"                                       \
 		"syscall\n"                                                   \
 		"addiu $sp, $sp, 32\n"                                        \
@@ -100,7 +100,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
 	register long _arg4 __asm__ ("a3");                                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"addiu $sp, $sp, -32\n"                                       \
 		"syscall\n"                                                   \
 		"addiu $sp, $sp, 32\n"                                        \
@@ -121,7 +121,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("a3");                                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"addiu $sp, $sp, -32\n"                                       \
 		"syscall\n"                                                   \
 		"addiu $sp, $sp, 32\n"                                        \
@@ -142,7 +142,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"addiu $sp, $sp, -32\n"                                       \
 		"syscall\n"                                                   \
 		"addiu $sp, $sp, 32\n"                                        \
@@ -164,7 +164,7 @@ struct sys_stat_struct {
 	register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
 	register long _arg5 = (long)(arg5);                                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"addiu $sp, $sp, -32\n"                                       \
 		"sw %7, 16($sp)\n"                                            \
 		"syscall\n  "                                                 \
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index a2e8564e66d6..cd958b2f4b1b 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -63,7 +63,7 @@ struct sys_stat_struct {
 	register long _num  __asm__ ("a7") = (num);                           \
 	register long _arg1 __asm__ ("a0");                                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"ecall\n\t"                                                   \
 		: "=r"(_arg1)                                                 \
 		: "r"(_num)                                                   \
@@ -77,7 +77,7 @@ struct sys_stat_struct {
 	register long _num  __asm__ ("a7") = (num);                           \
 	register long _arg1 __asm__ ("a0") = (long)(arg1);		      \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"ecall\n"                                                     \
 		: "+r"(_arg1)                                                 \
 		: "r"(_num)                                                   \
@@ -92,7 +92,7 @@ struct sys_stat_struct {
 	register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
 	register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"ecall\n"                                                     \
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2),                                                 \
@@ -109,7 +109,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
 	register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"ecall\n\t"                                                   \
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2), "r"(_arg3),                                     \
@@ -127,7 +127,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
 	register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"ecall\n"                                                     \
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2), "r"(_arg3), "r"(_arg4),                         \
@@ -146,7 +146,7 @@ struct sys_stat_struct {
 	register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
 	register long _arg5 __asm__ ("a4") = (long)(arg5);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"ecall\n"                                                     \
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5),             \
@@ -166,7 +166,7 @@ struct sys_stat_struct {
 	register long _arg5 __asm__ ("a4") = (long)(arg5);                    \
 	register long _arg6 __asm__ ("a5") = (long)(arg6);                    \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"ecall\n"                                                     \
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index 516dff5bff8b..a644ecd361c0 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -52,7 +52,7 @@ struct sys_stat_struct {
 	register long _num __asm__ ("1") = (num);			\
 	register long _rc __asm__ ("2");				\
 									\
-	__asm__  volatile (						\
+	__asm__ volatile (						\
 		"svc 0\n"						\
 		: "=d"(_rc)						\
 		: "d"(_num)						\
@@ -66,7 +66,7 @@ struct sys_stat_struct {
 	register long _num __asm__ ("1") = (num);			\
 	register long _arg1 __asm__ ("2") = (long)(arg1);		\
 									\
-	__asm__  volatile (						\
+	__asm__ volatile (						\
 		"svc 0\n"						\
 		: "+d"(_arg1)						\
 		: "d"(_num)						\
@@ -81,7 +81,7 @@ struct sys_stat_struct {
 	register long _arg1 __asm__ ("2") = (long)(arg1);		\
 	register long _arg2 __asm__ ("3") = (long)(arg2);		\
 									\
-	__asm__  volatile (						\
+	__asm__ volatile (						\
 		"svc 0\n"						\
 		: "+d"(_arg1)						\
 		: "d"(_arg2), "d"(_num)					\
@@ -97,7 +97,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("3") = (long)(arg2);		\
 	register long _arg3 __asm__ ("4") = (long)(arg3);		\
 									\
-	__asm__  volatile (						\
+	__asm__ volatile (						\
 		"svc 0\n"						\
 		: "+d"(_arg1)						\
 		: "d"(_arg2), "d"(_arg3), "d"(_num)			\
@@ -114,7 +114,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("4") = (long)(arg3);		\
 	register long _arg4 __asm__ ("5") = (long)(arg4);		\
 									\
-	__asm__  volatile (						\
+	__asm__ volatile (						\
 		"svc 0\n"						\
 		: "+d"(_arg1)						\
 		: "d"(_arg2), "d"(_arg3), "d"(_arg4), "d"(_num)		\
@@ -132,7 +132,7 @@ struct sys_stat_struct {
 	register long _arg4 __asm__ ("5") = (long)(arg4);		\
 	register long _arg5 __asm__ ("6") = (long)(arg5);		\
 									\
-	__asm__  volatile (						\
+	__asm__ volatile (						\
 		"svc 0\n"						\
 		: "+d"(_arg1)						\
 		: "d"(_arg2), "d"(_arg3), "d"(_arg4), "d"(_arg5),	\
@@ -152,7 +152,7 @@ struct sys_stat_struct {
 	register long _arg5 __asm__ ("6") = (long)(arg5);		\
 	register long _arg6 __asm__ ("7") = (long)(arg6);		\
 									\
-	__asm__  volatile (						\
+	__asm__ volatile (						\
 		"svc 0\n"						\
 		: "+d"(_arg1)						\
 		: "d"(_arg2), "d"(_arg3), "d"(_arg4), "d"(_arg5),	\
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
index 1ae73d83aad1..e69113742a99 100644
--- a/tools/include/nolibc/arch-x86_64.h
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -60,7 +60,7 @@ struct sys_stat_struct {
 	long _ret;                                                            \
 	register long _num  __asm__ ("rax") = (num);                          \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
 		: "0"(_num)                                                   \
@@ -75,7 +75,7 @@ struct sys_stat_struct {
 	register long _num  __asm__ ("rax") = (num);                          \
 	register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
 		: "r"(_arg1),                                                 \
@@ -92,7 +92,7 @@ struct sys_stat_struct {
 	register long _arg1 __asm__ ("rdi") = (long)(arg1);                   \
 	register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
 		: "r"(_arg1), "r"(_arg2),                                     \
@@ -110,7 +110,7 @@ struct sys_stat_struct {
 	register long _arg2 __asm__ ("rsi") = (long)(arg2);                   \
 	register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
@@ -129,7 +129,7 @@ struct sys_stat_struct {
 	register long _arg3 __asm__ ("rdx") = (long)(arg3);                   \
 	register long _arg4 __asm__ ("r10") = (long)(arg4);                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
@@ -149,7 +149,7 @@ struct sys_stat_struct {
 	register long _arg4 __asm__ ("r10") = (long)(arg4);                   \
 	register long _arg5 __asm__ ("r8")  = (long)(arg5);                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
@@ -170,7 +170,7 @@ struct sys_stat_struct {
 	register long _arg5 __asm__ ("r8")  = (long)(arg5);                   \
 	register long _arg6 __asm__ ("r9")  = (long)(arg6);                   \
 									      \
-	__asm__  volatile (                                                   \
+	__asm__ volatile (                                                    \
 		"syscall\n"                                                   \
 		: "=a"(_ret)                                                  \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \

From 2dca615ade6765404607a9cfe4db648d9263d975 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 22:53:20 +0800
Subject: [PATCH 1726/2047] tools/nolibc: arch-loongarch.h: shrink with
 _NOLIBC_SYSCALL_CLOBBERLIST

my_syscall<N> share the same long clobber list, define a macro for them.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-loongarch.h | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
index 8df42268e578..8aa7724fe38e 100644
--- a/tools/include/nolibc/arch-loongarch.h
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -22,6 +22,8 @@
  * On LoongArch, select() is not implemented so we have to use pselect6().
  */
 #define __ARCH_WANT_SYS_PSELECT6
+#define _NOLIBC_SYSCALL_CLOBBERLIST \
+	"memory", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8"
 
 #define my_syscall0(num)                                                      \
 ({                                                                            \
@@ -32,8 +34,7 @@
 		"syscall 0\n"                                                 \
 		: "=r"(_arg1)                                                 \
 		: "r"(_num)                                                   \
-		: "memory", "$t0", "$t1", "$t2", "$t3",                       \
-		  "$t4", "$t5", "$t6", "$t7", "$t8"                           \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg1;                                                                \
 })
@@ -47,8 +48,7 @@
 		"syscall 0\n"                                                 \
 		: "+r"(_arg1)                                                 \
 		: "r"(_num)                                                   \
-		: "memory", "$t0", "$t1", "$t2", "$t3",                       \
-		  "$t4", "$t5", "$t6", "$t7", "$t8"                           \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg1;                                                                \
 })
@@ -64,8 +64,7 @@
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2),                                                 \
 		  "r"(_num)                                                   \
-		: "memory", "$t0", "$t1", "$t2", "$t3",                       \
-		  "$t4", "$t5", "$t6", "$t7", "$t8"                           \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg1;                                                                \
 })
@@ -82,8 +81,7 @@
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2), "r"(_arg3),                                     \
 		  "r"(_num)                                                   \
-		: "memory", "$t0", "$t1", "$t2", "$t3",                       \
-		  "$t4", "$t5", "$t6", "$t7", "$t8"                           \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg1;                                                                \
 })
@@ -101,8 +99,7 @@
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2), "r"(_arg3), "r"(_arg4),                         \
 		  "r"(_num)                                                   \
-		: "memory", "$t0", "$t1", "$t2", "$t3",                       \
-		  "$t4", "$t5", "$t6", "$t7", "$t8"                           \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg1;                                                                \
 })
@@ -121,8 +118,7 @@
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5),             \
 		  "r"(_num)                                                   \
-		: "memory", "$t0", "$t1", "$t2", "$t3",                       \
-		  "$t4", "$t5", "$t6", "$t7", "$t8"                           \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg1;                                                                \
 })
@@ -142,8 +138,7 @@
 		: "+r"(_arg1)                                                 \
 		: "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
 		  "r"(_num)                                                   \
-		: "memory", "$t0", "$t1", "$t2", "$t3",                       \
-		  "$t4", "$t5", "$t6", "$t7", "$t8"                           \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg1;                                                                \
 })

From 8b9bdab635870e44fd0684be6e70056201f7bc0c Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 22:54:26 +0800
Subject: [PATCH 1727/2047] tools/nolibc: arch-mips.h: shrink with
 _NOLIBC_SYSCALL_CLOBBERLIST

my_syscall<N> share the same long clobber list, define a macro for them.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-mips.h | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 22aacc07b1fc..1848f57777c4 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -57,6 +57,10 @@ struct sys_stat_struct {
  *     don't have to experience issues with register constraints.
  */
 
+#define _NOLIBC_SYSCALL_CLOBBERLIST \
+	"memory", "cc", "at", "v1", "hi", "lo", \
+	"t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"
+
 #define my_syscall0(num)                                                      \
 ({                                                                            \
 	register long _num __asm__ ("v0") = (num);                            \
@@ -68,8 +72,7 @@ struct sys_stat_struct {
 		"addiu $sp, $sp, 32\n"                                        \
 		: "=r"(_num), "=r"(_arg4)                                     \
 		: "r"(_num)                                                   \
-		: "memory", "cc", "at", "v1", "hi", "lo",                     \
-		  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg4 ? -_num : _num;                                                 \
 })
@@ -87,8 +90,7 @@ struct sys_stat_struct {
 		: "=r"(_num), "=r"(_arg4)                                     \
 		: "0"(_num),                                                  \
 		  "r"(_arg1)                                                  \
-		: "memory", "cc", "at", "v1", "hi", "lo",                     \
-		  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg4 ? -_num : _num;                                                 \
 })
@@ -107,8 +109,7 @@ struct sys_stat_struct {
 		: "=r"(_num), "=r"(_arg4)                                     \
 		: "0"(_num),                                                  \
 		  "r"(_arg1), "r"(_arg2)                                      \
-		: "memory", "cc", "at", "v1", "hi", "lo",                     \
-		  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg4 ? -_num : _num;                                                 \
 })
@@ -128,8 +129,7 @@ struct sys_stat_struct {
 		: "=r"(_num), "=r"(_arg4)                                     \
 		: "0"(_num),                                                  \
 		  "r"(_arg1), "r"(_arg2), "r"(_arg3)                          \
-		: "memory", "cc", "at", "v1", "hi", "lo",                     \
-		  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg4 ? -_num : _num;                                                 \
 })
@@ -149,8 +149,7 @@ struct sys_stat_struct {
 		: "=r" (_num), "=r"(_arg4)                                    \
 		: "0"(_num),                                                  \
 		  "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4)              \
-		: "memory", "cc", "at", "v1", "hi", "lo",                     \
-		  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg4 ? -_num : _num;                                                 \
 })
@@ -172,8 +171,7 @@ struct sys_stat_struct {
 		: "=r" (_num), "=r"(_arg4)                                    \
 		: "0"(_num),                                                  \
 		  "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5)  \
-		: "memory", "cc", "at", "v1", "hi", "lo",                     \
-		  "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"  \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
 	);                                                                    \
 	_arg4 ? -_num : _num;                                                 \
 })

From 6d1970e1efc8696bc8c4b8d36c9680882cb2c849 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 22:55:35 +0800
Subject: [PATCH 1728/2047] tools/nolibc: add missing my_syscall6() for mips

It is able to pass the 6th argument like the 5th argument via the stack
for mips, let's add a new my_syscall6() now, see [1] for details:

  The mips/o32 system call convention passes arguments 5 through 8 on
  the user stack.

Both mmap() and pselect6() require my_syscall6().

[1]: https://man7.org/linux/man-pages/man2/syscall.2.html

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-mips.h | 27 ++++++++++++++++++++++++++-
 tools/include/nolibc/nolibc.h    |  9 ++++-----
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 1848f57777c4..a2bfdf57b957 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -166,7 +166,7 @@ struct sys_stat_struct {
 	__asm__ volatile (                                                    \
 		"addiu $sp, $sp, -32\n"                                       \
 		"sw %7, 16($sp)\n"                                            \
-		"syscall\n  "                                                 \
+		"syscall\n"                                                   \
 		"addiu $sp, $sp, 32\n"                                        \
 		: "=r" (_num), "=r"(_arg4)                                    \
 		: "0"(_num),                                                  \
@@ -176,6 +176,31 @@ struct sys_stat_struct {
 	_arg4 ? -_num : _num;                                                 \
 })
 
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
+({                                                                            \
+	register long _num __asm__ ("v0")  = (num);                           \
+	register long _arg1 __asm__ ("a0") = (long)(arg1);                    \
+	register long _arg2 __asm__ ("a1") = (long)(arg2);                    \
+	register long _arg3 __asm__ ("a2") = (long)(arg3);                    \
+	register long _arg4 __asm__ ("a3") = (long)(arg4);                    \
+	register long _arg5 = (long)(arg5);                                   \
+	register long _arg6 = (long)(arg6);                                   \
+									      \
+	__asm__ volatile (                                                    \
+		"addiu $sp, $sp, -32\n"                                       \
+		"sw %7, 16($sp)\n"                                            \
+		"sw %8, 20($sp)\n"                                            \
+		"syscall\n"                                                   \
+		"addiu $sp, $sp, 32\n"                                        \
+		: "=r" (_num), "=r"(_arg4)                                    \
+		: "0"(_num),                                                  \
+		  "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+		  "r"(_arg6)                                                  \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
+	);                                                                    \
+	_arg4 ? -_num : _num;                                                 \
+})
+
 char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 05a228a6ee78..1f8d821000ac 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -13,11 +13,10 @@
  * Syscalls are split into 3 levels:
  *   - The lower level is the arch-specific syscall() definition, consisting in
  *     assembly code in compound expressions. These are called my_syscall0() to
- *     my_syscall6() depending on the number of arguments. The MIPS
- *     implementation is limited to 5 arguments. All input arguments are cast
- *     to a long stored in a register. These expressions always return the
- *     syscall's return value as a signed long value which is often either a
- *     pointer or the negated errno value.
+ *     my_syscall6() depending on the number of arguments. All input arguments
+ *     are castto a long stored in a register. These expressions always return
+ *     the syscall's return value as a signed long value which is often either
+ *     a pointer or the negated errno value.
  *
  *   - The second level is mostly architecture-independent. It is made of
  *     static functions called sys_<name>() which rely on my_syscallN()

From 6591be4a73feb955a107c70c7e5b621a97a2eb4b Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 22:56:59 +0800
Subject: [PATCH 1729/2047] tools/nolibc: __sysret: support syscalls who return
 a pointer

No official reference states the errno range, here aligns with musl and
glibc and uses [-MAX_ERRNO, -1] instead of all negative ones.

- musl: src/internal/syscall_ret.c
- glibc: sysdeps/unix/sysv/linux/sysdep.h

The MAX_ERRNO used by musl and glibc is 4095, just like the one nolibc
defined in tools/include/nolibc/errno.h.

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/ZKKdD%2Fp4UkEavru6@1wt.eu/
Suggested-by: David Laight <David.Laight@ACULAB.COM>
Link: https://lore.kernel.org/linux-riscv/94dd5170929f454fbc0a10a2eb3b108d@AcuMS.aculab.com/
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 53bc3ad6593e..3479f54d7957 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -28,13 +28,20 @@
 #include "errno.h"
 #include "types.h"
 
-/* Syscall return helper, set errno as -ret when ret < 0 */
+
+/* Syscall return helper for library routines, set errno as -ret when ret is in
+ * range of [-MAX_ERRNO, -1]
+ *
+ * Note, No official reference states the errno range here aligns with musl
+ * (src/internal/syscall_ret.c) and glibc (sysdeps/unix/sysv/linux/sysdep.h)
+ */
+
 static __inline__ __attribute__((unused, always_inline))
-long __sysret(long ret)
+long __sysret(unsigned long ret)
 {
-	if (ret < 0) {
-		SET_ERRNO(-ret);
-		ret = -1;
+	if (ret >= (unsigned long)-MAX_ERRNO) {
+		SET_ERRNO(-(long)ret);
+		return -1;
 	}
 	return ret;
 }

From 924e9539aeaa89e93268b4a39bac356e4355ab78 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 22:58:20 +0800
Subject: [PATCH 1730/2047] tools/nolibc: clean up mmap() routine

Do several cleanups together:

- Since all supported architectures have my_syscall6() now, remove the
  #ifdef check.

- Move the mmap() related macros to tools/include/nolibc/types.h and
  reuse most of them from <linux/mman.h>

- Apply the new generic __sysret() to convert the calling of sys_map()
  to oneline code

Note, since MAP_FAILED is -1 on Linux, so we can use the generic
__sysret() which returns -1 upon error and still satisfy user land that
checks for MAP_FAILED.

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/20230702192347.GJ16233@1wt.eu/
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h   | 29 ++++++-----------------------
 tools/include/nolibc/types.h |  6 ++++++
 2 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 3479f54d7957..3d01a24e6f7a 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -636,26 +636,11 @@ int mknod(const char *path, mode_t mode, dev_t dev)
 	return __sysret(sys_mknod(path, mode, dev));
 }
 
-#ifndef MAP_SHARED
-#define MAP_SHARED		0x01	/* Share changes */
-#define MAP_PRIVATE		0x02	/* Changes are private */
-#define MAP_SHARED_VALIDATE	0x03	/* share + validate extension flags */
-#endif
-
-#ifndef MAP_FAILED
-#define MAP_FAILED ((void *)-1)
-#endif
-
 #ifndef sys_mmap
 static __attribute__((unused))
 void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
 	       off_t offset)
 {
-#ifndef my_syscall6
-	/* Function not implemented. */
-	return (void *)-ENOSYS;
-#else
-
 	int n;
 
 #if defined(__NR_mmap2)
@@ -666,20 +651,18 @@ void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
 #endif
 
 	return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset);
-#endif
 }
 #endif
 
+/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret()
+ * which returns -1 upon error and still satisfy user land that checks for
+ * MAP_FAILED.
+ */
+
 static __attribute__((unused))
 void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
 {
-	void *ret = sys_mmap(addr, length, prot, flags, fd, offset);
-
-	if ((unsigned long)ret >= -4095UL) {
-		SET_ERRNO(-(long)ret);
-		ret = MAP_FAILED;
-	}
-	return ret;
+	return (void *)__sysret((unsigned long)sys_mmap(addr, length, prot, flags, fd, offset));
 }
 
 static __attribute__((unused))
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
index f96e28bff4ba..5e1bac8509ec 100644
--- a/tools/include/nolibc/types.h
+++ b/tools/include/nolibc/types.h
@@ -8,6 +8,7 @@
 #define _NOLIBC_TYPES_H
 
 #include "std.h"
+#include <linux/mman.h>
 #include <linux/time.h>
 #include <linux/stat.h>
 
@@ -81,6 +82,11 @@
 #define MAXPATHLEN     (PATH_MAX)
 #endif
 
+/* flags for mmap */
+#ifndef MAP_FAILED
+#define MAP_FAILED ((void *)-1)
+#endif
+
 /* whence values for lseek() */
 #define SEEK_SET       0
 #define SEEK_CUR       1

From 4201cfce15fe35d9d90ae870aba12eb84c0452ab Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 22:59:53 +0800
Subject: [PATCH 1731/2047] tools/nolibc: clean up sbrk() routine

Fix up the error reported by scripts/checkpatch.pl:

    ERROR: do not use assignment in if condition
    #95: FILE: tools/include/nolibc/sys.h:95:
    +	if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc))

Apply the new generic __sysret() to merge the SET_ERRNO() and return
lines.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 3d01a24e6f7a..61a3204b00d7 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -89,14 +89,13 @@ int brk(void *addr)
 static __attribute__((unused))
 void *sbrk(intptr_t inc)
 {
-	void *ret;
-
 	/* first call to find current end */
-	if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc))
+	void *ret = sys_brk(0);
+
+	if (ret && sys_brk(ret + inc) == ret + inc)
 		return ret + inc;
 
-	SET_ERRNO(ENOMEM);
-	return (void *)-1;
+	return (void *)__sysret(-ENOMEM);
 }
 
 
From 938b5b983330a55af8e31eabf329db7dbc7a5038 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 23:01:09 +0800
Subject: [PATCH 1732/2047] selftests/nolibc: export argv0 for some tests

argv0 is the path to nolibc-test program itself, which is a very good
always existing readable file for some tests, let's export it.

Note, the path may be absolute or relative, please make sure the tests
work with both of them. If it is relative, we must make sure the current
path is the one specified by the PWD environment variable.

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/ZKKbS3cwKcHgnGwu@1wt.eu/
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index c02d89953679..1e4a39548f26 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -43,6 +43,9 @@
 /* will be used by nolibc by getenv() */
 char **environ;
 
+/* will be used by some test cases as readable file, please don't write it */
+static const char *argv0;
+
 /* definition of a series of tests */
 struct test {
 	const char *name;              /* test name */
@@ -948,6 +951,7 @@ int main(int argc, char **argv, char **envp)
 	int idx;
 	char *test;
 
+	argv0 = argv[0];
 	environ = envp;
 
 	/* when called as init, it's possible that no console was opened, for

From 82e339c23036d0abefa7ef68d8fb134b0a7f14f4 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 23:02:16 +0800
Subject: [PATCH 1733/2047] selftests/nolibc: prepare: create /dev/zero

/dev/zero is commonly used to allocate anonymous memory, it is a very
good file for tests, let's prepare it.

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/20230702193306.GK16233@1wt.eu/
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 1e4a39548f26..24db3be08a15 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -894,11 +894,13 @@ int prepare(void)
 	 */
 	if (stat("/dev/.", &stat_buf) == 0 || mkdir("/dev", 0755) == 0) {
 		if (stat("/dev/console", &stat_buf) != 0 ||
-		    stat("/dev/null", &stat_buf) != 0) {
+		    stat("/dev/null", &stat_buf) != 0 ||
+		    stat("/dev/zero", &stat_buf) != 0) {
 			/* try devtmpfs first, otherwise fall back to manual creation */
 			if (mount("/dev", "/dev", "devtmpfs", 0, 0) != 0) {
 				mknod("/dev/console", 0600 | S_IFCHR, makedev(5, 1));
 				mknod("/dev/null",    0666 | S_IFCHR, makedev(1, 3));
+				mknod("/dev/zero",    0666 | S_IFCHR, makedev(1, 5));
 			}
 		}
 	}

From 29f5540be3925d76ef6257d23b1782cdbf55c94f Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 23:03:26 +0800
Subject: [PATCH 1734/2047] selftests/nolibc: add EXPECT_PTREQ, EXPECT_PTRNE
 and EXPECT_PTRER

The syscalls like sbrk() and mmap() return pointers, to test them, more
pointer compare test macros are required, add them:

- EXPECT_PTREQ() expects two equal pointers.
- EXPECT_PTRNE() expects two non-equal pointers.
- EXPECT_PTRER() expects failure with a specified errno.
- EXPECT_PTRER2() expects failure with one of two specified errnos.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 58 ++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 24db3be08a15..ea22359d3873 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -364,6 +364,64 @@ static int expect_ptrnz(const void *expr, int llen)
 	return ret;
 }
 
+#define EXPECT_PTREQ(cond, expr, cmp)				\
+	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptreq(expr, llen, cmp); } while (0)
+
+static int expect_ptreq(const void *expr, int llen, const void *cmp)
+{
+	int ret = 0;
+
+	llen += printf(" = <%p> ", expr);
+	if (expr != cmp) {
+		ret = 1;
+		llen += pad_spc(llen, 64, "[FAIL]\n");
+	} else {
+		llen += pad_spc(llen, 64, " [OK]\n");
+	}
+	return ret;
+}
+
+#define EXPECT_PTRNE(cond, expr, cmp)				\
+	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrne(expr, llen, cmp); } while (0)
+
+static int expect_ptrne(const void *expr, int llen, const void *cmp)
+{
+	int ret = 0;
+
+	llen += printf(" = <%p> ", expr);
+	if (expr == cmp) {
+		ret = 1;
+		llen += pad_spc(llen, 64, "[FAIL]\n");
+	} else {
+		llen += pad_spc(llen, 64, " [OK]\n");
+	}
+	return ret;
+}
+
+#define EXPECT_PTRER2(cond, expr, expret, experr1, experr2)		\
+	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrerr2(expr, expret, experr1, experr2, llen); } while (0)
+
+#define EXPECT_PTRER(cond, expr, expret, experr)			\
+	EXPECT_PTRER2(cond, expr, expret, experr, 0)
+
+static int expect_ptrerr2(const void *expr, const void *expret, int experr1, int experr2, int llen)
+{
+	int ret = 0;
+	int _errno = errno;
+
+	llen += printf(" = <%p> %s ", expr, errorname(_errno));
+	if (expr != expret || (_errno != experr1 && _errno != experr2)) {
+		ret = 1;
+		if (experr2 == 0)
+			llen += printf(" != (<%p> %s) ", expret, errorname(experr1));
+		else
+			llen += printf(" != (<%p> %s %s) ", expret, errorname(experr1), errorname(experr2));
+		llen += pad_spc(llen, 64, "[FAIL]\n");
+	} else {
+		llen += pad_spc(llen, 64, " [OK]\n");
+	}
+	return ret;
+}
 
 #define EXPECT_STRZR(cond, expr)				\
 	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strzr(expr, llen); } while (0)

From f193ecbff0effc06190036d92ec2fa582d9e6824 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 23:04:38 +0800
Subject: [PATCH 1735/2047] selftests/nolibc: add sbrk_0 to test current brk
 getting

>From musl 0.9.14 (to the latest version 1.2.3), both sbrk() and brk()
have almost been disabled for they conflict with malloc, only sbrk(0) is
still permitted as a way to get the current location of the program
break, let's support such case.

EXPECT_PTRNE() is used to expect sbrk() always successfully getting the
current break.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index ea22359d3873..5e17e7907379 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -633,6 +633,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(kill_0);            EXPECT_SYSZR(1, kill(getpid(), 0)); break;
 		CASE_TEST(kill_CONT);         EXPECT_SYSZR(1, kill(getpid(), 0)); break;
 		CASE_TEST(kill_BADPID);       EXPECT_SYSER(1, kill(INT_MAX, 0), -1, ESRCH); break;
+		CASE_TEST(sbrk_0);            EXPECT_PTRNE(1, sbrk(0), (void *)-1); break;
 		CASE_TEST(sbrk);              if ((p1 = p2 = sbrk(4096)) != (void *)-1) p2 = sbrk(-4096); EXPECT_SYSZR(1, (p2 == (void *)-1) || p2 == p1); break;
 		CASE_TEST(brk);               EXPECT_SYSZR(1, brk(sbrk(0))); break;
 		CASE_TEST(chdir_root);        EXPECT_SYSZR(1, chdir("/")); break;

From d4a3b2b99810ef398c352585edff2c23b17ef86d Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 23:05:49 +0800
Subject: [PATCH 1736/2047] selftests/nolibc: add mmap_bad test case

The length argument of mmap() must be greater than 0, passing a zero
length argument expects failure with -EINVAL.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 5e17e7907379..39c145b5e03d 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -669,6 +669,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(lseek_m1);          EXPECT_SYSER(1, lseek(-1, 0, SEEK_SET), -1, EBADF); break;
 		CASE_TEST(lseek_0);           EXPECT_SYSER(1, lseek(0, 0, SEEK_SET), -1, ESPIPE); break;
 		CASE_TEST(mkdir_root);        EXPECT_SYSER(1, mkdir("/", 0755), -1, EEXIST); break;
+		CASE_TEST(mmap_bad);          EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break;
 		CASE_TEST(open_tty);          EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break;
 		CASE_TEST(open_blah);         EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break;
 		CASE_TEST(poll_null);         EXPECT_SYSZR(1, poll(NULL, 0, 0)); break;

From ba3d0892be0e2d5a7d144c5519920130639524fc Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 23:07:13 +0800
Subject: [PATCH 1737/2047] selftests/nolibc: add munmap_bad test case

The addr argument of munmap() must be a multiple of the page size,
passing invalid (void *)1 addr expects failure with -EINVAL.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 39c145b5e03d..a11fec00fd01 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -670,6 +670,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(lseek_0);           EXPECT_SYSER(1, lseek(0, 0, SEEK_SET), -1, ESPIPE); break;
 		CASE_TEST(mkdir_root);        EXPECT_SYSER(1, mkdir("/", 0755), -1, EEXIST); break;
 		CASE_TEST(mmap_bad);          EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break;
+		CASE_TEST(munmap_bad);        EXPECT_SYSER(1, munmap((void *)1, 0), -1, EINVAL); break;
 		CASE_TEST(open_tty);          EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break;
 		CASE_TEST(open_blah);         EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break;
 		CASE_TEST(poll_null);         EXPECT_SYSZR(1, poll(NULL, 0, 0)); break;

From fcdbf5dda418aad3fdc5fcedbdb41900a24178a8 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Fri, 7 Jul 2023 23:08:20 +0800
Subject: [PATCH 1738/2047] selftests/nolibc: add mmap_munmap_good test case
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

mmap() a file with a good offset and then munmap() it. a non-zero offset
is passed to test the 6th argument of my_syscall6().

Note, it is not easy to find a unique file for mmap() in different
scenes, so, a file list is used to search the right one:

- /dev/zero: is commonly used to allocate anonymous memory and is likely
  present and readable

- /proc/1/exe: for 'run' and 'run-user' target, 'run-user' can not find
  '/proc/self/exe'

- /proc/self/exe: for 'libc-test' target, normal program 'libc-test' has
  no permission to access '/proc/1/exe'

- argv0: the path of the program itself, let it pass even with worst
  case scene: no procfs and no /dev/zero

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/20230702193306.GK16233@1wt.eu/
Suggested-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/lkml/bff82ea6-610b-4471-a28b-6c76c28604a6@t-8ch.de/
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 60 ++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index a11fec00fd01..84bcc725d8d6 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -595,6 +595,65 @@ static int test_stat_timestamps(void)
 	return 0;
 }
 
+int test_mmap_munmap(void)
+{
+	int ret, fd, i;
+	void *mem;
+	size_t page_size, file_size, length;
+	off_t offset, pa_offset;
+	struct stat stat_buf;
+	const char * const files[] = {
+		"/dev/zero",
+		"/proc/1/exe", "/proc/self/exe",
+		argv0,
+		NULL
+	};
+
+	page_size = getpagesize();
+	if (page_size < 0)
+		return -1;
+
+	/* find a right file to mmap, existed and accessible */
+	for (i = 0; files[i] != NULL; i++) {
+		ret = fd = open(files[i], O_RDONLY);
+		if (ret == -1)
+			continue;
+		else
+			break;
+	}
+	if (ret == -1)
+		return ret;
+
+	ret = stat(files[i], &stat_buf);
+	if (ret == -1)
+		goto end;
+
+	/* file size of the special /dev/zero is 0, let's assign one manually */
+	if (i == 0)
+		file_size = 3*page_size;
+	else
+		file_size = stat_buf.st_size;
+
+	offset = file_size - 1;
+	if (offset < 0)
+		offset = 0;
+	length = file_size - offset;
+	pa_offset = offset & ~(page_size - 1);
+
+	mem = mmap(NULL, length + offset - pa_offset, PROT_READ, MAP_SHARED, fd, pa_offset);
+	if (mem == MAP_FAILED) {
+		ret = -1;
+		goto end;
+	}
+
+	ret = munmap(mem, length + offset - pa_offset);
+
+end:
+	close(fd);
+	return ret;
+}
+
+
 /* Run syscall tests between IDs <min> and <max>.
  * Return 0 on success, non-zero on failure.
  */
@@ -671,6 +730,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(mkdir_root);        EXPECT_SYSER(1, mkdir("/", 0755), -1, EEXIST); break;
 		CASE_TEST(mmap_bad);          EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break;
 		CASE_TEST(munmap_bad);        EXPECT_SYSER(1, munmap((void *)1, 0), -1, EINVAL); break;
+		CASE_TEST(mmap_munmap_good);  EXPECT_SYSZR(1, test_mmap_munmap()); break;
 		CASE_TEST(open_tty);          EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break;
 		CASE_TEST(open_blah);         EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break;
 		CASE_TEST(poll_null);         EXPECT_SYSZR(1, poll(NULL, 0, 0)); break;

From cfb672f94f6e42ed8a472a1340afc2b41d265731 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:22:39 +0800
Subject: [PATCH 1739/2047] selftests/nolibc: add run-libc-test target

allow run and report glibc or musl based libc-test.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 000621f21adc..d408b688b291 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -132,6 +132,10 @@ nolibc-test: nolibc-test.c sysroot/$(ARCH)/include
 libc-test: nolibc-test.c
 	$(QUIET_CC)$(CC) -o $@ $<
 
+run-libc-test: libc-test
+	$(Q)./libc-test > "$(CURDIR)/run.out" || :
+	$(Q)$(REPORT) $(CURDIR)/run.out
+
 # qemu user-land test
 run-user: nolibc-test
 	$(Q)qemu-$(QEMU_ARCH) ./nolibc-test > "$(CURDIR)/run.out" || :

From 46cf630c53f359bb9cfe7f487c3a878068fd373d Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:23:45 +0800
Subject: [PATCH 1740/2047] selftests/nolibc: stat_fault: silence NULL argument
 warning with glibc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use another invalid address (void *)1 instead of NULL to silence this
compile warning with glibc:

    $ make libc-test
      CC      libc-test
    nolibc-test.c: In function ‘run_syscall’:
    nolibc-test.c:622:49: warning: null argument where non-null required (argument 1) [-Wnonnull]
      622 |   CASE_TEST(stat_fault);        EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break;
          |                                                 ^~~~
    nolibc-test.c:304:79: note: in definition of macro ‘EXPECT_SYSER2’
      304 |  do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syserr2(expr, expret, experr1, experr2, llen); } while (0)
          |                                                                               ^~~~
    nolibc-test.c:622:33: note: in expansion of macro ‘EXPECT_SYSER’
      622 |   CASE_TEST(stat_fault);        EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break;

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 84bcc725d8d6..78060329ce2a 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -743,7 +743,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(select_stdout);     EXPECT_SYSNE(1, ({ fd_set fds; FD_ZERO(&fds); FD_SET(1, &fds); select(2, NULL, &fds, NULL, NULL); }), -1); break;
 		CASE_TEST(select_fault);      EXPECT_SYSER(1, select(1, (void *)1, NULL, NULL, 0), -1, EFAULT); break;
 		CASE_TEST(stat_blah);         EXPECT_SYSER(1, stat("/proc/self/blah", &stat_buf), -1, ENOENT); break;
-		CASE_TEST(stat_fault);        EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break;
+		CASE_TEST(stat_fault);        EXPECT_SYSER(1, stat((void *)1, &stat_buf), -1, EFAULT); break;
 		CASE_TEST(stat_timestamps);   EXPECT_SYSZR(1, test_stat_timestamps()); break;
 		CASE_TEST(symlink_root);      EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break;
 		CASE_TEST(unlink_root);       EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break;

From 79b4f68e9e245dba34f7acc4e1e26d8df5f9a4f8 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:25:11 +0800
Subject: [PATCH 1741/2047] selftests/nolibc: gettid: restore for glibc and
 musl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As the gettid manpage [1] shows, glibc 2.30 has gettid support, so,
let's enable the test for glibc >= 2.30.

gettid works on musl too.

[1]: https://man7.org/linux/man-pages/man2/gettid.2.html

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 78060329ce2a..d51eac56ceac 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -668,6 +668,7 @@ int run_syscall(int min, int max)
 	int tmp;
 	int ret = 0;
 	void *p1, *p2;
+	int has_gettid = 1;
 
 	/* <proc> indicates whether or not /proc is mounted */
 	proc = stat("/proc", &stat_buf) == 0;
@@ -675,6 +676,11 @@ int run_syscall(int min, int max)
 	/* this will be used to skip certain tests that can't be run unprivileged */
 	euid0 = geteuid() == 0;
 
+	/* from 2.30, glibc provides gettid() */
+#if defined(__GLIBC_MINOR__) && defined(__GLIBC__)
+	has_gettid = __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 30);
+#endif
+
 	for (test = min; test >= 0 && test <= max; test++) {
 		int llen = 0; /* line length */
 
@@ -684,9 +690,7 @@ int run_syscall(int min, int max)
 		switch (test + __LINE__ + 1) {
 		CASE_TEST(getpid);            EXPECT_SYSNE(1, getpid(), -1); break;
 		CASE_TEST(getppid);           EXPECT_SYSNE(1, getppid(), -1); break;
-#ifdef NOLIBC
-		CASE_TEST(gettid);            EXPECT_SYSNE(1, gettid(), -1); break;
-#endif
+		CASE_TEST(gettid);            EXPECT_SYSNE(has_gettid, gettid(), -1); break;
 		CASE_TEST(getpgid_self);      EXPECT_SYSNE(1, getpgid(0), -1); break;
 		CASE_TEST(getpgid_bad);       EXPECT_SYSER(1, getpgid(-1), -1, ESRCH); break;
 		CASE_TEST(kill_0);            EXPECT_SYSZR(1, kill(getpid(), 0)); break;

From 5f2de00e2c9ce00708bbf24f70ceda2ed35e9780 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:26:24 +0800
Subject: [PATCH 1742/2047] selftests/nolibc: add _LARGEFILE64_SOURCE for musl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_GNU_SOURCE Implies _LARGEFILE64_SOURCE in glibc, but in musl, the
default configuration doesn't enable _LARGEFILE64_SOURCE.

>From include/dirent.h of musl, getdents64 is provided as getdents when
_LARGEFILE64_SOURCE is defined.

    #if defined(_LARGEFILE64_SOURCE)
    ...
    #define getdents64 getdents
    #endif

Let's define _LARGEFILE64_SOURCE to fix up this compile error:

    tools/testing/selftests/nolibc/nolibc-test.c: In function ‘test_getdents64’:
    tools/testing/selftests/nolibc/nolibc-test.c:453:8: warning: implicit declaration of function ‘getdents64’; did you mean ‘getdents’? [-Wimplicit-function-declaration]
      453 |  ret = getdents64(fd, (void *)buffer, sizeof(buffer));
          |        ^~~~~~~~~~
          |        getdents
    /usr/bin/ld: /tmp/ccKILm5u.o: in function `test_getdents64':
    nolibc-test.c:(.text+0xe3e): undefined reference to `getdents64'
    collect2: error: ld returned 1 exit status

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index d51eac56ceac..60bbc272cdcf 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
 #define _GNU_SOURCE
+#define _LARGEFILE64_SOURCE
 
 /* libc-specific include files
  * The program may be built in 3 ways:

From 989abf1c7bc590969ef1b19b8e64b69423bfacc7 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:27:40 +0800
Subject: [PATCH 1743/2047] selftests/nolibc: fix up int_fast16/32_t test cases
 for musl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

musl limits the fast signed int in 32bit, but glibc and nolibc don't, to
let such test cases work on musl, let's provide the type based
SINT_MAX_OF_TYPE(type) and SINT_MIN_OF_TYPE(type).

Suggested-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/lkml/bc635c4f-67fe-4e86-bfdf-bcb4879b928d@t-8ch.de/
Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 60bbc272cdcf..bc7a54cef42a 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -41,6 +41,10 @@
 #endif
 #endif
 
+/* for the type of int_fast16_t and int_fast32_t, musl differs from glibc and nolibc */
+#define SINT_MAX_OF_TYPE(type) (((type)1 << (sizeof(type) * 8 - 2)) - (type)1 + ((type)1 << (sizeof(type) * 8 - 2)))
+#define SINT_MIN_OF_TYPE(type) (-SINT_MAX_OF_TYPE(type) - 1)
+
 /* will be used by nolibc by getenv() */
 char **environ;
 
@@ -828,11 +832,11 @@ int run_stdlib(int min, int max)
 		CASE_TEST(limit_int_fast8_max);     EXPECT_EQ(1, INT_FAST8_MAX,    (int_fast8_t)     0x7f); break;
 		CASE_TEST(limit_int_fast8_min);     EXPECT_EQ(1, INT_FAST8_MIN,    (int_fast8_t)     0x80); break;
 		CASE_TEST(limit_uint_fast8_max);    EXPECT_EQ(1, UINT_FAST8_MAX,   (uint_fast8_t)    0xff); break;
-		CASE_TEST(limit_int_fast16_min);    EXPECT_EQ(1, INT_FAST16_MIN,   (int_fast16_t)    INTPTR_MIN); break;
-		CASE_TEST(limit_int_fast16_max);    EXPECT_EQ(1, INT_FAST16_MAX,   (int_fast16_t)    INTPTR_MAX); break;
+		CASE_TEST(limit_int_fast16_min);    EXPECT_EQ(1, INT_FAST16_MIN,   (int_fast16_t)    SINT_MIN_OF_TYPE(int_fast16_t)); break;
+		CASE_TEST(limit_int_fast16_max);    EXPECT_EQ(1, INT_FAST16_MAX,   (int_fast16_t)    SINT_MAX_OF_TYPE(int_fast16_t)); break;
 		CASE_TEST(limit_uint_fast16_max);   EXPECT_EQ(1, UINT_FAST16_MAX,  (uint_fast16_t)   UINTPTR_MAX); break;
-		CASE_TEST(limit_int_fast32_min);    EXPECT_EQ(1, INT_FAST32_MIN,   (int_fast32_t)    INTPTR_MIN); break;
-		CASE_TEST(limit_int_fast32_max);    EXPECT_EQ(1, INT_FAST32_MAX,   (int_fast32_t)    INTPTR_MAX); break;
+		CASE_TEST(limit_int_fast32_min);    EXPECT_EQ(1, INT_FAST32_MIN,   (int_fast32_t)    SINT_MIN_OF_TYPE(int_fast32_t)); break;
+		CASE_TEST(limit_int_fast32_max);    EXPECT_EQ(1, INT_FAST32_MAX,   (int_fast32_t)    SINT_MAX_OF_TYPE(int_fast32_t)); break;
 		CASE_TEST(limit_uint_fast32_max);   EXPECT_EQ(1, UINT_FAST32_MAX,  (uint_fast32_t)   UINTPTR_MAX); break;
 		CASE_TEST(limit_int_fast64_min);    EXPECT_EQ(1, INT_FAST64_MIN,   (int_fast64_t)    INT64_MIN); break;
 		CASE_TEST(limit_int_fast64_max);    EXPECT_EQ(1, INT_FAST64_MAX,   (int_fast64_t)    INT64_MAX); break;

From 788aca91ab5e4cccae158aac03a1d653c6da2fde Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:29:30 +0800
Subject: [PATCH 1744/2047] tools/nolibc: types.h: add RB_ flags for reboot()

Both glibc and musl provide RB_ flags via <sys/reboot.h> for reboot(),
they don't need to include <linux/reboot.h>, let nolibc provide RB_
flags too.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h   |  1 -
 tools/include/nolibc/types.h | 12 +++++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 61a3204b00d7..804bc0231ec7 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -21,7 +21,6 @@
 #include <linux/auxvec.h>
 #include <linux/fcntl.h> /* for O_* and AT_* */
 #include <linux/stat.h>  /* for statx() */
-#include <linux/reboot.h> /* for LINUX_REBOOT_* */
 #include <linux/prctl.h>
 
 #include "arch.h"
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
index 5e1bac8509ec..23963e48d8ee 100644
--- a/tools/include/nolibc/types.h
+++ b/tools/include/nolibc/types.h
@@ -9,8 +9,9 @@
 
 #include "std.h"
 #include <linux/mman.h>
-#include <linux/time.h>
+#include <linux/reboot.h> /* for LINUX_REBOOT_* */
 #include <linux/stat.h>
+#include <linux/time.h>
 
 
 /* Only the generic macros and types may be defined here. The arch-specific
@@ -92,6 +93,15 @@
 #define SEEK_CUR       1
 #define SEEK_END       2
 
+/* flags for reboot */
+#define RB_AUTOBOOT     LINUX_REBOOT_CMD_RESTART
+#define RB_HALT_SYSTEM  LINUX_REBOOT_CMD_HALT
+#define RB_ENABLE_CAD   LINUX_REBOOT_CMD_CAD_ON
+#define RB_DISABLE_CAD  LINUX_REBOOT_CMD_CAD_OFF
+#define RB_POWER_OFF    LINUX_REBOOT_CMD_POWER_OFF
+#define RB_SW_SUSPEND   LINUX_REBOOT_CMD_SW_SUSPEND
+#define RB_KEXEC        LINUX_REBOOT_CMD_KEXEC
+
 /* Macros used on waitpid()'s return status */
 #define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
 #define WIFEXITED(status)   (((status) & 0x7f) == 0)

From 950add280c0b2221f2f28d0595c9c82f40cfe3ed Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:30:58 +0800
Subject: [PATCH 1745/2047] selftests/nolibc: prefer <sys/reboot.h> to
 <linux/reboot.h>

Since both glibc and musl provide RB_ flags via <sys/reboot.h>, and we
just add RB_ flags for nolibc, let's use RB_ flags instead of
LINUX_REBOOT_ flags and only reserve the required <sys/reboot.h> header.

This allows compile libc-test for musl libc without the linux headers.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index bc7a54cef42a..dc080add9637 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -15,7 +15,6 @@
 #include <string.h>
 #ifndef _NOLIBC_STDIO_H
 /* standard libcs need more includes */
-#include <linux/reboot.h>
 #include <sys/io.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
@@ -1181,7 +1180,7 @@ int main(int argc, char **argv, char **envp)
 		 */
 		printf("Leaving init with final status: %d\n", !!ret);
 		if (ret == 0)
-			reboot(LINUX_REBOOT_CMD_POWER_OFF);
+			reboot(RB_POWER_OFF);
 #if defined(__x86_64__)
 		/* QEMU started with "-device isa-debug-exit -no-reboot" will
 		 * exit with status code 2N+1 when N is written to 0x501. We

From c388c9920da2679f62bec48d00ca9e80e9d0a364 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:32:05 +0800
Subject: [PATCH 1746/2047] selftests/nolibc: fix up kernel parameters support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kernel parameters allow pass two types of strings, one type is like
'noapic', another type is like 'panic=5', the first type is passed as
arguments of the init program, the second type is passed as environment
variables of the init program.

when users pass kernel parameters like this:

    noapic NOLIBC_TEST=syscall

our nolibc-test program will use the test setting from argv[1] and
ignore the one from NOLIBC_TEST environment variable, and at last, it
will print the following line and ignore the whole test setting.

    Ignoring unknown test name 'noapic'

reversing the parsing order does solve the above issue:

    test = getenv("NOLIBC_TEST");
    if (test)
        test = argv[1];

but it still doesn't work with such kernel parameters (without
NOLIBC_TEST environment variable):

    noapic FOO=bar

To support all of the potential kernel parameters, let's verify the test
setting from both of argv[1] and NOLIBC_TEST environment variable.

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 33 ++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index dc080add9637..c15a4211ff26 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -1073,6 +1073,35 @@ static const struct test test_names[] = {
 	{ 0 }
 };
 
+int is_setting_valid(char *test)
+{
+	int idx, len, test_len, valid = 0;
+	char delimiter;
+
+	if (!test)
+		return valid;
+
+	test_len = strlen(test);
+
+	for (idx = 0; test_names[idx].name; idx++) {
+		len = strlen(test_names[idx].name);
+		if (test_len < len)
+			continue;
+
+		if (strncmp(test, test_names[idx].name, len) != 0)
+			continue;
+
+		delimiter = test[len];
+		if (delimiter != ':' && delimiter != ',' && delimiter != '\0')
+			continue;
+
+		valid = 1;
+		break;
+	}
+
+	return valid;
+}
+
 int main(int argc, char **argv, char **envp)
 {
 	int min = 0;
@@ -1099,10 +1128,10 @@ int main(int argc, char **argv, char **envp)
 	 *    syscall:5-15[:.*],stdlib:8-10
 	 */
 	test = argv[1];
-	if (!test)
+	if (!is_setting_valid(test))
 		test = getenv("NOLIBC_TEST");
 
-	if (test) {
+	if (is_setting_valid(test)) {
 		char *comma, *colon, *dash, *value;
 
 		do {

From f7a419e35ba3c6301fa096370548599853ef2b5d Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:33:13 +0800
Subject: [PATCH 1747/2047] selftests/nolibc: link_cross: use
 /proc/self/cmdline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For CONFIG_NET=n, there would be no /proc/self/net, so, use
/proc/self/cmdline instead.

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index c15a4211ff26..76b1f7a27c86 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -732,7 +732,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(link_root1);        EXPECT_SYSER(1, link("/", "/"), -1, EEXIST); break;
 		CASE_TEST(link_blah);         EXPECT_SYSER(1, link("/proc/self/blah", "/blah"), -1, ENOENT); break;
 		CASE_TEST(link_dir);          EXPECT_SYSER(euid0, link("/", "/blah"), -1, EPERM); break;
-		CASE_TEST(link_cross);        EXPECT_SYSER(proc, link("/proc/self/net", "/blah"), -1, EXDEV); break;
+		CASE_TEST(link_cross);        EXPECT_SYSER(proc, link("/proc/self/cmdline", "/blah"), -1, EXDEV); break;
 		CASE_TEST(lseek_m1);          EXPECT_SYSER(1, lseek(-1, 0, SEEK_SET), -1, EBADF); break;
 		CASE_TEST(lseek_0);           EXPECT_SYSER(1, lseek(0, 0, SEEK_SET), -1, ESPIPE); break;
 		CASE_TEST(mkdir_root);        EXPECT_SYSER(1, mkdir("/", 0755), -1, EEXIST); break;

From f4191f3d524289080284fa3a48ccd096b319d280 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:34:21 +0800
Subject: [PATCH 1748/2047] tools/nolibc: add rmdir() support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

a reverse operation of mkdir() is meaningful, add rmdir() here.

required by nolibc-test to remove /proc while CONFIG_PROC_FS is not
enabled.

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 804bc0231ec7..dee56894a811 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -611,6 +611,28 @@ int mkdir(const char *path, mode_t mode)
 	return __sysret(sys_mkdir(path, mode));
 }
 
+/*
+ * int rmdir(const char *path);
+ */
+
+static __attribute__((unused))
+int sys_rmdir(const char *path)
+{
+#ifdef __NR_rmdir
+	return my_syscall1(__NR_rmdir, path);
+#elif defined(__NR_unlinkat)
+	return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR);
+#else
+	return -ENOSYS;
+#endif
+}
+
+static __attribute__((unused))
+int rmdir(const char *path)
+{
+	return __sysret(sys_rmdir(path));
+}
+
 
 /*
  * int mknod(const char *path, mode_t mode, dev_t dev);

From 4e14e84442fe59fbe677ff4c328a3075bf514e26 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:35:38 +0800
Subject: [PATCH 1749/2047] selftests/nolibc: add a new rmdir() test case
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A new rmdir_blah test case is added to remove a non-existing /blah,
which expects failure with ENOENT errno.

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 76b1f7a27c86..bb39a1e7fd46 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -746,6 +746,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(poll_fault);        EXPECT_SYSER(1, poll((void *)1, 1, 0), -1, EFAULT); break;
 		CASE_TEST(prctl);             EXPECT_SYSER(1, prctl(PR_SET_NAME, (unsigned long)NULL, 0, 0, 0), -1, EFAULT); break;
 		CASE_TEST(read_badf);         EXPECT_SYSER(1, read(-1, &tmp, 1), -1, EBADF); break;
+		CASE_TEST(rmdir_blah);        EXPECT_SYSER(1, rmdir("/blah"), -1, ENOENT); break;
 		CASE_TEST(sched_yield);       EXPECT_SYSZR(1, sched_yield()); break;
 		CASE_TEST(select_null);       EXPECT_SYSZR(1, ({ struct timeval tv = { 0 }; select(0, NULL, NULL, NULL, &tv); })); break;
 		CASE_TEST(select_stdout);     EXPECT_SYSNE(1, ({ fd_set fds; FD_ZERO(&fds); FD_SET(1, &fds); select(2, NULL, &fds, NULL, NULL); }), -1); break;

From b8b26108e4d5a0d004393e8a53d374b2b076ba20 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:37:45 +0800
Subject: [PATCH 1750/2047] selftests/nolibc: fix up failures when
 CONFIG_PROC_FS=n
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For CONFIG_PROC_FS=n, the /proc is not mountable, but the /proc
directory has been created in the prepare() stage whenever /proc is
there or not.

so, the checking of /proc in the run_syscall() stage will be always true
and at last it will fail all of the procfs dependent test cases, which
deviates from the 'cond' check design of the EXPECT_xx macros, without
procfs, these test cases should be skipped instead of failed.

To solve this issue, one method is checking /proc/self instead of /proc,
another method is removing the /proc directory completely for
CONFIG_PROC_FS=n, we apply the second method to avoid misleading the
users.

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index bb39a1e7fd46..55cc2296c292 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -1057,8 +1057,11 @@ int prepare(void)
 
 	/* try to mount /proc if not mounted. Silently fail otherwise */
 	if (stat("/proc/.", &stat_buf) == 0 || mkdir("/proc", 0755) == 0) {
-		if (stat("/proc/self", &stat_buf) != 0)
-			mount("/proc", "/proc", "proc", 0, 0);
+		if (stat("/proc/self", &stat_buf) != 0) {
+			/* If not mountable, remove /proc completely to avoid misuse */
+			if (mount("none", "/proc", "proc", 0, 0) != 0)
+				rmdir("/proc");
+		}
 	}
 
 	return 0;

From bbb14546bd22b1c41af7182d085abe89ae21eecd Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:38:57 +0800
Subject: [PATCH 1751/2047] selftests/nolibc: prepare /tmp for tests that need
 to write
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

create a /tmp directory. If it succeeds, the directory is writable,
which is normally the case when booted from an initramfs anyway.

This will be used instead of procfs for some tests.

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Link: https://lore.kernel.org/lkml/20230710050600.9697-1-falcon@tinylab.org/
[wt: removed the unneeded mount() call]
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 55cc2296c292..bc2f40c9cee6 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -1064,6 +1064,9 @@ int prepare(void)
 		}
 	}
 
+	/* some tests rely on a writable /tmp */
+	mkdir("/tmp", 0755);
+
 	return 0;
 }
 

From 6861b1a3398ec3f1b830d7d190a6ffbc3be8ab6b Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:40:08 +0800
Subject: [PATCH 1752/2047] selftests/nolibc: vfprintf: remove MEMFD_CREATE
 dependency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The vfprintf test case require to open a temporary file to write, the
old memfd_create() method is perfect but has strong dependency on
MEMFD_CREATE and also TMPFS or HUGETLBFS (see fs/Kconfig):

    config MEMFD_CREATE
	def_bool TMPFS || HUGETLBFS

And from v6.2, MFD_NOEXEC_SEAL must be passed for the non-executable
memfd, otherwise, The kernel warning will be output to the test result
like this:

        Running test 'vfprintf'
        0 emptymemfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL, pid=1 'init'
         "" = ""                                                  [OK]

To avoid such warning and also to remove the MEMFD_CREATE dependency,
let's open a file from tmpfs directly.

The /tmp directory is used to detect the existing of tmpfs, if not
there, skip instead of fail.

And further, for pid == 1, the initramfs is loaded as ramfs, which can
be used as tmpfs, so, it is able to further remove TMPFS dependency too.

Suggested-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/lkml/9ad51430-b7c0-47dc-80af-20c86539498d@t-8ch.de
Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index bc2f40c9cee6..ad1f6358b253 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -876,10 +876,10 @@ static int expect_vfprintf(int llen, size_t c, const char *expected, const char
 	FILE *memfile;
 	va_list args;
 
-	fd = memfd_create("vfprintf", 0);
+	fd = open("/tmp", O_TMPFILE | O_EXCL | O_RDWR, 0600);
 	if (fd == -1) {
-		pad_spc(llen, 64, "[FAIL]\n");
-		return 1;
+		pad_spc(llen, 64, "[SKIPPED]\n");
+		return 0;
 	}
 
 	memfile = fdopen(fd, "w+");

From 38fc0a3553ced0158d97047f5fb3b73898226a3c Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:42:01 +0800
Subject: [PATCH 1753/2047] selftests/nolibc: chdir_root: restore current path
 after test

The PWD environment variable has the path of the nolibc-test program,
the current path must be the same as it, otherwise, the test cases will
fail with relative path (e.g. ./nolibc-test).

Since only chdir_root really changes the current path, let's restore it
with the PWD environment variable.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index ad1f6358b253..64fca7c6ca49 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -703,7 +703,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(sbrk_0);            EXPECT_PTRNE(1, sbrk(0), (void *)-1); break;
 		CASE_TEST(sbrk);              if ((p1 = p2 = sbrk(4096)) != (void *)-1) p2 = sbrk(-4096); EXPECT_SYSZR(1, (p2 == (void *)-1) || p2 == p1); break;
 		CASE_TEST(brk);               EXPECT_SYSZR(1, brk(sbrk(0))); break;
-		CASE_TEST(chdir_root);        EXPECT_SYSZR(1, chdir("/")); break;
+		CASE_TEST(chdir_root);        EXPECT_SYSZR(1, chdir("/")); chdir(getenv("PWD")); break;
 		CASE_TEST(chdir_dot);         EXPECT_SYSZR(1, chdir(".")); break;
 		CASE_TEST(chdir_blah);        EXPECT_SYSER(1, chdir("/blah"), -1, ENOENT); break;
 		CASE_TEST(chmod_net);         EXPECT_SYSZR(proc, chmod("/proc/self/net", 0555)); break;

From f576d3c075dbc469480e737a97916be3c8f1a723 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:43:44 +0800
Subject: [PATCH 1754/2047] selftests/nolibc: stat_timestamps: remove procfs
 dependency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

'/proc/self/' is a good path which doesn't have stale time info but it
is only available for CONFIG_PROC_FS=y.

When CONFIG_PROC_FS=n, use argv0 instead of '/proc/self', use '/' for the
worst case.

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 64fca7c6ca49..d7a84ef10bba 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -584,7 +584,7 @@ static int test_stat_timestamps(void)
 	if (sizeof(st.st_atim.tv_sec) != sizeof(st.st_atime))
 		return 1;
 
-	if (stat("/proc/self/", &st))
+	if (stat("/proc/self/", &st) && stat(argv0, &st) && stat("/", &st))
 		return 1;
 
 	if (st.st_atim.tv_sec != st.st_atime || st.st_atim.tv_nsec > 1000000000)

From 135b622e4855981cad487aead7e6992358a0d25c Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:45:08 +0800
Subject: [PATCH 1755/2047] selftests/nolibc: chroot_exe: remove procfs
 dependency

Since argv0 also works for CONFIG_PROC_FS=n, let's use it instead of
'/proc/self/exe'.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index d7a84ef10bba..be9f664c8435 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -711,7 +711,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(chown_self);        EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break;
 		CASE_TEST(chroot_root);       EXPECT_SYSZR(euid0, chroot("/")); break;
 		CASE_TEST(chroot_blah);       EXPECT_SYSER(1, chroot("/proc/self/blah"), -1, ENOENT); break;
-		CASE_TEST(chroot_exe);        EXPECT_SYSER(proc, chroot("/proc/self/exe"), -1, ENOTDIR); break;
+		CASE_TEST(chroot_exe);        EXPECT_SYSER(1, chroot(argv0), -1, ENOTDIR); break;
 		CASE_TEST(close_m1);          EXPECT_SYSER(1, close(-1), -1, EBADF); break;
 		CASE_TEST(close_dup);         EXPECT_SYSZR(1, close(dup(0))); break;
 		CASE_TEST(dup_0);             tmp = dup(0);  EXPECT_SYSNE(1, tmp, -1); close(tmp); break;

From 148e9718e2a288ef4fee16089cb9555414052fbb Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 8 Jul 2023 02:46:20 +0800
Subject: [PATCH 1756/2047] selftests/nolibc: add chmod_argv0 test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

argv0 is readable and chmodable, let's use it for chmod test, but a safe
umask should be used, the readable and executable modes should be
reserved.

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index be9f664c8435..6c94e491e2f8 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -706,6 +706,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(chdir_root);        EXPECT_SYSZR(1, chdir("/")); chdir(getenv("PWD")); break;
 		CASE_TEST(chdir_dot);         EXPECT_SYSZR(1, chdir(".")); break;
 		CASE_TEST(chdir_blah);        EXPECT_SYSER(1, chdir("/blah"), -1, ENOENT); break;
+		CASE_TEST(chmod_argv0);       EXPECT_SYSZR(1, chmod(argv0, 0555)); break;
 		CASE_TEST(chmod_net);         EXPECT_SYSZR(proc, chmod("/proc/self/net", 0555)); break;
 		CASE_TEST(chmod_self);        EXPECT_SYSER(proc, chmod("/proc/self", 0555), -1, EPERM); break;
 		CASE_TEST(chown_self);        EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break;

From c0315c79aaa23958fbe82c218be2ba3635749769 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Thu, 6 Jul 2023 17:03:34 +0800
Subject: [PATCH 1757/2047] selftests/nolibc: report: print a summarized test
 status

one of the test status: success, warning and failure is printed to
summarize the passed, skipped and failed values.

- "success" means no skipped and no failed.
- "warning" means has at least one skipped and no failed.
- "failure" means all tests are failed.

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/20230702164358.GB16233@1wt.eu/
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index d408b688b291..84b9a46ad678 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -85,7 +85,8 @@ CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 \
 LDFLAGS := -s
 
 REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++;print} /\[SKIPPED\][\r]*$$/{s++} \
-		END{ printf("%d test(s) passed, %d skipped, %d failed.\n", p, s, f); \
+		END{ printf("%d test(s) passed, %d skipped, %d failed => status: ", p, s, f); \
+		if (f) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \
 		printf("See all results in %s\n", ARGV[1]); }'
 
 help:

From c0faa0dace195789d50173966f6b1d5f4ceae498 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Thu, 6 Jul 2023 17:08:16 +0800
Subject: [PATCH 1758/2047] selftests/nolibc: report: print total tests

Let's count and print the total number of tests, now, the data of
passed, skipped and failed have the same format.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 84b9a46ad678..a02be8b0a569 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -85,7 +85,7 @@ CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 \
 LDFLAGS := -s
 
 REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++;print} /\[SKIPPED\][\r]*$$/{s++} \
-		END{ printf("%d test(s) passed, %d skipped, %d failed => status: ", p, s, f); \
+		END{ printf("%d test(s): %d passed, %d skipped, %d failed => status: ", p+s+f, p, s, f); \
 		if (f) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \
 		printf("See all results in %s\n", ARGV[1]); }'
 

From 0ac908e304030d04b0df49fda9f216ffa204a527 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Thu, 6 Jul 2023 17:10:08 +0800
Subject: [PATCH 1759/2047] selftests/nolibc: report: align passed, skipped and
 failed

align the test values for different runs and different architectures.

Since the total number of tests is not bigger than 1000 currently, let's
align them with "%3d".

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
[wt: s/%03d/%3d/ as discussed with Zhangjin]
Link: https://lore.kernel.org/lkml/20230709185112.97236-1-falcon@tinylab.org/
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index a02be8b0a569..bb23c5790520 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -85,7 +85,7 @@ CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 \
 LDFLAGS := -s
 
 REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++;print} /\[SKIPPED\][\r]*$$/{s++} \
-		END{ printf("%d test(s): %d passed, %d skipped, %d failed => status: ", p+s+f, p, s, f); \
+		END{ printf("%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \
 		if (f) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \
 		printf("See all results in %s\n", ARGV[1]); }'
 

From 7d92e89363755978c616c8d9d9f8961989e62be8 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Thu, 6 Jul 2023 17:11:17 +0800
Subject: [PATCH 1760/2047] selftests/nolibc: report: extrude the test status
 line

two newlines are added around the test summary line to extrude the test
status.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index bb23c5790520..1da4e07f0b3b 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -85,9 +85,9 @@ CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 \
 LDFLAGS := -s
 
 REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++;print} /\[SKIPPED\][\r]*$$/{s++} \
-		END{ printf("%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \
+		END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \
 		if (f) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \
-		printf("See all results in %s\n", ARGV[1]); }'
+		printf("\nSee all results in %s\n", ARGV[1]); }'
 
 help:
 	@echo "Supported targets under selftests/nolibc:"

From 4beb9be811d7c20b3b5ae8c07720d8f5cf066e1e Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Thu, 6 Jul 2023 17:12:28 +0800
Subject: [PATCH 1761/2047] selftests/nolibc: report: add newline before test
 failures

a newline is inserted just before the test failures to avoid mixing the
test failures with the raw test log.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 1da4e07f0b3b..d31d6cea82e2 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -84,7 +84,7 @@ CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 \
 		$(CFLAGS_$(ARCH)) $(CFLAGS_STACKPROTECTOR)
 LDFLAGS := -s
 
-REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{f++;print} /\[SKIPPED\][\r]*$$/{s++} \
+REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \
 		END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \
 		if (f) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \
 		printf("\nSee all results in %s\n", ARGV[1]); }'

From 67d108e2a2bd258b49902ea9d85c25a53a7c5e5b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Mon, 10 Jul 2023 20:01:34 +0200
Subject: [PATCH 1762/2047] tools/nolibc: completely remove optional environ
 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In commit 52e423f5b93e ("tools/nolibc: export environ as a weak symbol on i386")
and friends the asm startup logic was extended to directly populate the
"environ" array.

This makes it impossible for "environ" to be dropped by the linker.
Therefore also drop the other logic to handle non-present "environ".

Also add a testcase to validate the initialization of environ.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/stdlib.h                | 12 ++----------
 tools/testing/selftests/nolibc/nolibc-test.c |  7 ++++---
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 902162f80337..bacfd35c5156 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -83,11 +83,10 @@ void free(void *ptr)
  * declared as a char **, and must be terminated by a NULL (it is recommended
  * to set this variable to the "envp" argument of main()). If the requested
  * environment variable exists its value is returned otherwise NULL is
- * returned. getenv() is forcefully inlined so that the reference to "environ"
- * will be dropped if unused, even at -O0.
+ * returned.
  */
 static __attribute__((unused))
-char *_getenv(const char *name, char **environ)
+char *getenv(const char *name)
 {
 	int idx, i;
 
@@ -102,13 +101,6 @@ char *_getenv(const char *name, char **environ)
 	return NULL;
 }
 
-static __inline__ __attribute__((unused,always_inline))
-char *getenv(const char *name)
-{
-	extern char **environ;
-	return _getenv(name, environ);
-}
-
 static __attribute__((unused))
 unsigned long getauxval(unsigned long type)
 {
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 6c94e491e2f8..8682e6f1e7a4 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -44,8 +44,8 @@
 #define SINT_MAX_OF_TYPE(type) (((type)1 << (sizeof(type) * 8 - 2)) - (type)1 + ((type)1 << (sizeof(type) * 8 - 2)))
 #define SINT_MIN_OF_TYPE(type) (-SINT_MAX_OF_TYPE(type) - 1)
 
-/* will be used by nolibc by getenv() */
-char **environ;
+/* will be used to test initialization of environ */
+static char **test_envp;
 
 /* will be used by some test cases as readable file, please don't write it */
 static const char *argv0;
@@ -787,6 +787,7 @@ int run_stdlib(int min, int max)
 		 * test numbers.
 		 */
 		switch (test + __LINE__ + 1) {
+		CASE_TEST(environ);            EXPECT_PTREQ(1, environ, test_envp); break;
 		CASE_TEST(getenv_TERM);        EXPECT_STRNZ(1, getenv("TERM")); break;
 		CASE_TEST(getenv_blah);        EXPECT_STRZR(1, getenv("blah")); break;
 		CASE_TEST(setcmp_blah_blah);   EXPECT_EQ(1, strcmp("blah", "blah"), 0); break;
@@ -1120,7 +1121,7 @@ int main(int argc, char **argv, char **envp)
 	char *test;
 
 	argv0 = argv[0];
-	environ = envp;
+	test_envp = envp;
 
 	/* when called as init, it's possible that no console was opened, for
 	 * example if no /dev file system was provided. We'll check that fd#1

From 3097783ecf3b963a6b700f32b6190b0910e39724 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Tue, 11 Jul 2023 11:48:39 +0200
Subject: [PATCH 1763/2047] selftests/nolibc: make evaluation of test
 conditions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If "cond" is a multi-token statement the behavior of the preprocessor
will lead to the negation "!" to be only applied to the first token.
Although currently no test uses such multi-token conditions but it can
happen at any time.

Put braces around "cond" to ensure the negation works as expected.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 42 ++++++++++----------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 8682e6f1e7a4..54b8421fe54a 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -147,7 +147,7 @@ static int pad_spc(int llen, int cnt, const char *fmt, ...)
  */
 
 #define EXPECT_ZR(cond, expr)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_zr(expr, llen); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_zr(expr, llen); } while (0)
 
 static int expect_zr(int expr, int llen)
 {
@@ -160,7 +160,7 @@ static int expect_zr(int expr, int llen)
 
 
 #define EXPECT_NZ(cond, expr, val)			\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_nz(expr, llen; } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_nz(expr, llen; } while (0)
 
 static int expect_nz(int expr, int llen)
 {
@@ -173,7 +173,7 @@ static int expect_nz(int expr, int llen)
 
 
 #define EXPECT_EQ(cond, expr, val)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_eq(expr, llen, val); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_eq(expr, llen, val); } while (0)
 
 static int expect_eq(uint64_t expr, int llen, uint64_t val)
 {
@@ -186,7 +186,7 @@ static int expect_eq(uint64_t expr, int llen, uint64_t val)
 
 
 #define EXPECT_NE(cond, expr, val)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ne(expr, llen, val); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ne(expr, llen, val); } while (0)
 
 static int expect_ne(int expr, int llen, int val)
 {
@@ -199,7 +199,7 @@ static int expect_ne(int expr, int llen, int val)
 
 
 #define EXPECT_GE(cond, expr, val)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ge(expr, llen, val); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ge(expr, llen, val); } while (0)
 
 static int expect_ge(int expr, int llen, int val)
 {
@@ -212,7 +212,7 @@ static int expect_ge(int expr, int llen, int val)
 
 
 #define EXPECT_GT(cond, expr, val)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_gt(expr, llen, val); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_gt(expr, llen, val); } while (0)
 
 static int expect_gt(int expr, int llen, int val)
 {
@@ -225,7 +225,7 @@ static int expect_gt(int expr, int llen, int val)
 
 
 #define EXPECT_LE(cond, expr, val)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_le(expr, llen, val); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_le(expr, llen, val); } while (0)
 
 static int expect_le(int expr, int llen, int val)
 {
@@ -238,7 +238,7 @@ static int expect_le(int expr, int llen, int val)
 
 
 #define EXPECT_LT(cond, expr, val)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_lt(expr, llen, val); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_lt(expr, llen, val); } while (0)
 
 static int expect_lt(int expr, int llen, int val)
 {
@@ -251,7 +251,7 @@ static int expect_lt(int expr, int llen, int val)
 
 
 #define EXPECT_SYSZR(cond, expr)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syszr(expr, llen); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syszr(expr, llen); } while (0)
 
 static int expect_syszr(int expr, int llen)
 {
@@ -270,7 +270,7 @@ static int expect_syszr(int expr, int llen)
 
 
 #define EXPECT_SYSEQ(cond, expr, val)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syseq(expr, llen, val); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syseq(expr, llen, val); } while (0)
 
 static int expect_syseq(int expr, int llen, int val)
 {
@@ -289,7 +289,7 @@ static int expect_syseq(int expr, int llen, int val)
 
 
 #define EXPECT_SYSNE(cond, expr, val)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_sysne(expr, llen, val); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_sysne(expr, llen, val); } while (0)
 
 static int expect_sysne(int expr, int llen, int val)
 {
@@ -308,7 +308,7 @@ static int expect_sysne(int expr, int llen, int val)
 
 
 #define EXPECT_SYSER2(cond, expr, expret, experr1, experr2)		\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syserr2(expr, expret, experr1, experr2, llen); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syserr2(expr, expret, experr1, experr2, llen); } while (0)
 
 #define EXPECT_SYSER(cond, expr, expret, experr)			\
 	EXPECT_SYSER2(cond, expr, expret, experr, 0)
@@ -334,7 +334,7 @@ static int expect_syserr2(int expr, int expret, int experr1, int experr2, int ll
 
 
 #define EXPECT_PTRZR(cond, expr)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrzr(expr, llen); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrzr(expr, llen); } while (0)
 
 static int expect_ptrzr(const void *expr, int llen)
 {
@@ -352,7 +352,7 @@ static int expect_ptrzr(const void *expr, int llen)
 
 
 #define EXPECT_PTRNZ(cond, expr)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrnz(expr, llen); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrnz(expr, llen); } while (0)
 
 static int expect_ptrnz(const void *expr, int llen)
 {
@@ -369,7 +369,7 @@ static int expect_ptrnz(const void *expr, int llen)
 }
 
 #define EXPECT_PTREQ(cond, expr, cmp)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptreq(expr, llen, cmp); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptreq(expr, llen, cmp); } while (0)
 
 static int expect_ptreq(const void *expr, int llen, const void *cmp)
 {
@@ -386,7 +386,7 @@ static int expect_ptreq(const void *expr, int llen, const void *cmp)
 }
 
 #define EXPECT_PTRNE(cond, expr, cmp)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrne(expr, llen, cmp); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrne(expr, llen, cmp); } while (0)
 
 static int expect_ptrne(const void *expr, int llen, const void *cmp)
 {
@@ -403,7 +403,7 @@ static int expect_ptrne(const void *expr, int llen, const void *cmp)
 }
 
 #define EXPECT_PTRER2(cond, expr, expret, experr1, experr2)		\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrerr2(expr, expret, experr1, experr2, llen); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrerr2(expr, expret, experr1, experr2, llen); } while (0)
 
 #define EXPECT_PTRER(cond, expr, expret, experr)			\
 	EXPECT_PTRER2(cond, expr, expret, experr, 0)
@@ -428,7 +428,7 @@ static int expect_ptrerr2(const void *expr, const void *expret, int experr1, int
 }
 
 #define EXPECT_STRZR(cond, expr)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strzr(expr, llen); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strzr(expr, llen); } while (0)
 
 static int expect_strzr(const char *expr, int llen)
 {
@@ -446,7 +446,7 @@ static int expect_strzr(const char *expr, int llen)
 
 
 #define EXPECT_STRNZ(cond, expr)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strnz(expr, llen); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strnz(expr, llen); } while (0)
 
 static int expect_strnz(const char *expr, int llen)
 {
@@ -464,7 +464,7 @@ static int expect_strnz(const char *expr, int llen)
 
 
 #define EXPECT_STREQ(cond, expr, cmp)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_streq(expr, llen, cmp); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_streq(expr, llen, cmp); } while (0)
 
 static int expect_streq(const char *expr, int llen, const char *cmp)
 {
@@ -482,7 +482,7 @@ static int expect_streq(const char *expr, int llen, const char *cmp)
 
 
 #define EXPECT_STRNE(cond, expr, cmp)				\
-	do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strne(expr, llen, cmp); } while (0)
+	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strne(expr, llen, cmp); } while (0)
 
 static int expect_strne(const char *expr, int llen, const char *cmp)
 {

From b184a261e526ada6207b62a96624cc97d741565c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Tue, 11 Jul 2023 11:48:40 +0200
Subject: [PATCH 1764/2047] selftests/nolibc: simplify status printing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pad_spc() is only ever used to print the status message of testcases.
The line size is always constant, the return value is never used and the
format string is never used as such.

Remove all the unneeded logic and simplify the API and its users.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 154 ++++++++++---------
 1 file changed, 81 insertions(+), 73 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 54b8421fe54a..644490d93070 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -127,17 +127,25 @@ static void putcharn(char c, size_t n)
 	fputs(buf, stdout);
 }
 
-static int pad_spc(int llen, int cnt, const char *fmt, ...)
+enum RESULT {
+	OK,
+	FAIL,
+	SKIPPED,
+};
+
+static void result(int llen, enum RESULT r)
 {
-	va_list args;
-	int ret;
+	const char *msg;
 
-	putcharn(' ', cnt - llen);
+	if (r == OK)
+		msg = " [OK]";
+	else if (r == SKIPPED)
+		msg = "[SKIPPED]";
+	else
+		msg = "[FAIL]";
 
-	va_start(args, fmt);
-	ret = vfprintf(stdout, fmt, args);
-	va_end(args);
-	return ret < 0 ? ret : ret + cnt - llen;
+	putcharn(' ', 64 - llen);
+	puts(msg);
 }
 
 /* The tests below are intended to be used by the macroes, which evaluate
@@ -147,111 +155,111 @@ static int pad_spc(int llen, int cnt, const char *fmt, ...)
  */
 
 #define EXPECT_ZR(cond, expr)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_zr(expr, llen); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_zr(expr, llen); } while (0)
 
 static int expect_zr(int expr, int llen)
 {
 	int ret = !(expr == 0);
 
 	llen += printf(" = %d ", expr);
-	pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
+	result(llen, ret ? FAIL : OK);
 	return ret;
 }
 
 
 #define EXPECT_NZ(cond, expr, val)			\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_nz(expr, llen; } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_nz(expr, llen; } while (0)
 
 static int expect_nz(int expr, int llen)
 {
 	int ret = !(expr != 0);
 
 	llen += printf(" = %d ", expr);
-	pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
+	result(llen, ret ? FAIL : OK);
 	return ret;
 }
 
 
 #define EXPECT_EQ(cond, expr, val)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_eq(expr, llen, val); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_eq(expr, llen, val); } while (0)
 
 static int expect_eq(uint64_t expr, int llen, uint64_t val)
 {
 	int ret = !(expr == val);
 
 	llen += printf(" = %lld ", (long long)expr);
-	pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
+	result(llen, ret ? FAIL : OK);
 	return ret;
 }
 
 
 #define EXPECT_NE(cond, expr, val)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ne(expr, llen, val); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ne(expr, llen, val); } while (0)
 
 static int expect_ne(int expr, int llen, int val)
 {
 	int ret = !(expr != val);
 
 	llen += printf(" = %d ", expr);
-	pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
+	result(llen, ret ? FAIL : OK);
 	return ret;
 }
 
 
 #define EXPECT_GE(cond, expr, val)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ge(expr, llen, val); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ge(expr, llen, val); } while (0)
 
 static int expect_ge(int expr, int llen, int val)
 {
 	int ret = !(expr >= val);
 
 	llen += printf(" = %d ", expr);
-	pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
+	result(llen, ret ? FAIL : OK);
 	return ret;
 }
 
 
 #define EXPECT_GT(cond, expr, val)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_gt(expr, llen, val); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_gt(expr, llen, val); } while (0)
 
 static int expect_gt(int expr, int llen, int val)
 {
 	int ret = !(expr > val);
 
 	llen += printf(" = %d ", expr);
-	pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
+	result(llen, ret ? FAIL : OK);
 	return ret;
 }
 
 
 #define EXPECT_LE(cond, expr, val)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_le(expr, llen, val); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_le(expr, llen, val); } while (0)
 
 static int expect_le(int expr, int llen, int val)
 {
 	int ret = !(expr <= val);
 
 	llen += printf(" = %d ", expr);
-	pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
+	result(llen, ret ? FAIL : OK);
 	return ret;
 }
 
 
 #define EXPECT_LT(cond, expr, val)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_lt(expr, llen, val); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_lt(expr, llen, val); } while (0)
 
 static int expect_lt(int expr, int llen, int val)
 {
 	int ret = !(expr < val);
 
 	llen += printf(" = %d ", expr);
-	pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
+	result(llen, ret ? FAIL : OK);
 	return ret;
 }
 
 
 #define EXPECT_SYSZR(cond, expr)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syszr(expr, llen); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_syszr(expr, llen); } while (0)
 
 static int expect_syszr(int expr, int llen)
 {
@@ -260,17 +268,17 @@ static int expect_syszr(int expr, int llen)
 	if (expr) {
 		ret = 1;
 		llen += printf(" = %d %s ", expr, errorname(errno));
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
 		llen += printf(" = %d ", expr);
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
 
 
 #define EXPECT_SYSEQ(cond, expr, val)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syseq(expr, llen, val); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_syseq(expr, llen, val); } while (0)
 
 static int expect_syseq(int expr, int llen, int val)
 {
@@ -279,17 +287,17 @@ static int expect_syseq(int expr, int llen, int val)
 	if (expr != val) {
 		ret = 1;
 		llen += printf(" = %d %s ", expr, errorname(errno));
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
 		llen += printf(" = %d ", expr);
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
 
 
 #define EXPECT_SYSNE(cond, expr, val)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_sysne(expr, llen, val); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_sysne(expr, llen, val); } while (0)
 
 static int expect_sysne(int expr, int llen, int val)
 {
@@ -298,17 +306,17 @@ static int expect_sysne(int expr, int llen, int val)
 	if (expr == val) {
 		ret = 1;
 		llen += printf(" = %d %s ", expr, errorname(errno));
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
 		llen += printf(" = %d ", expr);
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
 
 
 #define EXPECT_SYSER2(cond, expr, expret, experr1, experr2)		\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syserr2(expr, expret, experr1, experr2, llen); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_syserr2(expr, expret, experr1, experr2, llen); } while (0)
 
 #define EXPECT_SYSER(cond, expr, expret, experr)			\
 	EXPECT_SYSER2(cond, expr, expret, experr, 0)
@@ -325,16 +333,16 @@ static int expect_syserr2(int expr, int expret, int experr1, int experr2, int ll
 			llen += printf(" != (%d %s) ", expret, errorname(experr1));
 		else
 			llen += printf(" != (%d %s %s) ", expret, errorname(experr1), errorname(experr2));
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
 
 
 #define EXPECT_PTRZR(cond, expr)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrzr(expr, llen); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrzr(expr, llen); } while (0)
 
 static int expect_ptrzr(const void *expr, int llen)
 {
@@ -343,16 +351,16 @@ static int expect_ptrzr(const void *expr, int llen)
 	llen += printf(" = <%p> ", expr);
 	if (expr) {
 		ret = 1;
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
 
 
 #define EXPECT_PTRNZ(cond, expr)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrnz(expr, llen); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrnz(expr, llen); } while (0)
 
 static int expect_ptrnz(const void *expr, int llen)
 {
@@ -361,15 +369,15 @@ static int expect_ptrnz(const void *expr, int llen)
 	llen += printf(" = <%p> ", expr);
 	if (!expr) {
 		ret = 1;
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
 
 #define EXPECT_PTREQ(cond, expr, cmp)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptreq(expr, llen, cmp); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptreq(expr, llen, cmp); } while (0)
 
 static int expect_ptreq(const void *expr, int llen, const void *cmp)
 {
@@ -378,15 +386,15 @@ static int expect_ptreq(const void *expr, int llen, const void *cmp)
 	llen += printf(" = <%p> ", expr);
 	if (expr != cmp) {
 		ret = 1;
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
 
 #define EXPECT_PTRNE(cond, expr, cmp)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrne(expr, llen, cmp); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrne(expr, llen, cmp); } while (0)
 
 static int expect_ptrne(const void *expr, int llen, const void *cmp)
 {
@@ -395,15 +403,15 @@ static int expect_ptrne(const void *expr, int llen, const void *cmp)
 	llen += printf(" = <%p> ", expr);
 	if (expr == cmp) {
 		ret = 1;
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
 
 #define EXPECT_PTRER2(cond, expr, expret, experr1, experr2)		\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrerr2(expr, expret, experr1, experr2, llen); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrerr2(expr, expret, experr1, experr2, llen); } while (0)
 
 #define EXPECT_PTRER(cond, expr, expret, experr)			\
 	EXPECT_PTRER2(cond, expr, expret, experr, 0)
@@ -420,15 +428,15 @@ static int expect_ptrerr2(const void *expr, const void *expret, int experr1, int
 			llen += printf(" != (<%p> %s) ", expret, errorname(experr1));
 		else
 			llen += printf(" != (<%p> %s %s) ", expret, errorname(experr1), errorname(experr2));
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
 
 #define EXPECT_STRZR(cond, expr)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strzr(expr, llen); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strzr(expr, llen); } while (0)
 
 static int expect_strzr(const char *expr, int llen)
 {
@@ -437,16 +445,16 @@ static int expect_strzr(const char *expr, int llen)
 	llen += printf(" = <%s> ", expr);
 	if (expr) {
 		ret = 1;
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
 
 
 #define EXPECT_STRNZ(cond, expr)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strnz(expr, llen); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strnz(expr, llen); } while (0)
 
 static int expect_strnz(const char *expr, int llen)
 {
@@ -455,16 +463,16 @@ static int expect_strnz(const char *expr, int llen)
 	llen += printf(" = <%s> ", expr);
 	if (!expr) {
 		ret = 1;
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
 
 
 #define EXPECT_STREQ(cond, expr, cmp)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_streq(expr, llen, cmp); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_streq(expr, llen, cmp); } while (0)
 
 static int expect_streq(const char *expr, int llen, const char *cmp)
 {
@@ -473,16 +481,16 @@ static int expect_streq(const char *expr, int llen, const char *cmp)
 	llen += printf(" = <%s> ", expr);
 	if (strcmp(expr, cmp) != 0) {
 		ret = 1;
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
 
 
 #define EXPECT_STRNE(cond, expr, cmp)				\
-	do { if (!(cond)) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strne(expr, llen, cmp); } while (0)
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strne(expr, llen, cmp); } while (0)
 
 static int expect_strne(const char *expr, int llen, const char *cmp)
 {
@@ -491,9 +499,9 @@ static int expect_strne(const char *expr, int llen, const char *cmp)
 	llen += printf(" = <%s> ", expr);
 	if (strcmp(expr, cmp) == 0) {
 		ret = 1;
-		llen += pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 	} else {
-		llen += pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 	}
 	return ret;
 }
@@ -880,13 +888,13 @@ static int expect_vfprintf(int llen, size_t c, const char *expected, const char
 
 	fd = open("/tmp", O_TMPFILE | O_EXCL | O_RDWR, 0600);
 	if (fd == -1) {
-		pad_spc(llen, 64, "[SKIPPED]\n");
+		result(llen, SKIPPED);
 		return 0;
 	}
 
 	memfile = fdopen(fd, "w+");
 	if (!memfile) {
-		pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 		return 1;
 	}
 
@@ -896,7 +904,7 @@ static int expect_vfprintf(int llen, size_t c, const char *expected, const char
 
 	if (w != c) {
 		llen += printf(" written(%d) != %d", w, (int) c);
-		pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 		return 1;
 	}
 
@@ -910,14 +918,14 @@ static int expect_vfprintf(int llen, size_t c, const char *expected, const char
 
 	if (r != w) {
 		llen += printf(" written(%d) != read(%d)", w, r);
-		pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 		return 1;
 	}
 
 	llen += printf(" \"%s\" = \"%s\"", expected, buf);
 	ret = strncmp(expected, buf, c);
 
-	pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n");
+	result(llen, ret ? FAIL : OK);
 	return ret;
 }
 
@@ -973,14 +981,14 @@ static int run_protection(int min, int max)
 
 #if !defined(_NOLIBC_STACKPROTECTOR)
 	llen += printf("not supported");
-	pad_spc(llen, 64, "[SKIPPED]\n");
+	result(llen, SKIPPED);
 	return 0;
 #endif
 
 #if defined(_NOLIBC_STACKPROTECTOR)
 	if (!__stack_chk_guard) {
 		llen += printf("__stack_chk_guard not initialized");
-		pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 		return 1;
 	}
 #endif
@@ -991,7 +999,7 @@ static int run_protection(int min, int max)
 	switch (pid) {
 	case -1:
 		llen += printf("fork()");
-		pad_spc(llen, 64, "[FAIL]\n");
+		result(llen, FAIL);
 		return 1;
 
 	case 0:
@@ -1007,10 +1015,10 @@ static int run_protection(int min, int max)
 
 		if (pid == -1 || !WIFSIGNALED(status) || WTERMSIG(status) != SIGABRT) {
 			llen += printf("waitpid()");
-			pad_spc(llen, 64, "[FAIL]\n");
+			result(llen, FAIL);
 			return 1;
 		}
-		pad_spc(llen, 64, " [OK]\n");
+		result(llen, OK);
 		return 0;
 	}
 }

From ceb528feb7c89c9ef1e25e00494db9c6866cd4c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Tue, 11 Jul 2023 11:48:42 +0200
Subject: [PATCH 1765/2047] selftests/nolibc: avoid gaps in test numbers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As the test numbers are based on line numbers gaps without testcases are
to be avoided.
Instead use the already existing test condition logic to implement
conditional execution.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 30 +++++++++-----------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 644490d93070..7cd1aa800b8a 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -851,23 +851,19 @@ int run_stdlib(int min, int max)
 		CASE_TEST(limit_int_fast64_min);    EXPECT_EQ(1, INT_FAST64_MIN,   (int_fast64_t)    INT64_MIN); break;
 		CASE_TEST(limit_int_fast64_max);    EXPECT_EQ(1, INT_FAST64_MAX,   (int_fast64_t)    INT64_MAX); break;
 		CASE_TEST(limit_uint_fast64_max);   EXPECT_EQ(1, UINT_FAST64_MAX,  (uint_fast64_t)   UINT64_MAX); break;
-#if __SIZEOF_LONG__ == 8
-		CASE_TEST(limit_intptr_min);        EXPECT_EQ(1, INTPTR_MIN,       (intptr_t)        0x8000000000000000LL); break;
-		CASE_TEST(limit_intptr_max);        EXPECT_EQ(1, INTPTR_MAX,       (intptr_t)        0x7fffffffffffffffLL); break;
-		CASE_TEST(limit_uintptr_max);       EXPECT_EQ(1, UINTPTR_MAX,      (uintptr_t)       0xffffffffffffffffULL); break;
-		CASE_TEST(limit_ptrdiff_min);       EXPECT_EQ(1, PTRDIFF_MIN,      (ptrdiff_t)       0x8000000000000000LL); break;
-		CASE_TEST(limit_ptrdiff_max);       EXPECT_EQ(1, PTRDIFF_MAX,      (ptrdiff_t)       0x7fffffffffffffffLL); break;
-		CASE_TEST(limit_size_max);          EXPECT_EQ(1, SIZE_MAX,         (size_t)          0xffffffffffffffffULL); break;
-#elif __SIZEOF_LONG__ == 4
-		CASE_TEST(limit_intptr_min);        EXPECT_EQ(1, INTPTR_MIN,       (intptr_t)        0x80000000); break;
-		CASE_TEST(limit_intptr_max);        EXPECT_EQ(1, INTPTR_MAX,       (intptr_t)        0x7fffffff); break;
-		CASE_TEST(limit_uintptr_max);       EXPECT_EQ(1, UINTPTR_MAX,      (uintptr_t)       0xffffffffU); break;
-		CASE_TEST(limit_ptrdiff_min);       EXPECT_EQ(1, PTRDIFF_MIN,      (ptrdiff_t)       0x80000000); break;
-		CASE_TEST(limit_ptrdiff_max);       EXPECT_EQ(1, PTRDIFF_MAX,      (ptrdiff_t)       0x7fffffff); break;
-		CASE_TEST(limit_size_max);          EXPECT_EQ(1, SIZE_MAX,         (size_t)          0xffffffffU); break;
-#else
-# warning "__SIZEOF_LONG__ is undefined"
-#endif /* __SIZEOF_LONG__ */
+		CASE_TEST(sizeof_long_sane);        EXPECT_EQ(1, sizeof(long) == 8 || sizeof(long) == 4, 1); break;
+		CASE_TEST(limit_intptr_min_64);     EXPECT_EQ(sizeof(long) == 8, INTPTR_MIN,  (intptr_t)  0x8000000000000000LL); break;
+		CASE_TEST(limit_intptr_max_64);     EXPECT_EQ(sizeof(long) == 8, INTPTR_MAX,  (intptr_t)  0x7fffffffffffffffLL); break;
+		CASE_TEST(limit_uintptr_max_64);    EXPECT_EQ(sizeof(long) == 8, UINTPTR_MAX, (uintptr_t) 0xffffffffffffffffULL); break;
+		CASE_TEST(limit_ptrdiff_min_64);    EXPECT_EQ(sizeof(long) == 8, PTRDIFF_MIN, (ptrdiff_t) 0x8000000000000000LL); break;
+		CASE_TEST(limit_ptrdiff_max_64);    EXPECT_EQ(sizeof(long) == 8, PTRDIFF_MAX, (ptrdiff_t) 0x7fffffffffffffffLL); break;
+		CASE_TEST(limit_size_max_64);       EXPECT_EQ(sizeof(long) == 8, SIZE_MAX,    (size_t)    0xffffffffffffffffULL); break;
+		CASE_TEST(limit_intptr_min_32);     EXPECT_EQ(sizeof(long) == 4, INTPTR_MIN,  (intptr_t)  0x80000000); break;
+		CASE_TEST(limit_intptr_max_32);     EXPECT_EQ(sizeof(long) == 4, INTPTR_MAX,  (intptr_t)  0x7fffffff); break;
+		CASE_TEST(limit_uintptr_max_32);    EXPECT_EQ(sizeof(long) == 4, UINTPTR_MAX, (uintptr_t) 0xffffffffU); break;
+		CASE_TEST(limit_ptrdiff_min_32);    EXPECT_EQ(sizeof(long) == 4, PTRDIFF_MIN, (ptrdiff_t) 0x80000000); break;
+		CASE_TEST(limit_ptrdiff_max_32);    EXPECT_EQ(sizeof(long) == 4, PTRDIFF_MAX, (ptrdiff_t) 0x7fffffff); break;
+		CASE_TEST(limit_size_max_32);       EXPECT_EQ(sizeof(long) == 4, SIZE_MAX,    (size_t)    0xffffffffU); break;
 		case __LINE__:
 			return ret; /* must be last */
 		/* note: do not set any defaults so as to permit holes above */

From 20233498359a29f7b2ff4e8fbdb0a1a7c8d5744c Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:17:43 +0800
Subject: [PATCH 1766/2047] tools/nolibc: arch-*.h: add missing space after ','

Fix up such errors reported by scripts/checkpatch.pl:

    ERROR: space required after that ',' (ctx:VxV)
    #148: FILE: tools/include/nolibc/arch-aarch64.h:148:
    +void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
                             ^

    ERROR: space required after that ',' (ctx:VxV)
    #148: FILE: tools/include/nolibc/arch-aarch64.h:148:
    +void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
                                      ^

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-aarch64.h   | 2 +-
 tools/include/nolibc/arch-arm.h       | 2 +-
 tools/include/nolibc/arch-i386.h      | 2 +-
 tools/include/nolibc/arch-loongarch.h | 2 +-
 tools/include/nolibc/arch-mips.h      | 2 +-
 tools/include/nolibc/arch-riscv.h     | 2 +-
 tools/include/nolibc/arch-s390.h      | 2 +-
 tools/include/nolibc/arch-x86_64.h    | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
index 6227b77a4a09..6151be6cd7a9 100644
--- a/tools/include/nolibc/arch-aarch64.h
+++ b/tools/include/nolibc/arch-aarch64.h
@@ -175,7 +175,7 @@ char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
 /* startup code */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 #ifdef _NOLIBC_STACKPROTECTOR
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index 4d4887a5f04b..5b12b6e1c83e 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -225,7 +225,7 @@ char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
 /* startup code */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 #ifdef _NOLIBC_STACKPROTECTOR
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
index 4c6b7c04e2e7..35680b4a25d4 100644
--- a/tools/include/nolibc/arch-i386.h
+++ b/tools/include/nolibc/arch-i386.h
@@ -190,7 +190,7 @@ const unsigned long *_auxv __attribute__((weak));
  * 2) The deepest stack frame should be set to zero
  *
  */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 #ifdef _NOLIBC_STACKPROTECTOR
diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
index 8aa7724fe38e..ada5a69e3fcc 100644
--- a/tools/include/nolibc/arch-loongarch.h
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -167,7 +167,7 @@ const unsigned long *_auxv __attribute__((weak));
 #endif
 
 /* startup code */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 #ifdef _NOLIBC_STACKPROTECTOR
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index a2bfdf57b957..dd0f12131b55 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -205,7 +205,7 @@ char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
 /* startup code, note that it's called __start on MIPS */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) __no_stack_protector __start(void)
+void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector __start(void)
 {
 	__asm__ volatile (
 		/*".set nomips16\n"*/
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index cd958b2f4b1b..1dd7083c2ac8 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -180,7 +180,7 @@ char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
 /* startup code */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 		".option push\n"
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index a644ecd361c0..8254caff8bfa 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -166,7 +166,7 @@ char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
 /* startup code */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 		"lg	%r2,0(%r15)\n"		/* argument count */
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
index e69113742a99..fb00ab2e9274 100644
--- a/tools/include/nolibc/arch-x86_64.h
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -190,7 +190,7 @@ const unsigned long *_auxv __attribute__((weak));
  * 2) The deepest stack frame should be zero (the %rbp).
  *
  */
-void __attribute__((weak,noreturn,optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 #ifdef _NOLIBC_STACKPROTECTOR

From bff60150f7c464d80d86f289c056c2ad2afb3c05 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:18:54 +0800
Subject: [PATCH 1767/2047] tools/nolibc: fix up startup failures for -O0 under
 gcc < 11.1.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As gcc doc [1] shows:

  Most optimizations are completely disabled at -O0 or if an -O level is
  not set on the command line, even if individual optimization flags are
  specified.

Test result [2] shows, gcc>=11.1.0 deviates from the above description,
but before gcc 11.1.0, "-O0" still forcely uses frame pointer in the
_start function even if the individual optimize("omit-frame-pointer")
flag is specified.

The frame pointer related operations will change the stack pointer (e.g.
In x86_64, an extra "push %rbp" will be inserted at the beginning of
_start) and make it differs from the one we expected, as a result, break
the whole startup function.

To fix up this issue, as suggested by Thomas, the individual "Os" and
"omit-frame-pointer" optimize flags are used together on _start function
to disable frame pointer completely even if the -O0 is set on the
command line.

[1]: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
[2]: https://lore.kernel.org/lkml/20230714094723.140603-1-falcon@tinylab.org/

Suggested-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/lkml/34b21ba5-7b59-4b3b-9ed6-ef9a3a5e06f7@t-8ch.de/
Fixes: 7f8548589661 ("tools/nolibc: make compiler and assembler agree on the section around _start")
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-aarch64.h   | 2 +-
 tools/include/nolibc/arch-arm.h       | 2 +-
 tools/include/nolibc/arch-i386.h      | 2 +-
 tools/include/nolibc/arch-loongarch.h | 2 +-
 tools/include/nolibc/arch-mips.h      | 2 +-
 tools/include/nolibc/arch-riscv.h     | 2 +-
 tools/include/nolibc/arch-s390.h      | 2 +-
 tools/include/nolibc/arch-x86_64.h    | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
index 6151be6cd7a9..21e9482a4235 100644
--- a/tools/include/nolibc/arch-aarch64.h
+++ b/tools/include/nolibc/arch-aarch64.h
@@ -175,7 +175,7 @@ char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
 /* startup code */
-void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 #ifdef _NOLIBC_STACKPROTECTOR
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index 5b12b6e1c83e..4451bef5f94d 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -225,7 +225,7 @@ char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
 /* startup code */
-void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 #ifdef _NOLIBC_STACKPROTECTOR
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
index 35680b4a25d4..4c94a81e860f 100644
--- a/tools/include/nolibc/arch-i386.h
+++ b/tools/include/nolibc/arch-i386.h
@@ -190,7 +190,7 @@ const unsigned long *_auxv __attribute__((weak));
  * 2) The deepest stack frame should be set to zero
  *
  */
-void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 #ifdef _NOLIBC_STACKPROTECTOR
diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
index ada5a69e3fcc..590155a4e543 100644
--- a/tools/include/nolibc/arch-loongarch.h
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -167,7 +167,7 @@ const unsigned long *_auxv __attribute__((weak));
 #endif
 
 /* startup code */
-void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 #ifdef _NOLIBC_STACKPROTECTOR
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index dd0f12131b55..d3f0bf4c4245 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -205,7 +205,7 @@ char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
 /* startup code, note that it's called __start on MIPS */
-void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector __start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector __start(void)
 {
 	__asm__ volatile (
 		/*".set nomips16\n"*/
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index 1dd7083c2ac8..322c96f4acdb 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -180,7 +180,7 @@ char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
 /* startup code */
-void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 		".option push\n"
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index 8254caff8bfa..587cc91295fa 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -166,7 +166,7 @@ char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
 /* startup code */
-void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 		"lg	%r2,0(%r15)\n"		/* argument count */
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
index fb00ab2e9274..5e950a04bc77 100644
--- a/tools/include/nolibc/arch-x86_64.h
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -190,7 +190,7 @@ const unsigned long *_auxv __attribute__((weak));
  * 2) The deepest stack frame should be zero (the %rbp).
  *
  */
-void __attribute__((weak, noreturn, optimize("omit-frame-pointer"))) __no_stack_protector _start(void)
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 #ifdef _NOLIBC_STACKPROTECTOR

From af93807eaef689a4964fdec085c77c24b49a5560 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:20:01 +0800
Subject: [PATCH 1768/2047] tools/nolibc: remove the old sys_stat support

The statx manpage [1] shows that it has been supported from Linux 4.11
and glibc 2.28, the Linux support can be checked for all of the
architectures with this command:

    $ git grep -r statx v4.11 arch/ include/uapi/asm-generic/unistd.h \
      | grep -E "aarch64|arm|mips|s390|x86|:include/uapi"

Besides riscv and loongarch, all of the nolibc supported architectures
have added sys_statx from Linux v4.11. riscv is mainlined to v4.15,
loongarch is mainlined to v5.19, both of them use the generic unistd.h,
so, they have added sys_statx from their first mainline versions.

The current oldest stable branch is v4.14, only reserving sys_statx
still preserves compatibility with all of the supported stable branches,
So, let's remove the old arch related and dependent sys_stat support
completely.

This is friendly to the future new architecture porting.

[1]: https://man7.org/linux/man-pages/man2/statx.2.html

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-aarch64.h | 28 -------------
 tools/include/nolibc/arch-arm.h     | 37 -----------------
 tools/include/nolibc/arch-i386.h    | 26 ------------
 tools/include/nolibc/arch-mips.h    | 28 -------------
 tools/include/nolibc/arch-riscv.h   | 23 -----------
 tools/include/nolibc/arch-s390.h    | 25 ------------
 tools/include/nolibc/arch-x86_64.h  | 27 -------------
 tools/include/nolibc/sys.h          | 63 +++++------------------------
 tools/include/nolibc/types.h        |  4 +-
 9 files changed, 13 insertions(+), 248 deletions(-)

diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
index 21e9482a4235..3c0a5f47b3e8 100644
--- a/tools/include/nolibc/arch-aarch64.h
+++ b/tools/include/nolibc/arch-aarch64.h
@@ -9,34 +9,6 @@
 
 #include "compiler.h"
 
-/* The struct returned by the newfstatat() syscall. Differs slightly from the
- * x86_64's stat one by field ordering, so be careful.
- */
-struct sys_stat_struct {
-	unsigned long   st_dev;
-	unsigned long   st_ino;
-	unsigned int    st_mode;
-	unsigned int    st_nlink;
-	unsigned int    st_uid;
-	unsigned int    st_gid;
-
-	unsigned long   st_rdev;
-	unsigned long   __pad1;
-	long            st_size;
-	int             st_blksize;
-	int             __pad2;
-
-	long            st_blocks;
-	long            st_atime;
-	unsigned long   st_atime_nsec;
-	long            st_mtime;
-
-	unsigned long   st_mtime_nsec;
-	long            st_ctime;
-	unsigned long   st_ctime_nsec;
-	unsigned int    __unused[2];
-};
-
 /* Syscalls for AARCH64 :
  *   - registers are 64-bit
  *   - stack is 16-byte aligned
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index 4451bef5f94d..a06dad789a22 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -9,43 +9,6 @@
 
 #include "compiler.h"
 
-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
- * exactly 56 bytes (stops before the unused array). In big endian, the format
- * differs as devices are returned as short only.
- */
-struct sys_stat_struct {
-#if defined(__ARMEB__)
-	unsigned short st_dev;
-	unsigned short __pad1;
-#else
-	unsigned long  st_dev;
-#endif
-	unsigned long  st_ino;
-	unsigned short st_mode;
-	unsigned short st_nlink;
-	unsigned short st_uid;
-	unsigned short st_gid;
-
-#if defined(__ARMEB__)
-	unsigned short st_rdev;
-	unsigned short __pad2;
-#else
-	unsigned long  st_rdev;
-#endif
-	unsigned long  st_size;
-	unsigned long  st_blksize;
-	unsigned long  st_blocks;
-
-	unsigned long  st_atime;
-	unsigned long  st_atime_nsec;
-	unsigned long  st_mtime;
-	unsigned long  st_mtime_nsec;
-
-	unsigned long  st_ctime;
-	unsigned long  st_ctime_nsec;
-	unsigned long  __unused[2];
-};
-
 /* Syscalls for ARM in ARM or Thumb modes :
  *   - registers are 32-bit
  *   - stack is 8-byte aligned
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
index 4c94a81e860f..fe0b73f032c3 100644
--- a/tools/include/nolibc/arch-i386.h
+++ b/tools/include/nolibc/arch-i386.h
@@ -9,32 +9,6 @@
 
 #include "compiler.h"
 
-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
- * exactly 56 bytes (stops before the unused array).
- */
-struct sys_stat_struct {
-	unsigned long  st_dev;
-	unsigned long  st_ino;
-	unsigned short st_mode;
-	unsigned short st_nlink;
-	unsigned short st_uid;
-	unsigned short st_gid;
-
-	unsigned long  st_rdev;
-	unsigned long  st_size;
-	unsigned long  st_blksize;
-	unsigned long  st_blocks;
-
-	unsigned long  st_atime;
-	unsigned long  st_atime_nsec;
-	unsigned long  st_mtime;
-	unsigned long  st_mtime_nsec;
-
-	unsigned long  st_ctime;
-	unsigned long  st_ctime_nsec;
-	unsigned long  __unused[2];
-};
-
 /* Syscalls for i386 :
  *   - mostly similar to x86_64
  *   - registers are 32-bit
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index d3f0bf4c4245..7242fc9de04f 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -9,34 +9,6 @@
 
 #include "compiler.h"
 
-/* The struct returned by the stat() syscall. 88 bytes are returned by the
- * syscall.
- */
-struct sys_stat_struct {
-	unsigned int  st_dev;
-	long          st_pad1[3];
-	unsigned long st_ino;
-	unsigned int  st_mode;
-	unsigned int  st_nlink;
-	unsigned int  st_uid;
-	unsigned int  st_gid;
-	unsigned int  st_rdev;
-	long          st_pad2[2];
-	long          st_size;
-	long          st_pad3;
-
-	long          st_atime;
-	long          st_atime_nsec;
-	long          st_mtime;
-	long          st_mtime_nsec;
-
-	long          st_ctime;
-	long          st_ctime_nsec;
-	long          st_blksize;
-	long          st_blocks;
-	long          st_pad4[14];
-};
-
 /* Syscalls for MIPS ABI O32 :
  *   - WARNING! there's always a delayed slot!
  *   - WARNING again, the syntax is different, registers take a '$' and numbers
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index 322c96f4acdb..d49f5ecbf815 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -9,29 +9,6 @@
 
 #include "compiler.h"
 
-struct sys_stat_struct {
-	unsigned long	st_dev;		/* Device.  */
-	unsigned long	st_ino;		/* File serial number.  */
-	unsigned int	st_mode;	/* File mode.  */
-	unsigned int	st_nlink;	/* Link count.  */
-	unsigned int	st_uid;		/* User ID of the file's owner.  */
-	unsigned int	st_gid;		/* Group ID of the file's group. */
-	unsigned long	st_rdev;	/* Device number, if device.  */
-	unsigned long	__pad1;
-	long		st_size;	/* Size of file, in bytes.  */
-	int		st_blksize;	/* Optimal block size for I/O.  */
-	int		__pad2;
-	long		st_blocks;	/* Number 512-byte blocks allocated. */
-	long		st_atime;	/* Time of last access.  */
-	unsigned long	st_atime_nsec;
-	long		st_mtime;	/* Time of last modification.  */
-	unsigned long	st_mtime_nsec;
-	long		st_ctime;	/* Time of last status change.  */
-	unsigned long	st_ctime_nsec;
-	unsigned int	__unused4;
-	unsigned int	__unused5;
-};
-
 #if   __riscv_xlen == 64
 #define PTRLOG "3"
 #define SZREG  "8"
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index 587cc91295fa..3b94ae0cb1d1 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -10,31 +10,6 @@
 
 #include "compiler.h"
 
-/* The struct returned by the stat() syscall, equivalent to stat64(). The
- * syscall returns 116 bytes and stops in the middle of __unused.
- */
-
-struct sys_stat_struct {
-	unsigned long	st_dev;
-	unsigned long	st_ino;
-	unsigned long	st_nlink;
-	unsigned int	st_mode;
-	unsigned int	st_uid;
-	unsigned int	st_gid;
-	unsigned int	__pad1;
-	unsigned long	st_rdev;
-	unsigned long	st_size;
-	unsigned long	st_atime;
-	unsigned long	st_atime_nsec;
-	unsigned long	st_mtime;
-	unsigned long	st_mtime_nsec;
-	unsigned long	st_ctime;
-	unsigned long	st_ctime_nsec;
-	unsigned long	st_blksize;
-	long		st_blocks;
-	unsigned long	__unused[3];
-};
-
 /* Syscalls for s390:
  *   - registers are 64-bit
  *   - syscall number is passed in r1
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
index 5e950a04bc77..6b494ca471ef 100644
--- a/tools/include/nolibc/arch-x86_64.h
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -9,33 +9,6 @@
 
 #include "compiler.h"
 
-/* The struct returned by the stat() syscall, equivalent to stat64(). The
- * syscall returns 116 bytes and stops in the middle of __unused.
- */
-struct sys_stat_struct {
-	unsigned long st_dev;
-	unsigned long st_ino;
-	unsigned long st_nlink;
-	unsigned int  st_mode;
-	unsigned int  st_uid;
-
-	unsigned int  st_gid;
-	unsigned int  __pad0;
-	unsigned long st_rdev;
-	long          st_size;
-	long          st_blksize;
-
-	long          st_blocks;
-	unsigned long st_atime;
-	unsigned long st_atime_nsec;
-	unsigned long st_mtime;
-
-	unsigned long st_mtime_nsec;
-	unsigned long st_ctime;
-	unsigned long st_ctime_nsec;
-	long          __unused[3];
-};
-
 /* Syscalls for x86_64 :
  *   - registers are 64-bit
  *   - syscall number is passed in rax
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index dee56894a811..8bfe7db20b80 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -943,15 +943,19 @@ pid_t setsid(void)
 	return __sysret(sys_setsid());
 }
 
-#if defined(__NR_statx)
 /*
  * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf);
+ * int stat(const char *path, struct stat *buf);
  */
 
 static __attribute__((unused))
 int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
 {
+#ifdef __NR_statx
 	return my_syscall5(__NR_statx, fd, path, flags, mask, buf);
+#else
+	return -ENOSYS;
+#endif
 }
 
 static __attribute__((unused))
@@ -959,24 +963,18 @@ int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *
 {
 	return __sysret(sys_statx(fd, path, flags, mask, buf));
 }
-#endif
 
-/*
- * int stat(const char *path, struct stat *buf);
- * Warning: the struct stat's layout is arch-dependent.
- */
 
-#if defined(__NR_statx) && !defined(__NR_newfstatat) && !defined(__NR_stat)
-/*
- * Maybe we can just use statx() when available for all architectures?
- */
 static __attribute__((unused))
-int sys_stat(const char *path, struct stat *buf)
+int stat(const char *path, struct stat *buf)
 {
 	struct statx statx;
 	long ret;
 
-	ret = sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx);
+	ret = __sysret(sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx));
+	if (ret == -1)
+		return ret;
+
 	buf->st_dev          = ((statx.stx_dev_minor & 0xff)
 			       | (statx.stx_dev_major << 8)
 			       | ((statx.stx_dev_minor & ~0xff) << 12));
@@ -997,47 +995,8 @@ int sys_stat(const char *path, struct stat *buf)
 	buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec;
 	buf->st_ctim.tv_sec  = statx.stx_ctime.tv_sec;
 	buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec;
-	return ret;
-}
-#else
-static __attribute__((unused))
-int sys_stat(const char *path, struct stat *buf)
-{
-	struct sys_stat_struct stat;
-	long ret;
 
-#ifdef __NR_newfstatat
-	/* only solution for arm64 */
-	ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0);
-#elif defined(__NR_stat)
-	ret = my_syscall2(__NR_stat, path, &stat);
-#else
-	return -ENOSYS;
-#endif
-	buf->st_dev          = stat.st_dev;
-	buf->st_ino          = stat.st_ino;
-	buf->st_mode         = stat.st_mode;
-	buf->st_nlink        = stat.st_nlink;
-	buf->st_uid          = stat.st_uid;
-	buf->st_gid          = stat.st_gid;
-	buf->st_rdev         = stat.st_rdev;
-	buf->st_size         = stat.st_size;
-	buf->st_blksize      = stat.st_blksize;
-	buf->st_blocks       = stat.st_blocks;
-	buf->st_atim.tv_sec  = stat.st_atime;
-	buf->st_atim.tv_nsec = stat.st_atime_nsec;
-	buf->st_mtim.tv_sec  = stat.st_mtime;
-	buf->st_mtim.tv_nsec = stat.st_mtime_nsec;
-	buf->st_ctim.tv_sec  = stat.st_ctime;
-	buf->st_ctim.tv_nsec = stat.st_ctime_nsec;
-	return ret;
-}
-#endif
-
-static __attribute__((unused))
-int stat(const char *path, struct stat *buf)
-{
-	return __sysret(sys_stat(path, buf));
+	return 0;
 }
 
 
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
index 23963e48d8ee..8cfc4c860fa4 100644
--- a/tools/include/nolibc/types.h
+++ b/tools/include/nolibc/types.h
@@ -15,8 +15,8 @@
 
 
 /* Only the generic macros and types may be defined here. The arch-specific
- * ones such as the O_RDONLY and related macros used by fcntl() and open(), or
- * the layout of sys_stat_struct must not be defined here.
+ * ones such as the O_RDONLY and related macros used by fcntl() and open()
+ * must not be defined here.
  */
 
 /* stat flags (WARNING, octal here). We need to check for an existing

From 17336755150b19a697079f9c3945eff7882a77c5 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:21:08 +0800
Subject: [PATCH 1769/2047] tools/nolibc: add new crt.h with _start_c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As the environ and _auxv support added for nolibc, the assembly _start
function becomes more and more complex and therefore makes the porting
of nolibc to new architectures harder and harder.

To simplify portability, this C version of _start_c() is added to do
most of the assembly start operations in C, which reduces the complexity
a lot and will eventually simplify the porting of nolibc to the new
architectures.

The new _start_c() only requires a stack pointer argument, it will find
argc, argv, envp/environ and _auxv for us, and then call main(),
finally, it exit() with main's return status. With this new _start_c(),
the future new architectures only require to add very few assembly
instructions.

As suggested by Thomas, users may use a different signature of main
(e.g. void main(void)), a _nolibc_main alias is added for main to
silence the warning about potential conflicting types.

As suggested by Willy, the code is carefully polished for both smaller
size and better readability with local variables and the right types.

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/20230715095729.GC24086@1wt.eu/
Suggested-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/lkml/90fdd255-32f4-4caf-90ff-06456b53dac3@t-8ch.de/
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/Makefile |  1 +
 tools/include/nolibc/crt.h    | 57 +++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 tools/include/nolibc/crt.h

diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index 64d67b080744..909b6eb500fe 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -27,6 +27,7 @@ nolibc_arch := $(patsubst arm64,aarch64,$(ARCH))
 arch_file := arch-$(nolibc_arch).h
 all_files := \
 		compiler.h \
+		crt.h \
 		ctype.h \
 		errno.h \
 		nolibc.h \
diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
new file mode 100644
index 000000000000..92a2a0478cb1
--- /dev/null
+++ b/tools/include/nolibc/crt.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * C Run Time support for NOLIBC
+ * Copyright (C) 2023 Zhangjin Wu <falcon@tinylab.org>
+ */
+
+#ifndef _NOLIBC_CRT_H
+#define _NOLIBC_CRT_H
+
+char **environ __attribute__((weak));
+const unsigned long *_auxv __attribute__((weak));
+
+static void exit(int);
+
+void _start_c(long *sp)
+{
+	long argc;
+	char **argv;
+	char **envp;
+	const unsigned long *auxv;
+	/* silence potential warning: conflicting types for 'main' */
+	int _nolibc_main(int, char **, char **) __asm__ ("main");
+
+	/*
+	 * sp  :    argc          <-- argument count, required by main()
+	 * argv:    argv[0]       <-- argument vector, required by main()
+	 *          argv[1]
+	 *          ...
+	 *          argv[argc-1]
+	 *          null
+	 * environ: environ[0]    <-- environment variables, required by main() and getenv()
+	 *          environ[1]
+	 *          ...
+	 *          null
+	 * _auxv:   _auxv[0]      <-- auxiliary vector, required by getauxval()
+	 *          _auxv[1]
+	 *          ...
+	 *          null
+	 */
+
+	/* assign argc and argv */
+	argc = *sp;
+	argv = (void *)(sp + 1);
+
+	/* find environ */
+	environ = envp = argv + argc + 1;
+
+	/* find _auxv */
+	for (auxv = (void *)envp; *auxv++;)
+		;
+	_auxv = auxv;
+
+	/* go to application */
+	exit(_nolibc_main(argc, argv, envp));
+}
+
+#endif /* _NOLIBC_CRT_H */

From d7f16723d37c6aeaa4fb93b433db367ad8398b9c Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:22:15 +0800
Subject: [PATCH 1770/2047] tools/nolibc: stackprotector.h: add empty
 __stack_chk_init for !_NOLIBC_STACKPROTECTOR

Let's define an empty __stack_chk_init for the !_NOLIBC_STACKPROTECTOR
branch.

This allows to remove #ifdef around every call of __stack_chk_init().

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/stackprotector.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h
index 88f7b2d098ff..b620f2b9578d 100644
--- a/tools/include/nolibc/stackprotector.h
+++ b/tools/include/nolibc/stackprotector.h
@@ -45,6 +45,8 @@ void __stack_chk_init(void)
 	if (__stack_chk_guard != (uintptr_t) &__stack_chk_guard)
 		__stack_chk_guard ^= (uintptr_t) &__stack_chk_guard;
 }
+#else /* !defined(_NOLIBC_STACKPROTECTOR) */
+__inline__ void __stack_chk_init(void) {}
 #endif /* defined(_NOLIBC_STACKPROTECTOR) */
 
 #endif /* _NOLIBC_STACKPROTECTOR_H */

From 06f2a62c81336574b08827a29a817b4250a310ed Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:23:21 +0800
Subject: [PATCH 1771/2047] tools/nolibc: crt.h: initialize stack protector
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As suggested by Thomas, It is able to move the stackprotector
initialization from the assembly _start to the beginning of the new
_start_c(). Let's call __stack_chk_init() in _start_c() as a
preparation.

Suggested-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/lkml/a00284a6-54b1-498c-92aa-44997fa78403@t-8ch.de/
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/crt.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
index 92a2a0478cb1..32e128b0fb62 100644
--- a/tools/include/nolibc/crt.h
+++ b/tools/include/nolibc/crt.h
@@ -10,6 +10,7 @@
 char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
+void __stack_chk_init(void);
 static void exit(int);
 
 void _start_c(long *sp)
@@ -21,6 +22,9 @@ void _start_c(long *sp)
 	/* silence potential warning: conflicting types for 'main' */
 	int _nolibc_main(int, char **, char **) __asm__ ("main");
 
+	/* initialize stack protector */
+	__stack_chk_init();
+
 	/*
 	 * sp  :    argc          <-- argument count, required by main()
 	 * argv:    argv[0]       <-- argument vector, required by main()

From 61f988072173076a2333510757936bcaa9de7986 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:24:29 +0800
Subject: [PATCH 1772/2047] tools/nolibc: arm: shrink _start with _start_c

move most of the _start operations to _start_c(), include the
stackprotector initialization.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-arm.h | 44 ++++-----------------------------
 1 file changed, 5 insertions(+), 39 deletions(-)

diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index a06dad789a22..cae4afa7c1c7 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -8,6 +8,7 @@
 #define _NOLIBC_ARCH_ARM_H
 
 #include "compiler.h"
+#include "crt.h"
 
 /* Syscalls for ARM in ARM or Thumb modes :
  *   - registers are 32-bit
@@ -183,49 +184,14 @@
 	_arg1;                                                                \
 })
 
-
-char **environ __attribute__((weak));
-const unsigned long *_auxv __attribute__((weak));
-
 /* startup code */
 void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
-#ifdef _NOLIBC_STACKPROTECTOR
-		"bl __stack_chk_init\n"       /* initialize stack protector                          */
-#endif
-		"pop {%r0}\n"                 /* argc was in the stack                               */
-		"mov %r1, %sp\n"              /* argv = sp                                           */
-
-		"add %r2, %r0, $1\n"          /* envp = (argc + 1) ...                               */
-		"lsl %r2, %r2, $2\n"          /*        * 4        ...                               */
-		"add %r2, %r2, %r1\n"         /*        + argv                                       */
-		"ldr %r3, 1f\n"               /* r3 = &environ (see below)                           */
-		"str %r2, [r3]\n"             /* store envp into environ                             */
-
-		"mov r4, r2\n"                /* search for auxv (follows NULL after last env)       */
-		"0:\n"
-		"mov r5, r4\n"                /* r5 = r4                                             */
-		"add r4, r4, #4\n"            /* r4 += 4                                             */
-		"ldr r5,[r5]\n"               /* r5 = *r5 = *(r4-4)                                  */
-		"cmp r5, #0\n"                /* and stop at NULL after last env                     */
-		"bne 0b\n"
-		"ldr %r3, 2f\n"               /* r3 = &_auxv (low bits)                              */
-		"str r4, [r3]\n"              /* store r4 into _auxv                                 */
-
-		"mov %r3, $8\n"               /* AAPCS : sp must be 8-byte aligned in the            */
-		"neg %r3, %r3\n"              /*         callee, and bl doesn't push (lr=pc)         */
-		"and %r3, %r3, %r1\n"         /* so we do sp = r1(=sp) & r3(=-8);                    */
-		"mov %sp, %r3\n"
-
-		"bl main\n"                   /* main() returns the status code, we'll exit with it. */
-		"movs r7, $1\n"               /* NR_exit == 1                                        */
-		"svc $0x00\n"
-		".align 2\n"                  /* below are the pointers to a few variables           */
-		"1:\n"
-		".word environ\n"
-		"2:\n"
-		".word _auxv\n"
+		"mov %r0, sp\n"         /* save stack pointer to %r0, as arg1 of _start_c */
+		"and ip, %r0, #-8\n"    /* sp must be 8-byte aligned in the callee        */
+		"mov sp, ip\n"
+		"bl  _start_c\n"        /* transfer to c runtime                          */
 	);
 	__builtin_unreachable();
 }

From ded8af47c21ccfda97d40d1eafb47238455048ab Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:25:35 +0800
Subject: [PATCH 1773/2047] tools/nolibc: aarch64: shrink _start with _start_c

move most of the _start operations to _start_c(), include the
stackprotector initialization.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-aarch64.h | 27 ++++-----------------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
index 3c0a5f47b3e8..6c33c46848e3 100644
--- a/tools/include/nolibc/arch-aarch64.h
+++ b/tools/include/nolibc/arch-aarch64.h
@@ -8,6 +8,7 @@
 #define _NOLIBC_ARCH_AARCH64_H
 
 #include "compiler.h"
+#include "crt.h"
 
 /* Syscalls for AARCH64 :
  *   - registers are 64-bit
@@ -143,33 +144,13 @@
 	_arg1;                                                                \
 })
 
-char **environ __attribute__((weak));
-const unsigned long *_auxv __attribute__((weak));
-
 /* startup code */
 void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
-#ifdef _NOLIBC_STACKPROTECTOR
-		"bl __stack_chk_init\n"   /* initialize stack protector                     */
-#endif
-		"ldr x0, [sp]\n"     /* argc (x0) was in the stack                          */
-		"add x1, sp, 8\n"    /* argv (x1) = sp                                      */
-		"lsl x2, x0, 3\n"    /* envp (x2) = 8*argc ...                              */
-		"add x2, x2, 8\n"    /*           + 8 (skip null)                           */
-		"add x2, x2, x1\n"   /*           + argv                                    */
-		"adrp x3, environ\n"          /* x3 = &environ (high bits)                  */
-		"str x2, [x3, #:lo12:environ]\n" /* store envp into environ                 */
-		"mov x4, x2\n"       /* search for auxv (follows NULL after last env)       */
-		"0:\n"
-		"ldr x5, [x4], 8\n"  /* x5 = *x4; x4 += 8                                   */
-		"cbnz x5, 0b\n"      /* and stop at NULL after last env                     */
-		"adrp x3, _auxv\n"   /* x3 = &_auxv (high bits)                             */
-		"str x4, [x3, #:lo12:_auxv]\n" /* store x4 into _auxv                       */
-		"and sp, x1, -16\n"  /* sp must be 16-byte aligned in the callee            */
-		"bl main\n"          /* main() returns the status code, we'll exit with it. */
-		"mov x8, 93\n"       /* NR_exit == 93                                       */
-		"svc #0\n"
+		"mov x0, sp\n"          /* save stack pointer to x0, as arg1 of _start_c */
+		"and sp, x0, -16\n"     /* sp must be 16-byte aligned in the callee      */
+		"bl  _start_c\n"        /* transfer to c runtime                         */
 	);
 	__builtin_unreachable();
 }

From 2ab446336b17aad362c6decee29b4efd83a01979 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:26:42 +0800
Subject: [PATCH 1774/2047] tools/nolibc: i386: shrink _start with _start_c

move most of the _start operations to _start_c(), include the
stackprotector initialization.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-i386.h | 34 +++++++-------------------------
 1 file changed, 7 insertions(+), 27 deletions(-)

diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
index fe0b73f032c3..64415b9fac77 100644
--- a/tools/include/nolibc/arch-i386.h
+++ b/tools/include/nolibc/arch-i386.h
@@ -8,6 +8,7 @@
 #define _NOLIBC_ARCH_I386_H
 
 #include "compiler.h"
+#include "crt.h"
 
 /* Syscalls for i386 :
  *   - mostly similar to x86_64
@@ -154,9 +155,6 @@
 	_eax;							\
 })
 
-char **environ __attribute__((weak));
-const unsigned long *_auxv __attribute__((weak));
-
 /* startup code */
 /*
  * i386 System V ABI mandates:
@@ -167,30 +165,12 @@ const unsigned long *_auxv __attribute__((weak));
 void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
-#ifdef _NOLIBC_STACKPROTECTOR
-		"call __stack_chk_init\n"   /* initialize stack protector                    */
-#endif
-		"pop %eax\n"                /* argc   (first arg, %eax)                      */
-		"mov %esp, %ebx\n"          /* argv[] (second arg, %ebx)                     */
-		"lea 4(%ebx,%eax,4),%ecx\n" /* then a NULL then envp (third arg, %ecx)       */
-		"mov %ecx, environ\n"       /* save environ                                  */
-		"xor %ebp, %ebp\n"          /* zero the stack frame                          */
-		"mov %ecx, %edx\n"          /* search for auxv (follows NULL after last env) */
-		"0:\n"
-		"add $4, %edx\n"            /* search for auxv using edx, it follows the     */
-		"cmp -4(%edx), %ebp\n"      /* ... NULL after last env (ebp is zero here)    */
-		"jnz 0b\n"
-		"mov %edx, _auxv\n"         /* save it into _auxv                            */
-		"and $-16, %esp\n"          /* x86 ABI : esp must be 16-byte aligned before  */
-		"sub $4, %esp\n"            /* the call instruction (args are aligned)       */
-		"push %ecx\n"               /* push all registers on the stack so that we    */
-		"push %ebx\n"               /* support both regparm and plain stack modes    */
-		"push %eax\n"
-		"call main\n"               /* main() returns the status code in %eax        */
-		"mov %eax, %ebx\n"          /* retrieve exit code (32-bit int)               */
-		"movl $1, %eax\n"           /* NR_exit == 1                                  */
-		"int $0x80\n"               /* exit now                                      */
-		"hlt\n"                     /* ensure it does not                            */
+		"xor  %ebp, %ebp\n"       /* zero the stack frame                                */
+		"mov  %esp, %eax\n"       /* save stack pointer to %eax, as arg1 of _start_c     */
+		"and  $-16, %esp\n"       /* last pushed argument must be 16-byte aligned        */
+		"push %eax\n"             /* push arg1 on stack to support plain stack modes too */
+		"call _start_c\n"         /* transfer to c runtime                               */
+		"hlt\n"                   /* ensure it does not return                           */
 	);
 	__builtin_unreachable();
 }

From 539287d75178fd77c195520477dcd35a4d28a737 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:27:49 +0800
Subject: [PATCH 1775/2047] tools/nolibc: x86_64: shrink _start with _start_c

move most of the _start operations to _start_c(), include the
stackprotector initialization.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-x86_64.h | 29 ++++++-----------------------
 1 file changed, 6 insertions(+), 23 deletions(-)

diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
index 6b494ca471ef..e5ccb926c903 100644
--- a/tools/include/nolibc/arch-x86_64.h
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -8,6 +8,7 @@
 #define _NOLIBC_ARCH_X86_64_H
 
 #include "compiler.h"
+#include "crt.h"
 
 /* Syscalls for x86_64 :
  *   - registers are 64-bit
@@ -153,9 +154,6 @@
 	_ret;                                                                 \
 })
 
-char **environ __attribute__((weak));
-const unsigned long *_auxv __attribute__((weak));
-
 /* startup code */
 /*
  * x86-64 System V ABI mandates:
@@ -166,26 +164,11 @@ const unsigned long *_auxv __attribute__((weak));
 void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
-#ifdef _NOLIBC_STACKPROTECTOR
-		"call __stack_chk_init\n"   /* initialize stack protector                          */
-#endif
-		"pop %rdi\n"                /* argc   (first arg, %rdi)                            */
-		"mov %rsp, %rsi\n"          /* argv[] (second arg, %rsi)                           */
-		"lea 8(%rsi,%rdi,8),%rdx\n" /* then a NULL then envp (third arg, %rdx)             */
-		"mov %rdx, environ\n"       /* save environ                                        */
-		"xor %ebp, %ebp\n"          /* zero the stack frame                                */
-		"mov %rdx, %rax\n"          /* search for auxv (follows NULL after last env)       */
-		"0:\n"
-		"add $8, %rax\n"            /* search for auxv using rax, it follows the           */
-		"cmp -8(%rax), %rbp\n"      /* ... NULL after last env (rbp is zero here)          */
-		"jnz 0b\n"
-		"mov %rax, _auxv\n"         /* save it into _auxv                                  */
-		"and $-16, %rsp\n"          /* x86 ABI : esp must be 16-byte aligned before call   */
-		"call main\n"               /* main() returns the status code, we'll exit with it. */
-		"mov %eax, %edi\n"          /* retrieve exit code (32 bit)                         */
-		"mov $60, %eax\n"           /* NR_exit == 60                                       */
-		"syscall\n"                 /* really exit                                         */
-		"hlt\n"                     /* ensure it does not return                           */
+		"xor  %ebp, %ebp\n"       /* zero the stack frame                            */
+		"mov  %rsp, %rdi\n"       /* save stack pointer to %rdi, as arg1 of _start_c */
+		"and  $-16, %rsp\n"       /* %rsp must be 16-byte aligned before call        */
+		"call _start_c\n"         /* transfer to c runtime                           */
+		"hlt\n"                   /* ensure it does not return                       */
 	);
 	__builtin_unreachable();
 }

From 431b806b9bc33cf505204f4047182524120aaf7a Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:28:55 +0800
Subject: [PATCH 1776/2047] tools/nolibc: mips: shrink _start with _start_c

move most of the _start operations to _start_c(), include the
stackprotector initialization.

Also clean up the instructions in delayed slots.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-mips.h | 46 ++++++--------------------------
 1 file changed, 8 insertions(+), 38 deletions(-)

diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 7242fc9de04f..4ab6fa54beee 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -8,6 +8,7 @@
 #define _NOLIBC_ARCH_MIPS_H
 
 #include "compiler.h"
+#include "crt.h"
 
 /* Syscalls for MIPS ABI O32 :
  *   - WARNING! there's always a delayed slot!
@@ -173,50 +174,19 @@
 	_arg4 ? -_num : _num;                                                 \
 })
 
-char **environ __attribute__((weak));
-const unsigned long *_auxv __attribute__((weak));
-
 /* startup code, note that it's called __start on MIPS */
 void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector __start(void)
 {
 	__asm__ volatile (
-		/*".set nomips16\n"*/
 		".set push\n"
-		".set    noreorder\n"
+		".set noreorder\n"
 		".option pic0\n"
-#ifdef _NOLIBC_STACKPROTECTOR
-		"jal __stack_chk_init\n" /* initialize stack protector                         */
-		"nop\n"                  /* delayed slot                                       */
-#endif
-		/*".ent __start\n"*/
-		/*"__start:\n"*/
-		"lw $a0,($sp)\n"        /* argc was in the stack                               */
-		"addiu  $a1, $sp, 4\n"  /* argv = sp + 4                                       */
-		"sll $a2, $a0, 2\n"     /* a2 = argc * 4                                       */
-		"add   $a2, $a2, $a1\n" /* envp = argv + 4*argc ...                            */
-		"addiu $a2, $a2, 4\n"   /*        ... + 4                                      */
-		"lui $a3, %hi(environ)\n"     /* load environ into a3 (hi)                     */
-		"addiu $a3, %lo(environ)\n"   /* load environ into a3 (lo)                     */
-		"sw $a2,($a3)\n"              /* store envp(a2) into environ                   */
-
-		"move $t0, $a2\n"             /* iterate t0 over envp, look for NULL           */
-		"0:"                          /* do {                                          */
-		"lw $a3, ($t0)\n"             /*   a3=*(t0);                                   */
-		"bne $a3, $0, 0b\n"           /* } while (a3);                                 */
-		"addiu $t0, $t0, 4\n"         /* delayed slot: t0+=4;                          */
-		"lui $a3, %hi(_auxv)\n"       /* load _auxv into a3 (hi)                       */
-		"addiu $a3, %lo(_auxv)\n"     /* load _auxv into a3 (lo)                       */
-		"sw $t0, ($a3)\n"             /* store t0 into _auxv                           */
-
-		"li $t0, -8\n"
-		"and $sp, $sp, $t0\n"   /* sp must be 8-byte aligned                           */
-		"addiu $sp,$sp,-16\n"   /* the callee expects to save a0..a3 there!            */
-		"jal main\n"            /* main() returns the status code, we'll exit with it. */
-		"nop\n"                 /* delayed slot                                        */
-		"move $a0, $v0\n"       /* retrieve 32-bit exit code from v0                   */
-		"li $v0, 4001\n"        /* NR_exit == 4001                                     */
-		"syscall\n"
-		/*".end __start\n"*/
+		"move  $a0, $sp\n"       /* save stack pointer to $a0, as arg1 of _start_c */
+		"li    $t0, -8\n"
+		"and   $sp, $sp, $t0\n"  /* $sp must be 8-byte aligned                     */
+		"addiu $sp, $sp, -16\n"  /* the callee expects to save a0..a3 there        */
+		"jal   _start_c\n"       /* transfer to c runtime                          */
+		" nop\n"                 /* delayed slot                                   */
 		".set pop\n"
 	);
 	__builtin_unreachable();

From 61bd4621c056a04221b0c0353aa645a799d5c029 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:30:02 +0800
Subject: [PATCH 1777/2047] tools/nolibc: loongarch: shrink _start with
 _start_c

move most of the _start operations to _start_c(), include the
stackprotector initialization.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-loongarch.h | 44 +++------------------------
 1 file changed, 4 insertions(+), 40 deletions(-)

diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
index 590155a4e543..bf98f6220195 100644
--- a/tools/include/nolibc/arch-loongarch.h
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -8,6 +8,7 @@
 #define _NOLIBC_ARCH_LOONGARCH_H
 
 #include "compiler.h"
+#include "crt.h"
 
 /* Syscalls for LoongArch :
  *   - stack is 16-byte aligned
@@ -143,26 +144,9 @@
 	_arg1;                                                                \
 })
 
-char **environ __attribute__((weak));
-const unsigned long *_auxv __attribute__((weak));
-
 #if __loongarch_grlen == 32
-#define LONGLOG      "2"
-#define SZREG        "4"
-#define REG_L        "ld.w"
-#define LONG_S       "st.w"
-#define LONG_ADD     "add.w"
-#define LONG_ADDI    "addi.w"
-#define LONG_SLL     "slli.w"
 #define LONG_BSTRINS "bstrins.w"
 #else /* __loongarch_grlen == 64 */
-#define LONGLOG      "3"
-#define SZREG        "8"
-#define REG_L        "ld.d"
-#define LONG_S       "st.d"
-#define LONG_ADD     "add.d"
-#define LONG_ADDI    "addi.d"
-#define LONG_SLL     "slli.d"
 #define LONG_BSTRINS "bstrins.d"
 #endif
 
@@ -170,29 +154,9 @@ const unsigned long *_auxv __attribute__((weak));
 void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
-#ifdef _NOLIBC_STACKPROTECTOR
-		"bl __stack_chk_init\n"               /* initialize stack protector                          */
-#endif
-		REG_L        " $a0, $sp, 0\n"         /* argc (a0) was in the stack                          */
-		LONG_ADDI    " $a1, $sp, "SZREG"\n"   /* argv (a1) = sp + SZREG                              */
-		LONG_SLL     " $a2, $a0, "LONGLOG"\n" /* envp (a2) = SZREG*argc ...                          */
-		LONG_ADDI    " $a2, $a2, "SZREG"\n"   /*             + SZREG (skip null)                     */
-		LONG_ADD     " $a2, $a2, $a1\n"       /*             + argv                                  */
-
-		"move          $a3, $a2\n"            /* iterate a3 over envp to find auxv (after NULL)      */
-		"0:\n"                                /* do {                                                */
-		REG_L        " $a4, $a3, 0\n"         /*   a4 = *a3;                                         */
-		LONG_ADDI    " $a3, $a3, "SZREG"\n"   /*   a3 += sizeof(void*);                              */
-		"bne           $a4, $zero, 0b\n"      /* } while (a4);                                       */
-		"la.pcrel      $a4, _auxv\n"          /* a4 = &_auxv                                         */
-		LONG_S       " $a3, $a4, 0\n"         /* store a3 into _auxv                                 */
-
-		"la.pcrel      $a3, environ\n"        /* a3 = &environ                                       */
-		LONG_S       " $a2, $a3, 0\n"         /* store envp(a2) into environ                         */
-		LONG_BSTRINS " $sp, $zero, 3, 0\n"    /* sp must be 16-byte aligned                          */
-		"bl            main\n"                /* main() returns the status code, we'll exit with it. */
-		"li.w          $a7, 93\n"             /* NR_exit == 93                                       */
-		"syscall       0\n"
+		"move          $a0, $sp\n"         /* save stack pointer to $a0, as arg1 of _start_c */
+		LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned                    */
+		"bl            _start_c\n"         /* transfer to c runtime                          */
 	);
 	__builtin_unreachable();
 }

From eea70cdac61087fdb7a1c1e6f306c0bf38ec39d4 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:31:12 +0800
Subject: [PATCH 1778/2047] tools/nolibc: riscv: shrink _start with _start_c

move most of the _start operations to _start_c(), include the
stackprotector initialization.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-riscv.h | 44 ++++---------------------------
 1 file changed, 5 insertions(+), 39 deletions(-)

diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index d49f5ecbf815..950cc2283fd7 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -8,18 +8,7 @@
 #define _NOLIBC_ARCH_RISCV_H
 
 #include "compiler.h"
-
-#if   __riscv_xlen == 64
-#define PTRLOG "3"
-#define SZREG  "8"
-#define REG_L  "ld"
-#define REG_S  "sd"
-#elif __riscv_xlen == 32
-#define PTRLOG "2"
-#define SZREG  "4"
-#define REG_L  "lw"
-#define REG_S  "sw"
-#endif
+#include "crt.h"
 
 /* Syscalls for RISCV :
  *   - stack is 16-byte aligned
@@ -153,40 +142,17 @@
 	_arg1;                                                                \
 })
 
-char **environ __attribute__((weak));
-const unsigned long *_auxv __attribute__((weak));
-
 /* startup code */
 void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
 		".option push\n"
 		".option norelax\n"
-		"lla   gp, __global_pointer$\n"
+		"lla  gp, __global_pointer$\n"
 		".option pop\n"
-#ifdef _NOLIBC_STACKPROTECTOR
-		"call __stack_chk_init\n"    /* initialize stack protector                          */
-#endif
-		REG_L" a0, 0(sp)\n"          /* argc (a0) was in the stack                          */
-		"add   a1, sp, "SZREG"\n"    /* argv (a1) = sp                                      */
-		"slli  a2, a0, "PTRLOG"\n"   /* envp (a2) = SZREG*argc ...                          */
-		"add   a2, a2, "SZREG"\n"    /*             + SZREG (skip null)                     */
-		"add   a2,a2,a1\n"           /*             + argv                                  */
-
-		"add   a3, a2, zero\n"       /* iterate a3 over envp to find auxv (after NULL)      */
-		"0:\n"                       /* do {                                                */
-		REG_L" a4, 0(a3)\n"          /*   a4 = *a3;                                         */
-		"add   a3, a3, "SZREG"\n"    /*   a3 += sizeof(void*);                              */
-		"bne   a4, zero, 0b\n"       /* } while (a4);                                       */
-		"lui   a4, %hi(_auxv)\n"     /* a4 = &_auxv (high bits)                             */
-		REG_S" a3, %lo(_auxv)(a4)\n" /* store a3 into _auxv                                 */
-
-		"lui   a3, %hi(environ)\n"   /* a3 = &environ (high bits)                           */
-		REG_S" a2,%lo(environ)(a3)\n"/* store envp(a2) into environ                         */
-		"andi  sp,a1,-16\n"          /* sp must be 16-byte aligned                          */
-		"call  main\n"               /* main() returns the status code, we'll exit with it. */
-		"li a7, 93\n"                /* NR_exit == 93                                       */
-		"ecall\n"
+		"mv   a0, sp\n"           /* save stack pointer to a0, as arg1 of _start_c */
+		"andi sp, a0, -16\n"      /* sp must be 16-byte aligned                    */
+		"call _start_c\n"         /* transfer to c runtime                         */
 	);
 	__builtin_unreachable();
 }

From c48d8af2faf2b407e80ec1e9bd23872601d0c0b3 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:32:19 +0800
Subject: [PATCH 1779/2047] tools/nolibc: s390: shrink _start with _start_c

move most of the _start operations to _start_c(), include the
stackprotector initialization.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-s390.h | 36 +++++---------------------------
 1 file changed, 5 insertions(+), 31 deletions(-)

diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index 3b94ae0cb1d1..5d60fd43f883 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -9,6 +9,7 @@
 #include <asm/unistd.h>
 
 #include "compiler.h"
+#include "crt.h"
 
 /* Syscalls for s390:
  *   - registers are 64-bit
@@ -137,41 +138,14 @@
 	_arg1;								\
 })
 
-char **environ __attribute__((weak));
-const unsigned long *_auxv __attribute__((weak));
-
 /* startup code */
 void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 	__asm__ volatile (
-		"lg	%r2,0(%r15)\n"		/* argument count */
-		"la	%r3,8(%r15)\n"		/* argument pointers */
-
-		"xgr	%r0,%r0\n"		/* r0 will be our NULL value */
-		/* search for envp */
-		"lgr	%r4,%r3\n"		/* start at argv */
-		"0:\n"
-		"clg	%r0,0(%r4)\n"		/* entry zero? */
-		"la	%r4,8(%r4)\n"		/* advance pointer */
-		"jnz	0b\n"			/* no -> test next pointer */
-						/* yes -> r4 now contains start of envp */
-		"larl	%r1,environ\n"
-		"stg	%r4,0(%r1)\n"
-
-		/* search for auxv */
-		"lgr	%r5,%r4\n"		/* start at envp */
-		"1:\n"
-		"clg	%r0,0(%r5)\n"		/* entry zero? */
-		"la	%r5,8(%r5)\n"		/* advance pointer */
-		"jnz	1b\n"			/* no -> test next pointer */
-		"larl	%r1,_auxv\n"		/* yes -> store value in _auxv */
-		"stg	%r5,0(%r1)\n"
-
-		"aghi	%r15,-160\n"		/* allocate new stackframe */
-		"xc	0(8,%r15),0(%r15)\n"	/* clear backchain */
-		"brasl	%r14,main\n"		/* ret value of main is arg to exit */
-		"lghi	%r1,1\n"		/* __NR_exit */
-		"svc	0\n"
+		"lgr	%r2, %r15\n"          /* save stack pointer to %r2, as arg1 of _start_c */
+		"aghi	%r15, -160\n"         /* allocate new stackframe                        */
+		"xc	0(8,%r15), 0(%r15)\n" /* clear backchain                                */
+		"brasl	%r14, _start_c\n"     /* transfer to c runtime                          */
 	);
 	__builtin_unreachable();
 }

From fd3a9efde8db4b9e7738d1338c23ac43de723d5f Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:33:26 +0800
Subject: [PATCH 1780/2047] selftests/nolibc: add EXPECT_PTRGE, EXPECT_PTRGT,
 EXPECT_PTRLE, EXPECT_PTRLT

4 new pointer compare macros are added, they are similar to the integer
compare macros.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 50 ++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 7cd1aa800b8a..b3a66c3ffaf5 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -410,6 +410,56 @@ static int expect_ptrne(const void *expr, int llen, const void *cmp)
 	return ret;
 }
 
+#define EXPECT_PTRGE(cond, expr, cmp)				\
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrge(expr, llen, cmp); } while (0)
+
+static int expect_ptrge(const void *expr, int llen, const void *cmp)
+{
+	int ret = !(expr >= cmp);
+
+	llen += printf(" = <%p> ", expr);
+	result(llen, ret ? FAIL : OK);
+	return ret;
+}
+
+#define EXPECT_PTRGT(cond, expr, cmp)				\
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrgt(expr, llen, cmp); } while (0)
+
+static int expect_ptrgt(const void *expr, int llen, const void *cmp)
+{
+	int ret = !(expr > cmp);
+
+	llen += printf(" = <%p> ", expr);
+	result(llen, ret ? FAIL : OK);
+	return ret;
+}
+
+
+#define EXPECT_PTRLE(cond, expr, cmp)				\
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrle(expr, llen, cmp); } while (0)
+
+static int expect_ptrle(const void *expr, int llen, const void *cmp)
+{
+	int ret = !(expr <= cmp);
+
+	llen += printf(" = <%p> ", expr);
+	result(llen, ret ? FAIL : OK);
+	return ret;
+}
+
+
+#define EXPECT_PTRLT(cond, expr, cmp)				\
+	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrlt(expr, llen, cmp); } while (0)
+
+static int expect_ptrlt(const void *expr, int llen, const void *cmp)
+{
+	int ret = !(expr < cmp);
+
+	llen += printf(" = <%p> ", expr);
+	result(llen, ret ? FAIL : OK);
+	return ret;
+}
+
 #define EXPECT_PTRER2(cond, expr, expret, experr1, experr2)		\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrerr2(expr, expret, experr1, experr2, llen); } while (0)
 

From 48967b73f8fe7e64e1d292e963dd45daff4ffa60 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:34:32 +0800
Subject: [PATCH 1781/2047] selftests/nolibc: add testcases for startup code

The startup code is critical to get the right argc, argv, envp/environ
and _auxv, let's add a startup test group and the corresponding
testcases.

The "environ" test case is also moved from the stdlib test group to this
new startup test group and it is renamed to "environ_envp".

Since argv0 has been used by many other test cases, let's add testcases
to gurantee it too.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 56 +++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index b3a66c3ffaf5..737205702e3d 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -15,6 +15,7 @@
 #include <string.h>
 #ifndef _NOLIBC_STDIO_H
 /* standard libcs need more includes */
+#include <sys/auxv.h>
 #include <sys/io.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
@@ -47,6 +48,12 @@
 /* will be used to test initialization of environ */
 static char **test_envp;
 
+/* will be used to test initialization of argv */
+static char **test_argv;
+
+/* will be used to test initialization of argc */
+static int test_argc;
+
 /* will be used by some test cases as readable file, please don't write it */
 static const char *argv0;
 
@@ -561,6 +568,51 @@ static int expect_strne(const char *expr, int llen, const char *cmp)
 #define CASE_TEST(name) \
 	case __LINE__: llen += printf("%d %s", test, #name);
 
+int run_startup(int min, int max)
+{
+	int test;
+	int ret = 0;
+	/* kernel at least passes HOME and TERM, shell passes more */
+	int env_total = 2;
+	/* checking NULL for argv/argv0, environ and _auxv is not enough, let's compare with sbrk(0) or &end */
+	extern char end;
+	char *brk = sbrk(0) != (void *)-1 ? sbrk(0) : &end;
+	/* differ from nolibc, both glibc and musl have no global _auxv */
+	const unsigned long *test_auxv = (void *)-1;
+#ifdef NOLIBC
+	test_auxv = _auxv;
+#endif
+
+	for (test = min; test >= 0 && test <= max; test++) {
+		int llen = 0; /* line length */
+
+		/* avoid leaving empty lines below, this will insert holes into
+		 * test numbers.
+		 */
+		switch (test + __LINE__ + 1) {
+		CASE_TEST(argc);             EXPECT_GE(1, test_argc, 1); break;
+		CASE_TEST(argv_addr);        EXPECT_PTRGT(1, test_argv, brk); break;
+		CASE_TEST(argv_environ);     EXPECT_PTRLT(1, test_argv, environ); break;
+		CASE_TEST(argv_total);       EXPECT_EQ(1, environ - test_argv - 1, test_argc ?: 1); break;
+		CASE_TEST(argv0_addr);       EXPECT_PTRGT(1, argv0, brk); break;
+		CASE_TEST(argv0_str);        EXPECT_STRNZ(1, argv0 > brk ? argv0 : NULL); break;
+		CASE_TEST(argv0_len);        EXPECT_GE(1,  argv0 > brk ? strlen(argv0) : 0, 1); break;
+		CASE_TEST(environ_addr);     EXPECT_PTRGT(1, environ, brk); break;
+		CASE_TEST(environ_envp);     EXPECT_PTREQ(1, environ, test_envp); break;
+		CASE_TEST(environ_auxv);     EXPECT_PTRLT(test_auxv != (void *)-1, environ, test_auxv); break;
+		CASE_TEST(environ_total);    EXPECT_GE(test_auxv != (void *)-1, (void *)test_auxv - (void *)environ - 1, env_total); break;
+		CASE_TEST(environ_HOME);     EXPECT_PTRNZ(1, getenv("HOME")); break;
+		CASE_TEST(auxv_addr);        EXPECT_PTRGT(test_auxv != (void *)-1, test_auxv, brk); break;
+		CASE_TEST(auxv_AT_UID);      EXPECT_EQ(1, getauxval(AT_UID), getuid()); break;
+		CASE_TEST(auxv_AT_PAGESZ);   EXPECT_GE(1, getauxval(AT_PAGESZ), 4096); break;
+		case __LINE__:
+			return ret; /* must be last */
+		/* note: do not set any defaults so as to permit holes above */
+		}
+	}
+	return ret;
+}
+
 
 /* used by some syscall tests below */
 int test_getdents64(const char *dir)
@@ -845,7 +897,6 @@ int run_stdlib(int min, int max)
 		 * test numbers.
 		 */
 		switch (test + __LINE__ + 1) {
-		CASE_TEST(environ);            EXPECT_PTREQ(1, environ, test_envp); break;
 		CASE_TEST(getenv_TERM);        EXPECT_STRNZ(1, getenv("TERM")); break;
 		CASE_TEST(getenv_blah);        EXPECT_STRZR(1, getenv("blah")); break;
 		CASE_TEST(setcmp_blah_blah);   EXPECT_EQ(1, strcmp("blah", "blah"), 0); break;
@@ -1129,6 +1180,7 @@ int prepare(void)
 /* This is the definition of known test names, with their functions */
 static const struct test test_names[] = {
 	/* add new tests here */
+	{ .name = "startup",    .func = run_startup    },
 	{ .name = "syscall",    .func = run_syscall    },
 	{ .name = "stdlib",     .func = run_stdlib     },
 	{ .name = "vfprintf",   .func = run_vfprintf   },
@@ -1175,6 +1227,8 @@ int main(int argc, char **argv, char **envp)
 	char *test;
 
 	argv0 = argv[0];
+	test_argc = argc;
+	test_argv = argv;
 	test_envp = envp;
 
 	/* when called as init, it's possible that no console was opened, for

From b81434073b7a113f37f2a2b69c6c28605d4ffb6f Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:35:39 +0800
Subject: [PATCH 1782/2047] selftests/nolibc: allow run nolibc-test locally

It is able to run nolibc-test directly without qemu-user when the target
machine is the same as the host machine.

Sometimes, the result running locally may help a lot when the qemu-user
package is too old.

When the target machine differs from the host machine, it is also able
to run nolibc-test directly with qemu-user-static + binfmt_misc.

Link: https://lore.kernel.org/lkml/ZKutZwIOfy5MqedG@1wt.eu/
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index d31d6cea82e2..b42e67b1a7e2 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -133,10 +133,16 @@ nolibc-test: nolibc-test.c sysroot/$(ARCH)/include
 libc-test: nolibc-test.c
 	$(QUIET_CC)$(CC) -o $@ $<
 
+# local libc-test
 run-libc-test: libc-test
 	$(Q)./libc-test > "$(CURDIR)/run.out" || :
 	$(Q)$(REPORT) $(CURDIR)/run.out
 
+# local nolibc-test
+run-nolibc-test: nolibc-test
+	$(Q)./nolibc-test > "$(CURDIR)/run.out" || :
+	$(Q)$(REPORT) $(CURDIR)/run.out
+
 # qemu user-land test
 run-user: nolibc-test
 	$(Q)qemu-$(QEMU_ARCH) ./nolibc-test > "$(CURDIR)/run.out" || :

From 850fad7de8277d3ad1009c766d0edfdd67e744eb Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 16 Jul 2023 02:36:46 +0800
Subject: [PATCH 1783/2047] selftests/nolibc: allow test -include
 /path/to/nolibc.h

As the head comment of nolibc-test.c shows, it can be built in 3 ways:

    $(CC) -nostdlib -include /path/to/nolibc.h => NOLIBC already defined
    $(CC) -nostdlib -I/path/to/nolibc/sysroot  => _NOLIBC_* guards are present
    $(CC) with default libc                    => NOLIBC* never defined

Only last two of them are tested currently, let's allow test the first one too.

This may help to find issues about using nolibc.h to build programs. it
derives from this change:

    commit 3a8039e289a3 ("tools/nolibc: Fix build of stdio.h due to header ordering")

Usage:

    // test with sysroot by default
    $ make run-user

    // test without sysroot, using nolibc.h directly
    $ make run-user NOLIBC_SYSROOT=0

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index b42e67b1a7e2..f42adef87e12 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -126,9 +126,15 @@ sysroot/$(ARCH)/include:
 	$(Q)$(MAKE) -C ../../../include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone
 	$(Q)mv sysroot/sysroot sysroot/$(ARCH)
 
+ifneq ($(NOLIBC_SYSROOT),0)
 nolibc-test: nolibc-test.c sysroot/$(ARCH)/include
 	$(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
 	  -nostdlib -static -Isysroot/$(ARCH)/include $< -lgcc
+else
+nolibc-test: nolibc-test.c
+	$(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
+	  -nostdlib -static -include ../../../include/nolibc/nolibc.h $< -lgcc
+endif
 
 libc-test: nolibc-test.c
 	$(QUIET_CC)$(CC) -o $@ $<

From 4893c22eb2f4364bc037e9451dfc3bd36a231901 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts@arm.com>
Date: Wed, 26 Jul 2023 08:06:55 +0100
Subject: [PATCH 1784/2047] tools/nolibc/stdio: add setvbuf() to set buffering
 mode

Add a minimal implementation of setvbuf(), which error checks the mode
argument (as required by spec) and returns. Since nolibc never buffers
output, nothing needs to be done.

The kselftest framework recently added a call to setvbuf(). As a result,
any tests that use the kselftest framework and nolibc cause a compiler
error due to missing function. This provides an urgent fix for the
problem which is preventing arm64 testing on linux-next.

Example:

clang --target=aarch64-linux-gnu -fintegrated-as
-Werror=unknown-warning-option -Werror=ignored-optimization-argument
-Werror=option-ignored -Werror=unused-command-line-argument
--target=aarch64-linux-gnu -fintegrated-as
-fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
-include ../../../../include/nolibc/nolibc.h -I../..\
-static -ffreestanding -Wall za-fork.c
build/kselftest/arm64/fp/za-fork-asm.o
-o build/kselftest/arm64/fp/za-fork
In file included from <built-in>:1:
In file included from ./../../../../include/nolibc/nolibc.h:97:
In file included from ./../../../../include/nolibc/arch.h:25:
./../../../../include/nolibc/arch-aarch64.h:178:35: warning: unknown
attribute 'optimize' ignored [-Wunknown-attributes]
void __attribute__((weak,noreturn,optimize("omit-frame-pointer")))
__no_stack_protector _start(void)
                                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from za-fork.c:12:
../../kselftest.h:123:2: error: call to undeclared function 'setvbuf';
ISO C99 and later do not support implicit function declarations
[-Wimplicit-function-declaration]
        setvbuf(stdout, NULL, _IOLBF, 0);
        ^
../../kselftest.h:123:24: error: use of undeclared identifier '_IOLBF'
        setvbuf(stdout, NULL, _IOLBF, 0);
                              ^
1 warning and 2 errors generated.

Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
Link: https://lore.kernel.org/linux-kselftest/CA+G9fYus3Z8r2cg3zLv8uH8MRrzLFVWdnor02SNr=rCz+_WGVg@mail.gmail.com/
Reviewed-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/stdio.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index 0eef91daf289..a3778aff4fa9 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -21,6 +21,11 @@
 #define EOF (-1)
 #endif
 
+/* Buffering mode used by setvbuf.  */
+#define _IOFBF 0	/* Fully buffered. */
+#define _IOLBF 1	/* Line buffered. */
+#define _IONBF 2	/* No buffering. */
+
 /* just define FILE as a non-empty type. The value of the pointer gives
  * the FD: FILE=~fd for fd>=0 or NULL for fd<0. This way positive FILE
  * are immediately identified as abnormal entries (i.e. possible copies
@@ -350,6 +355,25 @@ void perror(const char *msg)
 	fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
 }
 
+static __attribute__((unused))
+int setvbuf(FILE *stream, char *buf, int mode, size_t size)
+{
+	/*
+	 * nolibc does not support buffering so this is a nop. Just check mode
+	 * is valid as required by the spec.
+	 */
+	switch (mode) {
+	case _IOFBF:
+	case _IOLBF:
+	case _IONBF:
+		break;
+	default:
+		return EOF;
+	}
+
+	return 0;
+}
+
 /* make sure to include all global symbols */
 #include "nolibc.h"
 

From b0cc7491c98917f191f14efce7630b547f7ec419 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 14 Jul 2023 11:44:18 -0600
Subject: [PATCH 1785/2047] crypto: drivers - Explicitly include correct DT
 includes

The DT of_device.h and of_platform.h date back to the separate
of_platform_bus_type before it as merged into the regular platform bus.
As part of that merge prepping Arm DT support 13 years ago, they
"temporarily" include each other. They also include platform_device.h
and of.h. As a result, there's a pretty much random mix of those include
files used throughout the tree. In order to detangle these headers and
replace the implicit includes with struct declarations, users need to
explicitly include the correct includes.

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c   | 1 -
 drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c   | 1 -
 drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c   | 1 -
 drivers/crypto/amlogic/amlogic-gxl-core.c           | 1 -
 drivers/crypto/aspeed/aspeed-acry.c                 | 3 ---
 drivers/crypto/atmel-aes.c                          | 6 ++----
 drivers/crypto/atmel-ecc.c                          | 2 +-
 drivers/crypto/atmel-sha.c                          | 6 ++----
 drivers/crypto/atmel-tdes.c                         | 6 ++----
 drivers/crypto/bcm/cipher.c                         | 3 +--
 drivers/crypto/caam/ctrl.c                          | 1 +
 drivers/crypto/caam/jr.c                            | 1 +
 drivers/crypto/caam/qi.c                            | 1 +
 drivers/crypto/ccree/cc_driver.c                    | 1 -
 drivers/crypto/exynos-rng.c                         | 2 +-
 drivers/crypto/gemini/sl3516-ce-core.c              | 1 -
 drivers/crypto/img-hash.c                           | 4 ++--
 drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c | 3 ++-
 drivers/crypto/n2_core.c                            | 2 +-
 drivers/crypto/omap-aes.c                           | 1 -
 drivers/crypto/omap-des.c                           | 2 --
 drivers/crypto/omap-sham.c                          | 1 -
 drivers/crypto/rockchip/rk3288_crypto.c             | 1 -
 drivers/crypto/s5p-sss.c                            | 1 -
 drivers/crypto/sa2ul.c                              | 3 ++-
 drivers/crypto/sahara.c                             | 1 -
 drivers/crypto/starfive/jh7110-cryp.c               | 2 +-
 drivers/crypto/starfive/jh7110-hash.c               | 1 -
 drivers/crypto/stm32/stm32-cryp.c                   | 2 +-
 drivers/crypto/stm32/stm32-hash.c                   | 2 +-
 drivers/crypto/talitos.c                            | 4 ++--
 drivers/crypto/xilinx/zynqmp-aes-gcm.c              | 2 +-
 drivers/crypto/xilinx/zynqmp-sha.c                  | 1 -
 33 files changed, 25 insertions(+), 45 deletions(-)

diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c
index 51a3a7b5b985..3bcfcfc37084 100644
--- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c
+++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c
@@ -14,7 +14,6 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <crypto/scatterwalk.h>
 #include <linux/scatterlist.h>
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
index 1c2c32d0568e..d4ccd5254280 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
@@ -24,7 +24,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
index 62c001c9efc2..4a9587285c04 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
@@ -23,7 +23,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
diff --git a/drivers/crypto/amlogic/amlogic-gxl-core.c b/drivers/crypto/amlogic/amlogic-gxl-core.c
index 9846d791c956..da6dfe0f9ac3 100644
--- a/drivers/crypto/amlogic/amlogic-gxl-core.c
+++ b/drivers/crypto/amlogic/amlogic-gxl-core.c
@@ -18,7 +18,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 
 #include "amlogic-gxl.h"
diff --git a/drivers/crypto/aspeed/aspeed-acry.c b/drivers/crypto/aspeed/aspeed-acry.c
index 2a970367e686..247c568aa8df 100644
--- a/drivers/crypto/aspeed/aspeed-acry.c
+++ b/drivers/crypto/aspeed/aspeed-acry.c
@@ -15,9 +15,6 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 9692254faad9..55b5f577b01c 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -28,7 +28,7 @@
 #include <linux/irq.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/delay.h>
 #include <linux/crypto.h>
 #include <crypto/scatterwalk.h>
@@ -2533,13 +2533,11 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
 	}
 }
 
-#if defined(CONFIG_OF)
 static const struct of_device_id atmel_aes_dt_ids[] = {
 	{ .compatible = "atmel,at91sam9g46-aes" },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, atmel_aes_dt_ids);
-#endif
 
 static int atmel_aes_probe(struct platform_device *pdev)
 {
@@ -2678,7 +2676,7 @@ static struct platform_driver atmel_aes_driver = {
 	.remove		= atmel_aes_remove,
 	.driver		= {
 		.name	= "atmel_aes",
-		.of_match_table = of_match_ptr(atmel_aes_dt_ids),
+		.of_match_table = atmel_aes_dt_ids,
 	},
 };
 
diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index 432beabd79e6..590ea984c622 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -14,7 +14,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 99a9ff8e743f..3622120add62 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -28,7 +28,7 @@
 #include <linux/irq.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/delay.h>
 #include <linux/crypto.h>
 #include <crypto/scatterwalk.h>
@@ -2571,14 +2571,12 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd)
 	}
 }
 
-#if defined(CONFIG_OF)
 static const struct of_device_id atmel_sha_dt_ids[] = {
 	{ .compatible = "atmel,at91sam9g46-sha" },
 	{ /* sentinel */ }
 };
 
 MODULE_DEVICE_TABLE(of, atmel_sha_dt_ids);
-#endif
 
 static int atmel_sha_probe(struct platform_device *pdev)
 {
@@ -2708,7 +2706,7 @@ static struct platform_driver atmel_sha_driver = {
 	.remove		= atmel_sha_remove,
 	.driver		= {
 		.name	= "atmel_sha",
-		.of_match_table	= of_match_ptr(atmel_sha_dt_ids),
+		.of_match_table	= atmel_sha_dt_ids,
 	},
 };
 
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index ba8981f326cf..099b32a10dd7 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -28,7 +28,7 @@
 #include <linux/irq.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/delay.h>
 #include <linux/crypto.h>
 #include <crypto/scatterwalk.h>
@@ -1139,13 +1139,11 @@ static void atmel_tdes_get_cap(struct atmel_tdes_dev *dd)
 	}
 }
 
-#if defined(CONFIG_OF)
 static const struct of_device_id atmel_tdes_dt_ids[] = {
 	{ .compatible = "atmel,at91sam9g46-tdes" },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, atmel_tdes_dt_ids);
-#endif
 
 static int atmel_tdes_probe(struct platform_device *pdev)
 {
@@ -1274,7 +1272,7 @@ static struct platform_driver atmel_tdes_driver = {
 	.remove		= atmel_tdes_remove,
 	.driver		= {
 		.name	= "atmel_tdes",
-		.of_match_table = of_match_ptr(atmel_tdes_dt_ids),
+		.of_match_table = atmel_tdes_dt_ids,
 	},
 };
 
diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index 4c46357e2570..689be70d69c1 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -15,8 +15,7 @@
 #include <linux/kthread.h>
 #include <linux/rtnetlink.h>
 #include <linux/sched.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/io.h>
 #include <linux/bitops.h>
 
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 2a228a36fa15..f63c3ed26c37 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -9,6 +9,7 @@
 #include <linux/device.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
 #include <linux/sys_soc.h>
 #include <linux/fsl/mc.h>
 
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 07ec2f27cc68..b1f1b393b98e 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -9,6 +9,7 @@
 
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
+#include <linux/platform_device.h>
 
 #include "compat.h"
 #include "ctrl.h"
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 2ad2c1035856..46a083849a8e 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/netdevice.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <soc/fsl/qman.h>
diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c
index c57f929805d5..0f0694037dd7 100644
--- a/drivers/crypto/ccree/cc_driver.c
+++ b/drivers/crypto/ccree/cc_driver.c
@@ -14,7 +14,6 @@
 #include <linux/of.h>
 #include <linux/clk.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/pm_runtime.h>
 
 #include "cc_driver.h"
diff --git a/drivers/crypto/exynos-rng.c b/drivers/crypto/exynos-rng.c
index b1df66be9adc..5d60a4bcb511 100644
--- a/drivers/crypto/exynos-rng.c
+++ b/drivers/crypto/exynos-rng.c
@@ -15,7 +15,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 
 #include <crypto/internal/rng.h>
diff --git a/drivers/crypto/gemini/sl3516-ce-core.c b/drivers/crypto/gemini/sl3516-ce-core.c
index 0fd47ab9df5c..0f43c6e39bb9 100644
--- a/drivers/crypto/gemini/sl3516-ce-core.c
+++ b/drivers/crypto/gemini/sl3516-ce-core.c
@@ -21,7 +21,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
index 359aa2b41016..45063693859c 100644
--- a/drivers/crypto/img-hash.c
+++ b/drivers/crypto/img-hash.c
@@ -13,7 +13,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 
@@ -1105,7 +1105,7 @@ static struct platform_driver img_hash_driver = {
 	.driver		= {
 		.name	= "img-hash-accelerator",
 		.pm	= &img_hash_pm_ops,
-		.of_match_table	= of_match_ptr(img_hash_match),
+		.of_match_table	= img_hash_match,
 	}
 };
 module_platform_driver(img_hash_driver);
diff --git a/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c b/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c
index 57a20281ead8..daba8ca05dbe 100644
--- a/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c
+++ b/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c
@@ -16,8 +16,9 @@
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/string.h>
 
 #include "ocs-hcu.h"
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 4f6ca229ee5e..d5a32d71a3e9 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -10,7 +10,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/cpumask.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index ea1331218105..ed83023dd77a 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -28,7 +28,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index ae9e9e4eb94c..089dd45eaedd 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -29,8 +29,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/scatterlist.h>
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 4b79d54fd671..a6b4a0b3ace3 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -30,7 +30,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 937826c50470..77d5705a5d96 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -23,7 +23,6 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/reset.h>
 #include <linux/spinlock.h>
 
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index 1c4d5fb05d69..fe8cf9ba8005 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -20,7 +20,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 
diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index df5f9d675c57..6238d34f8db2 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -15,7 +15,8 @@
 #include <linux/dmapool.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 4c799df3e883..62d93526920f 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -27,7 +27,6 @@
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
 
diff --git a/drivers/crypto/starfive/jh7110-cryp.c b/drivers/crypto/starfive/jh7110-cryp.c
index 890ad5259329..08e974e0dd12 100644
--- a/drivers/crypto/starfive/jh7110-cryp.c
+++ b/drivers/crypto/starfive/jh7110-cryp.c
@@ -15,8 +15,8 @@
 #include <linux/interrupt.h>
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
diff --git a/drivers/crypto/starfive/jh7110-hash.c b/drivers/crypto/starfive/jh7110-hash.c
index 739e944229af..cc7650198d70 100644
--- a/drivers/crypto/starfive/jh7110-hash.c
+++ b/drivers/crypto/starfive/jh7110-hash.c
@@ -17,7 +17,6 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c
index c67239686b1e..f095f0065428 100644
--- a/drivers/crypto/stm32/stm32-cryp.c
+++ b/drivers/crypto/stm32/stm32-cryp.c
@@ -18,7 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index 53cf922d1a38..2b2382d4332c 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -21,7 +21,7 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index bb27f011cf31..4ca4fbd227bc 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -19,9 +19,9 @@
 #include <linux/interrupt.h>
 #include <linux/crypto.h>
 #include <linux/hw_random.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
 #include <linux/spinlock.h>
diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c
index 42fe2b7afef2..ce335578b759 100644
--- a/drivers/crypto/xilinx/zynqmp-aes-gcm.c
+++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c
@@ -14,7 +14,7 @@
 #include <linux/firmware/xlnx-zynqmp.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/string.h>
 
diff --git a/drivers/crypto/xilinx/zynqmp-sha.c b/drivers/crypto/xilinx/zynqmp-sha.c
index 43ff170ff1c2..426bf1a72ba6 100644
--- a/drivers/crypto/xilinx/zynqmp-sha.c
+++ b/drivers/crypto/xilinx/zynqmp-sha.c
@@ -15,7 +15,6 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 
 #define ZYNQMP_DMA_BIT_MASK		32U

From 447e56023fc281c588e4977add552f4d49d78b22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Wed, 26 Jul 2023 08:08:13 +0200
Subject: [PATCH 1786/2047] selftests/nolibc: avoid buffer underrun in space
 printing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the test description is longer than the status alignment the
parameter 'n' to putcharn() would lead to a signed underflow that then
gets converted to a very large unsigned value.
This in turn leads out-of-bound writes in memset() crashing the
application.

The failure case of EXPECT_PTRER() used in "mmap_bad" exhibits this
exact behavior.

Fixes: 29f5540be392 ("selftests/nolibc: add EXPECT_PTREQ, EXPECT_PTRNE and EXPECT_PTRER")
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 737205702e3d..3f5a256631ca 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -151,7 +151,8 @@ static void result(int llen, enum RESULT r)
 	else
 		msg = "[FAIL]";
 
-	putcharn(' ', 64 - llen);
+	if (llen < 64)
+		putcharn(' ', 64 - llen);
 	puts(msg);
 }
 

From e7d0129df681aa23bb6528f44186accc38d655f0 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Wed, 26 Jul 2023 00:43:36 +0800
Subject: [PATCH 1787/2047] selftests/nolibc: mmap_munmap_good: fix up return
 value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The other tests use 1 as failure, mmap_munmap_good uses -1 as failure,
let's fix up this.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 3f5a256631ca..4d39823a567e 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -726,7 +726,7 @@ int test_mmap_munmap(void)
 
 	page_size = getpagesize();
 	if (page_size < 0)
-		return -1;
+		return 1;
 
 	/* find a right file to mmap, existed and accessible */
 	for (i = 0; files[i] != NULL; i++) {
@@ -737,7 +737,7 @@ int test_mmap_munmap(void)
 			break;
 	}
 	if (ret == -1)
-		return ret;
+		return 1;
 
 	ret = stat(files[i], &stat_buf);
 	if (ret == -1)
@@ -757,7 +757,7 @@ int test_mmap_munmap(void)
 
 	mem = mmap(NULL, length + offset - pa_offset, PROT_READ, MAP_SHARED, fd, pa_offset);
 	if (mem == MAP_FAILED) {
-		ret = -1;
+		ret = 1;
 		goto end;
 	}
 
@@ -765,7 +765,7 @@ int test_mmap_munmap(void)
 
 end:
 	close(fd);
-	return ret;
+	return !!ret;
 }
 
 
From 3ec38af6eedb1659f4a2b0350f61782952dd50b7 Mon Sep 17 00:00:00 2001
From: Yuan Tan <tanyuan@tinylab.org>
Date: Tue, 1 Aug 2023 23:39:54 +0800
Subject: [PATCH 1788/2047] tools/nolibc: add pipe() and pipe2() support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to manual page [1], posix spec [2] and source code like
arch/mips/kernel/syscall.c, for historic reasons, the sys_pipe() syscall
on some architectures has an unusual calling convention.  It returns
results in two registers which means there is no need for it to do
verify the validity of a userspace pointer argument.  Historically that
used to be expensive in Linux.  These days the performance advantage is
negligible.

Nolibc doesn't support the unusual calling convention above, luckily
Linux provides a generic sys_pipe2() with an additional flags argument
from 2.6.27. If flags is 0, then pipe2() is the same as pipe(). So here
we use sys_pipe2() to implement the pipe().

pipe2() is also provided to allow users to use flags argument on demand.

[1]: https://man7.org/linux/man-pages/man2/pipe.2.html
[2]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/pipe.html

Suggested-by: Zhangjin Wu <falcon@tinylab.org>
Link: https://lore.kernel.org/all/20230729100401.GA4577@1wt.eu/
Signed-off-by: Yuan Tan <tanyuan@tinylab.org>
Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 8bfe7db20b80..56f63eb48a1b 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -752,6 +752,30 @@ int open(const char *path, int flags, ...)
 }
 
 
+/*
+ * int pipe2(int pipefd[2], int flags);
+ * int pipe(int pipefd[2]);
+ */
+
+static __attribute__((unused))
+int sys_pipe2(int pipefd[2], int flags)
+{
+	return my_syscall2(__NR_pipe2, pipefd, flags);
+}
+
+static __attribute__((unused))
+int pipe2(int pipefd[2], int flags)
+{
+	return __sysret(sys_pipe2(pipefd, flags));
+}
+
+static __attribute__((unused))
+int pipe(int pipefd[2])
+{
+	return pipe2(pipefd, 0);
+}
+
+
 /*
  * int prctl(int option, unsigned long arg2, unsigned long arg3,
  *                       unsigned long arg4, unsigned long arg5);

From 5c01259b1256ebc59bf6d64b396e7464f21c2583 Mon Sep 17 00:00:00 2001
From: Yuan Tan <tanyuan@tinylab.org>
Date: Tue, 1 Aug 2023 23:40:23 +0800
Subject: [PATCH 1789/2047] selftests/nolibc: add testcase for pipe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a test case of pipe that sends and receives message in a single
process.

Suggested-by: Thomas Weißschuh <thomas@t-8ch.de>
Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/all/c5de2d13-3752-4e1b-90d9-f58cca99c702@t-8ch.de/
Signed-off-by: Yuan Tan <tanyuan@tinylab.org>
Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
[wt: fixed the "len" type to size_t to address a sign-compare warning
 with upcoming patches]
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 22 ++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 4d39823a567e..7952107a2db8 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -768,6 +768,27 @@ end:
 	return !!ret;
 }
 
+static int test_pipe(void)
+{
+	const char *const msg = "hello, nolibc";
+	int pipefd[2];
+	char buf[32];
+	size_t len;
+
+	if (pipe(pipefd) == -1)
+		return 1;
+
+	write(pipefd[1], msg, strlen(msg));
+	close(pipefd[1]);
+	len = read(pipefd[0], buf, sizeof(buf));
+	close(pipefd[0]);
+
+	if (len != strlen(msg))
+		return 1;
+
+	return !!memcmp(buf, msg, len);
+}
+
 
 /* Run syscall tests between IDs <min> and <max>.
  * Return 0 on success, non-zero on failure.
@@ -853,6 +874,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(mmap_munmap_good);  EXPECT_SYSZR(1, test_mmap_munmap()); break;
 		CASE_TEST(open_tty);          EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break;
 		CASE_TEST(open_blah);         EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break;
+		CASE_TEST(pipe);              EXPECT_SYSZR(1, test_pipe()); break;
 		CASE_TEST(poll_null);         EXPECT_SYSZR(1, poll(NULL, 0, 0)); break;
 		CASE_TEST(poll_stdout);       EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break;
 		CASE_TEST(poll_fault);        EXPECT_SYSER(1, poll((void *)1, 1, 0), -1, EFAULT); break;

From f2f5eaefa17e4c432ffd22577682c21543ee39cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:45 +0200
Subject: [PATCH 1790/2047] tools/nolibc: drop unused variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Nobody needs it, get rid of it.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 56f63eb48a1b..e12dd962c578 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -738,7 +738,6 @@ static __attribute__((unused))
 int open(const char *path, int flags, ...)
 {
 	mode_t mode = 0;
-	int ret;
 
 	if (flags & O_CREAT) {
 		va_list args;

From 640775022572380b6f78247f10c036e69d404947 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:46 +0200
Subject: [PATCH 1791/2047] tools/nolibc: fix return type of getpagesize()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's documented as returning int which is also implemented by glibc and
musl, so adopt that return type.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h                   | 4 ++--
 tools/testing/selftests/nolibc/nolibc-test.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index e12dd962c578..c151533ba8e9 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -460,11 +460,11 @@ pid_t gettid(void)
 static unsigned long getauxval(unsigned long key);
 
 /*
- * long getpagesize(void);
+ * int getpagesize(void);
  */
 
 static __attribute__((unused))
-long getpagesize(void)
+int getpagesize(void)
 {
 	return __sysret(getauxval(AT_PAGESZ) ?: -ENOENT);
 }
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 7952107a2db8..31f2bd789e2d 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -636,7 +636,7 @@ int test_getdents64(const char *dir)
 
 static int test_getpagesize(void)
 {
-	long x = getpagesize();
+	int x = getpagesize();
 	int c;
 
 	if (x < 0)

From 809145f8421b2212dd61c6f7385f79b78b7485d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:47 +0200
Subject: [PATCH 1792/2047] tools/nolibc: setvbuf: avoid unused parameter
 warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This warning will be enabled later so avoid triggering it.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/stdio.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index a3778aff4fa9..cae402c11e57 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -356,7 +356,10 @@ void perror(const char *msg)
 }
 
 static __attribute__((unused))
-int setvbuf(FILE *stream, char *buf, int mode, size_t size)
+int setvbuf(FILE *stream __attribute__((unused)),
+	    char *buf __attribute__((unused)),
+	    int mode,
+	    size_t size __attribute__((unused)))
 {
 	/*
 	 * nolibc does not support buffering so this is a nop. Just check mode

From 04694658ad4a7df13a74160864d87ab858a9da53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:48 +0200
Subject: [PATCH 1793/2047] tools/nolibc: sys: avoid implicit sign cast
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

getauxval() returns an unsigned long but the overall type of the ternary
operator needs to be signed.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index c151533ba8e9..833d6c5e86dc 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -466,7 +466,7 @@ static unsigned long getauxval(unsigned long key);
 static __attribute__((unused))
 int getpagesize(void)
 {
-	return __sysret(getauxval(AT_PAGESZ) ?: -ENOENT);
+	return __sysret((int)getauxval(AT_PAGESZ) ?: -ENOENT);
 }
 
 
From 202a0bd12f877497eb73a42b22cf2562fb255963 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:49 +0200
Subject: [PATCH 1794/2047] tools/nolibc: stdint: use __SIZE_TYPE__ for size_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise both gcc and clang may generate warnings about type
mismatches:

sysroot/mips/include/string.h:12:14: warning: mismatch in argument 1 type of built-in function 'malloc'; expected 'unsigned int' [-Wbuiltin-declaration-mismatch]
   12 | static void *malloc(size_t len);
      |              ^~~~~~

The compiler provides __SIZE_TYPE__ as the type that corresponds to size_t
(typically "long unsigned int" or "unsigned int"). It was verified to be
available at least since gcc-3.4 and clang-3.8, so from now on we'll use
this definition for size_t.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/lkml/20230805161929.GA15284@1wt.eu/
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/stdint.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h
index 4b282435a59a..6665e272e213 100644
--- a/tools/include/nolibc/stdint.h
+++ b/tools/include/nolibc/stdint.h
@@ -15,7 +15,7 @@ typedef unsigned int       uint32_t;
 typedef   signed int        int32_t;
 typedef unsigned long long uint64_t;
 typedef   signed long long  int64_t;
-typedef unsigned long        size_t;
+typedef __SIZE_TYPE__        size_t;
 typedef   signed long       ssize_t;
 typedef unsigned long     uintptr_t;
 typedef   signed long      intptr_t;

From ca283457b3c675e5ab6762a750b1b9c495864993 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Sun, 6 Aug 2023 12:58:52 +0200
Subject: [PATCH 1795/2047] selftests/nolibc: avoid warnings during intptr
 tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Recent fix ceb528feb7c8 ("selftests/nolibc: avoid gaps in test numbers")
had the annoying side effect of always returning skipped tests, which
are normally supposed to happen only when certain features are missing
to run the test (missing kernel options, toolchain not supporting
stack-protector etc). As such there are now always warnings. Let's
modify the test to not use the condition and instead use a ternary
expression to check the result.

Fixes: ceb528feb7c8 ("selftests/nolibc: avoid gaps in test numbers")
Cc: Thomas WeiÃ<9F>schuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 31f2bd789e2d..c6b8d05f7f46 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -976,18 +976,13 @@ int run_stdlib(int min, int max)
 		CASE_TEST(limit_int_fast64_max);    EXPECT_EQ(1, INT_FAST64_MAX,   (int_fast64_t)    INT64_MAX); break;
 		CASE_TEST(limit_uint_fast64_max);   EXPECT_EQ(1, UINT_FAST64_MAX,  (uint_fast64_t)   UINT64_MAX); break;
 		CASE_TEST(sizeof_long_sane);        EXPECT_EQ(1, sizeof(long) == 8 || sizeof(long) == 4, 1); break;
-		CASE_TEST(limit_intptr_min_64);     EXPECT_EQ(sizeof(long) == 8, INTPTR_MIN,  (intptr_t)  0x8000000000000000LL); break;
-		CASE_TEST(limit_intptr_max_64);     EXPECT_EQ(sizeof(long) == 8, INTPTR_MAX,  (intptr_t)  0x7fffffffffffffffLL); break;
-		CASE_TEST(limit_uintptr_max_64);    EXPECT_EQ(sizeof(long) == 8, UINTPTR_MAX, (uintptr_t) 0xffffffffffffffffULL); break;
-		CASE_TEST(limit_ptrdiff_min_64);    EXPECT_EQ(sizeof(long) == 8, PTRDIFF_MIN, (ptrdiff_t) 0x8000000000000000LL); break;
-		CASE_TEST(limit_ptrdiff_max_64);    EXPECT_EQ(sizeof(long) == 8, PTRDIFF_MAX, (ptrdiff_t) 0x7fffffffffffffffLL); break;
-		CASE_TEST(limit_size_max_64);       EXPECT_EQ(sizeof(long) == 8, SIZE_MAX,    (size_t)    0xffffffffffffffffULL); break;
-		CASE_TEST(limit_intptr_min_32);     EXPECT_EQ(sizeof(long) == 4, INTPTR_MIN,  (intptr_t)  0x80000000); break;
-		CASE_TEST(limit_intptr_max_32);     EXPECT_EQ(sizeof(long) == 4, INTPTR_MAX,  (intptr_t)  0x7fffffff); break;
-		CASE_TEST(limit_uintptr_max_32);    EXPECT_EQ(sizeof(long) == 4, UINTPTR_MAX, (uintptr_t) 0xffffffffU); break;
-		CASE_TEST(limit_ptrdiff_min_32);    EXPECT_EQ(sizeof(long) == 4, PTRDIFF_MIN, (ptrdiff_t) 0x80000000); break;
-		CASE_TEST(limit_ptrdiff_max_32);    EXPECT_EQ(sizeof(long) == 4, PTRDIFF_MAX, (ptrdiff_t) 0x7fffffff); break;
-		CASE_TEST(limit_size_max_32);       EXPECT_EQ(sizeof(long) == 4, SIZE_MAX,    (size_t)    0xffffffffU); break;
+		CASE_TEST(limit_intptr_min);        EXPECT_EQ(1, INTPTR_MIN,  sizeof(long) == 8 ? (intptr_t)  0x8000000000000000LL  : (intptr_t)  0x80000000); break;
+		CASE_TEST(limit_intptr_max);        EXPECT_EQ(1, INTPTR_MAX,  sizeof(long) == 8 ? (intptr_t)  0x7fffffffffffffffLL  : (intptr_t)  0x7fffffff); break;
+		CASE_TEST(limit_uintptr_max);       EXPECT_EQ(1, UINTPTR_MAX, sizeof(long) == 8 ? (uintptr_t) 0xffffffffffffffffULL : (uintptr_t) 0xffffffffU); break;
+		CASE_TEST(limit_ptrdiff_min);       EXPECT_EQ(1, PTRDIFF_MIN, sizeof(long) == 8 ? (ptrdiff_t) 0x8000000000000000LL  : (ptrdiff_t) 0x80000000); break;
+		CASE_TEST(limit_ptrdiff_max);       EXPECT_EQ(1, PTRDIFF_MAX, sizeof(long) == 8 ? (ptrdiff_t) 0x7fffffffffffffffLL  : (ptrdiff_t) 0x7fffffff); break;
+		CASE_TEST(limit_size_max);          EXPECT_EQ(1, SIZE_MAX,    sizeof(long) == 8 ? (size_t)    0xffffffffffffffffULL : (size_t)    0xffffffffU); break;
+
 		case __LINE__:
 			return ret; /* must be last */
 		/* note: do not set any defaults so as to permit holes above */

From 79df81aaea1158ad57878b4e5f341dc2618bdaac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:50 +0200
Subject: [PATCH 1796/2047] selftests/nolibc: drop unused variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These got copied around as new testcases where created.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index c6b8d05f7f46..fce04fde0a2d 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -909,9 +909,7 @@ int run_syscall(int min, int max)
 int run_stdlib(int min, int max)
 {
 	int test;
-	int tmp;
 	int ret = 0;
-	void *p1, *p2;
 
 	for (test = min; test >= 0 && test <= max; test++) {
 		int llen = 0; /* line length */
@@ -1047,9 +1045,7 @@ static int expect_vfprintf(int llen, size_t c, const char *expected, const char
 static int run_vfprintf(int min, int max)
 {
 	int test;
-	int tmp;
 	int ret = 0;
-	void *p1, *p2;
 
 	for (test = min; test >= 0 && test <= max; test++) {
 		int llen = 0; /* line length */

From 10874f20ee8752b04cac22f2a711394c7060b913 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:51 +0200
Subject: [PATCH 1797/2047] selftests/nolibc: mark test helpers as potentially
 unused
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When warning about unused functions these would be reported by we want
to keep them for future use.

Suggested-by: Zhangjin Wu <falcon@tinylab.org>
Link: https://lore.kernel.org/lkml/20230731064826.16584-1-falcon@tinylab.org/
Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/20230731224929.GA18296@1wt.eu/
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 75 +++++++++++++-------
 1 file changed, 50 insertions(+), 25 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index fce04fde0a2d..b7d1a996626b 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -165,7 +165,8 @@ static void result(int llen, enum RESULT r)
 #define EXPECT_ZR(cond, expr)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_zr(expr, llen); } while (0)
 
-static int expect_zr(int expr, int llen)
+static __attribute__((unused))
+int expect_zr(int expr, int llen)
 {
 	int ret = !(expr == 0);
 
@@ -178,7 +179,8 @@ static int expect_zr(int expr, int llen)
 #define EXPECT_NZ(cond, expr, val)			\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_nz(expr, llen; } while (0)
 
-static int expect_nz(int expr, int llen)
+static __attribute__((unused))
+int expect_nz(int expr, int llen)
 {
 	int ret = !(expr != 0);
 
@@ -191,7 +193,8 @@ static int expect_nz(int expr, int llen)
 #define EXPECT_EQ(cond, expr, val)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_eq(expr, llen, val); } while (0)
 
-static int expect_eq(uint64_t expr, int llen, uint64_t val)
+static __attribute__((unused))
+int expect_eq(uint64_t expr, int llen, uint64_t val)
 {
 	int ret = !(expr == val);
 
@@ -204,7 +207,8 @@ static int expect_eq(uint64_t expr, int llen, uint64_t val)
 #define EXPECT_NE(cond, expr, val)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ne(expr, llen, val); } while (0)
 
-static int expect_ne(int expr, int llen, int val)
+static __attribute__((unused))
+int expect_ne(int expr, int llen, int val)
 {
 	int ret = !(expr != val);
 
@@ -217,7 +221,8 @@ static int expect_ne(int expr, int llen, int val)
 #define EXPECT_GE(cond, expr, val)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ge(expr, llen, val); } while (0)
 
-static int expect_ge(int expr, int llen, int val)
+static __attribute__((unused))
+int expect_ge(int expr, int llen, int val)
 {
 	int ret = !(expr >= val);
 
@@ -230,7 +235,8 @@ static int expect_ge(int expr, int llen, int val)
 #define EXPECT_GT(cond, expr, val)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_gt(expr, llen, val); } while (0)
 
-static int expect_gt(int expr, int llen, int val)
+static __attribute__((unused))
+int expect_gt(int expr, int llen, int val)
 {
 	int ret = !(expr > val);
 
@@ -243,7 +249,8 @@ static int expect_gt(int expr, int llen, int val)
 #define EXPECT_LE(cond, expr, val)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_le(expr, llen, val); } while (0)
 
-static int expect_le(int expr, int llen, int val)
+static __attribute__((unused))
+int expect_le(int expr, int llen, int val)
 {
 	int ret = !(expr <= val);
 
@@ -256,7 +263,8 @@ static int expect_le(int expr, int llen, int val)
 #define EXPECT_LT(cond, expr, val)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_lt(expr, llen, val); } while (0)
 
-static int expect_lt(int expr, int llen, int val)
+static __attribute__((unused))
+int expect_lt(int expr, int llen, int val)
 {
 	int ret = !(expr < val);
 
@@ -269,7 +277,8 @@ static int expect_lt(int expr, int llen, int val)
 #define EXPECT_SYSZR(cond, expr)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_syszr(expr, llen); } while (0)
 
-static int expect_syszr(int expr, int llen)
+static __attribute__((unused))
+int expect_syszr(int expr, int llen)
 {
 	int ret = 0;
 
@@ -288,7 +297,8 @@ static int expect_syszr(int expr, int llen)
 #define EXPECT_SYSEQ(cond, expr, val)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_syseq(expr, llen, val); } while (0)
 
-static int expect_syseq(int expr, int llen, int val)
+static __attribute__((unused))
+int expect_syseq(int expr, int llen, int val)
 {
 	int ret = 0;
 
@@ -307,7 +317,8 @@ static int expect_syseq(int expr, int llen, int val)
 #define EXPECT_SYSNE(cond, expr, val)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_sysne(expr, llen, val); } while (0)
 
-static int expect_sysne(int expr, int llen, int val)
+static __attribute__((unused))
+int expect_sysne(int expr, int llen, int val)
 {
 	int ret = 0;
 
@@ -329,7 +340,8 @@ static int expect_sysne(int expr, int llen, int val)
 #define EXPECT_SYSER(cond, expr, expret, experr)			\
 	EXPECT_SYSER2(cond, expr, expret, experr, 0)
 
-static int expect_syserr2(int expr, int expret, int experr1, int experr2, int llen)
+static __attribute__((unused))
+int expect_syserr2(int expr, int expret, int experr1, int experr2, int llen)
 {
 	int ret = 0;
 	int _errno = errno;
@@ -352,7 +364,8 @@ static int expect_syserr2(int expr, int expret, int experr1, int experr2, int ll
 #define EXPECT_PTRZR(cond, expr)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrzr(expr, llen); } while (0)
 
-static int expect_ptrzr(const void *expr, int llen)
+static __attribute__((unused))
+int expect_ptrzr(const void *expr, int llen)
 {
 	int ret = 0;
 
@@ -370,7 +383,8 @@ static int expect_ptrzr(const void *expr, int llen)
 #define EXPECT_PTRNZ(cond, expr)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrnz(expr, llen); } while (0)
 
-static int expect_ptrnz(const void *expr, int llen)
+static __attribute__((unused))
+int expect_ptrnz(const void *expr, int llen)
 {
 	int ret = 0;
 
@@ -387,7 +401,8 @@ static int expect_ptrnz(const void *expr, int llen)
 #define EXPECT_PTREQ(cond, expr, cmp)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptreq(expr, llen, cmp); } while (0)
 
-static int expect_ptreq(const void *expr, int llen, const void *cmp)
+static __attribute__((unused))
+int expect_ptreq(const void *expr, int llen, const void *cmp)
 {
 	int ret = 0;
 
@@ -404,7 +419,8 @@ static int expect_ptreq(const void *expr, int llen, const void *cmp)
 #define EXPECT_PTRNE(cond, expr, cmp)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrne(expr, llen, cmp); } while (0)
 
-static int expect_ptrne(const void *expr, int llen, const void *cmp)
+static __attribute__((unused))
+int expect_ptrne(const void *expr, int llen, const void *cmp)
 {
 	int ret = 0;
 
@@ -421,7 +437,8 @@ static int expect_ptrne(const void *expr, int llen, const void *cmp)
 #define EXPECT_PTRGE(cond, expr, cmp)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrge(expr, llen, cmp); } while (0)
 
-static int expect_ptrge(const void *expr, int llen, const void *cmp)
+static __attribute__((unused))
+int expect_ptrge(const void *expr, int llen, const void *cmp)
 {
 	int ret = !(expr >= cmp);
 
@@ -433,7 +450,8 @@ static int expect_ptrge(const void *expr, int llen, const void *cmp)
 #define EXPECT_PTRGT(cond, expr, cmp)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrgt(expr, llen, cmp); } while (0)
 
-static int expect_ptrgt(const void *expr, int llen, const void *cmp)
+static __attribute__((unused))
+int expect_ptrgt(const void *expr, int llen, const void *cmp)
 {
 	int ret = !(expr > cmp);
 
@@ -446,7 +464,8 @@ static int expect_ptrgt(const void *expr, int llen, const void *cmp)
 #define EXPECT_PTRLE(cond, expr, cmp)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrle(expr, llen, cmp); } while (0)
 
-static int expect_ptrle(const void *expr, int llen, const void *cmp)
+static __attribute__((unused))
+int expect_ptrle(const void *expr, int llen, const void *cmp)
 {
 	int ret = !(expr <= cmp);
 
@@ -459,7 +478,8 @@ static int expect_ptrle(const void *expr, int llen, const void *cmp)
 #define EXPECT_PTRLT(cond, expr, cmp)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrlt(expr, llen, cmp); } while (0)
 
-static int expect_ptrlt(const void *expr, int llen, const void *cmp)
+static __attribute__((unused))
+int expect_ptrlt(const void *expr, int llen, const void *cmp)
 {
 	int ret = !(expr < cmp);
 
@@ -474,7 +494,8 @@ static int expect_ptrlt(const void *expr, int llen, const void *cmp)
 #define EXPECT_PTRER(cond, expr, expret, experr)			\
 	EXPECT_PTRER2(cond, expr, expret, experr, 0)
 
-static int expect_ptrerr2(const void *expr, const void *expret, int experr1, int experr2, int llen)
+static __attribute__((unused))
+int expect_ptrerr2(const void *expr, const void *expret, int experr1, int experr2, int llen)
 {
 	int ret = 0;
 	int _errno = errno;
@@ -496,7 +517,8 @@ static int expect_ptrerr2(const void *expr, const void *expret, int experr1, int
 #define EXPECT_STRZR(cond, expr)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strzr(expr, llen); } while (0)
 
-static int expect_strzr(const char *expr, int llen)
+static __attribute__((unused))
+int expect_strzr(const char *expr, int llen)
 {
 	int ret = 0;
 
@@ -514,7 +536,8 @@ static int expect_strzr(const char *expr, int llen)
 #define EXPECT_STRNZ(cond, expr)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strnz(expr, llen); } while (0)
 
-static int expect_strnz(const char *expr, int llen)
+static __attribute__((unused))
+int expect_strnz(const char *expr, int llen)
 {
 	int ret = 0;
 
@@ -532,7 +555,8 @@ static int expect_strnz(const char *expr, int llen)
 #define EXPECT_STREQ(cond, expr, cmp)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_streq(expr, llen, cmp); } while (0)
 
-static int expect_streq(const char *expr, int llen, const char *cmp)
+static __attribute__((unused))
+int expect_streq(const char *expr, int llen, const char *cmp)
 {
 	int ret = 0;
 
@@ -550,7 +574,8 @@ static int expect_streq(const char *expr, int llen, const char *cmp)
 #define EXPECT_STRNE(cond, expr, cmp)				\
 	do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strne(expr, llen, cmp); } while (0)
 
-static int expect_strne(const char *expr, int llen, const char *cmp)
+static __attribute__((unused))
+int expect_strne(const char *expr, int llen, const char *cmp)
 {
 	int ret = 0;
 

From 17e66f235e4ab43423741dcb946a4d140d8ba34b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:52 +0200
Subject: [PATCH 1798/2047] selftests/nolibc: make functions static if possible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows the compiler to generate warnings if they go unused.

Functions that are supposed to be used as breakpoints should not be
static, so un-statify those if necessary.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index b7d1a996626b..da9926e6ec82 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -80,7 +80,7 @@ char *itoa(int i)
 /* returns the error name (e.g. "ENOENT") for common errors, "SUCCESS" for 0,
  * or the decimal value for less common ones.
  */
-const char *errorname(int err)
+static const char *errorname(int err)
 {
 	switch (err) {
 	case 0: return "SUCCESS";
@@ -659,7 +659,7 @@ int test_getdents64(const char *dir)
 	return ret;
 }
 
-static int test_getpagesize(void)
+int test_getpagesize(void)
 {
 	int x = getpagesize();
 	int c;
@@ -688,7 +688,7 @@ static int test_getpagesize(void)
 	return !c;
 }
 
-static int test_fork(void)
+int test_fork(void)
 {
 	int status;
 	pid_t pid;
@@ -713,7 +713,7 @@ static int test_fork(void)
 	}
 }
 
-static int test_stat_timestamps(void)
+int test_stat_timestamps(void)
 {
 	struct stat st;
 
@@ -793,7 +793,7 @@ end:
 	return !!ret;
 }
 
-static int test_pipe(void)
+int test_pipe(void)
 {
 	const char *const msg = "hello, nolibc";
 	int pipefd[2];
@@ -1227,7 +1227,7 @@ static const struct test test_names[] = {
 	{ 0 }
 };
 
-int is_setting_valid(char *test)
+static int is_setting_valid(char *test)
 {
 	int idx, len, test_len, valid = 0;
 	char delimiter;

From c8d078153fd8407073b714c6d4d6c9b95d5aa4e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:53 +0200
Subject: [PATCH 1799/2047] selftests/nolibc: avoid unused parameter warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This warning will be enabled later so avoid triggering it.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index da9926e6ec82..9372abcac4c3 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -1108,7 +1108,8 @@ static int smash_stack(void)
 	return 1;
 }
 
-static int run_protection(int min, int max)
+static int run_protection(int min __attribute__((unused)),
+			  int max __attribute__((unused)))
 {
 	pid_t pid;
 	int llen = 0, status;

From 711f91fdec714d6307f1fa044fb74d0a742b1494 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:54 +0200
Subject: [PATCH 1800/2047] selftests/nolibc: avoid sign-compare warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These warnings will be enabled later so avoid triggering them.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 9372abcac4c3..c2646707ccc2 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -737,9 +737,9 @@ int test_stat_timestamps(void)
 
 int test_mmap_munmap(void)
 {
-	int ret, fd, i;
+	int ret, fd, i, page_size;
 	void *mem;
-	size_t page_size, file_size, length;
+	size_t file_size, length;
 	off_t offset, pa_offset;
 	struct stat stat_buf;
 	const char * const files[] = {
@@ -1017,7 +1017,7 @@ int run_stdlib(int min, int max)
 #define EXPECT_VFPRINTF(c, expected, fmt, ...)				\
 	ret += expect_vfprintf(llen, c, expected, fmt, ##__VA_ARGS__)
 
-static int expect_vfprintf(int llen, size_t c, const char *expected, const char *fmt, ...)
+static int expect_vfprintf(int llen, int c, const char *expected, const char *fmt, ...)
 {
 	int ret, fd, w, r;
 	char buf[100];
@@ -1041,7 +1041,7 @@ static int expect_vfprintf(int llen, size_t c, const char *expected, const char
 	va_end(args);
 
 	if (w != c) {
-		llen += printf(" written(%d) != %d", w, (int) c);
+		llen += printf(" written(%d) != %d", w, c);
 		result(llen, FAIL);
 		return 1;
 	}

From 37266a9ec7f5a3144c2070baeea2628c30ef07e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:55 +0200
Subject: [PATCH 1801/2047] selftests/nolibc: use correct return type for
 read() and write()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid truncating values before comparing them.

As printf in nolibc doesn't support ssize_t add casts to int for
printing.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index c2646707ccc2..23a5e4c57083 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -1019,7 +1019,8 @@ int run_stdlib(int min, int max)
 
 static int expect_vfprintf(int llen, int c, const char *expected, const char *fmt, ...)
 {
-	int ret, fd, w, r;
+	int ret, fd;
+	ssize_t w, r;
 	char buf[100];
 	FILE *memfile;
 	va_list args;
@@ -1041,7 +1042,7 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm
 	va_end(args);
 
 	if (w != c) {
-		llen += printf(" written(%d) != %d", w, c);
+		llen += printf(" written(%d) != %d", (int)w, c);
 		result(llen, FAIL);
 		return 1;
 	}
@@ -1055,7 +1056,7 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm
 	fclose(memfile);
 
 	if (r != w) {
-		llen += printf(" written(%d) != read(%d)", w, r);
+		llen += printf(" written(%d) != read(%d)", (int)w, (int)r);
 		result(llen, FAIL);
 		return 1;
 	}

From 9c5e490093e83e165022e0311bd7df5aa06cc860 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:56 +0200
Subject: [PATCH 1802/2047] selftests/nolibc: prevent out of bounds access in
 expect_vfprintf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If read() fails and returns -1 (or returns garbage for some other
reason) buf would be accessed out of bounds.
Only use the return value of read() after it has been validated.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/nolibc-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 23a5e4c57083..e2b70641a1e7 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -1051,7 +1051,6 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm
 	lseek(fd, 0, SEEK_SET);
 
 	r = read(fd, buf, sizeof(buf) - 1);
-	buf[r] = '\0';
 
 	fclose(memfile);
 
@@ -1061,6 +1060,7 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm
 		return 1;
 	}
 
+	buf[r] = '\0';
 	llen += printf(" \"%s\" = \"%s\"", expected, buf);
 	ret = strncmp(expected, buf, c);
 

From 711edef8f7cfa57f07cd336d77970a2b182bf9fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:57 +0200
Subject: [PATCH 1803/2047] selftests/nolibc: don't strip nolibc-test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Binary size is not important for nolibc-test and some debugging
information is nice to have, so don't strip the binary during linking.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index f42adef87e12..b82d29b6c37f 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -82,7 +82,7 @@ CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call
 CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 \
 		$(call cc-option,-fno-stack-protector) \
 		$(CFLAGS_$(ARCH)) $(CFLAGS_STACKPROTECTOR)
-LDFLAGS := -s
+LDFLAGS :=
 
 REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \
 		END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \

From 45f65f8d04db6fa328ab8e0191a8b9462d4bad95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Aug 2023 09:28:58 +0200
Subject: [PATCH 1804/2047] selftests/nolibc: enable compiler warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It will help the developers to avoid cruft and detect some bugs.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index b82d29b6c37f..e8d09cbee2ab 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -79,7 +79,7 @@ endif
 CFLAGS_s390 = -m64
 CFLAGS_mips = -EL
 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
-CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 \
+CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
 		$(call cc-option,-fno-stack-protector) \
 		$(CFLAGS_$(ARCH)) $(CFLAGS_STACKPROTECTOR)
 LDFLAGS :=

From 024a6c29f0cda622702dcf3a2597607e23431d3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Fri, 28 Jul 2023 23:21:52 +0200
Subject: [PATCH 1805/2047] MAINTAINERS: nolibc: add myself as co-maintainer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As discussed with Willy, Paul and Shuah add myself as maintainer for
the nolibc subsystem.

Link: https://lore.kernel.org/lkml/7afafb6c-9664-44a1-bc8f-d20239db1dd5@paulmck-laptop/
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Acked-by: Willy Tarreau <w@1wt.eu>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3be1bdfe8ecc..2da165b62d21 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14979,6 +14979,7 @@ F:	include/linux/power/bq27xxx_battery.h
 
 NOLIBC HEADER FILE
 M:	Willy Tarreau <w@1wt.eu>
+M:	Thomas Weißschuh <linux@weissschuh.net>
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/wtarreau/nolibc.git
 F:	tools/include/nolibc/

From 0cb0675ec37e1640e0304dc9ff7e4ba0b9ea8965 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 6 Aug 2023 02:39:26 +0800
Subject: [PATCH 1806/2047] tools/nolibc: add support for powerpc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both syscall declarations and _start code definition are added for
powerpc to nolibc.

Like mips, powerpc uses a register (exactly, the summary overflow bit)
to record the error occurred, and uses another register to return the
value [1]. So, the return value of every syscall declaration must be
normalized to match the __sysret() helper, return -value when there is
an error, otheriwse, return value directly.

Glibc and musl use different methods to check the summary overflow bit,
glibc (sysdeps/unix/sysv/linux/powerpc/sysdep.h) saves the cr register
to r0 at first, and then check the summary overflow bit in cr0:

    mfcr r0
    r0 & (1 << 28) ? -r3 : r3

    -->

    10003c14:       7c 00 00 26     mfcr    r0
    10003c18:       74 09 10 00     andis.  r9,r0,4096
    10003c1c:       41 82 00 08     beq     0x10003c24
    10003c20:       7c 63 00 d0     neg     r3,r3

Musl (arch/powerpc/syscall_arch.h) directly checks the summary overflow
bit with the 'bns' instruction, it is smaller:

    /* no summary overflow bit means no error, return value directly */
    bns+ 1f
    /* otherwise, return negated value */
    neg r3, r3
    1:

    -->

    10000418:       40 a3 00 08     bns     0x10000420
    1000041c:       7c 63 00 d0     neg     r3,r3

Like musl, Linux (arch/powerpc/include/asm/vdso/gettimeofday.h) uses the
same method for do_syscall_2() too.

Here applies the second method to get smaller size.

[1]: https://man7.org/linux/man-pages/man2/syscall.2.html

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-powerpc.h | 197 ++++++++++++++++++++++++++++
 tools/include/nolibc/arch.h         |   2 +
 2 files changed, 199 insertions(+)
 create mode 100644 tools/include/nolibc/arch-powerpc.h

diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h
new file mode 100644
index 000000000000..8332c9d3e5d6
--- /dev/null
+++ b/tools/include/nolibc/arch-powerpc.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * PowerPC specific definitions for NOLIBC
+ * Copyright (C) 2023 Zhangjin Wu <falcon@tinylab.org>
+ */
+
+#ifndef _NOLIBC_ARCH_POWERPC_H
+#define _NOLIBC_ARCH_POWERPC_H
+
+#include "compiler.h"
+#include "crt.h"
+
+/* Syscalls for PowerPC :
+ *   - stack is 16-byte aligned
+ *   - syscall number is passed in r0
+ *   - arguments are in r3, r4, r5, r6, r7, r8, r9
+ *   - the system call is performed by calling "sc"
+ *   - syscall return comes in r3, and the summary overflow bit is checked
+ *     to know if an error occurred, in which case errno is in r3.
+ *   - the arguments are cast to long and assigned into the target
+ *     registers which are then simply passed as registers to the asm code,
+ *     so that we don't have to experience issues with register constraints.
+ */
+
+#define _NOLIBC_SYSCALL_CLOBBERLIST \
+	"memory", "cr0", "r12", "r11", "r10", "r9"
+
+#define my_syscall0(num)                                                     \
+({                                                                           \
+	register long _ret  __asm__ ("r3");                                  \
+	register long _num  __asm__ ("r0") = (num);                          \
+									     \
+	__asm__ volatile (                                                   \
+		"	sc\n"                                                \
+		"	bns+ 1f\n"                                           \
+		"	neg  %0, %0\n"                                       \
+		"1:\n"                                                       \
+		: "=r"(_ret), "+r"(_num)                                     \
+		:                                                            \
+		: _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6", "r5", "r4"  \
+	);                                                                   \
+	_ret;                                                                \
+})
+
+#define my_syscall1(num, arg1)                                               \
+({                                                                           \
+	register long _ret  __asm__ ("r3");                                  \
+	register long _num  __asm__ ("r0") = (num);                          \
+	register long _arg1 __asm__ ("r3") = (long)(arg1);                   \
+									     \
+	__asm__ volatile (                                                   \
+		"	sc\n"                                                \
+		"	bns+ 1f\n"                                           \
+		"	neg  %0, %0\n"                                       \
+		"1:\n"                                                       \
+		: "=r"(_ret), "+r"(_num)                                     \
+		: "0"(_arg1)                                                 \
+		: _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6", "r5", "r4"  \
+	);                                                                   \
+	_ret;                                                                \
+})
+
+
+#define my_syscall2(num, arg1, arg2)                                         \
+({                                                                           \
+	register long _ret  __asm__ ("r3");                                  \
+	register long _num  __asm__ ("r0") = (num);                          \
+	register long _arg1 __asm__ ("r3") = (long)(arg1);                   \
+	register long _arg2 __asm__ ("r4") = (long)(arg2);                   \
+									     \
+	__asm__ volatile (                                                   \
+		"	sc\n"                                                \
+		"	bns+ 1f\n"                                           \
+		"	neg  %0, %0\n"                                       \
+		"1:\n"                                                       \
+		: "=r"(_ret), "+r"(_num), "+r"(_arg2)                        \
+		: "0"(_arg1)                                                 \
+		: _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6", "r5"        \
+	);                                                                   \
+	_ret;                                                                \
+})
+
+
+#define my_syscall3(num, arg1, arg2, arg3)                                   \
+({                                                                           \
+	register long _ret  __asm__ ("r3");                                  \
+	register long _num  __asm__ ("r0") = (num);                          \
+	register long _arg1 __asm__ ("r3") = (long)(arg1);                   \
+	register long _arg2 __asm__ ("r4") = (long)(arg2);                   \
+	register long _arg3 __asm__ ("r5") = (long)(arg3);                   \
+									     \
+	__asm__ volatile (                                                   \
+		"	sc\n"                                                \
+		"	bns+ 1f\n"                                           \
+		"	neg  %0, %0\n"                                       \
+		"1:\n"                                                       \
+		: "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3)           \
+		: "0"(_arg1)                                                 \
+		: _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6"              \
+	);                                                                   \
+	_ret;                                                                \
+})
+
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                             \
+({                                                                           \
+	register long _ret  __asm__ ("r3");                                  \
+	register long _num  __asm__ ("r0") = (num);                          \
+	register long _arg1 __asm__ ("r3") = (long)(arg1);                   \
+	register long _arg2 __asm__ ("r4") = (long)(arg2);                   \
+	register long _arg3 __asm__ ("r5") = (long)(arg3);                   \
+	register long _arg4 __asm__ ("r6") = (long)(arg4);                   \
+									     \
+	__asm__ volatile (                                                   \
+		"	sc\n"                                                \
+		"	bns+ 1f\n"                                           \
+		"	neg  %0, %0\n"                                       \
+		"1:\n"                                                       \
+		: "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3),          \
+		  "+r"(_arg4)                                                \
+		: "0"(_arg1)                                                 \
+		: _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7"                    \
+	);                                                                   \
+	_ret;                                                                \
+})
+
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                       \
+({                                                                           \
+	register long _ret  __asm__ ("r3");                                  \
+	register long _num  __asm__ ("r0") = (num);                          \
+	register long _arg1 __asm__ ("r3") = (long)(arg1);                   \
+	register long _arg2 __asm__ ("r4") = (long)(arg2);                   \
+	register long _arg3 __asm__ ("r5") = (long)(arg3);                   \
+	register long _arg4 __asm__ ("r6") = (long)(arg4);                   \
+	register long _arg5 __asm__ ("r7") = (long)(arg5);                   \
+									     \
+	__asm__ volatile (                                                   \
+		"	sc\n"                                                \
+		"	bns+ 1f\n"                                           \
+		"	neg  %0, %0\n"                                       \
+		"1:\n"                                                       \
+		: "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3),          \
+		  "+r"(_arg4), "+r"(_arg5)                                   \
+		: "0"(_arg1)                                                 \
+		: _NOLIBC_SYSCALL_CLOBBERLIST, "r8"                          \
+	);                                                                   \
+	_ret;                                                                \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                 \
+({                                                                           \
+	register long _ret  __asm__ ("r3");                                  \
+	register long _num  __asm__ ("r0") = (num);                          \
+	register long _arg1 __asm__ ("r3") = (long)(arg1);                   \
+	register long _arg2 __asm__ ("r4") = (long)(arg2);                   \
+	register long _arg3 __asm__ ("r5") = (long)(arg3);                   \
+	register long _arg4 __asm__ ("r6") = (long)(arg4);                   \
+	register long _arg5 __asm__ ("r7") = (long)(arg5);                   \
+	register long _arg6 __asm__ ("r8") = (long)(arg6);                   \
+									     \
+	__asm__ volatile (                                                   \
+		"	sc\n"                                                \
+		"	bns+ 1f\n"                                           \
+		"	neg  %0, %0\n"                                       \
+		"1:\n"                                                       \
+		: "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3),          \
+		  "+r"(_arg4), "+r"(_arg5), "+r"(_arg6)                      \
+		: "0"(_arg1)                                                 \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                \
+	);                                                                   \
+	_ret;                                                                \
+})
+
+/* FIXME: For 32-bit PowerPC, with newer gcc compilers (e.g. gcc 13.1.0),
+ * "omit-frame-pointer" fails with __attribute__((no_stack_protector)) but
+ * works with __attribute__((__optimize__("-fno-stack-protector")))
+ */
+#ifdef __no_stack_protector
+#undef __no_stack_protector
+#define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector")))
+#endif
+
+/* startup code */
+void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
+{
+	__asm__ volatile (
+		"mr     3, 1\n"         /* save stack pointer to r3, as arg1 of _start_c */
+		"clrrwi 1, 1, 4\n"      /* align the stack to 16 bytes                   */
+		"li     0, 0\n"         /* zero the frame pointer                        */
+		"stwu   1, -16(1)\n"    /* the initial stack frame                       */
+		"bl     _start_c\n"     /* transfer to c runtime                         */
+	);
+	__builtin_unreachable();
+}
+
+#endif /* _NOLIBC_ARCH_POWERPC_H */
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index 82b43935650f..e276fb0680af 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -25,6 +25,8 @@
 #include "arch-aarch64.h"
 #elif defined(__mips__) && defined(_ABIO32)
 #include "arch-mips.h"
+#elif defined(__powerpc__)
+#include "arch-powerpc.h"
 #elif defined(__riscv)
 #include "arch-riscv.h"
 #elif defined(__s390x__)

From e45ce88e6591bf609f997aea6dc1fdf1bc45fd43 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 6 Aug 2023 02:40:31 +0800
Subject: [PATCH 1807/2047] tools/nolibc: add support for powerpc64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This follows the 64-bit PowerPC ABI [1], refers to the slides: "A new
ABI for little-endian PowerPC64 Design & Implementation" [2] and the
musl code in arch/powerpc64/crt_arch.h.

First, stdu and clrrdi are used instead of stwu and clrrwi for
powerpc64.

Second, the stack frame size is increased to 32 bytes for powerpc64, 32
bytes is the minimal stack frame size supported described in [2].

Besides, the TOC pointer (GOT pointer) must be saved to r2.

This works on both little endian and big endian 64-bit PowerPC.

[1]: https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.pdf
[2]: https://www.llvm.org/devmtg/2014-04/PDFs/Talks/Euro-LLVM-2014-Weigand.pdf

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-powerpc.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h
index 8332c9d3e5d6..76c3784f9dc7 100644
--- a/tools/include/nolibc/arch-powerpc.h
+++ b/tools/include/nolibc/arch-powerpc.h
@@ -172,6 +172,7 @@
 	_ret;                                                                \
 })
 
+#ifndef __powerpc64__
 /* FIXME: For 32-bit PowerPC, with newer gcc compilers (e.g. gcc 13.1.0),
  * "omit-frame-pointer" fails with __attribute__((no_stack_protector)) but
  * works with __attribute__((__optimize__("-fno-stack-protector")))
@@ -180,10 +181,24 @@
 #undef __no_stack_protector
 #define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector")))
 #endif
+#endif /* !__powerpc64__ */
 
 /* startup code */
 void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
+#ifdef __powerpc64__
+	/* On 64-bit PowerPC, save TOC/GOT pointer to r2 */
+	extern char TOC __asm__ (".TOC.");
+	register volatile long r2 __asm__ ("r2") = (void *)&TOC - (void *)_start;
+
+	__asm__ volatile (
+		"mr     3, 1\n"         /* save stack pointer to r3, as arg1 of _start_c */
+		"clrrdi 1, 1, 4\n"      /* align the stack to 16 bytes                   */
+		"li     0, 0\n"         /* zero the frame pointer                        */
+		"stdu   1, -32(1)\n"    /* the initial stack frame                       */
+		"bl     _start_c\n"     /* transfer to c runtime                         */
+	);
+#else
 	__asm__ volatile (
 		"mr     3, 1\n"         /* save stack pointer to r3, as arg1 of _start_c */
 		"clrrwi 1, 1, 4\n"      /* align the stack to 16 bytes                   */
@@ -191,6 +206,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_
 		"stwu   1, -16(1)\n"    /* the initial stack frame                       */
 		"bl     _start_c\n"     /* transfer to c runtime                         */
 	);
+#endif
 	__builtin_unreachable();
 }
 

From c6c3734fb6b1c438d5586748753fee52a135c078 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 6 Aug 2023 02:41:37 +0800
Subject: [PATCH 1808/2047] selftests/nolibc: add XARCH and ARCH mapping
 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most of the CPU architectures have different variants, but kernel
usually only accepts parts of them via the ARCH variable, the others
should be customized via kernel config files.

To simplify testing, a new XARCH variable is added to extend the
kernel's ARCH with a few variants of the same architecture, and it is
used to customize variant specific variables, at last XARCH is converted
to the kernel's ARCH:

  e.g. make run XARCH=<one of the supported variants>
                | \
                |  `-> variant specific variables:
                |      IMAGE, DEFCONFIG, QEMU_ARCH, QEMU_ARGS, CFLAGS ...
                \
                 `---> kernel's ARCH

XARCH and ARCH are carefully mapped to allow users to pass architecture
variants via XARCH or pass architecture via ARCH from cmdline.

PowerPC is the first user and also a very good reference architecture of
this mapping, it has variants with different combinations of
32-bit/64-bit and bit endian/little endian.

To use this mapping, the other architectures can refer to PowerPC, If
the target architecture only has one variant, XARCH is simply an alias
of ARCH, no additional mapping required.

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/20230702171715.GD16233@1wt.eu/
Link: https://lore.kernel.org/lkml/20230730061801.GA7690@1wt.eu/
Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 46 ++++++++++++++++++-------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index e8d09cbee2ab..09f3cead3853 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -14,6 +14,27 @@ include $(srctree)/scripts/subarch.include
 ARCH = $(SUBARCH)
 endif
 
+# XARCH extends the kernel's ARCH with a few variants of the same
+# architecture that only differ by the configuration, the toolchain
+# and the Qemu program used. It is copied as-is into ARCH except for
+# a few specific values which are mapped like this:
+#
+#  XARCH        | ARCH      | config
+#  -------------|-----------|-------------------------
+#  ppc          | powerpc   | 32 bits
+#  ppc64        | powerpc   | 64 bits big endian
+#  ppc64le      | powerpc   | 64 bits little endian
+#
+# It is recommended to only use XARCH, though it does not harm if
+# ARCH is already set. For simplicity, ARCH is sufficient for all
+# architectures where both are equal.
+
+# configure default variants for target kernel supported architectures
+XARCH            = $(or $(XARCH_$(ARCH)),$(ARCH))
+
+# map from user input variants to their kernel supported architectures
+ARCH            := $(or $(ARCH_$(XARCH)),$(XARCH))
+
 # kernel image names by architecture
 IMAGE_i386       = arch/x86/boot/bzImage
 IMAGE_x86_64     = arch/x86/boot/bzImage
@@ -24,7 +45,7 @@ IMAGE_mips       = vmlinuz
 IMAGE_riscv      = arch/riscv/boot/Image
 IMAGE_s390       = arch/s390/boot/bzImage
 IMAGE_loongarch  = arch/loongarch/boot/vmlinuz.efi
-IMAGE            = $(IMAGE_$(ARCH))
+IMAGE            = $(IMAGE_$(XARCH))
 IMAGE_NAME       = $(notdir $(IMAGE))
 
 # default kernel configurations that appear to be usable
@@ -37,7 +58,7 @@ DEFCONFIG_mips       = malta_defconfig
 DEFCONFIG_riscv      = defconfig
 DEFCONFIG_s390       = defconfig
 DEFCONFIG_loongarch  = defconfig
-DEFCONFIG            = $(DEFCONFIG_$(ARCH))
+DEFCONFIG            = $(DEFCONFIG_$(XARCH))
 
 # optional tests to run (default = all)
 TEST =
@@ -52,7 +73,7 @@ QEMU_ARCH_mips       = mipsel  # works with malta_defconfig
 QEMU_ARCH_riscv      = riscv64
 QEMU_ARCH_s390       = s390x
 QEMU_ARCH_loongarch  = loongarch64
-QEMU_ARCH            = $(QEMU_ARCH_$(ARCH))
+QEMU_ARCH            = $(QEMU_ARCH_$(XARCH))
 
 # QEMU_ARGS : some arch-specific args to pass to qemu
 QEMU_ARGS_i386       = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
@@ -64,7 +85,7 @@ QEMU_ARGS_mips       = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_riscv      = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_s390       = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_loongarch  = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS            = $(QEMU_ARGS_$(ARCH)) $(QEMU_ARGS_EXTRA)
+QEMU_ARGS            = $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_EXTRA)
 
 # OUTPUT is only set when run from the main makefile, otherwise
 # it defaults to this nolibc directory.
@@ -81,7 +102,7 @@ CFLAGS_mips = -EL
 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
 CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
 		$(call cc-option,-fno-stack-protector) \
-		$(CFLAGS_$(ARCH)) $(CFLAGS_STACKPROTECTOR)
+		$(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR)
 LDFLAGS :=
 
 REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \
@@ -96,24 +117,25 @@ help:
 	@echo "  sysroot      create the nolibc sysroot here (uses \$$ARCH)"
 	@echo "  nolibc-test  build the executable (uses \$$CC and \$$CROSS_COMPILE)"
 	@echo "  libc-test    build an executable using the compiler's default libc instead"
-	@echo "  run-user     runs the executable under QEMU (uses \$$ARCH, \$$TEST)"
+	@echo "  run-user     runs the executable under QEMU (uses \$$XARCH, \$$TEST)"
 	@echo "  initramfs    prepare the initramfs with nolibc-test"
-	@echo "  defconfig    create a fresh new default config (uses \$$ARCH)"
-	@echo "  kernel       (re)build the kernel with the initramfs (uses \$$ARCH)"
-	@echo "  run          runs the kernel in QEMU after building it (uses \$$ARCH, \$$TEST)"
-	@echo "  rerun        runs a previously prebuilt kernel in QEMU (uses \$$ARCH, \$$TEST)"
+	@echo "  defconfig    create a fresh new default config (uses \$$XARCH)"
+	@echo "  kernel       (re)build the kernel with the initramfs (uses \$$XARCH)"
+	@echo "  run          runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)"
+	@echo "  rerun        runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)"
 	@echo "  clean        clean the sysroot, initramfs, build and output files"
 	@echo ""
 	@echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST."
 	@echo ""
 	@echo "Currently using the following variables:"
 	@echo "  ARCH          = $(ARCH)"
+	@echo "  XARCH         = $(XARCH)"
 	@echo "  CROSS_COMPILE = $(CROSS_COMPILE)"
 	@echo "  CC            = $(CC)"
 	@echo "  OUTPUT        = $(OUTPUT)"
 	@echo "  TEST          = $(TEST)"
-	@echo "  QEMU_ARCH     = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$ARCH]"
-	@echo "  IMAGE_NAME    = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$ARCH]"
+	@echo "  QEMU_ARCH     = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$XARCH]"
+	@echo "  IMAGE_NAME    = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$XARCH]"
 	@echo ""
 
 all: run

From 587e98459102448d77c6d06aaca88aa99d3b8279 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 6 Aug 2023 02:42:43 +0800
Subject: [PATCH 1809/2047] selftests/nolibc: add test support for ppc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Kernel uses ARCH=powerpc for both 32-bit and 64-bit PowerPC, here adds a
ppc variant for 32-bit PowerPC and uses it as the default variant of
powerpc architecture.

Users can pass XARCH=ppc (or ARCH=powerpc) to test 32-bit PowerPC.

The default qemu-system-ppc g3beige machine [1] is used to run 32-bit
powerpc kernel with pmac32_defconfig. The missing PMACZILOG serial tty
and console are enabled in another patch [2].

Note,

- zImage doesn't boot due to "qemu-system-ppc: Some ROM regions are
  overlapping" error, so, vmlinux is used instead.

- since the VSX support may be disabled in kernel side, to avoid
  "illegal instruction" errors due to missing VSX kernel support, let's
  simply let compiler not generate vector/scalar (VSX) instructions via
  the '-mno-vsx' option.

- as 'man gcc' shows, '-mmultiple' is used to generate code that uses
  the load multiple word instructions and the store multiple word
  instructions. Those instructions do not work when the processor is in
  little-endian mode (except PPC740/PPC750), so, we only enable it
  for big endian powerpc.

[1]: https://qemu.readthedocs.io/en/latest/system/ppc/powermac.html
[2]: https://lore.kernel.org/lkml/bb7b5f9958b3e3a20f6573ff7ce7c5dc566e7e32.1690982937.git.tanyuan@tinylab.org/

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/ZL9leVOI25S2+0+g@1wt.eu/
Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 09f3cead3853..2103fc79c4ef 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -30,9 +30,11 @@ endif
 # architectures where both are equal.
 
 # configure default variants for target kernel supported architectures
+XARCH_powerpc    = ppc
 XARCH            = $(or $(XARCH_$(ARCH)),$(ARCH))
 
 # map from user input variants to their kernel supported architectures
+ARCH_ppc         = powerpc
 ARCH            := $(or $(ARCH_$(XARCH)),$(XARCH))
 
 # kernel image names by architecture
@@ -42,6 +44,7 @@ IMAGE_x86        = arch/x86/boot/bzImage
 IMAGE_arm64      = arch/arm64/boot/Image
 IMAGE_arm        = arch/arm/boot/zImage
 IMAGE_mips       = vmlinuz
+IMAGE_ppc        = vmlinux
 IMAGE_riscv      = arch/riscv/boot/Image
 IMAGE_s390       = arch/s390/boot/bzImage
 IMAGE_loongarch  = arch/loongarch/boot/vmlinuz.efi
@@ -55,6 +58,7 @@ DEFCONFIG_x86        = defconfig
 DEFCONFIG_arm64      = defconfig
 DEFCONFIG_arm        = multi_v7_defconfig
 DEFCONFIG_mips       = malta_defconfig
+DEFCONFIG_ppc        = pmac32_defconfig
 DEFCONFIG_riscv      = defconfig
 DEFCONFIG_s390       = defconfig
 DEFCONFIG_loongarch  = defconfig
@@ -70,6 +74,7 @@ QEMU_ARCH_x86        = x86_64
 QEMU_ARCH_arm64      = aarch64
 QEMU_ARCH_arm        = arm
 QEMU_ARCH_mips       = mipsel  # works with malta_defconfig
+QEMU_ARCH_ppc        = ppc
 QEMU_ARCH_riscv      = riscv64
 QEMU_ARCH_s390       = s390x
 QEMU_ARCH_loongarch  = loongarch64
@@ -82,6 +87,7 @@ QEMU_ARGS_x86        = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(
 QEMU_ARGS_arm64      = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_arm        = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_mips       = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_ppc        = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_riscv      = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_s390       = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_loongarch  = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
@@ -97,6 +103,7 @@ else
 Q=@
 endif
 
+CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
 CFLAGS_s390 = -m64
 CFLAGS_mips = -EL
 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))

From 8a5040cb3f5ac3b1bea5aaa1150daf43b883f938 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 6 Aug 2023 02:43:53 +0800
Subject: [PATCH 1810/2047] selftests/nolibc: add test support for ppc64le
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Kernel uses ARCH=powerpc for both 32-bit and 64-bit PowerPC, here adds a
ppc64le variant for little endian 64-bit PowerPC, users can pass
XARCH=ppc64le to test it.

The powernv machine of qemu-system-ppc64le is used for there is just a
working powernv_defconfig.

As the document [1] shows:

  PowerNV (as Non-Virtualized) is the “bare metal” platform using the
  OPAL firmware. It runs Linux on IBM and OpenPOWER systems and it can be
  used as an hypervisor OS, running KVM guests, or simply as a host OS.

Notes,

- since the VSX support may be disabled in kernel side, to avoid
  "illegal instruction" errors due to missing VSX kernel support, let's
  simply let compiler not generate vector/scalar (VSX) instructions via
  the '-mno-vsx' option.

- little endian ppc64 prefers elfv2 to elfv1 if the toolchain (e.g. gcc
  13.1.0) supports it, let's align with kernel, otherwise, our elfv1
  binary will not run on kernel with elfv2 ABI.

[1]: https://qemu.readthedocs.io/en/latest/system/ppc/powernv.html

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/20230722120747.GC17311@1wt.eu/
Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 2103fc79c4ef..a017b2048959 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -35,6 +35,7 @@ XARCH            = $(or $(XARCH_$(ARCH)),$(ARCH))
 
 # map from user input variants to their kernel supported architectures
 ARCH_ppc         = powerpc
+ARCH_ppc64le     = powerpc
 ARCH            := $(or $(ARCH_$(XARCH)),$(XARCH))
 
 # kernel image names by architecture
@@ -45,6 +46,7 @@ IMAGE_arm64      = arch/arm64/boot/Image
 IMAGE_arm        = arch/arm/boot/zImage
 IMAGE_mips       = vmlinuz
 IMAGE_ppc        = vmlinux
+IMAGE_ppc64le    = arch/powerpc/boot/zImage
 IMAGE_riscv      = arch/riscv/boot/Image
 IMAGE_s390       = arch/s390/boot/bzImage
 IMAGE_loongarch  = arch/loongarch/boot/vmlinuz.efi
@@ -59,6 +61,7 @@ DEFCONFIG_arm64      = defconfig
 DEFCONFIG_arm        = multi_v7_defconfig
 DEFCONFIG_mips       = malta_defconfig
 DEFCONFIG_ppc        = pmac32_defconfig
+DEFCONFIG_ppc64le    = powernv_defconfig
 DEFCONFIG_riscv      = defconfig
 DEFCONFIG_s390       = defconfig
 DEFCONFIG_loongarch  = defconfig
@@ -75,6 +78,7 @@ QEMU_ARCH_arm64      = aarch64
 QEMU_ARCH_arm        = arm
 QEMU_ARCH_mips       = mipsel  # works with malta_defconfig
 QEMU_ARCH_ppc        = ppc
+QEMU_ARCH_ppc64le    = ppc64le
 QEMU_ARCH_riscv      = riscv64
 QEMU_ARCH_s390       = s390x
 QEMU_ARCH_loongarch  = loongarch64
@@ -88,6 +92,7 @@ QEMU_ARGS_arm64      = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC
 QEMU_ARGS_arm        = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_mips       = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_ppc        = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_ppc64le    = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_riscv      = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_s390       = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_loongarch  = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
@@ -104,6 +109,7 @@ Q=@
 endif
 
 CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
+CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
 CFLAGS_s390 = -m64
 CFLAGS_mips = -EL
 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))

From faeb4e09fe77262f9a6b2f9f874eec0b6850721e Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sun, 6 Aug 2023 02:44:58 +0800
Subject: [PATCH 1811/2047] selftests/nolibc: add test support for ppc64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Kernel uses ARCH=powerpc for both 32-bit and 64-bit PowerPC, here adds a
ppc64 variant for big endian 64-bit PowerPC, users can pass XARCH=ppc64
to test it.

The powernv machine of qemu-system-ppc64 is used with
powernv_be_defconfig.

As the document [1] shows:

  PowerNV (as Non-Virtualized) is the “bare metal” platform using the
  OPAL firmware. It runs Linux on IBM and OpenPOWER systems and it can be
  used as an hypervisor OS, running KVM guests, or simply as a host OS.

Notes,

- differs from little endian 64-bit PowerPC, vmlinux is used instead of
  zImage, because big endian zImage [2] only boot on qemu with x-vof=on
  (added from qemu v7.0) and a fixup patch [3] for qemu v7.0.51:

- since the VSX support may be disabled in kernel side, to avoid
  "illegal instruction" errors due to missing VSX kernel support, let's
  simply let compiler not generate vector/scalar (VSX) instructions via
  the '-mno-vsx' option.

- as 'man gcc' shows, '-mmultiple' is used to generate code that uses
  the load multiple word instructions and the store multiple word
  instructions. Those instructions do not work when the processor is in
  little-endian mode (except PPC740/PPC750), so, we only enable it
  for big endian powerpc.

- for big endian ppc64, as the help message from arch/powerpc/Kconfig
  shows, the V2 ABI is standard for 64-bit little-endian, but for
  big-endian it is less well tested by kernel and toolchain, so, use
  elfv1 as-is, no need to explicitly ask toolchain to use elfv2 here.

[1]: https://qemu.readthedocs.io/en/latest/system/ppc/powernv.html
[2]: https://github.com/linuxppc/issues/issues/402
[3]: https://lore.kernel.org/qemu-devel/20220504065536.3534488-1-aik@ozlabs.ru/

Suggested-by: Willy Tarreau <w@1wt.eu>
Link: https://lore.kernel.org/lkml/20230722121019.GD17311@1wt.eu/
Link: https://lore.kernel.org/lkml/20230719043353.GC5331@1wt.eu/
Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index a017b2048959..f7eff081f41f 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -35,6 +35,7 @@ XARCH            = $(or $(XARCH_$(ARCH)),$(ARCH))
 
 # map from user input variants to their kernel supported architectures
 ARCH_ppc         = powerpc
+ARCH_ppc64       = powerpc
 ARCH_ppc64le     = powerpc
 ARCH            := $(or $(ARCH_$(XARCH)),$(XARCH))
 
@@ -46,6 +47,7 @@ IMAGE_arm64      = arch/arm64/boot/Image
 IMAGE_arm        = arch/arm/boot/zImage
 IMAGE_mips       = vmlinuz
 IMAGE_ppc        = vmlinux
+IMAGE_ppc64      = vmlinux
 IMAGE_ppc64le    = arch/powerpc/boot/zImage
 IMAGE_riscv      = arch/riscv/boot/Image
 IMAGE_s390       = arch/s390/boot/bzImage
@@ -61,6 +63,7 @@ DEFCONFIG_arm64      = defconfig
 DEFCONFIG_arm        = multi_v7_defconfig
 DEFCONFIG_mips       = malta_defconfig
 DEFCONFIG_ppc        = pmac32_defconfig
+DEFCONFIG_ppc64      = powernv_be_defconfig
 DEFCONFIG_ppc64le    = powernv_defconfig
 DEFCONFIG_riscv      = defconfig
 DEFCONFIG_s390       = defconfig
@@ -78,6 +81,7 @@ QEMU_ARCH_arm64      = aarch64
 QEMU_ARCH_arm        = arm
 QEMU_ARCH_mips       = mipsel  # works with malta_defconfig
 QEMU_ARCH_ppc        = ppc
+QEMU_ARCH_ppc64      = ppc64
 QEMU_ARCH_ppc64le    = ppc64le
 QEMU_ARCH_riscv      = riscv64
 QEMU_ARCH_s390       = s390x
@@ -92,6 +96,7 @@ QEMU_ARGS_arm64      = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC
 QEMU_ARGS_arm        = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_mips       = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_ppc        = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_ppc64      = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_ppc64le    = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_riscv      = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_s390       = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
@@ -109,6 +114,7 @@ Q=@
 endif
 
 CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
+CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
 CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
 CFLAGS_s390 = -m64
 CFLAGS_mips = -EL

From ce1bb82b1c53585e781e9ec0bf22df23aff104c6 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 5 Aug 2023 14:11:02 +0800
Subject: [PATCH 1812/2047] selftests/nolibc: allow report with existing test
 log
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After the tests finish, it is valuable to report and summarize with
existing test log.

This avoid rerun or run the tests again when not necessary.

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index f7eff081f41f..56abe044173c 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -216,6 +216,10 @@ rerun:
 	$(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
 	$(Q)$(REPORT) $(CURDIR)/run.out
 
+# report with existing test log
+report:
+	$(Q)$(REPORT) $(CURDIR)/run.out
+
 clean:
 	$(call QUIET_CLEAN, sysroot)
 	$(Q)rm -rf sysroot

From dcb677c3d3290c18cfdbc54d2f8fcf0279c06206 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 5 Aug 2023 14:12:06 +0800
Subject: [PATCH 1813/2047] tools/nolibc: stackprotector.h: make
 __stack_chk_init static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows to generate smaller text/data/dec size.

As the _start_c() function added by crt.h, __stack_chk_init() is called
from _start_c() instead of the assembly _start. So, it is able to mark
it with static now.

Reviewed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/crt.h            | 2 +-
 tools/include/nolibc/stackprotector.h | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
index 32e128b0fb62..a5f33fef1672 100644
--- a/tools/include/nolibc/crt.h
+++ b/tools/include/nolibc/crt.h
@@ -10,7 +10,7 @@
 char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
-void __stack_chk_init(void);
+static void __stack_chk_init(void);
 static void exit(int);
 
 void _start_c(long *sp)
diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h
index b620f2b9578d..13f1d0e60387 100644
--- a/tools/include/nolibc/stackprotector.h
+++ b/tools/include/nolibc/stackprotector.h
@@ -37,8 +37,7 @@ void __stack_chk_fail_local(void)
 __attribute__((weak,section(".data.nolibc_stack_chk")))
 uintptr_t __stack_chk_guard;
 
-__attribute__((weak,section(".text.nolibc_stack_chk"))) __no_stack_protector
-void __stack_chk_init(void)
+static __no_stack_protector void __stack_chk_init(void)
 {
 	my_syscall3(__NR_getrandom, &__stack_chk_guard, sizeof(__stack_chk_guard), 0);
 	/* a bit more randomness in case getrandom() fails, ensure the guard is never 0 */
@@ -46,7 +45,7 @@ void __stack_chk_init(void)
 		__stack_chk_guard ^= (uintptr_t) &__stack_chk_guard;
 }
 #else /* !defined(_NOLIBC_STACKPROTECTOR) */
-__inline__ void __stack_chk_init(void) {}
+static void __stack_chk_init(void) {}
 #endif /* defined(_NOLIBC_STACKPROTECTOR) */
 
 #endif /* _NOLIBC_STACKPROTECTOR_H */

From 418c846821506d01882a948ce90947f264afc606 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Sat, 12 Aug 2023 04:30:25 +0800
Subject: [PATCH 1814/2047] selftests/nolibc: libc-test: use HOSTCC instead of
 CC

libc-test is mainly added to compare the behavior of nolibc to the
system libc, it is meaningless and error-prone with cross compiling.

Let's use HOSTCC instead of CC to avoid wrongly use cross compiler when
CROSS_COMPILE is passed or customized.

Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Fixes: cfb672f94f6e ("selftests/nolibc: add run-libc-test target")
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/testing/selftests/nolibc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 56abe044173c..dfe66776a331 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -178,7 +178,7 @@ nolibc-test: nolibc-test.c
 endif
 
 libc-test: nolibc-test.c
-	$(QUIET_CC)$(CC) -o $@ $<
+	$(QUIET_CC)$(HOSTCC) -o $@ $<
 
 # local libc-test
 run-libc-test: libc-test

From 872dbfa0321796dace602cb7d368f0ec20af8f16 Mon Sep 17 00:00:00 2001
From: Zhangjin Wu <falcon@tinylab.org>
Date: Mon, 7 Aug 2023 19:00:48 +0800
Subject: [PATCH 1815/2047] tools/nolibc: silence ppc64 compile warnings

Silence the following warnings reported by the new -Wall -Wextra options
with pure assembly code.

    In file included from sysroot/powerpc/include/stdio.h:13,
                     from nolibc-test.c:13:
    sysroot/powerpc/include/arch.h: In function '_start':
    sysroot/powerpc/include/arch.h:192:32: warning: unused variable 'r2' [-Wunused-variable]
      192 |         register volatile long r2 __asm__ ("r2") = (void *)&TOC - (void *)_start;
          |                                ^~
    sysroot/powerpc/include/arch.h:187:97: warning: optimization may eliminate reads and/or writes to register variables [-Wvolatile-register-var]
      187 | void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
          |                                                                                                 ^~~~~~

Since only elfv2 ABI requires to save the TOC/GOT pointer to r2
register, when using elfv1 ABI, the old C code is simply ignored by the
compiler, but the compiler can not ignore the inline assembly code and
will introduce build failure or running segfaults. So, let's further
only add the new assembly code for elfv2 ABI with the checking of
_CALL_ELF == 2.

Link: https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.pdf
Link: https://www.llvm.org/devmtg/2014-04/PDFs/Talks/Euro-LLVM-2014-Weigand.pdf
Signed-off-by: Zhangjin Wu <falcon@tinylab.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/arch-powerpc.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h
index 76c3784f9dc7..ac212e6185b2 100644
--- a/tools/include/nolibc/arch-powerpc.h
+++ b/tools/include/nolibc/arch-powerpc.h
@@ -187,9 +187,17 @@
 void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void)
 {
 #ifdef __powerpc64__
-	/* On 64-bit PowerPC, save TOC/GOT pointer to r2 */
-	extern char TOC __asm__ (".TOC.");
-	register volatile long r2 __asm__ ("r2") = (void *)&TOC - (void *)_start;
+#if _CALL_ELF == 2
+	/* with -mabi=elfv2, save TOC/GOT pointer to r2
+	 * r12 is global entry pointer, we use it to compute TOC from r12
+	 * https://www.llvm.org/devmtg/2014-04/PDFs/Talks/Euro-LLVM-2014-Weigand.pdf
+	 * https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.pdf
+	 */
+	__asm__ volatile (
+		"addis  2, 12, .TOC. - _start@ha\n"
+		"addi   2,  2, .TOC. - _start@l\n"
+	);
+#endif /* _CALL_ELF == 2 */
 
 	__asm__ volatile (
 		"mr     3, 1\n"         /* save stack pointer to r3, as arg1 of _start_c */

From fb01ff635efd0aba862c843587554167aacc4d2f Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Tue, 15 Aug 2023 13:44:55 +0200
Subject: [PATCH 1816/2047] tools/nolibc: keep brk(), sbrk(), mmap() away from
 __sysret()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The __sysret() function causes some undesirable casts so we'll revert
it. In order to keep it simple it will now only support integer return
values like in the past, so we must basically revert the changes that
were made to these 3 syscalls which return a pointer so that they
simply rely on their own test and the SET_ERRNO() macro.

Fixes: 4201cfce15fe ("tools/nolibc: clean up sbrk() routine")
Fixes: 924e9539aeaa ("tools/nolibc: clean up mmap() routine")
Fixes: d27447bc2e0a ("tools/nolibc: sys.h: apply __sysret() helper")
Link: https://lore.kernel.org/lkml/20230806095846.GB10627@1wt.eu/
Link: https://lore.kernel.org/lkml/ZNKOJY+g66nkIyvv@1wt.eu/
Cc: Zhangjin Wu <falcon@tinylab.org>
Cc: David Laight <David.Laight@ACULAB.COM>
Cc: Thomas Weißschuh <thomas@t-8ch.de>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 833d6c5e86dc..bfe1647a3a30 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -82,7 +82,13 @@ void *sys_brk(void *addr)
 static __attribute__((unused))
 int brk(void *addr)
 {
-	return __sysret(sys_brk(addr) ? 0 : -ENOMEM);
+	void *ret = sys_brk(addr);
+
+	if (!ret) {
+		SET_ERRNO(ENOMEM);
+		return -1;
+	}
+	return 0;
 }
 
 static __attribute__((unused))
@@ -94,7 +100,8 @@ void *sbrk(intptr_t inc)
 	if (ret && sys_brk(ret + inc) == ret + inc)
 		return ret + inc;
 
-	return (void *)__sysret(-ENOMEM);
+	SET_ERRNO(ENOMEM);
+	return (void *)-1;
 }
 
 
@@ -682,7 +689,13 @@ void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
 static __attribute__((unused))
 void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
 {
-	return (void *)__sysret((unsigned long)sys_mmap(addr, length, prot, flags, fd, offset));
+	void *ret = sys_mmap(addr, length, prot, flags, fd, offset);
+
+	if ((unsigned long)ret >= -4095UL) {
+		SET_ERRNO(-(long)ret);
+		ret = MAP_FAILED;
+	}
+	return ret;
 }
 
 static __attribute__((unused))

From 556fb7131e03b0283672fb40f6dc2d151752aaa7 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Tue, 15 Aug 2023 13:55:23 +0200
Subject: [PATCH 1817/2047] tools/nolibc: avoid undesired casts in the
 __sysret() macro
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Having __sysret() as an inline function has the unfortunate effect of
adding casts and large constants comparisons after the syscall returns
that significantly inflate some light code that's otherwise syscall-
heavy. Even nolibc-test grew by ~1%.

Let's switch back to a macro for this, and use it only with signed
arguments. Note that it is also possible to design a slightly more
complex macro covering unsigned and pointers but we only have 3 such
syscalls so it is pointless, and these were just addressed not to use
this macro anymore. Now for the argument (the local variable containing
the syscall return value), any negative value is an error, that results
in -1 being returned and errno to be assigned the opposite value.

This may be revisited again in the future if really needed but for now
let's get back to something sane.

Fixes: 428905da6ec4 ("tools/nolibc: sys.h: add a syscall return helper")
Link: https://lore.kernel.org/lkml/20230806095846.GB10627@1wt.eu/
Link: https://lore.kernel.org/lkml/ZNKOJY+g66nkIyvv@1wt.eu/
Cc: Zhangjin Wu <falcon@tinylab.org>
Cc: David Laight <David.Laight@ACULAB.COM>
Cc: Thomas Weißschuh <thomas@t-8ch.de>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 tools/include/nolibc/sys.h | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index bfe1647a3a30..fdb6bd6c0e2f 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -28,22 +28,21 @@
 #include "types.h"
 
 
-/* Syscall return helper for library routines, set errno as -ret when ret is in
- * range of [-MAX_ERRNO, -1]
- *
- * Note, No official reference states the errno range here aligns with musl
- * (src/internal/syscall_ret.c) and glibc (sysdeps/unix/sysv/linux/sysdep.h)
+/* Syscall return helper: takes the syscall value in argument and checks for an
+ * error in it. This may only be used with signed returns (int or long), but
+ * not with pointers. An error is any value < 0. When an error is encountered,
+ * -ret is set into errno and -1 is returned. Otherwise the returned value is
+ * passed as-is with its type preserved.
  */
 
-static __inline__ __attribute__((unused, always_inline))
-long __sysret(unsigned long ret)
-{
-	if (ret >= (unsigned long)-MAX_ERRNO) {
-		SET_ERRNO(-(long)ret);
-		return -1;
-	}
-	return ret;
-}
+#define __sysret(arg)							\
+({									\
+	__typeof__(arg) __sysret_arg = (arg);				\
+	(__sysret_arg < 0)                              /* error ? */	\
+		? (({ SET_ERRNO(-__sysret_arg); }), -1) /* ret -1 with errno = -arg */ \
+		: __sysret_arg;                         /* return original value */ \
+})
+
 
 /* Functions in this file only describe syscalls. They're declared static so
  * that the compiler usually decides to inline them while still being allowed

From e7f2e65699e2290fd547ec12a17008764e5d9620 Mon Sep 17 00:00:00 2001
From: Wei Chen <harperchen1110@gmail.com>
Date: Thu, 10 Aug 2023 08:23:33 +0000
Subject: [PATCH 1818/2047] media: vcodec: Fix potential array out-of-bounds in
 encoder queue_setup

variable *nplanes is provided by user via system call argument. The
possible value of q_data->fmt->num_planes is 1-3, while the value
of *nplanes can be 1-8. The array access by index i can cause array
out-of-bounds.

Fix this bug by checking *nplanes against the array size.

Fixes: 4e855a6efa54 ("[media] vcodec: mediatek: Add Mediatek V4L2 Video Encoder Driver")
Signed-off-by: Wei Chen <harperchen1110@gmail.com>
Cc: stable@vger.kernel.org
Reviewed-by: Chen-Yu Tsai <wenst@chromium.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c
index 9ff439a50f53..315e97a2450e 100644
--- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c
+++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c
@@ -821,6 +821,8 @@ static int vb2ops_venc_queue_setup(struct vb2_queue *vq,
 		return -EINVAL;
 
 	if (*nplanes) {
+		if (*nplanes != q_data->fmt->num_planes)
+			return -EINVAL;
 		for (i = 0; i < *nplanes; i++)
 			if (sizes[i] < q_data->sizeimage[i])
 				return -EINVAL;

From e531fdb5cd5ee2564b7fe10c8a9219e2b2fac61e Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Fri, 18 Aug 2023 07:59:38 -0700
Subject: [PATCH 1819/2047] dma-buf/sw_sync: Avoid recursive lock during fence
 signal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a signal callback releases the sw_sync fence, that will trigger a
deadlock as the timeline_fence_release recurses onto the fence->lock
(used both for signaling and the the timeline tree).

To avoid that, temporarily hold an extra reference to the signalled
fences until after we drop the lock.

(This is an alternative implementation of https://patchwork.kernel.org/patch/11664717/
which avoids some potential UAF issues with the original patch.)

v2: Remove now obsolete comment, use list_move_tail() and
    list_del_init()

Reported-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
Signed-off-by: Rob Clark <robdclark@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230818145939.39697-1-robdclark@gmail.com
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/sw_sync.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 63f0aeb66db6..f0a35277fd84 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -191,6 +191,7 @@ static const struct dma_fence_ops timeline_fence_ops = {
  */
 static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 {
+	LIST_HEAD(signalled);
 	struct sync_pt *pt, *next;
 
 	trace_sync_timeline(obj);
@@ -203,21 +204,20 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 		if (!timeline_fence_signaled(&pt->base))
 			break;
 
-		list_del_init(&pt->link);
+		dma_fence_get(&pt->base);
+
+		list_move_tail(&pt->link, &signalled);
 		rb_erase(&pt->node, &obj->pt_tree);
 
-		/*
-		 * A signal callback may release the last reference to this
-		 * fence, causing it to be freed. That operation has to be
-		 * last to avoid a use after free inside this loop, and must
-		 * be after we remove the fence from the timeline in order to
-		 * prevent deadlocking on timeline->lock inside
-		 * timeline_fence_release().
-		 */
 		dma_fence_signal_locked(&pt->base);
 	}
 
 	spin_unlock_irq(&obj->lock);
+
+	list_for_each_entry_safe(pt, next, &signalled, link) {
+		list_del_init(&pt->link);
+		dma_fence_put(&pt->base);
+	}
 }
 
 /**

From 1dfe3a5a7cefbe2162cecb759f3933baea22c393 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 21 Aug 2023 17:35:26 +0100
Subject: [PATCH 1820/2047] entry: Remove empty addr_limit_user_check()

Back when set_fs() was a generic API for altering the address limit,
addr_limit_user_check() was a safety measure to prevent userspace being
able to issue syscalls with an unbound limit.

With the the removal of set_fs() as a generic API, the last user of
addr_limit_user_check() was removed in commit:

  b5a5a01d8e9a44ec ("arm64: uaccess: remove addr_limit_user_check()")

... as since that commit, no architecture defines TIF_FSCHECK, and hence
addr_limit_user_check() always expands to nothing.

Remove addr_limit_user_check(), updating the comment in
exit_to_user_mode_prepare() to no longer refer to it. At the same time,
the comment is reworded to be a little more generic so as to cover
kmap_assert_nomap() in addition to lockdep_sys_exit().

No functional change.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20230821163526.2319443-1-mark.rutland@arm.com
---
 include/linux/syscalls.h | 16 ----------------
 kernel/entry/common.c    |  3 +--
 2 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 03e3d0121d5e..c4b9b66b0d05 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -283,22 +283,6 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
 #define SYSCALL32_DEFINE6 SYSCALL_DEFINE6
 #endif
 
-/*
- * Called before coming back to user-mode. Returning to user-mode with an
- * address limit different than USER_DS can allow to overwrite kernel memory.
- */
-static inline void addr_limit_user_check(void)
-{
-#ifdef TIF_FSCHECK
-	if (!test_thread_flag(TIF_FSCHECK))
-		return;
-#endif
-
-#ifdef TIF_FSCHECK
-	clear_thread_flag(TIF_FSCHECK);
-#endif
-}
-
 /*
  * These syscall function prototypes are kept in the same order as
  * include/uapi/asm-generic/unistd.h. Architecture specific entries go below,
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index be61332c66b5..d7ee4bc3f2ba 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -205,8 +205,7 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
 
 	arch_exit_to_user_mode_prepare(regs, ti_work);
 
-	/* Ensure that the address limit is intact and no locks are held */
-	addr_limit_user_check();
+	/* Ensure that kernel state is sane for a return to userspace */
 	kmap_assert_nomap();
 	lockdep_assert_irqs_disabled();
 	lockdep_sys_exit();

From 1613781d7e8a93618ff3a6b37f81f06769b53717 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Wed, 23 Aug 2023 09:53:08 +0100
Subject: [PATCH 1821/2047] ASoC: cs35l41: Correct amp_gain_tlv values

The current analog gain TLV seems to have completely incorrect values in
it. The gain starts at 0.5dB, proceeds in 1dB steps, and has no mute
value, correct the control to match.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20230823085308.753572-1-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/cs35l41.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c
index 6ac501f008ec..8a879b6f4829 100644
--- a/sound/soc/codecs/cs35l41.c
+++ b/sound/soc/codecs/cs35l41.c
@@ -168,7 +168,7 @@ static int cs35l41_get_fs_mon_config_index(int freq)
 static const DECLARE_TLV_DB_RANGE(dig_vol_tlv,
 		0, 0, TLV_DB_SCALE_ITEM(TLV_DB_GAIN_MUTE, 0, 1),
 		1, 913, TLV_DB_MINMAX_ITEM(-10200, 1200));
-static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 0, 1, 1);
+static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 50, 100, 0);
 
 static const struct snd_kcontrol_new dre_ctrl =
 	SOC_DAPM_SINGLE("Switch", CS35L41_PWR_CTRL3, 20, 1, 0);

From dedf98dd1cfb61cfc74be9248b90a49b42c6dead Mon Sep 17 00:00:00 2001
From: Li Zetao <lizetao1@huawei.com>
Date: Tue, 15 Aug 2023 16:08:33 +0800
Subject: [PATCH 1822/2047] s390/pci: use builtin_misc_device macro to simplify
 the code

Use the builtin_misc_device macro to simplify the code, which is the same
as declaring with device_initcall().

Signed-off-by: Li Zetao <lizetao1@huawei.com>
Acked-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20230815080833.1103609-1-lizetao1@huawei.com
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/pci/pci_clp.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index ee367798e388..ee90a91ed888 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -666,9 +666,4 @@ static struct miscdevice clp_misc_device = {
 	.fops = &clp_misc_fops,
 };
 
-static int __init clp_misc_init(void)
-{
-	return misc_register(&clp_misc_device);
-}
-
-device_initcall(clp_misc_init);
+builtin_misc_device(clp_misc_device);

From 862c74a56d56a31cb75a15bdd1b6c82e752d4c51 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Mon, 21 Aug 2023 09:39:13 +0200
Subject: [PATCH 1823/2047] dt-bindings: cpufreq: qcom-hw: add a 4th frequency
 domain

On new platforms, a 4th frequency domain is used, document it.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Acked-by: Manivannan Sadhasivam <mani@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 .../devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml         | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
index a6b3bb8fdf33..c1d225fcf2d5 100644
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
@@ -49,6 +49,7 @@ properties:
       - description: Frequency domain 0 register region
       - description: Frequency domain 1 register region
       - description: Frequency domain 2 register region
+      - description: Frequency domain 3 register region
 
   reg-names:
     minItems: 1
@@ -56,6 +57,7 @@ properties:
       - const: freq-domain0
       - const: freq-domain1
       - const: freq-domain2
+      - const: freq-domain3
 
   clocks:
     items:
@@ -69,7 +71,7 @@ properties:
 
   interrupts:
     minItems: 1
-    maxItems: 3
+    maxItems: 4
 
   interrupt-names:
     minItems: 1
@@ -77,6 +79,7 @@ properties:
       - const: dcvsh-irq-0
       - const: dcvsh-irq-1
       - const: dcvsh-irq-2
+      - const: dcvsh-irq-3
 
   '#freq-domain-cells':
     const: 1

From 5f19d0969a9a29cff7fbbb0f758c9246e3f6beaf Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Mon, 21 Aug 2023 09:39:14 +0200
Subject: [PATCH 1824/2047] cpufreq: qcom-cpufreq-hw: add support for 4 freq
 domains

Add support for up to 4 frequency domains as used on new
platforms.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Acked-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/qcom-cpufreq-hw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index f24cf2eddf1e..70b0f21968a0 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -28,7 +28,7 @@
 
 #define GT_IRQ_STATUS			BIT(2)
 
-#define MAX_FREQ_DOMAINS		3
+#define MAX_FREQ_DOMAINS		4
 
 struct qcom_cpufreq_soc_data {
 	u32 reg_enable;

From e3157bb55d3ed81ab3e21d1e2a07527d674556b4 Mon Sep 17 00:00:00 2001
From: Ferry Meng <mengferry@linux.alibaba.com>
Date: Wed, 9 Aug 2023 14:06:37 +0800
Subject: [PATCH 1825/2047] erofs: refine warning messages for zdata I/Os

Don't warn users since -EINTR can be returned due to user interruption.
Also suppress warning messages of readmore.

Signed-off-by: Ferry Meng <mengferry@linux.alibaba.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Link: https://lore.kernel.org/r/20230809060637.21311-1-mengferry@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/zdata.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index de4f12152b62..53820271e538 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1848,15 +1848,10 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
 
 		page = erofs_grab_cache_page_nowait(inode->i_mapping, index);
 		if (page) {
-			if (PageUptodate(page)) {
+			if (PageUptodate(page))
 				unlock_page(page);
-			} else {
-				err = z_erofs_do_read_page(f, page);
-				if (err)
-					erofs_err(inode->i_sb,
-						  "readmore error at page %lu @ nid %llu",
-						  index, EROFS_I(inode)->nid);
-			}
+			else
+				(void)z_erofs_do_read_page(f, page);
 			put_page(page);
 		}
 
@@ -1885,8 +1880,9 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
 	/* if some compressed cluster ready, need submit them anyway */
 	z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0), false);
 
-	if (err)
-		erofs_err(inode->i_sb, "failed to read, err [%d]", err);
+	if (err && err != -EINTR)
+		erofs_err(inode->i_sb, "read error %d @ %lu of nid %llu",
+			  err, folio->index, EROFS_I(inode)->nid);
 
 	erofs_put_metabuf(&f.map.buf);
 	erofs_release_pages(&f.pagepool);
@@ -1920,10 +1916,9 @@ static void z_erofs_readahead(struct readahead_control *rac)
 		head = (void *)page_private(page);
 
 		err = z_erofs_do_read_page(&f, page);
-		if (err)
-			erofs_err(inode->i_sb,
-				  "readahead error at page %lu @ nid %llu",
-				  page->index, EROFS_I(inode)->nid);
+		if (err && err != -EINTR)
+			erofs_err(inode->i_sb, "readahead error %d @ %lu of nid %llu",
+				  err, page->index, EROFS_I(inode)->nid);
 		put_page(page);
 	}
 	z_erofs_pcluster_readmore(&f, rac, false);

From 428f27cc8d3202ebbc5013fe53e8596949955d85 Mon Sep 17 00:00:00 2001
From: Ferry Meng <mengferry@linux.alibaba.com>
Date: Tue, 15 Aug 2023 17:48:47 +0800
Subject: [PATCH 1826/2047] erofs: clean up redundant comment and adjust code
 alignment

Remove some redundant comments in erofs/super.c, and avoid unncessary
line breaks for cleanup.

Signed-off-by: Ferry Meng <mengferry@linux.alibaba.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Link: https://lore.kernel.org/r/20230815094849.53249-1-mengferry@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/super.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 3d04cc33a6b8..f3b33332a446 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -21,8 +21,7 @@
 static struct kmem_cache *erofs_inode_cachep __read_mostly;
 struct file_system_type erofs_fs_type;
 
-void _erofs_err(struct super_block *sb, const char *function,
-		const char *fmt, ...)
+void _erofs_err(struct super_block *sb, const char *func, const char *fmt, ...)
 {
 	struct va_format vaf;
 	va_list args;
@@ -32,12 +31,11 @@ void _erofs_err(struct super_block *sb, const char *function,
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	pr_err("(device %s): %s: %pV", sb->s_id, function, &vaf);
+	pr_err("(device %s): %s: %pV", sb->s_id, func, &vaf);
 	va_end(args);
 }
 
-void _erofs_info(struct super_block *sb, const char *function,
-		 const char *fmt, ...)
+void _erofs_info(struct super_block *sb, const char *func, const char *fmt, ...)
 {
 	struct va_format vaf;
 	va_list args;
@@ -102,11 +100,9 @@ static void erofs_free_inode(struct inode *inode)
 {
 	struct erofs_inode *vi = EROFS_I(inode);
 
-	/* be careful of RCU symlink path */
 	if (inode->i_op == &erofs_fast_symlink_iops)
 		kfree(inode->i_link);
 	kfree(vi->xattr_shared_xattrs);
-
 	kmem_cache_free(erofs_inode_cachep, vi);
 }
 
@@ -119,8 +115,7 @@ static bool check_layout_compatibility(struct super_block *sb,
 
 	/* check if current kernel meets all mandatory requirements */
 	if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) {
-		erofs_err(sb,
-			  "unidentified incompatible feature %x, please upgrade kernel version",
+		erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel",
 			   feature & ~EROFS_ALL_FEATURE_INCOMPAT);
 		return false;
 	}
@@ -433,7 +428,6 @@ out:
 	return ret;
 }
 
-/* set up default EROFS parameters */
 static void erofs_default_options(struct erofs_fs_context *ctx)
 {
 #ifdef CONFIG_EROFS_FS_ZIP
@@ -735,7 +729,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
 	xa_init(&sbi->managed_pslots);
 #endif
 
-	/* get the root inode */
 	inode = erofs_iget(sb, ROOT_NID(sbi));
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -752,7 +745,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
 		return -ENOMEM;
 
 	erofs_shrinker_register(sb);
-	/* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */
 	if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) {
 		sbi->packed_inode = erofs_iget(sb, sbi->packed_nid);
 		if (IS_ERR(sbi->packed_inode)) {
@@ -885,10 +877,6 @@ static int erofs_init_fs_context(struct fs_context *fc)
 	return 0;
 }
 
-/*
- * could be triggered after deactivate_locked_super()
- * is called, thus including umount and failed to initialize.
- */
 static void erofs_kill_sb(struct super_block *sb)
 {
 	struct erofs_sb_info *sbi;
@@ -917,7 +905,6 @@ static void erofs_kill_sb(struct super_block *sb)
 	sb->s_fs_info = NULL;
 }
 
-/* called when ->s_root is non-NULL */
 static void erofs_put_super(struct super_block *sb)
 {
 	struct erofs_sb_info *const sbi = EROFS_SB(sb);
@@ -1018,7 +1005,6 @@ static void __exit erofs_module_exit(void)
 	erofs_pcpubuf_exit();
 }
 
-/* get filesystem statistics */
 static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_sb;

From 8ec9a25258244c4b3955ad052cd4c74ccf3d5d6a Mon Sep 17 00:00:00 2001
From: Ferry Meng <mengferry@linux.alibaba.com>
Date: Tue, 15 Aug 2023 17:48:48 +0800
Subject: [PATCH 1827/2047] erofs: add necessary kmem_cache_create flags for
 erofs inode cache

To improve memory access efficiency and enable statistics functionality,
add SLAB_MEM_SPREAD and SLAB_ACCOUNT flag during erofs_inode_cachep's
allocation time.

Signed-off-by: Ferry Meng <mengferry@linux.alibaba.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Link: https://lore.kernel.org/r/20230815094849.53249-2-mengferry@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/super.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index f3b33332a446..e5fb1775dd02 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -941,9 +941,9 @@ static int __init erofs_module_init(void)
 	erofs_check_ondisk_layout_definitions();
 
 	erofs_inode_cachep = kmem_cache_create("erofs_inode",
-					       sizeof(struct erofs_inode), 0,
-					       SLAB_RECLAIM_ACCOUNT,
-					       erofs_inode_init_once);
+			sizeof(struct erofs_inode), 0,
+			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
+			erofs_inode_init_once);
 	if (!erofs_inode_cachep)
 		return -ENOMEM;
 

From d442495c9676b6587f0058c5843b2d053dd1765b Mon Sep 17 00:00:00 2001
From: Ferry Meng <mengferry@linux.alibaba.com>
Date: Tue, 15 Aug 2023 17:48:49 +0800
Subject: [PATCH 1828/2047] erofs: remove redundant erofs_fs_type declaration
 in super.c

As erofs_fs_type has been declared in internal.h, there is no use to
declare repeatedly in super.c.

Signed-off-by: Ferry Meng <mengferry@linux.alibaba.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
eviewed-by: Chao Yu <chao@kernel.org>
Link: https://lore.kernel.org/r/20230815094849.53249-3-mengferry@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/super.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index e5fb1775dd02..91ed1460613b 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -19,7 +19,6 @@
 #include <trace/events/erofs.h>
 
 static struct kmem_cache *erofs_inode_cachep __read_mostly;
-struct file_system_type erofs_fs_type;
 
 void _erofs_err(struct super_block *sb, const char *func, const char *fmt, ...)
 {

From 8b00be163f7b57cbf957b3d27b5a7ca1e2495cfa Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 17 Aug 2023 16:28:06 +0800
Subject: [PATCH 1829/2047] erofs: simplify z_erofs_read_fragment()

A trivial cleanup to make the fragment handling logic more clear.

Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230817082813.81180-1-hsiangkao@linux.alibaba.com
---
 fs/erofs/zdata.c | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 53820271e538..dc104add0a99 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -932,34 +932,27 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
 	return true;
 }
 
-static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos,
-				 struct page *page, unsigned int pageofs,
-				 unsigned int len)
+static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
+			unsigned int cur, unsigned int end, erofs_off_t pos)
 {
-	struct super_block *sb = inode->i_sb;
-	struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode;
+	struct inode *packed_inode = EROFS_SB(sb)->packed_inode;
 	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
-	u8 *src, *dst;
-	unsigned int i, cnt;
+	unsigned int cnt;
+	u8 *src;
 
 	if (!packed_inode)
 		return -EFSCORRUPTED;
 
 	buf.inode = packed_inode;
-	pos += EROFS_I(inode)->z_fragmentoff;
-	for (i = 0; i < len; i += cnt) {
-		cnt = min_t(unsigned int, len - i,
+	for (; cur < end; cur += cnt, pos += cnt) {
+		cnt = min_t(unsigned int, end - cur,
 			    sb->s_blocksize - erofs_blkoff(sb, pos));
 		src = erofs_bread(&buf, erofs_blknr(sb, pos), EROFS_KMAP);
 		if (IS_ERR(src)) {
 			erofs_put_metabuf(&buf);
 			return PTR_ERR(src);
 		}
-
-		dst = kmap_local_page(page);
-		memcpy(dst + pageofs + i, src + erofs_blkoff(sb, pos), cnt);
-		kunmap_local(dst);
-		pos += cnt;
+		memcpy_to_page(page, cur, src + erofs_blkoff(sb, pos), cnt);
 	}
 	erofs_put_metabuf(&buf);
 	return 0;
@@ -972,7 +965,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 	struct erofs_map_blocks *const map = &fe->map;
 	const loff_t offset = page_offset(page);
 	bool tight = true, exclusive;
-	unsigned int cur, end, spiltted;
+	unsigned int cur, end, len, spiltted;
 	int err = 0;
 
 	/* register locked file pages as online pages in pack */
@@ -1041,17 +1034,11 @@ hitted:
 		goto next_part;
 	}
 	if (map->m_flags & EROFS_MAP_FRAGMENT) {
-		unsigned int pageofs, skip, len;
+		erofs_off_t fpos = offset + cur - map->m_la;
 
-		if (offset > map->m_la) {
-			pageofs = 0;
-			skip = offset - map->m_la;
-		} else {
-			pageofs = map->m_la & ~PAGE_MASK;
-			skip = 0;
-		}
-		len = min_t(unsigned int, map->m_llen - skip, end - cur);
-		err = z_erofs_read_fragment(inode, skip, page, pageofs, len);
+		len = min_t(unsigned int, map->m_llen - fpos, end - cur);
+		err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len,
+				EROFS_I(inode)->z_fragmentoff + fpos);
 		if (err)
 			goto out;
 		++spiltted;

From dcba1b232e26ebadbd215728199455d38a59253e Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 17 Aug 2023 16:28:07 +0800
Subject: [PATCH 1830/2047] erofs: avoid obsolete {collector,collection} terms

{collector,collection} were once reserved in order to indicate different
runtime logical extent instance of multi-reference pclusters.

However, de-duplicated decompression has been landed in a more flexable
way, thus `struct z_erofs_collection` was formally removed in commit
87ca34a7065d ("erofs: get rid of `struct z_erofs_collection'").

Let's handle the remaining leftovers, for example:
    `z_erofs_collector_begin` => `z_erofs_pcluster_begin`
    `z_erofs_collector_end` => `z_erofs_pcluster_end`

as well as some comments.  No logic changes.

Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230817082813.81180-2-hsiangkao@linux.alibaba.com
---
 fs/erofs/zdata.c | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index dc104add0a99..4ed99346c4e1 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -507,19 +507,17 @@ enum z_erofs_pclustermode {
 	 */
 	Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE,
 	/*
-	 * The current collection has been linked with the owned chain, and
-	 * could also be linked with the remaining collections, which means
-	 * if the processing page is the tail page of the collection, thus
-	 * the current collection can safely use the whole page (since
-	 * the previous collection is under control) for in-place I/O, as
-	 * illustrated below:
-	 *  ________________________________________________________________
-	 * |  tail (partial) page |          head (partial) page           |
-	 * |  (of the current cl) |      (of the previous collection)      |
-	 * |                      |                                        |
-	 * |__PCLUSTER_FOLLOWED___|___________PCLUSTER_FOLLOWED____________|
+	 * The pcluster was just linked to a decompression chain by us.  It can
+	 * also be linked with the remaining pclusters, which means if the
+	 * processing page is the tail page of a pcluster, this pcluster can
+	 * safely use the whole page (since the previous pcluster is within the
+	 * same chain) for in-place I/O, as illustrated below:
+	 *  ___________________________________________________
+	 * |  tail (partial) page  |    head (partial) page    |
+	 * |  (of the current pcl) |   (of the previous pcl)   |
+	 * |___PCLUSTER_FOLLOWED___|_____PCLUSTER_FOLLOWED_____|
 	 *
-	 * [  (*) the above page can be used as inplace I/O.               ]
+	 * [  (*) the page above can be used as inplace I/O.   ]
 	 */
 	Z_EROFS_PCLUSTER_FOLLOWED,
 };
@@ -851,7 +849,7 @@ err_out:
 	return err;
 }
 
-static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
+static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
 {
 	struct erofs_map_blocks *map = &fe->map;
 	struct erofs_workgroup *grp = NULL;
@@ -908,12 +906,12 @@ void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
 	call_rcu(&pcl->rcu, z_erofs_rcu_callback);
 }
 
-static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
+static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
 {
 	struct z_erofs_pcluster *pcl = fe->pcl;
 
 	if (!pcl)
-		return false;
+		return;
 
 	z_erofs_bvec_iter_end(&fe->biter);
 	mutex_unlock(&pcl->lock);
@@ -929,7 +927,7 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
 		erofs_workgroup_put(&pcl->obj);
 
 	fe->pcl = NULL;
-	return true;
+	fe->backmost = false;
 }
 
 static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
@@ -978,8 +976,7 @@ repeat:
 
 	if (offset + cur < map->m_la ||
 	    offset + cur >= map->m_la + map->m_llen) {
-		if (z_erofs_collector_end(fe))
-			fe->backmost = false;
+		z_erofs_pcluster_end(fe);
 		map->m_la = offset + cur;
 		map->m_llen = 0;
 		err = z_erofs_map_blocks_iter(inode, map, 0);
@@ -995,7 +992,7 @@ repeat:
 	    map->m_flags & EROFS_MAP_FRAGMENT)
 		goto hitted;
 
-	err = z_erofs_collector_begin(fe);
+	err = z_erofs_pcluster_begin(fe);
 	if (err)
 		goto out;
 
@@ -1862,7 +1859,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
 	z_erofs_pcluster_readmore(&f, NULL, true);
 	err = z_erofs_do_read_page(&f, page);
 	z_erofs_pcluster_readmore(&f, NULL, false);
-	(void)z_erofs_collector_end(&f);
+	z_erofs_pcluster_end(&f);
 
 	/* if some compressed cluster ready, need submit them anyway */
 	z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0), false);
@@ -1909,7 +1906,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
 		put_page(page);
 	}
 	z_erofs_pcluster_readmore(&f, rac, false);
-	(void)z_erofs_collector_end(&f);
+	z_erofs_pcluster_end(&f);
 
 	z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_pages), true);
 	erofs_put_metabuf(&f.map.buf);

From aeebae9d77217709f8ae3edb0cd7858ec8c7a9d6 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 17 Aug 2023 16:28:08 +0800
Subject: [PATCH 1831/2047] erofs: move preparation logic into
 z_erofs_pcluster_begin()

Some preparation logic should be part of z_erofs_pcluster_begin()
instead of z_erofs_do_read_page().  Let's move now.

Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230817082813.81180-3-hsiangkao@linux.alibaba.com
---
 fs/erofs/zdata.c | 60 ++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 33 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 4ed99346c4e1..9b67991edc7b 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -852,6 +852,8 @@ err_out:
 static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
 {
 	struct erofs_map_blocks *map = &fe->map;
+	struct super_block *sb = fe->inode->i_sb;
+	erofs_blk_t blknr = erofs_blknr(sb, map->m_pa);
 	struct erofs_workgroup *grp = NULL;
 	int ret;
 
@@ -861,8 +863,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
 	DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
 
 	if (!(map->m_flags & EROFS_MAP_META)) {
-		grp = erofs_find_workgroup(fe->inode->i_sb,
-					   map->m_pa >> PAGE_SHIFT);
+		grp = erofs_find_workgroup(sb, blknr);
 	} else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
 		DBG_BUGON(1);
 		return -EFSCORRUPTED;
@@ -881,9 +882,26 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
 	} else if (ret) {
 		return ret;
 	}
+
 	z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
 				Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
-	/* since file-backed online pages are traversed in reverse order */
+	if (!z_erofs_is_inline_pcluster(fe->pcl)) {
+		/* bind cache first when cached decompression is preferred */
+		z_erofs_bind_cache(fe);
+	} else {
+		void *mptr;
+
+		mptr = erofs_read_metabuf(&map->buf, sb, blknr, EROFS_NO_KMAP);
+		if (IS_ERR(mptr)) {
+			ret = PTR_ERR(mptr);
+			erofs_err(sb, "failed to get inline data %d", ret);
+			return ret;
+		}
+		get_page(map->buf.page);
+		WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page);
+		fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
+	}
+	/* file-backed inplace I/O pages are traversed in reverse order */
 	fe->icur = z_erofs_pclusterpages(fe->pcl);
 	return 0;
 }
@@ -982,39 +1000,15 @@ repeat:
 		err = z_erofs_map_blocks_iter(inode, map, 0);
 		if (err)
 			goto out;
-	} else {
-		if (fe->pcl)
-			goto hitted;
-		/* didn't get a valid pcluster previously (very rare) */
+	} else if (fe->pcl) {
+		goto hitted;
 	}
 
-	if (!(map->m_flags & EROFS_MAP_MAPPED) ||
-	    map->m_flags & EROFS_MAP_FRAGMENT)
-		goto hitted;
-
-	err = z_erofs_pcluster_begin(fe);
-	if (err)
-		goto out;
-
-	if (z_erofs_is_inline_pcluster(fe->pcl)) {
-		void *mp;
-
-		mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb,
-					erofs_blknr(inode->i_sb, map->m_pa),
-					EROFS_NO_KMAP);
-		if (IS_ERR(mp)) {
-			err = PTR_ERR(mp);
-			erofs_err(inode->i_sb,
-				  "failed to get inline page, err %d", err);
+	if ((map->m_flags & EROFS_MAP_MAPPED) &&
+	    !(map->m_flags & EROFS_MAP_FRAGMENT)) {
+		err = z_erofs_pcluster_begin(fe);
+		if (err)
 			goto out;
-		}
-		get_page(fe->map.buf.page);
-		WRITE_ONCE(fe->pcl->compressed_bvecs[0].page,
-			   fe->map.buf.page);
-		fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
-	} else {
-		/* bind cache first when cached decompression is preferred */
-		z_erofs_bind_cache(fe);
 	}
 hitted:
 	/*

From e4c1cf523d820730a86cae2c6d55924833b6f7ac Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 17 Aug 2023 16:28:09 +0800
Subject: [PATCH 1832/2047] erofs: tidy up z_erofs_do_read_page()

 - Fix a typo: spiltted => split;

 - Move !EROFS_MAP_MAPPED and EROFS_MAP_FRAGMENT upwards;

 - Increase `split` in advance to avoid unnecessary repeats.

Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230817082813.81180-4-hsiangkao@linux.alibaba.com
---
 fs/erofs/zdata.c | 53 ++++++++++++++++++++++--------------------------
 1 file changed, 24 insertions(+), 29 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 9b67991edc7b..6c78566efb92 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -981,49 +981,34 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
 	struct erofs_map_blocks *const map = &fe->map;
 	const loff_t offset = page_offset(page);
 	bool tight = true, exclusive;
-	unsigned int cur, end, len, spiltted;
+	unsigned int cur, end, len, split;
 	int err = 0;
 
-	/* register locked file pages as online pages in pack */
 	z_erofs_onlinepage_init(page);
 
-	spiltted = 0;
+	split = 0;
 	end = PAGE_SIZE;
 repeat:
-	cur = end - 1;
-
-	if (offset + cur < map->m_la ||
-	    offset + cur >= map->m_la + map->m_llen) {
+	if (offset + end - 1 < map->m_la ||
+	    offset + end - 1 >= map->m_la + map->m_llen) {
 		z_erofs_pcluster_end(fe);
-		map->m_la = offset + cur;
+		map->m_la = offset + end - 1;
 		map->m_llen = 0;
 		err = z_erofs_map_blocks_iter(inode, map, 0);
 		if (err)
 			goto out;
-	} else if (fe->pcl) {
-		goto hitted;
 	}
 
-	if ((map->m_flags & EROFS_MAP_MAPPED) &&
-	    !(map->m_flags & EROFS_MAP_FRAGMENT)) {
-		err = z_erofs_pcluster_begin(fe);
-		if (err)
-			goto out;
-	}
-hitted:
-	/*
-	 * Ensure the current partial page belongs to this submit chain rather
-	 * than other concurrent submit chains or the noio(bypass) chain since
-	 * those chains are handled asynchronously thus the page cannot be used
-	 * for inplace I/O or bvpage (should be processed in a strict order.)
-	 */
-	tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
+	cur = offset > map->m_la ? 0 : map->m_la - offset;
+	/* bump split parts first to avoid several separate cases */
+	++split;
 
-	cur = end - min_t(erofs_off_t, offset + end - map->m_la, end);
 	if (!(map->m_flags & EROFS_MAP_MAPPED)) {
 		zero_user_segment(page, cur, end);
+		tight = false;
 		goto next_part;
 	}
+
 	if (map->m_flags & EROFS_MAP_FRAGMENT) {
 		erofs_off_t fpos = offset + cur - map->m_la;
 
@@ -1032,12 +1017,24 @@ hitted:
 				EROFS_I(inode)->z_fragmentoff + fpos);
 		if (err)
 			goto out;
-		++spiltted;
 		tight = false;
 		goto next_part;
 	}
 
-	exclusive = (!cur && (!spiltted || tight));
+	if (!fe->pcl) {
+		err = z_erofs_pcluster_begin(fe);
+		if (err)
+			goto out;
+	}
+
+	/*
+	 * Ensure the current partial page belongs to this submit chain rather
+	 * than other concurrent submit chains or the noio(bypass) chain since
+	 * those chains are handled asynchronously thus the page cannot be used
+	 * for inplace I/O or bvpage (should be processed in a strict order.)
+	 */
+	tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
+	exclusive = (!cur && ((split <= 1) || tight));
 	if (cur)
 		tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
 
@@ -1050,8 +1047,6 @@ hitted:
 		goto out;
 
 	z_erofs_onlinepage_split(page);
-	/* bump up the number of spiltted parts of a page */
-	++spiltted;
 	if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
 		fe->pcl->multibases = true;
 	if (fe->pcl->length < offset + end - map->m_la) {

From 9a05c6a8bc26138d34e87b39e6a815603bc2a66c Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 17 Aug 2023 16:28:10 +0800
Subject: [PATCH 1833/2047] erofs: drop z_erofs_page_mark_eio()

It can be folded into z_erofs_onlinepage_endio() to simplify the code.

Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230817082813.81180-5-hsiangkao@linux.alibaba.com
---
 fs/erofs/zdata.c | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 6c78566efb92..82834e302fb4 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -143,22 +143,17 @@ static inline void z_erofs_onlinepage_split(struct page *page)
 	atomic_inc((atomic_t *)&page->private);
 }
 
-static inline void z_erofs_page_mark_eio(struct page *page)
+static void z_erofs_onlinepage_endio(struct page *page, int err)
 {
-	int orig;
+	int orig, v;
+
+	DBG_BUGON(!PagePrivate(page));
 
 	do {
 		orig = atomic_read((atomic_t *)&page->private);
-	} while (atomic_cmpxchg((atomic_t *)&page->private, orig,
-				orig | Z_EROFS_PAGE_EIO) != orig);
-}
+		v = (orig - 1) | (err ? Z_EROFS_PAGE_EIO : 0);
+	} while (atomic_cmpxchg((atomic_t *)&page->private, orig, v) != orig);
 
-static inline void z_erofs_onlinepage_endio(struct page *page)
-{
-	unsigned int v;
-
-	DBG_BUGON(!PagePrivate(page));
-	v = atomic_dec_return((atomic_t *)&page->private);
 	if (!(v & ~Z_EROFS_PAGE_EIO)) {
 		set_page_private(page, 0);
 		ClearPagePrivate(page);
@@ -1067,9 +1062,7 @@ next_part:
 		goto repeat;
 
 out:
-	if (err)
-		z_erofs_page_mark_eio(page);
-	z_erofs_onlinepage_endio(page);
+	z_erofs_onlinepage_endio(page, err);
 	return err;
 }
 
@@ -1172,9 +1165,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
 			cur += len;
 		}
 		kunmap_local(dst);
-		if (err)
-			z_erofs_page_mark_eio(bvi->bvec.page);
-		z_erofs_onlinepage_endio(bvi->bvec.page);
+		z_erofs_onlinepage_endio(bvi->bvec.page, err);
 		list_del(p);
 		kfree(bvi);
 	}
@@ -1345,9 +1336,7 @@ out:
 		/* recycle all individual short-lived pages */
 		if (z_erofs_put_shortlivedpage(be->pagepool, page))
 			continue;
-		if (err)
-			z_erofs_page_mark_eio(page);
-		z_erofs_onlinepage_endio(page);
+		z_erofs_onlinepage_endio(page, err);
 	}
 
 	if (be->decompressed_pages != be->onstack_pages)

From 06ec03660d819fdb88692d62a86464daf6ddb0a5 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 17 Aug 2023 16:28:11 +0800
Subject: [PATCH 1834/2047] erofs: get rid of fe->backmost for cache
 decompression

EROFS_MAP_FULL_MAPPED is more accurate to decide if caching the last
incomplete pcluster for later read or not.

Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230817082813.81180-6-hsiangkao@linux.alibaba.com
---
 fs/erofs/zdata.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 82834e302fb4..7e55e95bc33a 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -528,8 +528,6 @@ struct z_erofs_decompress_frontend {
 	z_erofs_next_pcluster_t owned_head;
 	enum z_erofs_pclustermode mode;
 
-	/* used for applying cache strategy on the fly */
-	bool backmost;
 	erofs_off_t headoffset;
 
 	/* a pointer used to pick up inplace I/O pages */
@@ -538,7 +536,7 @@ struct z_erofs_decompress_frontend {
 
 #define DECOMPRESS_FRONTEND_INIT(__i) { \
 	.inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
-	.mode = Z_EROFS_PCLUSTER_FOLLOWED, .backmost = true }
+	.mode = Z_EROFS_PCLUSTER_FOLLOWED }
 
 static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
 {
@@ -547,7 +545,7 @@ static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
 	if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED)
 		return false;
 
-	if (fe->backmost)
+	if (!(fe->map.m_flags & EROFS_MAP_FULL_MAPPED))
 		return true;
 
 	if (cachestrategy >= EROFS_ZIP_CACHE_READAROUND &&
@@ -940,7 +938,6 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
 		erofs_workgroup_put(&pcl->obj);
 
 	fe->pcl = NULL;
-	fe->backmost = false;
 }
 
 static int z_erofs_read_fragment(struct super_block *sb, struct page *page,

From 491b1105a8e25d9d26ea321839e057a752e31383 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 17 Aug 2023 16:28:12 +0800
Subject: [PATCH 1835/2047] erofs: adapt folios for z_erofs_readahead()

It's a straight-forward conversion except that readahead_folio()
will do folio_put() in advance but it doesn't matter since folios
are still locked.

As before, since file-backed folios (pages for now) are locked, so
we could temporarily use folio->private as an internal counter to
indicate split parts of each folio for the corresponding pclusters
to decompress.

When such counter becomes zero, the folio will be finally unlocked
(see compress.h and z_erofs_onlinepage_endio()).

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230817082813.81180-7-hsiangkao@linux.alibaba.com
---
 fs/erofs/zdata.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 7e55e95bc33a..78631d113d98 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1853,37 +1853,35 @@ static void z_erofs_readahead(struct readahead_control *rac)
 	struct inode *const inode = rac->mapping->host;
 	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
 	struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
-	struct page *head = NULL, *page;
-	unsigned int nr_pages;
+	struct folio *head = NULL, *folio;
+	unsigned int nr_folios;
+	int err;
 
 	f.headoffset = readahead_pos(rac);
 
 	z_erofs_pcluster_readmore(&f, rac, true);
-	nr_pages = readahead_count(rac);
-	trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
+	nr_folios = readahead_count(rac);
+	trace_erofs_readpages(inode, readahead_index(rac), nr_folios, false);
 
-	while ((page = readahead_page(rac))) {
-		set_page_private(page, (unsigned long)head);
-		head = page;
+	while ((folio = readahead_folio(rac))) {
+		folio->private = head;
+		head = folio;
 	}
 
+	/* traverse in reverse order for best metadata I/O performance */
 	while (head) {
-		struct page *page = head;
-		int err;
+		folio = head;
+		head = folio_get_private(folio);
 
-		/* traversal in reverse order */
-		head = (void *)page_private(page);
-
-		err = z_erofs_do_read_page(&f, page);
+		err = z_erofs_do_read_page(&f, &folio->page);
 		if (err && err != -EINTR)
-			erofs_err(inode->i_sb, "readahead error %d @ %lu of nid %llu",
-				  err, page->index, EROFS_I(inode)->nid);
-		put_page(page);
+			erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
+				  folio->index, EROFS_I(inode)->nid);
 	}
 	z_erofs_pcluster_readmore(&f, rac, false);
 	z_erofs_pcluster_end(&f);
 
-	z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_pages), true);
+	z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_folios), true);
 	erofs_put_metabuf(&f.map.buf);
 	erofs_release_pages(&f.pagepool);
 }

From c33ad3b2b7100ac944aad38d3ebc89cb5f654e94 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 17 Aug 2023 16:39:42 +0800
Subject: [PATCH 1836/2047] erofs: adapt folios for z_erofs_read_folio()

It's a straight-forward conversion and no logic changes (except that
it renames the corresponding tracepoint.)

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230817083942.103303-1-hsiangkao@linux.alibaba.com
---
 fs/erofs/zdata.c             |  9 ++++-----
 include/trace/events/erofs.h | 16 ++++++++--------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 78631d113d98..da52b1cccd38 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1822,17 +1822,16 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
 
 static int z_erofs_read_folio(struct file *file, struct folio *folio)
 {
-	struct page *page = &folio->page;
-	struct inode *const inode = page->mapping->host;
+	struct inode *const inode = folio->mapping->host;
 	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
 	struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
 	int err;
 
-	trace_erofs_readpage(page, false);
-	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
+	trace_erofs_read_folio(folio, false);
+	f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
 
 	z_erofs_pcluster_readmore(&f, NULL, true);
-	err = z_erofs_do_read_page(&f, page);
+	err = z_erofs_do_read_page(&f, &folio->page);
 	z_erofs_pcluster_readmore(&f, NULL, false);
 	z_erofs_pcluster_end(&f);
 
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index 71dbe8bfa7db..e18684b02c3d 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -80,11 +80,11 @@ TRACE_EVENT(erofs_fill_inode,
 		  __entry->blkaddr, __entry->ofs)
 );
 
-TRACE_EVENT(erofs_readpage,
+TRACE_EVENT(erofs_read_folio,
 
-	TP_PROTO(struct page *page, bool raw),
+	TP_PROTO(struct folio *folio, bool raw),
 
-	TP_ARGS(page, raw),
+	TP_ARGS(folio, raw),
 
 	TP_STRUCT__entry(
 		__field(dev_t,		dev	)
@@ -96,11 +96,11 @@ TRACE_EVENT(erofs_readpage,
 	),
 
 	TP_fast_assign(
-		__entry->dev	= page->mapping->host->i_sb->s_dev;
-		__entry->nid	= EROFS_I(page->mapping->host)->nid;
-		__entry->dir	= S_ISDIR(page->mapping->host->i_mode);
-		__entry->index	= page->index;
-		__entry->uptodate = PageUptodate(page);
+		__entry->dev	= folio->mapping->host->i_sb->s_dev;
+		__entry->nid	= EROFS_I(folio->mapping->host)->nid;
+		__entry->dir	= S_ISDIR(folio->mapping->host->i_mode);
+		__entry->index	= folio->index;
+		__entry->uptodate = folio_test_uptodate(folio);
 		__entry->raw = raw;
 	),
 

From 5ec693ca70dd88db20094cab9aa045852363b93f Mon Sep 17 00:00:00 2001
From: sunshijie <sunshijie@xiaomi.com>
Date: Mon, 21 Aug 2023 23:21:16 +0800
Subject: [PATCH 1837/2047] erofs: don't warn dedupe and fragments features
 anymore

The `dedupe` and `fragments` features have been merged for a year.
They are mostly stable now.

Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: sunshijie <sunshijie@xiaomi.com>
Link: https://lore.kernel.org/r/20230821041737.2673401-1-sunshijie@xiaomi.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/super.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 91ed1460613b..44a24d573f1f 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -418,10 +418,6 @@ static int erofs_read_superblock(struct super_block *sb)
 
 	if (erofs_is_fscache_mode(sb))
 		erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!");
-	if (erofs_sb_has_fragments(sbi))
-		erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!");
-	if (erofs_sb_has_dedupe(sbi))
-		erofs_info(sb, "EXPERIMENTAL global deduplication feature in use. Use at your own risk!");
 out:
 	erofs_put_metabuf(&buf);
 	return ret;

From 91b1ad0815fbb1095c8b9e8a2bf4201186afe304 Mon Sep 17 00:00:00 2001
From: Jingbo Xu <jefflexu@linux.alibaba.com>
Date: Tue, 22 Aug 2023 19:05:30 +0800
Subject: [PATCH 1838/2047] erofs: release ztailpacking pclusters properly

Currently ztailpacking pclusters are chained with FOLLOWED_NOINPLACE and
not recorded into the managed_pslots XArray.

After commit 7674a42f35ea ("erofs: use struct lockref to replace
handcrafted approach"), ztailpacking pclusters won't be freed with
erofs_workgroup_put() anymore, which will cause the following issue:

BUG erofs_pcluster-1 (Tainted: G           OE     ): Objects remaining in erofs_pcluster-1 on __kmem_cache_shutdown()

Use z_erofs_free_pcluster() directly to free ztailpacking pclusters.

Fixes: 7674a42f35ea ("erofs: use struct lockref to replace handcrafted approach")
Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Link: https://lore.kernel.org/r/20230822110530.96831-1-jefflexu@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/zdata.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index da52b1cccd38..036f610e044b 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1369,7 +1369,10 @@ static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
 		owned = READ_ONCE(be.pcl->next);
 
 		z_erofs_decompress_pcluster(&be, io->eio ? -EIO : 0);
-		erofs_workgroup_put(&be.pcl->obj);
+		if (z_erofs_is_inline_pcluster(be.pcl))
+			z_erofs_free_pcluster(be.pcl);
+		else
+			erofs_workgroup_put(&be.pcl->obj);
 	}
 }
 

From 1d0eb6143c1e85d3f9a3f5a616ee7e5dc351d33b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Aug 2023 18:16:25 +0200
Subject: [PATCH 1839/2047] ALSA: ymfpci: Fix the missing snd_card_free() call
 at probe error

Like a few other drivers, YMFPCI driver needs to clean up with
snd_card_free() call at an error path of the probe; otherwise the
other devres resources are released before the card and it results in
the UAF.

This patch uses the helper for handling the probe error gracefully.

Fixes: f33fc1576757 ("ALSA: ymfpci: Create card with device-managed snd_devm_card_new()")
Cc: <stable@vger.kernel.org>
Reported-and-tested-by: Takashi Yano <takashi.yano@nifty.ne.jp>
Closes: https://lore.kernel.org/r/20230823135846.1812-1-takashi.yano@nifty.ne.jp
Link: https://lore.kernel.org/r/20230823161625.5807-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/ymfpci/ymfpci.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/sound/pci/ymfpci/ymfpci.c b/sound/pci/ymfpci/ymfpci.c
index b033bd290940..48444dda44de 100644
--- a/sound/pci/ymfpci/ymfpci.c
+++ b/sound/pci/ymfpci/ymfpci.c
@@ -152,8 +152,8 @@ static inline int snd_ymfpci_create_gameport(struct snd_ymfpci *chip, int dev, i
 void snd_ymfpci_free_gameport(struct snd_ymfpci *chip) { }
 #endif /* SUPPORT_JOYSTICK */
 
-static int snd_card_ymfpci_probe(struct pci_dev *pci,
-				 const struct pci_device_id *pci_id)
+static int __snd_card_ymfpci_probe(struct pci_dev *pci,
+				   const struct pci_device_id *pci_id)
 {
 	static int dev;
 	struct snd_card *card;
@@ -348,6 +348,12 @@ static int snd_card_ymfpci_probe(struct pci_dev *pci,
 	return 0;
 }
 
+static int snd_card_ymfpci_probe(struct pci_dev *pci,
+				 const struct pci_device_id *pci_id)
+{
+	return snd_card_free_on_error(&pci->dev, __snd_card_ymfpci_probe(pci, pci_id));
+}
+
 static struct pci_driver ymfpci_driver = {
 	.name = KBUILD_MODNAME,
 	.id_table = snd_ymfpci_ids,

From 78aabcb3211aa4b7e8f8a1d5d4bdad699a42d4ba Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 22 Aug 2023 13:28:02 +0200
Subject: [PATCH 1840/2047] cpuidle: teo: Avoid unnecessary variable
 assignments

Notice that it is not necessary to assign tick_intercept_sum in every
iteration of the first loop over idle states in teo_select(), because
the intercept_sum value does not change after the assignment in a
given iteration of the loop, so its value after the last iteration of
the loop can be used for computing the tick_intercept_sum value
directly.

Modify the code accordingly.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 31050fdc633f..7244f71c59c5 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -450,8 +450,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		hit_sum += prev_bin->hits;
 		recent_sum += prev_bin->recent;
 
-		tick_intercept_sum = intercept_sum;
-
 		if (dev->states_usage[i].disable)
 			continue;
 
@@ -484,7 +482,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		goto end;
 	}
 
-	tick_intercept_sum += cpu_data->state_bins[drv->state_count-1].intercepts;
+	tick_intercept_sum = intercept_sum +
+			cpu_data->state_bins[drv->state_count-1].intercepts;
 
 	/*
 	 * If the sum of the intercepts metric for all of the idle states

From 14abdfae508228a7307f7491b5c4215ae70c6542 Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Fri, 16 Jun 2023 15:09:34 -0400
Subject: [PATCH 1841/2047] drm/vmwgfx: Fix shader stage validation

For multiple commands the driver was not correctly validating the shader
stages resulting in possible kernel oopses. The validation code was only.
if ever, checking the upper bound on the shader stages but never a lower
bound (valid shader stages start at 1 not 0).

Fixes kernel oopses ending up in vmw_binding_add, e.g.:
Oops: 0000 [#1] PREEMPT SMP PTI
CPU: 1 PID: 2443 Comm: testcase Not tainted 6.3.0-rc4-vmwgfx #1
Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020
RIP: 0010:vmw_binding_add+0x4c/0x140 [vmwgfx]
Code: 7e 30 49 83 ff 0e 0f 87 ea 00 00 00 4b 8d 04 7f 89 d2 89 cb 48 c1 e0 03 4c 8b b0 40 3d 93 c0 48 8b 80 48 3d 93 c0 49 0f af de <48> 03 1c d0 4c 01 e3 49 8>
RSP: 0018:ffffb8014416b968 EFLAGS: 00010206
RAX: ffffffffc0933ec0 RBX: 0000000000000000 RCX: 0000000000000000
RDX: 00000000ffffffff RSI: ffffb8014416b9c0 RDI: ffffb8014316f000
RBP: ffffb8014416b998 R08: 0000000000000003 R09: 746f6c735f726564
R10: ffffffffaaf2bda0 R11: 732e676e69646e69 R12: ffffb8014316f000
R13: ffffb8014416b9c0 R14: 0000000000000040 R15: 0000000000000006
FS:  00007fba8c0af740(0000) GS:ffff8a1277c80000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000007c0933eb8 CR3: 0000000118244001 CR4: 00000000003706e0
Call Trace:
 <TASK>
 vmw_view_bindings_add+0xf5/0x1b0 [vmwgfx]
 ? ___drm_dbg+0x8a/0xb0 [drm]
 vmw_cmd_dx_set_shader_res+0x8f/0xc0 [vmwgfx]
 vmw_execbuf_process+0x590/0x1360 [vmwgfx]
 vmw_execbuf_ioctl+0x173/0x370 [vmwgfx]
 ? __drm_dev_dbg+0xb4/0xe0 [drm]
 ? __pfx_vmw_execbuf_ioctl+0x10/0x10 [vmwgfx]
 drm_ioctl_kernel+0xbc/0x160 [drm]
 drm_ioctl+0x2d2/0x580 [drm]
 ? __pfx_vmw_execbuf_ioctl+0x10/0x10 [vmwgfx]
 ? do_fault+0x1a6/0x420
 vmw_generic_ioctl+0xbd/0x180 [vmwgfx]
 vmw_unlocked_ioctl+0x19/0x20 [vmwgfx]
 __x64_sys_ioctl+0x96/0xd0
 do_syscall_64+0x5d/0x90
 ? handle_mm_fault+0xe4/0x2f0
 ? debug_smp_processor_id+0x1b/0x30
 ? fpregs_assert_state_consistent+0x2e/0x50
 ? exit_to_user_mode_prepare+0x40/0x180
 ? irqentry_exit_to_user_mode+0xd/0x20
 ? irqentry_exit+0x3f/0x50
 ? exc_page_fault+0x8b/0x180
 entry_SYSCALL_64_after_hwframe+0x72/0xdc

Signed-off-by: Zack Rusin <zackr@vmware.com>
Cc: security@openanolis.org
Reported-by: Ziming Zhang <ezrakiez@gmail.com>
Testcase-found-by: Niels De Graef <ndegraef@redhat.com>
Fixes: d80efd5cb3de ("drm/vmwgfx: Initial DX support")
Cc: <stable@vger.kernel.org> # v4.3+
Reviewed-by: Maaz Mombasawala<mombasawalam@vmware.com>
Reviewed-by: Martin Krastev <krastevm@vmware.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230616190934.54828-1-zack@kde.org
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h     | 12 ++++++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 29 ++++++++++---------------
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 3810a9984a7f..58bfdf203eca 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1513,4 +1513,16 @@ static inline bool vmw_has_fences(struct vmw_private *vmw)
 	return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0;
 }
 
+static inline bool vmw_shadertype_is_valid(enum vmw_sm_type shader_model,
+					   u32 shader_type)
+{
+	SVGA3dShaderType max_allowed = SVGA3D_SHADERTYPE_PREDX_MAX;
+
+	if (shader_model >= VMW_SM_5)
+		max_allowed = SVGA3D_SHADERTYPE_MAX;
+	else if (shader_model >= VMW_SM_4)
+		max_allowed = SVGA3D_SHADERTYPE_DX10_MAX;
+	return shader_type >= SVGA3D_SHADERTYPE_MIN && shader_type < max_allowed;
+}
+
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 6b9aa2b4ef54..d30c0e3d3ab7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -1992,7 +1992,7 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv,
 
 	cmd = container_of(header, typeof(*cmd), header);
 
-	if (cmd->body.type >= SVGA3D_SHADERTYPE_PREDX_MAX) {
+	if (!vmw_shadertype_is_valid(VMW_SM_LEGACY, cmd->body.type)) {
 		VMW_DEBUG_USER("Illegal shader type %u.\n",
 			       (unsigned int) cmd->body.type);
 		return -EINVAL;
@@ -2115,8 +2115,6 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
 				      SVGA3dCmdHeader *header)
 {
 	VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetSingleConstantBuffer);
-	SVGA3dShaderType max_shader_num = has_sm5_context(dev_priv) ?
-		SVGA3D_NUM_SHADERTYPE : SVGA3D_NUM_SHADERTYPE_DX10;
 
 	struct vmw_resource *res = NULL;
 	struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context);
@@ -2133,6 +2131,14 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		return ret;
 
+	if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type) ||
+	    cmd->body.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) {
+		VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n",
+			       (unsigned int) cmd->body.type,
+			       (unsigned int) cmd->body.slot);
+		return -EINVAL;
+	}
+
 	binding.bi.ctx = ctx_node->ctx;
 	binding.bi.res = res;
 	binding.bi.bt = vmw_ctx_binding_cb;
@@ -2141,14 +2147,6 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
 	binding.size = cmd->body.sizeInBytes;
 	binding.slot = cmd->body.slot;
 
-	if (binding.shader_slot >= max_shader_num ||
-	    binding.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) {
-		VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n",
-			       (unsigned int) cmd->body.type,
-			       (unsigned int) binding.slot);
-		return -EINVAL;
-	}
-
 	vmw_binding_add(ctx_node->staged, &binding.bi, binding.shader_slot,
 			binding.slot);
 
@@ -2207,15 +2205,13 @@ static int vmw_cmd_dx_set_shader_res(struct vmw_private *dev_priv,
 {
 	VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShaderResources) =
 		container_of(header, typeof(*cmd), header);
-	SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ?
-		SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX;
 
 	u32 num_sr_view = (cmd->header.size - sizeof(cmd->body)) /
 		sizeof(SVGA3dShaderResourceViewId);
 
 	if ((u64) cmd->body.startView + (u64) num_sr_view >
 	    (u64) SVGA3D_DX_MAX_SRVIEWS ||
-	    cmd->body.type >= max_allowed) {
+	    !vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) {
 		VMW_DEBUG_USER("Invalid shader binding.\n");
 		return -EINVAL;
 	}
@@ -2239,8 +2235,6 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv,
 				 SVGA3dCmdHeader *header)
 {
 	VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShader);
-	SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ?
-		SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX;
 	struct vmw_resource *res = NULL;
 	struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context);
 	struct vmw_ctx_bindinfo_shader binding;
@@ -2251,8 +2245,7 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv,
 
 	cmd = container_of(header, typeof(*cmd), header);
 
-	if (cmd->body.type >= max_allowed ||
-	    cmd->body.type < SVGA3D_SHADERTYPE_MIN) {
+	if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) {
 		VMW_DEBUG_USER("Illegal shader type %u.\n",
 			       (unsigned int) cmd->body.type);
 		return -EINVAL;

From f9e96bf1905479f18e83a3a4c314a8dfa56ede2c Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Fri, 18 Aug 2023 00:13:01 -0400
Subject: [PATCH 1842/2047] drm/vmwgfx: Fix possible invalid drm gem put calls

vmw_bo_unreference sets the input buffer to null on exit, resulting in
null ptr deref's on the subsequent drm gem put calls.

This went unnoticed because only very old userspace would be exercising
those paths but it wouldn't be hard to hit on old distros with brand
new kernels.

Introduce a new function that abstracts unrefing of user bo's to make
the code cleaner and more explicit.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reported-by: Ian Forbes <iforbes@vmware.com>
Fixes: 9ef8d83e8e25 ("drm/vmwgfx: Do not drop the reference to the handle too soon")
Cc: <stable@vger.kernel.org> # v6.4+
Reviewed-by: Maaz Mombasawala<mombasawalam@vmware.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230818041301.407636-1-zack@kde.org
---
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c      | 6 ++----
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.h      | 8 ++++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 6 ++----
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c     | 6 ++----
 drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c | 3 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_shader.c  | 3 +--
 6 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 82094c137855..c43853597776 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -497,10 +497,9 @@ static int vmw_user_bo_synccpu_release(struct drm_file *filp,
 		if (!(flags & drm_vmw_synccpu_allow_cs)) {
 			atomic_dec(&vmw_bo->cpu_writers);
 		}
-		ttm_bo_put(&vmw_bo->tbo);
+		vmw_user_bo_unref(vmw_bo);
 	}
 
-	drm_gem_object_put(&vmw_bo->tbo.base);
 	return ret;
 }
 
@@ -540,8 +539,7 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data,
 			return ret;
 
 		ret = vmw_user_bo_synccpu_grab(vbo, arg->flags);
-		vmw_bo_unreference(&vbo);
-		drm_gem_object_put(&vbo->tbo.base);
+		vmw_user_bo_unref(vbo);
 		if (unlikely(ret != 0)) {
 			if (ret == -ERESTARTSYS || ret == -EBUSY)
 				return -EBUSY;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
index 50a836e70994..1d433fceed3d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
@@ -195,6 +195,14 @@ static inline struct vmw_bo *vmw_bo_reference(struct vmw_bo *buf)
 	return buf;
 }
 
+static inline void vmw_user_bo_unref(struct vmw_bo *vbo)
+{
+	if (vbo) {
+		ttm_bo_put(&vbo->tbo);
+		drm_gem_object_put(&vbo->tbo.base);
+	}
+}
+
 static inline struct vmw_bo *to_vmw_bo(struct drm_gem_object *gobj)
 {
 	return container_of((gobj), struct vmw_bo, tbo.base);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index d30c0e3d3ab7..98e0723ca6f5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -1164,8 +1164,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
 	}
 	vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB);
 	ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
-	ttm_bo_put(&vmw_bo->tbo);
-	drm_gem_object_put(&vmw_bo->tbo.base);
+	vmw_user_bo_unref(vmw_bo);
 	if (unlikely(ret != 0))
 		return ret;
 
@@ -1221,8 +1220,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 	vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM,
 			     VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM);
 	ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
-	ttm_bo_put(&vmw_bo->tbo);
-	drm_gem_object_put(&vmw_bo->tbo.base);
+	vmw_user_bo_unref(vmw_bo);
 	if (unlikely(ret != 0))
 		return ret;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index b62207be3363..1489ad73c103 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1665,10 +1665,8 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 
 err_out:
 	/* vmw_user_lookup_handle takes one ref so does new_fb */
-	if (bo) {
-		vmw_bo_unreference(&bo);
-		drm_gem_object_put(&bo->tbo.base);
-	}
+	if (bo)
+		vmw_user_bo_unref(bo);
 	if (surface)
 		vmw_surface_unreference(&surface);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index 7e112319a23c..fb85f244c3d0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -451,8 +451,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data,
 
 	ret = vmw_overlay_update_stream(dev_priv, buf, arg, true);
 
-	vmw_bo_unreference(&buf);
-	drm_gem_object_put(&buf->tbo.base);
+	vmw_user_bo_unref(buf);
 
 out_unlock:
 	mutex_unlock(&overlay->mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
index e7226db8b242..1e81ff2422cf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
@@ -809,8 +809,7 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv,
 				    shader_type, num_input_sig,
 				    num_output_sig, tfile, shader_handle);
 out_bad_arg:
-	vmw_bo_unreference(&buffer);
-	drm_gem_object_put(&buffer->tbo.base);
+	vmw_user_bo_unref(buffer);
 	return ret;
 }
 

From ab4109f91b328ff5cb5e1279f64d443241add2d1 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Tue, 22 Aug 2023 21:29:42 +0200
Subject: [PATCH 1843/2047] gpio: sim: dispose of irq mappings before
 destroying the irq_sim domain

If a GPIO simulator device is unbound with interrupts still requested,
we will hit a use-after-free issue in __irq_domain_deactivate_irq(). The
owner of the irq domain must dispose of all mappings before destroying
the domain object.

Fixes: cb8c474e79be ("gpio: sim: new testing module")
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-sim.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index f1f6f1c32987..8fb11a5395eb 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -291,6 +291,15 @@ static void gpio_sim_mutex_destroy(void *data)
 	mutex_destroy(lock);
 }
 
+static void gpio_sim_dispose_mappings(void *data)
+{
+	struct gpio_sim_chip *chip = data;
+	unsigned int i;
+
+	for (i = 0; i < chip->gc.ngpio; i++)
+		irq_dispose_mapping(irq_find_mapping(chip->irq_sim, i));
+}
+
 static void gpio_sim_sysfs_remove(void *data)
 {
 	struct gpio_sim_chip *chip = data;
@@ -406,6 +415,10 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev)
 	if (IS_ERR(chip->irq_sim))
 		return PTR_ERR(chip->irq_sim);
 
+	ret = devm_add_action_or_reset(dev, gpio_sim_dispose_mappings, chip);
+	if (ret)
+		return ret;
+
 	mutex_init(&chip->lock);
 	ret = devm_add_action_or_reset(dev, gpio_sim_mutex_destroy,
 				       &chip->lock);

From 6e39c1ac688161b4db3617aabbca589b395242bc Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Tue, 22 Aug 2023 21:29:43 +0200
Subject: [PATCH 1844/2047] gpio: sim: pass the GPIO device's software node to
 irq domain

Associate the swnode of the GPIO device's (which is the interrupt
controller here) with the irq domain. Otherwise the interrupt-controller
device attribute is a no-op.

Fixes: cb8c474e79be ("gpio: sim: new testing module")
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-sim.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index 8fb11a5395eb..533d81572579 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -411,7 +411,7 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev)
 	if (!chip->pull_map)
 		return -ENOMEM;
 
-	chip->irq_sim = devm_irq_domain_create_sim(dev, NULL, num_lines);
+	chip->irq_sim = devm_irq_domain_create_sim(dev, swnode, num_lines);
 	if (IS_ERR(chip->irq_sim))
 		return PTR_ERR(chip->irq_sim);
 

From a94e7ccfc400c024976f3c2f31689ed843498b7c Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 22 Aug 2023 14:30:14 +0300
Subject: [PATCH 1845/2047] drm: Add an HPD poll helper to reschedule the poll
 work
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a helper to reschedule drm_mode_config::output_poll_work after
polling has been enabled for a connector (and needing a reschedule,
since previously polling was disabled for all connectors and hence
output_poll_work was not running).

This is needed by the next patch fixing HPD polling on i915.

CC: stable@vger.kernel.org # 6.4+
Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230822113015.41224-1-imre.deak@intel.com
(cherry picked from commit fe2352fd64029918174de4b460dfe6df0c6911cd)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/drm_probe_helper.c | 68 ++++++++++++++++++++----------
 include/drm/drm_probe_helper.h     |  1 +
 2 files changed, 47 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index 2fb9bf901a2c..3f479483d7d8 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -262,6 +262,26 @@ static bool drm_kms_helper_enable_hpd(struct drm_device *dev)
 }
 
 #define DRM_OUTPUT_POLL_PERIOD (10*HZ)
+static void reschedule_output_poll_work(struct drm_device *dev)
+{
+	unsigned long delay = DRM_OUTPUT_POLL_PERIOD;
+
+	if (dev->mode_config.delayed_event)
+		/*
+		 * FIXME:
+		 *
+		 * Use short (1s) delay to handle the initial delayed event.
+		 * This delay should not be needed, but Optimus/nouveau will
+		 * fail in a mysterious way if the delayed event is handled as
+		 * soon as possible like it is done in
+		 * drm_helper_probe_single_connector_modes() in case the poll
+		 * was enabled before.
+		 */
+		delay = HZ;
+
+	schedule_delayed_work(&dev->mode_config.output_poll_work, delay);
+}
+
 /**
  * drm_kms_helper_poll_enable - re-enable output polling.
  * @dev: drm_device
@@ -279,37 +299,41 @@ static bool drm_kms_helper_enable_hpd(struct drm_device *dev)
  */
 void drm_kms_helper_poll_enable(struct drm_device *dev)
 {
-	bool poll = false;
-	unsigned long delay = DRM_OUTPUT_POLL_PERIOD;
-
 	if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll ||
 	    dev->mode_config.poll_running)
 		return;
 
-	poll = drm_kms_helper_enable_hpd(dev);
-
-	if (dev->mode_config.delayed_event) {
-		/*
-		 * FIXME:
-		 *
-		 * Use short (1s) delay to handle the initial delayed event.
-		 * This delay should not be needed, but Optimus/nouveau will
-		 * fail in a mysterious way if the delayed event is handled as
-		 * soon as possible like it is done in
-		 * drm_helper_probe_single_connector_modes() in case the poll
-		 * was enabled before.
-		 */
-		poll = true;
-		delay = HZ;
-	}
-
-	if (poll)
-		schedule_delayed_work(&dev->mode_config.output_poll_work, delay);
+	if (drm_kms_helper_enable_hpd(dev) ||
+	    dev->mode_config.delayed_event)
+		reschedule_output_poll_work(dev);
 
 	dev->mode_config.poll_running = true;
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_enable);
 
+/**
+ * drm_kms_helper_poll_reschedule - reschedule the output polling work
+ * @dev: drm_device
+ *
+ * This function reschedules the output polling work, after polling for a
+ * connector has been enabled.
+ *
+ * Drivers must call this helper after enabling polling for a connector by
+ * setting %DRM_CONNECTOR_POLL_CONNECT / %DRM_CONNECTOR_POLL_DISCONNECT flags
+ * in drm_connector::polled. Note that after disabling polling by clearing these
+ * flags for a connector will stop the output polling work automatically if
+ * the polling is disabled for all other connectors as well.
+ *
+ * The function can be called only after polling has been enabled by calling
+ * drm_kms_helper_poll_init() / drm_kms_helper_poll_enable().
+ */
+void drm_kms_helper_poll_reschedule(struct drm_device *dev)
+{
+	if (dev->mode_config.poll_running)
+		reschedule_output_poll_work(dev);
+}
+EXPORT_SYMBOL(drm_kms_helper_poll_reschedule);
+
 static enum drm_connector_status
 drm_helper_probe_detect_ctx(struct drm_connector *connector, bool force)
 {
diff --git a/include/drm/drm_probe_helper.h b/include/drm/drm_probe_helper.h
index 4977e0ab72db..fad3c4003b2b 100644
--- a/include/drm/drm_probe_helper.h
+++ b/include/drm/drm_probe_helper.h
@@ -25,6 +25,7 @@ void drm_kms_helper_connector_hotplug_event(struct drm_connector *connector);
 
 void drm_kms_helper_poll_disable(struct drm_device *dev);
 void drm_kms_helper_poll_enable(struct drm_device *dev);
+void drm_kms_helper_poll_reschedule(struct drm_device *dev);
 bool drm_kms_helper_is_poll_worker(void);
 
 enum drm_mode_status drm_crtc_helper_mode_valid_fixed(struct drm_crtc *crtc,

From 1dcc437427bbcebc8381226352f7ade08a271191 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 22 Aug 2023 14:30:15 +0300
Subject: [PATCH 1846/2047] drm/i915: Fix HPD polling, reenabling the output
 poll work as needed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After the commit in the Fixes: line below, HPD polling stopped working
on i915, since after that change calling drm_kms_helper_poll_enable()
doesn't restart drm_mode_config::output_poll_work if the work was
stopped (no connectors needing polling) and enabling polling for a
connector (during runtime suspend or detecting an HPD IRQ storm).

After the above change calling drm_kms_helper_poll_enable() is a nop
after it's been called already and polling for some connectors was
disabled/re-enabled.

Fix this by calling drm_kms_helper_poll_reschedule() added in the
previous patch instead, which reschedules the work whenever expected.

Fixes: d33a54e3991d ("drm/probe_helper: sort out poll_running vs poll_enabled")
CC: stable@vger.kernel.org # 6.4+
Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230822113015.41224-2-imre.deak@intel.com
(cherry picked from commit 50452f2f76852322620b63e62922b85e955abe94)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/display/intel_hotplug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c
index 1160fa20433b..5eac7032bb5a 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -211,7 +211,7 @@ intel_hpd_irq_storm_switch_to_polling(struct drm_i915_private *dev_priv)
 
 	/* Enable polling and queue hotplug re-enabling. */
 	if (hpd_disabled) {
-		drm_kms_helper_poll_enable(&dev_priv->drm);
+		drm_kms_helper_poll_reschedule(&dev_priv->drm);
 		mod_delayed_work(dev_priv->unordered_wq,
 				 &dev_priv->display.hotplug.reenable_work,
 				 msecs_to_jiffies(HPD_STORM_REENABLE_DELAY));
@@ -649,7 +649,7 @@ static void i915_hpd_poll_init_work(struct work_struct *work)
 	drm_connector_list_iter_end(&conn_iter);
 
 	if (enabled)
-		drm_kms_helper_poll_enable(&dev_priv->drm);
+		drm_kms_helper_poll_reschedule(&dev_priv->drm);
 
 	mutex_unlock(&dev_priv->drm.mode_config.mutex);
 

From 1d2a22fa6d2511d5871d87c15b4fe7a944fe3b2a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Wed, 23 Aug 2023 10:52:58 +0200
Subject: [PATCH 1847/2047] gpio: mlxbf3: use capital "OR" for multiple
 licenses in SPDX

Documentation/process/license-rules.rst and checkpatch expect the SPDX
identifier syntax for multiple licenses to use capital "OR".  Correct it
to keep consistent format and avoid copy-paste issues.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-mlxbf3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c
index 0a5f241a8352..7a3e1760fc5b 100644
--- a/drivers/gpio/gpio-mlxbf3.c
+++ b/drivers/gpio/gpio-mlxbf3.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-only or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
 /* Copyright (C) 2022 NVIDIA CORPORATION & AFFILIATES */
 
 #include <linux/bitfield.h>

From 1f69383b203e28cf8a4ca9570e572da1699f76cd Mon Sep 17 00:00:00 2001
From: Rick Edgecombe <rick.p.edgecombe@intel.com>
Date: Fri, 18 Aug 2023 10:03:05 -0700
Subject: [PATCH 1848/2047] x86/fpu: Invalidate FPU state correctly on exec()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The thread flag TIF_NEED_FPU_LOAD indicates that the FPU saved state is
valid and should be reloaded when returning to userspace. However, the
kernel will skip doing this if the FPU registers are already valid as
determined by fpregs_state_valid(). The logic embedded there considers
the state valid if two cases are both true:

  1: fpu_fpregs_owner_ctx points to the current tasks FPU state
  2: the last CPU the registers were live in was the current CPU.

This is usually correct logic. A CPU’s fpu_fpregs_owner_ctx is set to
the current FPU during the fpregs_restore_userregs() operation, so it
indicates that the registers have been restored on this CPU. But this
alone doesn’t preclude that the task hasn’t been rescheduled to a
different CPU, where the registers were modified, and then back to the
current CPU. To verify that this was not the case the logic relies on the
second condition. So the assumption is that if the registers have been
restored, AND they haven’t had the chance to be modified (by being
loaded on another CPU), then they MUST be valid on the current CPU.

Besides the lazy FPU optimizations, the other cases where the FPU
registers might not be valid are when the kernel modifies the FPU register
state or the FPU saved buffer. In this case the operation modifying the
FPU state needs to let the kernel know the correspondence has been
broken. The comment in “arch/x86/kernel/fpu/context.h” has:
/*
...
 * If the FPU register state is valid, the kernel can skip restoring the
 * FPU state from memory.
 *
 * Any code that clobbers the FPU registers or updates the in-memory
 * FPU state for a task MUST let the rest of the kernel know that the
 * FPU registers are no longer valid for this task.
 *
 * Either one of these invalidation functions is enough. Invalidate
 * a resource you control: CPU if using the CPU for something else
 * (with preemption disabled), FPU for the current task, or a task that
 * is prevented from running by the current task.
 */

However, this is not completely true. When the kernel modifies the
registers or saved FPU state, it can only rely on
__fpu_invalidate_fpregs_state(), which wipes the FPU’s last_cpu
tracking. The exec path instead relies on fpregs_deactivate(), which sets
the CPU’s FPU context to NULL. This was observed to fail to restore the
reset FPU state to the registers when returning to userspace in the
following scenario:

1. A task is executing in userspace on CPU0
	- CPU0’s FPU context points to tasks
	- fpu->last_cpu=CPU0

2. The task exec()’s

3. While in the kernel the task is preempted
	- CPU0 gets a thread executing in the kernel (such that no other
		FPU context is activated)
	- Scheduler sets task’s fpu->last_cpu=CPU0 when scheduling out

4. Task is migrated to CPU1

5. Continuing the exec(), the task gets to
   fpu_flush_thread()->fpu_reset_fpregs()
	- Sets CPU1’s fpu context to NULL
	- Copies the init state to the task’s FPU buffer
	- Sets TIF_NEED_FPU_LOAD on the task

6. The task reschedules back to CPU0 before completing the exec() and
   returning to userspace
	- During the reschedule, scheduler finds TIF_NEED_FPU_LOAD is set
	- Skips saving the registers and updating task’s fpu→last_cpu,
	  because TIF_NEED_FPU_LOAD is the canonical source.

7. Now CPU0’s FPU context is still pointing to the task’s, and
   fpu->last_cpu is still CPU0. So fpregs_state_valid() returns true even
   though the reset FPU state has not been restored.

So the root cause is that exec() is doing the wrong kind of invalidate. It
should reset fpu->last_cpu via __fpu_invalidate_fpregs_state(). Further,
fpu__drop() doesn't really seem appropriate as the task (and FPU) are not
going away, they are just getting reset as part of an exec. So switch to
__fpu_invalidate_fpregs_state().

Also, delete the misleading comment that says that either kind of
invalidate will be enough, because it’s not always the case.

Fixes: 33344368cb08 ("x86/fpu: Clean up the fpu__clear() variants")
Reported-by: Lei Wang <lei4.wang@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Lijun Pan <lijun.pan@intel.com>
Reviewed-by: Sohil Mehta <sohil.mehta@intel.com>
Acked-by: Lijun Pan <lijun.pan@intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20230818170305.502891-1-rick.p.edgecombe@intel.com
---
 arch/x86/kernel/fpu/context.h | 3 +--
 arch/x86/kernel/fpu/core.c    | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h
index af5cbdd9bd29..f6d856bd50bc 100644
--- a/arch/x86/kernel/fpu/context.h
+++ b/arch/x86/kernel/fpu/context.h
@@ -19,8 +19,7 @@
  * FPU state for a task MUST let the rest of the kernel know that the
  * FPU registers are no longer valid for this task.
  *
- * Either one of these invalidation functions is enough. Invalidate
- * a resource you control: CPU if using the CPU for something else
+ * Invalidate a resource you control: CPU if using the CPU for something else
  * (with preemption disabled), FPU for the current task, or a task that
  * is prevented from running by the current task.
  */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 1015af1ae562..98e507cc7d34 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -679,7 +679,7 @@ static void fpu_reset_fpregs(void)
 	struct fpu *fpu = &current->thread.fpu;
 
 	fpregs_lock();
-	fpu__drop(fpu);
+	__fpu_invalidate_fpregs_state(fpu);
 	/*
 	 * This does not change the actual hardware registers. It just
 	 * resets the memory image and sets TIF_NEED_FPU_LOAD so a

From 2c66ca3949dc701da7f4c9407f2140ae425683a5 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Wed, 23 Aug 2023 14:57:47 +0800
Subject: [PATCH 1849/2047] x86/fpu: Set X86_FEATURE_OSXSAVE feature after
 enabling OSXSAVE in CR4

0-Day found a 34.6% regression in stress-ng's 'af-alg' test case, and
bisected it to commit b81fac906a8f ("x86/fpu: Move FPU initialization into
arch_cpu_finalize_init()"), which optimizes the FPU init order, and moves
the CR4_OSXSAVE enabling into a later place:

   arch_cpu_finalize_init
       identify_boot_cpu
	   identify_cpu
	       generic_identify
                   get_cpu_cap --> setup cpu capability
       ...
       fpu__init_cpu
           fpu__init_cpu_xstate
               cr4_set_bits(X86_CR4_OSXSAVE);

As the FPU is not yet initialized the CPU capability setup fails to set
X86_FEATURE_OSXSAVE. Many security module like 'camellia_aesni_avx_x86_64'
depend on this feature and therefore fail to load, causing the regression.

Cure this by setting X86_FEATURE_OSXSAVE feature right after OSXSAVE
enabling.

[ tglx: Moved it into the actual BSP FPU initialization code and added a comment ]

Fixes: b81fac906a8f ("x86/fpu: Move FPU initialization into arch_cpu_finalize_init()")
Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/lkml/202307192135.203ac24e-oliver.sang@intel.com
Link: https://lore.kernel.org/lkml/20230823065747.92257-1-feng.tang@intel.com
---
 arch/x86/kernel/fpu/xstate.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 0bab497c9436..1afbc4866b10 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -882,6 +882,13 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
 		goto out_disable;
 	}
 
+	/*
+	 * CPU capabilities initialization runs before FPU init. So
+	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
+	 * functional, set the feature bit so depending code works.
+	 */
+	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
+
 	print_xstate_offset_size();
 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 		fpu_kernel_cfg.max_features,

From 8b94da92559f7e403dc7ab81937cc50f949ee2fd Mon Sep 17 00:00:00 2001
From: Mikel Rychliski <mikel@mikelr.com>
Date: Wed, 23 Aug 2023 17:51:58 -0400
Subject: [PATCH 1850/2047] x86/efistub: Fix PCI ROM preservation in mixed mode

preserve_pci_rom_image() was accessing the romsize field in
efi_pci_io_protocol_t directly instead of using the efi_table_attr()
helper. This prevents the ROM image from being saved correctly during a
mixed mode boot.

Fixes: 2c3625cb9fa2 ("efi/x86: Fold __setup_efi_pci32() and __setup_efi_pci64() into one function")
Signed-off-by: Mikel Rychliski <mikel@mikelr.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/libstub/x86-stub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 220be75a5cdc..146477da2b98 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -72,7 +72,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
 	rom->data.type	= SETUP_PCI;
 	rom->data.len	= size - sizeof(struct setup_data);
 	rom->data.next	= 0;
-	rom->pcilen	= pci->romsize;
+	rom->pcilen	= romsize;
 	*__rom = rom;
 
 	status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16,

From a11937b3cff5449871f428e46e202481dc61a9de Mon Sep 17 00:00:00 2001
From: Liming Sun <limings@nvidia.com>
Date: Tue, 22 Aug 2023 15:59:28 -0400
Subject: [PATCH 1851/2047] mmc: sdhci-of-dwcmshc: Add error handling in
 dwcmshc_resume

This commit adds handling in dwcmshc_resume() for different error
cases.

Signed-off-by: Liming Sun <limings@nvidia.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230822195929.168552-1-limings@nvidia.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-dwcmshc.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index 31c1892f4ecd..bc332a035032 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -630,17 +630,32 @@ static int dwcmshc_resume(struct device *dev)
 	if (!IS_ERR(priv->bus_clk)) {
 		ret = clk_prepare_enable(priv->bus_clk);
 		if (ret)
-			return ret;
+			goto disable_clk;
 	}
 
 	if (rk_priv) {
 		ret = clk_bulk_prepare_enable(RK35xx_MAX_CLKS,
 					      rk_priv->rockchip_clks);
 		if (ret)
-			return ret;
+			goto disable_bus_clk;
 	}
 
-	return sdhci_resume_host(host);
+	ret = sdhci_resume_host(host);
+	if (ret)
+		goto disable_rockchip_clks;
+
+	return 0;
+
+disable_rockchip_clks:
+	if (rk_priv)
+		clk_bulk_disable_unprepare(RK35xx_MAX_CLKS,
+					   rk_priv->rockchip_clks);
+disable_bus_clk:
+	if (!IS_ERR(priv->bus_clk))
+		clk_disable_unprepare(priv->bus_clk);
+disable_clk:
+	clk_disable_unprepare(pltfm_host->clk);
+	return ret;
 }
 #endif
 

From 48fe8fadbe5e03edfd83315c331b171a9ae245a4 Mon Sep 17 00:00:00 2001
From: Liming Sun <limings@nvidia.com>
Date: Tue, 22 Aug 2023 15:59:29 -0400
Subject: [PATCH 1852/2047] mmc: sdhci-of-dwcmshc: Add runtime PM operations

This commit implements the runtime PM operations to disable eMMC card clock
when idle.

Reviewed-by: David Thompson <davthompson@nvidia.com>
Signed-off-by: Liming Sun <limings@nvidia.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20230822195929.168552-2-limings@nvidia.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-dwcmshc.c | 64 ++++++++++++++++++++++++++++-
 1 file changed, 62 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index bc332a035032..3a3bae6948a8 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/reset.h>
 #include <linux/sizes.h>
 
@@ -548,9 +549,13 @@ static int dwcmshc_probe(struct platform_device *pdev)
 
 	host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
 
+	pm_runtime_get_noresume(dev);
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+
 	err = sdhci_setup_host(host);
 	if (err)
-		goto err_clk;
+		goto err_rpm;
 
 	if (rk_priv)
 		dwcmshc_rk35xx_postinit(host, priv);
@@ -559,10 +564,15 @@ static int dwcmshc_probe(struct platform_device *pdev)
 	if (err)
 		goto err_setup_host;
 
+	pm_runtime_put(dev);
+
 	return 0;
 
 err_setup_host:
 	sdhci_cleanup_host(host);
+err_rpm:
+	pm_runtime_disable(dev);
+	pm_runtime_put_noidle(dev);
 err_clk:
 	clk_disable_unprepare(pltfm_host->clk);
 	clk_disable_unprepare(priv->bus_clk);
@@ -600,6 +610,8 @@ static int dwcmshc_suspend(struct device *dev)
 	struct rk35xx_priv *rk_priv = priv->priv;
 	int ret;
 
+	pm_runtime_resume(dev);
+
 	ret = sdhci_suspend_host(host);
 	if (ret)
 		return ret;
@@ -659,7 +671,55 @@ disable_clk:
 }
 #endif
 
-static SIMPLE_DEV_PM_OPS(dwcmshc_pmops, dwcmshc_suspend, dwcmshc_resume);
+#ifdef CONFIG_PM
+
+static void dwcmshc_enable_card_clk(struct sdhci_host *host)
+{
+	u16 ctrl;
+
+	ctrl = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+	if ((ctrl & SDHCI_CLOCK_INT_EN) && !(ctrl & SDHCI_CLOCK_CARD_EN)) {
+		ctrl |= SDHCI_CLOCK_CARD_EN;
+		sdhci_writew(host, ctrl, SDHCI_CLOCK_CONTROL);
+	}
+}
+
+static void dwcmshc_disable_card_clk(struct sdhci_host *host)
+{
+	u16 ctrl;
+
+	ctrl = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+	if (ctrl & SDHCI_CLOCK_CARD_EN) {
+		ctrl &= ~SDHCI_CLOCK_CARD_EN;
+		sdhci_writew(host, ctrl, SDHCI_CLOCK_CONTROL);
+	}
+}
+
+static int dwcmshc_runtime_suspend(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+
+	dwcmshc_disable_card_clk(host);
+
+	return 0;
+}
+
+static int dwcmshc_runtime_resume(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+
+	dwcmshc_enable_card_clk(host);
+
+	return 0;
+}
+
+#endif
+
+static const struct dev_pm_ops dwcmshc_pmops = {
+	SET_SYSTEM_SLEEP_PM_OPS(dwcmshc_suspend, dwcmshc_resume)
+	SET_RUNTIME_PM_OPS(dwcmshc_runtime_suspend,
+			   dwcmshc_runtime_resume, NULL)
+};
 
 static struct platform_driver sdhci_dwcmshc_driver = {
 	.driver	= {

From c008323fe361bd62a43d9fb29737dacd5c067fb7 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 23 Aug 2023 20:11:49 -0500
Subject: [PATCH 1853/2047] ASoC: amd: yc: Fix a non-functional mic on Lenovo
 82SJ

Lenovo 82SJ doesn't have DMIC connected like 82V2 does.  Narrow
the match down to only cover 82V2.

Reported-by: prosenfeld@Yuhsbstudents.org
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217063
Fixes: 2232b2dd8cd4 ("ASoC: amd: yc: Add Lenovo Yoga Slim 7 Pro X to quirks table")
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com
Link: https://lore.kernel.org/r/20230824011149.1395-1-mario.limonciello@amd.com
Signed-off-by: Mark Brown <broonie@kernel.org
---
 sound/soc/amd/yc/acp6x-mach.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index a2661dd637a5..b304b3562c82 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -217,7 +217,7 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
 		.driver_data = &acp6x_card,
 		.matches = {
 			DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "82"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "82V2"),
 		}
 	},
 	{

From bfe2e8f569073e7297ad19932b980964105bb777 Mon Sep 17 00:00:00 2001
From: Stas Sergeev <stsp2@yandex.ru>
Date: Thu, 22 Jun 2023 21:52:24 +0500
Subject: [PATCH 1854/2047] selftests: add OFD lock tests

Test the basic locking stuff on 2 fds: multiple read locks,
conflicts between read and write locks, use of len==0 for queries.
Also tests for F_UNLCK F_OFD_GETLK extension.

[ jlayton: fix unlink() pathname in selftest ]

Cc: Jeff Layton <jlayton@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Shuah Khan <shuah@kernel.org>
Cc: linux-kselftest@vger.kernel.org
Cc: linux-api@vger.kernel.org
Signed-off-by: Stas Sergeev <stsp2@yandex.ru>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 tools/testing/selftests/filelock/Makefile   |   5 +
 tools/testing/selftests/filelock/ofdlocks.c | 132 ++++++++++++++++++++
 2 files changed, 137 insertions(+)
 create mode 100644 tools/testing/selftests/filelock/Makefile
 create mode 100644 tools/testing/selftests/filelock/ofdlocks.c

diff --git a/tools/testing/selftests/filelock/Makefile b/tools/testing/selftests/filelock/Makefile
new file mode 100644
index 000000000000..478e82f8b464
--- /dev/null
+++ b/tools/testing/selftests/filelock/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := ofdlocks
+
+include ../lib.mk
diff --git a/tools/testing/selftests/filelock/ofdlocks.c b/tools/testing/selftests/filelock/ofdlocks.c
new file mode 100644
index 000000000000..a55b79810ab2
--- /dev/null
+++ b/tools/testing/selftests/filelock/ofdlocks.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include "../kselftest.h"
+
+static int lock_set(int fd, struct flock *fl)
+{
+	int ret;
+
+	fl->l_pid = 0;		// needed for OFD locks
+	fl->l_whence = SEEK_SET;
+	ret = fcntl(fd, F_OFD_SETLK, fl);
+	if (ret)
+		perror("fcntl()");
+	return ret;
+}
+
+static int lock_get(int fd, struct flock *fl)
+{
+	int ret;
+
+	fl->l_pid = 0;		// needed for OFD locks
+	fl->l_whence = SEEK_SET;
+	ret = fcntl(fd, F_OFD_GETLK, fl);
+	if (ret)
+		perror("fcntl()");
+	return ret;
+}
+
+int main(void)
+{
+	int rc;
+	struct flock fl, fl2;
+	int fd = open("/tmp/aa", O_RDWR | O_CREAT | O_EXCL, 0600);
+	int fd2 = open("/tmp/aa", O_RDONLY);
+
+	unlink("/tmp/aa");
+	assert(fd != -1);
+	assert(fd2 != -1);
+	ksft_print_msg("[INFO] opened fds %i %i\n", fd, fd2);
+
+	/* Set some read lock */
+	fl.l_type = F_RDLCK;
+	fl.l_start = 5;
+	fl.l_len = 3;
+	rc = lock_set(fd, &fl);
+	if (rc == 0) {
+		ksft_print_msg
+		    ("[SUCCESS] set OFD read lock on first fd\n");
+	} else {
+		ksft_print_msg("[FAIL] to set OFD read lock on first fd\n");
+		return -1;
+	}
+	/* Make sure read locks do not conflict on different fds. */
+	fl.l_type = F_RDLCK;
+	fl.l_start = 5;
+	fl.l_len = 1;
+	rc = lock_get(fd2, &fl);
+	if (rc != 0)
+		return -1;
+	if (fl.l_type != F_UNLCK) {
+		ksft_print_msg("[FAIL] read locks conflicted\n");
+		return -1;
+	}
+	/* Make sure read/write locks do conflict on different fds. */
+	fl.l_type = F_WRLCK;
+	fl.l_start = 5;
+	fl.l_len = 1;
+	rc = lock_get(fd2, &fl);
+	if (rc != 0)
+		return -1;
+	if (fl.l_type != F_UNLCK) {
+		ksft_print_msg
+		    ("[SUCCESS] read and write locks conflicted\n");
+	} else {
+		ksft_print_msg
+		    ("[SUCCESS] read and write locks not conflicted\n");
+		return -1;
+	}
+	/* Get info about the lock on first fd. */
+	fl.l_type = F_UNLCK;
+	fl.l_start = 5;
+	fl.l_len = 1;
+	rc = lock_get(fd, &fl);
+	if (rc != 0) {
+		ksft_print_msg
+		    ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n");
+		return -1;
+	}
+	if (fl.l_type != F_UNLCK) {
+		ksft_print_msg
+		    ("[SUCCESS] F_UNLCK test returns: locked, type %i pid %i len %zi\n",
+		     fl.l_type, fl.l_pid, fl.l_len);
+	} else {
+		ksft_print_msg
+		    ("[FAIL] F_OFD_GETLK with F_UNLCK did not return lock info\n");
+		return -1;
+	}
+	/* Try the same but by locking everything by len==0. */
+	fl2.l_type = F_UNLCK;
+	fl2.l_start = 0;
+	fl2.l_len = 0;
+	rc = lock_get(fd, &fl2);
+	if (rc != 0) {
+		ksft_print_msg
+		    ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n");
+		return -1;
+	}
+	if (memcmp(&fl, &fl2, sizeof(fl))) {
+		ksft_print_msg
+		    ("[FAIL] F_UNLCK test returns: locked, type %i pid %i len %zi\n",
+		     fl.l_type, fl.l_pid, fl.l_len);
+		return -1;
+	}
+	ksft_print_msg("[SUCCESS] F_UNLCK with len==0 returned the same\n");
+	/* Get info about the lock on second fd - no locks on it. */
+	fl.l_type = F_UNLCK;
+	fl.l_start = 0;
+	fl.l_len = 0;
+	lock_get(fd2, &fl);
+	if (fl.l_type != F_UNLCK) {
+		ksft_print_msg
+		    ("[FAIL] F_OFD_GETLK with F_UNLCK return lock info from another fd\n");
+		return -1;
+	}
+	return 0;
+}

From bd4c4680c039fa9defddcff0ef354c147f7868b3 Mon Sep 17 00:00:00 2001
From: Jakub Wilk <jwilk@jwilk.net>
Date: Fri, 21 Jul 2023 11:21:47 +0200
Subject: [PATCH 1855/2047] fs/locks: Fix typo

Signed-off-by: Jakub Wilk <jwilk@jwilk.net>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/locks.c b/fs/locks.c
index 210766007e63..1114b3b8aa1e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2151,7 +2151,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);
  * @fl: The file_lock who's fl_pid should be translated
  * @ns: The namespace into which the pid should be translated
  *
- * Used to tranlate a fl_pid into a namespace virtual pid number
+ * Used to translate a fl_pid into a namespace virtual pid number
  */
 static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
 {

From 74f6f5912693ce454384eaeec48705646a21c74f Mon Sep 17 00:00:00 2001
From: Will Shiu <Will.Shiu@mediatek.com>
Date: Fri, 21 Jul 2023 13:19:04 +0800
Subject: [PATCH 1856/2047] locks: fix KASAN: use-after-free in
 trace_event_raw_event_filelock_lock

As following backtrace, the struct file_lock request , in posix_lock_inode
is free before ftrace function using.
Replace the ftrace function ahead free flow could fix the use-after-free
issue.

[name:report&]===============================================
BUG:KASAN: use-after-free in trace_event_raw_event_filelock_lock+0x80/0x12c
[name:report&]Read at addr f6ffff8025622620 by task NativeThread/16753
[name:report_hw_tags&]Pointer tag: [f6], memory tag: [fe]
[name:report&]
BT:
Hardware name: MT6897 (DT)
Call trace:
 dump_backtrace+0xf8/0x148
 show_stack+0x18/0x24
 dump_stack_lvl+0x60/0x7c
 print_report+0x2c8/0xa08
 kasan_report+0xb0/0x120
 __do_kernel_fault+0xc8/0x248
 do_bad_area+0x30/0xdc
 do_tag_check_fault+0x1c/0x30
 do_mem_abort+0x58/0xbc
 el1_abort+0x3c/0x5c
 el1h_64_sync_handler+0x54/0x90
 el1h_64_sync+0x68/0x6c
 trace_event_raw_event_filelock_lock+0x80/0x12c
 posix_lock_inode+0xd0c/0xd60
 do_lock_file_wait+0xb8/0x190
 fcntl_setlk+0x2d8/0x440
...
[name:report&]
[name:report&]Allocated by task 16752:
...
 slab_post_alloc_hook+0x74/0x340
 kmem_cache_alloc+0x1b0/0x2f0
 posix_lock_inode+0xb0/0xd60
...
 [name:report&]
 [name:report&]Freed by task 16752:
...
  kmem_cache_free+0x274/0x5b0
  locks_dispose_list+0x3c/0x148
  posix_lock_inode+0xc40/0xd60
  do_lock_file_wait+0xb8/0x190
  fcntl_setlk+0x2d8/0x440
  do_fcntl+0x150/0xc18
...

Signed-off-by: Will Shiu <Will.Shiu@mediatek.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/locks.c b/fs/locks.c
index 1114b3b8aa1e..0a99196764a8 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1316,6 +1316,7 @@ retry:
  out:
 	spin_unlock(&ctx->flc_lock);
 	percpu_up_read(&file_rwsem);
+	trace_posix_lock_inode(inode, request, error);
 	/*
 	 * Free any unused locks.
 	 */
@@ -1324,7 +1325,6 @@ retry:
 	if (new_fl2)
 		locks_free_lock(new_fl2);
 	locks_dispose_list(&dispose);
-	trace_posix_lock_inode(inode, request, error);
 
 	return error;
 }

From 8073a98e95323abdea5491533bf5cb51e0ba18d9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 24 Aug 2023 10:30:27 -0400
Subject: [PATCH 1857/2047] NFSD: Fix a thinko introduced by recent trace point
 changes

The fixed commit erroneously removed a call to nfsd_end_grace(),
which makes calls to write_v4_end_grace() a no-op.

Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202308241229.68396422-oliver.sang@intel.com
Fixes: 39d432fc7630 ("NFSD: trace nfsctl operations")
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfsctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1b8b1aab9a15..4302ca0ff6ed 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1105,6 +1105,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
 			if (!nn->nfsd_serv)
 				return -EBUSY;
 			trace_nfsd_end_grace(netns(file));
+			nfsd4_end_grace(nn);
 			break;
 		default:
 			return -EINVAL;

From 1e6f01f7285559a5302e85677b8e59a5de9b0ea5 Mon Sep 17 00:00:00 2001
From: Justin Stitt <justinstitt@google.com>
Date: Thu, 24 Aug 2023 18:52:18 +0000
Subject: [PATCH 1858/2047] x86/platform/uv: Refactor code using deprecated
 strcpy()/strncpy() interfaces to use strscpy()

Both `strncpy` and `strcpy` are deprecated for use on NUL-terminated
destination strings [1].

A suitable replacement is `strscpy` [2] due to the fact that it
guarantees NUL-termination on its destination buffer argument which is
_not_ the case for `strncpy` or `strcpy`!

In this case, we can drop both the forced NUL-termination and the `... -1` from:
|       strncpy(arg, val, ACTION_LEN - 1);
as `strscpy` implicitly has this behavior.

Also include slight refactor to code removing possible new-line chars as
per Yang Yang's work at [3]. This reduces code size and complexity by
using more robust and better understood interfaces.

Co-developed-by: Yang Yang <yang.yang29@zte.com.cn>
Signed-off-by: Justin Stitt <justinstitt@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Dimitri Sivanich <sivanich@hpe.com>
Link: www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings[1]
Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2]
Link: https://lore.kernel.org/all/202212091545310085328@zte.com.cn/ [3]
Link: https://github.com/KSPP/linux/issues/90
Link: https://lore.kernel.org/r/20230824-strncpy-arch-x86-platform-uv-uv_nmi-v2-1-e16d9a3ec570@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/uv/uv_nmi.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index a60af0230e27..a6ab43f69b7d 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -202,21 +202,17 @@ static int param_set_action(const char *val, const struct kernel_param *kp)
 {
 	int i;
 	int n = ARRAY_SIZE(valid_acts);
-	char arg[ACTION_LEN], *p;
+	char arg[ACTION_LEN];
 
 	/* (remove possible '\n') */
-	strncpy(arg, val, ACTION_LEN - 1);
-	arg[ACTION_LEN - 1] = '\0';
-	p = strchr(arg, '\n');
-	if (p)
-		*p = '\0';
+	strscpy(arg, val, strnchrnul(val, sizeof(arg)-1, '\n') - val + 1);
 
 	for (i = 0; i < n; i++)
 		if (!strcmp(arg, valid_acts[i].action))
 			break;
 
 	if (i < n) {
-		strcpy(uv_nmi_action, arg);
+		strscpy(uv_nmi_action, arg, sizeof(uv_nmi_action));
 		pr_info("UV: New NMI action:%s\n", uv_nmi_action);
 		return 0;
 	}
@@ -959,7 +955,7 @@ static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
 
 		/* Unexpected return, revert action to "dump" */
 		if (master)
-			strncpy(uv_nmi_action, "dump", strlen(uv_nmi_action));
+			strscpy(uv_nmi_action, "dump", sizeof(uv_nmi_action));
 	}
 
 	/* Pause as all CPU's enter the NMI handler */

From 4108d141bfd04cf7b2bbf96e899ee5392f62e8a2 Mon Sep 17 00:00:00 2001
From: Justin Stitt <justinstitt@google.com>
Date: Tue, 22 Aug 2023 22:13:36 +0000
Subject: [PATCH 1859/2047] x86/hpet: Refactor code using deprecated strncpy()
 interface to use strscpy()

`strncpy` is deprecated for use on NUL-terminated destination strings [1].

A suitable replacement is `strscpy` [2] due to the fact that it
guarantees NUL-termination on its destination buffer argument which is
_not_ the case for `strncpy`!

In this case, it is a simple swap from `strncpy` to `strscpy`. There is
one slight difference, though. If NUL-padding is a functional
requirement here we should opt for `strscpy_pad`. It seems like this
shouldn't be needed as I see no obvious signs of any padding being
required.

Signed-off-by: Justin Stitt <justinstitt@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings[1]
Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2]
Link: https://github.com/KSPP/linux/issues/90
Link: https://lore.kernel.org/r/20230822-strncpy-arch-x86-kernel-hpet-v1-1-2c7d3be86f4a@google.com
---
 arch/x86/kernel/hpet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index c8eb1ac5125a..1648aa0204d9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -421,7 +421,7 @@ static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc)
 	 * the IO_APIC has been initialized.
 	 */
 	hc->cpu = boot_cpu_data.cpu_index;
-	strncpy(hc->name, "hpet", sizeof(hc->name));
+	strscpy(hc->name, "hpet", sizeof(hc->name));
 	hpet_init_clockevent(hc, 50);
 
 	hc->evt.tick_resume	= hpet_clkevt_legacy_resume;

From 212f07a21622cbd4bb271b558b2d3ae0652e9875 Mon Sep 17 00:00:00 2001
From: Justin Stitt <justinstitt@google.com>
Date: Tue, 22 Aug 2023 22:05:30 +0000
Subject: [PATCH 1860/2047] x86/platform/uv: Refactor code using deprecated
 strncpy() interface to use strscpy()

`strncpy` is deprecated for use on NUL-terminated destination strings [1].

A suitable replacement is `strscpy` [2] due to the fact that it
guarantees NUL-termination on its destination buffer argument which is
_not_ the case for `strncpy`!

In this case, it means we can drop the `...-1` from:
|       strncpy(to, from, len-1);

as well as remove the comment mentioning NUL-termination as `strscpy`
implicitly grants us this behavior.

There should be no functional change as I don't believe the padding from
`strncpy` is needed here. If it turns out that the padding is necessary
we should use `strscpy_pad` as a direct replacement.

Signed-off-by: Justin Stitt <justinstitt@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Dimitri Sivanich <sivanich@hpe.com>
Link: www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings[1]
Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2]
Link: https://github.com/KSPP/linux/issues/90
Link: https://lore.kernel.org/r/20230822-strncpy-arch-x86-kernel-apic-x2apic_uv_x-v1-1-91d681d0b3f3@google.com
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index d9384d5b4b8e..b524dee1cbbb 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -294,8 +294,7 @@ static void __init early_get_apic_socketid_shift(void)
 
 static void __init uv_stringify(int len, char *to, char *from)
 {
-	/* Relies on 'to' being NULL chars so result will be NULL terminated */
-	strncpy(to, from, len-1);
+	strscpy(to, from, len);
 
 	/* Trim trailing spaces */
 	(void)strim(to);
@@ -1013,7 +1012,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index,
 
 	/* One (UV2) mapping */
 	if (index == UV2_MMIOH) {
-		strncpy(id, "MMIOH", sizeof(id));
+		strscpy(id, "MMIOH", sizeof(id));
 		max_io = max_pnode;
 		mapped = 0;
 		goto map_exit;

From 2c65477f14a359db9f1edee5dd8e683d3dae69e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Thu, 24 Aug 2023 16:28:32 +0300
Subject: [PATCH 1861/2047] perf/x86/uncore: Remove unnecessary ?: operator
 around pcibios_err_to_errno() call
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If err == 0, pcibios_err_to_errno(err) returns 0 so the ?: construct
can be removed.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230824132832.78705-15-ilpo.jarvinen@linux.intel.com
---
 arch/x86/events/intel/uncore_snbep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index d49e90dc04a4..4d349986f76a 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1502,7 +1502,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
 
 	pci_dev_put(ubox_dev);
 
-	return err ? pcibios_err_to_errno(err) : 0;
+	return pcibios_err_to_errno(err);
 }
 
 int snbep_uncore_pci_init(void)

From 2f88c8e802c8b128a155976631f4eb2ce4f3c805 Mon Sep 17 00:00:00 2001
From: Shrikanth Hegde <sshegde@linux.vnet.ibm.com>
Date: Thu, 24 Aug 2023 13:33:42 +0530
Subject: [PATCH 1862/2047] sched/eevdf/doc: Modify the documented knob to
 base_slice_ns as well

After committing the scheduler to EEVDF, we renamed the 'min_granularity_ns'
sysctl to 'base_slice_ns':

   e4ec3318a17f ("sched/debug: Rename sysctl_sched_min_granularity to sysctl_sched_base_slice")

... but we forgot to rename it in the documentation. Do that now.

Fixes: e4ec3318a17f ("sched/debug: Rename sysctl_sched_min_granularity to sysctl_sched_base_slice")
Signed-off-by: Shrikanth Hegde <sshegde@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230824080342.543396-1-sshegde@linux.vnet.ibm.com
---
 Documentation/scheduler/sched-design-CFS.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/scheduler/sched-design-CFS.rst b/Documentation/scheduler/sched-design-CFS.rst
index 03db55504515..f68919800f05 100644
--- a/Documentation/scheduler/sched-design-CFS.rst
+++ b/Documentation/scheduler/sched-design-CFS.rst
@@ -94,7 +94,7 @@ other HZ detail.  Thus the CFS scheduler has no notion of "timeslices" in the
 way the previous scheduler had, and has no heuristics whatsoever.  There is
 only one central tunable (you have to switch on CONFIG_SCHED_DEBUG):
 
-   /sys/kernel/debug/sched/min_granularity_ns
+   /sys/kernel/debug/sched/base_slice_ns
 
 which can be used to tune the scheduler from "desktop" (i.e., low latencies) to
 "server" (i.e., good batching) workloads.  It defaults to a setting suitable

From ef21fa7c198e04f3d3053b1c5b5f2b4b225c3350 Mon Sep 17 00:00:00 2001
From: Mingzheng Xing <xingmingzheng@iscas.ac.cn>
Date: Fri, 25 Aug 2023 03:08:52 +0800
Subject: [PATCH 1863/2047] riscv: Fix build errors using binutils2.37
 toolchains

When building the kernel with binutils 2.37 and GCC-11.1.0/GCC-11.2.0,
the following error occurs:

  Assembler messages:
  Error: cannot find default versions of the ISA extension `zicsr'
  Error: cannot find default versions of the ISA extension `zifencei'

The above error originated from this commit of binutils[0], which has been
resolved and backported by GCC-12.1.0[1] and GCC-11.3.0[2].

So fix this by change the GCC version in
CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC to GCC-11.3.0.

Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f0bae2552db1dd4f1995608fbf6648fcee4e9e0c [0]
Link: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ca2bbb88f999f4d3cc40e89bc1aba712505dd598 [1]
Link: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d29f5d6ab513c52fd872f532c492e35ae9fd6671 [2]
Fixes: ca09f772ccca ("riscv: Handle zicsr/zifencei issue between gcc and binutils")
Reported-by: Conor Dooley <conor.dooley@microchip.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Mingzheng Xing <xingmingzheng@iscas.ac.cn>
Link: https://lore.kernel.org/r/20230824190852.45470-1-xingmingzheng@iscas.ac.cn
Closes: https://lore.kernel.org/all/20230823-captive-abdomen-befd942a4a73@wendy/
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Tested-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 10e7a7ad175a..bea7b73e895d 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -580,15 +580,15 @@ config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
 	  and Zifencei are supported in binutils from version 2.36 onwards.
 	  To make life easier, and avoid forcing toolchains that default to a
 	  newer ISA spec to version 2.2, relax the check to binutils >= 2.36.
-	  For clang < 17 or GCC < 11.1.0, for which this is not possible, this is
-	  dealt with in CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC.
+	  For clang < 17 or GCC < 11.3.0, for which this is not possible or need
+	  special treatment, this is dealt with in TOOLCHAIN_NEEDS_OLD_ISA_SPEC.
 
 config TOOLCHAIN_NEEDS_OLD_ISA_SPEC
 	def_bool y
 	depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
 	# https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16
-	# https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b03be74bad08c382da47e048007a78fa3fb4ef49
-	depends on (CC_IS_CLANG && CLANG_VERSION < 170000) || (CC_IS_GCC && GCC_VERSION < 110100)
+	# https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d29f5d6ab513c52fd872f532c492e35ae9fd6671
+	depends on (CC_IS_CLANG && CLANG_VERSION < 170000) || (CC_IS_GCC && GCC_VERSION < 110300)
 	help
 	  Certain versions of clang and GCC do not support zicsr and zifencei via
 	  -march. This option causes an older ISA spec compatible with these older

From 21cc7f816c670423a9dae06ad7de5fbc40da97c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 24 Aug 2023 21:56:17 +0200
Subject: [PATCH 1864/2047] regulator: aw37503: Switch back to use struct
 i2c_driver's .probe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

struct i2c_driver::probe_new is about to go away. Switch the driver to
use the probe callback with the same prototype.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de
Link: https://lore.kernel.org/r/20230824195617.8888-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org
---
 drivers/regulator/aw37503-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/aw37503-regulator.c b/drivers/regulator/aw37503-regulator.c
index e01ef6ad3eb8..a5ff6dfd29b5 100644
--- a/drivers/regulator/aw37503-regulator.c
+++ b/drivers/regulator/aw37503-regulator.c
@@ -229,7 +229,7 @@ static struct i2c_driver aw37503_i2c_driver = {
 		.name = "aw37503",
 		.of_match_table = aw37503_of_match,
 	},
-	.probe_new = aw37503_probe,
+	.probe = aw37503_probe,
 	.id_table = aw37503_id,
 };
 

From 2f406263e3e954aa24c1248edcfa9be0c1bb30fa Mon Sep 17 00:00:00 2001
From: Yin Fengwei <fengwei.yin@intel.com>
Date: Tue, 8 Aug 2023 10:09:15 +0800
Subject: [PATCH 1865/2047] madvise:madvise_cold_or_pageout_pte_range(): don't
 use mapcount() against large folio for sharing check

Patch series "don't use mapcount() to check large folio sharing", v2.

In madvise_cold_or_pageout_pte_range() and madvise_free_pte_range(),
folio_mapcount() is used to check whether the folio is shared.  But it's
not correct as folio_mapcount() returns total mapcount of large folio.

Use folio_estimated_sharers() here as the estimated number is enough.

This patchset will fix the cases:
User space application call madvise() with MADV_FREE, MADV_COLD and
MADV_PAGEOUT for specific address range. There are THP mapped to the
range. Without the patchset, the THP is skipped. With the patch, the
THP will be split and handled accordingly.

David reported the cow self test skip some cases because of MADV_PAGEOUT
skip THP:
https://lore.kernel.org/linux-mm/9e92e42d-488f-47db-ac9d-75b24cd0d037@intel.com/T/#mbf0f2ec7fbe45da47526de1d7036183981691e81
and I confirmed this patchset make it work again.


This patch (of 3):

Commit 07e8c82b5eff ("madvise: convert madvise_cold_or_pageout_pte_range()
to use folios") replaced the page_mapcount() with folio_mapcount() to
check whether the folio is shared by other mapping.

It's not correct for large folio.  folio_mapcount() returns the total
mapcount of large folio which is not suitable to detect whether the folio
is shared.

Use folio_estimated_sharers() which returns a estimated number of shares.
That means it's not 100% correct.  It should be OK for madvise case here.

User-visible effects is that the THP is skipped when user call madvise.
But the correct behavior is THP should be split and processed then.

NOTE: this change is a temporary fix to reduce the user-visible effects
before the long term fix from David is ready.

Link: https://lkml.kernel.org/r/20230808020917.2230692-1-fengwei.yin@intel.com
Link: https://lkml.kernel.org/r/20230808020917.2230692-2-fengwei.yin@intel.com
Fixes: 07e8c82b5eff ("madvise: convert madvise_cold_or_pageout_pte_range() to use folios")
Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
Reviewed-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/madvise.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index bfe0e06427bd..46802b4cf65a 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -384,7 +384,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 		folio = pfn_folio(pmd_pfn(orig_pmd));
 
 		/* Do not interfere with other mappings of this folio */
-		if (folio_mapcount(folio) != 1)
+		if (folio_estimated_sharers(folio) != 1)
 			goto huge_unlock;
 
 		if (pageout_anon_only_filter && !folio_test_anon(folio))
@@ -458,7 +458,7 @@ regular_folio:
 		if (folio_test_large(folio)) {
 			int err;
 
-			if (folio_mapcount(folio) != 1)
+			if (folio_estimated_sharers(folio) != 1)
 				break;
 			if (pageout_anon_only_filter && !folio_test_anon(folio))
 				break;

From 20b18aada1856b2ce0512b087a8681342af73e60 Mon Sep 17 00:00:00 2001
From: Yin Fengwei <fengwei.yin@intel.com>
Date: Tue, 8 Aug 2023 10:09:16 +0800
Subject: [PATCH 1866/2047] madvise:madvise_free_huge_pmd(): don't use
 mapcount() against large folio for sharing check

Commit fc986a38b670 ("mm: huge_memory: convert madvise_free_huge_pmd to
use a folio") replaced the page_mapcount() with folio_mapcount() to check
whether the folio is shared by other mapping.

It's not correct for large folios. folio_mapcount() returns the total
mapcount of large folio which is not suitable to detect whether the folio
is shared.

Use folio_estimated_sharers() which returns a estimated number of shares.
That means it's not 100% correct. It should be OK for madvise case here.

User-visible effects is that the THP is skipped when user call madvise.
But the correct behavior is THP should be split and processed then.

NOTE: this change is a temporary fix to reduce the user-visible effects
before the long term fix from David is ready.

Link: https://lkml.kernel.org/r/20230808020917.2230692-3-fengwei.yin@intel.com
Fixes: fc986a38b670 ("mm: huge_memory: convert madvise_free_huge_pmd to use a folio")
Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
Reviewed-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/huge_memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f15d557e5708..164d22365bde 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1612,7 +1612,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	 * If other processes are mapping this folio, we couldn't discard
 	 * the folio unless they all do MADV_FREE so let's skip the folio.
 	 */
-	if (folio_mapcount(folio) != 1)
+	if (folio_estimated_sharers(folio) != 1)
 		goto out;
 
 	if (!folio_trylock(folio))

From 0e0e9bd5f7b9d40fd03b70092367247d52da1db0 Mon Sep 17 00:00:00 2001
From: Yin Fengwei <fengwei.yin@intel.com>
Date: Tue, 8 Aug 2023 10:09:17 +0800
Subject: [PATCH 1867/2047] madvise:madvise_free_pte_range(): don't use
 mapcount() against large folio for sharing check

Commit 98b211d6415f ("madvise: convert madvise_free_pte_range() to use a
folio") replaced the page_mapcount() with folio_mapcount() to check
whether the folio is shared by other mapping.

It's not correct for large folios. folio_mapcount() returns the total
mapcount of large folio which is not suitable to detect whether the folio
is shared.

Use folio_estimated_sharers() which returns a estimated number of shares.
That means it's not 100% correct. It should be OK for madvise case here.

User-visible effects is that the THP is skipped when user call madvise.
But the correct behavior is THP should be split and processed then.

NOTE: this change is a temporary fix to reduce the user-visible effects
before the long term fix from David is ready.

Link: https://lkml.kernel.org/r/20230808020917.2230692-4-fengwei.yin@intel.com
Fixes: 98b211d6415f ("madvise: convert madvise_free_pte_range() to use a folio")
Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
Reviewed-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/madvise.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 46802b4cf65a..ec30f48f8f2e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -680,7 +680,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 		if (folio_test_large(folio)) {
 			int err;
 
-			if (folio_mapcount(folio) != 1)
+			if (folio_estimated_sharers(folio) != 1)
 				break;
 			if (!folio_trylock(folio))
 				break;

From cfeb6ae8bcb96ccf674724f223661bbcef7b0d0b Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Fri, 18 Aug 2023 20:43:55 -0400
Subject: [PATCH 1868/2047] maple_tree: disable mas_wr_append() when other
 readers are possible

The current implementation of append may cause duplicate data and/or
incorrect ranges to be returned to a reader during an update.  Although
this has not been reported or seen, disable the append write operation
while the tree is in rcu mode out of an abundance of caution.

During the analysis of the mas_next_slot() the following was
artificially created by separating the writer and reader code:

Writer:                                 reader:
mas_wr_append
    set end pivot
    updates end metata
    Detects write to last slot
    last slot write is to start of slot
    store current contents in slot
    overwrite old end pivot
                                        mas_next_slot():
                                                read end metadata
                                                read old end pivot
                                                return with incorrect range
    store new value

Alternatively:

Writer:                                 reader:
mas_wr_append
    set end pivot
    updates end metata
    Detects write to last slot
    last lost write to end of slot
    store value
                                        mas_next_slot():
                                                read end metadata
                                                read old end pivot
                                                read new end pivot
                                                return with incorrect range
    set old end pivot

There may be other accesses that are not safe since we are now updating
both metadata and pointers, so disabling append if there could be rcu
readers is the safest action.

Link: https://lkml.kernel.org/r/20230819004356.1454718-2-Liam.Howlett@oracle.com
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/maple_tree.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 4dd73cf936a6..f723024e1426 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -4265,6 +4265,10 @@ static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
  * mas_wr_append: Attempt to append
  * @wr_mas: the maple write state
  *
+ * This is currently unsafe in rcu mode since the end of the node may be cached
+ * by readers while the node contents may be updated which could result in
+ * inaccurate information.
+ *
  * Return: True if appended, false otherwise
  */
 static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
@@ -4274,6 +4278,9 @@ static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
 	struct ma_state *mas = wr_mas->mas;
 	unsigned char node_pivots = mt_pivots[wr_mas->type];
 
+	if (mt_in_rcu(mas->tree))
+		return false;
+
 	if (mas->offset != wr_mas->node_end)
 		return false;
 

From 5e56982dd0759d8b345fe1468297dd4f630db5d7 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Mon, 21 Aug 2023 17:05:33 +0100
Subject: [PATCH 1869/2047] selftests: cachestat: test for cachestat
 availability

Patch series "selftests: cachestat: fix run on older kernels", v2.

I ran all kernel selftests on some test machine, and stumbled upon
cachestat failing (among others).  These patches fix the run on older
kernels and when the current directory is on a tmpfs instance.


This patch (of 2):

As cachestat is a new syscall, it won't be available on older kernels, for
instance those running on a development machine.  At the moment the test
reports all tests as "not ok" in this case.

Test for the cachestat syscall availability first, before doing further
tests, and bail out early with a TAP SKIP comment.

This also uses the opportunity to add the proper TAP headers, and add one
check for proper error handling (illegal file descriptor).

Link: https://lkml.kernel.org/r/20230821160534.3414911-1-andre.przywara@arm.com
Link: https://lkml.kernel.org/r/20230821160534.3414911-2-andre.przywara@arm.com
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Nhat Pham <nphamcs@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .../selftests/cachestat/test_cachestat.c      | 20 ++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index 54d09b820ed4..b0c06393bcad 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -15,6 +15,8 @@
 
 #include "../kselftest.h"
 
+#define NR_TESTS	8
+
 static const char * const dev_files[] = {
 	"/dev/zero", "/dev/null", "/dev/urandom",
 	"/proc/version", "/proc"
@@ -236,7 +238,23 @@ out:
 
 int main(void)
 {
-	int ret = 0;
+	int ret;
+
+	ksft_print_header();
+
+	ret = syscall(__NR_cachestat, -1, NULL, NULL, 0);
+	if (ret == -1 && errno == ENOSYS)
+		ksft_exit_skip("cachestat syscall not available\n");
+
+	ksft_set_plan(NR_TESTS);
+
+	if (ret == -1 && errno == EBADF) {
+		ksft_test_result_pass("bad file descriptor recognized\n");
+		ret = 0;
+	} else {
+		ksft_test_result_fail("bad file descriptor ignored\n");
+		ret = 1;
+	}
 
 	for (int i = 0; i < 5; i++) {
 		const char *dev_filename = dev_files[i];

From f84f62e69963d7742acec4340ec1c4c7ef22b887 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Mon, 21 Aug 2023 17:05:34 +0100
Subject: [PATCH 1870/2047] selftests: cachestat: catch failing fsync test on
 tmpfs

The cachestat kselftest runs a test on a normal file, which is created
temporarily in the current directory.  Among the tests it runs there is a
call to fsync(), which is expected to clean all dirty pages used by the
file.

However the tmpfs filesystem implements fsync() as noop_fsync(), so the
call will not even attempt to clean anything when this test file happens
to live on a tmpfs instance.  This happens in an initramfs, or when the
current directory is in /dev/shm or sometimes /tmp.

To avoid this test failing wrongly, use statfs() to check which filesystem
the test file lives on.  If that is "tmpfs", we skip the fsync() test.

Since the fsync test is only one part of the "normal file" test, we now
execute this twice, skipping the fsync part on the first call.  This way
only the second test, including the fsync part, would be skipped.

Link: https://lkml.kernel.org/r/20230821160534.3414911-3-andre.przywara@arm.com
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .../selftests/cachestat/test_cachestat.c      | 62 ++++++++++++++-----
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index b0c06393bcad..c037553cfd92 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -4,10 +4,12 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <linux/kernel.h>
+#include <linux/magic.h>
 #include <linux/mman.h>
 #include <sys/mman.h>
 #include <sys/shm.h>
 #include <sys/syscall.h>
+#include <sys/vfs.h>
 #include <unistd.h>
 #include <string.h>
 #include <fcntl.h>
@@ -15,7 +17,7 @@
 
 #include "../kselftest.h"
 
-#define NR_TESTS	8
+#define NR_TESTS	9
 
 static const char * const dev_files[] = {
 	"/dev/zero", "/dev/null", "/dev/urandom",
@@ -92,6 +94,20 @@ out:
 	return ret;
 }
 
+/*
+ * fsync() is implemented via noop_fsync() on tmpfs. This makes the fsync()
+ * test fail below, so we need to check for test file living on a tmpfs.
+ */
+static bool is_on_tmpfs(int fd)
+{
+	struct statfs statfs_buf;
+
+	if (fstatfs(fd, &statfs_buf))
+		return false;
+
+	return statfs_buf.f_type == TMPFS_MAGIC;
+}
+
 /*
  * Open/create the file at filename, (optionally) write random data to it
  * (exactly num_pages), then test the cachestat syscall on this file.
@@ -99,13 +115,13 @@ out:
  * If test_fsync == true, fsync the file, then check the number of dirty
  * pages.
  */
-bool test_cachestat(const char *filename, bool write_random, bool create,
-		bool test_fsync, unsigned long num_pages, int open_flags,
-		mode_t open_mode)
+static int test_cachestat(const char *filename, bool write_random, bool create,
+			  bool test_fsync, unsigned long num_pages,
+			  int open_flags, mode_t open_mode)
 {
 	size_t PS = sysconf(_SC_PAGESIZE);
 	int filesize = num_pages * PS;
-	bool ret = true;
+	int ret = KSFT_PASS;
 	long syscall_ret;
 	struct cachestat cs;
 	struct cachestat_range cs_range = { 0, filesize };
@@ -114,7 +130,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
 
 	if (fd == -1) {
 		ksft_print_msg("Unable to create/open file.\n");
-		ret = false;
+		ret = KSFT_FAIL;
 		goto out;
 	} else {
 		ksft_print_msg("Create/open %s\n", filename);
@@ -123,7 +139,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
 	if (write_random) {
 		if (!write_exactly(fd, filesize)) {
 			ksft_print_msg("Unable to access urandom.\n");
-			ret = false;
+			ret = KSFT_FAIL;
 			goto out1;
 		}
 	}
@@ -134,7 +150,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
 
 	if (syscall_ret) {
 		ksft_print_msg("Cachestat returned non-zero.\n");
-		ret = false;
+		ret = KSFT_FAIL;
 		goto out1;
 
 	} else {
@@ -144,15 +160,17 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
 			if (cs.nr_cache + cs.nr_evicted != num_pages) {
 				ksft_print_msg(
 					"Total number of cached and evicted pages is off.\n");
-				ret = false;
+				ret = KSFT_FAIL;
 			}
 		}
 	}
 
 	if (test_fsync) {
-		if (fsync(fd)) {
+		if (is_on_tmpfs(fd)) {
+			ret = KSFT_SKIP;
+		} else if (fsync(fd)) {
 			ksft_print_msg("fsync fails.\n");
-			ret = false;
+			ret = KSFT_FAIL;
 		} else {
 			syscall_ret = syscall(cachestat_nr, fd, &cs_range, &cs, 0);
 
@@ -163,13 +181,13 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
 				print_cachestat(&cs);
 
 				if (cs.nr_dirty) {
-					ret = false;
+					ret = KSFT_FAIL;
 					ksft_print_msg(
 						"Number of dirty should be zero after fsync.\n");
 				}
 			} else {
 				ksft_print_msg("Cachestat (after fsync) returned non-zero.\n");
-				ret = false;
+				ret = KSFT_FAIL;
 				goto out1;
 			}
 		}
@@ -260,7 +278,7 @@ int main(void)
 		const char *dev_filename = dev_files[i];
 
 		if (test_cachestat(dev_filename, false, false, false,
-			4, O_RDONLY, 0400))
+			4, O_RDONLY, 0400) == KSFT_PASS)
 			ksft_test_result_pass("cachestat works with %s\n", dev_filename);
 		else {
 			ksft_test_result_fail("cachestat fails with %s\n", dev_filename);
@@ -269,13 +287,27 @@ int main(void)
 	}
 
 	if (test_cachestat("tmpfilecachestat", true, true,
-		true, 4, O_CREAT | O_RDWR, 0400 | 0600))
+		false, 4, O_CREAT | O_RDWR, 0600) == KSFT_PASS)
 		ksft_test_result_pass("cachestat works with a normal file\n");
 	else {
 		ksft_test_result_fail("cachestat fails with normal file\n");
 		ret = 1;
 	}
 
+	switch (test_cachestat("tmpfilecachestat", true, true,
+		true, 4, O_CREAT | O_RDWR, 0600)) {
+	case KSFT_FAIL:
+		ksft_test_result_fail("cachestat fsync fails with normal file\n");
+		ret = KSFT_FAIL;
+		break;
+	case KSFT_PASS:
+		ksft_test_result_pass("cachestat fsync works with a normal file\n");
+		break;
+	case KSFT_SKIP:
+		ksft_test_result_skip("tmpfilecachestat is on tmpfs\n");
+		break;
+	}
+
 	if (test_cachestat_shmem())
 		ksft_test_result_pass("cachestat works with a shmem file\n");
 	else {

From e5548f85b4527c4c803b7eae7887c10bf8f90c97 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 22 Aug 2023 22:14:47 -0700
Subject: [PATCH 1871/2047] shmem: fix smaps BUG sleeping while atomic

smaps_pte_hole_lookup() is calling shmem_partial_swap_usage() with page
table lock held: but shmem_partial_swap_usage() does cond_resched_rcu() if
need_resched(): "BUG: sleeping function called from invalid context".

Since shmem_partial_swap_usage() is designed to count across a range, but
smaps_pte_hole_lookup() only calls it for a single page slot, just break
out of the loop on the last or only page, before checking need_resched().

Link: https://lkml.kernel.org/r/6fe3b3ec-abdf-332f-5c23-6a3b3a3b11a9@google.com
Fixes: 230100321518 ("mm/smaps: simplify shmem handling of pte holes")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: <stable@vger.kernel.org>	[5.16+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/shmem.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index f5af4b943e42..d963c747dabc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -806,14 +806,16 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
 	XA_STATE(xas, &mapping->i_pages, start);
 	struct page *page;
 	unsigned long swapped = 0;
+	unsigned long max = end - 1;
 
 	rcu_read_lock();
-	xas_for_each(&xas, page, end - 1) {
+	xas_for_each(&xas, page, max) {
 		if (xas_retry(&xas, page))
 			continue;
 		if (xa_is_value(page))
 			swapped++;
-
+		if (xas.xa_index == max)
+			break;
 		if (need_resched()) {
 			xas_pause(&xas);
 			cond_resched_rcu();

From b96c22476ff4bc3459933f87ba52cbb7910ffc6b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 24 Aug 2023 23:48:22 +0100
Subject: [PATCH 1872/2047] regulator: userspace-consumer: Drop event support
 for this cycle

Drop commit 22475bcc2083 ("regulator: userspace-consumer: Add regulator
event support") since Zev Weiss points out that it leaks the constants
we use for notifications out as ABI which isn't ideal, we should have
something more abstracted there.  There's a definite need for this
feature but it needs some more work on the interface.

Signed-off-by: Mark Brown <broonie@kernel.org
Link: https://lore.kernel.org/r/20230824-regulator-remove-status-sysfs-v1-1-554956e8c1ca@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org
---
 drivers/regulator/userspace-consumer.c | 52 +-------------------------
 1 file changed, 1 insertion(+), 51 deletions(-)

diff --git a/drivers/regulator/userspace-consumer.c b/drivers/regulator/userspace-consumer.c
index a0b980022993..97f075ed68c9 100644
--- a/drivers/regulator/userspace-consumer.c
+++ b/drivers/regulator/userspace-consumer.c
@@ -29,10 +29,6 @@ struct userspace_consumer_data {
 
 	int num_supplies;
 	struct regulator_bulk_data *supplies;
-
-	struct kobject *kobj;
-	struct notifier_block nb;
-	unsigned long events;
 };
 
 static ssize_t name_show(struct device *dev,
@@ -93,30 +89,12 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
-static DEFINE_MUTEX(events_lock);
-
-static ssize_t events_show(struct device *dev,
-			   struct device_attribute *attr, char *buf)
-{
-	struct userspace_consumer_data *data = dev_get_drvdata(dev);
-	unsigned long e;
-
-	mutex_lock(&events_lock);
-	e = data->events;
-	data->events = 0;
-	mutex_unlock(&events_lock);
-
-	return sprintf(buf, "0x%lx\n", e);
-}
-
 static DEVICE_ATTR_RO(name);
 static DEVICE_ATTR_RW(state);
-static DEVICE_ATTR_RO(events);
 
 static struct attribute *attributes[] = {
 	&dev_attr_name.attr,
 	&dev_attr_state.attr,
-	&dev_attr_events.attr,
 	NULL,
 };
 
@@ -137,28 +115,12 @@ static const struct attribute_group attr_group = {
 	.is_visible =  attr_visible,
 };
 
-static int regulator_userspace_notify(struct notifier_block *nb,
-				      unsigned long event,
-				      void *ignored)
-{
-	struct userspace_consumer_data *data =
-		container_of(nb, struct userspace_consumer_data, nb);
-
-	mutex_lock(&events_lock);
-	data->events |= event;
-	mutex_unlock(&events_lock);
-
-	sysfs_notify(data->kobj, NULL, dev_attr_events.attr.name);
-
-	return NOTIFY_OK;
-}
-
 static int regulator_userspace_consumer_probe(struct platform_device *pdev)
 {
 	struct regulator_userspace_consumer_data tmpdata;
 	struct regulator_userspace_consumer_data *pdata;
 	struct userspace_consumer_data *drvdata;
-	int i, ret;
+	int ret;
 
 	pdata = dev_get_platdata(&pdev->dev);
 	if (!pdata) {
@@ -191,7 +153,6 @@ static int regulator_userspace_consumer_probe(struct platform_device *pdev)
 	drvdata->num_supplies = pdata->num_supplies;
 	drvdata->supplies = pdata->supplies;
 	drvdata->no_autoswitch = pdata->no_autoswitch;
-	drvdata->kobj = &pdev->dev.kobj;
 
 	mutex_init(&drvdata->lock);
 
@@ -225,13 +186,6 @@ static int regulator_userspace_consumer_probe(struct platform_device *pdev)
 	}
 	drvdata->enabled = !!ret;
 
-	drvdata->nb.notifier_call = regulator_userspace_notify;
-	for (i = 0; i < drvdata->num_supplies; i++) {
-		ret = devm_regulator_register_notifier(drvdata->supplies[i].consumer, &drvdata->nb);
-		if (ret)
-			goto err_enable;
-	}
-
 	return 0;
 
 err_enable:
@@ -243,10 +197,6 @@ err_enable:
 static int regulator_userspace_consumer_remove(struct platform_device *pdev)
 {
 	struct userspace_consumer_data *data = platform_get_drvdata(pdev);
-	int i;
-
-	for (i = 0; i < data->num_supplies; i++)
-		devm_regulator_unregister_notifier(data->supplies[i].consumer, &data->nb);
 
 	sysfs_remove_group(&pdev->dev.kobj, &attr_group);
 

From 84a58e60038fa0366006977dba85eae16b2e3d78 Mon Sep 17 00:00:00 2001
From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Date: Mon, 21 Aug 2023 16:05:54 -0700
Subject: [PATCH 1873/2047] net/mlx5: Dynamic cyclecounter shift calculation
 for PTP free running clock

Use a dynamic calculation to determine the shift value for the internal
timer cyclecounter that will lead to the highest precision frequency
adjustments. Previously used a constant for the shift value assuming all
devices supported by the driver had a nominal frequency of 1GHz. However,
there are devices that operate at different frequencies. The previous shift
value constant would break the PHC functionality for those devices.

Reported-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Closes: https://lore.kernel.org/netdev/20230815151507.3028503-1-vadfed@meta.com/
Fixes: 6a4010927562 ("net/mlx5: Update cyclecounter shift value to improve ptp free running mode precision")
Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Tested-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Acked-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20230821230554.236210-1-rrameshbabu@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../ethernet/mellanox/mlx5/core/lib/clock.c   | 32 ++++++++++++++++---
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 377372f0578a..aa29f09e8356 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -32,16 +32,13 @@
 
 #include <linux/clocksource.h>
 #include <linux/highmem.h>
+#include <linux/log2.h>
 #include <linux/ptp_clock_kernel.h>
 #include <rdma/mlx5-abi.h>
 #include "lib/eq.h"
 #include "en.h"
 #include "clock.h"
 
-enum {
-	MLX5_CYCLES_SHIFT	= 31
-};
-
 enum {
 	MLX5_PIN_MODE_IN		= 0x0,
 	MLX5_PIN_MODE_OUT		= 0x1,
@@ -93,6 +90,31 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev)
 	return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify);
 }
 
+static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz)
+{
+	/* Optimal shift constant leads to corrections above just 1 scaled ppm.
+	 *
+	 * Two sets of equations are needed to derive the optimal shift
+	 * constant for the cyclecounter.
+	 *
+	 *    dev_freq_khz * 1000 / 2^shift_constant = 1 scaled_ppm
+	 *    ppb = scaled_ppm * 1000 / 2^16
+	 *
+	 * Using the two equations together
+	 *
+	 *    dev_freq_khz * 1000 / 1 scaled_ppm = 2^shift_constant
+	 *    dev_freq_khz * 2^16 / 1 ppb = 2^shift_constant
+	 *    dev_freq_khz = 2^(shift_constant - 16)
+	 *
+	 * then yields
+	 *
+	 *    shift_constant = ilog2(dev_freq_khz) + 16
+	 */
+
+	return min(ilog2(dev_freq_khz) + 16,
+		   ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz));
+}
+
 static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp)
 {
 	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
@@ -909,7 +931,7 @@ static void mlx5_timecounter_init(struct mlx5_core_dev *mdev)
 
 	dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz);
 	timer->cycles.read = read_internal_timer;
-	timer->cycles.shift = MLX5_CYCLES_SHIFT;
+	timer->cycles.shift = mlx5_ptp_shift_constant(dev_freq);
 	timer->cycles.mult = clocksource_khz2mult(dev_freq,
 						  timer->cycles.shift);
 	timer->nominal_c_mult = timer->cycles.mult;

From 60c5fd2e8f3c42a5abc565ba9876ead1da5ad2b7 Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Tue, 22 Aug 2023 01:52:54 +0000
Subject: [PATCH 1874/2047] scsi: core: raid_class: Remove raid_component_add()

The raid_component_add() function was added to the kernel tree via patch
"[SCSI] embryonic RAID class" (2005). Remove this function since it never
has had any callers in the Linux kernel. And also raid_component_release()
is only used in raid_component_add(), so it is also removed.

Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Link: https://lore.kernel.org/r/20230822015254.184270-1-wangzhu9@huawei.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Fixes: 04b5b5cb0136 ("scsi: core: Fix possible memory leak if device_add() fails")
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/raid_class.c  | 48 --------------------------------------
 include/linux/raid_class.h |  4 ----
 2 files changed, 52 deletions(-)

diff --git a/drivers/scsi/raid_class.c b/drivers/scsi/raid_class.c
index 711252e52d8e..95a86e0dfd77 100644
--- a/drivers/scsi/raid_class.c
+++ b/drivers/scsi/raid_class.c
@@ -209,54 +209,6 @@ raid_attr_ro_state(level);
 raid_attr_ro_fn(resync);
 raid_attr_ro_state_fn(state);
 
-static void raid_component_release(struct device *dev)
-{
-	struct raid_component *rc =
-		container_of(dev, struct raid_component, dev);
-	dev_printk(KERN_ERR, rc->dev.parent, "COMPONENT RELEASE\n");
-	put_device(rc->dev.parent);
-	kfree(rc);
-}
-
-int raid_component_add(struct raid_template *r,struct device *raid_dev,
-		       struct device *component_dev)
-{
-	struct device *cdev =
-		attribute_container_find_class_device(&r->raid_attrs.ac,
-						      raid_dev);
-	struct raid_component *rc;
-	struct raid_data *rd = dev_get_drvdata(cdev);
-	int err;
-
-	rc = kzalloc(sizeof(*rc), GFP_KERNEL);
-	if (!rc)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&rc->node);
-	device_initialize(&rc->dev);
-	rc->dev.release = raid_component_release;
-	rc->dev.parent = get_device(component_dev);
-	rc->num = rd->component_count++;
-
-	dev_set_name(&rc->dev, "component-%d", rc->num);
-	list_add_tail(&rc->node, &rd->component_list);
-	rc->dev.class = &raid_class.class;
-	err = device_add(&rc->dev);
-	if (err)
-		goto err_out;
-
-	return 0;
-
-err_out:
-	put_device(&rc->dev);
-	list_del(&rc->node);
-	rd->component_count--;
-	put_device(component_dev);
-	kfree(rc);
-	return err;
-}
-EXPORT_SYMBOL(raid_component_add);
-
 struct raid_template *
 raid_class_attach(struct raid_function_template *ft)
 {
diff --git a/include/linux/raid_class.h b/include/linux/raid_class.h
index 6a9b177d5c41..e50416ba9cd9 100644
--- a/include/linux/raid_class.h
+++ b/include/linux/raid_class.h
@@ -77,7 +77,3 @@ DEFINE_RAID_ATTRIBUTE(enum raid_state, state)
 	
 struct raid_template *raid_class_attach(struct raid_function_template *);
 void raid_class_release(struct raid_template *);
-
-int __must_check raid_component_add(struct raid_template *, struct device *,
-				    struct device *);
-

From 1bd3a76880b2bce017987cf53780b372cf59528e Mon Sep 17 00:00:00 2001
From: Zhu Wang <wangzhu9@huawei.com>
Date: Sat, 19 Aug 2023 08:39:41 +0000
Subject: [PATCH 1875/2047] scsi: snic: Fix double free in snic_tgt_create()

Commit 41320b18a0e0 ("scsi: snic: Fix possible memory leak if device_add()
fails") fixed the memory leak caused by dev_set_name() when device_add()
failed. However, it did not consider that 'tgt' has already been released
when put_device(&tgt->dev) is called. Remove kfree(tgt) in the error path
to avoid double free of 'tgt' and move put_device(&tgt->dev) after the
removed kfree(tgt) to avoid a use-after-free.

Fixes: 41320b18a0e0 ("scsi: snic: Fix possible memory leak if device_add() fails")
Signed-off-by: Zhu Wang <wangzhu9@huawei.com>
Link: https://lore.kernel.org/r/20230819083941.164365-1-wangzhu9@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/snic/snic_disc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/snic/snic_disc.c b/drivers/scsi/snic/snic_disc.c
index e429ad23c396..4db3ba62fcd3 100644
--- a/drivers/scsi/snic/snic_disc.c
+++ b/drivers/scsi/snic/snic_disc.c
@@ -303,12 +303,11 @@ snic_tgt_create(struct snic *snic, struct snic_tgt_id *tgtid)
 			      "Snic Tgt: device_add, with err = %d\n",
 			      ret);
 
-		put_device(&tgt->dev);
 		put_device(&snic->shost->shost_gendev);
 		spin_lock_irqsave(snic->shost->host_lock, flags);
 		list_del(&tgt->list);
 		spin_unlock_irqrestore(snic->shost->host_lock, flags);
-		kfree(tgt);
+		put_device(&tgt->dev);
 		tgt = NULL;
 
 		return tgt;

From a4f39c9f14a634e4cd35fcd338c239d11fcc73fc Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Wed, 23 Aug 2023 15:41:02 +0200
Subject: [PATCH 1876/2047] net: handle ARPHRD_PPP in dev_is_mac_header_xmit()

The goal is to support a bpf_redirect() from an ethernet device (ingress)
to a ppp device (egress).
The l2 header is added automatically by the ppp driver, thus the ethernet
header should be removed.

CC: stable@vger.kernel.org
Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Tested-by: Siwar Zitouni <siwar.zitouni@6wind.com>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_arp.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index 1ed52441972f..10a1e81434cb 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -53,6 +53,10 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev)
 	case ARPHRD_NONE:
 	case ARPHRD_RAWIP:
 	case ARPHRD_PIMREG:
+	/* PPP adds its l2 header automatically in ppp_start_xmit().
+	 * This makes it look like an l3 device to __bpf_redirect() and tcf_mirred_init().
+	 */
+	case ARPHRD_PPP:
 		return false;
 	default:
 		return true;

From 0aacec49c29e7c5b1487e859b0c0a42388c34092 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 23 Aug 2023 08:18:14 -0700
Subject: [PATCH 1877/2047] ice: avoid executing commands on other ports when
 driving sync

The ice hardware has a synchronization mechanism used to drive the
simultaneous application of commands on both PHY ports and the source timer
in the MAC.

When issuing a sync via ice_ptp_exec_tmr_cmd(), the hardware will
simultaneously apply the commands programmed for the main timer and each
PHY port. Neither the main timer command register, nor the PHY port command
registers auto clear on command execution.

During the execution of a timer command intended for a single port on E822
devices, such as those used to configure a PHY during link up, the driver
is not correctly clearing the previous commands.

This results in unintentionally executing the last programmed command on
the main timer and other PHY ports whenever performing reconfiguration on
E822 ports after link up. This results in unintended side effects on other
timers, depending on what command was previously programmed.

To fix this, the driver must ensure that the main timer and all other PHY
ports are properly initialized to perform no action.

The enumeration for timer commands does not include an enumeration value
for doing nothing. Introduce ICE_PTP_NOP for this purpose. When writing a
timer command to hardware, leave the command bits set to zero which
indicates that no operation should be performed on that port.

Modify ice_ptp_one_port_cmd() to always initialize all ports. For all ports
other than the one being configured, write their timer command register to
ICE_PTP_NOP. This ensures that no side effect happens on the timer command.

To fix this for the PHY ports, modify ice_ptp_one_port_cmd() to always
initialize all other ports to ICE_PTP_NOP. This ensures that no side
effects happen on the other ports.

Call ice_ptp_src_cmd() with a command value if ICE_PTP_NOP in
ice_sync_phy_timer_e822() and ice_start_phy_timer_e822().

With both of these changes, the driver should no longer execute a stale
command on the main timer or another PHY port when reconfiguring one of the
PHY ports after link up.

Fixes: 3a7496234d17 ("ice: implement basic E822 PTP support")
Signed-off-by: Siddaraju DH <siddaraju.dh@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Sunitha Mekala <sunithax.d.mekala@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 55 +++++++++++++++++++--
 drivers/net/ethernet/intel/ice/ice_ptp_hw.h |  3 +-
 2 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
index a38614d21ea8..de1d83300481 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -131,6 +131,8 @@ static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
 	case READ_TIME:
 		cmd_val |= GLTSYN_CMD_READ_TIME;
 		break;
+	case ICE_PTP_NOP:
+		break;
 	}
 
 	wr32(hw, GLTSYN_CMD, cmd_val);
@@ -1226,18 +1228,18 @@ ice_ptp_read_port_capture(struct ice_hw *hw, u8 port, u64 *tx_ts, u64 *rx_ts)
 }
 
 /**
- * ice_ptp_one_port_cmd - Prepare a single PHY port for a timer command
+ * ice_ptp_write_port_cmd_e822 - Prepare a single PHY port for a timer command
  * @hw: pointer to HW struct
  * @port: Port to which cmd has to be sent
  * @cmd: Command to be sent to the port
  *
  * Prepare the requested port for an upcoming timer sync command.
  *
- * Note there is no equivalent of this operation on E810, as that device
- * always handles all external PHYs internally.
+ * Do not use this function directly. If you want to configure exactly one
+ * port, use ice_ptp_one_port_cmd() instead.
  */
 static int
-ice_ptp_one_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd)
+ice_ptp_write_port_cmd_e822(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd)
 {
 	u32 cmd_val, val;
 	u8 tmr_idx;
@@ -1261,6 +1263,8 @@ ice_ptp_one_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd)
 	case ADJ_TIME_AT_TIME:
 		cmd_val |= PHY_CMD_ADJ_TIME_AT_TIME;
 		break;
+	case ICE_PTP_NOP:
+		break;
 	}
 
 	/* Tx case */
@@ -1306,6 +1310,39 @@ ice_ptp_one_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd)
 	return 0;
 }
 
+/**
+ * ice_ptp_one_port_cmd - Prepare one port for a timer command
+ * @hw: pointer to the HW struct
+ * @configured_port: the port to configure with configured_cmd
+ * @configured_cmd: timer command to prepare on the configured_port
+ *
+ * Prepare the configured_port for the configured_cmd, and prepare all other
+ * ports for ICE_PTP_NOP. This causes the configured_port to execute the
+ * desired command while all other ports perform no operation.
+ */
+static int
+ice_ptp_one_port_cmd(struct ice_hw *hw, u8 configured_port,
+		     enum ice_ptp_tmr_cmd configured_cmd)
+{
+	u8 port;
+
+	for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+		enum ice_ptp_tmr_cmd cmd;
+		int err;
+
+		if (port == configured_port)
+			cmd = configured_cmd;
+		else
+			cmd = ICE_PTP_NOP;
+
+		err = ice_ptp_write_port_cmd_e822(hw, port, cmd);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 /**
  * ice_ptp_port_cmd_e822 - Prepare all ports for a timer command
  * @hw: pointer to the HW struct
@@ -1322,7 +1359,7 @@ ice_ptp_port_cmd_e822(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
 	for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
 		int err;
 
-		err = ice_ptp_one_port_cmd(hw, port, cmd);
+		err = ice_ptp_write_port_cmd_e822(hw, port, cmd);
 		if (err)
 			return err;
 	}
@@ -2252,6 +2289,9 @@ static int ice_sync_phy_timer_e822(struct ice_hw *hw, u8 port)
 	if (err)
 		goto err_unlock;
 
+	/* Do not perform any action on the main timer */
+	ice_ptp_src_cmd(hw, ICE_PTP_NOP);
+
 	/* Issue the sync to activate the time adjustment */
 	ice_ptp_exec_tmr_cmd(hw);
 
@@ -2372,6 +2412,9 @@ int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port)
 	if (err)
 		return err;
 
+	/* Do not perform any action on the main timer */
+	ice_ptp_src_cmd(hw, ICE_PTP_NOP);
+
 	ice_ptp_exec_tmr_cmd(hw);
 
 	err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val);
@@ -2847,6 +2890,8 @@ static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
 	case ADJ_TIME_AT_TIME:
 		cmd_val = GLTSYN_CMD_ADJ_INIT_TIME;
 		break;
+	case ICE_PTP_NOP:
+		return 0;
 	}
 
 	/* Read, modify, write */
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
index 3b68cb91bd81..096685237ca6 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
@@ -9,7 +9,8 @@ enum ice_ptp_tmr_cmd {
 	INIT_INCVAL,
 	ADJ_TIME,
 	ADJ_TIME_AT_TIME,
-	READ_TIME
+	READ_TIME,
+	ICE_PTP_NOP,
 };
 
 enum ice_ptp_serdes {

From 49fa4b0d06705a24a81bb8be6eb175059b77f0a7 Mon Sep 17 00:00:00 2001
From: Ratheesh Kannoth <rkannoth@marvell.com>
Date: Thu, 24 Aug 2023 08:33:01 +0530
Subject: [PATCH 1878/2047] octeontx2-pf: fix page_pool creation fail for rings
 > 32k

octeontx2 driver calls page_pool_create() during driver probe()
and fails if queue size > 32k. Page pool infra uses these buffers
as shock absorbers for burst traffic. These pages are pinned down
over time as working sets varies, due to the recycling nature
of page pool, given page pool (currently) don't have a shrinker
mechanism, the pages remain pinned down in ptr_ring.
Instead of clamping page_pool size to 32k at
most, limit it even more to 2k to avoid wasting memory.

This have been tested on octeontx2 CN10KA hardware.
TCP and UDP tests using iperf shows no performance regressions.

Fixes: b2e3406a38f0 ("octeontx2-pf: Add support for page pool")
Suggested-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: Ratheesh Kannoth <rkannoth@marvell.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 2 +-
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h   | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 77c8f650f7ac..3e1c70c74622 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -1432,7 +1432,7 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
 	}
 
 	pp_params.flags = PP_FLAG_PAGE_FRAG | PP_FLAG_DMA_MAP;
-	pp_params.pool_size = numptrs;
+	pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs);
 	pp_params.nid = NUMA_NO_NODE;
 	pp_params.dev = pfvf->dev;
 	pp_params.dma_dir = DMA_FROM_DEVICE;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
index b5d689eeff80..9e3bfbe5c480 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
@@ -23,6 +23,8 @@
 #define	OTX2_ETH_HLEN		(VLAN_ETH_HLEN + VLAN_HLEN)
 #define	OTX2_MIN_MTU		60
 
+#define OTX2_PAGE_POOL_SZ	2048
+
 #define OTX2_MAX_GSO_SEGS	255
 #define OTX2_MAX_FRAGS_IN_SQE	9
 

From 786c96e92fb9e854cb8b0cb7399bb2fb28e15c4b Mon Sep 17 00:00:00 2001
From: Jinjie Ruan <ruanjinjie@huawei.com>
Date: Thu, 24 Aug 2023 14:43:36 +0800
Subject: [PATCH 1879/2047] net: arcnet: Do not call kfree_skb() under
 local_irq_disable()

It is not allowed to call kfree_skb() from hardware interrupt
context or with hardware interrupts being disabled.
So replace kfree_skb() with dev_kfree_skb_irq() under
local_irq_disable(). Compile tested only.

Fixes: 05fcd31cc472 ("arcnet: add err_skb package for package status feedback")
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/arcnet/arcnet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 99265667538c..d9e052c49ba1 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -464,7 +464,7 @@ static void arcnet_reply_tasklet(struct tasklet_struct *t)
 
 	ret = sock_queue_err_skb(sk, ackskb);
 	if (ret)
-		kfree_skb(ackskb);
+		dev_kfree_skb_irq(ackskb);
 
 	local_irq_enable();
 };

From bac806830fde94ccf41482535fc442c02656febc Mon Sep 17 00:00:00 2001
From: Wenchao Chen <wenchao.chen@unisoc.com>
Date: Fri, 25 Aug 2023 17:17:42 +0800
Subject: [PATCH 1880/2047] mmc: core: Add host specific tuning support for SD
 HS mode

To support the need for host specific tuning for SD high-speed mode, let's
add two new optional callbacks, ->prepare|execute_sd_hs_tuning() and let's
call them when switching into the SD high-speed mode.

Note that, during the tuning process it's also needed for host drivers to
send commands to the SD card to verify that the tuning process succeeds.
Therefore, let's also share the corresponding functions from the core to
allow this.

Signed-off-by: Wenchao Chen <wenchao.chen@unisoc.com>
Link: https://lore.kernel.org/r/20230825091743.15613-2-wenchao.chen@unisoc.com
[Ulf: Dropped unnecessary function declarations and updated the commit msg]
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/mmc_ops.h |  1 -
 drivers/mmc/core/sd.c      | 14 ++++++++++++++
 drivers/mmc/core/sd_ops.c  |  1 +
 drivers/mmc/core/sd_ops.h  |  2 --
 include/linux/mmc/host.h   |  8 ++++++++
 5 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index 09ffbc00908b..92d4194c7893 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -32,7 +32,6 @@ int mmc_send_adtc_data(struct mmc_card *card, struct mmc_host *host, u32 opcode,
 		       u32 args, void *buf, unsigned len);
 int mmc_send_csd(struct mmc_card *card, u32 *csd);
 int __mmc_send_status(struct mmc_card *card, u32 *status, unsigned int retries);
-int mmc_send_status(struct mmc_card *card, u32 *status);
 int mmc_send_cid(struct mmc_host *host, u32 *cid);
 int mmc_spi_read_ocr(struct mmc_host *host, int highcap, u32 *ocrp);
 int mmc_spi_set_crc(struct mmc_host *host, int use_crc);
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 246ce027ae0a..c3e554344c99 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1518,6 +1518,13 @@ retry:
 		 */
 		mmc_set_clock(host, mmc_sd_get_max_clock(card));
 
+		if (host->ios.timing == MMC_TIMING_SD_HS &&
+			host->ops->prepare_sd_hs_tuning) {
+			err = host->ops->prepare_sd_hs_tuning(host, card);
+			if (err)
+				goto free_card;
+		}
+
 		/*
 		 * Switch to wider bus (if supported).
 		 */
@@ -1529,6 +1536,13 @@ retry:
 
 			mmc_set_bus_width(host, MMC_BUS_WIDTH_4);
 		}
+
+		if (host->ios.timing == MMC_TIMING_SD_HS &&
+			host->ops->execute_sd_hs_tuning) {
+			err = host->ops->execute_sd_hs_tuning(host, card);
+			if (err)
+				goto free_card;
+		}
 	}
 cont:
 	if (!oldcard) {
diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c
index ef8d1dce5af1..a59cd592f06e 100644
--- a/drivers/mmc/core/sd_ops.c
+++ b/drivers/mmc/core/sd_ops.c
@@ -323,6 +323,7 @@ int mmc_sd_switch(struct mmc_card *card, int mode, int group,
 	return mmc_send_adtc_data(card, card->host, SD_SWITCH, cmd_args, resp,
 				  64);
 }
+EXPORT_SYMBOL_GPL(mmc_sd_switch);
 
 int mmc_app_sd_status(struct mmc_card *card, void *ssr)
 {
diff --git a/drivers/mmc/core/sd_ops.h b/drivers/mmc/core/sd_ops.h
index 3ba7b3cf4652..7667fc223b74 100644
--- a/drivers/mmc/core/sd_ops.h
+++ b/drivers/mmc/core/sd_ops.h
@@ -19,8 +19,6 @@ int mmc_send_if_cond(struct mmc_host *host, u32 ocr);
 int mmc_send_if_cond_pcie(struct mmc_host *host, u32 ocr);
 int mmc_send_relative_addr(struct mmc_host *host, unsigned int *rca);
 int mmc_app_send_scr(struct mmc_card *card);
-int mmc_sd_switch(struct mmc_card *card, int mode, int group,
-	u8 value, u8 *resp);
 int mmc_app_sd_status(struct mmc_card *card, void *ssr);
 int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card);
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 461d1543893b..62a6847a3b6f 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -184,6 +184,12 @@ struct mmc_host_ops {
 	/* Execute HS400 tuning depending host driver */
 	int	(*execute_hs400_tuning)(struct mmc_host *host, struct mmc_card *card);
 
+	/* Optional callback to prepare for SD high-speed tuning */
+	int	(*prepare_sd_hs_tuning)(struct mmc_host *host, struct mmc_card *card);
+
+	/* Optional callback to execute SD high-speed tuning */
+	int	(*execute_sd_hs_tuning)(struct mmc_host *host, struct mmc_card *card);
+
 	/* Prepare switch to DDR during the HS400 init sequence */
 	int	(*hs400_prepare_ddr)(struct mmc_host *host);
 
@@ -665,6 +671,8 @@ static inline void mmc_debugfs_err_stats_inc(struct mmc_host *host,
 	host->err_stats[stat] += 1;
 }
 
+int mmc_sd_switch(struct mmc_card *card, int mode, int group, u8 value, u8 *resp);
+int mmc_send_status(struct mmc_card *card, u32 *status);
 int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
 int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
 int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);

From 146c7c330507c0384bf29d567186632bfe975927 Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Thu, 24 Aug 2023 15:43:08 +0200
Subject: [PATCH 1881/2047] mlxsw: i2c: Fix chunk size setting in output
 mailbox buffer

The driver reads commands output from the output mailbox. If the size
of the output mailbox is not a multiple of the transaction /
block size, then the driver will not issue enough read transactions
to read the entire output, which can result in driver initialization
errors.

Fix by determining the number of transactions using DIV_ROUND_UP().

Fixes: 3029a693beda ("mlxsw: i2c: Allow flexible setting of I2C transactions size")
Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/i2c.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index 41298835a11e..47af7ef7e4ee 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -444,7 +444,7 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
 	} else {
 		/* No input mailbox is case of initialization query command. */
 		reg_size = MLXSW_I2C_MAX_DATA_SIZE;
-		num = reg_size / mlxsw_i2c->block_size;
+		num = DIV_ROUND_UP(reg_size, mlxsw_i2c->block_size);
 
 		if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
 			dev_err(&client->dev, "Could not acquire lock");

From d7248f1cc835bd80e936dc5b2d94b149bdd0077d Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Thu, 24 Aug 2023 15:43:09 +0200
Subject: [PATCH 1882/2047] mlxsw: i2c: Limit single transaction buffer size

Maximum size of buffer is obtained from underlying I2C adapter and in
case adapter allows I2C transaction buffer size greater than 100 bytes,
transaction will fail due to firmware limitation.

As a result driver will fail initialization.

Limit the maximum size of transaction buffer by 100 bytes to fit to
firmware.

Remove unnecessary calculation:
max_t(u16, MLXSW_I2C_BLK_DEF, quirk_size).
This condition can not happened.

Fixes: 3029a693beda ("mlxsw: i2c: Allow flexible setting of I2C transactions size")
Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/i2c.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index 47af7ef7e4ee..d23f293e285c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -48,6 +48,7 @@
 #define MLXSW_I2C_MBOX_SIZE_BITS	12
 #define MLXSW_I2C_ADDR_BUF_SIZE		4
 #define MLXSW_I2C_BLK_DEF		32
+#define MLXSW_I2C_BLK_MAX		100
 #define MLXSW_I2C_RETRY			5
 #define MLXSW_I2C_TIMEOUT_MSECS		5000
 #define MLXSW_I2C_MAX_DATA_SIZE		256
@@ -653,7 +654,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client)
 			return -EOPNOTSUPP;
 		}
 
-		mlxsw_i2c->block_size = max_t(u16, MLXSW_I2C_BLK_DEF,
+		mlxsw_i2c->block_size = min_t(u16, MLXSW_I2C_BLK_MAX,
 					      min_t(u16, quirks->max_read_len,
 						    quirks->max_write_len));
 	} else {

From 3fc134a07438055fc93ce1bbacf2702ddd09500c Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Thu, 24 Aug 2023 15:43:10 +0200
Subject: [PATCH 1883/2047] mlxsw: core_hwmon: Adjust module label names based
 on MTCAP sensor counter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Transceiver module temperature sensors are indexed after ASIC and
platform sensors. The current label printing method does not take this
into account and simply prints the index of the transceiver module
sensor.

On new systems that have platform sensors this results in incorrect
(shifted) transceiver module labels being printed:

$ sensors
[...]
front panel 002:  +37.0°C  (crit = +70.0°C, emerg = +75.0°C)
front panel 003:  +47.0°C  (crit = +70.0°C, emerg = +75.0°C)
[...]

Fix by taking the sensor count into account. After the fix:

$ sensors
[...]
front panel 001:  +37.0°C  (crit = +70.0°C, emerg = +75.0°C)
front panel 002:  +47.0°C  (crit = +70.0°C, emerg = +75.0°C)
[...]

Fixes: a53779de6a0e ("mlxsw: core: Add QSFP module temperature label attribute to hwmon")
Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index 70735068cf29..0fd290d776ff 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -405,7 +405,8 @@ mlxsw_hwmon_module_temp_label_show(struct device *dev,
 			container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
 
 	return sprintf(buf, "front panel %03u\n",
-		       mlxsw_hwmon_attr->type_index);
+		       mlxsw_hwmon_attr->type_index + 1 -
+		       mlxsw_hwmon_attr->mlxsw_hwmon_dev->sensor_count);
 }
 
 static ssize_t

From d83d251bf3c2521bfce59016977da6c1cf995d61 Mon Sep 17 00:00:00 2001
From: Wenchao Chen <wenchao.chen@unisoc.com>
Date: Fri, 25 Aug 2023 17:17:43 +0800
Subject: [PATCH 1884/2047] mmc: sdhci-sprd: Add SD HS mode online tuning

First of all, Unisoc's IC provides cmd delay and read delay to ensure
that the host can get the correct data. However, according to SD Spec,
there is no need to do tuning in high speed mode, but with the
development of chip processes, it is more and more difficult to find
a suitable delay to cover all the chips. Therefore, we need SD high
speed mode online tuning.

In addition, we added mmc_sd_switch() and mmc_send_status() to the
header file to allow it to be usable by the driver.

Signed-off-by: Wenchao Chen <wenchao.chen@unisoc.com>
Link: https://lore.kernel.org/r/20230825091743.15613-3-wenchao.chen@unisoc.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-sprd.c | 149 ++++++++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)

diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c
index 28639d6621fd..649ae075e229 100644
--- a/drivers/mmc/host/sdhci-sprd.c
+++ b/drivers/mmc/host/sdhci-sprd.c
@@ -9,6 +9,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/highmem.h>
 #include <linux/iopoll.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_gpio.h>
@@ -72,6 +74,11 @@
 #define SDHCI_SPRD_CLK_DEF_RATE		26000000
 #define SDHCI_SPRD_PHY_DLL_CLK		52000000
 
+#define SDHCI_SPRD_MAX_RANGE		0xff
+#define SDHCI_SPRD_CMD_DLY_MASK		GENMASK(15, 8)
+#define SDHCI_SPRD_POSRD_DLY_MASK	GENMASK(23, 16)
+#define SDHCI_SPRD_CPST_EN		GENMASK(27, 24)
+
 struct sdhci_sprd_host {
 	u32 version;
 	struct clk *clk_sdio;
@@ -85,6 +92,11 @@ struct sdhci_sprd_host {
 	u32 phy_delay[MMC_TIMING_MMC_HS400 + 2];
 };
 
+enum sdhci_sprd_tuning_type {
+	SDHCI_SPRD_TUNING_SD_HS_CMD,
+	SDHCI_SPRD_TUNING_SD_HS_DATA,
+};
+
 struct sdhci_sprd_phy_cfg {
 	const char *property;
 	u8 timing;
@@ -532,6 +544,138 @@ static void sdhci_sprd_hs400_enhanced_strobe(struct mmc_host *mmc,
 		     SDHCI_SPRD_REG_32_DLL_DLY);
 }
 
+static int mmc_send_tuning_cmd(struct mmc_card *card)
+{
+	return mmc_send_status(card, NULL);
+}
+
+static int mmc_send_tuning_data(struct mmc_card *card)
+{
+	u8 *status;
+	int ret;
+
+	status = kmalloc(64, GFP_KERNEL);
+	if (!status)
+		return -ENOMEM;
+
+	ret = mmc_sd_switch(card, 0, 0, 0, status);
+
+	kfree(status);
+
+	return ret;
+}
+
+static int sdhci_sprd_get_best_clk_sample(struct mmc_host *mmc, u8 *value)
+{
+	int range_end = SDHCI_SPRD_MAX_RANGE;
+	int range_length = 0;
+	int middle_range = 0;
+	int count = 0;
+	int i;
+
+	for (i = 0; i <= SDHCI_SPRD_MAX_RANGE; i++) {
+		if (value[i]) {
+			pr_debug("%s: tuning ok: %d\n", mmc_hostname(mmc), i);
+			count++;
+		} else {
+			pr_debug("%s: tuning fail: %d\n", mmc_hostname(mmc), i);
+			if (range_length < count) {
+				range_length = count;
+				range_end = i - 1;
+				count = 0;
+			}
+		}
+	}
+
+	if (!count)
+		return -EIO;
+
+	if (count > range_length) {
+		range_length = count;
+		range_end = i - 1;
+	}
+
+	middle_range = range_end - (range_length - 1) / 2;
+
+	return middle_range;
+}
+
+static int sdhci_sprd_tuning(struct mmc_host *mmc, struct mmc_card *card,
+			enum sdhci_sprd_tuning_type type)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	struct sdhci_sprd_host *sprd_host = TO_SPRD_HOST(host);
+	u32 *p = sprd_host->phy_delay;
+	u32 dll_cfg, dll_dly;
+	int best_clk_sample;
+	int err = 0;
+	u8 *value;
+	int i;
+
+	value = kmalloc(SDHCI_SPRD_MAX_RANGE + 1, GFP_KERNEL);
+	if (!value)
+		return -ENOMEM;
+
+	sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
+
+	dll_cfg = sdhci_readl(host, SDHCI_SPRD_REG_32_DLL_CFG);
+	dll_cfg &= ~SDHCI_SPRD_CPST_EN;
+	sdhci_writel(host, dll_cfg, SDHCI_SPRD_REG_32_DLL_CFG);
+
+	dll_dly = p[mmc->ios.timing];
+
+	for (i = 0; i <= SDHCI_SPRD_MAX_RANGE; i++) {
+		if (type == SDHCI_SPRD_TUNING_SD_HS_CMD) {
+			dll_dly &= ~SDHCI_SPRD_CMD_DLY_MASK;
+			dll_dly |= ((i << 8) & SDHCI_SPRD_CMD_DLY_MASK);
+		} else {
+			dll_dly &= ~SDHCI_SPRD_POSRD_DLY_MASK;
+			dll_dly |= ((i << 16) & SDHCI_SPRD_POSRD_DLY_MASK);
+		}
+
+		sdhci_writel(host, dll_dly, SDHCI_SPRD_REG_32_DLL_DLY);
+
+		if (type == SDHCI_SPRD_TUNING_SD_HS_CMD)
+			value[i] = !mmc_send_tuning_cmd(card);
+		else
+			value[i] = !mmc_send_tuning_data(card);
+	}
+
+	best_clk_sample = sdhci_sprd_get_best_clk_sample(mmc, value);
+	if (best_clk_sample < 0) {
+		dev_err(mmc_dev(host->mmc), "all tuning phase fail!\n");
+		goto out;
+	}
+
+	if (type == SDHCI_SPRD_TUNING_SD_HS_CMD) {
+		p[mmc->ios.timing] &= ~SDHCI_SPRD_CMD_DLY_MASK;
+		p[mmc->ios.timing] |= ((best_clk_sample << 8) & SDHCI_SPRD_CMD_DLY_MASK);
+	} else {
+		p[mmc->ios.timing] &= ~(SDHCI_SPRD_POSRD_DLY_MASK);
+		p[mmc->ios.timing] |= ((best_clk_sample << 16) & SDHCI_SPRD_POSRD_DLY_MASK);
+	}
+
+	pr_debug("%s: the best clk sample %d, delay value 0x%08x\n",
+			mmc_hostname(host->mmc), best_clk_sample, p[mmc->ios.timing]);
+
+out:
+	sdhci_writel(host, p[mmc->ios.timing], SDHCI_SPRD_REG_32_DLL_DLY);
+
+	kfree(value);
+
+	return err;
+}
+
+static int sdhci_sprd_prepare_sd_hs_cmd_tuning(struct mmc_host *mmc, struct mmc_card *card)
+{
+	return sdhci_sprd_tuning(mmc, card, SDHCI_SPRD_TUNING_SD_HS_CMD);
+}
+
+static int sdhci_sprd_execute_sd_hs_data_tuning(struct mmc_host *mmc, struct mmc_card *card)
+{
+	return sdhci_sprd_tuning(mmc, card, SDHCI_SPRD_TUNING_SD_HS_DATA);
+}
+
 static void sdhci_sprd_phy_param_parse(struct sdhci_sprd_host *sprd_host,
 				       struct device_node *np)
 {
@@ -576,6 +720,11 @@ static int sdhci_sprd_probe(struct platform_device *pdev)
 	host->mmc_host_ops.request = sdhci_sprd_request;
 	host->mmc_host_ops.hs400_enhanced_strobe =
 		sdhci_sprd_hs400_enhanced_strobe;
+	host->mmc_host_ops.prepare_sd_hs_tuning =
+		sdhci_sprd_prepare_sd_hs_cmd_tuning;
+	host->mmc_host_ops.execute_sd_hs_tuning =
+		sdhci_sprd_execute_sd_hs_data_tuning;
+
 	/*
 	 * We can not use the standard ops to change and detect the voltage
 	 * signal for Spreadtrum SD host controller, since our voltage regulator

From 21b61fe48c2fc43d98ebb67a1f3832e0478fa523 Mon Sep 17 00:00:00 2001
From: Jijie Shao <shaojijie@huawei.com>
Date: Thu, 24 Aug 2023 10:41:35 +0800
Subject: [PATCH 1885/2047] drivers/perf: hisi: Update HiSilicon PMU
 maintainers

Since Guangbin and Shaokun have left HiSilicon and will no longer
maintain the drivers, update the maintainer information and
thanks for their work.

Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Yicong Yang <yangyicong@hisilicon.com>
Link: https://lore.kernel.org/r/20230824024135.1291459-1-shaojijie@huawei.com
[will: left the HNS3 title as-is to avoid the churn of resorting the entries]
Signed-off-by: Will Deacon <will@kernel.org>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index d516295978a4..ce4d209a5bf5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9306,7 +9306,7 @@ F:	drivers/crypto/hisilicon/hpre/hpre_crypto.c
 F:	drivers/crypto/hisilicon/hpre/hpre_main.c
 
 HISILICON HNS3 PMU DRIVER
-M:	Guangbin Huang <huangguangbin2@huawei.com>
+M:	Jijie Shao <shaojijie@huawei.com>
 S:	Supported
 F:	Documentation/admin-guide/perf/hns3-pmu.rst
 F:	drivers/perf/hisilicon/hns3_pmu.c
@@ -9344,7 +9344,7 @@ F:	Documentation/devicetree/bindings/net/hisilicon*.txt
 F:	drivers/net/ethernet/hisilicon/
 
 HISILICON PMU DRIVER
-M:	Shaokun Zhang <zhangshaokun@hisilicon.com>
+M:	Yicong Yang <yangyicong@hisilicon.com>
 M:	Jonathan Cameron <jonathan.cameron@huawei.com>
 S:	Supported
 W:	http://www.hisilicon.com

From fab9516f02b418e37d3cde6c21c316085262aece Mon Sep 17 00:00:00 2001
From: Adam Guerin <adam.guerin@intel.com>
Date: Mon, 14 Aug 2023 16:52:30 +0100
Subject: [PATCH 1886/2047] crypto: qat - fix crypto capability detection for
 4xxx

When extending the capability detection logic for 4xxx devices the
SMx algorithms were accidentally missed.
Enable these SMx capabilities by default for QAT GEN4 devices.

Check for device variants where the SMx algorithms are explicitly
disabled by the GEN4 hardware. This is indicated in fusectl1
register.
Mask out SM3 and SM4 based on a bit specific to those algorithms.
Mask out SM2 if the PKE slice is not present.

Fixes: 4b44d28c715d ("crypto: qat - extend crypto capability detection for 4xxx")
Signed-off-by: Adam Guerin <adam.guerin@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Fiona Trahe <fiona.trahe@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 9 +++++++++
 drivers/crypto/intel/qat/qat_common/icp_qat_hw.h     | 5 ++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
index 268a1f7694fc..dd4464b7e00b 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -225,6 +225,8 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev)
 			  ICP_ACCEL_CAPABILITIES_HKDF |
 			  ICP_ACCEL_CAPABILITIES_CHACHA_POLY |
 			  ICP_ACCEL_CAPABILITIES_AESGCM_SPC |
+			  ICP_ACCEL_CAPABILITIES_SM3 |
+			  ICP_ACCEL_CAPABILITIES_SM4 |
 			  ICP_ACCEL_CAPABILITIES_AES_V2;
 
 	/* A set bit in fusectl1 means the feature is OFF in this SKU */
@@ -248,12 +250,19 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev)
 		capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CIPHER;
 	}
 
+	if (fusectl1 & ICP_ACCEL_4XXX_MASK_SMX_SLICE) {
+		capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_SM3;
+		capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_SM4;
+	}
+
 	capabilities_asym = ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC |
 			  ICP_ACCEL_CAPABILITIES_CIPHER |
+			  ICP_ACCEL_CAPABILITIES_SM2 |
 			  ICP_ACCEL_CAPABILITIES_ECEDMONT;
 
 	if (fusectl1 & ICP_ACCEL_4XXX_MASK_PKE_SLICE) {
 		capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC;
+		capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_SM2;
 		capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_ECEDMONT;
 	}
 
diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_hw.h b/drivers/crypto/intel/qat/qat_common/icp_qat_hw.h
index a65059e56248..0c8883e2ccc6 100644
--- a/drivers/crypto/intel/qat/qat_common/icp_qat_hw.h
+++ b/drivers/crypto/intel/qat/qat_common/icp_qat_hw.h
@@ -97,7 +97,10 @@ enum icp_qat_capabilities_mask {
 	ICP_ACCEL_CAPABILITIES_SHA3_EXT = BIT(15),
 	ICP_ACCEL_CAPABILITIES_AESGCM_SPC = BIT(16),
 	ICP_ACCEL_CAPABILITIES_CHACHA_POLY = BIT(17),
-	/* Bits 18-21 are currently reserved */
+	ICP_ACCEL_CAPABILITIES_SM2 = BIT(18),
+	ICP_ACCEL_CAPABILITIES_SM3 = BIT(19),
+	ICP_ACCEL_CAPABILITIES_SM4 = BIT(20),
+	/* Bit 21 is currently reserved */
 	ICP_ACCEL_CAPABILITIES_CNV_INTEGRITY = BIT(22),
 	ICP_ACCEL_CAPABILITIES_CNV_INTEGRITY64 = BIT(23),
 	ICP_ACCEL_CAPABILITIES_LZ4_COMPRESSION = BIT(24),

From ef5b52a631f8c18353e80ccab8408b963305510c Mon Sep 17 00:00:00 2001
From: Thore Sommer <public@thson.de>
Date: Tue, 15 Aug 2023 14:29:42 +0300
Subject: [PATCH 1887/2047] X.509: if signature is unsupported skip validation

When the hash algorithm for the signature is not available the digest size
is 0 and the signature in the certificate is marked as unsupported.

When validating a self-signed certificate, this needs to be checked,
because otherwise trying to validate the signature will fail with an
warning:

Loading compiled-in X.509 certificates
WARNING: CPU: 0 PID: 1 at crypto/rsa-pkcs1pad.c:537 \
pkcs1pad_verify+0x46/0x12c
...
Problem loading in-kernel X.509 certificate (-22)

Signed-off-by: Thore Sommer <public@thson.de>
Cc: stable@vger.kernel.org # v4.7+
Fixes: 6c2dc5ae4ab7 ("X.509: Extract signature digest and make self-signed cert checks earlier")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/asymmetric_keys/x509_public_key.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
index 6fdfc82e23a8..7c71db3ac23d 100644
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -130,6 +130,11 @@ int x509_check_for_self_signed(struct x509_certificate *cert)
 			goto out;
 	}
 
+	if (cert->unsupported_sig) {
+		ret = 0;
+		goto out;
+	}
+
 	ret = public_key_verify_signature(cert->pub, cert->sig);
 	if (ret < 0) {
 		if (ret == -ENOPKG) {

From 9687daf785c035b4b6f73c998e6ee75bb0697c7e Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Thu, 17 Aug 2023 21:48:54 +0800
Subject: [PATCH 1888/2047] crypto: chelsio - Remove unused declarations

These declarations are not implemented now, remove them.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/chelsio/chcr_core.h   | 1 -
 drivers/crypto/chelsio/chcr_crypto.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
index f7c8bb95a71b..5e9d568131fe 100644
--- a/drivers/crypto/chelsio/chcr_core.h
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -133,7 +133,6 @@ int start_crypto(void);
 int stop_crypto(void);
 int chcr_uld_rx_handler(void *handle, const __be64 *rsp,
 			const struct pkt_gl *pgl);
-int chcr_uld_tx_handler(struct sk_buff *skb, struct net_device *dev);
 int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
 		     int err);
 #endif /* __CHCR_CORE_H__ */
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index 7f88ddb08631..1d693b8436e6 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -344,7 +344,6 @@ void chcr_add_cipher_dst_ent(struct skcipher_request *req,
 			     struct cpl_rx_phys_dsgl *phys_cpl,
 			     struct  cipher_wr_param *wrparam,
 			     unsigned short qid);
-int sg_nents_len_skip(struct scatterlist *sg, u64 len, u64 skip);
 void chcr_add_hash_src_ent(struct ahash_request *req, struct ulptx_sgl *ulptx,
 			   struct hash_wr_param *param);
 int chcr_hash_dma_map(struct device *dev, struct ahash_request *req);

From 85b9bf9a514d991fcecb118d0a8a35e754ff9265 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Thu, 24 Aug 2023 13:33:20 +0200
Subject: [PATCH 1889/2047] Revert "dt-bindings: crypto: qcom,prng: Add SM8450"

This reverts commit b9296bb41275 ("dt-bindings: crypto: qcom,prng: Add SM8450"),
since the RNG HW on the SM8450 SoC is in fact a True Random Number Generator,
a more appropriate compatible should be instead as reported at [1].

[1] https://lore.kernel.org/all/20230818161720.3644424-1-quic_omprsing@quicinc.com/

Suggested-by: Om Prakash Singh <quic_omprsing@quicinc.com>
Suggested-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../devicetree/bindings/crypto/qcom,prng.yaml | 24 ++++---------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/Documentation/devicetree/bindings/crypto/qcom,prng.yaml b/Documentation/devicetree/bindings/crypto/qcom,prng.yaml
index 36b0ebd9a44b..bb42f4588b40 100644
--- a/Documentation/devicetree/bindings/crypto/qcom,prng.yaml
+++ b/Documentation/devicetree/bindings/crypto/qcom,prng.yaml
@@ -11,13 +11,9 @@ maintainers:
 
 properties:
   compatible:
-    oneOf:
-      - enum:
-          - qcom,prng  # 8916 etc.
-          - qcom,prng-ee  # 8996 and later using EE
-      - items:
-          - const: qcom,sm8450-prng-ee
-          - const: qcom,prng-ee
+    enum:
+      - qcom,prng  # 8916 etc.
+      - qcom,prng-ee  # 8996 and later using EE
 
   reg:
     maxItems: 1
@@ -32,18 +28,8 @@ properties:
 required:
   - compatible
   - reg
-
-allOf:
-  - if:
-      not:
-        properties:
-          compatible:
-            contains:
-              const: qcom,sm8450-prng-ee
-    then:
-      required:
-        - clocks
-        - clock-names
+  - clocks
+  - clock-names
 
 additionalProperties: false
 

From ea05787136256b078fab3cc7db33caf24678ee47 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Thu, 24 Aug 2023 17:00:17 +0100
Subject: [PATCH 1890/2047] gpio: pca953x: Use i2c_get_match_data()

Replace device_get_match_data() and id lookup for retrieving match data
by i2c_get_match_data().

Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-pca953x.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index a806a3c1b801..929a2d075fe4 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -1051,7 +1051,6 @@ out:
 
 static int pca953x_probe(struct i2c_client *client)
 {
-	const struct i2c_device_id *i2c_id = i2c_client_get_device_id(client);
 	struct pca953x_platform_data *pdata;
 	struct pca953x_chip *chip;
 	int irq_base = 0;
@@ -1090,6 +1089,9 @@ static int pca953x_probe(struct i2c_client *client)
 	}
 
 	chip->client = client;
+	chip->driver_data = (uintptr_t)i2c_get_match_data(client);
+	if (!chip->driver_data)
+		return -ENODEV;
 
 	reg = devm_regulator_get(&client->dev, "vcc");
 	if (IS_ERR(reg))
@@ -1102,20 +1104,6 @@ static int pca953x_probe(struct i2c_client *client)
 	}
 	chip->regulator = reg;
 
-	if (i2c_id) {
-		chip->driver_data = i2c_id->driver_data;
-	} else {
-		const void *match;
-
-		match = device_get_match_data(&client->dev);
-		if (!match) {
-			ret = -ENODEV;
-			goto err_exit;
-		}
-
-		chip->driver_data = (uintptr_t)match;
-	}
-
 	i2c_set_clientdata(client, chip);
 
 	pca953x_setup_gpio(chip, chip->driver_data & PCA_GPIO_MASK);

From 1b961a75abfc043b2e5a8f97e1939d12242f4346 Mon Sep 17 00:00:00 2001
From: Liam Beguin <liambeguin@gmail.com>
Date: Thu, 24 Aug 2023 19:16:26 -0400
Subject: [PATCH 1891/2047] dt-bindings: gpio: pca95xx: document new tca9538
 chip

Add a compatible for tca9538.

Signed-off-by: Liam Beguin <liambeguin@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
[Bartosz: tweaked the commit message]
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml b/Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml
index fa116148ee90..99febb8ea1b6 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml
+++ b/Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml
@@ -66,6 +66,7 @@ properties:
               - ti,tca6408
               - ti,tca6416
               - ti,tca6424
+              - ti,tca9538
               - ti,tca9539
               - ti,tca9554
 

From 3d0957b07e27abd3237b1fe0c7f06485ba80852f Mon Sep 17 00:00:00 2001
From: Liam Beguin <liambeguin@gmail.com>
Date: Thu, 24 Aug 2023 19:16:25 -0400
Subject: [PATCH 1892/2047] gpio: pca953x: add support for TCA9538

The TCA9538 is an 8 bit version of the already supported TCA9539.
This chip also has interrupt support.

Signed-off-by: Liam Beguin <liambeguin@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-pca953x.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 929a2d075fe4..bdd50a78e414 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -108,6 +108,7 @@ static const struct i2c_device_id pca953x_id[] = {
 	{ "tca6408", 8  | PCA953X_TYPE | PCA_INT, },
 	{ "tca6416", 16 | PCA953X_TYPE | PCA_INT, },
 	{ "tca6424", 24 | PCA953X_TYPE | PCA_INT, },
+	{ "tca9538", 8  | PCA953X_TYPE | PCA_INT, },
 	{ "tca9539", 16 | PCA953X_TYPE | PCA_INT, },
 	{ "tca9554", 8  | PCA953X_TYPE | PCA_INT, },
 	{ "xra1202", 8  | PCA953X_TYPE },
@@ -1342,6 +1343,7 @@ static const struct of_device_id pca953x_dt_ids[] = {
 	{ .compatible = "ti,tca6408", .data = OF_953X( 8, PCA_INT), },
 	{ .compatible = "ti,tca6416", .data = OF_953X(16, PCA_INT), },
 	{ .compatible = "ti,tca6424", .data = OF_953X(24, PCA_INT), },
+	{ .compatible = "ti,tca9538", .data = OF_953X( 8, PCA_INT), },
 	{ .compatible = "ti,tca9539", .data = OF_953X(16, PCA_INT), },
 
 	{ .compatible = "onnn,cat9554", .data = OF_953X( 8, PCA_INT), },

From ac3f9c9f1b37edaa7d1a9b908bc79d843955a1a2 Mon Sep 17 00:00:00 2001
From: Steve Rutherford <srutherford@google.com>
Date: Thu, 24 Aug 2023 15:37:31 -0700
Subject: [PATCH 1893/2047] x86/sev: Make enc_dec_hypercall() accept a size
 instead of npages

enc_dec_hypercall() accepted a page count instead of a size, which
forced its callers to round up. As a result, non-page aligned
vaddrs caused pages to be spuriously marked as decrypted via the
encryption status hypercall, which in turn caused consistent
corruption of pages during live migration. Live migration requires
accurate encryption status information to avoid migrating pages
from the wrong perspective.

Fixes: 064ce6c550a0 ("mm: x86: Invoke hypercall when page encryption status is changed")
Signed-off-by: Steve Rutherford <srutherford@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Tested-by: Ben Hillier <bhillier@google.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20230824223731.2055016-1-srutherford@google.com
---
 arch/x86/include/asm/mem_encrypt.h |  6 +++---
 arch/x86/kernel/kvm.c              |  4 +---
 arch/x86/mm/mem_encrypt_amd.c      | 13 ++++++-------
 3 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 7f97a8a97e24..473b16d73b47 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -50,8 +50,8 @@ void __init sme_enable(struct boot_params *bp);
 
 int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
 int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
-void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
-					    bool enc);
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr,
+					    unsigned long size, bool enc);
 
 void __init mem_encrypt_free_decrypted_mem(void);
 
@@ -85,7 +85,7 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
 static inline int __init
 early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
 static inline void __init
-early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
+early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc) {}
 
 static inline void mem_encrypt_free_decrypted_mem(void) { }
 
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1cceac5984da..526d4da3dcd4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -966,10 +966,8 @@ static void __init kvm_init_platform(void)
 		 * Ensure that _bss_decrypted section is marked as decrypted in the
 		 * shared pages list.
 		 */
-		nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
-					PAGE_SIZE);
 		early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
-						nr_pages, 0);
+						__end_bss_decrypted - __start_bss_decrypted, 0);
 
 		/*
 		 * If not booted using EFI, enable Live migration support.
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 54bbd5163e8d..6faea41e99b6 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -288,11 +288,10 @@ static bool amd_enc_cache_flush_required(void)
 	return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT);
 }
 
-static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
+static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
 {
 #ifdef CONFIG_PARAVIRT
-	unsigned long sz = npages << PAGE_SHIFT;
-	unsigned long vaddr_end = vaddr + sz;
+	unsigned long vaddr_end = vaddr + size;
 
 	while (vaddr < vaddr_end) {
 		int psize, pmask, level;
@@ -342,7 +341,7 @@ static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool e
 		snp_set_memory_private(vaddr, npages);
 
 	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
-		enc_dec_hypercall(vaddr, npages, enc);
+		enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc);
 
 	return true;
 }
@@ -466,7 +465,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
 
 	ret = 0;
 
-	early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
+	early_set_mem_enc_dec_hypercall(start, size, enc);
 out:
 	__flush_tlb_all();
 	return ret;
@@ -482,9 +481,9 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
 	return early_set_memory_enc_dec(vaddr, size, true);
 }
 
-void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
 {
-	enc_dec_hypercall(vaddr, npages, enc);
+	enc_dec_hypercall(vaddr, size, enc);
 }
 
 void __init sme_early_init(void)

From ce6e94722523f85e265455eb3296ebdbe996b3b5 Mon Sep 17 00:00:00 2001
From: Balamanikandan Gunasundar <balamanikandan.gunasundar@microchip.com>
Date: Fri, 25 Aug 2023 15:21:55 +0530
Subject: [PATCH 1894/2047] mmc: atmel-mci: Convert to gpio descriptors

Replace the legacy GPIO APIs with gpio descriptor consumer interface.
To maintain backward compatibility, we rely on the "cd-inverted"
property to manage the invertion flag instead of GPIO property.

Signed-off-by: Balamanikandan Gunasundar <balamanikandan.gunasundar@microchip.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230825095157.76073-2-balamanikandan.gunasundar@microchip.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/atmel-mci.c | 77 +++++++++++++++++-------------------
 include/linux/atmel-mci.h    |  4 +-
 2 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 1f89d02c4685..892d29fed9ec 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -11,14 +11,14 @@
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
+#include <linux/irq.h>
+#include <linux/gpio/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <linux/seq_file.h>
@@ -387,8 +387,8 @@ struct atmel_mci_slot {
 #define ATMCI_CARD_NEED_INIT	1
 #define ATMCI_SHUTDOWN		2
 
-	int			detect_pin;
-	int			wp_pin;
+	struct gpio_desc        *detect_pin;
+	struct gpio_desc	*wp_pin;
 	bool			detect_is_active_high;
 
 	struct timer_list	detect_timer;
@@ -636,7 +636,10 @@ atmci_of_init(struct platform_device *pdev)
 			pdata->slot[slot_id].bus_width = 1;
 
 		pdata->slot[slot_id].detect_pin =
-			of_get_named_gpio(cnp, "cd-gpios", 0);
+			devm_fwnode_gpiod_get(&pdev->dev, of_fwnode_handle(cnp),
+					      "cd", GPIOD_IN, "cd-gpios");
+		if (IS_ERR(pdata->slot[slot_id].detect_pin))
+			pdata->slot[slot_id].detect_pin = NULL;
 
 		pdata->slot[slot_id].detect_is_active_high =
 			of_property_read_bool(cnp, "cd-inverted");
@@ -645,7 +648,10 @@ atmci_of_init(struct platform_device *pdev)
 			of_property_read_bool(cnp, "non-removable");
 
 		pdata->slot[slot_id].wp_pin =
-			of_get_named_gpio(cnp, "wp-gpios", 0);
+			devm_fwnode_gpiod_get(&pdev->dev, of_fwnode_handle(cnp),
+					      "wp", GPIOD_IN, "wp-gpios");
+		if (IS_ERR(pdata->slot[slot_id].wp_pin))
+			pdata->slot[slot_id].wp_pin = NULL;
 	}
 
 	return pdata;
@@ -1508,8 +1514,8 @@ static int atmci_get_ro(struct mmc_host *mmc)
 	int			read_only = -ENOSYS;
 	struct atmel_mci_slot	*slot = mmc_priv(mmc);
 
-	if (gpio_is_valid(slot->wp_pin)) {
-		read_only = gpio_get_value(slot->wp_pin);
+	if (slot->wp_pin) {
+		read_only = gpiod_get_value(slot->wp_pin);
 		dev_dbg(&mmc->class_dev, "card is %s\n",
 				read_only ? "read-only" : "read-write");
 	}
@@ -1522,8 +1528,8 @@ static int atmci_get_cd(struct mmc_host *mmc)
 	int			present = -ENOSYS;
 	struct atmel_mci_slot	*slot = mmc_priv(mmc);
 
-	if (gpio_is_valid(slot->detect_pin)) {
-		present = !(gpio_get_value(slot->detect_pin) ^
+	if (slot->detect_pin) {
+		present = !(gpiod_get_raw_value(slot->detect_pin) ^
 			    slot->detect_is_active_high);
 		dev_dbg(&mmc->class_dev, "card is %spresent\n",
 				present ? "" : "not ");
@@ -1636,8 +1642,8 @@ static void atmci_detect_change(struct timer_list *t)
 	if (test_bit(ATMCI_SHUTDOWN, &slot->flags))
 		return;
 
-	enable_irq(gpio_to_irq(slot->detect_pin));
-	present = !(gpio_get_value(slot->detect_pin) ^
+	enable_irq(gpiod_to_irq(slot->detect_pin));
+	present = !(gpiod_get_raw_value(slot->detect_pin) ^
 		    slot->detect_is_active_high);
 	present_old = test_bit(ATMCI_CARD_PRESENT, &slot->flags);
 
@@ -2236,9 +2242,9 @@ static int atmci_init_slot(struct atmel_mci *host,
 	dev_dbg(&mmc->class_dev,
 	        "slot[%u]: bus_width=%u, detect_pin=%d, "
 		"detect_is_active_high=%s, wp_pin=%d\n",
-		id, slot_data->bus_width, slot_data->detect_pin,
+		id, slot_data->bus_width, desc_to_gpio(slot_data->detect_pin),
 		slot_data->detect_is_active_high ? "true" : "false",
-		slot_data->wp_pin);
+		desc_to_gpio(slot_data->wp_pin));
 
 	mmc->ops = &atmci_ops;
 	mmc->f_min = DIV_ROUND_UP(host->bus_hz, 512);
@@ -2274,31 +2280,24 @@ static int atmci_init_slot(struct atmel_mci *host,
 
 	/* Assume card is present initially */
 	set_bit(ATMCI_CARD_PRESENT, &slot->flags);
-	if (gpio_is_valid(slot->detect_pin)) {
-		if (devm_gpio_request(&host->pdev->dev, slot->detect_pin,
-				      "mmc_detect")) {
-			dev_dbg(&mmc->class_dev, "no detect pin available\n");
-			slot->detect_pin = -EBUSY;
-		} else if (gpio_get_value(slot->detect_pin) ^
-				slot->detect_is_active_high) {
+	if (slot->detect_pin) {
+		if (gpiod_get_raw_value(slot->detect_pin) ^
+		    slot->detect_is_active_high) {
 			clear_bit(ATMCI_CARD_PRESENT, &slot->flags);
 		}
+	} else {
+		dev_dbg(&mmc->class_dev, "no detect pin available\n");
 	}
 
-	if (!gpio_is_valid(slot->detect_pin)) {
+	if (!slot->detect_pin) {
 		if (slot_data->non_removable)
 			mmc->caps |= MMC_CAP_NONREMOVABLE;
 		else
 			mmc->caps |= MMC_CAP_NEEDS_POLL;
 	}
 
-	if (gpio_is_valid(slot->wp_pin)) {
-		if (devm_gpio_request(&host->pdev->dev, slot->wp_pin,
-				      "mmc_wp")) {
-			dev_dbg(&mmc->class_dev, "no WP pin available\n");
-			slot->wp_pin = -EBUSY;
-		}
-	}
+	if (!slot->wp_pin)
+		dev_dbg(&mmc->class_dev, "no WP pin available\n");
 
 	host->slot[id] = slot;
 	mmc_regulator_get_supply(mmc);
@@ -2308,18 +2307,18 @@ static int atmci_init_slot(struct atmel_mci *host,
 		return ret;
 	}
 
-	if (gpio_is_valid(slot->detect_pin)) {
+	if (slot->detect_pin) {
 		timer_setup(&slot->detect_timer, atmci_detect_change, 0);
 
-		ret = request_irq(gpio_to_irq(slot->detect_pin),
-				atmci_detect_interrupt,
-				IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-				"mmc-detect", slot);
+		ret = request_irq(gpiod_to_irq(slot->detect_pin),
+				  atmci_detect_interrupt,
+				  IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
+				  "mmc-detect", slot);
 		if (ret) {
 			dev_dbg(&mmc->class_dev,
 				"could not request IRQ %d for detect pin\n",
-				gpio_to_irq(slot->detect_pin));
-			slot->detect_pin = -EBUSY;
+				gpiod_to_irq(slot->detect_pin));
+			slot->detect_pin = NULL;
 		}
 	}
 
@@ -2338,10 +2337,8 @@ static void atmci_cleanup_slot(struct atmel_mci_slot *slot,
 
 	mmc_remove_host(slot->mmc);
 
-	if (gpio_is_valid(slot->detect_pin)) {
-		int pin = slot->detect_pin;
-
-		free_irq(gpio_to_irq(pin), slot);
+	if (slot->detect_pin) {
+		free_irq(gpiod_to_irq(slot->detect_pin), slot);
 		del_timer_sync(&slot->detect_timer);
 	}
 
diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h
index 1491af38cc6e..017e7d8f6126 100644
--- a/include/linux/atmel-mci.h
+++ b/include/linux/atmel-mci.h
@@ -26,8 +26,8 @@
  */
 struct mci_slot_pdata {
 	unsigned int		bus_width;
-	int			detect_pin;
-	int			wp_pin;
+	struct gpio_desc        *detect_pin;
+	struct gpio_desc	*wp_pin;
 	bool			detect_is_active_high;
 	bool			non_removable;
 };

From d2c6d518c21d73d96616e08a19eccd4642f4bafa Mon Sep 17 00:00:00 2001
From: Balamanikandan Gunasundar <balamanikandan.gunasundar@microchip.com>
Date: Fri, 25 Aug 2023 15:21:56 +0530
Subject: [PATCH 1895/2047] mmc: atmel-mci: move atmel MCI header file

Move the contents of linux/atmel-mci.h into
drivers/mmc/host/atmel-mci.c as it is only used in one file

Signed-off-by: Balamanikandan Gunasundar <balamanikandan.gunasundar@microchip.com>
Reviewed-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Link: https://lore.kernel.org/r/20230825095157.76073-3-balamanikandan.gunasundar@microchip.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/atmel-mci.c | 39 +++++++++++++++++++++++++++++-
 include/linux/atmel-mci.h    | 46 ------------------------------------
 2 files changed, 38 insertions(+), 47 deletions(-)
 delete mode 100644 include/linux/atmel-mci.h

diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 892d29fed9ec..6f815818dd22 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -29,7 +29,6 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/sdio.h>
 
-#include <linux/atmel-mci.h>
 #include <linux/atmel_pdc.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
@@ -39,6 +38,8 @@
 #include <asm/io.h>
 #include <asm/unaligned.h>
 
+#define ATMCI_MAX_NR_SLOTS	2
+
 /*
  * Superset of MCI IP registers integrated in Atmel AT91 Processor
  * Registers and bitfields marked with [2] are only available in MCI2
@@ -200,6 +201,42 @@ enum atmci_pdc_buf {
 	PDC_SECOND_BUF,
 };
 
+/**
+ * struct mci_slot_pdata - board-specific per-slot configuration
+ * @bus_width: Number of data lines wired up the slot
+ * @detect_pin: GPIO pin wired to the card detect switch
+ * @wp_pin: GPIO pin wired to the write protect sensor
+ * @detect_is_active_high: The state of the detect pin when it is active
+ * @non_removable: The slot is not removable, only detect once
+ *
+ * If a given slot is not present on the board, @bus_width should be
+ * set to 0. The other fields are ignored in this case.
+ *
+ * Any pins that aren't available should be set to a negative value.
+ *
+ * Note that support for multiple slots is experimental -- some cards
+ * might get upset if we don't get the clock management exactly right.
+ * But in most cases, it should work just fine.
+ */
+struct mci_slot_pdata {
+	unsigned int		bus_width;
+	struct gpio_desc        *detect_pin;
+	struct gpio_desc	*wp_pin;
+	bool			detect_is_active_high;
+	bool			non_removable;
+};
+
+/**
+ * struct mci_platform_data - board-specific MMC/SDcard configuration
+ * @dma_slave: DMA slave interface to use in data transfers.
+ * @slot: Per-slot configuration data.
+ */
+struct mci_platform_data {
+	void			*dma_slave;
+	dma_filter_fn		dma_filter;
+	struct mci_slot_pdata	slot[ATMCI_MAX_NR_SLOTS];
+};
+
 struct atmel_mci_caps {
 	bool    has_dma_conf_reg;
 	bool    has_pdc;
diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h
deleted file mode 100644
index 017e7d8f6126..000000000000
--- a/include/linux/atmel-mci.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LINUX_ATMEL_MCI_H
-#define __LINUX_ATMEL_MCI_H
-
-#include <linux/types.h>
-#include <linux/dmaengine.h>
-
-#define ATMCI_MAX_NR_SLOTS	2
-
-/**
- * struct mci_slot_pdata - board-specific per-slot configuration
- * @bus_width: Number of data lines wired up the slot
- * @detect_pin: GPIO pin wired to the card detect switch
- * @wp_pin: GPIO pin wired to the write protect sensor
- * @detect_is_active_high: The state of the detect pin when it is active
- * @non_removable: The slot is not removable, only detect once
- *
- * If a given slot is not present on the board, @bus_width should be
- * set to 0. The other fields are ignored in this case.
- *
- * Any pins that aren't available should be set to a negative value.
- *
- * Note that support for multiple slots is experimental -- some cards
- * might get upset if we don't get the clock management exactly right.
- * But in most cases, it should work just fine.
- */
-struct mci_slot_pdata {
-	unsigned int		bus_width;
-	struct gpio_desc        *detect_pin;
-	struct gpio_desc	*wp_pin;
-	bool			detect_is_active_high;
-	bool			non_removable;
-};
-
-/**
- * struct mci_platform_data - board-specific MMC/SDcard configuration
- * @dma_slave: DMA slave interface to use in data transfers.
- * @slot: Per-slot configuration data.
- */
-struct mci_platform_data {
-	void			*dma_slave;
-	dma_filter_fn		dma_filter;
-	struct mci_slot_pdata	slot[ATMCI_MAX_NR_SLOTS];
-};
-
-#endif /* __LINUX_ATMEL_MCI_H */

From 98ac9e4fc07f101c435f1ab6b395b6245b096a68 Mon Sep 17 00:00:00 2001
From: Balamanikandan Gunasundar <balamanikandan.gunasundar@microchip.com>
Date: Fri, 25 Aug 2023 15:21:57 +0530
Subject: [PATCH 1896/2047] mmc: atmel-mci: Move card detect gpio polarity
 quirk to gpiolib

The polarity of the card detection gpio is handled by the "cd-inverted"
property in the device tree. Move this inversion logic to gpiolib to avoid
reading the gpio raw value.

Signed-off-by: Balamanikandan Gunasundar <balamanikandan.gunasundar@microchip.com>
Suggested-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20230825095157.76073-4-balamanikandan.gunasundar@microchip.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/gpio/gpiolib-of.c    | 20 ++++++++++++++++++--
 drivers/mmc/host/atmel-mci.c | 33 +++++++++++++++------------------
 2 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 1436cdb5fa26..9694eb5afa21 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -209,6 +209,8 @@ static void of_gpio_set_polarity_by_property(const struct device_node *np,
 					     const char *propname,
 					     enum of_gpio_flags *flags)
 {
+	const struct device_node *np_compat = np;
+	const struct device_node *np_propname = np;
 	static const struct {
 		const char *compatible;
 		const char *gpio_propname;
@@ -252,15 +254,29 @@ static void of_gpio_set_polarity_by_property(const struct device_node *np,
 #if IS_ENABLED(CONFIG_REGULATOR_GPIO)
 		{ "regulator-gpio",    "enable-gpio",  "enable-active-high" },
 		{ "regulator-gpio",    "enable-gpios", "enable-active-high" },
+#endif
+#if IS_ENABLED(CONFIG_MMC_ATMELMCI)
+		{ "atmel,hsmci",       "cd-gpios",     "cd-inverted" },
 #endif
 	};
 	unsigned int i;
 	bool active_high;
 
+#if IS_ENABLED(CONFIG_MMC_ATMELMCI)
+	/*
+	 * The Atmel HSMCI has compatible property in the parent node and
+	 * gpio property in a child node
+	 */
+	if (of_device_is_compatible(np->parent, "atmel,hsmci")) {
+		np_compat = np->parent;
+		np_propname = np;
+	}
+#endif
+
 	for (i = 0; i < ARRAY_SIZE(gpios); i++) {
-		if (of_device_is_compatible(np, gpios[i].compatible) &&
+		if (of_device_is_compatible(np_compat, gpios[i].compatible) &&
 		    !strcmp(propname, gpios[i].gpio_propname)) {
-			active_high = of_property_read_bool(np,
+			active_high = of_property_read_bool(np_propname,
 						gpios[i].polarity_propname);
 			of_gpio_quirk_polarity(np, active_high, flags);
 			break;
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 6f815818dd22..535783c43105 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -206,7 +206,6 @@ enum atmci_pdc_buf {
  * @bus_width: Number of data lines wired up the slot
  * @detect_pin: GPIO pin wired to the card detect switch
  * @wp_pin: GPIO pin wired to the write protect sensor
- * @detect_is_active_high: The state of the detect pin when it is active
  * @non_removable: The slot is not removable, only detect once
  *
  * If a given slot is not present on the board, @bus_width should be
@@ -222,7 +221,6 @@ struct mci_slot_pdata {
 	unsigned int		bus_width;
 	struct gpio_desc        *detect_pin;
 	struct gpio_desc	*wp_pin;
-	bool			detect_is_active_high;
 	bool			non_removable;
 };
 
@@ -405,7 +403,6 @@ struct atmel_mci {
  *	available.
  * @wp_pin: GPIO pin used for card write protect sending, or negative
  *	if not available.
- * @detect_is_active_high: The state of the detect pin when it is active.
  * @detect_timer: Timer used for debouncing @detect_pin interrupts.
  */
 struct atmel_mci_slot {
@@ -426,7 +423,6 @@ struct atmel_mci_slot {
 
 	struct gpio_desc        *detect_pin;
 	struct gpio_desc	*wp_pin;
-	bool			detect_is_active_high;
 
 	struct timer_list	detect_timer;
 };
@@ -644,6 +640,7 @@ atmci_of_init(struct platform_device *pdev)
 	struct device_node *cnp;
 	struct mci_platform_data *pdata;
 	u32 slot_id;
+	int err;
 
 	if (!np) {
 		dev_err(&pdev->dev, "device node not found\n");
@@ -675,11 +672,12 @@ atmci_of_init(struct platform_device *pdev)
 		pdata->slot[slot_id].detect_pin =
 			devm_fwnode_gpiod_get(&pdev->dev, of_fwnode_handle(cnp),
 					      "cd", GPIOD_IN, "cd-gpios");
-		if (IS_ERR(pdata->slot[slot_id].detect_pin))
+		err = PTR_ERR_OR_ZERO(pdata->slot[slot_id].detect_pin);
+		if (err) {
+			if (err != -ENOENT)
+				return ERR_PTR(err);
 			pdata->slot[slot_id].detect_pin = NULL;
-
-		pdata->slot[slot_id].detect_is_active_high =
-			of_property_read_bool(cnp, "cd-inverted");
+		}
 
 		pdata->slot[slot_id].non_removable =
 			of_property_read_bool(cnp, "non-removable");
@@ -687,8 +685,12 @@ atmci_of_init(struct platform_device *pdev)
 		pdata->slot[slot_id].wp_pin =
 			devm_fwnode_gpiod_get(&pdev->dev, of_fwnode_handle(cnp),
 					      "wp", GPIOD_IN, "wp-gpios");
-		if (IS_ERR(pdata->slot[slot_id].wp_pin))
+		err = PTR_ERR_OR_ZERO(pdata->slot[slot_id].wp_pin);
+		if (err) {
+			if (err != -ENOENT)
+				return ERR_PTR(err);
 			pdata->slot[slot_id].wp_pin = NULL;
+		}
 	}
 
 	return pdata;
@@ -1566,8 +1568,7 @@ static int atmci_get_cd(struct mmc_host *mmc)
 	struct atmel_mci_slot	*slot = mmc_priv(mmc);
 
 	if (slot->detect_pin) {
-		present = !(gpiod_get_raw_value(slot->detect_pin) ^
-			    slot->detect_is_active_high);
+		present = gpiod_get_value_cansleep(slot->detect_pin);
 		dev_dbg(&mmc->class_dev, "card is %spresent\n",
 				present ? "" : "not ");
 	}
@@ -1680,8 +1681,7 @@ static void atmci_detect_change(struct timer_list *t)
 		return;
 
 	enable_irq(gpiod_to_irq(slot->detect_pin));
-	present = !(gpiod_get_raw_value(slot->detect_pin) ^
-		    slot->detect_is_active_high);
+	present = gpiod_get_value_cansleep(slot->detect_pin);
 	present_old = test_bit(ATMCI_CARD_PRESENT, &slot->flags);
 
 	dev_vdbg(&slot->mmc->class_dev, "detect change: %d (was %d)\n",
@@ -2272,7 +2272,6 @@ static int atmci_init_slot(struct atmel_mci *host,
 	slot->host = host;
 	slot->detect_pin = slot_data->detect_pin;
 	slot->wp_pin = slot_data->wp_pin;
-	slot->detect_is_active_high = slot_data->detect_is_active_high;
 	slot->sdc_reg = sdc_reg;
 	slot->sdio_irq = sdio_irq;
 
@@ -2280,7 +2279,7 @@ static int atmci_init_slot(struct atmel_mci *host,
 	        "slot[%u]: bus_width=%u, detect_pin=%d, "
 		"detect_is_active_high=%s, wp_pin=%d\n",
 		id, slot_data->bus_width, desc_to_gpio(slot_data->detect_pin),
-		slot_data->detect_is_active_high ? "true" : "false",
+		!gpiod_is_active_low(slot_data->detect_pin) ? "true" : "false",
 		desc_to_gpio(slot_data->wp_pin));
 
 	mmc->ops = &atmci_ops;
@@ -2318,10 +2317,8 @@ static int atmci_init_slot(struct atmel_mci *host,
 	/* Assume card is present initially */
 	set_bit(ATMCI_CARD_PRESENT, &slot->flags);
 	if (slot->detect_pin) {
-		if (gpiod_get_raw_value(slot->detect_pin) ^
-		    slot->detect_is_active_high) {
+		if (!gpiod_get_value_cansleep(slot->detect_pin))
 			clear_bit(ATMCI_CARD_PRESENT, &slot->flags);
-		}
 	} else {
 		dev_dbg(&mmc->class_dev, "no detect pin available\n");
 	}

From 70332ec735202ceb874407be29f68919ab898dce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Thu, 24 Aug 2023 16:28:23 +0300
Subject: [PATCH 1897/2047] hwmon: (via686a) Do PCI error checks on own line
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of if conditions with line splits, use the usual error handling
pattern with a separate variable to improve readability.

No functional changes intended.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20230824132832.78705-6-ilpo.jarvinen@linux.intel.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/via686a.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/hwmon/via686a.c b/drivers/hwmon/via686a.c
index 37d7374896f6..407933d6e425 100644
--- a/drivers/hwmon/via686a.c
+++ b/drivers/hwmon/via686a.c
@@ -855,16 +855,17 @@ static int via686a_pci_probe(struct pci_dev *dev,
 				       const struct pci_device_id *id)
 {
 	u16 address, val;
+	int ret;
 
 	if (force_addr) {
 		address = force_addr & ~(VIA686A_EXTENT - 1);
 		dev_warn(&dev->dev, "Forcing ISA address 0x%x\n", address);
-		if (PCIBIOS_SUCCESSFUL !=
-		    pci_write_config_word(dev, VIA686A_BASE_REG, address | 1))
+		ret = pci_write_config_word(dev, VIA686A_BASE_REG, address | 1);
+		if (ret != PCIBIOS_SUCCESSFUL)
 			return -ENODEV;
 	}
-	if (PCIBIOS_SUCCESSFUL !=
-	    pci_read_config_word(dev, VIA686A_BASE_REG, &val))
+	ret = pci_read_config_word(dev, VIA686A_BASE_REG, &val);
+	if (ret != PCIBIOS_SUCCESSFUL)
 		return -ENODEV;
 
 	address = val & ~(VIA686A_EXTENT - 1);
@@ -874,8 +875,8 @@ static int via686a_pci_probe(struct pci_dev *dev,
 		return -ENODEV;
 	}
 
-	if (PCIBIOS_SUCCESSFUL !=
-	    pci_read_config_word(dev, VIA686A_ENABLE_REG, &val))
+	ret = pci_read_config_word(dev, VIA686A_ENABLE_REG, &val);
+	if (ret != PCIBIOS_SUCCESSFUL)
 		return -ENODEV;
 	if (!(val & 0x0001)) {
 		if (!force_addr) {
@@ -886,9 +887,8 @@ static int via686a_pci_probe(struct pci_dev *dev,
 		}
 
 		dev_warn(&dev->dev, "Enabling sensors\n");
-		if (PCIBIOS_SUCCESSFUL !=
-		    pci_write_config_word(dev, VIA686A_ENABLE_REG,
-					  val | 0x0001))
+		ret = pci_write_config_word(dev, VIA686A_ENABLE_REG, val | 0x1);
+		if (ret != PCIBIOS_SUCCESSFUL)
 			return -ENODEV;
 	}
 

From 1e3c3a7989495512f2b180adb4f22d500cb79960 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Thu, 24 Aug 2023 16:28:24 +0300
Subject: [PATCH 1898/2047] hwmon: (vt8231) Do PCI error checks on own line
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of if conditions with line splits, use the usual error handling
pattern with a separate variable to improve readability.

No functional changes intended.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20230824132832.78705-7-ilpo.jarvinen@linux.intel.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/vt8231.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/hwmon/vt8231.c b/drivers/hwmon/vt8231.c
index b7c6392ba673..16bc16d33cd1 100644
--- a/drivers/hwmon/vt8231.c
+++ b/drivers/hwmon/vt8231.c
@@ -971,13 +971,15 @@ static int vt8231_pci_probe(struct pci_dev *dev,
 				const struct pci_device_id *id)
 {
 	u16 address, val;
+	int ret;
+
 	if (force_addr) {
 		address = force_addr & 0xff00;
 		dev_warn(&dev->dev, "Forcing ISA address 0x%x\n",
 			 address);
 
-		if (PCIBIOS_SUCCESSFUL !=
-		    pci_write_config_word(dev, VT8231_BASE_REG, address | 1))
+		ret = pci_write_config_word(dev, VT8231_BASE_REG, address | 1);
+		if (ret != PCIBIOS_SUCCESSFUL)
 			return -ENODEV;
 	}
 
@@ -997,9 +999,8 @@ static int vt8231_pci_probe(struct pci_dev *dev,
 
 	if (!(val & 0x0001)) {
 		dev_warn(&dev->dev, "enabling sensors\n");
-		if (PCIBIOS_SUCCESSFUL !=
-			pci_write_config_word(dev, VT8231_ENABLE_REG,
-							val | 0x0001))
+		ret = pci_write_config_word(dev, VT8231_ENABLE_REG, val | 0x1);
+		if (ret != PCIBIOS_SUCCESSFUL)
 			return -ENODEV;
 	}
 

From a1f38987dd1cd2fce4a6a00fc7377c3d556a240f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Thu, 24 Aug 2023 16:28:31 +0300
Subject: [PATCH 1899/2047] hwmon: (sis5595) Do PCI error checks on own line
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of if conditions with line splits, use the usual error handling
pattern with a separate variable to improve readability. Handle error
print with a label instead of trying to chain everything into a single
if condition.

No functional changes intended.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20230824132832.78705-14-ilpo.jarvinen@linux.intel.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/sis5595.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/drivers/hwmon/sis5595.c b/drivers/hwmon/sis5595.c
index b0b05fd12221..0a0479501e11 100644
--- a/drivers/hwmon/sis5595.c
+++ b/drivers/hwmon/sis5595.c
@@ -798,7 +798,7 @@ static int sis5595_pci_probe(struct pci_dev *dev,
 {
 	u16 address;
 	u8 enable;
-	int *i;
+	int *i, err;
 
 	for (i = blacklist; *i != 0; i++) {
 		struct pci_dev *d;
@@ -818,8 +818,8 @@ static int sis5595_pci_probe(struct pci_dev *dev,
 		pci_write_config_word(dev, SIS5595_BASE_REG, force_addr);
 	}
 
-	if (PCIBIOS_SUCCESSFUL !=
-	    pci_read_config_word(dev, SIS5595_BASE_REG, &address)) {
+	err = pci_read_config_word(dev, SIS5595_BASE_REG, &address);
+	if (err != PCIBIOS_SUCCESSFUL) {
 		dev_err(&dev->dev, "Failed to read ISA address\n");
 		return -ENODEV;
 	}
@@ -836,22 +836,23 @@ static int sis5595_pci_probe(struct pci_dev *dev,
 		return -ENODEV;
 	}
 
-	if (PCIBIOS_SUCCESSFUL !=
-	    pci_read_config_byte(dev, SIS5595_ENABLE_REG, &enable)) {
+	err = pci_read_config_byte(dev, SIS5595_ENABLE_REG, &enable);
+	if (err != PCIBIOS_SUCCESSFUL) {
 		dev_err(&dev->dev, "Failed to read enable register\n");
 		return -ENODEV;
 	}
 	if (!(enable & 0x80)) {
-		if ((PCIBIOS_SUCCESSFUL !=
-		     pci_write_config_byte(dev, SIS5595_ENABLE_REG,
-					   enable | 0x80))
-		 || (PCIBIOS_SUCCESSFUL !=
-		     pci_read_config_byte(dev, SIS5595_ENABLE_REG, &enable))
-		 || (!(enable & 0x80))) {
-			/* doesn't work for some chips! */
-			dev_err(&dev->dev, "Failed to enable HWM device\n");
-			return -ENODEV;
-		}
+		err = pci_write_config_byte(dev, SIS5595_ENABLE_REG, enable | 0x80);
+		if (err != PCIBIOS_SUCCESSFUL)
+			goto enable_fail;
+
+		err = pci_read_config_byte(dev, SIS5595_ENABLE_REG, &enable);
+		if (err != PCIBIOS_SUCCESSFUL)
+			goto enable_fail;
+
+		/* doesn't work for some chips! */
+		if (!(enable & 0x80))
+			goto enable_fail;
 	}
 
 	if (platform_driver_register(&sis5595_driver)) {
@@ -871,6 +872,10 @@ static int sis5595_pci_probe(struct pci_dev *dev,
 	 */
 	return -ENODEV;
 
+enable_fail:
+	dev_err(&dev->dev, "Failed to enable HWM device\n");
+	goto exit;
+
 exit_unregister:
 	pci_dev_put(dev);
 	platform_driver_unregister(&sis5595_driver);

From dadca53dd948f70dbfd9af90aff72ebd27d3fc53 Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Thu, 24 Aug 2023 16:40:06 +0000
Subject: [PATCH 1900/2047] hwmon: (mlxreg-fan) Extend number of supported fans

Some new big modular systems can be equipped with up to 24 fans.
Extend maximum number of fans accordingly.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Link: https://lore.kernel.org/r/20230824164006.26868-1-vadimp@nvidia.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/mlxreg-fan.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/mlxreg-fan.c b/drivers/hwmon/mlxreg-fan.c
index c2a96468c9b4..a5f89aab3fb4 100644
--- a/drivers/hwmon/mlxreg-fan.c
+++ b/drivers/hwmon/mlxreg-fan.c
@@ -12,7 +12,7 @@
 #include <linux/regmap.h>
 #include <linux/thermal.h>
 
-#define MLXREG_FAN_MAX_TACHO		14
+#define MLXREG_FAN_MAX_TACHO		24
 #define MLXREG_FAN_MAX_PWM		4
 #define MLXREG_FAN_PWM_NOT_CONNECTED	0xff
 #define MLXREG_FAN_MAX_STATE		10
@@ -300,6 +300,16 @@ static const struct hwmon_channel_info * const mlxreg_fan_hwmon_info[] = {
 			   HWMON_F_INPUT | HWMON_F_FAULT,
 			   HWMON_F_INPUT | HWMON_F_FAULT,
 			   HWMON_F_INPUT | HWMON_F_FAULT,
+			   HWMON_F_INPUT | HWMON_F_FAULT,
+			   HWMON_F_INPUT | HWMON_F_FAULT,
+			   HWMON_F_INPUT | HWMON_F_FAULT,
+			   HWMON_F_INPUT | HWMON_F_FAULT,
+			   HWMON_F_INPUT | HWMON_F_FAULT,
+			   HWMON_F_INPUT | HWMON_F_FAULT,
+			   HWMON_F_INPUT | HWMON_F_FAULT,
+			   HWMON_F_INPUT | HWMON_F_FAULT,
+			   HWMON_F_INPUT | HWMON_F_FAULT,
+			   HWMON_F_INPUT | HWMON_F_FAULT,
 			   HWMON_F_INPUT | HWMON_F_FAULT),
 	HWMON_CHANNEL_INFO(pwm,
 			   HWMON_PWM_INPUT,

From d103337e38e7e64c3d915029e947b1cb0b512737 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Thu, 24 Aug 2023 21:44:54 +0100
Subject: [PATCH 1901/2047] hwmon: (tmp513) Fix the channel number in
 tmp51x_is_visible()

The supported channels for this driver are {0..3}. Fix the incorrect
channel in tmp51x_is_visible().

Reported-by: Guenter Roeck <linux@roeck-us.net>
Closes: https://lore.kernel.org/all/ea0eccc0-a29f-41e4-9049-a1a13f8b16f1@roeck-us.net/
Fixes: 59dfa75e5d82 ("hwmon: Add driver for Texas Instruments TMP512/513 sensor chips.")
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Link: https://lore.kernel.org/r/20230824204456.401580-2-biju.das.jz@bp.renesas.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/tmp513.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/tmp513.c b/drivers/hwmon/tmp513.c
index 7db5d0fc24a4..8ee6e02b02e3 100644
--- a/drivers/hwmon/tmp513.c
+++ b/drivers/hwmon/tmp513.c
@@ -434,7 +434,7 @@ static umode_t tmp51x_is_visible(const void *_data,
 
 	switch (type) {
 	case hwmon_temp:
-		if (data->id == tmp512 && channel == 4)
+		if (data->id == tmp512 && channel == 3)
 			return 0;
 		switch (attr) {
 		case hwmon_temp_input:

From 919a83d020a8dfa1411c1dc1cff23a833f0f5268 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Thu, 24 Aug 2023 21:44:55 +0100
Subject: [PATCH 1902/2047] hwmon: (tmp513) Simplify probe()

Simpilfy probe() by replacing device_get_match_data() and id lookup for
retrieving match data by i2c_get_match_data().

Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Link: https://lore.kernel.org/r/20230824204456.401580-3-biju.das.jz@bp.renesas.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/tmp513.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/hwmon/tmp513.c b/drivers/hwmon/tmp513.c
index 8ee6e02b02e3..9a180b1030c9 100644
--- a/drivers/hwmon/tmp513.c
+++ b/drivers/hwmon/tmp513.c
@@ -720,10 +720,7 @@ static int tmp51x_probe(struct i2c_client *client)
 	if (!data)
 		return -ENOMEM;
 
-	if (client->dev.of_node)
-		data->id = (uintptr_t)device_get_match_data(&client->dev);
-	else
-		data->id = i2c_match_id(tmp51x_id, client)->driver_data;
+	data->id = (uintptr_t)i2c_get_match_data(client);
 
 	ret = tmp51x_configure(dev, data);
 	if (ret < 0) {

From b8e2771b7f880a0b899b380ef15476160c693b59 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Fri, 25 Aug 2023 23:40:26 +0800
Subject: [PATCH 1903/2047] LoongArch: Remove redundant "source
 drivers/firmware/Kconfig"

In drivers/Kconfig, drivers/firmware/Kconfig is sourced for all ports so
there is no need to source it in the port-specific Kconfig file.  And
sourcing it here also caused the "Firmware Drivers" menu appeared two
times: one in the "Device Drivers" menu, another in the toplevel menu.
This is really puzzling so remove it.

Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/Kconfig | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index e71d5bf2cee0..465759f6b0ed 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -662,5 +662,3 @@ source "kernel/power/Kconfig"
 source "drivers/acpi/Kconfig"
 
 endmenu
-
-source "drivers/firmware/Kconfig"

From 3f301dc292eb122eff61b8b2906e519154b0327f Mon Sep 17 00:00:00 2001
From: WANG Xuerui <git@xen0n.name>
Date: Fri, 25 Aug 2023 23:40:26 +0800
Subject: [PATCH 1904/2047] LoongArch: Replace -ffreestanding with
 finer-grained -fno-builtin's

As explained by Nick in the original issue: the kernel usually does a
good job of providing library helpers that have similar semantics as
their ordinary userspace libc equivalents, but -ffreestanding disables
such libcall optimization and other related features in the compiler,
which can lead to unexpected things such as CONFIG_FORTIFY_SOURCE not
working (!).

However, due to the desire for better control over unaligned accesses
with respect to CONFIG_ARCH_STRICT_ALIGN, and also for avoiding the
GCC bug https://gcc.gnu.org/PR109465, we do want to still disable
optimizations for the memory libcalls (memcpy, memmove and memset for
now). Use finer-grained -fno-builtin-* toggles to achieve this without
losing source fortification and other libcall optimizations.

Closes: https://github.com/ClangBuiltLinux/linux/issues/1897
Reported-by: Nathan Chancellor <nathan@kernel.org>
Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index b1e5db51b61c..ef87bab46754 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -83,8 +83,8 @@ KBUILD_CFLAGS_KERNEL		+= -fPIE
 LDFLAGS_vmlinux			+= -static -pie --no-dynamic-linker -z notext
 endif
 
-cflags-y += -ffreestanding
 cflags-y += $(call cc-option, -mno-check-zero-division)
+cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
 
 load-y		= 0x9000000000200000
 bootvars-y	= VMLINUX_LOAD_ADDRESS=$(load-y)

From 347aa8dec220c7692981e1a175391ea7620abfeb Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Fri, 25 Aug 2023 23:40:26 +0800
Subject: [PATCH 1905/2047] LoongArch: Remove unneeded #include <asm/export.h>

There is no EXPORT_SYMBOL() line there, hence #include <asm/export.h>
is unneeded.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/kernel/mcount_dyn.S | 1 -
 arch/loongarch/lib/unaligned.S     | 1 -
 arch/loongarch/mm/tlbex.S          | 1 -
 3 files changed, 3 deletions(-)

diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S
index e16ab0b98e5a..482aa553aa2d 100644
--- a/arch/loongarch/kernel/mcount_dyn.S
+++ b/arch/loongarch/kernel/mcount_dyn.S
@@ -3,7 +3,6 @@
  * Copyright (C) 2022 Loongson Technology Corporation Limited
  */
 
-#include <asm/export.h>
 #include <asm/ftrace.h>
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S
index 9177fd638f07..185f82d85810 100644
--- a/arch/loongarch/lib/unaligned.S
+++ b/arch/loongarch/lib/unaligned.S
@@ -9,7 +9,6 @@
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 .L_fixup_handle_unaligned:
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index 4ad78703de6f..ca17dd3a1915 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -3,7 +3,6 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/loongarch.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>

From 55b46ff9396caca6ebb73a1d5a00f6f8698391cc Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Fri, 25 Aug 2023 23:40:26 +0800
Subject: [PATCH 1906/2047] LoongArch: Replace #include <asm/export.h> with
 #include <linux/export.h>

Commit ddb5cdbafaaad ("kbuild: generate KSYMTAB entries by modpost")
deprecated <asm/export.h>, which is now a wrapper of <linux/export.h>.

Replace #include <asm/export.h> with #include <linux/export.h>.

After all the <asm/export.h> lines are converted, <asm/export.h> and
<asm-generic/export.h> will be removed.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/kernel/fpu.S     | 2 +-
 arch/loongarch/kernel/mcount.S  | 2 +-
 arch/loongarch/lib/clear_user.S | 2 +-
 arch/loongarch/lib/copy_user.S  | 2 +-
 arch/loongarch/lib/memcpy.S     | 2 +-
 arch/loongarch/lib/memmove.S    | 2 +-
 arch/loongarch/lib/memset.S     | 2 +-
 arch/loongarch/mm/page.S        | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index f3df5f0a4509..501094a09f5d 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -6,12 +6,12 @@
  *
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/asm-offsets.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 #include <asm/fpregdef.h>
 #include <asm/loongarch.h>
 #include <asm/regdef.h>
diff --git a/arch/loongarch/kernel/mcount.S b/arch/loongarch/kernel/mcount.S
index cb8e5803de4b..3015896016a0 100644
--- a/arch/loongarch/kernel/mcount.S
+++ b/arch/loongarch/kernel/mcount.S
@@ -5,7 +5,7 @@
  * Copyright (C) 2022 Loongson Technology Corporation Limited
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/ftrace.h>
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index 9dcf71719387..0790eadce166 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -3,12 +3,12 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 .irp to, 0, 1, 2, 3, 4, 5, 6, 7
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index fecd08cad702..bfe3d2793d00 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -3,12 +3,12 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 .irp to, 0, 1, 2, 3, 4, 5, 6, 7
diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S
index 39ce6621c704..cc30b3b6252f 100644
--- a/arch/loongarch/lib/memcpy.S
+++ b/arch/loongarch/lib/memcpy.S
@@ -3,11 +3,11 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 SYM_FUNC_START(memcpy)
diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S
index 45b725ba7867..7dc76d1484b6 100644
--- a/arch/loongarch/lib/memmove.S
+++ b/arch/loongarch/lib/memmove.S
@@ -3,11 +3,11 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 SYM_FUNC_START(memmove)
diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S
index b39c6194e3ae..3f20f7996e8e 100644
--- a/arch/loongarch/lib/memset.S
+++ b/arch/loongarch/lib/memset.S
@@ -3,11 +3,11 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 .macro fill_to_64 r0
diff --git a/arch/loongarch/mm/page.S b/arch/loongarch/mm/page.S
index 4c874a7af0ad..7ad76551d313 100644
--- a/arch/loongarch/mm/page.S
+++ b/arch/loongarch/mm/page.S
@@ -2,9 +2,9 @@
 /*
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/page.h>
 #include <asm/regdef.h>
 

From a746ceb1f32cc375e983c4bc40378a72ff26c766 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Fri, 25 Aug 2023 23:40:26 +0800
Subject: [PATCH 1907/2047] LoongArch: Remove <asm/export.h>

All *.S files under arch/loongarch/ have been converted to include
<linux/export.h> instead of <asm/export.h>.

Remove <asm/export.h>.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/Kbuild | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 6b222f227342..93783fa24f6e 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += dma-contiguous.h
-generic-y += export.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += early_ioremap.h

From 6933c11fb501a40681d43336b3e0eee9df2abee0 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Fri, 25 Aug 2023 23:40:26 +0800
Subject: [PATCH 1908/2047] LoongArch: Do not kill the task in die() if
 notify_die() returns NOTIFY_STOP

If notify_die() returns NOTIFY_STOP, honor the return value from the
handler chain invocation in die() and return without killing the task
as, through a debugger, the fault may have been fixed. It makes sense
even if ignoring the event will make the system unstable: by allowing
access through a debugger it has been compromised already anyway. It
makes our port consistent with x86, arm64, riscv and csky.

Commit 20c0d2d44029 ("[PATCH] i386: pass proper trap numbers to die
chain handlers") may be the earliest of similar changes.

Link: https://lore.kernel.org/r/43DDF02E.76F0.0078.0@novell.com/
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/ptrace.h |  2 +-
 arch/loongarch/kernel/traps.c       | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
index 35f0958163ac..164bc90672e4 100644
--- a/arch/loongarch/include/asm/ptrace.h
+++ b/arch/loongarch/include/asm/ptrace.h
@@ -162,7 +162,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long val
 #define instruction_pointer(regs) ((regs)->csr_era)
 #define profile_pc(regs) instruction_pointer(regs)
 
-extern void die(const char *, struct pt_regs *) __noreturn;
+extern void die(const char *, struct pt_regs *);
 
 static inline void die_if_kernel(const char *str, struct pt_regs *regs)
 {
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 8fb5e7a77145..dfb34cc8b7e6 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -383,16 +383,15 @@ void show_registers(struct pt_regs *regs)
 
 static DEFINE_RAW_SPINLOCK(die_lock);
 
-void __noreturn die(const char *str, struct pt_regs *regs)
+void die(const char *str, struct pt_regs *regs)
 {
+	int ret;
 	static int die_counter;
-	int sig = SIGSEGV;
 
 	oops_enter();
 
-	if (notify_die(DIE_OOPS, str, regs, 0, current->thread.trap_nr,
-		       SIGSEGV) == NOTIFY_STOP)
-		sig = 0;
+	ret = notify_die(DIE_OOPS, str, regs, 0,
+			 current->thread.trap_nr, SIGSEGV);
 
 	console_verbose();
 	raw_spin_lock_irq(&die_lock);
@@ -414,7 +413,8 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 	if (panic_on_oops)
 		panic("Fatal exception");
 
-	make_task_dead(sig);
+	if (ret != NOTIFY_STOP)
+		make_task_dead(SIGSEGV);
 }
 
 static inline void setup_vint_size(unsigned int size)

From a038ae7148469ab6cf4afadb155a15d9554a0b59 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Fri, 25 Aug 2023 23:40:26 +0800
Subject: [PATCH 1909/2047] LoongArch: Return earlier in die() if notify_die()
 returns NOTIFY_STOP

After the call to oops_exit(), it should not panic or execute
the crash kernel if the oops is to be suppressed.

Suggested-by: Maciej W. Rozycki <macro@orcam.me.uk>
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/kernel/traps.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index dfb34cc8b7e6..89699db45cec 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -404,6 +404,9 @@ void die(const char *str, struct pt_regs *regs)
 
 	oops_exit();
 
+	if (ret == NOTIFY_STOP)
+		return;
+
 	if (regs && kexec_should_crash(current))
 		crash_kexec(regs);
 
@@ -413,8 +416,7 @@ void die(const char *str, struct pt_regs *regs)
 	if (panic_on_oops)
 		panic("Fatal exception");
 
-	if (ret != NOTIFY_STOP)
-		make_task_dead(SIGSEGV);
+	make_task_dead(SIGSEGV);
 }
 
 static inline void setup_vint_size(unsigned int size)

From 8879515e1219857df1f142e23840b91a37cbde88 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Fri, 25 Aug 2023 23:40:26 +0800
Subject: [PATCH 1910/2047] LoongArch: Add identifier names to arguments of
 die() declaration

Add identifier names to arguments of die() declaration in ptrace.h
to fix the following checkpatch warnings:

  WARNING: function definition argument 'const char *' should also have an identifier name
  WARNING: function definition argument 'struct pt_regs *' should also have an identifier name

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/ptrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
index 164bc90672e4..f3ddaed9ef7f 100644
--- a/arch/loongarch/include/asm/ptrace.h
+++ b/arch/loongarch/include/asm/ptrace.h
@@ -162,7 +162,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long val
 #define instruction_pointer(regs) ((regs)->csr_era)
 #define profile_pc(regs) instruction_pointer(regs)
 
-extern void die(const char *, struct pt_regs *);
+extern void die(const char *str, struct pt_regs *regs);
 
 static inline void die_if_kernel(const char *str, struct pt_regs *regs)
 {

From c337c849ab528241897ddfb6f334912ead0bfd66 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Fri, 25 Aug 2023 23:40:38 +0800
Subject: [PATCH 1911/2047] LoongArch: Put the body of play_dead() into
 arch_cpu_idle_dead()

The initial aim is to silence the following objtool warning:

arch/loongarch/kernel/process.o: warning: objtool: arch_cpu_idle_dead() falls through to next function start_thread()

According to tools/objtool/Documentation/objtool.txt, this is because
the last instruction of arch_cpu_idle_dead() is a call to a noreturn
function play_dead(). In order to silence the warning, one simple way
is to add the noreturn function play_dead() to objtool's hard-coded
global_noreturns array, that is to say, just put "NORETURN(play_dead)"
into tools/objtool/noreturns.h, it works well.

But I noticed that play_dead() is only defined once and only called by
arch_cpu_idle_dead(), so put the body of play_dead() into the caller
arch_cpu_idle_dead(), then remove the noreturn function play_dead() is
an alternative way which can reduce the overhead of the function call
at the same time.

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/smp.h | 2 --
 arch/loongarch/kernel/process.c  | 7 -------
 arch/loongarch/kernel/smp.c      | 2 +-
 3 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index 416b653bccb4..66ecb480c894 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -98,8 +98,6 @@ static inline void __cpu_die(unsigned int cpu)
 {
 	loongson_cpu_die(cpu);
 }
-
-extern void __noreturn play_dead(void);
 #endif
 
 #endif /* __ASM_SMP_H */
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 2e04eb07abb6..4ee1e9d6a65f 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -61,13 +61,6 @@ EXPORT_SYMBOL(__stack_chk_guard);
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-#ifdef CONFIG_HOTPLUG_CPU
-void __noreturn arch_cpu_idle_dead(void)
-{
-	play_dead();
-}
-#endif
-
 asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);
 
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 8ea1bbcf13a7..6667b0a90f81 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -317,7 +317,7 @@ void loongson_cpu_die(unsigned int cpu)
 	mb();
 }
 
-void play_dead(void)
+void __noreturn arch_cpu_idle_dead(void)
 {
 	register uint64_t addr;
 	register void (*init_fn)(void);

From 33f0467fe06934d5e4ea6e24ce2b9c65ce618e26 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song@linux.dev>
Date: Thu, 24 Aug 2023 20:46:59 -0700
Subject: [PATCH 1912/2047] kallsyms: Fix kallsyms_selftest failure

Kernel test robot reported a kallsyms_test failure when clang lto is
enabled (thin or full) and CONFIG_KALLSYMS_SELFTEST is also enabled.
I can reproduce in my local environment with the following error message
with thin lto:
  [    1.877897] kallsyms_selftest: Test for 1750th symbol failed: (tsc_cs_mark_unstable) addr=ffffffff81038090
  [    1.877901] kallsyms_selftest: abort

It appears that commit 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes
from promoted global functions") caused the failure. Commit 8cc32a9bbf29
changed cleanup_symbol_name() based on ".llvm." instead of '.' where
".llvm." is appended to a before-lto-optimization local symbol name.
We need to propagate such knowledge in kallsyms_selftest.c as well.

Further more, compare_symbol_name() in kallsyms.c needs change as well.
In scripts/kallsyms.c, kallsyms_names and kallsyms_seqs_of_names are used
to record symbol names themselves and index to symbol names respectively.
For example:
  kallsyms_names:
    ...
    __amd_smn_rw._entry       <== seq 1000
    __amd_smn_rw._entry.5     <== seq 1001
    __amd_smn_rw.llvm.<hash>  <== seq 1002
    ...

kallsyms_seqs_of_names are sorted based on cleanup_symbol_name() through, so
the order in kallsyms_seqs_of_names actually has

  index 1000:   seq 1002   <== __amd_smn_rw.llvm.<hash> (actual symbol comparison using '__amd_smn_rw')
  index 1001:   seq 1000   <== __amd_smn_rw._entry
  index 1002:   seq 1001   <== __amd_smn_rw._entry.5

Let us say at a particular point, at index 1000, symbol '__amd_smn_rw.llvm.<hash>'
is comparing to '__amd_smn_rw._entry' where '__amd_smn_rw._entry' is the one to
search e.g., with function kallsyms_on_each_match_symbol(). The current implementation
will find out '__amd_smn_rw._entry' is less than '__amd_smn_rw.llvm.<hash>' and
then continue to search e.g., index 999 and never found a match although the actual
index 1001 is a match.

To fix this issue, let us do cleanup_symbol_name() first and then do comparison.
In the above case, comparing '__amd_smn_rw' vs '__amd_smn_rw._entry' and
'__amd_smn_rw._entry' being greater than '__amd_smn_rw', the next comparison will
be > index 1000 and eventually index 1001 will be hit an a match is found.

For any symbols not having '.llvm.' substr, there is no functionality change
for compare_symbol_name().

Fixes: 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions")
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202308232200.1c932a90-oliver.sang@intel.com
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Reviewed-by: Song Liu <song@kernel.org>
Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/r/20230825034659.1037627-1-yonghong.song@linux.dev
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/kallsyms.c          | 17 +++++++----------
 kernel/kallsyms_selftest.c | 23 +----------------------
 2 files changed, 8 insertions(+), 32 deletions(-)

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 016d997131d4..e12d26c10dba 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -188,16 +188,13 @@ static bool cleanup_symbol_name(char *s)
 
 static int compare_symbol_name(const char *name, char *namebuf)
 {
-	int ret;
-
-	ret = strcmp(name, namebuf);
-	if (!ret)
-		return ret;
-
-	if (cleanup_symbol_name(namebuf) && !strcmp(name, namebuf))
-		return 0;
-
-	return ret;
+	/* The kallsyms_seqs_of_names is sorted based on names after
+	 * cleanup_symbol_name() (see scripts/kallsyms.c) if clang lto is enabled.
+	 * To ensure correct bisection in kallsyms_lookup_names(), do
+	 * cleanup_symbol_name(namebuf) before comparing name and namebuf.
+	 */
+	cleanup_symbol_name(namebuf);
+	return strcmp(name, namebuf);
 }
 
 static unsigned int get_symbol_seq(int index)
diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c
index a2e3745d15c4..e05ddc33a752 100644
--- a/kernel/kallsyms_selftest.c
+++ b/kernel/kallsyms_selftest.c
@@ -196,7 +196,7 @@ static bool match_cleanup_name(const char *s, const char *name)
 	if (!IS_ENABLED(CONFIG_LTO_CLANG))
 		return false;
 
-	p = strchr(s, '.');
+	p = strstr(s, ".llvm.");
 	if (!p)
 		return false;
 
@@ -344,27 +344,6 @@ static int test_kallsyms_basic_function(void)
 			goto failed;
 		}
 
-		/*
-		 * The first '.' may be the initial letter, in which case the
-		 * entire symbol name will be truncated to an empty string in
-		 * cleanup_symbol_name(). Do not test these symbols.
-		 *
-		 * For example:
-		 * cat /proc/kallsyms | awk '{print $3}' | grep -E "^\." | head
-		 * .E_read_words
-		 * .E_leading_bytes
-		 * .E_trailing_bytes
-		 * .E_write_words
-		 * .E_copy
-		 * .str.292.llvm.12122243386960820698
-		 * .str.24.llvm.12122243386960820698
-		 * .str.29.llvm.12122243386960820698
-		 * .str.75.llvm.12122243386960820698
-		 * .str.99.llvm.12122243386960820698
-		 */
-		if (IS_ENABLED(CONFIG_LTO_CLANG) && !namebuf[0])
-			continue;
-
 		lookup_addr = kallsyms_lookup_name(namebuf);
 
 		memset(stat, 0, sizeof(*stat));

From 382d4cd1847517ffcb1800fd462b625db7b2ebea Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 25 Aug 2023 21:50:33 +0200
Subject: [PATCH 1913/2047] lib/clz_ctz.c: Fix __clzdi2() and __ctzdi2() for
 32-bit kernels

The gcc compiler translates on some architectures the 64-bit
__builtin_clzll() function to a call to the libgcc function __clzdi2(),
which should take a 64-bit parameter on 32- and 64-bit platforms.

But in the current kernel code, the built-in __clzdi2() function is
defined to operate (wrongly) on 32-bit parameters if BITS_PER_LONG ==
32, thus the return values on 32-bit kernels are in the range from
[0..31] instead of the expected [0..63] range.

This patch fixes the in-kernel functions __clzdi2() and __ctzdi2() to
take a 64-bit parameter on 32-bit kernels as well, thus it makes the
functions identical for 32- and 64-bit kernels.

This bug went unnoticed since kernel 3.11 for over 10 years, and here
are some possible reasons for that:

 a) Some architectures have assembly instructions to count the bits and
    which are used instead of calling __clzdi2(), e.g. on x86 the bsr
    instruction and on ppc cntlz is used. On such architectures the
    wrong __clzdi2() implementation isn't used and as such the bug has
    no effect and won't be noticed.

 b) Some architectures link to libgcc.a, and the in-kernel weak
    functions get replaced by the correct 64-bit variants from libgcc.a.

 c) __builtin_clzll() and __clzdi2() doesn't seem to be used in many
    places in the kernel, and most likely only in uncritical functions,
    e.g. when printing hex values via seq_put_hex_ll(). The wrong return
    value will still print the correct number, but just in a wrong
    formatting (e.g. with too many leading zeroes).

 d) 32-bit kernels aren't used that much any longer, so they are less
    tested.

A trivial testcase to verify if the currently running 32-bit kernel is
affected by the bug is to look at the output of /proc/self/maps:

Here the kernel uses a correct implementation of __clzdi2():

  root@debian:~# cat /proc/self/maps
  00010000-00019000 r-xp 00000000 08:05 787324     /usr/bin/cat
  00019000-0001a000 rwxp 00009000 08:05 787324     /usr/bin/cat
  0001a000-0003b000 rwxp 00000000 00:00 0          [heap]
  f7551000-f770d000 r-xp 00000000 08:05 794765     /usr/lib/hppa-linux-gnu/libc.so.6
  ...

and this kernel uses the broken implementation of __clzdi2():

  root@debian:~# cat /proc/self/maps
  0000000010000-0000000019000 r-xp 00000000 000000008:000000005 787324  /usr/bin/cat
  0000000019000-000000001a000 rwxp 000000009000 000000008:000000005 787324  /usr/bin/cat
  000000001a000-000000003b000 rwxp 00000000 00:00 0  [heap]
  00000000f73d1000-00000000f758d000 r-xp 00000000 000000008:000000005 794765  /usr/lib/hppa-linux-gnu/libc.so.6
  ...

Signed-off-by: Helge Deller <deller@gmx.de>
Fixes: 4df87bb7b6a22 ("lib: add weak clz/ctz functions")
Cc: Chanho Min <chanho.min@lge.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: stable@vger.kernel.org # v3.11+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/clz_ctz.c | 32 ++++++--------------------------
 1 file changed, 6 insertions(+), 26 deletions(-)

diff --git a/lib/clz_ctz.c b/lib/clz_ctz.c
index 0d3a686b5ba2..fb8c0c5c2bd2 100644
--- a/lib/clz_ctz.c
+++ b/lib/clz_ctz.c
@@ -28,36 +28,16 @@ int __weak __clzsi2(int val)
 }
 EXPORT_SYMBOL(__clzsi2);
 
-int __weak __clzdi2(long val);
-int __weak __ctzdi2(long val);
-#if BITS_PER_LONG == 32
-
-int __weak __clzdi2(long val)
+int __weak __clzdi2(u64 val);
+int __weak __clzdi2(u64 val)
 {
-	return 32 - fls((int)val);
+	return 64 - fls64(val);
 }
 EXPORT_SYMBOL(__clzdi2);
 
-int __weak __ctzdi2(long val)
+int __weak __ctzdi2(u64 val);
+int __weak __ctzdi2(u64 val)
 {
-	return __ffs((u32)val);
+	return __ffs64(val);
 }
 EXPORT_SYMBOL(__ctzdi2);
-
-#elif BITS_PER_LONG == 64
-
-int __weak __clzdi2(long val)
-{
-	return 64 - fls64((u64)val);
-}
-EXPORT_SYMBOL(__clzdi2);
-
-int __weak __ctzdi2(long val)
-{
-	return __ffs64((u64)val);
-}
-EXPORT_SYMBOL(__ctzdi2);
-
-#else
-#error BITS_PER_LONG not 32 or 64
-#endif

From 76903a9648744c081547c91f31ec3917204b74e5 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song@linux.dev>
Date: Fri, 25 Aug 2023 13:20:36 -0700
Subject: [PATCH 1914/2047] kallsyms: Change func signature for
 cleanup_symbol_name()

All users of cleanup_symbol_name() do not use the return value.
So let us change the return value of cleanup_symbol_name() to
'void' to reflect its usage pattern.

Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230825202036.441212-1-yonghong.song@linux.dev
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/kallsyms.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index e12d26c10dba..18edd57b5fe8 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -163,12 +163,12 @@ unsigned long kallsyms_sym_address(int idx)
 	return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
 }
 
-static bool cleanup_symbol_name(char *s)
+static void cleanup_symbol_name(char *s)
 {
 	char *res;
 
 	if (!IS_ENABLED(CONFIG_LTO_CLANG))
-		return false;
+		return;
 
 	/*
 	 * LLVM appends various suffixes for local functions and variables that
@@ -178,12 +178,10 @@ static bool cleanup_symbol_name(char *s)
 	 * - foo.llvm.[0-9a-f]+
 	 */
 	res = strstr(s, ".llvm.");
-	if (res) {
+	if (res)
 		*res = '\0';
-		return true;
-	}
 
-	return false;
+	return;
 }
 
 static int compare_symbol_name(const char *name, char *namebuf)

From 5f536ac6a5a7b67351e4e5ae4f9e1e57d31268e6 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Aug 2023 16:59:56 -0700
Subject: [PATCH 1915/2047] LoadPin: Annotate struct
 dm_verity_loadpin_trusted_root_digest with __counted_by

Prepare for the coming implementation by GCC and Clang of the __counted_by
attribute. Flexible array members annotated with __counted_by can have
their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS
(for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family
functions).

As found with Coccinelle[1], add __counted_by for struct dm_verity_loadpin_trusted_root_digest.
Additionally, since the element count member must be set before accessing
the annotated flexible array member, move its initialization earlier.

[1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci

Cc: Alasdair Kergon <agk@redhat.com>
Cc: Mike Snitzer <snitzer@kernel.org>
Cc: dm-devel@redhat.com
Cc: Paul Moore <paul@paul-moore.com>
Cc: James Morris <jmorris@namei.org>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: linux-security-module@vger.kernel.org
Link: https://lore.kernel.org/r/20230817235955.never.762-kees@kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/dm-verity-loadpin.h | 2 +-
 security/loadpin/loadpin.c        | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/linux/dm-verity-loadpin.h b/include/linux/dm-verity-loadpin.h
index 552b817ab102..3ac6dbaeaa37 100644
--- a/include/linux/dm-verity-loadpin.h
+++ b/include/linux/dm-verity-loadpin.h
@@ -12,7 +12,7 @@ extern struct list_head dm_verity_loadpin_trusted_root_digests;
 struct dm_verity_loadpin_trusted_root_digest {
 	struct list_head node;
 	unsigned int len;
-	u8 data[];
+	u8 data[] __counted_by(len);
 };
 
 #if IS_ENABLED(CONFIG_SECURITY_LOADPIN_VERITY)
diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c
index ebae964f7cc9..a9d40456a064 100644
--- a/security/loadpin/loadpin.c
+++ b/security/loadpin/loadpin.c
@@ -336,6 +336,7 @@ static int read_trusted_verity_root_digests(unsigned int fd)
 			rc = -ENOMEM;
 			goto err;
 		}
+		trd->len = len;
 
 		if (hex2bin(trd->data, d, len)) {
 			kfree(trd);
@@ -343,8 +344,6 @@ static int read_trusted_verity_root_digests(unsigned int fd)
 			goto err;
 		}
 
-		trd->len = len;
-
 		list_add_tail(&trd->node, &dm_verity_loadpin_trusted_root_digests);
 	}
 

From a9ac2e18779597f280d68a5b5f5bdd51a34080fa Mon Sep 17 00:00:00 2001
From: Suman Ghosh <sumang@marvell.com>
Date: Thu, 24 Aug 2023 13:40:30 +0530
Subject: [PATCH 1916/2047] octeontx2-pf: Fix PFC TX scheduler free

During PFC TX schedulers free, flag TXSCHQ_FREE_ALL was being set
which caused free up all schedulers other than the PFC schedulers.
This patch fixes that to free only the PFC Tx schedulers.

Fixes: 99c969a83d82 ("octeontx2-pf: Add egress PFC support")
Signed-off-by: Suman Ghosh <sumang@marvell.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20230824081032.436432-2-sumang@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../ethernet/marvell/octeontx2/nic/otx2_common.c  |  1 +
 .../ethernet/marvell/octeontx2/nic/otx2_dcbnl.c   | 15 ++++-----------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 3e1c70c74622..b9712040a0bc 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -804,6 +804,7 @@ void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq)
 
 	mutex_unlock(&pfvf->mbox.lock);
 }
+EXPORT_SYMBOL(otx2_txschq_free_one);
 
 void otx2_txschq_stop(struct otx2_nic *pfvf)
 {
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
index ccaf97bb1ce0..6492749dd7c8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
@@ -125,19 +125,12 @@ int otx2_pfc_txschq_alloc(struct otx2_nic *pfvf)
 
 static int otx2_pfc_txschq_stop_one(struct otx2_nic *pfvf, u8 prio)
 {
-	struct nix_txsch_free_req *free_req;
+	int lvl;
 
-	mutex_lock(&pfvf->mbox.lock);
 	/* free PFC TLx nodes */
-	free_req = otx2_mbox_alloc_msg_nix_txsch_free(&pfvf->mbox);
-	if (!free_req) {
-		mutex_unlock(&pfvf->mbox.lock);
-		return -ENOMEM;
-	}
-
-	free_req->flags = TXSCHQ_FREE_ALL;
-	otx2_sync_mbox_msg(&pfvf->mbox);
-	mutex_unlock(&pfvf->mbox.lock);
+	for (lvl = 0; lvl < pfvf->hw.txschq_link_cfg_lvl; lvl++)
+		otx2_txschq_free_one(pfvf, lvl,
+				     pfvf->pfc_schq_list[lvl][prio]);
 
 	pfvf->pfc_alloc_status[prio] = false;
 	return 0;

From 47bcc9c1cf6aa60156c7532983090e86d9d171b6 Mon Sep 17 00:00:00 2001
From: Hariprasad Kelam <hkelam@marvell.com>
Date: Thu, 24 Aug 2023 13:40:31 +0530
Subject: [PATCH 1917/2047] octeontx2-af: CN10KB: fix PFC configuration

Suppose user has enabled pfc with prio 0,1 on a PF netdev(eth0)
	dcb pfc set dev eth0 prio-pfc o:on 1:on
later user enabled pfc priorities 2 and 3 on the VF interface(eth1)
	dcb pfc set dev eth1 prio-pfc 2:on 3:on

Instead of enabling pfc on all priorities (0..3), the driver only
enables on priorities 2,3. This patch corrects the issue by using
the proper CSR address.

Fixes: b9d0fedc6234 ("octeontx2-af: cn10kb: Add RPM_USX MAC support")
Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20230824081032.436432-3-sumang@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
index b4fcb20c3f4f..af21e2030cff 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
@@ -355,8 +355,8 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause,
 
 void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable)
 {
+	u64 cfg, pfc_class_mask_cfg;
 	rpm_t *rpm = rpmd;
-	u64 cfg;
 
 	/* ALL pause frames received are completely ignored */
 	cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
@@ -380,9 +380,11 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable)
 		rpm_write(rpm, 0, RPMX_CMR_CHAN_MSK_OR, ~0ULL);
 
 	/* Disable all PFC classes */
-	cfg = rpm_read(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL);
+	pfc_class_mask_cfg = is_dev_rpm2(rpm) ? RPM2_CMRX_PRT_CBFC_CTL :
+						RPMX_CMRX_PRT_CBFC_CTL;
+	cfg = rpm_read(rpm, lmac_id, pfc_class_mask_cfg);
 	cfg = FIELD_SET(RPM_PFC_CLASS_MASK, 0, cfg);
-	rpm_write(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL, cfg);
+	rpm_write(rpm, lmac_id, pfc_class_mask_cfg, cfg);
 }
 
 int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat)
@@ -605,8 +607,11 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p
 	if (!is_lmac_valid(rpm, lmac_id))
 		return -ENODEV;
 
+	pfc_class_mask_cfg = is_dev_rpm2(rpm) ? RPM2_CMRX_PRT_CBFC_CTL :
+						RPMX_CMRX_PRT_CBFC_CTL;
+
 	cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG);
-	class_en = rpm_read(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL);
+	class_en = rpm_read(rpm, lmac_id, pfc_class_mask_cfg);
 	pfc_en |= FIELD_GET(RPM_PFC_CLASS_MASK, class_en);
 
 	if (rx_pause) {
@@ -635,10 +640,6 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p
 		cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE;
 
 	rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg);
-
-	pfc_class_mask_cfg = is_dev_rpm2(rpm) ? RPM2_CMRX_PRT_CBFC_CTL :
-						RPMX_CMRX_PRT_CBFC_CTL;
-
 	rpm_write(rpm, lmac_id, pfc_class_mask_cfg, class_en);
 
 	return 0;

From 597d0ec0e4ca6a912affea4cc94df08959e9ec74 Mon Sep 17 00:00:00 2001
From: Suman Ghosh <sumang@marvell.com>
Date: Thu, 24 Aug 2023 13:40:32 +0530
Subject: [PATCH 1918/2047] cteonxt2-pf: Fix backpressure config for multiple
 PFC priorities to work simultaneously

MAC (CGX or RPM) asserts backpressure at TL3 or TL2 node of the egress
hierarchical scheduler tree depending on link level config done. If
there are multiple PFC priorities enabled at a time and for all such
flows to backoff, each priority will have to assert backpressure at
different TL3/TL2 scheduler nodes and these flows will need to submit
egress pkts to these nodes.

Current PFC configuration has an issue where in only one backpressure
scheduler node is being allocated which is resulting in only one PFC
priority to work. This patch fixes this issue.

Fixes: 99c969a83d82 ("octeontx2-pf: Add egress PFC support")
Signed-off-by: Suman Ghosh <sumang@marvell.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20230824081032.436432-4-sumang@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
index 6492749dd7c8..bfddbff7bcdf 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
@@ -70,7 +70,7 @@ static int otx2_pfc_txschq_alloc_one(struct otx2_nic *pfvf, u8 prio)
 	 * link config level. These rest of the scheduler can be
 	 * same as hw.txschq_list.
 	 */
-	for (lvl = 0; lvl < pfvf->hw.txschq_link_cfg_lvl; lvl++)
+	for (lvl = 0; lvl <= pfvf->hw.txschq_link_cfg_lvl; lvl++)
 		req->schq[lvl] = 1;
 
 	rc = otx2_sync_mbox_msg(&pfvf->mbox);
@@ -83,7 +83,7 @@ static int otx2_pfc_txschq_alloc_one(struct otx2_nic *pfvf, u8 prio)
 		return PTR_ERR(rsp);
 
 	/* Setup transmit scheduler list */
-	for (lvl = 0; lvl < pfvf->hw.txschq_link_cfg_lvl; lvl++) {
+	for (lvl = 0; lvl <= pfvf->hw.txschq_link_cfg_lvl; lvl++) {
 		if (!rsp->schq[lvl])
 			return -ENOSPC;
 
@@ -128,7 +128,7 @@ static int otx2_pfc_txschq_stop_one(struct otx2_nic *pfvf, u8 prio)
 	int lvl;
 
 	/* free PFC TLx nodes */
-	for (lvl = 0; lvl < pfvf->hw.txschq_link_cfg_lvl; lvl++)
+	for (lvl = 0; lvl <= pfvf->hw.txschq_link_cfg_lvl; lvl++)
 		otx2_txschq_free_one(pfvf, lvl,
 				     pfvf->pfc_schq_list[lvl][prio]);
 

From c4413a20fa6d7c4888009fb7dd391685f196cd36 Mon Sep 17 00:00:00 2001
From: Alex Austin <alex.austin@amd.com>
Date: Thu, 24 Aug 2023 17:46:57 +0100
Subject: [PATCH 1919/2047] sfc: Check firmware supports Ethernet PTP filter

Not all firmware variants support RSS filters. Do not fail all PTP
functionality when raw ethernet PTP filters fail to insert.

Fixes: e4616f64726b ("sfc: support PTP over Ethernet")
Signed-off-by: Alex Austin <alex.austin@amd.com>
Acked-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Link: https://lore.kernel.org/r/20230824164657.42379-1-alex.austin@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/sfc/ptp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 0c40571133cb..00cf6de3bb2b 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1485,7 +1485,9 @@ static int efx_ptp_insert_multicast_filters(struct efx_nic *efx)
 			goto fail;
 
 		rc = efx_ptp_insert_eth_multicast_filter(efx);
-		if (rc < 0)
+
+		/* Not all firmware variants support this filter */
+		if (rc < 0 && rc != -EPROTONOSUPPORT)
 			goto fail;
 	}
 

From b3d26c5702c7d6c45456326e56d2ccf3f103e60f Mon Sep 17 00:00:00 2001
From: Budimir Markovic <markovicbudimir@gmail.com>
Date: Thu, 24 Aug 2023 01:49:05 -0700
Subject: [PATCH 1920/2047] net/sched: sch_hfsc: Ensure inner classes have fsc
 curve

HFSC assumes that inner classes have an fsc curve, but it is currently
possible for classes without an fsc curve to become parents. This leads
to bugs including a use-after-free.

Don't allow non-root classes without HFSC_FSC to become parents.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Budimir Markovic <markovicbudimir@gmail.com>
Signed-off-by: Budimir Markovic <markovicbudimir@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20230824084905.422-1-markovicbudimir@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/sch_hfsc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 70b0c5873d32..61d52594ff6d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1012,6 +1012,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		if (parent == NULL)
 			return -ENOENT;
 	}
+	if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
+		NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC");
+		return -EINVAL;
+	}
 
 	if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
 		return -EINVAL;

From 91202ce78fcd070982a115f0bf6f328af619aa00 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Thu, 24 Aug 2023 09:17:50 -0700
Subject: [PATCH 1921/2047] pds_core: protect devlink callbacks from fw_down
 state

Don't access structs that have been cleared when in the fw_down
state and the various structs have been cleaned and are waiting
to recover.  This caused a panic on rmmod when already in fw_down
and devlink_param_unregister() tried to check the parameters.

Fixes: 40ced8944536 ("pds_core: devlink params for enabling VIF support")
Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Brett Creeley <brett.creeley@amd.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20230824161754.34264-2-shannon.nelson@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/amd/pds_core/devlink.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
index 9c6b3653c1c7..d9607033bbf2 100644
--- a/drivers/net/ethernet/amd/pds_core/devlink.c
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -10,6 +10,9 @@ pdsc_viftype *pdsc_dl_find_viftype_by_id(struct pdsc *pdsc,
 {
 	int vt;
 
+	if (!pdsc->viftype_status)
+		return NULL;
+
 	for (vt = 0; vt < PDS_DEV_TYPE_MAX; vt++) {
 		if (pdsc->viftype_status[vt].dl_id == dl_id)
 			return &pdsc->viftype_status[vt];

From e48b894a1db7f6ce66bff0402ab21ff9f0e56034 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Thu, 24 Aug 2023 09:17:51 -0700
Subject: [PATCH 1922/2047] pds_core: no health reporter in VF

Make sure the health reporter is set up before we use it in
our devlink health updates, especially since the VF doesn't
set up the health reporter.

Fixes: 25b450c05a49 ("pds_core: add devlink health facilities")
Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Brett Creeley <brett.creeley@amd.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20230824161754.34264-3-shannon.nelson@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/amd/pds_core/core.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index f2c79456d745..383e3311a52c 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -524,7 +524,8 @@ static void pdsc_fw_down(struct pdsc *pdsc)
 	}
 
 	/* Notify clients of fw_down */
-	devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
+	if (pdsc->fw_reporter)
+		devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
 	pdsc_notify(PDS_EVENT_RESET, &reset_event);
 
 	pdsc_stop(pdsc);
@@ -554,8 +555,9 @@ static void pdsc_fw_up(struct pdsc *pdsc)
 
 	/* Notify clients of fw_up */
 	pdsc->fw_recoveries++;
-	devlink_health_reporter_state_update(pdsc->fw_reporter,
-					     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+	if (pdsc->fw_reporter)
+		devlink_health_reporter_state_update(pdsc->fw_reporter,
+						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
 	pdsc_notify(PDS_EVENT_RESET, &reset_event);
 
 	return;

From 95e383226d6fcda6c217912f11edf8d74de9cc85 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Thu, 24 Aug 2023 09:17:52 -0700
Subject: [PATCH 1923/2047] pds_core: no reset command for VF

The VF doesn't need to send a reset command, and in a PCI reset
scenario it might not have a valid IO space to write to anyway.

Fixes: 523847df1b37 ("pds_core: add devcmd device interfaces")
Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Brett Creeley <brett.creeley@amd.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20230824161754.34264-4-shannon.nelson@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/amd/pds_core/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 383e3311a52c..36f9b932b9e2 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -464,7 +464,8 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
 {
 	int i;
 
-	pdsc_devcmd_reset(pdsc);
+	if (!pdsc->pdev->is_virtfn)
+		pdsc_devcmd_reset(pdsc);
 	pdsc_qcq_free(pdsc, &pdsc->notifyqcq);
 	pdsc_qcq_free(pdsc, &pdsc->adminqcq);
 

From 969cfd4c8ca50c32901342cdd3d677c3ffe61371 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Thu, 24 Aug 2023 09:17:53 -0700
Subject: [PATCH 1924/2047] pds_core: check for work queue before use

Add a check that the wq exists before queuing up work for a
failed devcmd, as the PF is responsible for health and the VF
doesn't have a wq.

Fixes: c2dbb0904310 ("pds_core: health timer and workqueue")
Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Brett Creeley <brett.creeley@amd.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20230824161754.34264-5-shannon.nelson@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/amd/pds_core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
index debe5216fe29..524f422ee7ac 100644
--- a/drivers/net/ethernet/amd/pds_core/dev.c
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -183,7 +183,7 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
 	err = pdsc_devcmd_wait(pdsc, max_seconds);
 	memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
 
-	if (err == -ENXIO || err == -ETIMEDOUT)
+	if ((err == -ENXIO || err == -ETIMEDOUT) && pdsc->wq)
 		queue_work(pdsc->wq, &pdsc->health_work);
 
 	return err;

From 0ea064e74bc8f915aba3f2d0fb3418247a09b73d Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Thu, 24 Aug 2023 09:17:54 -0700
Subject: [PATCH 1925/2047] pds_core: pass opcode to devcmd_wait

Don't rely on the PCI memory for the devcmd opcode because we
read a 0xff value if the PCI bus is broken, which can cause us
to report a bogus dev_cmd opcode later.

Fixes: 523847df1b37 ("pds_core: add devcmd device interfaces")
Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Brett Creeley <brett.creeley@amd.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20230824161754.34264-6-shannon.nelson@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/amd/pds_core/dev.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
index 524f422ee7ac..f77cd9f5a2fd 100644
--- a/drivers/net/ethernet/amd/pds_core/dev.c
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -121,7 +121,7 @@ static const char *pdsc_devcmd_str(int opcode)
 	}
 }
 
-static int pdsc_devcmd_wait(struct pdsc *pdsc, int max_seconds)
+static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds)
 {
 	struct device *dev = pdsc->dev;
 	unsigned long start_time;
@@ -131,9 +131,6 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, int max_seconds)
 	int done = 0;
 	int err = 0;
 	int status;
-	int opcode;
-
-	opcode = ioread8(&pdsc->cmd_regs->cmd.opcode);
 
 	start_time = jiffies;
 	max_wait = start_time + (max_seconds * HZ);
@@ -180,7 +177,7 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
 
 	memcpy_toio(&pdsc->cmd_regs->cmd, cmd, sizeof(*cmd));
 	pdsc_devcmd_dbell(pdsc);
-	err = pdsc_devcmd_wait(pdsc, max_seconds);
+	err = pdsc_devcmd_wait(pdsc, cmd->opcode, max_seconds);
 	memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
 
 	if ((err == -ENXIO || err == -ETIMEDOUT) && pdsc->wq)

From 4d2b748305e96fb76202a0d1072a285b1500bff3 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Fri, 25 Aug 2023 16:12:26 +0200
Subject: [PATCH 1926/2047] x86/microcode: Remove remaining references to
 CONFIG_MICROCODE_AMD

Commit e6bcfdd75d53 ("x86/microcode: Hide the config knob") removed the
MICROCODE_AMD config, but left some references in defconfigs and comments,
that have no effect on any kernel build around.

Clean up those remaining config references. No functional change.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230825141226.13566-1-lukas.bulwahn@gmail.com
---
 arch/x86/configs/i386_defconfig          | 1 -
 arch/x86/configs/x86_64_defconfig        | 1 -
 arch/x86/kernel/cpu/microcode/internal.h | 4 ++--
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 3cf34912abfe..c33250fa8a49 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -33,7 +33,6 @@ CONFIG_HYPERVISOR_GUEST=y
 CONFIG_PARAVIRT=y
 CONFIG_NR_CPUS=8
 CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
-CONFIG_MICROCODE_AMD=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 27759236fd60..2aae0c0b2e16 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -31,7 +31,6 @@ CONFIG_SMP=y
 CONFIG_HYPERVISOR_GUEST=y
 CONFIG_PARAVIRT=y
 CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
-CONFIG_MICROCODE_AMD=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 CONFIG_NUMA=y
diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h
index 8ee9392e5c68..bf883aa71233 100644
--- a/arch/x86/kernel/cpu/microcode/internal.h
+++ b/arch/x86/kernel/cpu/microcode/internal.h
@@ -104,7 +104,7 @@ int save_microcode_in_initrd_amd(unsigned int family);
 void reload_ucode_amd(unsigned int cpu);
 struct microcode_ops *init_amd_microcode(void);
 void exit_amd_microcode(void);
-#else /* CONFIG_MICROCODE_AMD */
+#else /* CONFIG_CPU_SUP_AMD */
 static inline void load_ucode_amd_bsp(unsigned int family) { }
 static inline void load_ucode_amd_ap(unsigned int family) { }
 static inline void load_ucode_amd_early(unsigned int family) { }
@@ -112,7 +112,7 @@ static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EI
 static inline void reload_ucode_amd(unsigned int cpu) { }
 static inline struct microcode_ops *init_amd_microcode(void) { return NULL; }
 static inline void exit_amd_microcode(void) { }
-#endif /* !CONFIG_MICROCODE_AMD */
+#endif /* !CONFIG_CPU_SUP_AMD */
 
 #ifdef CONFIG_CPU_SUP_INTEL
 void load_ucode_intel_bsp(void);

From 656f9aec07dba7c61d469727494a5d1b18d0bef4 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Sat, 26 Aug 2023 22:21:57 +0800
Subject: [PATCH 1927/2047] LoongArch: Ensure FP/SIMD registers in the core
 dump file is up to date

This is a port of commit 379eb01c21795edb4c ("riscv: Ensure the value
of FP registers in the core dump file is up to date").

The values of FP/SIMD registers in the core dump file come from the
thread.fpu. However, kernel saves the FP/SIMD registers only before
scheduling out the process. If no process switch happens during the
exception handling, kernel will not have a chance to save the latest
values of FP/SIMD registers. So it may cause their values in the core
dump file incorrect. To solve this problem, force fpr_get()/simd_get()
to save the FP/SIMD registers into the thread.fpu if the target task
equals the current task.

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/fpu.h | 22 ++++++++++++++++++----
 arch/loongarch/kernel/ptrace.c   |  4 ++++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
index b541f6248837..c2d8962fda00 100644
--- a/arch/loongarch/include/asm/fpu.h
+++ b/arch/loongarch/include/asm/fpu.h
@@ -173,16 +173,30 @@ static inline void restore_fp(struct task_struct *tsk)
 		_restore_fp(&tsk->thread.fpu);
 }
 
-static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
+static inline void save_fpu_regs(struct task_struct *tsk)
 {
+	unsigned int euen;
+
 	if (tsk == current) {
 		preempt_disable();
-		if (is_fpu_owner())
+
+		euen = csr_read32(LOONGARCH_CSR_EUEN);
+
+#ifdef CONFIG_CPU_HAS_LASX
+		if (euen & CSR_EUEN_LASXEN)
+			_save_lasx(&current->thread.fpu);
+		else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+		if (euen & CSR_EUEN_LSXEN)
+			_save_lsx(&current->thread.fpu);
+		else
+#endif
+		if (euen & CSR_EUEN_FPEN)
 			_save_fp(&current->thread.fpu);
+
 		preempt_enable();
 	}
-
-	return tsk->thread.fpu.fpr;
 }
 
 static inline int is_simd_owner(void)
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index a0767c3a0f0a..f72adbf530c6 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -147,6 +147,8 @@ static int fpr_get(struct task_struct *target,
 {
 	int r;
 
+	save_fpu_regs(target);
+
 	if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
 		r = gfpr_get(target, &to);
 	else
@@ -278,6 +280,8 @@ static int simd_get(struct task_struct *target,
 {
 	const unsigned int wr_size = NUM_FPU_REGS * regset->size;
 
+	save_fpu_regs(target);
+
 	if (!tsk_used_math(target)) {
 		/* The task hasn't used FP or LSX, fill with 0xff */
 		copy_pad_fprs(target, regset, &to, 0);

From 9730870b484e9de852b51df08a8b357b1129489e Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Sat, 26 Aug 2023 22:21:57 +0800
Subject: [PATCH 1928/2047] LoongArch: Fix hw_breakpoint_control() for
 watchpoints

In hw_breakpoint_control(), encode_ctrl_reg() has already encoded the
MWPnCFG3_LoadEn/MWPnCFG3_StoreEn bits in info->ctrl. We don't need to
add (1 << MWPnCFG3_LoadEn | 1 << MWPnCFG3_StoreEn) unconditionally.

Otherwise we can't set read watchpoint and write watchpoint separately.

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/kernel/hw_breakpoint.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c
index 021b59c248fa..fc55c4de2a11 100644
--- a/arch/loongarch/kernel/hw_breakpoint.c
+++ b/arch/loongarch/kernel/hw_breakpoint.c
@@ -207,8 +207,7 @@ static int hw_breakpoint_control(struct perf_event *bp,
 			write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE);
 		} else {
 			ctrl = encode_ctrl_reg(info->ctrl);
-			write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE |
-				     1 << MWPnCFG3_LoadEn | 1 << MWPnCFG3_StoreEn);
+			write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE);
 		}
 		enable = csr_read64(LOONGARCH_CSR_CRMD);
 		csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD);

From 9f5deb551655a4cff04b21ecffdcdab75112da3a Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 26 Aug 2023 17:40:04 +0200
Subject: [PATCH 1929/2047] genirq: Fix software resend lockup and nested
 resend

The switch to using hlist for managing software resend of interrupts
broke resend in at least two ways:

First, unconditionally adding interrupt descriptors to the resend list can
corrupt the list when the descriptor in question has already been
added. This causes the resend tasklet to loop indefinitely with interrupts
disabled as was recently reported with the Lenovo ThinkPad X13s after
threaded NAPI was disabled in the ath11k WiFi driver.

This bug is easily fixed by restoring the old semantics of irq_sw_resend()
so that it can be called also for descriptors that have already been marked
for resend.

Second, the offending commit also broke software resend of nested
interrupts by simply discarding the code that made sure that such
interrupts are retriggered using the parent interrupt.

Add back the corresponding code that adds the parent descriptor to the
resend list.

Fixes: bc06a9e08742 ("genirq: Use hlist for managing resend handlers")
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/lkml/20230809073432.4193-1-johan+linaro@kernel.org/
Link: https://lore.kernel.org/r/20230826154004.1417-1-johan+linaro@kernel.org
---
 kernel/irq/resend.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index edec335c0a7a..5f2c66860ac6 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -68,11 +68,16 @@ static int irq_sw_resend(struct irq_desc *desc)
 		 */
 		if (!desc->parent_irq)
 			return -EINVAL;
+
+		desc = irq_to_desc(desc->parent_irq);
+		if (!desc)
+			return -EINVAL;
 	}
 
 	/* Add to resend_list and activate the softirq: */
 	raw_spin_lock(&irq_resend_lock);
-	hlist_add_head(&desc->resend_node, &irq_resend_list);
+	if (hlist_unhashed(&desc->resend_node))
+		hlist_add_head(&desc->resend_node, &irq_resend_list);
 	raw_spin_unlock(&irq_resend_lock);
 	tasklet_schedule(&resend_tasklet);
 	return 0;

From 439c4be98318751415a8f3690e4fa0fb62474a72 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Date: Thu, 24 Aug 2023 12:28:37 +0100
Subject: [PATCH 1930/2047] sfc: introduce ethernet pedit set action
 infrastructure

Introduce the initial ethernet pedit set action infrastructure in
preparation for adding mac src and dst pedit action offloads.

Co-developed-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/mae.c | 83 ++++++++++++++++++++++++++++++++--
 drivers/net/ethernet/sfc/mae.h |  4 ++
 drivers/net/ethernet/sfc/tc.c  | 70 ++++++++++++++++++++++++++++
 drivers/net/ethernet/sfc/tc.h  | 56 ++++++++++++++++++++---
 4 files changed, 202 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c
index 3b8780c76b6e..a7ad7ab8c5f4 100644
--- a/drivers/net/ethernet/sfc/mae.c
+++ b/drivers/net/ethernet/sfc/mae.c
@@ -1219,6 +1219,71 @@ fail:
 	return rc;
 }
 
+/**
+ * efx_mae_allocate_pedit_mac() - allocate pedit MAC address in HW.
+ * @efx:	NIC we're installing a pedit MAC address on
+ * @ped:	pedit MAC action to be installed
+ *
+ * Attempts to install @ped in HW and populates its id with an index of this
+ * entry in the firmware MAC address table on success.
+ *
+ * Return: negative value on error, 0 in success.
+ */
+int efx_mae_allocate_pedit_mac(struct efx_nic *efx,
+			       struct efx_tc_mac_pedit_action *ped)
+{
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_LEN);
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_MAC_ADDR_ALLOC_IN_LEN);
+	size_t outlen;
+	int rc;
+
+	BUILD_BUG_ON(MC_CMD_MAE_MAC_ADDR_ALLOC_IN_MAC_ADDR_LEN !=
+		     sizeof(ped->h_addr));
+	memcpy(MCDI_PTR(inbuf, MAE_MAC_ADDR_ALLOC_IN_MAC_ADDR), ped->h_addr,
+	       sizeof(ped->h_addr));
+	rc = efx_mcdi_rpc(efx, MC_CMD_MAE_MAC_ADDR_ALLOC, inbuf, sizeof(inbuf),
+			  outbuf, sizeof(outbuf), &outlen);
+	if (rc)
+		return rc;
+	if (outlen < sizeof(outbuf))
+		return -EIO;
+	ped->fw_id = MCDI_DWORD(outbuf, MAE_MAC_ADDR_ALLOC_OUT_MAC_ID);
+	return 0;
+}
+
+/**
+ * efx_mae_free_pedit_mac() - free pedit MAC address in HW.
+ * @efx:	NIC we're installing a pedit MAC address on
+ * @ped:	pedit MAC action that needs to be freed
+ *
+ * Frees @ped in HW, check that firmware did not free a different one and clears
+ * the id (which denotes the index of the entry in the MAC address table).
+ */
+void efx_mae_free_pedit_mac(struct efx_nic *efx,
+			    struct efx_tc_mac_pedit_action *ped)
+{
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_MAC_ADDR_FREE_OUT_LEN(1));
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_MAC_ADDR_FREE_IN_LEN(1));
+	size_t outlen;
+	int rc;
+
+	MCDI_SET_DWORD(inbuf, MAE_MAC_ADDR_FREE_IN_MAC_ID, ped->fw_id);
+	rc = efx_mcdi_rpc(efx, MC_CMD_MAE_MAC_ADDR_FREE, inbuf,
+			  sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
+	if (rc || outlen < sizeof(outbuf))
+		return;
+	/* FW freed a different ID than we asked for, should also never happen.
+	 * Warn because it means we've now got a different idea to the FW of
+	 * what MAC addresses exist, which could cause mayhem later.
+	 */
+	if (WARN_ON(MCDI_DWORD(outbuf, MAE_MAC_ADDR_FREE_OUT_FREED_MAC_ID) != ped->fw_id))
+		return;
+	/* We're probably about to free @ped, but let's just make sure its
+	 * fw_id is blatted so that it won't look valid if it leaks out.
+	 */
+	ped->fw_id = MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL;
+}
+
 int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act)
 {
 	MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ACTION_SET_ALLOC_OUT_LEN);
@@ -1231,10 +1296,20 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act)
 			      MAE_ACTION_SET_ALLOC_IN_VLAN_POP, act->vlan_pop,
 			      MAE_ACTION_SET_ALLOC_IN_DECAP, act->decap);
 
-	MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID,
-		       MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL);
-	MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DST_MAC_ID,
-		       MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL);
+	if (act->src_mac)
+		MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID,
+			       act->src_mac->fw_id);
+	else
+		MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID,
+			       MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL);
+
+	if (act->dst_mac)
+		MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DST_MAC_ID,
+			       act->dst_mac->fw_id);
+	else
+		MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DST_MAC_ID,
+			       MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL);
+
 	if (act->count && !WARN_ON(!act->count->cnt))
 		MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_COUNTER_ID,
 			       act->count->cnt->fw_id);
diff --git a/drivers/net/ethernet/sfc/mae.h b/drivers/net/ethernet/sfc/mae.h
index e88e80574f15..8df30bc4f3ba 100644
--- a/drivers/net/ethernet/sfc/mae.h
+++ b/drivers/net/ethernet/sfc/mae.h
@@ -103,6 +103,10 @@ int efx_mae_update_encap_md(struct efx_nic *efx,
 int efx_mae_free_encap_md(struct efx_nic *efx,
 			  struct efx_tc_encap_action *encap);
 
+int efx_mae_allocate_pedit_mac(struct efx_nic *efx,
+			       struct efx_tc_mac_pedit_action *ped);
+void efx_mae_free_pedit_mac(struct efx_nic *efx,
+			    struct efx_tc_mac_pedit_action *ped);
 int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act);
 int efx_mae_free_action_set(struct efx_nic *efx, u32 fw_id);
 
diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index 039180c61c83..8a9fc2f47514 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -86,6 +86,12 @@ s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
 	return mport;
 }
 
+static const struct rhashtable_params efx_tc_mac_ht_params = {
+	.key_len	= offsetofend(struct efx_tc_mac_pedit_action, h_addr),
+	.key_offset	= 0,
+	.head_offset	= offsetof(struct efx_tc_mac_pedit_action, linkage),
+};
+
 static const struct rhashtable_params efx_tc_encap_match_ht_params = {
 	.key_len	= offsetof(struct efx_tc_encap_match, linkage),
 	.key_offset	= 0,
@@ -110,6 +116,56 @@ static const struct rhashtable_params efx_tc_recirc_ht_params = {
 	.head_offset	= offsetof(struct efx_tc_recirc_id, linkage),
 };
 
+static struct efx_tc_mac_pedit_action __maybe_unused *efx_tc_flower_get_mac(struct efx_nic *efx,
+							     unsigned char h_addr[ETH_ALEN],
+							     struct netlink_ext_ack *extack)
+{
+	struct efx_tc_mac_pedit_action *ped, *old;
+	int rc;
+
+	ped = kzalloc(sizeof(*ped), GFP_USER);
+	if (!ped)
+		return ERR_PTR(-ENOMEM);
+	memcpy(ped->h_addr, h_addr, ETH_ALEN);
+	old = rhashtable_lookup_get_insert_fast(&efx->tc->mac_ht,
+						&ped->linkage,
+						efx_tc_mac_ht_params);
+	if (old) {
+		/* don't need our new entry */
+		kfree(ped);
+		if (!refcount_inc_not_zero(&old->ref))
+			return ERR_PTR(-EAGAIN);
+		/* existing entry found, ref taken */
+		return old;
+	}
+
+	rc = efx_mae_allocate_pedit_mac(efx, ped);
+	if (rc < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to store pedit MAC address in hw");
+		goto out_remove;
+	}
+
+	/* ref and return */
+	refcount_set(&ped->ref, 1);
+	return ped;
+out_remove:
+	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
+			       efx_tc_mac_ht_params);
+	kfree(ped);
+	return ERR_PTR(rc);
+}
+
+static void __maybe_unused efx_tc_flower_put_mac(struct efx_nic *efx,
+				  struct efx_tc_mac_pedit_action *ped)
+{
+	if (!refcount_dec_and_test(&ped->ref))
+		return; /* still in use */
+	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
+			       efx_tc_mac_ht_params);
+	efx_mae_free_pedit_mac(efx, ped);
+	kfree(ped);
+}
+
 static void efx_tc_free_action_set(struct efx_nic *efx,
 				   struct efx_tc_action_set *act, bool in_hw)
 {
@@ -2156,6 +2212,14 @@ static void efx_tc_lhs_free(void *ptr, void *arg)
 	kfree(rule);
 }
 
+static void efx_tc_mac_free(void *ptr, void *__unused)
+{
+	struct efx_tc_mac_pedit_action *ped = ptr;
+
+	WARN_ON(refcount_read(&ped->ref));
+	kfree(ped);
+}
+
 static void efx_tc_flow_free(void *ptr, void *arg)
 {
 	struct efx_tc_flow_rule *rule = ptr;
@@ -2196,6 +2260,9 @@ int efx_init_struct_tc(struct efx_nic *efx)
 	rc = efx_tc_init_counters(efx);
 	if (rc < 0)
 		goto fail_counters;
+	rc = rhashtable_init(&efx->tc->mac_ht, &efx_tc_mac_ht_params);
+	if (rc < 0)
+		goto fail_mac_ht;
 	rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params);
 	if (rc < 0)
 		goto fail_encap_match_ht;
@@ -2233,6 +2300,8 @@ fail_lhs_rule_ht:
 fail_match_action_ht:
 	rhashtable_destroy(&efx->tc->encap_match_ht);
 fail_encap_match_ht:
+	rhashtable_destroy(&efx->tc->mac_ht);
+fail_mac_ht:
 	efx_tc_destroy_counters(efx);
 fail_counters:
 	efx_tc_destroy_encap_actions(efx);
@@ -2268,6 +2337,7 @@ void efx_fini_struct_tc(struct efx_nic *efx)
 	rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx);
 	WARN_ON(!ida_is_empty(&efx->tc->recirc_ida));
 	ida_destroy(&efx->tc->recirc_ida);
+	rhashtable_free_and_destroy(&efx->tc->mac_ht, efx_tc_mac_free, NULL);
 	efx_tc_fini_counters(efx);
 	efx_tc_fini_encap_actions(efx);
 	mutex_unlock(&efx->tc->mutex);
diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h
index 40d2c803fca8..7b6a6a3d8e4c 100644
--- a/drivers/net/ethernet/sfc/tc.h
+++ b/drivers/net/ethernet/sfc/tc.h
@@ -18,6 +18,23 @@
 
 #define IS_ALL_ONES(v)	(!(typeof (v))~(v))
 
+/**
+ * struct efx_tc_mac_pedit_action - mac pedit action fields
+ *
+ * @h_addr:	mac address field of ethernet header
+ * @linkage:	rhashtable reference
+ * @ref:	reference count
+ * @fw_id:	index of this entry in firmware MAC address table
+ *
+ * MAC address edits are indirected through a table in the hardware
+ */
+struct efx_tc_mac_pedit_action {
+	u8 h_addr[ETH_ALEN];
+	struct rhash_head linkage;
+	refcount_t ref;
+	u32 fw_id; /* index of this entry in firmware MAC address table */
+};
+
 static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr)
 {
 	return !memchr_inv(addr, 0xff, sizeof(*addr));
@@ -25,20 +42,43 @@ static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr)
 
 struct efx_tc_encap_action; /* see tc_encap_actions.h */
 
+/**
+ * struct efx_tc_action_set - collection of tc action fields
+ *
+ * @vlan_push: the number of vlan headers to push
+ * @vlan_pop: the number of vlan headers to pop
+ * @decap: used to indicate a tunnel header decapsulation should take place
+ * @deliver: used to indicate a deliver action should take place
+ * @vlan_tci: tci fields for vlan push actions
+ * @vlan_proto: ethernet types for vlan push actions
+ * @count: counter mapping
+ * @encap_md: encap entry in tc_encap_ht table
+ * @encap_user: linked list of encap users (encap_md->users)
+ * @user: owning action-set-list. Only populated if @encap_md is; used by efx_tc_update_encap() fallback handling
+ * @count_user: linked list of counter users (counter->users)
+ * @dest_mport: destination mport
+ * @src_mac: source mac entry in tc_mac_ht table
+ * @dst_mac: destination mac entry in tc_mac_ht table
+ * @fw_id: index of this entry in firmware actions table
+ * @list: linked list of tc actions
+ *
+ */
 struct efx_tc_action_set {
 	u16 vlan_push:2;
 	u16 vlan_pop:2;
 	u16 decap:1;
 	u16 deliver:1;
-	__be16 vlan_tci[2]; /* TCIs for vlan_push */
-	__be16 vlan_proto[2]; /* Ethertypes for vlan_push */
+	__be16 vlan_tci[2];
+	__be16 vlan_proto[2];
 	struct efx_tc_counter_index *count;
-	struct efx_tc_encap_action *encap_md; /* entry in tc_encap_ht table */
-	struct list_head encap_user; /* entry on encap_md->users list */
-	struct efx_tc_action_set_list *user; /* Only populated if encap_md */
-	struct list_head count_user; /* entry on counter->users list, if encap */
+	struct efx_tc_encap_action *encap_md;
+	struct list_head encap_user;
+	struct efx_tc_action_set_list *user;
+	struct list_head count_user;
 	u32 dest_mport;
-	u32 fw_id; /* index of this entry in firmware actions table */
+	struct efx_tc_mac_pedit_action *src_mac;
+	struct efx_tc_mac_pedit_action *dst_mac;
+	u32 fw_id;
 	struct list_head list;
 };
 
@@ -220,6 +260,7 @@ struct efx_tc_table_ct { /* TABLE_ID_CONNTRACK_TABLE */
  * @counter_ht: Hashtable of TC counters (FW IDs and counter values)
  * @counter_id_ht: Hashtable mapping TC counter cookies to counters
  * @encap_ht: Hashtable of TC encap actions
+ * @mac_ht: Hashtable of MAC address entries (for pedits)
  * @encap_match_ht: Hashtable of TC encap matches
  * @match_action_ht: Hashtable of TC match-action rules
  * @lhs_rule_ht: Hashtable of TC left-hand (act ct & goto chain) rules
@@ -257,6 +298,7 @@ struct efx_tc_state {
 	struct rhashtable counter_ht;
 	struct rhashtable counter_id_ht;
 	struct rhashtable encap_ht;
+	struct rhashtable mac_ht;
 	struct rhashtable encap_match_ht;
 	struct rhashtable match_action_ht;
 	struct rhashtable lhs_rule_ht;

From 0c676503bd4ff726f5a4adc8fb54e09a78685b1f Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Date: Thu, 24 Aug 2023 12:28:38 +0100
Subject: [PATCH 1931/2047] sfc: add mac source and destination pedit action
 offload

Introduce the first pedit set offload functionality for the sfc driver.
In addition to this, add offload functionality for both mac source and
destination pedit set actions.

Co-developed-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/tc.c | 209 +++++++++++++++++++++++++++++++++-
 1 file changed, 207 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index 8a9fc2f47514..b9d7c4bb7e69 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -116,7 +116,7 @@ static const struct rhashtable_params efx_tc_recirc_ht_params = {
 	.head_offset	= offsetof(struct efx_tc_recirc_id, linkage),
 };
 
-static struct efx_tc_mac_pedit_action __maybe_unused *efx_tc_flower_get_mac(struct efx_nic *efx,
+static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx,
 							     unsigned char h_addr[ETH_ALEN],
 							     struct netlink_ext_ack *extack)
 {
@@ -155,7 +155,7 @@ out_remove:
 	return ERR_PTR(rc);
 }
 
-static void __maybe_unused efx_tc_flower_put_mac(struct efx_nic *efx,
+static void efx_tc_flower_put_mac(struct efx_nic *efx,
 				  struct efx_tc_mac_pedit_action *ped)
 {
 	if (!refcount_dec_and_test(&ped->ref))
@@ -191,6 +191,10 @@ static void efx_tc_free_action_set(struct efx_nic *efx,
 		list_del(&act->encap_user);
 		efx_tc_flower_release_encap_md(efx, act->encap_md);
 	}
+	if (act->src_mac)
+		efx_tc_flower_put_mac(efx, act->src_mac);
+	if (act->dst_mac)
+		efx_tc_flower_put_mac(efx, act->dst_mac);
 	kfree(act);
 }
 
@@ -753,6 +757,7 @@ static const char *efx_tc_encap_type_name(enum efx_encap_type typ)
 /* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
 enum efx_tc_action_order {
 	EFX_TC_AO_DECAP,
+	EFX_TC_AO_PEDIT_MAC_ADDRS,
 	EFX_TC_AO_VLAN_POP,
 	EFX_TC_AO_VLAN_PUSH,
 	EFX_TC_AO_COUNT,
@@ -767,6 +772,11 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
 	case EFX_TC_AO_DECAP:
 		if (act->decap)
 			return false;
+		/* PEDIT_MAC_ADDRS must not happen before DECAP, though it
+		 * can wait until much later
+		 */
+		if (act->dst_mac || act->src_mac)
+			return false;
 		fallthrough;
 	case EFX_TC_AO_VLAN_POP:
 		if (act->vlan_pop >= 2)
@@ -786,6 +796,7 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
 		if (act->count)
 			return false;
 		fallthrough;
+	case EFX_TC_AO_PEDIT_MAC_ADDRS:
 	case EFX_TC_AO_ENCAP:
 		if (act->encap_md)
 			return false;
@@ -956,6 +967,191 @@ static void efx_tc_flower_release_lhs_actions(struct efx_nic *efx,
 		efx_tc_flower_put_counter_index(efx, act->count);
 }
 
+/**
+ * struct efx_tc_mangler_state - accumulates 32-bit pedits into fields
+ *
+ * @dst_mac_32:	dst_mac[0:3] has been populated
+ * @dst_mac_16:	dst_mac[4:5] has been populated
+ * @src_mac_16:	src_mac[0:1] has been populated
+ * @src_mac_32:	src_mac[2:5] has been populated
+ * @dst_mac:	h_dest field of ethhdr
+ * @src_mac:	h_source field of ethhdr
+ *
+ * Since FLOW_ACTION_MANGLE comes in 32-bit chunks that do not
+ * necessarily equate to whole fields of the packet header, this
+ * structure is used to hold the cumulative effect of the partial
+ * field pedits that have been processed so far.
+ */
+struct efx_tc_mangler_state {
+	u8 dst_mac_32:1; /* eth->h_dest[0:3] */
+	u8 dst_mac_16:1; /* eth->h_dest[4:5] */
+	u8 src_mac_16:1; /* eth->h_source[0:1] */
+	u8 src_mac_32:1; /* eth->h_source[2:5] */
+	unsigned char dst_mac[ETH_ALEN];
+	unsigned char src_mac[ETH_ALEN];
+};
+
+/** efx_tc_complete_mac_mangle() - pull complete field pedits out of @mung
+ * @efx:	NIC we're installing a flow rule on
+ * @act:	action set (cursor) to update
+ * @mung:	accumulated partial mangles
+ * @extack:	netlink extended ack for reporting errors
+ *
+ * Check @mung to find any combinations of partial mangles that can be
+ * combined into a complete packet field edit, add that edit to @act,
+ * and consume the partial mangles from @mung.
+ */
+
+static int efx_tc_complete_mac_mangle(struct efx_nic *efx,
+				      struct efx_tc_action_set *act,
+				      struct efx_tc_mangler_state *mung,
+				      struct netlink_ext_ack *extack)
+{
+	struct efx_tc_mac_pedit_action *ped;
+
+	if (mung->dst_mac_32 && mung->dst_mac_16) {
+		ped = efx_tc_flower_get_mac(efx, mung->dst_mac, extack);
+		if (IS_ERR(ped))
+			return PTR_ERR(ped);
+
+		/* Check that we have not already populated dst_mac */
+		if (act->dst_mac)
+			efx_tc_flower_put_mac(efx, act->dst_mac);
+
+		act->dst_mac = ped;
+
+		/* consume the incomplete state */
+		mung->dst_mac_32 = 0;
+		mung->dst_mac_16 = 0;
+	}
+	if (mung->src_mac_16 && mung->src_mac_32) {
+		ped = efx_tc_flower_get_mac(efx, mung->src_mac, extack);
+		if (IS_ERR(ped))
+			return PTR_ERR(ped);
+
+		/* Check that we have not already populated src_mac */
+		if (act->src_mac)
+			efx_tc_flower_put_mac(efx, act->src_mac);
+
+		act->src_mac = ped;
+
+		/* consume the incomplete state */
+		mung->src_mac_32 = 0;
+		mung->src_mac_16 = 0;
+	}
+	return 0;
+}
+
+/**
+ * efx_tc_mangle() - handle a single 32-bit (or less) pedit
+ * @efx:	NIC we're installing a flow rule on
+ * @act:	action set (cursor) to update
+ * @fa:		FLOW_ACTION_MANGLE action metadata
+ * @mung:	accumulator for partial mangles
+ * @extack:	netlink extended ack for reporting errors
+ *
+ * Identify the fields written by a FLOW_ACTION_MANGLE, and record
+ * the partial mangle state in @mung.  If this mangle completes an
+ * earlier partial mangle, consume and apply to @act by calling
+ * efx_tc_complete_mac_mangle().
+ */
+
+static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act,
+			 const struct flow_action_entry *fa,
+			 struct efx_tc_mangler_state *mung,
+			 struct netlink_ext_ack *extack)
+{
+	__le32 mac32;
+	__le16 mac16;
+
+	switch (fa->mangle.htype) {
+	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
+		BUILD_BUG_ON(offsetof(struct ethhdr, h_dest) != 0);
+		BUILD_BUG_ON(offsetof(struct ethhdr, h_source) != 6);
+		if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_PEDIT_MAC_ADDRS)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Pedit mangle mac action violates action order");
+			return -EOPNOTSUPP;
+		}
+		switch (fa->mangle.offset) {
+		case 0:
+			if (fa->mangle.mask) {
+				NL_SET_ERR_MSG_FMT_MOD(extack,
+						       "Unsupported: mask (%#x) of eth.dst32 mangle",
+						       fa->mangle.mask);
+				return -EOPNOTSUPP;
+			}
+			/* Ethernet address is little-endian */
+			mac32 = cpu_to_le32(fa->mangle.val);
+			memcpy(mung->dst_mac, &mac32, sizeof(mac32));
+			mung->dst_mac_32 = 1;
+			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
+		case 4:
+			if (fa->mangle.mask == 0xffff) {
+				mac16 = cpu_to_le16(fa->mangle.val >> 16);
+				memcpy(mung->src_mac, &mac16, sizeof(mac16));
+				mung->src_mac_16 = 1;
+			} else if (fa->mangle.mask == 0xffff0000) {
+				mac16 = cpu_to_le16((u16)fa->mangle.val);
+				memcpy(mung->dst_mac + 4, &mac16, sizeof(mac16));
+				mung->dst_mac_16 = 1;
+			} else {
+				NL_SET_ERR_MSG_FMT_MOD(extack,
+						       "Unsupported: mask (%#x) of eth+4 mangle is not high or low 16b",
+						       fa->mangle.mask);
+				return -EOPNOTSUPP;
+			}
+			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
+		case 8:
+			if (fa->mangle.mask) {
+				NL_SET_ERR_MSG_FMT_MOD(extack,
+						       "Unsupported: mask (%#x) of eth.src32 mangle",
+						       fa->mangle.mask);
+				return -EOPNOTSUPP;
+			}
+			mac32 = cpu_to_le32(fa->mangle.val);
+			memcpy(mung->src_mac + 2, &mac32, sizeof(mac32));
+			mung->src_mac_32 = 1;
+			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
+		default:
+			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported: mangle eth+%u %x/%x",
+					       fa->mangle.offset, fa->mangle.val, fa->mangle.mask);
+			return -EOPNOTSUPP;
+		}
+		break;
+	default:
+		NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule",
+				       fa->mangle.htype);
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+/**
+ * efx_tc_incomplete_mangle() - check for leftover partial pedits
+ * @mung:	accumulator for partial mangles
+ * @extack:	netlink extended ack for reporting errors
+ *
+ * Since the MAE can only overwrite whole fields, any partial
+ * field mangle left over on reaching packet delivery (mirred or
+ * end of TC actions) cannot be offloaded.  Check for any such
+ * and reject them with -%EOPNOTSUPP.
+ */
+
+static int efx_tc_incomplete_mangle(struct efx_tc_mangler_state *mung,
+				    struct netlink_ext_ack *extack)
+{
+	if (mung->dst_mac_32 || mung->dst_mac_16) {
+		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of destination MAC address");
+		return -EOPNOTSUPP;
+	}
+	if (mung->src_mac_16 || mung->src_mac_32) {
+		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of source MAC address");
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
 static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
 					 struct net_device *net_dev,
 					 struct flow_cls_offload *tc)
@@ -1351,6 +1547,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
 	struct netlink_ext_ack *extack = tc->common.extack;
 	const struct ip_tunnel_info *encap_info = NULL;
 	struct efx_tc_flow_rule *rule = NULL, *old;
+	struct efx_tc_mangler_state mung = {};
 	struct efx_tc_action_set *act = NULL;
 	const struct flow_action_entry *fa;
 	struct efx_rep *from_efv, *to_efv;
@@ -1687,6 +1884,11 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
 			act->vlan_proto[act->vlan_push] = fa->vlan.proto;
 			act->vlan_push++;
 			break;
+		case FLOW_ACTION_MANGLE:
+			rc = efx_tc_mangle(efx, act, fa, &mung, extack);
+			if (rc < 0)
+				goto release;
+			break;
 		case FLOW_ACTION_TUNNEL_ENCAP:
 			if (encap_info) {
 				/* Can't specify encap multiple times.
@@ -1726,6 +1928,9 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
 		}
 	}
 
+	rc = efx_tc_incomplete_mangle(&mung, extack);
+	if (rc < 0)
+		goto release;
 	if (act) {
 		/* Not shot/redirected, so deliver to default dest */
 		if (from_efv == EFX_EFV_PF)

From 66f728872636e1167aedc5a18be6760083911900 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Date: Thu, 24 Aug 2023 12:28:39 +0100
Subject: [PATCH 1932/2047] sfc: add decrement ttl by offloading set ipv4 ttl
 actions

Offload pedit set ipv4 ttl field, where the ttl field has already been
matched and the new value is one less, by translating it to a decrement.

Co-developed-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/mae.c |  6 ++-
 drivers/net/ethernet/sfc/tc.c  | 75 +++++++++++++++++++++++++++++++++-
 drivers/net/ethernet/sfc/tc.h  |  2 +
 3 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c
index a7ad7ab8c5f4..c3e2b4a21d10 100644
--- a/drivers/net/ethernet/sfc/mae.c
+++ b/drivers/net/ethernet/sfc/mae.c
@@ -1291,10 +1291,12 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act)
 	size_t outlen;
 	int rc;
 
-	MCDI_POPULATE_DWORD_3(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS,
+	MCDI_POPULATE_DWORD_4(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS,
 			      MAE_ACTION_SET_ALLOC_IN_VLAN_PUSH, act->vlan_push,
 			      MAE_ACTION_SET_ALLOC_IN_VLAN_POP, act->vlan_pop,
-			      MAE_ACTION_SET_ALLOC_IN_DECAP, act->decap);
+			      MAE_ACTION_SET_ALLOC_IN_DECAP, act->decap,
+			      MAE_ACTION_SET_ALLOC_IN_DO_DECR_IP_TTL,
+			      act->do_ttl_dec);
 
 	if (act->src_mac)
 		MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID,
diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index b9d7c4bb7e69..57bdd56b7113 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -31,6 +31,7 @@ enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
 	return EFX_ENCAP_TYPE_NONE;
 }
 
+#define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff)
 #define EFX_EFV_PF	NULL
 /* Look up the representor information (efv) for a device.
  * May return NULL for the PF (us), or an error pointer for a device that
@@ -757,6 +758,7 @@ static const char *efx_tc_encap_type_name(enum efx_encap_type typ)
 /* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
 enum efx_tc_action_order {
 	EFX_TC_AO_DECAP,
+	EFX_TC_AO_DEC_TTL,
 	EFX_TC_AO_PEDIT_MAC_ADDRS,
 	EFX_TC_AO_VLAN_POP,
 	EFX_TC_AO_VLAN_PUSH,
@@ -777,6 +779,10 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
 		 */
 		if (act->dst_mac || act->src_mac)
 			return false;
+
+		/* Decrementing ttl must not happen before DECAP */
+		if (act->do_ttl_dec)
+			return false;
 		fallthrough;
 	case EFX_TC_AO_VLAN_POP:
 		if (act->vlan_pop >= 2)
@@ -803,6 +809,10 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
 		fallthrough;
 	case EFX_TC_AO_DELIVER:
 		return !act->deliver;
+	case EFX_TC_AO_DEC_TTL:
+		if (act->encap_md)
+			return false;
+		return !act->do_ttl_dec;
 	default:
 		/* Bad caller.  Whatever they wanted to do, say they can't. */
 		WARN_ON_ONCE(1);
@@ -1049,6 +1059,7 @@ static int efx_tc_complete_mac_mangle(struct efx_nic *efx,
  * @fa:		FLOW_ACTION_MANGLE action metadata
  * @mung:	accumulator for partial mangles
  * @extack:	netlink extended ack for reporting errors
+ * @match:	original match used along with the mangle action
  *
  * Identify the fields written by a FLOW_ACTION_MANGLE, and record
  * the partial mangle state in @mung.  If this mangle completes an
@@ -1059,10 +1070,12 @@ static int efx_tc_complete_mac_mangle(struct efx_nic *efx,
 static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act,
 			 const struct flow_action_entry *fa,
 			 struct efx_tc_mangler_state *mung,
-			 struct netlink_ext_ack *extack)
+			 struct netlink_ext_ack *extack,
+			 struct efx_tc_match *match)
 {
 	__le32 mac32;
 	__le16 mac16;
+	u8 tr_ttl;
 
 	switch (fa->mangle.htype) {
 	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
@@ -1119,6 +1132,64 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act,
 			return -EOPNOTSUPP;
 		}
 		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+		switch (fa->mangle.offset) {
+		case offsetof(struct iphdr, ttl):
+			/* we currently only support pedit IP4 when it applies
+			 * to TTL and then only when it can be achieved with a
+			 * decrement ttl action
+			 */
+
+			/* check that pedit applies to ttl only */
+			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) {
+				NL_SET_ERR_MSG_FMT_MOD(extack,
+						       "Unsupported: mask (%#x) out of range, only support mangle action on ipv4.ttl",
+						       fa->mangle.mask);
+				return -EOPNOTSUPP;
+			}
+
+			/* we can only convert to a dec ttl when we have an
+			 * exact match on the ttl field
+			 */
+			if (match->mask.ip_ttl != U8_MAX) {
+				NL_SET_ERR_MSG_FMT_MOD(extack,
+						       "Unsupported: only support mangle ipv4.ttl when we have an exact match on ttl, mask used for match (%#x)",
+						       match->mask.ip_ttl);
+				return -EOPNOTSUPP;
+			}
+
+			/* check that we don't try to decrement 0, which equates
+			 * to setting the ttl to 0xff
+			 */
+			if (match->value.ip_ttl == 0) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Unsupported: we cannot decrement ttl past 0");
+				return -EOPNOTSUPP;
+			}
+
+			/* check that we do not decrement ttl twice */
+			if (!efx_tc_flower_action_order_ok(act,
+							   EFX_TC_AO_DEC_TTL)) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Unsupported: multiple dec ttl");
+				return -EOPNOTSUPP;
+			}
+
+			/* check pedit can be achieved with decrement action */
+			tr_ttl = match->value.ip_ttl - 1;
+			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) == tr_ttl) {
+				act->do_ttl_dec = 1;
+				return 0;
+			}
+
+			fallthrough;
+		default:
+			NL_SET_ERR_MSG_FMT_MOD(extack,
+					       "Unsupported: only support mangle on the ttl field (offset is %u)",
+					       fa->mangle.offset);
+			return -EOPNOTSUPP;
+		}
+		break;
 	default:
 		NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule",
 				       fa->mangle.htype);
@@ -1885,7 +1956,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
 			act->vlan_push++;
 			break;
 		case FLOW_ACTION_MANGLE:
-			rc = efx_tc_mangle(efx, act, fa, &mung, extack);
+			rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match);
 			if (rc < 0)
 				goto release;
 			break;
diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h
index 7b6a6a3d8e4c..4dd2c378fd9f 100644
--- a/drivers/net/ethernet/sfc/tc.h
+++ b/drivers/net/ethernet/sfc/tc.h
@@ -48,6 +48,7 @@ struct efx_tc_encap_action; /* see tc_encap_actions.h */
  * @vlan_push: the number of vlan headers to push
  * @vlan_pop: the number of vlan headers to pop
  * @decap: used to indicate a tunnel header decapsulation should take place
+ * @do_ttl_dec: used to indicate IP TTL / Hop Limit should be decremented
  * @deliver: used to indicate a deliver action should take place
  * @vlan_tci: tci fields for vlan push actions
  * @vlan_proto: ethernet types for vlan push actions
@@ -67,6 +68,7 @@ struct efx_tc_action_set {
 	u16 vlan_push:2;
 	u16 vlan_pop:2;
 	u16 decap:1;
+	u16 do_ttl_dec:1;
 	u16 deliver:1;
 	__be16 vlan_tci[2];
 	__be16 vlan_proto[2];

From 9dbc8d2b9a02f938d32741297d56cc364dc0dce6 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Date: Thu, 24 Aug 2023 12:28:40 +0100
Subject: [PATCH 1933/2047] sfc: add decrement ipv6 hop limit by offloading set
 hop limit actions

Offload pedit set ipv6 hop limit, where the hop limit has already been
matched and the new value is one less, by translating it to a decrement.

Co-developed-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/tc.c | 59 +++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index 57bdd56b7113..354c635be59f 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -32,6 +32,8 @@ enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
 }
 
 #define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff)
+/* Hoplimit is stored in the most significant byte in the pedit ipv6 header action */
+#define EFX_TC_HDR_TYPE_HLIMIT_MASK ~((u32)0xff000000)
 #define EFX_EFV_PF	NULL
 /* Look up the representor information (efv) for a device.
  * May return NULL for the PF (us), or an error pointer for a device that
@@ -1190,6 +1192,63 @@ static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act,
 			return -EOPNOTSUPP;
 		}
 		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+		switch (fa->mangle.offset) {
+		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
+			/* we currently only support pedit IP6 when it applies
+			 * to the hoplimit and then only when it can be achieved
+			 * with a decrement hoplimit action
+			 */
+
+			/* check that pedit applies to ttl only */
+			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) {
+				NL_SET_ERR_MSG_FMT_MOD(extack,
+						       "Unsupported: mask (%#x) out of range, only support mangle action on ipv6.hop_limit",
+						       fa->mangle.mask);
+
+				return -EOPNOTSUPP;
+			}
+
+			/* we can only convert to a dec ttl when we have an
+			 * exact match on the ttl field
+			 */
+			if (match->mask.ip_ttl != U8_MAX) {
+				NL_SET_ERR_MSG_FMT_MOD(extack,
+						       "Unsupported: only support mangle ipv6.hop_limit when we have an exact match on ttl, mask used for match (%#x)",
+						       match->mask.ip_ttl);
+				return -EOPNOTSUPP;
+			}
+
+			/* check that we don't try to decrement 0, which equates
+			 * to setting the ttl to 0xff
+			 */
+			if (match->value.ip_ttl == 0) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Unsupported: we cannot decrement hop_limit past 0");
+				return -EOPNOTSUPP;
+			}
+
+			/* check that we do not decrement hoplimit twice */
+			if (!efx_tc_flower_action_order_ok(act,
+							   EFX_TC_AO_DEC_TTL)) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Unsupported: multiple dec ttl");
+				return -EOPNOTSUPP;
+			}
+
+			/* check pedit can be achieved with decrement action */
+			tr_ttl = match->value.ip_ttl - 1;
+			if ((fa->mangle.val >> 24) == tr_ttl) {
+				act->do_ttl_dec = 1;
+				return 0;
+			}
+
+			fallthrough;
+		default:
+			NL_SET_ERR_MSG_FMT_MOD(extack,
+					       "Unsupported: only support mangle on the hop_limit field");
+			return -EOPNOTSUPP;
+		}
 	default:
 		NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule",
 				       fa->mangle.htype);

From 64848f062e33df61fb1f9f7b7a3dc7b44e0d36d7 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Date: Thu, 24 Aug 2023 12:28:41 +0100
Subject: [PATCH 1934/2047] sfc: introduce pedit add actions on the ipv4 ttl
 field

Introduce pedit add actions and use it to achieve decrement ttl offload.
Decrement ttl can be achieved by adding 0xff to the ttl field.

Co-developed-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/tc.c | 46 +++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index 354c635be59f..dfe3a8bf74f0 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -1054,6 +1054,47 @@ static int efx_tc_complete_mac_mangle(struct efx_nic *efx,
 	return 0;
 }
 
+static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act,
+			    const struct flow_action_entry *fa,
+			    struct netlink_ext_ack *extack)
+{
+	switch (fa->mangle.htype) {
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+		switch (fa->mangle.offset) {
+		case offsetof(struct iphdr, ttl):
+			/* check that pedit applies to ttl only */
+			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK)
+				break;
+
+			/* Adding 0xff is equivalent to decrementing the ttl.
+			 * Other added values are not supported.
+			 */
+			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) != U8_MAX)
+				break;
+
+			/* check that we do not decrement ttl twice */
+			if (!efx_tc_flower_action_order_ok(act,
+							   EFX_TC_AO_DEC_TTL)) {
+				NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl");
+				return -EOPNOTSUPP;
+			}
+			act->do_ttl_dec = 1;
+			return 0;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	NL_SET_ERR_MSG_FMT_MOD(extack,
+			       "Unsupported: ttl add action type %x %x %x/%x",
+			       fa->mangle.htype, fa->mangle.offset,
+			       fa->mangle.val, fa->mangle.mask);
+	return -EOPNOTSUPP;
+}
+
 /**
  * efx_tc_mangle() - handle a single 32-bit (or less) pedit
  * @efx:	NIC we're installing a flow rule on
@@ -2014,6 +2055,11 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
 			act->vlan_proto[act->vlan_push] = fa->vlan.proto;
 			act->vlan_push++;
 			break;
+		case FLOW_ACTION_ADD:
+			rc = efx_tc_pedit_add(efx, act, fa, extack);
+			if (rc < 0)
+				goto release;
+			break;
 		case FLOW_ACTION_MANGLE:
 			rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match);
 			if (rc < 0)

From e8e0bd60e4833e5bcfa220d7b4c07456c83c1ea2 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Date: Thu, 24 Aug 2023 12:28:42 +0100
Subject: [PATCH 1935/2047] sfc: extend pedit add action to handle decrement
 ipv6 hop limit

Extend the pedit add actions to handle this case for ipv6. Similar to ipv4
dec ttl, decrementing ipv6 hop limit can be achieved by adding 0xff to the
hop limit field.

Co-developed-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/tc.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index dfe3a8bf74f0..047322b04d4f 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -1084,6 +1084,31 @@ static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act,
 			break;
 		}
 		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+		switch (fa->mangle.offset) {
+		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
+			/* check that pedit applies to hoplimit only */
+			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK)
+				break;
+
+			/* Adding 0xff is equivalent to decrementing the hoplimit.
+			 * Other added values are not supported.
+			 */
+			if ((fa->mangle.val >> 24) != U8_MAX)
+				break;
+
+			/* check that we do not decrement hoplimit twice */
+			if (!efx_tc_flower_action_order_ok(act,
+							   EFX_TC_AO_DEC_TTL)) {
+				NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl");
+				return -EOPNOTSUPP;
+			}
+			act->do_ttl_dec = 1;
+			return 0;
+		default:
+			break;
+		}
+		break;
 	default:
 		break;
 	}

From f0035689c036ade6faba4a91e2dc4879d1f02f82 Mon Sep 17 00:00:00 2001
From: MD Danish Anwar <danishanwar@ti.com>
Date: Thu, 24 Aug 2023 17:16:14 +0530
Subject: [PATCH 1936/2047] dt-bindings: net: Add ICSS IEP

Add a DT binding document for the ICSS Industrial Ethernet Peripheral(IEP)
hardware. IEP supports packet timestamping, PTP and PPS.

Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Roger Quadros <rogerq@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/ti,icss-iep.yaml  | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/ti,icss-iep.yaml

diff --git a/Documentation/devicetree/bindings/net/ti,icss-iep.yaml b/Documentation/devicetree/bindings/net/ti,icss-iep.yaml
new file mode 100644
index 000000000000..f5c22d6dcaee
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ti,icss-iep.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/ti,icss-iep.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments ICSS Industrial Ethernet Peripheral (IEP) module
+
+maintainers:
+  - Md Danish Anwar <danishanwar@ti.com>
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - ti,am642-icss-iep
+              - ti,j721e-icss-iep
+          - const: ti,am654-icss-iep
+
+      - const: ti,am654-icss-iep
+
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+    description: phandle to the IEP source clock
+
+required:
+  - compatible
+  - reg
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    /* AM65x */
+    icssg0_iep0: iep@2e000 {
+        compatible = "ti,am654-icss-iep";
+        reg = <0x2e000 0x1000>;
+        clocks = <&icssg0_iepclk_mux>;
+    };

From b12056278378c4fb995a32a26034ac370d177d98 Mon Sep 17 00:00:00 2001
From: MD Danish Anwar <danishanwar@ti.com>
Date: Thu, 24 Aug 2023 17:16:15 +0530
Subject: [PATCH 1937/2047] dt-bindings: net: Add IEP property in ICSSG

Add IEP property in ICSSG hardware DT binding document.
ICSSG uses IEP (Industrial Ethernet Peripheral) to support timestamping
of ethernet packets, PTP and PPS.

Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Roger Quadros <rogerq@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/ti,icssg-prueth.yaml         | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml
index 8ec30b3eb760..311c570165f9 100644
--- a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml
+++ b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml
@@ -52,6 +52,14 @@ properties:
     description:
       phandle to MII_RT module's syscon regmap
 
+  ti,iep:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    maxItems: 2
+    items:
+      maxItems: 1
+    description:
+      phandle to IEP (Industrial Ethernet Peripheral) for ICSSG
+
   interrupts:
     maxItems: 2
     description:
@@ -155,6 +163,7 @@ examples:
                     "tx1-0", "tx1-1", "tx1-2", "tx1-3",
                     "rx0", "rx1";
         ti,mii-g-rt = <&icssg2_mii_g_rt>;
+        ti,iep = <&icssg2_iep0>, <&icssg2_iep1>;
         interrupt-parent = <&icssg2_intc>;
         interrupts = <24 0 2>, <25 1 3>;
         interrupt-names = "tx_ts0", "tx_ts1";

From c1e0230eeaab26984f6e5c5575486e96a63e2e48 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Thu, 24 Aug 2023 17:16:16 +0530
Subject: [PATCH 1938/2047] net: ti: icss-iep: Add IEP driver

Add a driver for Industrial Ethernet Peripheral (IEP) block of PRUSS to
support timestamping of ethernet packets and thus support PTP and PPS
for PRU ethernet ports.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/Kconfig          |  11 +
 drivers/net/ethernet/ti/Makefile         |   1 +
 drivers/net/ethernet/ti/icssg/icss_iep.c | 939 +++++++++++++++++++++++
 drivers/net/ethernet/ti/icssg/icss_iep.h |  38 +
 4 files changed, 989 insertions(+)
 create mode 100644 drivers/net/ethernet/ti/icssg/icss_iep.c
 create mode 100644 drivers/net/ethernet/ti/icssg/icss_iep.h

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 63e510b6860f..1af5a90720ec 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -196,4 +196,15 @@ config TI_ICSSG_PRUETH
 	  to support the Ethernet operation. Currently, it supports Ethernet
 	  with 1G and 100M link speed.
 
+config TI_ICSS_IEP
+	tristate "TI PRU ICSS IEP driver"
+	depends on TI_PRUSS
+	default TI_PRUSS
+	help
+	  This driver enables support for the PRU-ICSS Industrial Ethernet
+	  Peripheral within a PRU-ICSS subsystem present on various TI SoCs.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called icss_iep.
+
 endif # NET_VENDOR_TI
diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile
index 9176d79c36e1..34fd7a716ba6 100644
--- a/drivers/net/ethernet/ti/Makefile
+++ b/drivers/net/ethernet/ti/Makefile
@@ -38,3 +38,4 @@ icssg-prueth-y := k3-cppi-desc-pool.o \
 		  icssg/icssg_mii_cfg.o \
 		  icssg/icssg_stats.o \
 		  icssg/icssg_ethtool.o
+obj-$(CONFIG_TI_ICSS_IEP) += icssg/icss_iep.o
diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c
new file mode 100644
index 000000000000..bcc056bf45da
--- /dev/null
+++ b/drivers/net/ethernet/ti/icssg/icss_iep.c
@@ -0,0 +1,939 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Texas Instruments ICSSG Industrial Ethernet Peripheral (IEP) Driver
+ *
+ * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/timekeeping.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+
+#include "icss_iep.h"
+
+#define IEP_MAX_DEF_INC		0xf
+#define IEP_MAX_COMPEN_INC		0xfff
+#define IEP_MAX_COMPEN_COUNT	0xffffff
+
+#define IEP_GLOBAL_CFG_CNT_ENABLE	BIT(0)
+#define IEP_GLOBAL_CFG_DEFAULT_INC_MASK		GENMASK(7, 4)
+#define IEP_GLOBAL_CFG_DEFAULT_INC_SHIFT	4
+#define IEP_GLOBAL_CFG_COMPEN_INC_MASK		GENMASK(19, 8)
+#define IEP_GLOBAL_CFG_COMPEN_INC_SHIFT		8
+
+#define IEP_GLOBAL_STATUS_CNT_OVF	BIT(0)
+
+#define IEP_CMP_CFG_SHADOW_EN		BIT(17)
+#define IEP_CMP_CFG_CMP0_RST_CNT_EN	BIT(0)
+#define IEP_CMP_CFG_CMP_EN(cmp)		(GENMASK(16, 1) & (1 << ((cmp) + 1)))
+
+#define IEP_CMP_STATUS(cmp)		(1 << (cmp))
+
+#define IEP_SYNC_CTRL_SYNC_EN		BIT(0)
+#define IEP_SYNC_CTRL_SYNC_N_EN(n)	(GENMASK(2, 1) & (BIT(1) << (n)))
+
+#define IEP_MIN_CMP	0
+#define IEP_MAX_CMP	15
+
+#define ICSS_IEP_64BIT_COUNTER_SUPPORT		BIT(0)
+#define ICSS_IEP_SLOW_COMPEN_REG_SUPPORT	BIT(1)
+#define ICSS_IEP_SHADOW_MODE_SUPPORT		BIT(2)
+
+#define LATCH_INDEX(ts_index)			((ts_index) + 6)
+#define IEP_CAP_CFG_CAPNR_1ST_EVENT_EN(n)	BIT(LATCH_INDEX(n))
+#define IEP_CAP_CFG_CAP_ASYNC_EN(n)		BIT(LATCH_INDEX(n) + 10)
+
+enum {
+	ICSS_IEP_GLOBAL_CFG_REG,
+	ICSS_IEP_GLOBAL_STATUS_REG,
+	ICSS_IEP_COMPEN_REG,
+	ICSS_IEP_SLOW_COMPEN_REG,
+	ICSS_IEP_COUNT_REG0,
+	ICSS_IEP_COUNT_REG1,
+	ICSS_IEP_CAPTURE_CFG_REG,
+	ICSS_IEP_CAPTURE_STAT_REG,
+
+	ICSS_IEP_CAP6_RISE_REG0,
+	ICSS_IEP_CAP6_RISE_REG1,
+
+	ICSS_IEP_CAP7_RISE_REG0,
+	ICSS_IEP_CAP7_RISE_REG1,
+
+	ICSS_IEP_CMP_CFG_REG,
+	ICSS_IEP_CMP_STAT_REG,
+	ICSS_IEP_CMP0_REG0,
+	ICSS_IEP_CMP0_REG1,
+	ICSS_IEP_CMP1_REG0,
+	ICSS_IEP_CMP1_REG1,
+
+	ICSS_IEP_CMP8_REG0,
+	ICSS_IEP_CMP8_REG1,
+	ICSS_IEP_SYNC_CTRL_REG,
+	ICSS_IEP_SYNC0_STAT_REG,
+	ICSS_IEP_SYNC1_STAT_REG,
+	ICSS_IEP_SYNC_PWIDTH_REG,
+	ICSS_IEP_SYNC0_PERIOD_REG,
+	ICSS_IEP_SYNC1_DELAY_REG,
+	ICSS_IEP_SYNC_START_REG,
+	ICSS_IEP_MAX_REGS,
+};
+
+/**
+ * struct icss_iep_plat_data - Plat data to handle SoC variants
+ * @config: Regmap configuration data
+ * @reg_offs: register offsets to capture offset differences across SoCs
+ * @flags: Flags to represent IEP properties
+ */
+struct icss_iep_plat_data {
+	struct regmap_config *config;
+	u32 reg_offs[ICSS_IEP_MAX_REGS];
+	u32 flags;
+};
+
+struct icss_iep {
+	struct device *dev;
+	void __iomem *base;
+	const struct icss_iep_plat_data *plat_data;
+	struct regmap *map;
+	struct device_node *client_np;
+	unsigned long refclk_freq;
+	int clk_tick_time;	/* one refclk tick time in ns */
+	struct ptp_clock_info ptp_info;
+	struct ptp_clock *ptp_clock;
+	struct mutex ptp_clk_mutex;	/* PHC access serializer */
+	spinlock_t irq_lock; /* CMP IRQ vs icss_iep_ptp_enable access */
+	u32 def_inc;
+	s16 slow_cmp_inc;
+	u32 slow_cmp_count;
+	const struct icss_iep_clockops *ops;
+	void *clockops_data;
+	u32 cycle_time_ns;
+	u32 perout_enabled;
+	bool pps_enabled;
+	int cap_cmp_irq;
+	u64 period;
+	u32 latch_enable;
+};
+
+/**
+ * icss_iep_get_count_hi() - Get the upper 32 bit IEP counter
+ * @iep: Pointer to structure representing IEP.
+ *
+ * Return: upper 32 bit IEP counter
+ */
+int icss_iep_get_count_hi(struct icss_iep *iep)
+{
+	u32 val = 0;
+
+	if (iep && (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT))
+		val = readl(iep->base + iep->plat_data->reg_offs[ICSS_IEP_COUNT_REG1]);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(icss_iep_get_count_hi);
+
+/**
+ * icss_iep_get_count_low() - Get the lower 32 bit IEP counter
+ * @iep: Pointer to structure representing IEP.
+ *
+ * Return: lower 32 bit IEP counter
+ */
+int icss_iep_get_count_low(struct icss_iep *iep)
+{
+	u32 val = 0;
+
+	if (iep)
+		val = readl(iep->base + iep->plat_data->reg_offs[ICSS_IEP_COUNT_REG0]);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(icss_iep_get_count_low);
+
+/**
+ * icss_iep_get_ptp_clock_idx() - Get PTP clock index using IEP driver
+ * @iep: Pointer to structure representing IEP.
+ *
+ * Return: PTP clock index, -1 if not registered
+ */
+int icss_iep_get_ptp_clock_idx(struct icss_iep *iep)
+{
+	if (!iep || !iep->ptp_clock)
+		return -1;
+	return ptp_clock_index(iep->ptp_clock);
+}
+EXPORT_SYMBOL_GPL(icss_iep_get_ptp_clock_idx);
+
+static void icss_iep_set_counter(struct icss_iep *iep, u64 ns)
+{
+	if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT)
+		writel(upper_32_bits(ns), iep->base +
+		       iep->plat_data->reg_offs[ICSS_IEP_COUNT_REG1]);
+	writel(upper_32_bits(ns), iep->base + iep->plat_data->reg_offs[ICSS_IEP_COUNT_REG0]);
+}
+
+static void icss_iep_update_to_next_boundary(struct icss_iep *iep, u64 start_ns);
+
+/**
+ * icss_iep_settime() - Set time of the PTP clock using IEP driver
+ * @iep: Pointer to structure representing IEP.
+ * @ns: Time to be set in nanoseconds
+ *
+ * This API uses writel() instead of regmap_write() for write operations as
+ * regmap_write() is too slow and this API is time sensitive.
+ */
+static void icss_iep_settime(struct icss_iep *iep, u64 ns)
+{
+	unsigned long flags;
+
+	if (iep->ops && iep->ops->settime) {
+		iep->ops->settime(iep->clockops_data, ns);
+		return;
+	}
+
+	spin_lock_irqsave(&iep->irq_lock, flags);
+	if (iep->pps_enabled || iep->perout_enabled)
+		writel(0, iep->base + iep->plat_data->reg_offs[ICSS_IEP_SYNC_CTRL_REG]);
+
+	icss_iep_set_counter(iep, ns);
+
+	if (iep->pps_enabled || iep->perout_enabled) {
+		icss_iep_update_to_next_boundary(iep, ns);
+		writel(IEP_SYNC_CTRL_SYNC_N_EN(0) | IEP_SYNC_CTRL_SYNC_EN,
+		       iep->base + iep->plat_data->reg_offs[ICSS_IEP_SYNC_CTRL_REG]);
+	}
+	spin_unlock_irqrestore(&iep->irq_lock, flags);
+}
+
+/**
+ * icss_iep_gettime() - Get time of the PTP clock using IEP driver
+ * @iep: Pointer to structure representing IEP.
+ * @sts: Pointer to structure representing PTP system timestamp.
+ *
+ * This API uses readl() instead of regmap_read() for read operations as
+ * regmap_read() is too slow and this API is time sensitive.
+ *
+ * Return: The current timestamp of the PTP clock using IEP driver
+ */
+static u64 icss_iep_gettime(struct icss_iep *iep,
+			    struct ptp_system_timestamp *sts)
+{
+	u32 ts_hi = 0, ts_lo;
+	unsigned long flags;
+
+	if (iep->ops && iep->ops->gettime)
+		return iep->ops->gettime(iep->clockops_data, sts);
+
+	/* use local_irq_x() to make it work for both RT/non-RT */
+	local_irq_save(flags);
+
+	/* no need to play with hi-lo, hi is latched when lo is read */
+	ptp_read_system_prets(sts);
+	ts_lo = readl(iep->base + iep->plat_data->reg_offs[ICSS_IEP_COUNT_REG0]);
+	ptp_read_system_postts(sts);
+	if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT)
+		ts_hi = readl(iep->base + iep->plat_data->reg_offs[ICSS_IEP_COUNT_REG1]);
+
+	local_irq_restore(flags);
+
+	return (u64)ts_lo | (u64)ts_hi << 32;
+}
+
+static void icss_iep_enable(struct icss_iep *iep)
+{
+	regmap_update_bits(iep->map, ICSS_IEP_GLOBAL_CFG_REG,
+			   IEP_GLOBAL_CFG_CNT_ENABLE,
+			   IEP_GLOBAL_CFG_CNT_ENABLE);
+}
+
+static void icss_iep_disable(struct icss_iep *iep)
+{
+	regmap_update_bits(iep->map, ICSS_IEP_GLOBAL_CFG_REG,
+			   IEP_GLOBAL_CFG_CNT_ENABLE,
+			   0);
+}
+
+static void icss_iep_enable_shadow_mode(struct icss_iep *iep)
+{
+	u32 cycle_time;
+	int cmp;
+
+	cycle_time = iep->cycle_time_ns - iep->def_inc;
+
+	icss_iep_disable(iep);
+
+	/* disable shadow mode */
+	regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG,
+			   IEP_CMP_CFG_SHADOW_EN, 0);
+
+	/* enable shadow mode */
+	regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG,
+			   IEP_CMP_CFG_SHADOW_EN, IEP_CMP_CFG_SHADOW_EN);
+
+	/* clear counters */
+	icss_iep_set_counter(iep, 0);
+
+	/* clear overflow status */
+	regmap_update_bits(iep->map, ICSS_IEP_GLOBAL_STATUS_REG,
+			   IEP_GLOBAL_STATUS_CNT_OVF,
+			   IEP_GLOBAL_STATUS_CNT_OVF);
+
+	/* clear compare status */
+	for (cmp = IEP_MIN_CMP; cmp < IEP_MAX_CMP; cmp++) {
+		regmap_update_bits(iep->map, ICSS_IEP_CMP_STAT_REG,
+				   IEP_CMP_STATUS(cmp), IEP_CMP_STATUS(cmp));
+	}
+
+	/* enable reset counter on CMP0 event */
+	regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG,
+			   IEP_CMP_CFG_CMP0_RST_CNT_EN,
+			   IEP_CMP_CFG_CMP0_RST_CNT_EN);
+	/* enable compare */
+	regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG,
+			   IEP_CMP_CFG_CMP_EN(0),
+			   IEP_CMP_CFG_CMP_EN(0));
+
+	/* set CMP0 value to cycle time */
+	regmap_write(iep->map, ICSS_IEP_CMP0_REG0, cycle_time);
+	if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT)
+		regmap_write(iep->map, ICSS_IEP_CMP0_REG1, cycle_time);
+
+	icss_iep_set_counter(iep, 0);
+	icss_iep_enable(iep);
+}
+
+static void icss_iep_set_default_inc(struct icss_iep *iep, u8 def_inc)
+{
+	regmap_update_bits(iep->map, ICSS_IEP_GLOBAL_CFG_REG,
+			   IEP_GLOBAL_CFG_DEFAULT_INC_MASK,
+			   def_inc << IEP_GLOBAL_CFG_DEFAULT_INC_SHIFT);
+}
+
+static void icss_iep_set_compensation_inc(struct icss_iep *iep, u16 compen_inc)
+{
+	struct device *dev = regmap_get_device(iep->map);
+
+	if (compen_inc > IEP_MAX_COMPEN_INC) {
+		dev_err(dev, "%s: too high compensation inc %d\n",
+			__func__, compen_inc);
+		compen_inc = IEP_MAX_COMPEN_INC;
+	}
+
+	regmap_update_bits(iep->map, ICSS_IEP_GLOBAL_CFG_REG,
+			   IEP_GLOBAL_CFG_COMPEN_INC_MASK,
+			   compen_inc << IEP_GLOBAL_CFG_COMPEN_INC_SHIFT);
+}
+
+static void icss_iep_set_compensation_count(struct icss_iep *iep,
+					    u32 compen_count)
+{
+	struct device *dev = regmap_get_device(iep->map);
+
+	if (compen_count > IEP_MAX_COMPEN_COUNT) {
+		dev_err(dev, "%s: too high compensation count %d\n",
+			__func__, compen_count);
+		compen_count = IEP_MAX_COMPEN_COUNT;
+	}
+
+	regmap_write(iep->map, ICSS_IEP_COMPEN_REG, compen_count);
+}
+
+static void icss_iep_set_slow_compensation_count(struct icss_iep *iep,
+						 u32 compen_count)
+{
+	regmap_write(iep->map, ICSS_IEP_SLOW_COMPEN_REG, compen_count);
+}
+
+/* PTP PHC operations */
+static int icss_iep_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct icss_iep *iep = container_of(ptp, struct icss_iep, ptp_info);
+	s32 ppb = scaled_ppm_to_ppb(scaled_ppm);
+	u32 cyc_count;
+	u16 cmp_inc;
+
+	mutex_lock(&iep->ptp_clk_mutex);
+
+	/* ppb is amount of frequency we want to adjust in 1GHz (billion)
+	 * e.g. 100ppb means we need to speed up clock by 100Hz
+	 * i.e. at end of 1 second (1 billion ns) clock time, we should be
+	 * counting 100 more ns.
+	 * We use IEP slow compensation to achieve continuous freq. adjustment.
+	 * There are 2 parts. Cycle time and adjustment per cycle.
+	 * Simplest case would be 1 sec Cycle time. Then adjustment
+	 * pre cycle would be (def_inc + ppb) value.
+	 * Cycle time will have to be chosen based on how worse the ppb is.
+	 * e.g. smaller the ppb, cycle time has to be large.
+	 * The minimum adjustment we can do is +-1ns per cycle so let's
+	 * reduce the cycle time to get 1ns per cycle adjustment.
+	 *	1ppb = 1sec cycle time & 1ns adjust
+	 *	1000ppb = 1/1000 cycle time & 1ns adjust per cycle
+	 */
+
+	if (iep->cycle_time_ns)
+		iep->slow_cmp_inc = iep->clk_tick_time;	/* 4ns adj per cycle */
+	else
+		iep->slow_cmp_inc = 1;	/* 1ns adjust per cycle */
+
+	if (ppb < 0) {
+		iep->slow_cmp_inc = -iep->slow_cmp_inc;
+		ppb = -ppb;
+	}
+
+	cyc_count = NSEC_PER_SEC;		/* 1s cycle time @1GHz */
+	cyc_count /= ppb;		/* cycle time per ppb */
+
+	/* slow_cmp_count is decremented every clock cycle, e.g. @250MHz */
+	if (!iep->cycle_time_ns)
+		cyc_count /= iep->clk_tick_time;
+	iep->slow_cmp_count = cyc_count;
+
+	/* iep->clk_tick_time is def_inc */
+	cmp_inc = iep->clk_tick_time + iep->slow_cmp_inc;
+	icss_iep_set_compensation_inc(iep, cmp_inc);
+	icss_iep_set_slow_compensation_count(iep, iep->slow_cmp_count);
+
+	mutex_unlock(&iep->ptp_clk_mutex);
+
+	return 0;
+}
+
+static int icss_iep_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct icss_iep *iep = container_of(ptp, struct icss_iep, ptp_info);
+	s64 ns;
+
+	mutex_lock(&iep->ptp_clk_mutex);
+	if (iep->ops && iep->ops->adjtime) {
+		iep->ops->adjtime(iep->clockops_data, delta);
+	} else {
+		ns = icss_iep_gettime(iep, NULL);
+		ns += delta;
+		icss_iep_settime(iep, ns);
+	}
+	mutex_unlock(&iep->ptp_clk_mutex);
+
+	return 0;
+}
+
+static int icss_iep_ptp_gettimeex(struct ptp_clock_info *ptp,
+				  struct timespec64 *ts,
+				  struct ptp_system_timestamp *sts)
+{
+	struct icss_iep *iep = container_of(ptp, struct icss_iep, ptp_info);
+	u64 ns;
+
+	mutex_lock(&iep->ptp_clk_mutex);
+	ns = icss_iep_gettime(iep, sts);
+	*ts = ns_to_timespec64(ns);
+	mutex_unlock(&iep->ptp_clk_mutex);
+
+	return 0;
+}
+
+static int icss_iep_ptp_settime(struct ptp_clock_info *ptp,
+				const struct timespec64 *ts)
+{
+	struct icss_iep *iep = container_of(ptp, struct icss_iep, ptp_info);
+	u64 ns;
+
+	mutex_lock(&iep->ptp_clk_mutex);
+	ns = timespec64_to_ns(ts);
+	icss_iep_settime(iep, ns);
+	mutex_unlock(&iep->ptp_clk_mutex);
+
+	return 0;
+}
+
+static void icss_iep_update_to_next_boundary(struct icss_iep *iep, u64 start_ns)
+{
+	u64 ns, p_ns;
+	u32 offset;
+
+	ns = icss_iep_gettime(iep, NULL);
+	if (start_ns < ns)
+		start_ns = ns;
+	p_ns = iep->period;
+	/* Round up to next period boundary */
+	start_ns += p_ns - 1;
+	offset = do_div(start_ns, p_ns);
+	start_ns = start_ns * p_ns;
+	/* If it is too close to update, shift to next boundary */
+	if (p_ns - offset < 10)
+		start_ns += p_ns;
+
+	regmap_write(iep->map, ICSS_IEP_CMP1_REG0, lower_32_bits(start_ns));
+	if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT)
+		regmap_write(iep->map, ICSS_IEP_CMP1_REG1, upper_32_bits(start_ns));
+}
+
+static int icss_iep_perout_enable_hw(struct icss_iep *iep,
+				     struct ptp_perout_request *req, int on)
+{
+	int ret;
+	u64 cmp;
+
+	if (iep->ops && iep->ops->perout_enable) {
+		ret = iep->ops->perout_enable(iep->clockops_data, req, on, &cmp);
+		if (ret)
+			return ret;
+
+		if (on) {
+			/* Configure CMP */
+			regmap_write(iep->map, ICSS_IEP_CMP1_REG0, lower_32_bits(cmp));
+			if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT)
+				regmap_write(iep->map, ICSS_IEP_CMP1_REG1, upper_32_bits(cmp));
+			/* Configure SYNC, 1ms pulse width */
+			regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, 1000000);
+			regmap_write(iep->map, ICSS_IEP_SYNC0_PERIOD_REG, 0);
+			regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, 0);
+			regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); /* one-shot mode */
+			/* Enable CMP 1 */
+			regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG,
+					   IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1));
+		} else {
+			/* Disable CMP 1 */
+			regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG,
+					   IEP_CMP_CFG_CMP_EN(1), 0);
+
+			/* clear regs */
+			regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0);
+			if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT)
+				regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0);
+		}
+	} else {
+		if (on) {
+			u64 start_ns;
+
+			iep->period = ((u64)req->period.sec * NSEC_PER_SEC) +
+				      req->period.nsec;
+			start_ns = ((u64)req->period.sec * NSEC_PER_SEC)
+				   + req->period.nsec;
+			icss_iep_update_to_next_boundary(iep, start_ns);
+
+			/* Enable Sync in single shot mode  */
+			regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG,
+				     IEP_SYNC_CTRL_SYNC_N_EN(0) | IEP_SYNC_CTRL_SYNC_EN);
+			/* Enable CMP 1 */
+			regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG,
+					   IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1));
+		} else {
+			/* Disable CMP 1 */
+			regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG,
+					   IEP_CMP_CFG_CMP_EN(1), 0);
+
+			/* clear CMP regs */
+			regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0);
+			if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT)
+				regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0);
+
+			/* Disable sync */
+			regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0);
+		}
+	}
+
+	return 0;
+}
+
+static int icss_iep_perout_enable(struct icss_iep *iep,
+				  struct ptp_perout_request *req, int on)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	mutex_lock(&iep->ptp_clk_mutex);
+
+	if (iep->pps_enabled) {
+		ret = -EBUSY;
+		goto exit;
+	}
+
+	if (iep->perout_enabled == !!on)
+		goto exit;
+
+	spin_lock_irqsave(&iep->irq_lock, flags);
+	ret = icss_iep_perout_enable_hw(iep, req, on);
+	if (!ret)
+		iep->perout_enabled = !!on;
+	spin_unlock_irqrestore(&iep->irq_lock, flags);
+
+exit:
+	mutex_unlock(&iep->ptp_clk_mutex);
+
+	return ret;
+}
+
+static int icss_iep_pps_enable(struct icss_iep *iep, int on)
+{
+	struct ptp_clock_request rq;
+	struct timespec64 ts;
+	unsigned long flags;
+	int ret = 0;
+	u64 ns;
+
+	mutex_lock(&iep->ptp_clk_mutex);
+
+	if (iep->perout_enabled) {
+		ret = -EBUSY;
+		goto exit;
+	}
+
+	if (iep->pps_enabled == !!on)
+		goto exit;
+
+	spin_lock_irqsave(&iep->irq_lock, flags);
+
+	rq.perout.index = 0;
+	if (on) {
+		ns = icss_iep_gettime(iep, NULL);
+		ts = ns_to_timespec64(ns);
+		rq.perout.period.sec = 1;
+		rq.perout.period.nsec = 0;
+		rq.perout.start.sec = ts.tv_sec + 2;
+		rq.perout.start.nsec = 0;
+		ret = icss_iep_perout_enable_hw(iep, &rq.perout, on);
+	} else {
+		ret = icss_iep_perout_enable_hw(iep, &rq.perout, on);
+	}
+
+	if (!ret)
+		iep->pps_enabled = !!on;
+
+	spin_unlock_irqrestore(&iep->irq_lock, flags);
+
+exit:
+	mutex_unlock(&iep->ptp_clk_mutex);
+
+	return ret;
+}
+
+static int icss_iep_extts_enable(struct icss_iep *iep, u32 index, int on)
+{
+	u32 val, cap, ret = 0;
+
+	mutex_lock(&iep->ptp_clk_mutex);
+
+	if (iep->ops && iep->ops->extts_enable) {
+		ret = iep->ops->extts_enable(iep->clockops_data, index, on);
+		goto exit;
+	}
+
+	if (((iep->latch_enable & BIT(index)) >> index) == on)
+		goto exit;
+
+	regmap_read(iep->map, ICSS_IEP_CAPTURE_CFG_REG, &val);
+	cap = IEP_CAP_CFG_CAP_ASYNC_EN(index) | IEP_CAP_CFG_CAPNR_1ST_EVENT_EN(index);
+	if (on) {
+		val |= cap;
+		iep->latch_enable |= BIT(index);
+	} else {
+		val &= ~cap;
+		iep->latch_enable &= ~BIT(index);
+	}
+	regmap_write(iep->map, ICSS_IEP_CAPTURE_CFG_REG, val);
+
+exit:
+	mutex_unlock(&iep->ptp_clk_mutex);
+
+	return ret;
+}
+
+static int icss_iep_ptp_enable(struct ptp_clock_info *ptp,
+			       struct ptp_clock_request *rq, int on)
+{
+	struct icss_iep *iep = container_of(ptp, struct icss_iep, ptp_info);
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_PEROUT:
+		return icss_iep_perout_enable(iep, &rq->perout, on);
+	case PTP_CLK_REQ_PPS:
+		return icss_iep_pps_enable(iep, on);
+	case PTP_CLK_REQ_EXTTS:
+		return icss_iep_extts_enable(iep, rq->extts.index, on);
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static struct ptp_clock_info icss_iep_ptp_info = {
+	.owner		= THIS_MODULE,
+	.name		= "ICSS IEP timer",
+	.max_adj	= 10000000,
+	.adjfine	= icss_iep_ptp_adjfine,
+	.adjtime	= icss_iep_ptp_adjtime,
+	.gettimex64	= icss_iep_ptp_gettimeex,
+	.settime64	= icss_iep_ptp_settime,
+	.enable		= icss_iep_ptp_enable,
+};
+
+struct icss_iep *icss_iep_get_idx(struct device_node *np, int idx)
+{
+	struct platform_device *pdev;
+	struct device_node *iep_np;
+	struct icss_iep *iep;
+
+	iep_np = of_parse_phandle(np, "ti,iep", idx);
+	if (!iep_np || !of_device_is_available(iep_np))
+		return ERR_PTR(-ENODEV);
+
+	pdev = of_find_device_by_node(iep_np);
+	of_node_put(iep_np);
+
+	if (!pdev)
+		/* probably IEP not yet probed */
+		return ERR_PTR(-EPROBE_DEFER);
+
+	iep = platform_get_drvdata(pdev);
+	if (!iep)
+		return ERR_PTR(-EPROBE_DEFER);
+
+	device_lock(iep->dev);
+	if (iep->client_np) {
+		device_unlock(iep->dev);
+		dev_err(iep->dev, "IEP is already acquired by %s",
+			iep->client_np->name);
+		return ERR_PTR(-EBUSY);
+	}
+	iep->client_np = np;
+	device_unlock(iep->dev);
+	get_device(iep->dev);
+
+	return iep;
+}
+EXPORT_SYMBOL_GPL(icss_iep_get_idx);
+
+struct icss_iep *icss_iep_get(struct device_node *np)
+{
+	return icss_iep_get_idx(np, 0);
+}
+EXPORT_SYMBOL_GPL(icss_iep_get);
+
+void icss_iep_put(struct icss_iep *iep)
+{
+	device_lock(iep->dev);
+	iep->client_np = NULL;
+	device_unlock(iep->dev);
+	put_device(iep->dev);
+}
+EXPORT_SYMBOL_GPL(icss_iep_put);
+
+int icss_iep_init(struct icss_iep *iep, const struct icss_iep_clockops *clkops,
+		  void *clockops_data, u32 cycle_time_ns)
+{
+	int ret = 0;
+
+	iep->cycle_time_ns = cycle_time_ns;
+	iep->clk_tick_time = iep->def_inc;
+	iep->ops = clkops;
+	iep->clockops_data = clockops_data;
+	icss_iep_set_default_inc(iep, iep->def_inc);
+	icss_iep_set_compensation_inc(iep, iep->def_inc);
+	icss_iep_set_compensation_count(iep, 0);
+	regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, iep->refclk_freq / 10); /* 100 ms pulse */
+	regmap_write(iep->map, ICSS_IEP_SYNC0_PERIOD_REG, 0);
+	if (iep->plat_data->flags & ICSS_IEP_SLOW_COMPEN_REG_SUPPORT)
+		icss_iep_set_slow_compensation_count(iep, 0);
+
+	if (!(iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) ||
+	    !(iep->plat_data->flags & ICSS_IEP_SLOW_COMPEN_REG_SUPPORT))
+		goto skip_perout;
+
+	if (iep->ops && iep->ops->perout_enable) {
+		iep->ptp_info.n_per_out = 1;
+		iep->ptp_info.pps = 1;
+	}
+
+	if (iep->ops && iep->ops->extts_enable)
+		iep->ptp_info.n_ext_ts = 2;
+
+skip_perout:
+	if (cycle_time_ns)
+		icss_iep_enable_shadow_mode(iep);
+	else
+		icss_iep_enable(iep);
+	icss_iep_settime(iep, ktime_get_real_ns());
+
+	iep->ptp_clock = ptp_clock_register(&iep->ptp_info, iep->dev);
+	if (IS_ERR(iep->ptp_clock)) {
+		ret = PTR_ERR(iep->ptp_clock);
+		iep->ptp_clock = NULL;
+		dev_err(iep->dev, "Failed to register ptp clk %d\n", ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(icss_iep_init);
+
+int icss_iep_exit(struct icss_iep *iep)
+{
+	if (iep->ptp_clock) {
+		ptp_clock_unregister(iep->ptp_clock);
+		iep->ptp_clock = NULL;
+	}
+	icss_iep_disable(iep);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(icss_iep_exit);
+
+static int icss_iep_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct icss_iep *iep;
+	struct clk *iep_clk;
+
+	iep = devm_kzalloc(dev, sizeof(*iep), GFP_KERNEL);
+	if (!iep)
+		return -ENOMEM;
+
+	iep->dev = dev;
+	iep->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(iep->base))
+		return -ENODEV;
+
+	iep_clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(iep_clk))
+		return PTR_ERR(iep_clk);
+
+	iep->refclk_freq = clk_get_rate(iep_clk);
+
+	iep->def_inc = NSEC_PER_SEC / iep->refclk_freq;	/* ns per clock tick */
+	if (iep->def_inc > IEP_MAX_DEF_INC) {
+		dev_err(dev, "Failed to set def_inc %d.  IEP_clock is too slow to be supported\n",
+			iep->def_inc);
+		return -EINVAL;
+	}
+
+	iep->plat_data = device_get_match_data(dev);
+	if (!iep->plat_data)
+		return -EINVAL;
+
+	iep->map = devm_regmap_init(dev, NULL, iep, iep->plat_data->config);
+	if (IS_ERR(iep->map)) {
+		dev_err(dev, "Failed to create regmap for IEP %ld\n",
+			PTR_ERR(iep->map));
+		return PTR_ERR(iep->map);
+	}
+
+	iep->ptp_info = icss_iep_ptp_info;
+	mutex_init(&iep->ptp_clk_mutex);
+	spin_lock_init(&iep->irq_lock);
+	dev_set_drvdata(dev, iep);
+	icss_iep_disable(iep);
+
+	return 0;
+}
+
+static bool am654_icss_iep_valid_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case ICSS_IEP_GLOBAL_CFG_REG ... ICSS_IEP_SYNC_START_REG:
+		return true;
+	default:
+		return false;
+	}
+
+	return false;
+}
+
+static int icss_iep_regmap_write(void *context, unsigned int reg,
+				 unsigned int val)
+{
+	struct icss_iep *iep = context;
+
+	writel(val, iep->base + iep->plat_data->reg_offs[reg]);
+
+	return 0;
+}
+
+static int icss_iep_regmap_read(void *context, unsigned int reg,
+				unsigned int *val)
+{
+	struct icss_iep *iep = context;
+
+	*val = readl(iep->base + iep->plat_data->reg_offs[reg]);
+
+	return 0;
+}
+
+static struct regmap_config am654_icss_iep_regmap_config = {
+	.name = "icss iep",
+	.reg_stride = 1,
+	.reg_write = icss_iep_regmap_write,
+	.reg_read = icss_iep_regmap_read,
+	.writeable_reg = am654_icss_iep_valid_reg,
+	.readable_reg = am654_icss_iep_valid_reg,
+	.fast_io = 1,
+};
+
+static const struct icss_iep_plat_data am654_icss_iep_plat_data = {
+	.flags = ICSS_IEP_64BIT_COUNTER_SUPPORT |
+		 ICSS_IEP_SLOW_COMPEN_REG_SUPPORT |
+		 ICSS_IEP_SHADOW_MODE_SUPPORT,
+	.reg_offs = {
+		[ICSS_IEP_GLOBAL_CFG_REG] = 0x00,
+		[ICSS_IEP_COMPEN_REG] = 0x08,
+		[ICSS_IEP_SLOW_COMPEN_REG] = 0x0C,
+		[ICSS_IEP_COUNT_REG0] = 0x10,
+		[ICSS_IEP_COUNT_REG1] = 0x14,
+		[ICSS_IEP_CAPTURE_CFG_REG] = 0x18,
+		[ICSS_IEP_CAPTURE_STAT_REG] = 0x1c,
+
+		[ICSS_IEP_CAP6_RISE_REG0] = 0x50,
+		[ICSS_IEP_CAP6_RISE_REG1] = 0x54,
+
+		[ICSS_IEP_CAP7_RISE_REG0] = 0x60,
+		[ICSS_IEP_CAP7_RISE_REG1] = 0x64,
+
+		[ICSS_IEP_CMP_CFG_REG] = 0x70,
+		[ICSS_IEP_CMP_STAT_REG] = 0x74,
+		[ICSS_IEP_CMP0_REG0] = 0x78,
+		[ICSS_IEP_CMP0_REG1] = 0x7c,
+		[ICSS_IEP_CMP1_REG0] = 0x80,
+		[ICSS_IEP_CMP1_REG1] = 0x84,
+
+		[ICSS_IEP_CMP8_REG0] = 0xc0,
+		[ICSS_IEP_CMP8_REG1] = 0xc4,
+		[ICSS_IEP_SYNC_CTRL_REG] = 0x180,
+		[ICSS_IEP_SYNC0_STAT_REG] = 0x188,
+		[ICSS_IEP_SYNC1_STAT_REG] = 0x18c,
+		[ICSS_IEP_SYNC_PWIDTH_REG] = 0x190,
+		[ICSS_IEP_SYNC0_PERIOD_REG] = 0x194,
+		[ICSS_IEP_SYNC1_DELAY_REG] = 0x198,
+		[ICSS_IEP_SYNC_START_REG] = 0x19c,
+	},
+	.config = &am654_icss_iep_regmap_config,
+};
+
+static const struct of_device_id icss_iep_of_match[] = {
+	{
+		.compatible = "ti,am654-icss-iep",
+		.data = &am654_icss_iep_plat_data,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, icss_iep_of_match);
+
+static struct platform_driver icss_iep_driver = {
+	.driver = {
+		.name = "icss-iep",
+		.of_match_table = icss_iep_of_match,
+	},
+	.probe = icss_iep_probe,
+};
+module_platform_driver(icss_iep_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TI ICSS IEP driver");
+MODULE_AUTHOR("Roger Quadros <rogerq@ti.com>");
+MODULE_AUTHOR("Md Danish Anwar <danishanwar@ti.com>");
diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.h b/drivers/net/ethernet/ti/icssg/icss_iep.h
new file mode 100644
index 000000000000..9eee44ae4990
--- /dev/null
+++ b/drivers/net/ethernet/ti/icssg/icss_iep.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Texas Instruments ICSSG Industrial Ethernet Peripheral (IEP) Driver
+ *
+ * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
+ *
+ */
+
+#ifndef __NET_TI_ICSS_IEP_H
+#define __NET_TI_ICSS_IEP_H
+
+#include <linux/mutex.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/regmap.h>
+
+struct icss_iep;
+
+/* Firmware specific clock operations */
+struct icss_iep_clockops {
+	void (*settime)(void *clockops_data, u64 ns);
+	void (*adjtime)(void *clockops_data, s64 delta);
+	u64 (*gettime)(void *clockops_data, struct ptp_system_timestamp *sts);
+	int (*perout_enable)(void *clockops_data,
+			     struct ptp_perout_request *req, int on,
+			     u64 *cmp);
+	int (*extts_enable)(void *clockops_data, u32 index, int on);
+};
+
+struct icss_iep *icss_iep_get(struct device_node *np);
+struct icss_iep *icss_iep_get_idx(struct device_node *np, int idx);
+void icss_iep_put(struct icss_iep *iep);
+int icss_iep_init(struct icss_iep *iep, const struct icss_iep_clockops *clkops,
+		  void *clockops_data, u32 cycle_time_ns);
+int icss_iep_exit(struct icss_iep *iep);
+int icss_iep_get_count_low(struct icss_iep *iep);
+int icss_iep_get_count_hi(struct icss_iep *iep);
+int icss_iep_get_ptp_clock_idx(struct icss_iep *iep);
+
+#endif /* __NET_TI_ICSS_IEP_H */

From 186734c158865747f7160b461afcabfaab8f9bca Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Thu, 24 Aug 2023 17:16:17 +0530
Subject: [PATCH 1939/2047] net: ti: icssg-prueth: add packet timestamping and
 ptp support

Add packet timestamping TS and PTP PHC clock support.

For AM65x and AM64x:
 - IEP1 is not used
 - IEP0 is configured in shadow mode with 1ms cycle and shared between
Linux and FW. It provides time and TS in number cycles, so special
conversation in ns is required.
 - IEP0 shared between PRUeth ports.
 - IEP0 supports PPS, periodic output.
 - IEP0 settime() and enabling PPS required FW interraction.
 - RX TS provided with each packet in CPPI5 descriptor.
 - TX TS returned through separate ICSSG hw queues for each port. TX TS
readiness is signaled by INTC IRQ. Only one packet at time can be requested
for TX TS.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Co-developed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/Kconfig               |   1 +
 drivers/net/ethernet/ti/icssg/icss_iep.h      |   1 +
 drivers/net/ethernet/ti/icssg/icssg_ethtool.c |  21 +
 drivers/net/ethernet/ti/icssg/icssg_prueth.c  | 424 +++++++++++++++++-
 drivers/net/ethernet/ti/icssg/icssg_prueth.h  |  26 +-
 5 files changed, 467 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 1af5a90720ec..88b5b1b47779 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -186,6 +186,7 @@ config CPMAC
 config TI_ICSSG_PRUETH
 	tristate "TI Gigabit PRU Ethernet driver"
 	select PHYLIB
+	select TI_ICSS_IEP
 	depends on PRU_REMOTEPROC
 	depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER
 	help
diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.h b/drivers/net/ethernet/ti/icssg/icss_iep.h
index 9eee44ae4990..9c7f4d0a0916 100644
--- a/drivers/net/ethernet/ti/icssg/icss_iep.h
+++ b/drivers/net/ethernet/ti/icssg/icss_iep.h
@@ -13,6 +13,7 @@
 #include <linux/regmap.h>
 
 struct icss_iep;
+extern const struct icss_iep_clockops prueth_iep_clockops;
 
 /* Firmware specific clock operations */
 struct icss_iep_clockops {
diff --git a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c
index 02c312f01d10..a27ec1dcc8d5 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c
@@ -109,6 +109,26 @@ static void emac_get_ethtool_stats(struct net_device *ndev,
 			*(data++) = emac->stats[i];
 }
 
+static int emac_get_ts_info(struct net_device *ndev,
+			    struct ethtool_ts_info *info)
+{
+	struct prueth_emac *emac = netdev_priv(ndev);
+
+	info->so_timestamping =
+		SOF_TIMESTAMPING_TX_HARDWARE |
+		SOF_TIMESTAMPING_TX_SOFTWARE |
+		SOF_TIMESTAMPING_RX_HARDWARE |
+		SOF_TIMESTAMPING_RX_SOFTWARE |
+		SOF_TIMESTAMPING_SOFTWARE |
+		SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	info->phc_index = icss_iep_get_ptp_clock_idx(emac->iep);
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON);
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
+
+	return 0;
+}
+
 static int emac_set_channels(struct net_device *ndev,
 			     struct ethtool_channels *ch)
 {
@@ -176,6 +196,7 @@ const struct ethtool_ops icssg_ethtool_ops = {
 	.get_sset_count = emac_get_sset_count,
 	.get_ethtool_stats = emac_get_ethtool_stats,
 	.get_strings = emac_get_strings,
+	.get_ts_info = emac_get_ts_info,
 	.get_channels = emac_get_channels,
 	.set_channels = emac_set_channels,
 	.get_link_ksettings = emac_get_link_ksettings,
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index 47b941fb0198..1bcb4e174652 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -56,6 +56,8 @@
 /* CTRLMMR_ICSSG_RGMII_CTRL register bits */
 #define ICSSG_CTRL_RGMII_ID_MODE                BIT(24)
 
+#define IEP_DEFAULT_CYCLE_TIME_NS	1000000	/* 1 ms */
+
 static void prueth_cleanup_rx_chns(struct prueth_emac *emac,
 				   struct prueth_rx_chn *rx_chn,
 				   int max_rflows)
@@ -471,6 +473,37 @@ static int prueth_dma_rx_push(struct prueth_emac *emac,
 					desc_rx, desc_dma);
 }
 
+static u64 icssg_ts_to_ns(u32 hi_sw, u32 hi, u32 lo, u32 cycle_time_ns)
+{
+	u32 iepcount_lo, iepcount_hi, hi_rollover_count;
+	u64 ns;
+
+	iepcount_lo = lo & GENMASK(19, 0);
+	iepcount_hi = (hi & GENMASK(11, 0)) << 12 | lo >> 20;
+	hi_rollover_count = hi >> 11;
+
+	ns = ((u64)hi_rollover_count) << 23 | (iepcount_hi + hi_sw);
+	ns = ns * cycle_time_ns + iepcount_lo;
+
+	return ns;
+}
+
+static void emac_rx_timestamp(struct prueth_emac *emac,
+			      struct sk_buff *skb, u32 *psdata)
+{
+	struct skb_shared_hwtstamps *ssh;
+	u64 ns;
+
+	u32 hi_sw = readl(emac->prueth->shram.va +
+			  TIMESYNC_FW_WC_COUNT_HI_SW_OFFSET_OFFSET);
+	ns = icssg_ts_to_ns(hi_sw, psdata[1], psdata[0],
+			    IEP_DEFAULT_CYCLE_TIME_NS);
+
+	ssh = skb_hwtstamps(skb);
+	memset(ssh, 0, sizeof(*ssh));
+	ssh->hwtstamp = ns_to_ktime(ns);
+}
+
 static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
 {
 	struct prueth_rx_chn *rx_chn = &emac->rx_chns;
@@ -480,6 +513,7 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
 	struct sk_buff *skb, *new_skb;
 	dma_addr_t desc_dma, buf_dma;
 	void **swdata;
+	u32 *psdata;
 	int ret;
 
 	ret = k3_udma_glue_pop_rx_chn(rx_chn->rx_chn, flow_id, &desc_dma);
@@ -497,6 +531,11 @@ static int emac_rx_packet(struct prueth_emac *emac, u32 flow_id)
 	swdata = cppi5_hdesc_get_swdata(desc_rx);
 	skb = *swdata;
 
+	psdata = cppi5_hdesc_get_psdata(desc_rx);
+	/* RX HW timestamp */
+	if (emac->rx_ts_enabled)
+		emac_rx_timestamp(emac, skb, psdata);
+
 	cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len);
 	k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma);
 	pkt_len = cppi5_hdesc_get_pktlen(desc_rx);
@@ -557,6 +596,86 @@ static void prueth_rx_cleanup(void *data, dma_addr_t desc_dma)
 	dev_kfree_skb_any(skb);
 }
 
+static int emac_get_tx_ts(struct prueth_emac *emac,
+			  struct emac_tx_ts_response *rsp)
+{
+	struct prueth *prueth = emac->prueth;
+	int slice = prueth_emac_slice(emac);
+	int addr;
+
+	addr = icssg_queue_pop(prueth, slice == 0 ?
+			       ICSSG_TS_POP_SLICE0 : ICSSG_TS_POP_SLICE1);
+	if (addr < 0)
+		return addr;
+
+	memcpy_fromio(rsp, prueth->shram.va + addr, sizeof(*rsp));
+	/* return buffer back for to pool */
+	icssg_queue_push(prueth, slice == 0 ?
+			 ICSSG_TS_PUSH_SLICE0 : ICSSG_TS_PUSH_SLICE1, addr);
+
+	return 0;
+}
+
+static void tx_ts_work(struct prueth_emac *emac)
+{
+	struct skb_shared_hwtstamps ssh;
+	struct emac_tx_ts_response tsr;
+	struct sk_buff *skb;
+	int ret = 0;
+	u32 hi_sw;
+	u64 ns;
+
+	/* There may be more than one pending requests */
+	while (1) {
+		ret = emac_get_tx_ts(emac, &tsr);
+		if (ret) /* nothing more */
+			break;
+
+		if (tsr.cookie >= PRUETH_MAX_TX_TS_REQUESTS ||
+		    !emac->tx_ts_skb[tsr.cookie]) {
+			netdev_err(emac->ndev, "Invalid TX TS cookie 0x%x\n",
+				   tsr.cookie);
+			break;
+		}
+
+		skb = emac->tx_ts_skb[tsr.cookie];
+		emac->tx_ts_skb[tsr.cookie] = NULL;	/* free slot */
+		if (!skb) {
+			netdev_err(emac->ndev, "Driver Bug! got NULL skb\n");
+			break;
+		}
+
+		hi_sw = readl(emac->prueth->shram.va +
+			      TIMESYNC_FW_WC_COUNT_HI_SW_OFFSET_OFFSET);
+		ns = icssg_ts_to_ns(hi_sw, tsr.hi_ts, tsr.lo_ts,
+				    IEP_DEFAULT_CYCLE_TIME_NS);
+
+		memset(&ssh, 0, sizeof(ssh));
+		ssh.hwtstamp = ns_to_ktime(ns);
+
+		skb_tstamp_tx(skb, &ssh);
+		dev_consume_skb_any(skb);
+
+		if (atomic_dec_and_test(&emac->tx_ts_pending))	/* no more? */
+			break;
+	}
+}
+
+static int prueth_tx_ts_cookie_get(struct prueth_emac *emac)
+{
+	int i;
+
+	/* search and get the next free slot */
+	for (i = 0; i < PRUETH_MAX_TX_TS_REQUESTS; i++) {
+		if (!emac->tx_ts_skb[i]) {
+			emac->tx_ts_skb[i] = ERR_PTR(-EBUSY); /* reserve slot */
+			return i;
+		}
+	}
+
+	return -EBUSY;
+}
+
 /**
  * emac_ndo_start_xmit - EMAC Transmit function
  * @skb: SKB pointer
@@ -577,6 +696,8 @@ static enum netdev_tx emac_ndo_start_xmit(struct sk_buff *skb, struct net_device
 	struct prueth_tx_chn *tx_chn;
 	dma_addr_t desc_dma, buf_dma;
 	int i, ret = 0, q_idx;
+	bool in_tx_ts = 0;
+	int tx_ts_cookie;
 	void **swdata;
 	u32 pkt_len;
 	u32 *epib;
@@ -608,6 +729,18 @@ static enum netdev_tx emac_ndo_start_xmit(struct sk_buff *skb, struct net_device
 	epib = first_desc->epib;
 	epib[0] = 0;
 	epib[1] = 0;
+	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+	    emac->tx_ts_enabled) {
+		tx_ts_cookie = prueth_tx_ts_cookie_get(emac);
+		if (tx_ts_cookie >= 0) {
+			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+			/* Request TX timestamp */
+			epib[0] = (u32)tx_ts_cookie;
+			epib[1] = 0x80000000;	/* TX TS request */
+			emac->tx_ts_skb[tx_ts_cookie] = skb_get(skb);
+			in_tx_ts = 1;
+		}
+	}
 
 	/* set dst tag to indicate internal qid at the firmware which is at
 	 * bit8..bit15. bit0..bit7 indicates port num for directed
@@ -629,7 +762,7 @@ static enum netdev_tx emac_ndo_start_xmit(struct sk_buff *skb, struct net_device
 		if (!next_desc) {
 			netdev_err(ndev,
 				   "tx: failed to allocate frag. descriptor\n");
-			goto free_desc_stop_q_busy;
+			goto free_desc_stop_q_busy_cleanup_tx_ts;
 		}
 
 		buf_dma = skb_frag_dma_map(tx_chn->dma_dev, frag, 0, frag_size,
@@ -638,7 +771,7 @@ static enum netdev_tx emac_ndo_start_xmit(struct sk_buff *skb, struct net_device
 			netdev_err(ndev, "tx: Failed to map skb page\n");
 			k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc);
 			ret = NETDEV_TX_OK;
-			goto drop_free_descs;
+			goto cleanup_tx_ts;
 		}
 
 		cppi5_hdesc_reset_hbdesc(next_desc);
@@ -670,6 +803,9 @@ static enum netdev_tx emac_ndo_start_xmit(struct sk_buff *skb, struct net_device
 		goto drop_free_descs;
 	}
 
+	if (in_tx_ts)
+		atomic_inc(&emac->tx_ts_pending);
+
 	if (k3_cppi_desc_pool_avail(tx_chn->desc_pool) < MAX_SKB_FRAGS) {
 		netif_tx_stop_queue(netif_txq);
 		/* Barrier, so that stop_queue visible to other cpus */
@@ -682,6 +818,12 @@ static enum netdev_tx emac_ndo_start_xmit(struct sk_buff *skb, struct net_device
 
 	return NETDEV_TX_OK;
 
+cleanup_tx_ts:
+	if (in_tx_ts) {
+		dev_kfree_skb_any(emac->tx_ts_skb[tx_ts_cookie]);
+		emac->tx_ts_skb[tx_ts_cookie] = NULL;
+	}
+
 drop_free_descs:
 	prueth_xmit_free(tx_chn, first_desc);
 
@@ -694,7 +836,11 @@ drop_free_skb:
 
 	return ret;
 
-free_desc_stop_q_busy:
+free_desc_stop_q_busy_cleanup_tx_ts:
+	if (in_tx_ts) {
+		dev_kfree_skb_any(emac->tx_ts_skb[tx_ts_cookie]);
+		emac->tx_ts_skb[tx_ts_cookie] = NULL;
+	}
 	prueth_xmit_free(tx_chn, first_desc);
 
 drop_stop_q_busy:
@@ -717,6 +863,16 @@ static void prueth_tx_cleanup(void *data, dma_addr_t desc_dma)
 	dev_kfree_skb_any(skb);
 }
 
+static irqreturn_t prueth_tx_ts_irq(int irq, void *dev_id)
+{
+	struct prueth_emac *emac = dev_id;
+
+	/* currently only TX timestamp is being returned */
+	tx_ts_work(emac);
+
+	return IRQ_HANDLED;
+}
+
 static irqreturn_t prueth_rx_irq(int irq, void *dev_id)
 {
 	struct prueth_emac *emac = dev_id;
@@ -820,6 +976,18 @@ static void prueth_emac_stop(struct prueth_emac *emac)
 	rproc_shutdown(prueth->pru[slice]);
 }
 
+static void prueth_cleanup_tx_ts(struct prueth_emac *emac)
+{
+	int i;
+
+	for (i = 0; i < PRUETH_MAX_TX_TS_REQUESTS; i++) {
+		if (emac->tx_ts_skb[i]) {
+			dev_kfree_skb_any(emac->tx_ts_skb[i]);
+			emac->tx_ts_skb[i] = NULL;
+		}
+	}
+}
+
 /* called back by PHY layer if there is change in link state of hw port*/
 static void emac_adjust_link(struct net_device *ndev)
 {
@@ -881,6 +1049,7 @@ static void emac_adjust_link(struct net_device *ndev)
 		netif_tx_wake_all_queues(ndev);
 	} else {
 		netif_tx_stop_all_queues(ndev);
+		prueth_cleanup_tx_ts(emac);
 	}
 }
 
@@ -992,6 +1161,139 @@ static int emac_phy_connect(struct prueth_emac *emac)
 	return 0;
 }
 
+static u64 prueth_iep_gettime(void *clockops_data, struct ptp_system_timestamp *sts)
+{
+	u32 hi_rollover_count, hi_rollover_count_r;
+	struct prueth_emac *emac = clockops_data;
+	struct prueth *prueth = emac->prueth;
+	void __iomem *fw_hi_r_count_addr;
+	void __iomem *fw_count_hi_addr;
+	u32 iepcount_hi, iepcount_hi_r;
+	unsigned long flags;
+	u32 iepcount_lo;
+	u64 ts = 0;
+
+	fw_count_hi_addr = prueth->shram.va + TIMESYNC_FW_WC_COUNT_HI_SW_OFFSET_OFFSET;
+	fw_hi_r_count_addr = prueth->shram.va + TIMESYNC_FW_WC_HI_ROLLOVER_COUNT_OFFSET;
+
+	local_irq_save(flags);
+	do {
+		iepcount_hi = icss_iep_get_count_hi(emac->iep);
+		iepcount_hi += readl(fw_count_hi_addr);
+		hi_rollover_count = readl(fw_hi_r_count_addr);
+		ptp_read_system_prets(sts);
+		iepcount_lo = icss_iep_get_count_low(emac->iep);
+		ptp_read_system_postts(sts);
+
+		iepcount_hi_r = icss_iep_get_count_hi(emac->iep);
+		iepcount_hi_r += readl(fw_count_hi_addr);
+		hi_rollover_count_r = readl(fw_hi_r_count_addr);
+	} while ((iepcount_hi_r != iepcount_hi) ||
+		 (hi_rollover_count != hi_rollover_count_r));
+	local_irq_restore(flags);
+
+	ts = ((u64)hi_rollover_count) << 23 | iepcount_hi;
+	ts = ts * (u64)IEP_DEFAULT_CYCLE_TIME_NS + iepcount_lo;
+
+	return ts;
+}
+
+static void prueth_iep_settime(void *clockops_data, u64 ns)
+{
+	struct icssg_setclock_desc __iomem *sc_descp;
+	struct prueth_emac *emac = clockops_data;
+	struct icssg_setclock_desc sc_desc;
+	u64 cyclecount;
+	u32 cycletime;
+	int timeout;
+
+	if (!emac->fw_running)
+		return;
+
+	sc_descp = emac->prueth->shram.va + TIMESYNC_FW_WC_SETCLOCK_DESC_OFFSET;
+
+	cycletime = IEP_DEFAULT_CYCLE_TIME_NS;
+	cyclecount = ns / cycletime;
+
+	memset(&sc_desc, 0, sizeof(sc_desc));
+	sc_desc.margin = cycletime - 1000;
+	sc_desc.cyclecounter0_set = cyclecount & GENMASK(31, 0);
+	sc_desc.cyclecounter1_set = (cyclecount & GENMASK(63, 32)) >> 32;
+	sc_desc.iepcount_set = ns % cycletime;
+	sc_desc.CMP0_current = cycletime - 4; //Count from 0 to (cycle time)-4
+
+	memcpy_toio(sc_descp, &sc_desc, sizeof(sc_desc));
+
+	writeb(1, &sc_descp->request);
+
+	timeout = 5;	/* fw should take 2-3 ms */
+	while (timeout--) {
+		if (readb(&sc_descp->acknowledgment))
+			return;
+
+		usleep_range(500, 1000);
+	}
+
+	dev_err(emac->prueth->dev, "settime timeout\n");
+}
+
+static int prueth_perout_enable(void *clockops_data,
+				struct ptp_perout_request *req, int on,
+				u64 *cmp)
+{
+	struct prueth_emac *emac = clockops_data;
+	u32 reduction_factor = 0, offset = 0;
+	struct timespec64 ts;
+	u64 ns_period;
+
+	if (!on)
+		return 0;
+
+	/* Any firmware specific stuff for PPS/PEROUT handling */
+	ts.tv_sec = req->period.sec;
+	ts.tv_nsec = req->period.nsec;
+	ns_period = timespec64_to_ns(&ts);
+
+	/* f/w doesn't support period less than cycle time */
+	if (ns_period < IEP_DEFAULT_CYCLE_TIME_NS)
+		return -ENXIO;
+
+	reduction_factor = ns_period / IEP_DEFAULT_CYCLE_TIME_NS;
+	offset = ns_period % IEP_DEFAULT_CYCLE_TIME_NS;
+
+	/* f/w requires at least 1uS within a cycle so CMP
+	 * can trigger after SYNC is enabled
+	 */
+	if (offset < 5 * NSEC_PER_USEC)
+		offset = 5 * NSEC_PER_USEC;
+
+	/* if offset is close to cycle time then we will miss
+	 * the CMP event for last tick when IEP rolls over.
+	 * In normal mode, IEP tick is 4ns.
+	 * In slow compensation it could be 0ns or 8ns at
+	 * every slow compensation cycle.
+	 */
+	if (offset > IEP_DEFAULT_CYCLE_TIME_NS - 8)
+		offset = IEP_DEFAULT_CYCLE_TIME_NS - 8;
+
+	/* we're in shadow mode so need to set upper 32-bits */
+	*cmp = (u64)offset << 32;
+
+	writel(reduction_factor, emac->prueth->shram.va +
+		TIMESYNC_FW_WC_SYNCOUT_REDUCTION_FACTOR_OFFSET);
+
+	writel(0, emac->prueth->shram.va +
+		TIMESYNC_FW_WC_SYNCOUT_START_TIME_CYCLECOUNT_OFFSET);
+
+	return 0;
+}
+
+const struct icss_iep_clockops prueth_iep_clockops = {
+	.settime = prueth_iep_settime,
+	.gettime = prueth_iep_gettime,
+	.perout_enable = prueth_perout_enable,
+};
+
 /**
  * emac_ndo_open - EMAC device open
  * @ndev: network adapter device
@@ -1066,10 +1368,20 @@ static int emac_ndo_open(struct net_device *ndev)
 
 	icssg_mii_update_mtu(prueth->mii_rt, slice, ndev->max_mtu);
 
+	if (!prueth->emacs_initialized) {
+		ret = icss_iep_init(emac->iep, &prueth_iep_clockops,
+				    emac, IEP_DEFAULT_CYCLE_TIME_NS);
+	}
+
+	ret = request_threaded_irq(emac->tx_ts_irq, NULL, prueth_tx_ts_irq,
+				   IRQF_ONESHOT, dev_name(dev), emac);
+	if (ret)
+		goto stop;
+
 	/* Prepare RX */
 	ret = prueth_prepare_rx_chan(emac, &emac->rx_chns, PRUETH_MAX_PKT_SIZE);
 	if (ret)
-		goto stop;
+		goto free_tx_ts_irq;
 
 	ret = k3_udma_glue_enable_rx_chn(emac->rx_chns.rx_chn);
 	if (ret)
@@ -1102,6 +1414,8 @@ reset_tx_chan:
 	prueth_reset_tx_chan(emac, i, false);
 reset_rx_chn:
 	prueth_reset_rx_chan(&emac->rx_chns, max_rx_flows, false);
+free_tx_ts_irq:
+	free_irq(emac->tx_ts_irq, emac);
 stop:
 	prueth_emac_stop(emac);
 free_rx_irq:
@@ -1173,6 +1487,14 @@ static int emac_ndo_stop(struct net_device *ndev)
 	/* stop PRUs */
 	prueth_emac_stop(emac);
 
+	if (prueth->emacs_initialized == 1)
+		icss_iep_exit(emac->iep);
+
+	/* stop PRUs */
+	prueth_emac_stop(emac);
+
+	free_irq(emac->tx_ts_irq, emac);
+
 	free_irq(emac->rx_chns.irq[rx_flow], emac);
 	prueth_ndev_del_tx_napi(emac, emac->tx_ch_num);
 	prueth_cleanup_tx_chns(emac);
@@ -1235,8 +1557,79 @@ static void emac_ndo_set_rx_mode(struct net_device *ndev)
 	queue_work(emac->cmd_wq, &emac->rx_mode_work);
 }
 
+static int emac_set_ts_config(struct net_device *ndev, struct ifreq *ifr)
+{
+	struct prueth_emac *emac = netdev_priv(ndev);
+	struct hwtstamp_config config;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+		emac->tx_ts_enabled = 0;
+		break;
+	case HWTSTAMP_TX_ON:
+		emac->tx_ts_enabled = 1;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		emac->rx_ts_enabled = 0;
+		break;
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_NTP_ALL:
+		emac->rx_ts_enabled = 1;
+		config.rx_filter = HWTSTAMP_FILTER_ALL;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+static int emac_get_ts_config(struct net_device *ndev, struct ifreq *ifr)
+{
+	struct prueth_emac *emac = netdev_priv(ndev);
+	struct hwtstamp_config config;
+
+	config.flags = 0;
+	config.tx_type = emac->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+	config.rx_filter = emac->rx_ts_enabled ? HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE;
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+			    -EFAULT : 0;
+}
+
 static int emac_ndo_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd)
 {
+	switch (cmd) {
+	case SIOCGHWTSTAMP:
+		return emac_get_ts_config(ndev, ifr);
+	case SIOCSHWTSTAMP:
+		return emac_set_ts_config(ndev, ifr);
+	default:
+		break;
+	}
+
 	return phy_do_ioctl(ndev, ifr, cmd);
 }
 
@@ -1316,6 +1709,7 @@ static int prueth_netdev_init(struct prueth *prueth,
 	struct prueth_emac *emac;
 	struct net_device *ndev;
 	enum prueth_port port;
+	const char *irq_name;
 	enum prueth_mac mac;
 
 	port = prueth_node_port(eth_node);
@@ -1355,6 +1749,15 @@ static int prueth_netdev_init(struct prueth *prueth,
 
 	emac->tx_ch_num = 1;
 
+	irq_name = "tx_ts0";
+	if (emac->port_id == PRUETH_PORT_MII1)
+		irq_name = "tx_ts1";
+	emac->tx_ts_irq = platform_get_irq_byname_optional(prueth->pdev, irq_name);
+	if (emac->tx_ts_irq < 0) {
+		ret = dev_err_probe(prueth->dev, emac->tx_ts_irq, "could not get tx_ts_irq\n");
+		goto free;
+	}
+
 	SET_NETDEV_DEV(ndev, prueth->dev);
 	spin_lock_init(&emac->lock);
 	mutex_init(&emac->cmd_lock);
@@ -1680,6 +2083,13 @@ static int prueth_probe(struct platform_device *pdev)
 	dev_dbg(dev, "sram: pa %llx va %p size %zx\n", prueth->msmcram.pa,
 		prueth->msmcram.va, prueth->msmcram.size);
 
+	prueth->iep0 = icss_iep_get_idx(np, 0);
+	if (IS_ERR(prueth->iep0)) {
+		ret = dev_err_probe(dev, PTR_ERR(prueth->iep0), "iep0 get failed\n");
+		prueth->iep0 = NULL;
+		goto free_pool;
+	}
+
 	/* setup netdev interfaces */
 	if (eth0_node) {
 		ret = prueth_netdev_init(prueth, eth0_node);
@@ -1688,6 +2098,7 @@ static int prueth_probe(struct platform_device *pdev)
 				      eth0_node->name);
 			goto netdev_exit;
 		}
+		prueth->emac[PRUETH_MAC0]->iep = prueth->iep0;
 	}
 
 	if (eth1_node) {
@@ -1697,6 +2108,8 @@ static int prueth_probe(struct platform_device *pdev)
 				      eth1_node->name);
 			goto netdev_exit;
 		}
+
+		prueth->emac[PRUETH_MAC1]->iep = prueth->iep0;
 	}
 
 	/* register the network devices */
@@ -1754,6 +2167,7 @@ netdev_exit:
 		prueth_netdev_exit(prueth, eth_node);
 	}
 
+free_pool:
 	gen_pool_free(prueth->sram_pool,
 		      (unsigned long)prueth->msmcram.va, msmc_ram_size);
 
@@ -1798,6 +2212,8 @@ static void prueth_remove(struct platform_device *pdev)
 		prueth_netdev_exit(prueth, eth_node);
 	}
 
+	icss_iep_put(prueth->iep0);
+
 	gen_pool_free(prueth->sram_pool,
 		      (unsigned long)prueth->msmcram.va,
 		      MSMC_RAM_SIZE);
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
index a8ce4d01ef16..a56ab4cdc83c 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
@@ -35,6 +35,7 @@
 #include <net/devlink.h>
 
 #include "icssg_config.h"
+#include "icss_iep.h"
 #include "icssg_switch_map.h"
 
 #define PRUETH_MAX_MTU          (2000 - ETH_HLEN - ETH_FCS_LEN)
@@ -122,6 +123,8 @@ struct prueth_rx_chn {
  */
 #define PRUETH_MAX_TX_QUEUES	4
 
+#define PRUETH_MAX_TX_TS_REQUESTS	50 /* Max simultaneous TX_TS requests */
+
 /* data for each emac port */
 struct prueth_emac {
 	bool fw_running;
@@ -139,6 +142,9 @@ struct prueth_emac {
 	struct device_node *phy_node;
 	phy_interface_t phy_if;
 	enum prueth_port port_id;
+	struct icss_iep *iep;
+	unsigned int rx_ts_enabled : 1;
+	unsigned int tx_ts_enabled : 1;
 
 	/* DMA related */
 	struct prueth_tx_chn tx_chns[PRUETH_MAX_TX_QUEUES];
@@ -150,7 +156,15 @@ struct prueth_emac {
 
 	spinlock_t lock;	/* serialize access */
 
-	unsigned long state;
+	/* TX HW Timestamping */
+	/* TX TS cookie will be index to the tx_ts_skb array */
+	struct sk_buff *tx_ts_skb[PRUETH_MAX_TX_TS_REQUESTS];
+	atomic_t tx_ts_pending;
+	int tx_ts_irq;
+
+	u8 cmd_seq;
+	/* shutdown related */
+	u32 cmd_data[4];
 	struct completion cmd_complete;
 	/* Mutex to serialize access to firmware command interface */
 	struct mutex cmd_lock;
@@ -193,6 +207,7 @@ struct prueth_pdata {
  * @pdata: pointer to platform data for ICSSG driver
  * @icssg_hwcmdseq: seq counter or HWQ messages
  * @emacs_initialized: num of EMACs/ext ports that are up/running
+ * @iep0: pointer to IEP0 device
  */
 struct prueth {
 	struct device *dev;
@@ -214,8 +229,15 @@ struct prueth {
 	struct platform_device *pdev;
 	struct prueth_pdata pdata;
 	u8 icssg_hwcmdseq;
-
 	int emacs_initialized;
+	struct icss_iep *iep0;
+};
+
+struct emac_tx_ts_response {
+	u32 reserved[2];
+	u32 cookie;
+	u32 lo_ts;
+	u32 hi_ts;
 };
 
 /* get PRUSS SLICE number from prueth_emac */

From 443a2367ba3c96e0f35ce801bcdfb2ce91f5038c Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 24 Aug 2023 17:16:18 +0530
Subject: [PATCH 1940/2047] net: ti: icssg-prueth: am65x SR2.0 add 10M full
 duplex support

For AM65x SR2.0 it's required to enable IEP1 in raw 64bit mode which is
used by PRU FW to monitor the link and apply w/a for 10M link issue.
Note. No public errata available yet.

Without this w/a the PRU FW will stuck if link state changes under TX
traffic pressure.

Hence, add support for 10M full duplex for AM65x SR2.0:
 - add new IEP API to enable IEP, but without PTP support
 - add pdata quirk_10m_link_issue to enable 10M link issue w/a.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Reviewed-by: Roger Quadros <rogerq@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/icssg/icss_iep.c     | 26 +++++++++++++++++++
 drivers/net/ethernet/ti/icssg/icss_iep.h     |  2 ++
 drivers/net/ethernet/ti/icssg/icssg_config.c |  7 +++++
 drivers/net/ethernet/ti/icssg/icssg_prueth.c | 27 ++++++++++++++++++--
 drivers/net/ethernet/ti/icssg/icssg_prueth.h |  2 ++
 5 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c
index bcc056bf45da..4cf2a52e4378 100644
--- a/drivers/net/ethernet/ti/icssg/icss_iep.c
+++ b/drivers/net/ethernet/ti/icssg/icss_iep.c
@@ -727,6 +727,32 @@ void icss_iep_put(struct icss_iep *iep)
 }
 EXPORT_SYMBOL_GPL(icss_iep_put);
 
+void icss_iep_init_fw(struct icss_iep *iep)
+{
+	/* start IEP for FW use in raw 64bit mode, no PTP support */
+	iep->clk_tick_time = iep->def_inc;
+	iep->cycle_time_ns = 0;
+	iep->ops = NULL;
+	iep->clockops_data = NULL;
+	icss_iep_set_default_inc(iep, iep->def_inc);
+	icss_iep_set_compensation_inc(iep, iep->def_inc);
+	icss_iep_set_compensation_count(iep, 0);
+	regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, iep->refclk_freq / 10); /* 100 ms pulse */
+	regmap_write(iep->map, ICSS_IEP_SYNC0_PERIOD_REG, 0);
+	if (iep->plat_data->flags & ICSS_IEP_SLOW_COMPEN_REG_SUPPORT)
+		icss_iep_set_slow_compensation_count(iep, 0);
+
+	icss_iep_enable(iep);
+	icss_iep_settime(iep, 0);
+}
+EXPORT_SYMBOL_GPL(icss_iep_init_fw);
+
+void icss_iep_exit_fw(struct icss_iep *iep)
+{
+	icss_iep_disable(iep);
+}
+EXPORT_SYMBOL_GPL(icss_iep_exit_fw);
+
 int icss_iep_init(struct icss_iep *iep, const struct icss_iep_clockops *clkops,
 		  void *clockops_data, u32 cycle_time_ns)
 {
diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.h b/drivers/net/ethernet/ti/icssg/icss_iep.h
index 9c7f4d0a0916..803a4b714893 100644
--- a/drivers/net/ethernet/ti/icssg/icss_iep.h
+++ b/drivers/net/ethernet/ti/icssg/icss_iep.h
@@ -35,5 +35,7 @@ int icss_iep_exit(struct icss_iep *iep);
 int icss_iep_get_count_low(struct icss_iep *iep);
 int icss_iep_get_count_hi(struct icss_iep *iep);
 int icss_iep_get_ptp_clock_idx(struct icss_iep *iep);
+void icss_iep_init_fw(struct icss_iep *iep);
+void icss_iep_exit_fw(struct icss_iep *iep);
 
 #endif /* __NET_TI_ICSS_IEP_H */
diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c
index ab648d3efe85..933b84666574 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_config.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_config.c
@@ -210,6 +210,10 @@ void icssg_config_ipg(struct prueth_emac *emac)
 	case SPEED_100:
 		icssg_mii_update_ipg(prueth->mii_rt, slice, MII_RT_TX_IPG_100M);
 		break;
+	case SPEED_10:
+		/* IPG for 10M is same as 100M */
+		icssg_mii_update_ipg(prueth->mii_rt, slice, MII_RT_TX_IPG_100M);
+		break;
 	default:
 		/* Other links speeds not supported */
 		netdev_err(emac->ndev, "Unsupported link speed\n");
@@ -440,6 +444,9 @@ void icssg_config_set_speed(struct prueth_emac *emac)
 	case SPEED_100:
 		fw_speed = FW_LINK_SPEED_100M;
 		break;
+	case SPEED_10:
+		fw_speed = FW_LINK_SPEED_10M;
+		break;
 	default:
 		/* Other links speeds not supported */
 		netdev_err(emac->ndev, "Unsupported link speed\n");
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index 1bcb4e174652..410612f43cbd 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -1149,7 +1149,6 @@ static int emac_phy_connect(struct prueth_emac *emac)
 
 	/* remove unsupported modes */
 	phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
-	phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_10baseT_Full_BIT);
 	phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
 	phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
 	phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_Pause_BIT);
@@ -2090,13 +2089,29 @@ static int prueth_probe(struct platform_device *pdev)
 		goto free_pool;
 	}
 
+	prueth->iep1 = icss_iep_get_idx(np, 1);
+	if (IS_ERR(prueth->iep1)) {
+		ret = dev_err_probe(dev, PTR_ERR(prueth->iep1), "iep1 get failed\n");
+		icss_iep_put(prueth->iep0);
+		prueth->iep0 = NULL;
+		prueth->iep1 = NULL;
+		goto free_pool;
+	}
+
+	if (prueth->pdata.quirk_10m_link_issue) {
+		/* Enable IEP1 for FW in 64bit mode as W/A for 10M FD link detect issue under TX
+		 * traffic.
+		 */
+		icss_iep_init_fw(prueth->iep1);
+	}
+
 	/* setup netdev interfaces */
 	if (eth0_node) {
 		ret = prueth_netdev_init(prueth, eth0_node);
 		if (ret) {
 			dev_err_probe(dev, ret, "netdev init %s failed\n",
 				      eth0_node->name);
-			goto netdev_exit;
+			goto exit_iep;
 		}
 		prueth->emac[PRUETH_MAC0]->iep = prueth->iep0;
 	}
@@ -2167,6 +2182,10 @@ netdev_exit:
 		prueth_netdev_exit(prueth, eth_node);
 	}
 
+exit_iep:
+	if (prueth->pdata.quirk_10m_link_issue)
+		icss_iep_exit_fw(prueth->iep1);
+
 free_pool:
 	gen_pool_free(prueth->sram_pool,
 		      (unsigned long)prueth->msmcram.va, msmc_ram_size);
@@ -2212,6 +2231,10 @@ static void prueth_remove(struct platform_device *pdev)
 		prueth_netdev_exit(prueth, eth_node);
 	}
 
+	if (prueth->pdata.quirk_10m_link_issue)
+		icss_iep_exit_fw(prueth->iep1);
+
+	icss_iep_put(prueth->iep1);
 	icss_iep_put(prueth->iep0);
 
 	gen_pool_free(prueth->sram_pool,
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
index a56ab4cdc83c..3fe80a8758d3 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h
@@ -208,6 +208,7 @@ struct prueth_pdata {
  * @icssg_hwcmdseq: seq counter or HWQ messages
  * @emacs_initialized: num of EMACs/ext ports that are up/running
  * @iep0: pointer to IEP0 device
+ * @iep1: pointer to IEP1 device
  */
 struct prueth {
 	struct device *dev;
@@ -231,6 +232,7 @@ struct prueth {
 	u8 icssg_hwcmdseq;
 	int emacs_initialized;
 	struct icss_iep *iep0;
+	struct icss_iep *iep1;
 };
 
 struct emac_tx_ts_response {

From 2dde18cd1d8fac735875f2e4987f11817cc0bc2c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 27 Aug 2023 14:49:51 -0700
Subject: [PATCH 1941/2047] Linux 6.5

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 4739c21a63e2..2fdd8b40b7e0 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*

From 62b6442c58dc17b168f69b37b398a9cab7cd90c9 Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Thu, 24 Aug 2023 23:28:29 -0700
Subject: [PATCH 1942/2047] devlink: Expose port function commands to control
 IPsec crypto offloads

Expose port function commands to enable / disable IPsec crypto offloads,
this is used to control the port IPsec capabilities.

When IPsec crypto is disabled for a function of the port (default),
function cannot offload any IPsec crypto operations (Encrypt/Decrypt and
XFRM state offloading). When enabled, IPsec crypto operations can be
offloaded by the function of the port.

Example of a PCI VF port which supports IPsec crypto offloads:

$ devlink port show pci/0000:06:00.0/1
    pci/0000:06:00.0/1: type eth netdev enp6s0pf0vf0 flavour pcivf pfnum 0 vfnum 0
        function:
        hw_addr 00:00:00:00:00:00 roce enable ipsec_crypto disable

$ devlink port function set pci/0000:06:00.0/1 ipsec_crypto enable

$ devlink port show pci/0000:06:00.0/1
    pci/0000:06:00.0/1: type eth netdev enp6s0pf0vf0 flavour pcivf pfnum 0 vfnum 0
        function:
        hw_addr 00:00:00:00:00:00 roce enable ipsec_crypto enable

Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230825062836.103744-2-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../networking/devlink/devlink-port.rst       | 27 ++++++++++
 include/net/devlink.h                         | 15 ++++++
 include/uapi/linux/devlink.h                  |  2 +
 net/devlink/leftover.c                        | 52 +++++++++++++++++++
 4 files changed, 96 insertions(+)

diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst
index 3da590953ce8..6983b11559cb 100644
--- a/Documentation/networking/devlink/devlink-port.rst
+++ b/Documentation/networking/devlink/devlink-port.rst
@@ -128,6 +128,9 @@ Users may also set the RoCE capability of the function using
 Users may also set the function as migratable using
 'devlink port function set migratable' command.
 
+Users may also set the IPsec crypto capability of the function using
+`devlink port function set ipsec_crypto` command.
+
 Function attributes
 ===================
 
@@ -240,6 +243,30 @@ Attach VF to the VM.
 Start the VM.
 Perform live migration.
 
+IPsec crypto capability setup
+-----------------------------
+When user enables IPsec crypto capability for a VF, user application can offload
+XFRM state crypto operation (Encrypt/Decrypt) to this VF.
+
+When IPsec crypto capability is disabled (default) for a VF, the XFRM state is
+processed in software by the kernel.
+
+- Get IPsec crypto capability of the VF device::
+
+    $ devlink port show pci/0000:06:00.0/2
+    pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+        function:
+            hw_addr 00:00:00:00:00:00 ipsec_crypto disabled
+
+- Set IPsec crypto capability of the VF device::
+
+    $ devlink port function set pci/0000:06:00.0/2 ipsec_crypto enable
+
+    $ devlink port show pci/0000:06:00.0/2
+    pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+        function:
+            hw_addr 00:00:00:00:00:00 ipsec_crypto enabled
+
 Subfunction
 ============
 
diff --git a/include/net/devlink.h b/include/net/devlink.h
index f7fec0791acc..1cf07a820a0e 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1583,6 +1583,15 @@ void devlink_free(struct devlink *devlink);
  *		       Should be used by device drivers set
  *		       the admin state of a function managed
  *		       by the devlink port.
+ * @port_fn_ipsec_crypto_get: Callback used to get port function's ipsec_crypto
+ *			      capability. Should be used by device drivers
+ *			      to report the current state of ipsec_crypto
+ *			      capability of a function managed by the devlink
+ *			      port.
+ * @port_fn_ipsec_crypto_set: Callback used to set port function's ipsec_crypto
+ *			      capability. Should be used by device drivers to
+ *			      enable/disable ipsec_crypto capability of a
+ *			      function managed by the devlink port.
  *
  * Note: Driver should return -EOPNOTSUPP if it doesn't support
  * port function (@port_fn_*) handling for a particular port.
@@ -1620,6 +1629,12 @@ struct devlink_port_ops {
 	int (*port_fn_state_set)(struct devlink_port *port,
 				 enum devlink_port_fn_state state,
 				 struct netlink_ext_ack *extack);
+	int (*port_fn_ipsec_crypto_get)(struct devlink_port *devlink_port,
+					bool *is_enable,
+					struct netlink_ext_ack *extack);
+	int (*port_fn_ipsec_crypto_set)(struct devlink_port *devlink_port,
+					bool enable,
+					struct netlink_ext_ack *extack);
 };
 
 void devlink_port_init(struct devlink *devlink,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 3782d4219ac9..f9ae9a058ad2 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -661,6 +661,7 @@ enum devlink_resource_unit {
 enum devlink_port_fn_attr_cap {
 	DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT,
 	DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT,
+	DEVLINK_PORT_FN_ATTR_CAP_IPSEC_CRYPTO_BIT,
 
 	/* Add new caps above */
 	__DEVLINK_PORT_FN_ATTR_CAPS_MAX,
@@ -669,6 +670,7 @@ enum devlink_port_fn_attr_cap {
 #define DEVLINK_PORT_FN_CAP_ROCE _BITUL(DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT)
 #define DEVLINK_PORT_FN_CAP_MIGRATABLE \
 	_BITUL(DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT)
+#define DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO _BITUL(DEVLINK_PORT_FN_ATTR_CAP_IPSEC_CRYPTO_BIT)
 
 enum devlink_port_function_attr {
 	DEVLINK_PORT_FUNCTION_ATTR_UNSPEC,
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index e2cd13958cc2..fcc1a06cae48 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -492,6 +492,28 @@ static int devlink_port_fn_migratable_fill(struct devlink_port *devlink_port,
 	return 0;
 }
 
+static int devlink_port_fn_ipsec_crypto_fill(struct devlink_port *devlink_port,
+					     struct nla_bitfield32 *caps,
+					     struct netlink_ext_ack *extack)
+{
+	bool is_enable;
+	int err;
+
+	if (!devlink_port->ops->port_fn_ipsec_crypto_get ||
+	    devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+		return 0;
+
+	err = devlink_port->ops->port_fn_ipsec_crypto_get(devlink_port, &is_enable, extack);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+
+	devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO, is_enable);
+	return 0;
+}
+
 static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
 				     struct sk_buff *msg,
 				     struct netlink_ext_ack *extack,
@@ -508,6 +530,10 @@ static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
 	if (err)
 		return err;
 
+	err = devlink_port_fn_ipsec_crypto_fill(devlink_port, &caps, extack);
+	if (err)
+		return err;
+
 	if (!caps.selector)
 		return 0;
 	err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
@@ -838,6 +864,13 @@ devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
 						   extack);
 }
 
+static int
+devlink_port_fn_ipsec_crypto_set(struct devlink_port *devlink_port, bool enable,
+				 struct netlink_ext_ack *extack)
+{
+	return devlink_port->ops->port_fn_ipsec_crypto_set(devlink_port, enable, extack);
+}
+
 static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
 				    const struct nlattr *attr,
 				    struct netlink_ext_ack *extack)
@@ -862,6 +895,13 @@ static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
 		if (err)
 			return err;
 	}
+	if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO) {
+		err = devlink_port_fn_ipsec_crypto_set(devlink_port, caps_value &
+						       DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO,
+						       extack);
+		if (err)
+			return err;
+	}
 	return 0;
 }
 
@@ -1226,6 +1266,18 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
 				return -EOPNOTSUPP;
 			}
 		}
+		if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO) {
+			if (!ops->port_fn_ipsec_crypto_set) {
+				NL_SET_ERR_MSG_ATTR(extack, attr,
+						    "Port doesn't support ipsec_crypto function attribute");
+				return -EOPNOTSUPP;
+			}
+			if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+				NL_SET_ERR_MSG_ATTR(extack, attr,
+						    "ipsec_crypto function attribute supported for VFs only");
+				return -EOPNOTSUPP;
+			}
+		}
 	}
 	return 0;
 }

From 390a24cbc39626a8a38c6d877a59f758fe209f2d Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Thu, 24 Aug 2023 23:28:30 -0700
Subject: [PATCH 1943/2047] devlink: Expose port function commands to control
 IPsec packet offloads

Expose port function commands to enable / disable IPsec packet offloads,
this is used to control the port IPsec capabilities.

When IPsec packet is disabled for a function of the port (default),
function cannot offload IPsec packet operations (encapsulation and XFRM
policy offload). When enabled, IPsec packet operations can be offloaded
by the function of the port, which includes crypto operation
(Encrypt/Decrypt), IPsec encapsulation and XFRM state and policy
offload.

Example of a PCI VF port which supports IPsec packet offloads:

$ devlink port show pci/0000:06:00.0/1
    pci/0000:06:00.0/1: type eth netdev enp6s0pf0vf0 flavour pcivf pfnum 0 vfnum 0
        function:
        hw_addr 00:00:00:00:00:00 roce enable ipsec_packet disable

$ devlink port function set pci/0000:06:00.0/1 ipsec_packet enable

$ devlink port show pci/0000:06:00.0/1
    pci/0000:06:00.0/1: type eth netdev enp6s0pf0vf0 flavour pcivf pfnum 0 vfnum 0
        function:
        hw_addr 00:00:00:00:00:00 roce enable ipsec_packet enable

Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230825062836.103744-3-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../networking/devlink/devlink-port.rst       | 28 ++++++++++
 include/net/devlink.h                         | 15 ++++++
 include/uapi/linux/devlink.h                  |  2 +
 net/devlink/leftover.c                        | 52 +++++++++++++++++++
 4 files changed, 97 insertions(+)

diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst
index 6983b11559cb..f5adb910427a 100644
--- a/Documentation/networking/devlink/devlink-port.rst
+++ b/Documentation/networking/devlink/devlink-port.rst
@@ -131,6 +131,9 @@ Users may also set the function as migratable using
 Users may also set the IPsec crypto capability of the function using
 `devlink port function set ipsec_crypto` command.
 
+Users may also set the IPsec packet capability of the function using
+`devlink port function set ipsec_packet` command.
+
 Function attributes
 ===================
 
@@ -267,6 +270,31 @@ processed in software by the kernel.
         function:
             hw_addr 00:00:00:00:00:00 ipsec_crypto enabled
 
+IPsec packet capability setup
+-----------------------------
+When user enables IPsec packet capability for a VF, user application can offload
+XFRM state and policy crypto operation (Encrypt/Decrypt) to this VF, as well as
+IPsec encapsulation.
+
+When IPsec packet capability is disabled (default) for a VF, the XFRM state and
+policy is processed in software by the kernel.
+
+- Get IPsec packet capability of the VF device::
+
+    $ devlink port show pci/0000:06:00.0/2
+    pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+        function:
+            hw_addr 00:00:00:00:00:00 ipsec_packet disabled
+
+- Set IPsec packet capability of the VF device::
+
+    $ devlink port function set pci/0000:06:00.0/2 ipsec_packet enable
+
+    $ devlink port show pci/0000:06:00.0/2
+    pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
+        function:
+            hw_addr 00:00:00:00:00:00 ipsec_packet enabled
+
 Subfunction
 ============
 
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 1cf07a820a0e..29fd1b4ee654 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1592,6 +1592,15 @@ void devlink_free(struct devlink *devlink);
  *			      capability. Should be used by device drivers to
  *			      enable/disable ipsec_crypto capability of a
  *			      function managed by the devlink port.
+ * @port_fn_ipsec_packet_get: Callback used to get port function's ipsec_packet
+ *			      capability. Should be used by device drivers
+ *			      to report the current state of ipsec_packet
+ *			      capability of a function managed by the devlink
+ *			      port.
+ * @port_fn_ipsec_packet_set: Callback used to set port function's ipsec_packet
+ *			      capability. Should be used by device drivers to
+ *			      enable/disable ipsec_packet capability of a
+ *			      function managed by the devlink port.
  *
  * Note: Driver should return -EOPNOTSUPP if it doesn't support
  * port function (@port_fn_*) handling for a particular port.
@@ -1635,6 +1644,12 @@ struct devlink_port_ops {
 	int (*port_fn_ipsec_crypto_set)(struct devlink_port *devlink_port,
 					bool enable,
 					struct netlink_ext_ack *extack);
+	int (*port_fn_ipsec_packet_get)(struct devlink_port *devlink_port,
+					bool *is_enable,
+					struct netlink_ext_ack *extack);
+	int (*port_fn_ipsec_packet_set)(struct devlink_port *devlink_port,
+					bool enable,
+					struct netlink_ext_ack *extack);
 };
 
 void devlink_port_init(struct devlink *devlink,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index f9ae9a058ad2..03875e078be8 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -662,6 +662,7 @@ enum devlink_port_fn_attr_cap {
 	DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT,
 	DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT,
 	DEVLINK_PORT_FN_ATTR_CAP_IPSEC_CRYPTO_BIT,
+	DEVLINK_PORT_FN_ATTR_CAP_IPSEC_PACKET_BIT,
 
 	/* Add new caps above */
 	__DEVLINK_PORT_FN_ATTR_CAPS_MAX,
@@ -671,6 +672,7 @@ enum devlink_port_fn_attr_cap {
 #define DEVLINK_PORT_FN_CAP_MIGRATABLE \
 	_BITUL(DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT)
 #define DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO _BITUL(DEVLINK_PORT_FN_ATTR_CAP_IPSEC_CRYPTO_BIT)
+#define DEVLINK_PORT_FN_CAP_IPSEC_PACKET _BITUL(DEVLINK_PORT_FN_ATTR_CAP_IPSEC_PACKET_BIT)
 
 enum devlink_port_function_attr {
 	DEVLINK_PORT_FUNCTION_ATTR_UNSPEC,
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index fcc1a06cae48..149752bc9f2d 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -514,6 +514,28 @@ static int devlink_port_fn_ipsec_crypto_fill(struct devlink_port *devlink_port,
 	return 0;
 }
 
+static int devlink_port_fn_ipsec_packet_fill(struct devlink_port *devlink_port,
+					     struct nla_bitfield32 *caps,
+					     struct netlink_ext_ack *extack)
+{
+	bool is_enable;
+	int err;
+
+	if (!devlink_port->ops->port_fn_ipsec_packet_get ||
+	    devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+		return 0;
+
+	err = devlink_port->ops->port_fn_ipsec_packet_get(devlink_port, &is_enable, extack);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+
+	devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_IPSEC_PACKET, is_enable);
+	return 0;
+}
+
 static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
 				     struct sk_buff *msg,
 				     struct netlink_ext_ack *extack,
@@ -534,6 +556,10 @@ static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
 	if (err)
 		return err;
 
+	err = devlink_port_fn_ipsec_packet_fill(devlink_port, &caps, extack);
+	if (err)
+		return err;
+
 	if (!caps.selector)
 		return 0;
 	err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
@@ -871,6 +897,13 @@ devlink_port_fn_ipsec_crypto_set(struct devlink_port *devlink_port, bool enable,
 	return devlink_port->ops->port_fn_ipsec_crypto_set(devlink_port, enable, extack);
 }
 
+static int
+devlink_port_fn_ipsec_packet_set(struct devlink_port *devlink_port, bool enable,
+				 struct netlink_ext_ack *extack)
+{
+	return devlink_port->ops->port_fn_ipsec_packet_set(devlink_port, enable, extack);
+}
+
 static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
 				    const struct nlattr *attr,
 				    struct netlink_ext_ack *extack)
@@ -902,6 +935,13 @@ static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
 		if (err)
 			return err;
 	}
+	if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_PACKET) {
+		err = devlink_port_fn_ipsec_packet_set(devlink_port, caps_value &
+						       DEVLINK_PORT_FN_CAP_IPSEC_PACKET,
+						       extack);
+		if (err)
+			return err;
+	}
 	return 0;
 }
 
@@ -1278,6 +1318,18 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
 				return -EOPNOTSUPP;
 			}
 		}
+		if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_PACKET) {
+			if (!ops->port_fn_ipsec_packet_set) {
+				NL_SET_ERR_MSG_ATTR(extack, attr,
+						    "Port doesn't support ipsec_packet function attribute");
+				return -EOPNOTSUPP;
+			}
+			if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+				NL_SET_ERR_MSG_ATTR(extack, attr,
+						    "ipsec_packet function attribute supported for VFs only");
+				return -EOPNOTSUPP;
+			}
+		}
 	}
 	return 0;
 }

From c46fb77383a6d5eea723c785135e79d73614f1c3 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Thu, 24 Aug 2023 23:28:31 -0700
Subject: [PATCH 1944/2047] net/mlx5: Drop extra layer of locks in IPsec

There is no need in holding devlink lock as it gives nothing
compared to already used write mode_lock.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20230825062836.103744-4-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../mellanox/mlx5/core/eswitch_offloads.c      | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 67eab99f95b1..28c4b9724e38 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3903,38 +3903,28 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
 
 bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
 {
-	struct devlink *devlink = priv_to_devlink(dev);
-	struct mlx5_eswitch *esw;
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
 
-	devl_lock(devlink);
-	esw = mlx5_devlink_eswitch_get(devlink);
-	if (IS_ERR(esw)) {
-		devl_unlock(devlink);
-		/* Failure means no eswitch => not possible to change encap */
+	if (!mlx5_esw_allowed(esw))
 		return true;
-	}
 
 	down_write(&esw->mode_lock);
 	if (esw->mode != MLX5_ESWITCH_LEGACY &&
 	    esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
 		up_write(&esw->mode_lock);
-		devl_unlock(devlink);
 		return false;
 	}
 
 	esw->offloads.num_block_encap++;
 	up_write(&esw->mode_lock);
-	devl_unlock(devlink);
 	return true;
 }
 
 void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
 {
-	struct devlink *devlink = priv_to_devlink(dev);
-	struct mlx5_eswitch *esw;
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
 
-	esw = mlx5_devlink_eswitch_get(devlink);
-	if (IS_ERR(esw))
+	if (!mlx5_esw_allowed(esw))
 		return;
 
 	down_write(&esw->mode_lock);

From e2537341667830fa1e6dd51bc9dbf19c58954d35 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Thu, 24 Aug 2023 23:28:32 -0700
Subject: [PATCH 1945/2047] net/mlx5e: Rewrite IPsec vs. TC block interface

In the commit 366e46242b8e ("net/mlx5e: Make IPsec offload work together
with eswitch and TC"), new API to block IPsec vs. TC creation was introduced.

Internally, that API used devlink lock to avoid races with userspace, but it is
not really needed as dev->priv.eswitch is stable and can't be changed. So remove
dependency on devlink lock and move block encap code back to its original place.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20230825062836.103744-5-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 63 +++++++------------
 .../net/ethernet/mellanox/mlx5/core/eswitch.h | 15 ++---
 .../mellanox/mlx5/core/eswitch_offloads.c     | 53 +++-------------
 3 files changed, 38 insertions(+), 93 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index a1cfddd05bc4..7dba4221993f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -254,6 +254,8 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 	mlx5_del_flow_rules(rx->sa.rule);
 	mlx5_destroy_flow_group(rx->sa.group);
 	mlx5_destroy_flow_table(rx->ft.sa);
+	if (rx->allow_tunnel_mode)
+		mlx5_eswitch_unblock_encap(mdev);
 	if (rx == ipsec->rx_esw) {
 		mlx5_esw_ipsec_rx_status_destroy(ipsec, rx);
 	} else {
@@ -357,6 +359,8 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 		goto err_add;
 
 	/* Create FT */
+	if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
+		rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
 	if (rx->allow_tunnel_mode)
 		flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 	ft = ipsec_ft_create(attr.ns, attr.sa_level, attr.prio, 2, flags);
@@ -411,6 +415,8 @@ err_pol_ft:
 err_fs:
 	mlx5_destroy_flow_table(rx->ft.sa);
 err_fs_ft:
+	if (rx->allow_tunnel_mode)
+		mlx5_eswitch_unblock_encap(mdev);
 	mlx5_del_flow_rules(rx->status.rule);
 	mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
 err_add:
@@ -428,26 +434,19 @@ static int rx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 	if (rx->ft.refcnt)
 		goto skip;
 
-	if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
-		rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
-
-	err = mlx5_eswitch_block_mode_trylock(mdev);
+	err = mlx5_eswitch_block_mode(mdev);
 	if (err)
-		goto err_out;
+		return err;
 
 	err = rx_create(mdev, ipsec, rx, family);
-	mlx5_eswitch_block_mode_unlock(mdev, err);
-	if (err)
-		goto err_out;
+	if (err) {
+		mlx5_eswitch_unblock_mode(mdev);
+		return err;
+	}
 
 skip:
 	rx->ft.refcnt++;
 	return 0;
-
-err_out:
-	if (rx->allow_tunnel_mode)
-		mlx5_eswitch_unblock_encap(mdev);
-	return err;
 }
 
 static void rx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx,
@@ -456,12 +455,8 @@ static void rx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx,
 	if (--rx->ft.refcnt)
 		return;
 
-	mlx5_eswitch_unblock_mode_lock(ipsec->mdev);
 	rx_destroy(ipsec->mdev, ipsec, rx, family);
-	mlx5_eswitch_unblock_mode_unlock(ipsec->mdev);
-
-	if (rx->allow_tunnel_mode)
-		mlx5_eswitch_unblock_encap(ipsec->mdev);
+	mlx5_eswitch_unblock_mode(ipsec->mdev);
 }
 
 static struct mlx5e_ipsec_rx *rx_ft_get(struct mlx5_core_dev *mdev,
@@ -581,6 +576,8 @@ static void tx_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx,
 		mlx5_destroy_flow_group(tx->sa.group);
 	}
 	mlx5_destroy_flow_table(tx->ft.sa);
+	if (tx->allow_tunnel_mode)
+		mlx5_eswitch_unblock_encap(ipsec->mdev);
 	mlx5_del_flow_rules(tx->status.rule);
 	mlx5_destroy_flow_table(tx->ft.status);
 }
@@ -621,6 +618,8 @@ static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx,
 	if (err)
 		goto err_status_rule;
 
+	if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
+		tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
 	if (tx->allow_tunnel_mode)
 		flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 	ft = ipsec_ft_create(tx->ns, attr.sa_level, attr.prio, 4, flags);
@@ -687,6 +686,8 @@ err_pol_ft:
 err_sa_miss:
 	mlx5_destroy_flow_table(tx->ft.sa);
 err_sa_ft:
+	if (tx->allow_tunnel_mode)
+		mlx5_eswitch_unblock_encap(mdev);
 	mlx5_del_flow_rules(tx->status.rule);
 err_status_rule:
 	mlx5_destroy_flow_table(tx->ft.status);
@@ -720,32 +721,22 @@ static int tx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 	if (tx->ft.refcnt)
 		goto skip;
 
-	if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
-		tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
-
-	err = mlx5_eswitch_block_mode_trylock(mdev);
+	err = mlx5_eswitch_block_mode(mdev);
 	if (err)
-		goto err_out;
+		return err;
 
 	err = tx_create(ipsec, tx, ipsec->roce);
 	if (err) {
-		mlx5_eswitch_block_mode_unlock(mdev, err);
-		goto err_out;
+		mlx5_eswitch_unblock_mode(mdev);
+		return err;
 	}
 
 	if (tx == ipsec->tx_esw)
 		ipsec_esw_tx_ft_policy_set(mdev, tx->ft.pol);
 
-	mlx5_eswitch_block_mode_unlock(mdev, err);
-
 skip:
 	tx->ft.refcnt++;
 	return 0;
-
-err_out:
-	if (tx->allow_tunnel_mode)
-		mlx5_eswitch_unblock_encap(mdev);
-	return err;
 }
 
 static void tx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx)
@@ -753,19 +744,13 @@ static void tx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx)
 	if (--tx->ft.refcnt)
 		return;
 
-	mlx5_eswitch_unblock_mode_lock(ipsec->mdev);
-
 	if (tx == ipsec->tx_esw) {
 		mlx5_esw_ipsec_restore_dest_uplink(ipsec->mdev);
 		ipsec_esw_tx_ft_policy_set(ipsec->mdev, NULL);
 	}
 
 	tx_destroy(ipsec, tx, ipsec->roce);
-
-	mlx5_eswitch_unblock_mode_unlock(ipsec->mdev);
-
-	if (tx->allow_tunnel_mode)
-		mlx5_eswitch_unblock_encap(ipsec->mdev);
+	mlx5_eswitch_unblock_mode(ipsec->mdev);
 }
 
 static struct mlx5_flow_table *tx_ft_get_policy(struct mlx5_core_dev *mdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 6fcece69d3be..ed3abcd70c46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -829,10 +829,8 @@ int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
 bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev);
 void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);
 
-int mlx5_eswitch_block_mode_trylock(struct mlx5_core_dev *dev);
-void mlx5_eswitch_block_mode_unlock(struct mlx5_core_dev *dev, int err);
-void mlx5_eswitch_unblock_mode_lock(struct mlx5_core_dev *dev);
-void mlx5_eswitch_unblock_mode_unlock(struct mlx5_core_dev *dev);
+int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev);
+void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev);
 
 static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
 {
@@ -916,13 +914,8 @@ static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
 {
 }
 
-static inline int mlx5_eswitch_block_mode_trylock(struct mlx5_core_dev *dev) { return 0; }
-
-static inline void mlx5_eswitch_block_mode_unlock(struct mlx5_core_dev *dev, int err) {}
-
-static inline void mlx5_eswitch_unblock_mode_lock(struct mlx5_core_dev *dev) {}
-
-static inline void mlx5_eswitch_unblock_mode_unlock(struct mlx5_core_dev *dev) {}
+static inline int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev) {}
 #endif /* CONFIG_MLX5_ESWITCH */
 
 #endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 28c4b9724e38..d4697dadd27d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3641,65 +3641,32 @@ static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink)
 	return net_eq(devl_net, netdev_net);
 }
 
-int mlx5_eswitch_block_mode_trylock(struct mlx5_core_dev *dev)
+int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev)
 {
-	struct devlink *devlink = priv_to_devlink(dev);
-	struct mlx5_eswitch *esw;
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	int err;
 
-	devl_lock(devlink);
-	esw = mlx5_devlink_eswitch_get(devlink);
-	if (IS_ERR(esw)) {
-		/* Failure means no eswitch => not possible to change eswitch mode */
-		devl_unlock(devlink);
+	if (!mlx5_esw_allowed(esw))
 		return 0;
-	}
 
+	/* Take TC into account */
 	err = mlx5_esw_try_lock(esw);
-	if (err < 0) {
-		devl_unlock(devlink);
+	if (err < 0)
 		return err;
-	}
 
+	esw->offloads.num_block_mode++;
+	mlx5_esw_unlock(esw);
 	return 0;
 }
 
-void mlx5_eswitch_block_mode_unlock(struct mlx5_core_dev *dev, int err)
+void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev)
 {
-	struct devlink *devlink = priv_to_devlink(dev);
-	struct mlx5_eswitch *esw;
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
 
-	esw = mlx5_devlink_eswitch_get(devlink);
-	if (IS_ERR(esw))
-		return;
-
-	if (!err)
-		esw->offloads.num_block_mode++;
-	mlx5_esw_unlock(esw);
-	devl_unlock(devlink);
-}
-
-void mlx5_eswitch_unblock_mode_lock(struct mlx5_core_dev *dev)
-{
-	struct devlink *devlink = priv_to_devlink(dev);
-	struct mlx5_eswitch *esw;
-
-	esw = mlx5_devlink_eswitch_get(devlink);
-	if (IS_ERR(esw))
+	if (!mlx5_esw_allowed(esw))
 		return;
 
 	down_write(&esw->mode_lock);
-}
-
-void mlx5_eswitch_unblock_mode_unlock(struct mlx5_core_dev *dev)
-{
-	struct devlink *devlink = priv_to_devlink(dev);
-	struct mlx5_eswitch *esw;
-
-	esw = mlx5_devlink_eswitch_get(devlink);
-	if (IS_ERR(esw))
-		return;
-
 	esw->offloads.num_block_mode--;
 	up_write(&esw->mode_lock);
 }

From 17c8da5a3423c9f3800a256dbaa685bae18e1264 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Thu, 24 Aug 2023 23:28:33 -0700
Subject: [PATCH 1946/2047] net/mlx5: Add IFC bits to support IPsec
 enable/disable

Add hardware definitions to allow to control IPSec capabilities.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20230825062836.103744-6-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 08dcb1f43be7..fc3db401f8a2 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -65,9 +65,11 @@ enum {
 
 enum {
 	MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE        = 0x0,
+	MLX5_SET_HCA_CAP_OP_MOD_ETHERNET_OFFLOADS     = 0x1,
 	MLX5_SET_HCA_CAP_OP_MOD_ODP                   = 0x2,
 	MLX5_SET_HCA_CAP_OP_MOD_ATOMIC                = 0x3,
 	MLX5_SET_HCA_CAP_OP_MOD_ROCE                  = 0x4,
+	MLX5_SET_HCA_CAP_OP_MOD_IPSEC                 = 0x15,
 	MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2       = 0x20,
 	MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION        = 0x25,
 };
@@ -3451,6 +3453,7 @@ union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap;
 	struct mlx5_ifc_macsec_cap_bits macsec_cap;
 	struct mlx5_ifc_crypto_cap_bits crypto_cap;
+	struct mlx5_ifc_ipsec_cap_bits ipsec_cap;
 	u8         reserved_at_0[0x8000];
 };
 

From 8efd7b17a3b03242c97281d88463ad56e8f551f8 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Thu, 24 Aug 2023 23:28:34 -0700
Subject: [PATCH 1947/2047] net/mlx5: Provide an interface to block change of
 IPsec capabilities

mlx5 HW can't perform IPsec offload operation simultaneously both on PF
and VFs at the same time. While the previous patches added devlink knobs
to change IPsec capabilities dynamically, there is a need to add a logic
to block such IPsec capabilities for the cases when IPsec is already
configured.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20230825062836.103744-7-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../mellanox/mlx5/core/en_accel/ipsec.c       | 20 +++++++++++-
 .../net/ethernet/mellanox/mlx5/core/eswitch.c | 32 +++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |  9 ++++++
 include/linux/mlx5/driver.h                   |  1 +
 4 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 3b88a8bb7082..7d4ceb9b9c16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -38,6 +38,7 @@
 #include <net/netevent.h>
 
 #include "en.h"
+#include "eswitch.h"
 #include "ipsec.h"
 #include "ipsec_rxtx.h"
 #include "en_rep.h"
@@ -670,6 +671,11 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
 	if (err)
 		goto err_xfrm;
 
+	if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
+		err = -EBUSY;
+		goto err_xfrm;
+	}
+
 	/* check esn */
 	if (x->props.flags & XFRM_STATE_ESN)
 		mlx5e_ipsec_update_esn_state(sa_entry);
@@ -678,7 +684,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
 
 	err = mlx5_ipsec_create_work(sa_entry);
 	if (err)
-		goto err_xfrm;
+		goto unblock_ipsec;
 
 	err = mlx5e_ipsec_create_dwork(sa_entry);
 	if (err)
@@ -735,6 +741,8 @@ release_work:
 	if (sa_entry->work)
 		kfree(sa_entry->work->data);
 	kfree(sa_entry->work);
+unblock_ipsec:
+	mlx5_eswitch_unblock_ipsec(priv->mdev);
 err_xfrm:
 	kfree(sa_entry);
 	NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state");
@@ -764,6 +772,7 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
 static void mlx5e_xfrm_free_state(struct xfrm_state *x)
 {
 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
 
 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
 		goto sa_entry_free;
@@ -780,6 +789,7 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x)
 	if (sa_entry->work)
 		kfree(sa_entry->work->data);
 	kfree(sa_entry->work);
+	mlx5_eswitch_unblock_ipsec(ipsec->mdev);
 sa_entry_free:
 	kfree(sa_entry);
 }
@@ -1055,6 +1065,11 @@ static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
 	pol_entry->x = x;
 	pol_entry->ipsec = priv->ipsec;
 
+	if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
+		err = -EBUSY;
+		goto ipsec_busy;
+	}
+
 	mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs);
 	err = mlx5e_accel_ipsec_fs_add_pol(pol_entry);
 	if (err)
@@ -1064,6 +1079,8 @@ static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
 	return 0;
 
 err_fs:
+	mlx5_eswitch_unblock_ipsec(priv->mdev);
+ipsec_busy:
 	kfree(pol_entry);
 	NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy");
 	return err;
@@ -1074,6 +1091,7 @@ static void mlx5e_xfrm_del_policy(struct xfrm_policy *x)
 	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
 
 	mlx5e_accel_ipsec_fs_del_pol(pol_entry);
+	mlx5_eswitch_unblock_ipsec(pol_entry->ipsec->mdev);
 }
 
 static void mlx5e_xfrm_free_policy(struct xfrm_policy *x)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index db1c2a076364..38a149dd479f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -48,6 +48,7 @@
 #include "devlink.h"
 #include "ecpf.h"
 #include "en/mod_hdr.h"
+#include "en_accel/ipsec.h"
 
 enum {
 	MLX5_ACTION_NONE = 0,
@@ -2336,3 +2337,34 @@ struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
 	return mlx5_esw_allowed(esw) ? esw->dev : NULL;
 }
 EXPORT_SYMBOL(mlx5_eswitch_get_core_dev);
+
+bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+
+	if (!mlx5_esw_allowed(esw))
+		return true;
+
+	mutex_lock(&esw->state_lock);
+	if (esw->enabled_ipsec_vf_count) {
+		mutex_unlock(&esw->state_lock);
+		return false;
+	}
+
+	dev->num_ipsec_offloads++;
+	mutex_unlock(&esw->state_lock);
+	return true;
+}
+
+void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+
+	if (!mlx5_esw_allowed(esw))
+		/* Failure means no eswitch => core dev is not a PF */
+		return;
+
+	mutex_lock(&esw->state_lock);
+	dev->num_ipsec_offloads--;
+	mutex_unlock(&esw->state_lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index ed3abcd70c46..0e51fd066fcc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -380,6 +380,7 @@ struct mlx5_eswitch {
 	struct blocking_notifier_head n_head;
 	struct xarray paired;
 	struct mlx5_devcom_comp_dev *devcom;
+	u16 enabled_ipsec_vf_count;
 };
 
 void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -855,6 +856,8 @@ mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw)
 
 int mlx5_eswitch_restore_ipsec_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule,
 				    struct mlx5_esw_flow_attr *esw_attr, int attr_idx);
+bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev);
+void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev);
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -916,6 +919,12 @@ static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
 
 static inline int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) { return 0; }
 static inline void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev) {}
+static inline bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev)
+{
+	return false;
+}
+
+static inline void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev) {}
 #endif /* CONFIG_MLX5_ESWITCH */
 
 #endif /* __MLX5_ESWITCH_H__ */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e95f10066eac..3033bbaeac81 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -813,6 +813,7 @@ struct mlx5_core_dev {
 	/* MACsec notifier chain to sync MACsec core and IB database */
 	struct blocking_notifier_head macsec_nh;
 #endif
+	u64 num_ipsec_offloads;
 };
 
 struct mlx5_db {

From 06bab69658a8afb493204448f29703e12e6d3960 Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Thu, 24 Aug 2023 23:28:35 -0700
Subject: [PATCH 1948/2047] net/mlx5: Implement devlink port function cmds to
 control ipsec_crypto

Implement devlink port function commands to enable / disable IPsec
crypto offloads.  This is used to control the IPsec capability of the
device.

When ipsec_crypto is enabled for a VF, it prevents adding IPsec crypto
offloads on the PF, because the two cannot be active simultaneously due
to HW constraints. Conversely, if there are any active IPsec crypto
offloads on the PF, it's not allowed to enable ipsec_crypto on a VF,
until PF IPsec offloads are cleared.

Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20230825062836.103744-8-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../ethernet/mellanox/mlx5/switchdev.rst      |  10 +
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 .../mellanox/mlx5/core/esw/devlink_port.c     |   4 +
 .../ethernet/mellanox/mlx5/core/esw/ipsec.c   | 309 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/eswitch.c |   8 +
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |  14 +
 .../mellanox/mlx5/core/eswitch_offloads.c     |  85 +++++
 7 files changed, 431 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst
index 6e3f5ee8b0d0..de51e55dcfe3 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst
@@ -190,6 +190,16 @@ explicitly enable the VF migratable capability.
 mlx5 driver support devlink port function attr mechanism to setup migratable
 capability. (refer to Documentation/networking/devlink/devlink-port.rst)
 
+IPsec crypto capability setup
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+User who wants mlx5 PCI VFs to be able to perform IPsec crypto offloading need
+to explicitly enable the VF ipsec_crypto capability. Enabling IPsec capability
+for VFs is supported starting with ConnectX6dx devices and above. When a VF has
+IPsec capability enabled, any IPsec offloading is blocked on the PF.
+
+mlx5 driver support devlink port function attr mechanism to setup ipsec_crypto
+capability. (refer to Documentation/networking/devlink/devlink-port.rst)
+
 SF state setup
 --------------
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index b56b187a9097..7e94caca4888 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -69,7 +69,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE)   += en/tc/sample.o
 #
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
 				      ecpf.o rdma.o esw/legacy.o \
-				      esw/devlink_port.o esw/vporttbl.o esw/qos.o
+				      esw/devlink_port.o esw/vporttbl.o esw/qos.o esw/ipsec.o
 
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
 				      esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index 3c254a710006..12205e913417 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -92,6 +92,10 @@ static const struct devlink_port_ops mlx5_esw_pf_vf_dl_port_ops = {
 	.port_fn_roce_set = mlx5_devlink_port_fn_roce_set,
 	.port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get,
 	.port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set,
+#ifdef CONFIG_XFRM_OFFLOAD
+	.port_fn_ipsec_crypto_get = mlx5_devlink_port_fn_ipsec_crypto_get,
+	.port_fn_ipsec_crypto_set = mlx5_devlink_port_fn_ipsec_crypto_set,
+#endif /* CONFIG_XFRM_OFFLOAD */
 };
 
 static void mlx5_esw_offloads_sf_devlink_port_attrs_set(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c
new file mode 100644
index 000000000000..187fb5f2d0cb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+static int esw_ipsec_vf_query_generic(struct mlx5_core_dev *dev, u16 vport_num, bool *result)
+{
+	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	void *hca_cap, *query_cap;
+	int err;
+
+	if (!MLX5_CAP_GEN(dev, vhca_resource_manager))
+		return -EOPNOTSUPP;
+
+	if (!mlx5_esw_ipsec_vf_offload_supported(dev)) {
+		*result = false;
+		return 0;
+	}
+
+	query_cap = kvzalloc(query_sz, GFP_KERNEL);
+	if (!query_cap)
+		return -ENOMEM;
+
+	err = mlx5_vport_get_other_func_general_cap(dev, vport_num, query_cap);
+	if (err)
+		goto free;
+
+	hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability);
+	*result = MLX5_GET(cmd_hca_cap, hca_cap, ipsec_offload);
+free:
+	kvfree(query_cap);
+	return err;
+}
+
+enum esw_vport_ipsec_offload {
+	MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD,
+};
+
+int mlx5_esw_ipsec_vf_offload_get(struct mlx5_core_dev *dev, struct mlx5_vport *vport)
+{
+	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	void *hca_cap, *query_cap;
+	bool ipsec_enabled;
+	int err;
+
+	/* Querying IPsec caps only makes sense when generic ipsec_offload
+	 * HCA cap is enabled
+	 */
+	err = esw_ipsec_vf_query_generic(dev, vport->vport, &ipsec_enabled);
+	if (err)
+		return err;
+
+	if (!ipsec_enabled) {
+		vport->info.ipsec_crypto_enabled = false;
+		return 0;
+	}
+
+	query_cap = kvzalloc(query_sz, GFP_KERNEL);
+	if (!query_cap)
+		return -ENOMEM;
+
+	err = mlx5_vport_get_other_func_cap(dev, vport->vport, query_cap, MLX5_CAP_IPSEC);
+	if (err)
+		goto free;
+
+	hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability);
+	vport->info.ipsec_crypto_enabled =
+		MLX5_GET(ipsec_cap, hca_cap, ipsec_crypto_offload);
+free:
+	kvfree(query_cap);
+	return err;
+}
+
+static int esw_ipsec_vf_set_generic(struct mlx5_core_dev *dev, u16 vport_num, bool ipsec_ofld)
+{
+	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+	void *hca_cap, *query_cap, *cap;
+	int ret;
+
+	if (!MLX5_CAP_GEN(dev, vhca_resource_manager))
+		return -EOPNOTSUPP;
+
+	query_cap = kvzalloc(query_sz, GFP_KERNEL);
+	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
+	if (!hca_cap || !query_cap) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	ret = mlx5_vport_get_other_func_general_cap(dev, vport_num, query_cap);
+	if (ret)
+		goto free;
+
+	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
+	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
+	       MLX5_UN_SZ_BYTES(hca_cap_union));
+	MLX5_SET(cmd_hca_cap, cap, ipsec_offload, ipsec_ofld);
+
+	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
+	MLX5_SET(set_hca_cap_in, hca_cap, function_id, vport_num);
+
+	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
+		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
+	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
+free:
+	kvfree(hca_cap);
+	kvfree(query_cap);
+	return ret;
+}
+
+static int esw_ipsec_vf_set_bytype(struct mlx5_core_dev *dev, struct mlx5_vport *vport,
+				   bool enable, enum esw_vport_ipsec_offload type)
+{
+	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+	void *hca_cap, *query_cap, *cap;
+	int ret;
+
+	if (!MLX5_CAP_GEN(dev, vhca_resource_manager))
+		return -EOPNOTSUPP;
+
+	query_cap = kvzalloc(query_sz, GFP_KERNEL);
+	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
+	if (!hca_cap || !query_cap) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	ret = mlx5_vport_get_other_func_cap(dev, vport->vport, query_cap, MLX5_CAP_IPSEC);
+	if (ret)
+		goto free;
+
+	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
+	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
+	       MLX5_UN_SZ_BYTES(hca_cap_union));
+
+	switch (type) {
+	case MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD:
+		MLX5_SET(ipsec_cap, cap, ipsec_crypto_offload, enable);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		goto free;
+	}
+
+	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
+	MLX5_SET(set_hca_cap_in, hca_cap, function_id, vport->vport);
+
+	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
+		 MLX5_SET_HCA_CAP_OP_MOD_IPSEC << 1);
+	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
+free:
+	kvfree(hca_cap);
+	kvfree(query_cap);
+	return ret;
+}
+
+static int esw_ipsec_vf_crypto_aux_caps_set(struct mlx5_core_dev *dev, u16 vport_num, bool enable)
+{
+	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	void *hca_cap, *query_cap, *cap;
+	int ret;
+
+	query_cap = kvzalloc(query_sz, GFP_KERNEL);
+	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
+	if (!hca_cap || !query_cap) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	ret = mlx5_vport_get_other_func_cap(dev, vport_num, query_cap, MLX5_CAP_ETHERNET_OFFLOADS);
+	if (ret)
+		goto free;
+
+	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
+	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
+	       MLX5_UN_SZ_BYTES(hca_cap_union));
+	MLX5_SET(per_protocol_networking_offload_caps, cap, insert_trailer, enable);
+	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
+	MLX5_SET(set_hca_cap_in, hca_cap, function_id, vport_num);
+	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
+		 MLX5_SET_HCA_CAP_OP_MOD_ETHERNET_OFFLOADS << 1);
+	ret = mlx5_cmd_exec_in(esw->dev, set_hca_cap, hca_cap);
+free:
+	kvfree(hca_cap);
+	kvfree(query_cap);
+	return ret;
+}
+
+static int esw_ipsec_vf_offload_set_bytype(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+					   bool enable, enum esw_vport_ipsec_offload type)
+{
+	struct mlx5_core_dev *dev = esw->dev;
+	int err;
+
+	if (vport->vport == MLX5_VPORT_PF)
+		return -EOPNOTSUPP;
+
+	if (type == MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD) {
+		err = esw_ipsec_vf_crypto_aux_caps_set(dev, vport->vport, enable);
+		if (err)
+			return err;
+	}
+
+	if (enable) {
+		err = esw_ipsec_vf_set_generic(dev, vport->vport, enable);
+		if (err)
+			return err;
+		err = esw_ipsec_vf_set_bytype(dev, vport, enable, type);
+		if (err)
+			return err;
+	} else {
+		err = esw_ipsec_vf_set_bytype(dev, vport, enable, type);
+		if (err)
+			return err;
+		err = esw_ipsec_vf_set_generic(dev, vport->vport, enable);
+		if (err)
+			return err;
+	}
+
+	switch (type) {
+	case MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD:
+		vport->info.ipsec_crypto_enabled = enable;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int esw_ipsec_offload_supported(struct mlx5_core_dev *dev, u16 vport_num)
+{
+	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	void *hca_cap, *query_cap;
+	int ret;
+
+	query_cap = kvzalloc(query_sz, GFP_KERNEL);
+	if (!query_cap)
+		return -ENOMEM;
+
+	ret = mlx5_vport_get_other_func_cap(dev, vport_num, query_cap, MLX5_CAP_GENERAL);
+	if (ret)
+		goto free;
+
+	hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability);
+	if (!MLX5_GET(cmd_hca_cap, hca_cap, log_max_dek))
+		ret = -EOPNOTSUPP;
+free:
+	kvfree(query_cap);
+	return ret;
+}
+
+bool mlx5_esw_ipsec_vf_offload_supported(struct mlx5_core_dev *dev)
+{
+	/* Old firmware doesn't support ipsec_offload capability for VFs. This
+	 * can be detected by checking reformat_add_esp_trasport capability -
+	 * when this cap isn't supported it means firmware cannot be trusted
+	 * about what it reports for ipsec_offload cap.
+	 */
+	return MLX5_CAP_FLOWTABLE_NIC_TX(dev, reformat_add_esp_trasport);
+}
+
+int mlx5_esw_ipsec_vf_crypto_offload_supported(struct mlx5_core_dev *dev,
+					       u16 vport_num)
+{
+	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	void *hca_cap, *query_cap;
+	int err;
+
+	if (!mlx5_esw_ipsec_vf_offload_supported(dev))
+		return -EOPNOTSUPP;
+
+	err = esw_ipsec_offload_supported(dev, vport_num);
+	if (err)
+		return err;
+
+	query_cap = kvzalloc(query_sz, GFP_KERNEL);
+	if (!query_cap)
+		return -ENOMEM;
+
+	err = mlx5_vport_get_other_func_cap(dev, vport_num, query_cap, MLX5_CAP_ETHERNET_OFFLOADS);
+	if (err)
+		goto free;
+
+	hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability);
+	if (!MLX5_GET(per_protocol_networking_offload_caps, hca_cap, swp))
+		goto free;
+
+free:
+	kvfree(query_cap);
+	return err;
+}
+
+int mlx5_esw_ipsec_vf_crypto_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+					 bool enable)
+{
+	return esw_ipsec_vf_offload_set_bytype(esw, vport, enable,
+					       MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 38a149dd479f..79ae6ad94f55 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -832,6 +832,9 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport *
 
 	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
 	vport->info.mig_enabled = MLX5_GET(cmd_hca_cap_2, hca_caps, migratable);
+
+	err = mlx5_esw_ipsec_vf_offload_get(esw->dev, vport);
+
 out_free:
 	kfree(query_ctx);
 	return err;
@@ -914,6 +917,8 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
 	/* Sync with current vport context */
 	vport->enabled_events = enabled_events;
 	vport->enabled = true;
+	if (vport->vport != MLX5_VPORT_PF && vport->info.ipsec_crypto_enabled)
+		esw->enabled_ipsec_vf_count++;
 
 	/* Esw manager is trusted by default. Host PF (vport 0) is trusted as well
 	 * in smartNIC as it's a vport group manager.
@@ -970,6 +975,9 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
 	    MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
 		mlx5_esw_vport_vhca_id_clear(esw, vport_num);
 
+	if (vport->vport != MLX5_VPORT_PF && vport->info.ipsec_crypto_enabled)
+		esw->enabled_ipsec_vf_count--;
+
 	/* We don't assume VFs will cleanup after themselves.
 	 * Calling vport change handler while vport is disabled will cleanup
 	 * the vport resources.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 0e51fd066fcc..cde5712aa697 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -163,6 +163,7 @@ struct mlx5_vport_info {
 	u8                      trusted: 1;
 	u8                      roce_enabled: 1;
 	u8                      mig_enabled: 1;
+	u8                      ipsec_crypto_enabled: 1;
 };
 
 /* Vport context events */
@@ -559,6 +560,12 @@ int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enab
 					struct netlink_ext_ack *extack);
 int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable,
 					struct netlink_ext_ack *extack);
+#ifdef CONFIG_XFRM_OFFLOAD
+int mlx5_devlink_port_fn_ipsec_crypto_get(struct devlink_port *port, bool *is_enabled,
+					  struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_ipsec_crypto_set(struct devlink_port *port, bool enable,
+					  struct netlink_ext_ack *extack);
+#endif /* CONFIG_XFRM_OFFLOAD */
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
 
 int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
@@ -858,6 +865,13 @@ int mlx5_eswitch_restore_ipsec_rule(struct mlx5_eswitch *esw, struct mlx5_flow_h
 				    struct mlx5_esw_flow_attr *esw_attr, int attr_idx);
 bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev);
 void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev);
+bool mlx5_esw_ipsec_vf_offload_supported(struct mlx5_core_dev *dev);
+int mlx5_esw_ipsec_vf_offload_get(struct mlx5_core_dev *dev,
+				  struct mlx5_vport *vport);
+int mlx5_esw_ipsec_vf_crypto_offload_supported(struct mlx5_core_dev *dev,
+					       u16 vport_num);
+int mlx5_esw_ipsec_vf_crypto_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+					 bool enable);
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index d4697dadd27d..f37d30a096c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -4367,3 +4367,88 @@ mlx5_eswitch_restore_ipsec_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handl
 
 	return mlx5_modify_rule_destination(rule, &new_dest, &old_dest);
 }
+
+#ifdef CONFIG_XFRM_OFFLOAD
+int mlx5_devlink_port_fn_ipsec_crypto_get(struct devlink_port *port, bool *is_enabled,
+					  struct netlink_ext_ack *extack)
+{
+	struct mlx5_eswitch *esw;
+	struct mlx5_vport *vport;
+	int err = 0;
+
+	esw = mlx5_devlink_eswitch_get(port->devlink);
+	if (IS_ERR(esw))
+		return PTR_ERR(esw);
+
+	if (!mlx5_esw_ipsec_vf_offload_supported(esw->dev)) {
+		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPSec crypto");
+		return -EOPNOTSUPP;
+	}
+
+	vport = mlx5_devlink_port_vport_get(port);
+
+	mutex_lock(&esw->state_lock);
+	if (!vport->enabled) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
+
+	*is_enabled = vport->info.ipsec_crypto_enabled;
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+
+int mlx5_devlink_port_fn_ipsec_crypto_set(struct devlink_port *port, bool enable,
+					  struct netlink_ext_ack *extack)
+{
+	struct mlx5_eswitch *esw;
+	struct mlx5_vport *vport;
+	u16 vport_num;
+	int err;
+
+	esw = mlx5_devlink_eswitch_get(port->devlink);
+	if (IS_ERR(esw))
+		return PTR_ERR(esw);
+
+	vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
+	err = mlx5_esw_ipsec_vf_crypto_offload_supported(esw->dev, vport_num);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Device doesn't support IPsec crypto");
+		return err;
+	}
+
+	vport = mlx5_devlink_port_vport_get(port);
+
+	mutex_lock(&esw->state_lock);
+	if (!vport->enabled) {
+		err = -EOPNOTSUPP;
+		NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
+		goto unlock;
+	}
+
+	if (vport->info.ipsec_crypto_enabled == enable)
+		goto unlock;
+
+	if (!esw->enabled_ipsec_vf_count && esw->dev->num_ipsec_offloads) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	err = mlx5_esw_ipsec_vf_crypto_offload_set(esw, vport, enable);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to set IPsec crypto");
+		goto unlock;
+	}
+
+	vport->info.ipsec_crypto_enabled = enable;
+	if (enable)
+		esw->enabled_ipsec_vf_count++;
+	else
+		esw->enabled_ipsec_vf_count--;
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+#endif /* CONFIG_XFRM_OFFLOAD */

From b691b1116e820450de8a5d50eb4ce546a7de93dc Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Thu, 24 Aug 2023 23:28:36 -0700
Subject: [PATCH 1949/2047] net/mlx5: Implement devlink port function cmds to
 control ipsec_packet

Implement devlink port function commands to enable / disable IPsec
packet offloads. This is used to control the IPsec capability of the
device.

When ipsec_offload is enabled for a VF, it prevents adding IPsec packet
offloads on the PF, because the two cannot be active simultaneously due
to HW constraints. Conversely, if there are any active IPsec packet
offloads on the PF, it's not allowed to enable ipsec_packet on a VF,
until PF IPsec offloads are cleared.

Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20230825062836.103744-9-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../ethernet/mellanox/mlx5/switchdev.rst      | 10 +++
 .../mellanox/mlx5/core/esw/devlink_port.c     |  2 +
 .../ethernet/mellanox/mlx5/core/esw/ipsec.c   | 62 +++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/eswitch.c |  7 +-
 .../net/ethernet/mellanox/mlx5/core/eswitch.h | 12 +++
 .../mellanox/mlx5/core/eswitch_offloads.c     | 84 +++++++++++++++++++
 6 files changed, 173 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst
index de51e55dcfe3..b617e93d7c2c 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst
@@ -200,6 +200,16 @@ IPsec capability enabled, any IPsec offloading is blocked on the PF.
 mlx5 driver support devlink port function attr mechanism to setup ipsec_crypto
 capability. (refer to Documentation/networking/devlink/devlink-port.rst)
 
+IPsec packet capability setup
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+User who wants mlx5 PCI VFs to be able to perform IPsec packet offloading need
+to explicitly enable the VF ipsec_packet capability. Enabling IPsec capability
+for VFs is supported starting with ConnectX6dx devices and above. When a VF has
+IPsec capability enabled, any IPsec offloading is blocked on the PF.
+
+mlx5 driver support devlink port function attr mechanism to setup ipsec_packet
+capability. (refer to Documentation/networking/devlink/devlink-port.rst)
+
 SF state setup
 --------------
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index 12205e913417..d8e739cbcbce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -95,6 +95,8 @@ static const struct devlink_port_ops mlx5_esw_pf_vf_dl_port_ops = {
 #ifdef CONFIG_XFRM_OFFLOAD
 	.port_fn_ipsec_crypto_get = mlx5_devlink_port_fn_ipsec_crypto_get,
 	.port_fn_ipsec_crypto_set = mlx5_devlink_port_fn_ipsec_crypto_set,
+	.port_fn_ipsec_packet_get = mlx5_devlink_port_fn_ipsec_packet_get,
+	.port_fn_ipsec_packet_set = mlx5_devlink_port_fn_ipsec_packet_set,
 #endif /* CONFIG_XFRM_OFFLOAD */
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c
index 187fb5f2d0cb..da10e04777cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec.c
@@ -37,6 +37,7 @@ free:
 
 enum esw_vport_ipsec_offload {
 	MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD,
+	MLX5_ESW_VPORT_IPSEC_PACKET_OFFLOAD,
 };
 
 int mlx5_esw_ipsec_vf_offload_get(struct mlx5_core_dev *dev, struct mlx5_vport *vport)
@@ -55,6 +56,7 @@ int mlx5_esw_ipsec_vf_offload_get(struct mlx5_core_dev *dev, struct mlx5_vport *
 
 	if (!ipsec_enabled) {
 		vport->info.ipsec_crypto_enabled = false;
+		vport->info.ipsec_packet_enabled = false;
 		return 0;
 	}
 
@@ -69,6 +71,8 @@ int mlx5_esw_ipsec_vf_offload_get(struct mlx5_core_dev *dev, struct mlx5_vport *
 	hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability);
 	vport->info.ipsec_crypto_enabled =
 		MLX5_GET(ipsec_cap, hca_cap, ipsec_crypto_offload);
+	vport->info.ipsec_packet_enabled =
+		MLX5_GET(ipsec_cap, hca_cap, ipsec_full_offload);
 free:
 	kvfree(query_cap);
 	return err;
@@ -143,6 +147,9 @@ static int esw_ipsec_vf_set_bytype(struct mlx5_core_dev *dev, struct mlx5_vport
 	case MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD:
 		MLX5_SET(ipsec_cap, cap, ipsec_crypto_offload, enable);
 		break;
+	case MLX5_ESW_VPORT_IPSEC_PACKET_OFFLOAD:
+		MLX5_SET(ipsec_cap, cap, ipsec_full_offload, enable);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		goto free;
@@ -222,15 +229,28 @@ static int esw_ipsec_vf_offload_set_bytype(struct mlx5_eswitch *esw, struct mlx5
 		err = esw_ipsec_vf_set_bytype(dev, vport, enable, type);
 		if (err)
 			return err;
-		err = esw_ipsec_vf_set_generic(dev, vport->vport, enable);
+		err = mlx5_esw_ipsec_vf_offload_get(dev, vport);
 		if (err)
 			return err;
+
+		/* The generic ipsec_offload cap can be disabled only if both
+		 * ipsec_crypto_offload and ipsec_full_offload aren't enabled.
+		 */
+		if (!vport->info.ipsec_crypto_enabled &&
+		    !vport->info.ipsec_packet_enabled) {
+			err = esw_ipsec_vf_set_generic(dev, vport->vport, enable);
+			if (err)
+				return err;
+		}
 	}
 
 	switch (type) {
 	case MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD:
 		vport->info.ipsec_crypto_enabled = enable;
 		break;
+	case MLX5_ESW_VPORT_IPSEC_PACKET_OFFLOAD:
+		vport->info.ipsec_packet_enabled = enable;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -301,9 +321,49 @@ free:
 	return err;
 }
 
+int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev,
+					       u16 vport_num)
+{
+	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	void *hca_cap, *query_cap;
+	int ret;
+
+	if (!mlx5_esw_ipsec_vf_offload_supported(dev))
+		return -EOPNOTSUPP;
+
+	ret = esw_ipsec_offload_supported(dev, vport_num);
+	if (ret)
+		return ret;
+
+	query_cap = kvzalloc(query_sz, GFP_KERNEL);
+	if (!query_cap)
+		return -ENOMEM;
+
+	ret = mlx5_vport_get_other_func_cap(dev, vport_num, query_cap, MLX5_CAP_FLOW_TABLE);
+	if (ret)
+		goto out;
+
+	hca_cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability);
+	if (!MLX5_GET(flow_table_nic_cap, hca_cap, flow_table_properties_nic_receive.decap)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+out:
+	kvfree(query_cap);
+	return ret;
+}
+
 int mlx5_esw_ipsec_vf_crypto_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
 					 bool enable)
 {
 	return esw_ipsec_vf_offload_set_bytype(esw, vport, enable,
 					       MLX5_ESW_VPORT_IPSEC_CRYPTO_OFFLOAD);
 }
+
+int mlx5_esw_ipsec_vf_packet_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+					 bool enable)
+{
+	return esw_ipsec_vf_offload_set_bytype(esw, vport, enable,
+					       MLX5_ESW_VPORT_IPSEC_PACKET_OFFLOAD);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 79ae6ad94f55..6cd7d6497e10 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -834,7 +834,6 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport *
 	vport->info.mig_enabled = MLX5_GET(cmd_hca_cap_2, hca_caps, migratable);
 
 	err = mlx5_esw_ipsec_vf_offload_get(esw->dev, vport);
-
 out_free:
 	kfree(query_ctx);
 	return err;
@@ -917,7 +916,8 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
 	/* Sync with current vport context */
 	vport->enabled_events = enabled_events;
 	vport->enabled = true;
-	if (vport->vport != MLX5_VPORT_PF && vport->info.ipsec_crypto_enabled)
+	if (vport->vport != MLX5_VPORT_PF &&
+	    (vport->info.ipsec_crypto_enabled || vport->info.ipsec_packet_enabled))
 		esw->enabled_ipsec_vf_count++;
 
 	/* Esw manager is trusted by default. Host PF (vport 0) is trusted as well
@@ -975,7 +975,8 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
 	    MLX5_CAP_GEN(esw->dev, vhca_resource_manager))
 		mlx5_esw_vport_vhca_id_clear(esw, vport_num);
 
-	if (vport->vport != MLX5_VPORT_PF && vport->info.ipsec_crypto_enabled)
+	if (vport->vport != MLX5_VPORT_PF &&
+	    (vport->info.ipsec_crypto_enabled || vport->info.ipsec_packet_enabled))
 		esw->enabled_ipsec_vf_count--;
 
 	/* We don't assume VFs will cleanup after themselves.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index cde5712aa697..37ab66e7b403 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -164,6 +164,7 @@ struct mlx5_vport_info {
 	u8                      roce_enabled: 1;
 	u8                      mig_enabled: 1;
 	u8                      ipsec_crypto_enabled: 1;
+	u8                      ipsec_packet_enabled: 1;
 };
 
 /* Vport context events */
@@ -565,6 +566,10 @@ int mlx5_devlink_port_fn_ipsec_crypto_get(struct devlink_port *port, bool *is_en
 					  struct netlink_ext_ack *extack);
 int mlx5_devlink_port_fn_ipsec_crypto_set(struct devlink_port *port, bool enable,
 					  struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_ipsec_packet_get(struct devlink_port *port, bool *is_enabled,
+					  struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, bool enable,
+					  struct netlink_ext_ack *extack);
 #endif /* CONFIG_XFRM_OFFLOAD */
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
 
@@ -872,6 +877,13 @@ int mlx5_esw_ipsec_vf_crypto_offload_supported(struct mlx5_core_dev *dev,
 					       u16 vport_num);
 int mlx5_esw_ipsec_vf_crypto_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
 					 bool enable);
+int mlx5_esw_ipsec_vf_packet_offload_set(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+					 bool enable);
+int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev,
+					       u16 vport_num);
+void mlx5_esw_vport_ipsec_offload_enable(struct mlx5_eswitch *esw);
+void mlx5_esw_vport_ipsec_offload_disable(struct mlx5_eswitch *esw);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f37d30a096c0..752fb0dfb111 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -4451,4 +4451,88 @@ unlock:
 	mutex_unlock(&esw->state_lock);
 	return err;
 }
+
+int mlx5_devlink_port_fn_ipsec_packet_get(struct devlink_port *port, bool *is_enabled,
+					  struct netlink_ext_ack *extack)
+{
+	struct mlx5_eswitch *esw;
+	struct mlx5_vport *vport;
+	int err = 0;
+
+	esw = mlx5_devlink_eswitch_get(port->devlink);
+	if (IS_ERR(esw))
+		return PTR_ERR(esw);
+
+	if (!mlx5_esw_ipsec_vf_offload_supported(esw->dev)) {
+		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet");
+		return -EOPNOTSUPP;
+	}
+
+	vport = mlx5_devlink_port_vport_get(port);
+
+	mutex_lock(&esw->state_lock);
+	if (!vport->enabled) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
+
+	*is_enabled = vport->info.ipsec_packet_enabled;
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+
+int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port,
+					  bool enable,
+					  struct netlink_ext_ack *extack)
+{
+	struct mlx5_eswitch *esw;
+	struct mlx5_vport *vport;
+	u16 vport_num;
+	int err;
+
+	esw = mlx5_devlink_eswitch_get(port->devlink);
+	if (IS_ERR(esw))
+		return PTR_ERR(esw);
+
+	vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index);
+	err = mlx5_esw_ipsec_vf_packet_offload_supported(esw->dev, vport_num);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Device doesn't support IPsec packet mode");
+		return err;
+	}
+
+	vport = mlx5_devlink_port_vport_get(port);
+	mutex_lock(&esw->state_lock);
+	if (!vport->enabled) {
+		err = -EOPNOTSUPP;
+		NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
+		goto unlock;
+	}
+
+	if (vport->info.ipsec_packet_enabled == enable)
+		goto unlock;
+
+	if (!esw->enabled_ipsec_vf_count && esw->dev->num_ipsec_offloads) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	err = mlx5_esw_ipsec_vf_packet_offload_set(esw, vport, enable);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Failed to set IPsec packet mode");
+		goto unlock;
+	}
+
+	vport->info.ipsec_packet_enabled = enable;
+	if (enable)
+		esw->enabled_ipsec_vf_count++;
+	else
+		esw->enabled_ipsec_vf_count--;
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
 #endif /* CONFIG_XFRM_OFFLOAD */

From c4e1ab07b5572b9c732fcf7ac86e168ee2835adf Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 25 Aug 2023 13:27:44 +0100
Subject: [PATCH 1950/2047] doc/netlink: Fix typo in genetlink-* schemas

Fix typo verion -> version in genetlink-c and genetlink-legacy.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20230825122756.7603-2-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/genetlink-c.yaml      | 2 +-
 Documentation/netlink/genetlink-legacy.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index 4c1f8c22627b..9806c44f604c 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -41,7 +41,7 @@ properties:
     description: Name of the define for the family name.
     type: string
   c-version-name:
-    description: Name of the define for the verion of the family.
+    description: Name of the define for the version of the family.
     type: string
   max-by-define:
     description: Makes the number of attributes and commands be specified by a define, not an enum value.
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index 196076dfa309..12a0a045605d 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -41,7 +41,7 @@ properties:
     description: Name of the define for the family name.
     type: string
   c-version-name:
-    description: Name of the define for the verion of the family.
+    description: Name of the define for the version of the family.
     type: string
   max-by-define:
     description: Makes the number of attributes and commands be specified by a define, not an enum value.

From ed68c58c0eb4867df2e995d872aec6342171619c Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 25 Aug 2023 13:27:45 +0100
Subject: [PATCH 1951/2047] doc/netlink: Add a schema for netlink-raw families

This schema is largely a copy of the genetlink-legacy schema with the
following modifications:

 - change the schema id to netlink-raw
 - add a top-level protonum property, e.g. 0 (for NETLINK_ROUTE)
 - change the protocol enumeration to netlink-raw, removing the
   genetlink options.
 - replace doc references to generic netlink with raw netlink
 - add a value property to mcast-group definitions

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20230825122756.7603-3-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/netlink-raw.yaml | 410 +++++++++++++++++++++++++
 1 file changed, 410 insertions(+)
 create mode 100644 Documentation/netlink/netlink-raw.yaml

diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml
new file mode 100644
index 000000000000..896797876414
--- /dev/null
+++ b/Documentation/netlink/netlink-raw.yaml
@@ -0,0 +1,410 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+%YAML 1.2
+---
+$id: http://kernel.org/schemas/netlink/netlink-raw.yaml#
+$schema: https://json-schema.org/draft-07/schema
+
+# Common defines
+$defs:
+  uint:
+    type: integer
+    minimum: 0
+  len-or-define:
+    type: [ string, integer ]
+    pattern: ^[0-9A-Za-z_]+( - 1)?$
+    minimum: 0
+
+# Schema for specs
+title: Protocol
+description: Specification of a raw netlink protocol
+type: object
+required: [ name, doc, attribute-sets, operations ]
+additionalProperties: False
+properties:
+  name:
+    description: Name of the netlink family.
+    type: string
+  doc:
+    type: string
+  protocol:
+    description: Schema compatibility level.
+    enum: [ netlink-raw ] # Trim
+  # Start netlink-raw
+  protonum:
+    description: Protocol number to use for netlink-raw
+    type: integer
+  # End netlink-raw
+  uapi-header:
+    description: Path to the uAPI header, default is linux/${family-name}.h
+    type: string
+  # Start genetlink-c
+  c-family-name:
+    description: Name of the define for the family name.
+    type: string
+  c-version-name:
+    description: Name of the define for the version of the family.
+    type: string
+  max-by-define:
+    description: Makes the number of attributes and commands be specified by a define, not an enum value.
+    type: boolean
+  # End genetlink-c
+  # Start genetlink-legacy
+  kernel-policy:
+    description: |
+      Defines if the input policy in the kernel is global, per-operation, or split per operation type.
+      Default is split.
+    enum: [ split, per-op, global ]
+  # End genetlink-legacy
+
+  definitions:
+    description: List of type and constant definitions (enums, flags, defines).
+    type: array
+    items:
+      type: object
+      required: [ type, name ]
+      additionalProperties: False
+      properties:
+        name:
+          type: string
+        header:
+          description: For C-compatible languages, header which already defines this value.
+          type: string
+        type:
+          enum: [ const, enum, flags, struct ] # Trim
+        doc:
+          type: string
+        # For const
+        value:
+          description: For const - the value.
+          type: [ string, integer ]
+        # For enum and flags
+        value-start:
+          description: For enum or flags the literal initializer for the first value.
+          type: [ string, integer ]
+        entries:
+          description: For enum or flags array of values.
+          type: array
+          items:
+            oneOf:
+              - type: string
+              - type: object
+                required: [ name ]
+                additionalProperties: False
+                properties:
+                  name:
+                    type: string
+                  value:
+                    type: integer
+                  doc:
+                    type: string
+        render-max:
+          description: Render the max members for this enum.
+          type: boolean
+        # Start genetlink-c
+        enum-name:
+          description: Name for enum, if empty no name will be used.
+          type: [ string, "null" ]
+        name-prefix:
+          description: For enum the prefix of the values, optional.
+          type: string
+        # End genetlink-c
+        # Start genetlink-legacy
+        members:
+          description: List of struct members. Only scalars and strings members allowed.
+          type: array
+          items:
+            type: object
+            required: [ name, type ]
+            additionalProperties: False
+            properties:
+              name:
+                type: string
+              type:
+                description: The netlink attribute type
+                enum: [ u8, u16, u32, u64, s8, s16, s32, s64, string, binary ]
+              len:
+                $ref: '#/$defs/len-or-define'
+              byte-order:
+                enum: [ little-endian, big-endian ]
+              doc:
+                description: Documentation for the struct member attribute.
+                type: string
+              enum:
+                description: Name of the enum type used for the attribute.
+                type: string
+              enum-as-flags:
+                description: |
+                  Treat the enum as flags. In most cases enum is either used as flags or as values.
+                  Sometimes, however, both forms are necessary, in which case header contains the enum
+                  form while specific attributes may request to convert the values into a bitfield.
+                type: boolean
+              display-hint: &display-hint
+                description: |
+                  Optional format indicator that is intended only for choosing
+                  the right formatting mechanism when displaying values of this
+                  type.
+                enum: [ hex, mac, fddi, ipv4, ipv6, uuid ]
+        # End genetlink-legacy
+
+  attribute-sets:
+    description: Definition of attribute spaces for this family.
+    type: array
+    items:
+      description: Definition of a single attribute space.
+      type: object
+      required: [ name, attributes ]
+      additionalProperties: False
+      properties:
+        name:
+          description: |
+            Name used when referring to this space in other definitions, not used outside of the spec.
+          type: string
+        name-prefix:
+          description: |
+            Prefix for the C enum name of the attributes. Default family[name]-set[name]-a-
+          type: string
+        enum-name:
+          description: Name for the enum type of the attribute.
+          type: string
+        doc:
+          description: Documentation of the space.
+          type: string
+        subset-of:
+          description: |
+            Name of another space which this is a logical part of. Sub-spaces can be used to define
+            a limited group of attributes which are used in a nest.
+          type: string
+        # Start genetlink-c
+        attr-cnt-name:
+          description: The explicit name for constant holding the count of attributes (last attr + 1).
+          type: string
+        attr-max-name:
+          description: The explicit name for last member of attribute enum.
+          type: string
+        # End genetlink-c
+        attributes:
+          description: List of attributes in the space.
+          type: array
+          items:
+            type: object
+            required: [ name, type ]
+            additionalProperties: False
+            properties:
+              name:
+                type: string
+              type: &attr-type
+                description: The netlink attribute type
+                enum: [ unused, pad, flag, binary, u8, u16, u32, u64, s32, s64,
+                        string, nest, array-nest, nest-type-value ]
+              doc:
+                description: Documentation of the attribute.
+                type: string
+              value:
+                description: Value for the enum item representing this attribute in the uAPI.
+                $ref: '#/$defs/uint'
+              type-value:
+                description: Name of the value extracted from the type of a nest-type-value attribute.
+                type: array
+                items:
+                  type: string
+              byte-order:
+                enum: [ little-endian, big-endian ]
+              multi-attr:
+                type: boolean
+              nested-attributes:
+                description: Name of the space (sub-space) used inside the attribute.
+                type: string
+              enum:
+                description: Name of the enum type used for the attribute.
+                type: string
+              enum-as-flags:
+                description: |
+                  Treat the enum as flags. In most cases enum is either used as flags or as values.
+                  Sometimes, however, both forms are necessary, in which case header contains the enum
+                  form while specific attributes may request to convert the values into a bitfield.
+                type: boolean
+              checks:
+                description: Kernel input validation.
+                type: object
+                additionalProperties: False
+                properties:
+                  flags-mask:
+                    description: Name of the flags constant on which to base mask (unsigned scalar types only).
+                    type: string
+                  min:
+                    description: Min value for an integer attribute.
+                    type: integer
+                  min-len:
+                    description: Min length for a binary attribute.
+                    $ref: '#/$defs/len-or-define'
+                  max-len:
+                    description: Max length for a string or a binary attribute.
+                    $ref: '#/$defs/len-or-define'
+              sub-type: *attr-type
+              display-hint: *display-hint
+              # Start genetlink-c
+              name-prefix:
+                type: string
+              # End genetlink-c
+              # Start genetlink-legacy
+              struct:
+                description: Name of the struct type used for the attribute.
+                type: string
+              # End genetlink-legacy
+
+      # Make sure name-prefix does not appear in subsets (subsets inherit naming)
+      dependencies:
+        name-prefix:
+          not:
+            required: [ subset-of ]
+        subset-of:
+          not:
+            required: [ name-prefix ]
+
+  operations:
+    description: Operations supported by the protocol.
+    type: object
+    required: [ list ]
+    additionalProperties: False
+    properties:
+      enum-model:
+        description: |
+          The model of assigning values to the operations.
+          "unified" is the recommended model where all message types belong
+          to a single enum.
+          "directional" has the messages sent to the kernel and from the kernel
+          enumerated separately.
+        enum: [ unified, directional ] # Trim
+      name-prefix:
+        description: |
+          Prefix for the C enum name of the command. The name is formed by concatenating
+          the prefix with the upper case name of the command, with dashes replaced by underscores.
+        type: string
+      enum-name:
+        description: Name for the enum type with commands.
+        type: string
+      async-prefix:
+        description: Same as name-prefix but used to render notifications and events to separate enum.
+        type: string
+      async-enum:
+        description: Name for the enum type with notifications/events.
+        type: string
+      # Start genetlink-legacy
+      fixed-header: &fixed-header
+        description: |
+          Name of the structure defining the optional fixed-length protocol
+          header. This header is placed in a message after the netlink and
+          genetlink headers and before any attributes.
+        type: string
+      # End genetlink-legacy
+      list:
+        description: List of commands
+        type: array
+        items:
+          type: object
+          additionalProperties: False
+          required: [ name, doc ]
+          properties:
+            name:
+              description: Name of the operation, also defining its C enum value in uAPI.
+              type: string
+            doc:
+              description: Documentation for the command.
+              type: string
+            value:
+              description: Value for the enum in the uAPI.
+              $ref: '#/$defs/uint'
+            attribute-set:
+              description: |
+                Attribute space from which attributes directly in the requests and replies
+                to this command are defined.
+              type: string
+            flags: &cmd_flags
+              description: Command flags.
+              type: array
+              items:
+                enum: [ admin-perm ]
+            dont-validate:
+              description: Kernel attribute validation flags.
+              type: array
+              items:
+                enum: [ strict, dump ]
+            # Start genetlink-legacy
+            fixed-header: *fixed-header
+            # End genetlink-legacy
+            do: &subop-type
+              description: Main command handler.
+              type: object
+              additionalProperties: False
+              properties:
+                request: &subop-attr-list
+                  description: Definition of the request message for a given command.
+                  type: object
+                  additionalProperties: False
+                  properties:
+                    attributes:
+                      description: |
+                        Names of attributes from the attribute-set (not full attribute
+                        definitions, just names).
+                      type: array
+                      items:
+                        type: string
+                    # Start genetlink-legacy
+                    value:
+                      description: |
+                        ID of this message if value for request and response differ,
+                        i.e. requests and responses have different message enums.
+                      $ref: '#/$defs/uint'
+                    # End genetlink-legacy
+                reply: *subop-attr-list
+                pre:
+                  description: Hook for a function to run before the main callback (pre_doit or start).
+                  type: string
+                post:
+                  description: Hook for a function to run after the main callback (post_doit or done).
+                  type: string
+            dump: *subop-type
+            notify:
+              description: Name of the command sharing the reply type with this notification.
+              type: string
+            event:
+              type: object
+              additionalProperties: False
+              properties:
+                attributes:
+                  description: Explicit list of the attributes for the notification.
+                  type: array
+                  items:
+                    type: string
+            mcgrp:
+              description: Name of the multicast group generating given notification.
+              type: string
+  mcast-groups:
+    description: List of multicast groups.
+    type: object
+    required: [ list ]
+    additionalProperties: False
+    properties:
+      list:
+        description: List of groups.
+        type: array
+        items:
+          type: object
+          required: [ name ]
+          additionalProperties: False
+          properties:
+            name:
+              description: |
+                The name for the group, used to form the define and the value of the define.
+              type: string
+            # Start genetlink-c
+            c-define-name:
+              description: Override for the name of the define in C uAPI.
+              type: string
+            # End genetlink-c
+            flags: *cmd_flags
+            # Start netlink-raw
+            value:
+              description: Value of the netlink multicast group in the uAPI.
+              type: integer
+            # End netlink-raw

From 294f37fc87728230cf81f387aa6a0a8438066751 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 25 Aug 2023 13:27:46 +0100
Subject: [PATCH 1952/2047] doc/netlink: Update genetlink-legacy documentation

Add documentation for recently added genetlink-legacy schema attributes.
Remove statements about 'work in progress' and 'todo'.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20230825122756.7603-4-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/core-api/netlink.rst            |  9 ++++---
 .../netlink/genetlink-legacy.rst              | 26 ++++++++++++-------
 Documentation/userspace-api/netlink/specs.rst | 13 ++++++++++
 3 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/Documentation/core-api/netlink.rst b/Documentation/core-api/netlink.rst
index e4a938a05cc9..9f692b02bfe6 100644
--- a/Documentation/core-api/netlink.rst
+++ b/Documentation/core-api/netlink.rst
@@ -67,10 +67,11 @@ Globals
 kernel-policy
 ~~~~~~~~~~~~~
 
-Defines if the kernel validation policy is per operation (``per-op``)
-or for the entire family (``global``). New families should use ``per-op``
-(default) to be able to narrow down the attributes accepted by a specific
-command.
+Defines whether the kernel validation policy is ``global`` i.e. the same for all
+operations of the family, defined for each operation individually - ``per-op``,
+or separately for each operation and operation type (do vs dump) - ``split``.
+New families should use ``per-op`` (default) to be able to narrow down the
+attributes accepted by a specific command.
 
 checks
 ------
diff --git a/Documentation/userspace-api/netlink/genetlink-legacy.rst b/Documentation/userspace-api/netlink/genetlink-legacy.rst
index 802875a37a27..40b82ad5d54a 100644
--- a/Documentation/userspace-api/netlink/genetlink-legacy.rst
+++ b/Documentation/userspace-api/netlink/genetlink-legacy.rst
@@ -8,11 +8,8 @@ This document describes the many additional quirks and properties
 required to describe older Generic Netlink families which form
 the ``genetlink-legacy`` protocol level.
 
-The spec is a work in progress, some of the quirks are just documented
-for future reference.
-
-Specification (defined)
-=======================
+Specification
+=============
 
 Attribute type nests
 --------------------
@@ -156,16 +153,27 @@ it will be allocated 3 for the request (``a`` is the previous operation
 with a request section and the value of 2) and 8 for response (``c`` is
 the previous operation in the "from-kernel" direction).
 
-Other quirks (todo)
-===================
+Other quirks
+============
 
 Structures
 ----------
 
 Legacy families can define C structures both to be used as the contents of
 an attribute and as a fixed message header. Structures are defined in
-``definitions``  and referenced in operations or attributes. Note that
-structures defined in YAML are implicitly packed according to C
+``definitions``  and referenced in operations or attributes.
+
+members
+~~~~~~~
+
+ - ``name`` - The attribute name of the struct member
+ - ``type`` - One of the scalar types ``u8``, ``u16``, ``u32``, ``u64``, ``s8``,
+   ``s16``, ``s32``, ``s64``, ``string`` or ``binary``.
+ - ``byte-order`` - ``big-endian`` or ``little-endian``
+ - ``doc``, ``enum``, ``enum-as-flags``, ``display-hint`` - Same as for
+   :ref:`attribute definitions <attribute_properties>`
+
+Note that structures defined in YAML are implicitly packed according to C
 conventions. For example, the following struct is 4 bytes, not 6 bytes:
 
 .. code-block:: c
diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst
index 2e4acde890b7..cc4e2430997e 100644
--- a/Documentation/userspace-api/netlink/specs.rst
+++ b/Documentation/userspace-api/netlink/specs.rst
@@ -68,6 +68,10 @@ The following sections describe the properties of the most modern ``genetlink``
 schema. See the documentation of :doc:`genetlink-c <c-code-gen>`
 for information on how C names are derived from name properties.
 
+See also :ref:`Documentation/core-api/netlink.rst <kernel_netlink>` for
+information on the Netlink specification properties that are only relevant to
+the kernel space and not part of the user space API.
+
 genetlink
 =========
 
@@ -180,6 +184,8 @@ attributes
 
 List of attributes in the set.
 
+.. _attribute_properties:
+
 Attribute properties
 --------------------
 
@@ -264,6 +270,13 @@ a C array of u32 values can be specified with ``type: binary`` and
 ``sub-type: u32``. Binary types and legacy array formats are described in
 more detail in :doc:`genetlink-legacy`.
 
+display-hint
+~~~~~~~~~~~~
+
+Optional format indicator that is intended only for choosing the right
+formatting mechanism when displaying values of this type. Currently supported
+hints are ``hex``, ``mac``, ``fddi``, ``ipv4``, ``ipv6`` and ``uuid``.
+
 operations
 ----------
 

From 2db8abf0b455e7494bbdd695775bd43c9d4824a8 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 25 Aug 2023 13:27:47 +0100
Subject: [PATCH 1953/2047] doc/netlink: Document the netlink-raw schema
 extensions

Add a doc page for netlink-raw that describes the schema attributes
needed for netlink-raw.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20230825122756.7603-5-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/userspace-api/netlink/index.rst |  1 +
 .../userspace-api/netlink/netlink-raw.rst     | 58 +++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 Documentation/userspace-api/netlink/netlink-raw.rst

diff --git a/Documentation/userspace-api/netlink/index.rst b/Documentation/userspace-api/netlink/index.rst
index 26f3720cb3be..62725dafbbdb 100644
--- a/Documentation/userspace-api/netlink/index.rst
+++ b/Documentation/userspace-api/netlink/index.rst
@@ -14,5 +14,6 @@ Netlink documentation for users.
    specs
    c-code-gen
    genetlink-legacy
+   netlink-raw
 
 See also :ref:`Documentation/core-api/netlink.rst <kernel_netlink>`.
diff --git a/Documentation/userspace-api/netlink/netlink-raw.rst b/Documentation/userspace-api/netlink/netlink-raw.rst
new file mode 100644
index 000000000000..f07fb9b9c101
--- /dev/null
+++ b/Documentation/userspace-api/netlink/netlink-raw.rst
@@ -0,0 +1,58 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+
+======================================================
+Netlink specification support for raw Netlink families
+======================================================
+
+This document describes the additional properties required by raw Netlink
+families such as ``NETLINK_ROUTE`` which use the ``netlink-raw`` protocol
+specification.
+
+Specification
+=============
+
+The netlink-raw schema extends the :doc:`genetlink-legacy <genetlink-legacy>`
+schema with properties that are needed to specify the protocol numbers and
+multicast IDs used by raw netlink families. See :ref:`classic_netlink` for more
+information.
+
+Globals
+-------
+
+protonum
+~~~~~~~~
+
+The ``protonum`` property is used to specify the protocol number to use when
+opening a netlink socket.
+
+.. code-block:: yaml
+
+  # SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+  name: rt-addr
+  protocol: netlink-raw
+  protonum: 0             # part of the NETLINK_ROUTE protocol
+
+
+Multicast group properties
+--------------------------
+
+value
+~~~~~
+
+The ``value`` property is used to specify the group ID to use for multicast
+group registration.
+
+.. code-block:: yaml
+
+  mcast-groups:
+    list:
+      -
+        name: rtnlgrp-ipv4-ifaddr
+        value: 5
+      -
+        name: rtnlgrp-ipv6-ifaddr
+        value: 9
+      -
+        name: rtnlgrp-mctp-ifaddr
+        value: 34

From 88901b967958eaa70478db1c6ddc8cdea748b2e6 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 25 Aug 2023 13:27:48 +0100
Subject: [PATCH 1954/2047] tools/ynl: Add mcast-group schema parsing to ynl

Add a SpecMcastGroup class to the nlspec lib.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20230825122756.7603-6-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/lib/nlspec.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index 0ff0d18666b2..37bcb4d8b37b 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -322,6 +322,26 @@ class SpecOperation(SpecElement):
             self.attr_set = self.family.attr_sets[attr_set_name]
 
 
+class SpecMcastGroup(SpecElement):
+    """Netlink Multicast Group
+
+    Information about a multicast group.
+
+    Value is only used for classic netlink families that use the
+    netlink-raw schema. Genetlink families use dynamic ID allocation
+    where the ids of multicast groups get resolved at runtime. Value
+    will be None for genetlink families.
+
+    Attributes:
+        name      name of the mulitcast group
+        value     integer id of this multicast group for netlink-raw or None
+        yaml      raw spec as loaded from the spec file
+    """
+    def __init__(self, family, yaml):
+        super().__init__(family, yaml)
+        self.value = self.yaml.get('value')
+
+
 class SpecFamily(SpecElement):
     """ Netlink Family Spec class.
 
@@ -343,6 +363,7 @@ class SpecFamily(SpecElement):
         ntfs       dict of all async events
         consts     dict of all constants/enums
         fixed_header  string, optional name of family default fixed header struct
+        mcast_groups  dict of all multicast groups (index by name)
     """
     def __init__(self, spec_path, schema_path=None, exclude_ops=None):
         with open(spec_path, "r") as stream:
@@ -384,6 +405,7 @@ class SpecFamily(SpecElement):
         self.ops = collections.OrderedDict()
         self.ntfs = collections.OrderedDict()
         self.consts = collections.OrderedDict()
+        self.mcast_groups = collections.OrderedDict()
 
         last_exception = None
         while len(self._resolution_list) > 0:
@@ -416,6 +438,9 @@ class SpecFamily(SpecElement):
     def new_operation(self, elem, req_val, rsp_val):
         return SpecOperation(self, elem, req_val, rsp_val)
 
+    def new_mcast_group(self, elem):
+        return SpecMcastGroup(self, elem)
+
     def add_unresolved(self, elem):
         self._resolution_list.append(elem)
 
@@ -512,3 +537,9 @@ class SpecFamily(SpecElement):
                 self.ops[op.name] = op
             elif op.is_async:
                 self.ntfs[op.name] = op
+
+        mcgs = self.yaml.get('mcast-groups')
+        if mcgs:
+            for elem in mcgs['list']:
+                mcg = self.new_mcast_group(elem)
+                self.mcast_groups[elem['name']] = mcg

From fb0a06d455d64443fefea6843ed6c5ae7ee0b395 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 25 Aug 2023 13:27:49 +0100
Subject: [PATCH 1955/2047] tools/net/ynl: Fix extack parsing with fixed header
 genlmsg

Move decode_fixed_header into YnlFamily and add a _fixed_header_size
method to allow extack decoding to skip the fixed header.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20230825122756.7603-7-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/lib/ynl.py | 65 ++++++++++++++++++++++++----------------
 1 file changed, 40 insertions(+), 25 deletions(-)

diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index fa4f1c28efc5..db556dcb931e 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -293,7 +293,7 @@ def _genl_load_families():
 
                 gm = GenlMsg(nl_msg)
                 fam = dict()
-                for attr in gm.raw_attrs:
+                for attr in NlAttrs(gm.raw):
                     if attr.type == Netlink.CTRL_ATTR_FAMILY_ID:
                         fam['id'] = attr.as_scalar('u16')
                     elif attr.type == Netlink.CTRL_ATTR_FAMILY_NAME:
@@ -317,23 +317,10 @@ def _genl_load_families():
 
 
 class GenlMsg:
-    def __init__(self, nl_msg, fixed_header_members=[]):
+    def __init__(self, nl_msg):
         self.nl = nl_msg
-
-        self.hdr = nl_msg.raw[0:4]
-        offset = 4
-
-        self.genl_cmd, self.genl_version, _ = struct.unpack("BBH", self.hdr)
-
-        self.fixed_header_attrs = dict()
-        for m in fixed_header_members:
-            format = NlAttr.get_format(m.type, m.byte_order)
-            decoded = format.unpack_from(nl_msg.raw, offset)
-            offset += format.size
-            self.fixed_header_attrs[m.name] = decoded[0]
-
-        self.raw = nl_msg.raw[offset:]
-        self.raw_attrs = NlAttrs(self.raw)
+        self.genl_cmd, self.genl_version, _ = struct.unpack_from("BBH", nl_msg.raw, 0)
+        self.raw = nl_msg.raw[4:]
 
     def __repr__(self):
         msg = repr(self.nl)
@@ -514,17 +501,44 @@ class YnlFamily(SpecFamily):
 
         return None
 
-    def _decode_extack(self, request, attr_space, extack):
+    def _decode_extack(self, request, op, extack):
         if 'bad-attr-offs' not in extack:
             return
 
-        genl_req = GenlMsg(NlMsg(request, 0, attr_space=attr_space))
-        path = self._decode_extack_path(genl_req.raw_attrs, attr_space,
-                                        20, extack['bad-attr-offs'])
+        genl_req = GenlMsg(NlMsg(request, 0, op.attr_set))
+        fixed_header_size = self._fixed_header_size(op)
+        offset = 20 + fixed_header_size
+        path = self._decode_extack_path(NlAttrs(genl_req.raw[fixed_header_size:]),
+                                        op.attr_set, offset,
+                                        extack['bad-attr-offs'])
         if path:
             del extack['bad-attr-offs']
             extack['bad-attr'] = path
 
+    def _fixed_header_size(self, op):
+        if op.fixed_header:
+            fixed_header_members = self.consts[op.fixed_header].members
+            size = 0
+            for m in fixed_header_members:
+                format = NlAttr.get_format(m.type, m.byte_order)
+                size += format.size
+            return size
+        else:
+            return 0
+
+    def _decode_fixed_header(self, msg, name):
+        fixed_header_members = self.consts[name].members
+        fixed_header_attrs = dict()
+        offset = 0
+        for m in fixed_header_members:
+            format = NlAttr.get_format(m.type, m.byte_order)
+            [ value ] = format.unpack_from(msg.raw, offset)
+            offset += format.size
+            if m.enum:
+                value = self._decode_enum(value, m)
+            fixed_header_attrs[m.name] = value
+        return fixed_header_attrs
+
     def handle_ntf(self, nl_msg, genl_msg):
         msg = dict()
         if self.include_raw:
@@ -599,7 +613,7 @@ class YnlFamily(SpecFamily):
             nms = NlMsgs(reply, attr_space=op.attr_set)
             for nl_msg in nms:
                 if nl_msg.extack:
-                    self._decode_extack(msg, op.attr_set, nl_msg.extack)
+                    self._decode_extack(msg, op, nl_msg.extack)
 
                 if nl_msg.error:
                     raise NlError(nl_msg)
@@ -610,7 +624,7 @@ class YnlFamily(SpecFamily):
                     done = True
                     break
 
-                gm = GenlMsg(nl_msg, fixed_header_members)
+                gm = GenlMsg(nl_msg)
                 # Check if this is a reply to our request
                 if nl_msg.nl_seq != req_seq or gm.genl_cmd != op.rsp_value:
                     if gm.genl_cmd in self.async_msg_ids:
@@ -620,8 +634,9 @@ class YnlFamily(SpecFamily):
                         print('Unexpected message: ' + repr(gm))
                         continue
 
-                rsp_msg = self._decode(gm.raw_attrs, op.attr_set.name)
-                rsp_msg.update(gm.fixed_header_attrs)
+                rsp_msg = self._decode(NlAttrs(gm.raw), op.attr_set.name)
+                if op.fixed_header:
+                    rsp_msg.update(self._decode_fixed_header(gm, op.fixed_header))
                 rsp.append(rsp_msg)
 
         if not rsp:

From e46dd903efe367214802b8d1cac01bdc43708339 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 25 Aug 2023 13:27:50 +0100
Subject: [PATCH 1956/2047] tools/net/ynl: Add support for netlink-raw families

Refactor the ynl code to encapsulate protocol specifics into
NetlinkProtocol and GenlProtocol.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Link: https://lore.kernel.org/r/20230825122756.7603-8-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/lib/ynl.py | 124 ++++++++++++++++++++++++++++-----------
 1 file changed, 91 insertions(+), 33 deletions(-)

diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index db556dcb931e..43022dec1076 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -25,6 +25,7 @@ class Netlink:
     NETLINK_ADD_MEMBERSHIP = 1
     NETLINK_CAP_ACK = 10
     NETLINK_EXT_ACK = 11
+    NETLINK_GET_STRICT_CHK = 12
 
     # Netlink message
     NLMSG_ERROR = 2
@@ -228,6 +229,9 @@ class NlMsg:
                             desc += f" ({spec['doc']})"
                         self.extack['miss-type'] = desc
 
+    def cmd(self):
+        return self.nl_type
+
     def __repr__(self):
         msg = f"nl_len = {self.nl_len} ({len(self.raw)}) nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}\n"
         if self.error:
@@ -322,6 +326,9 @@ class GenlMsg:
         self.genl_cmd, self.genl_version, _ = struct.unpack_from("BBH", nl_msg.raw, 0)
         self.raw = nl_msg.raw[4:]
 
+    def cmd(self):
+        return self.genl_cmd
+
     def __repr__(self):
         msg = repr(self.nl)
         msg += f"\tgenl_cmd = {self.genl_cmd} genl_ver = {self.genl_version}\n"
@@ -330,9 +337,41 @@ class GenlMsg:
         return msg
 
 
-class GenlFamily:
-    def __init__(self, family_name):
+class NetlinkProtocol:
+    def __init__(self, family_name, proto_num):
         self.family_name = family_name
+        self.proto_num = proto_num
+
+    def _message(self, nl_type, nl_flags, seq=None):
+        if seq is None:
+            seq = random.randint(1, 1024)
+        nlmsg = struct.pack("HHII", nl_type, nl_flags, seq, 0)
+        return nlmsg
+
+    def message(self, flags, command, version, seq=None):
+        return self._message(command, flags, seq)
+
+    def _decode(self, nl_msg):
+        return nl_msg
+
+    def decode(self, ynl, nl_msg):
+        msg = self._decode(nl_msg)
+        fixed_header_size = 0
+        if ynl:
+            op = ynl.rsp_by_value[msg.cmd()]
+            fixed_header_size = ynl._fixed_header_size(op)
+        msg.raw_attrs = NlAttrs(msg.raw[fixed_header_size:])
+        return msg
+
+    def get_mcast_id(self, mcast_name, mcast_groups):
+        if mcast_name not in mcast_groups:
+            raise Exception(f'Multicast group "{mcast_name}" not present in the spec')
+        return mcast_groups[mcast_name].value
+
+
+class GenlProtocol(NetlinkProtocol):
+    def __init__(self, family_name):
+        super().__init__(family_name, Netlink.NETLINK_GENERIC)
 
         global genl_family_name_to_id
         if genl_family_name_to_id is None:
@@ -341,6 +380,19 @@ class GenlFamily:
         self.genl_family = genl_family_name_to_id[family_name]
         self.family_id = genl_family_name_to_id[family_name]['id']
 
+    def message(self, flags, command, version, seq=None):
+        nlmsg = self._message(self.family_id, flags, seq)
+        genlmsg = struct.pack("BBH", command, version, 0)
+        return nlmsg + genlmsg
+
+    def _decode(self, nl_msg):
+        return GenlMsg(nl_msg)
+
+    def get_mcast_id(self, mcast_name, mcast_groups):
+        if mcast_name not in self.genl_family['mcast']:
+            raise Exception(f'Multicast group "{mcast_name}" not present in the family')
+        return self.genl_family['mcast'][mcast_name]
+
 
 #
 # YNL implementation details.
@@ -353,9 +405,19 @@ class YnlFamily(SpecFamily):
 
         self.include_raw = False
 
-        self.sock = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, Netlink.NETLINK_GENERIC)
+        try:
+            if self.proto == "netlink-raw":
+                self.nlproto = NetlinkProtocol(self.yaml['name'],
+                                               self.yaml['protonum'])
+            else:
+                self.nlproto = GenlProtocol(self.yaml['name'])
+        except KeyError:
+            raise Exception(f"Family '{self.yaml['name']}' not supported by the kernel")
+
+        self.sock = socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, self.nlproto.proto_num)
         self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1)
         self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_EXT_ACK, 1)
+        self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_GET_STRICT_CHK, 1)
 
         self.async_msg_ids = set()
         self.async_msg_queue = []
@@ -368,18 +430,12 @@ class YnlFamily(SpecFamily):
             bound_f = functools.partial(self._op, op_name)
             setattr(self, op.ident_name, bound_f)
 
-        try:
-            self.family = GenlFamily(self.yaml['name'])
-        except KeyError:
-            raise Exception(f"Family '{self.yaml['name']}' not supported by the kernel")
 
     def ntf_subscribe(self, mcast_name):
-        if mcast_name not in self.family.genl_family['mcast']:
-            raise Exception(f'Multicast group "{mcast_name}" not present in the family')
-
+        mcast_id = self.nlproto.get_mcast_id(mcast_name, self.mcast_groups)
         self.sock.bind((0, 0))
         self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_ADD_MEMBERSHIP,
-                             self.family.genl_family['mcast'][mcast_name])
+                             mcast_id)
 
     def _add_attr(self, space, name, value):
         try:
@@ -505,11 +561,9 @@ class YnlFamily(SpecFamily):
         if 'bad-attr-offs' not in extack:
             return
 
-        genl_req = GenlMsg(NlMsg(request, 0, op.attr_set))
-        fixed_header_size = self._fixed_header_size(op)
-        offset = 20 + fixed_header_size
-        path = self._decode_extack_path(NlAttrs(genl_req.raw[fixed_header_size:]),
-                                        op.attr_set, offset,
+        msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set))
+        offset = 20 + self._fixed_header_size(op)
+        path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset,
                                         extack['bad-attr-offs'])
         if path:
             del extack['bad-attr-offs']
@@ -539,14 +593,17 @@ class YnlFamily(SpecFamily):
             fixed_header_attrs[m.name] = value
         return fixed_header_attrs
 
-    def handle_ntf(self, nl_msg, genl_msg):
+    def handle_ntf(self, decoded):
         msg = dict()
         if self.include_raw:
-            msg['nlmsg'] = nl_msg
-            msg['genlmsg'] = genl_msg
-        op = self.rsp_by_value[genl_msg.genl_cmd]
+            msg['raw'] = decoded
+        op = self.rsp_by_value[decoded.cmd()]
+        attrs = self._decode(decoded.raw_attrs, op.attr_set.name)
+        if op.fixed_header:
+            attrs.update(self._decode_fixed_header(decoded, op.fixed_header))
+
         msg['name'] = op['name']
-        msg['msg'] = self._decode(genl_msg.raw_attrs, op.attr_set.name)
+        msg['msg'] = attrs
         self.async_msg_queue.append(msg)
 
     def check_ntf(self):
@@ -566,12 +623,12 @@ class YnlFamily(SpecFamily):
                     print("Netlink done while checking for ntf!?")
                     continue
 
-                gm = GenlMsg(nl_msg)
-                if gm.genl_cmd not in self.async_msg_ids:
-                    print("Unexpected msg id done while checking for ntf", gm)
+                decoded = self.nlproto.decode(self, nl_msg)
+                if decoded.cmd() not in self.async_msg_ids:
+                    print("Unexpected msg id done while checking for ntf", decoded)
                     continue
 
-                self.handle_ntf(nl_msg, gm)
+                self.handle_ntf(decoded)
 
     def operation_do_attributes(self, name):
       """
@@ -592,7 +649,7 @@ class YnlFamily(SpecFamily):
             nl_flags |= Netlink.NLM_F_DUMP
 
         req_seq = random.randint(1024, 65535)
-        msg = _genl_msg(self.family.family_id, nl_flags, op.req_value, 1, req_seq)
+        msg = self.nlproto.message(nl_flags, op.req_value, 1, req_seq)
         fixed_header_members = []
         if op.fixed_header:
             fixed_header_members = self.consts[op.fixed_header].members
@@ -624,19 +681,20 @@ class YnlFamily(SpecFamily):
                     done = True
                     break
 
-                gm = GenlMsg(nl_msg)
+                decoded = self.nlproto.decode(self, nl_msg)
+
                 # Check if this is a reply to our request
-                if nl_msg.nl_seq != req_seq or gm.genl_cmd != op.rsp_value:
-                    if gm.genl_cmd in self.async_msg_ids:
-                        self.handle_ntf(nl_msg, gm)
+                if nl_msg.nl_seq != req_seq or decoded.cmd() != op.rsp_value:
+                    if decoded.cmd() in self.async_msg_ids:
+                        self.handle_ntf(decoded)
                         continue
                     else:
-                        print('Unexpected message: ' + repr(gm))
+                        print('Unexpected message: ' + repr(decoded))
                         continue
 
-                rsp_msg = self._decode(NlAttrs(gm.raw), op.attr_set.name)
+                rsp_msg = self._decode(decoded.raw_attrs, op.attr_set.name)
                 if op.fixed_header:
-                    rsp_msg.update(self._decode_fixed_header(gm, op.fixed_header))
+                    rsp_msg.update(self._decode_fixed_header(decoded, op.fixed_header))
                 rsp.append(rsp_msg)
 
         if not rsp:

From 0493e56d021d371d3511159a38facbc92969c0f6 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 25 Aug 2023 13:27:51 +0100
Subject: [PATCH 1957/2047] tools/net/ynl: Implement nlattr array-nest decoding
 in ynl

Add support for the 'array-nest' attribute type that is used by several
netlink-raw families.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20230825122756.7603-9-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/lib/ynl.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index 43022dec1076..cab84c407ba2 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -497,6 +497,17 @@ class YnlFamily(SpecFamily):
                 decoded = NlAttr.formatted_string(decoded, attr_spec.display_hint)
         return decoded
 
+    def _decode_array_nest(self, attr, attr_spec):
+        decoded = []
+        offset = 0
+        while offset < len(attr.raw):
+            item = NlAttr(attr.raw, offset)
+            offset += item.full_len
+
+            subattrs = self._decode(NlAttrs(item.raw), attr_spec['nested-attributes'])
+            decoded.append({ item.type: subattrs })
+        return decoded
+
     def _decode(self, attrs, space):
         attr_space = self.attr_sets[space]
         rsp = dict()
@@ -516,6 +527,8 @@ class YnlFamily(SpecFamily):
                 decoded = True
             elif attr_spec["type"] in NlAttr.type_formats:
                 decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order)
+            elif attr_spec["type"] == 'array-nest':
+                decoded = self._decode_array_nest(attr, attr_spec)
             else:
                 raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
 

From 1768d8a767f8e7049407fe467364e7ea56f0bdce Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 25 Aug 2023 13:27:52 +0100
Subject: [PATCH 1958/2047] tools/net/ynl: Add support for create flags

Add support for using NLM_F_REPLACE, _EXCL, _CREATE and _APPEND flags
in requests.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20230825122756.7603-10-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/cli.py          | 12 ++++++++++--
 tools/net/ynl/lib/__init__.py |  4 ++--
 tools/net/ynl/lib/ynl.py      | 14 ++++++++++----
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py
index ffaa8038aa8c..564ecf07cd2c 100755
--- a/tools/net/ynl/cli.py
+++ b/tools/net/ynl/cli.py
@@ -6,7 +6,7 @@ import json
 import pprint
 import time
 
-from lib import YnlFamily
+from lib import YnlFamily, Netlink
 
 
 def main():
@@ -19,6 +19,14 @@ def main():
     parser.add_argument('--dump', dest='dump', type=str)
     parser.add_argument('--sleep', dest='sleep', type=int)
     parser.add_argument('--subscribe', dest='ntf', type=str)
+    parser.add_argument('--replace', dest='flags', action='append_const',
+                        const=Netlink.NLM_F_REPLACE)
+    parser.add_argument('--excl', dest='flags', action='append_const',
+                        const=Netlink.NLM_F_EXCL)
+    parser.add_argument('--create', dest='flags', action='append_const',
+                        const=Netlink.NLM_F_CREATE)
+    parser.add_argument('--append', dest='flags', action='append_const',
+                        const=Netlink.NLM_F_APPEND)
     args = parser.parse_args()
 
     if args.no_schema:
@@ -37,7 +45,7 @@ def main():
         time.sleep(args.sleep)
 
     if args.do:
-        reply = ynl.do(args.do, attrs)
+        reply = ynl.do(args.do, attrs, args.flags)
         pprint.PrettyPrinter().pprint(reply)
     if args.dump:
         reply = ynl.dump(args.dump, attrs)
diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/lib/__init__.py
index 4b3797fe784b..f7eaa07783e7 100644
--- a/tools/net/ynl/lib/__init__.py
+++ b/tools/net/ynl/lib/__init__.py
@@ -2,7 +2,7 @@
 
 from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \
     SpecFamily, SpecOperation
-from .ynl import YnlFamily
+from .ynl import YnlFamily, Netlink
 
 __all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet",
-           "SpecFamily", "SpecOperation", "YnlFamily"]
+           "SpecFamily", "SpecOperation", "YnlFamily", "Netlink"]
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index cab84c407ba2..13c4b019a881 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -35,6 +35,10 @@ class Netlink:
     NLM_F_ACK = 4
     NLM_F_ROOT = 0x100
     NLM_F_MATCH = 0x200
+
+    NLM_F_REPLACE = 0x100
+    NLM_F_EXCL = 0x200
+    NLM_F_CREATE = 0x400
     NLM_F_APPEND = 0x800
 
     NLM_F_CAPPED = 0x100
@@ -654,10 +658,12 @@ class YnlFamily(SpecFamily):
 
       return op['do']['request']['attributes'].copy()
 
-    def _op(self, method, vals, dump=False):
+    def _op(self, method, vals, flags, dump=False):
         op = self.ops[method]
 
         nl_flags = Netlink.NLM_F_REQUEST | Netlink.NLM_F_ACK
+        for flag in flags or []:
+            nl_flags |= flag
         if dump:
             nl_flags |= Netlink.NLM_F_DUMP
 
@@ -716,8 +722,8 @@ class YnlFamily(SpecFamily):
             return rsp[0]
         return rsp
 
-    def do(self, method, vals):
-        return self._op(method, vals)
+    def do(self, method, vals, flags):
+        return self._op(method, vals, flags)
 
     def dump(self, method, vals):
-        return self._op(method, vals, dump=True)
+        return self._op(method, vals, [], dump=True)

From dfb0f7d9d979567ad7b1194093e649c7a0d42f90 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 25 Aug 2023 13:27:53 +0100
Subject: [PATCH 1959/2047] doc/netlink: Add spec for rt addr messages

Add schema for rt addr with support for:
     - newaddr, deladdr, getaddr (dump)

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20230825122756.7603-11-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/rt_addr.yaml | 179 +++++++++++++++++++++++
 1 file changed, 179 insertions(+)
 create mode 100644 Documentation/netlink/specs/rt_addr.yaml

diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt_addr.yaml
new file mode 100644
index 000000000000..cbee1cedb177
--- /dev/null
+++ b/Documentation/netlink/specs/rt_addr.yaml
@@ -0,0 +1,179 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: rt-addr
+protocol: netlink-raw
+protonum: 0
+
+doc:
+  Address configuration over rtnetlink.
+
+definitions:
+  -
+    name: ifaddrmsg
+    type: struct
+    members:
+      -
+        name: ifa-family
+        type: u8
+      -
+        name: ifa-prefixlen
+        type: u8
+      -
+        name: ifa-flags
+        type: u8
+        enum: ifa-flags
+        enum-as-flags: true
+      -
+        name: ifa-scope
+        type: u8
+      -
+        name: ifa-index
+        type: u32
+  -
+    name: ifa-cacheinfo
+    type: struct
+    members:
+      -
+        name: ifa-prefered
+        type: u32
+      -
+        name: ifa-valid
+        type: u32
+      -
+        name: cstamp
+        type: u32
+      -
+        name: tstamp
+        type: u32
+
+  -
+    name: ifa-flags
+    type: flags
+    entries:
+      -
+        name: secondary
+      -
+        name: nodad
+      -
+        name: optimistic
+      -
+        name: dadfailed
+      -
+        name: homeaddress
+      -
+        name: deprecated
+      -
+        name: tentative
+      -
+        name: permanent
+      -
+        name: managetempaddr
+      -
+        name: noprefixroute
+      -
+        name: mcautojoin
+      -
+        name: stable-privacy
+
+attribute-sets:
+  -
+    name: addr-attrs
+    attributes:
+      -
+        name: ifa-address
+        type: binary
+        display-hint: ipv4
+      -
+        name: ifa-local
+        type: binary
+        display-hint: ipv4
+      -
+        name: ifa-label
+        type: string
+      -
+        name: ifa-broadcast
+        type: binary
+        display-hint: ipv4
+      -
+        name: ifa-anycast
+        type: binary
+      -
+        name: ifa-cacheinfo
+        type: binary
+        struct: ifa-cacheinfo
+      -
+        name: ifa-multicast
+        type: binary
+      -
+        name: ifa-flags
+        type: u32
+        enum: ifa-flags
+        enum-as-flags: true
+      -
+        name: ifa-rt-priority
+        type: u32
+      -
+        name: ifa-target-netnsid
+        type: binary
+      -
+        name: ifa-proto
+        type: u8
+
+
+operations:
+  fixed-header: ifaddrmsg
+  enum-model: directional
+  list:
+    -
+      name: newaddr
+      doc: Add new address
+      attribute-set: addr-attrs
+      do:
+        request:
+          value: 20
+          attributes: &ifaddr-all
+            - ifa-family
+            - ifa-flags
+            - ifa-prefixlen
+            - ifa-scope
+            - ifa-index
+            - ifa-address
+            - ifa-label
+            - ifa-local
+            - ifa-cacheinfo
+    -
+      name: deladdr
+      doc: Remove address
+      attribute-set: addr-attrs
+      do:
+        request:
+          value: 21
+          attributes:
+            - ifa-family
+            - ifa-flags
+            - ifa-prefixlen
+            - ifa-scope
+            - ifa-index
+            - ifa-address
+            - ifa-local
+    -
+      name: getaddr
+      doc: Dump address information.
+      attribute-set: addr-attrs
+      dump:
+        request:
+          value: 22
+          attributes:
+            - ifa-index
+        reply:
+          value: 20
+          attributes: *ifaddr-all
+
+mcast-groups:
+  list:
+    -
+      name: rtnlgrp-ipv4-ifaddr
+      value: 5
+    -
+      name: rtnlgrp-ipv6-ifaddr
+      value: 9

From b2f63d904e7254de447e6e809fc6ebd98323aa39 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 25 Aug 2023 13:27:54 +0100
Subject: [PATCH 1960/2047] doc/netlink: Add spec for rt link messages

Add schema for rt link with support for newlink, dellink, getlink,
setlink and getstats.

A dummy link can be created like this:

sudo ./tools/net/ynl/cli.py \
    --spec Documentation/netlink/specs/rt_link.yaml \
    --do newlink --create \
    --json '{"ifname": "dummy0", "linkinfo": {"kind": "dummy"}}'

For example, offload stats can be fetched like this:

./tools/net/ynl/cli.py \
    --spec Documentation/netlink/specs/rt_link.yaml \
    --dump getstats --json '{ "filter-mask": 8 }'

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20230825122756.7603-12-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/rt_link.yaml | 1432 ++++++++++++++++++++++
 1 file changed, 1432 insertions(+)
 create mode 100644 Documentation/netlink/specs/rt_link.yaml

diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml
new file mode 100644
index 000000000000..d86a68f8475c
--- /dev/null
+++ b/Documentation/netlink/specs/rt_link.yaml
@@ -0,0 +1,1432 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: rt-link
+protocol: netlink-raw
+protonum: 0
+
+doc:
+  Link configuration over rtnetlink.
+
+definitions:
+  -
+    name: ifinfo-flags
+    type: flags
+    entries:
+      -
+        name: up
+      -
+        name: broadcast
+      -
+        name: debug
+      -
+        name: loopback
+      -
+        name: point-to-point
+      -
+        name: no-trailers
+      -
+        name: running
+      -
+        name: no-arp
+      -
+        name: promisc
+      -
+        name: all-multi
+      -
+        name: master
+      -
+        name: slave
+      -
+        name: multicast
+      -
+        name: portsel
+      -
+        name: auto-media
+      -
+        name: dynamic
+      -
+        name: lower-up
+      -
+        name: dormant
+      -
+        name: echo
+
+  -
+    name: rtgenmsg
+    type: struct
+    members:
+      -
+        name: family
+        type: u8
+  -
+    name: ifinfomsg
+    type: struct
+    members:
+      -
+        name: ifi-family
+        type: u8
+      -
+        name: padding
+        type: u8
+      -
+        name: ifi-type
+        type: u16
+      -
+        name: ifi-index
+        type: s32
+      -
+        name: ifi-flags
+        type: u32
+        enum: ifinfo-flags
+        enum-as-flags: true
+      -
+        name: ifi-change
+        type: u32
+  -
+    name: ifla-cacheinfo
+    type: struct
+    members:
+      -
+        name: max-reasm-len
+        type: u32
+      -
+        name: tstamp
+        type: u32
+      -
+        name: reachable-time
+        type: s32
+      -
+        name: retrans-time
+        type: u32
+  -
+    name: rtnl-link-stats
+    type: struct
+    members:
+      -
+        name: rx-packets
+        type: u32
+      -
+        name: tx-packets
+        type: u32
+      -
+        name: rx-bytes
+        type: u32
+      -
+        name: tx-bytes
+        type: u32
+      -
+        name: rx-errors
+        type: u32
+      -
+        name: tx-errors
+        type: u32
+      -
+        name: rx-dropped
+        type: u32
+      -
+        name: tx-dropped
+        type: u32
+      -
+        name: multicast
+        type: u32
+      -
+        name: collisions
+        type: u32
+      -
+        name: rx-length-errors
+        type: u32
+      -
+        name: rx-over-errors
+        type: u32
+      -
+        name: rx-crc-errors
+        type: u32
+      -
+        name: rx-frame-errors
+        type: u32
+      -
+        name: rx-fifo-errors
+        type: u32
+      -
+        name: rx-missed-errors
+        type: u32
+      -
+        name: tx-aborted-errors
+        type: u32
+      -
+        name: tx-carrier-errors
+        type: u32
+      -
+        name: tx-fifo-errors
+        type: u32
+      -
+        name: tx-heartbeat-errors
+        type: u32
+      -
+        name: tx-window-errors
+        type: u32
+      -
+        name: rx-compressed
+        type: u32
+      -
+        name: tx-compressed
+        type: u32
+      -
+        name: rx-nohandler
+        type: u32
+  -
+    name: rtnl-link-stats64
+    type: struct
+    members:
+      -
+        name: rx-packets
+        type: u64
+      -
+        name: tx-packets
+        type: u64
+      -
+        name: rx-bytes
+        type: u64
+      -
+        name: tx-bytes
+        type: u64
+      -
+        name: rx-errors
+        type: u64
+      -
+        name: tx-errors
+        type: u64
+      -
+        name: rx-dropped
+        type: u64
+      -
+        name: tx-dropped
+        type: u64
+      -
+        name: multicast
+        type: u64
+      -
+        name: collisions
+        type: u64
+      -
+        name: rx-length-errors
+        type: u64
+      -
+        name: rx-over-errors
+        type: u64
+      -
+        name: rx-crc-errors
+        type: u64
+      -
+        name: rx-frame-errors
+        type: u64
+      -
+        name: rx-fifo-errors
+        type: u64
+      -
+        name: rx-missed-errors
+        type: u64
+      -
+        name: tx-aborted-errors
+        type: u64
+      -
+        name: tx-carrier-errors
+        type: u64
+      -
+        name: tx-fifo-errors
+        type: u64
+      -
+        name: tx-heartbeat-errors
+        type: u64
+      -
+        name: tx-window-errors
+        type: u64
+      -
+        name: rx-compressed
+        type: u64
+      -
+        name: tx-compressed
+        type: u64
+      -
+        name: rx-nohandler
+        type: u64
+      -
+        name: rx-otherhost-dropped
+        type: u64
+  -
+    name: rtnl-link-ifmap
+    type: struct
+    members:
+      -
+        name: mem-start
+        type: u64
+      -
+        name: mem-end
+        type: u64
+      -
+        name: base-addr
+        type: u64
+      -
+        name: irq
+        type: u16
+      -
+        name: dma
+        type: u8
+      -
+        name: port
+        type: u8
+  -
+    name: ipv4-devconf
+    type: struct
+    members:
+      -
+        name: forwarding
+        type: u32
+      -
+        name: mc-forwarding
+        type: u32
+      -
+        name: proxy-arp
+        type: u32
+      -
+        name: accept-redirects
+        type: u32
+      -
+        name: secure-redirects
+        type: u32
+      -
+        name: send-redirects
+        type: u32
+      -
+        name: shared-media
+        type: u32
+      -
+        name: rp-filter
+        type: u32
+      -
+        name: accept-source-route
+        type: u32
+      -
+        name: bootp-relay
+        type: u32
+      -
+        name: log-martians
+        type: u32
+      -
+        name: tag
+        type: u32
+      -
+        name: arpfilter
+        type: u32
+      -
+        name: medium-id
+        type: u32
+      -
+        name: noxfrm
+        type: u32
+      -
+        name: nopolicy
+        type: u32
+      -
+        name: force-igmp-version
+        type: u32
+      -
+        name: arp-announce
+        type: u32
+      -
+        name: arp-ignore
+        type: u32
+      -
+        name: promote-secondaries
+        type: u32
+      -
+        name: arp-accept
+        type: u32
+      -
+        name: arp-notify
+        type: u32
+      -
+        name: accept-local
+        type: u32
+      -
+        name: src-vmark
+        type: u32
+      -
+        name: proxy-arp-pvlan
+        type: u32
+      -
+        name: route-localnet
+        type: u32
+      -
+        name: igmpv2-unsolicited-report-interval
+        type: u32
+      -
+        name: igmpv3-unsolicited-report-interval
+        type: u32
+      -
+        name: ignore-routes-with-linkdown
+        type: u32
+      -
+        name: drop-unicast-in-l2-multicast
+        type: u32
+      -
+        name: drop-gratuitous-arp
+        type: u32
+      -
+        name: bc-forwarding
+        type: u32
+      -
+        name: arp-evict-nocarrier
+        type: u32
+  -
+    name: ipv6-devconf
+    type: struct
+    members:
+      -
+        name: forwarding
+        type: u32
+      -
+        name: hoplimit
+        type: u32
+      -
+        name: mtu6
+        type: u32
+      -
+        name: accept-ra
+        type: u32
+      -
+        name: accept-redirects
+        type: u32
+      -
+        name: autoconf
+        type: u32
+      -
+        name: dad-transmits
+        type: u32
+      -
+        name: rtr-solicits
+        type: u32
+      -
+        name: rtr-solicit-interval
+        type: u32
+      -
+        name: rtr-solicit-delay
+        type: u32
+      -
+        name: use-tempaddr
+        type: u32
+      -
+        name: temp-valid-lft
+        type: u32
+      -
+        name: temp-prefered-lft
+        type: u32
+      -
+        name: regen-max-retry
+        type: u32
+      -
+        name: max-desync-factor
+        type: u32
+      -
+        name: max-addresses
+        type: u32
+      -
+        name: force-mld-version
+        type: u32
+      -
+        name: accept-ra-defrtr
+        type: u32
+      -
+        name: accept-ra-pinfo
+        type: u32
+      -
+        name: accept-ra-rtr-pref
+        type: u32
+      -
+        name: rtr-probe-interval
+        type: u32
+      -
+        name: accept-ra-rt-info-max-plen
+        type: u32
+      -
+        name: proxy-ndp
+        type: u32
+      -
+        name: optimistic-dad
+        type: u32
+      -
+        name: accept-source-route
+        type: u32
+      -
+        name: mc-forwarding
+        type: u32
+      -
+        name: disable-ipv6
+        type: u32
+      -
+        name: accept-dad
+        type: u32
+      -
+        name: force-tllao
+        type: u32
+      -
+        name: ndisc-notify
+        type: u32
+      -
+        name: mldv1-unsolicited-report-interval
+        type: u32
+      -
+        name: mldv2-unsolicited-report-interval
+        type: u32
+      -
+        name: suppress-frag-ndisc
+        type: u32
+      -
+        name: accept-ra-from-local
+        type: u32
+      -
+        name: use-optimistic
+        type: u32
+      -
+        name: accept-ra-mtu
+        type: u32
+      -
+        name: stable-secret
+        type: u32
+      -
+        name: use-oif-addrs-only
+        type: u32
+      -
+        name: accept-ra-min-hop-limit
+        type: u32
+      -
+        name: ignore-routes-with-linkdown
+        type: u32
+      -
+        name: drop-unicast-in-l2-multicast
+        type: u32
+      -
+        name: drop-unsolicited-na
+        type: u32
+      -
+        name: keep-addr-on-down
+        type: u32
+      -
+        name: rtr-solicit-max-interval
+        type: u32
+      -
+        name: seg6-enabled
+        type: u32
+      -
+        name: seg6-require-hmac
+        type: u32
+      -
+        name: enhanced-dad
+        type: u32
+      -
+        name: addr-gen-mode
+        type: u8
+      -
+        name: disable-policy
+        type: u32
+      -
+        name: accept-ra-rt-info-min-plen
+        type: u32
+      -
+        name: ndisc-tclass
+        type: u32
+      -
+        name: rpl-seg-enabled
+        type: u32
+      -
+        name: ra-defrtr-metric
+        type: u32
+      -
+        name: ioam6-enabled
+        type: u32
+      -
+        name: ioam6-id
+        type: u32
+      -
+        name: ioam6-id-wide
+        type: u32
+      -
+        name: ndisc-evict-nocarrier
+        type: u32
+      -
+        name: accept-untracked-na
+        type: u32
+  -
+    name: ifla-icmp6-stats
+    type: struct
+    members:
+      -
+        name: inmsgs
+        type: u64
+      -
+        name: inerrors
+        type: u64
+      -
+        name: outmsgs
+        type: u64
+      -
+        name: outerrors
+        type: u64
+      -
+        name: csumerrors
+        type: u64
+      -
+        name: ratelimithost
+        type: u64
+  -
+    name: ifla-inet6-stats
+    type: struct
+    members:
+      -
+        name: inpkts
+        type: u64
+      -
+        name: inoctets
+        type: u64
+      -
+        name: indelivers
+        type: u64
+      -
+        name: outforwdatagrams
+        type: u64
+      -
+        name: outpkts
+        type: u64
+      -
+        name: outoctets
+        type: u64
+      -
+        name: inhdrerrors
+        type: u64
+      -
+        name: intoobigerrors
+        type: u64
+      -
+        name: innoroutes
+        type: u64
+      -
+        name: inaddrerrors
+        type: u64
+      -
+        name: inunknownprotos
+        type: u64
+      -
+        name: intruncatedpkts
+        type: u64
+      -
+        name: indiscards
+        type: u64
+      -
+        name: outdiscards
+        type: u64
+      -
+        name: outnoroutes
+        type: u64
+      -
+        name: reasmtimeout
+        type: u64
+      -
+        name: reasmreqds
+        type: u64
+      -
+        name: reasmoks
+        type: u64
+      -
+        name: reasmfails
+        type: u64
+      -
+        name: fragoks
+        type: u64
+      -
+        name: fragfails
+        type: u64
+      -
+        name: fragcreates
+        type: u64
+      -
+        name: inmcastpkts
+        type: u64
+      -
+        name: outmcastpkts
+        type: u64
+      -
+        name: inbcastpkts
+        type: u64
+      -
+        name: outbcastpkts
+        type: u64
+      -
+        name: inmcastoctets
+        type: u64
+      -
+        name: outmcastoctets
+        type: u64
+      -
+        name: inbcastoctets
+        type: u64
+      -
+        name: outbcastoctets
+        type: u64
+      -
+        name: csumerrors
+        type: u64
+      -
+        name: noectpkts
+        type: u64
+      -
+        name: ect1-pkts
+        type: u64
+      -
+        name: ect0-pkts
+        type: u64
+      -
+        name: cepkts
+        type: u64
+      -
+        name: reasm-overlaps
+        type: u64
+  - name: br-boolopt-multi
+    type: struct
+    members:
+      -
+        name: optval
+        type: u32
+      -
+        name: optmask
+        type: u32
+  -
+    name: if_stats_msg
+    type: struct
+    members:
+      -
+        name: family
+        type: u8
+      -
+        name: pad1
+        type: u8
+      -
+        name: pad2
+        type: u16
+      -
+        name: ifindex
+        type: u32
+      -
+        name: filter-mask
+        type: u32
+
+
+attribute-sets:
+  -
+    name: link-attrs
+    name-prefix: ifla-
+    attributes:
+      -
+        name: address
+        type: binary
+        display-hint: mac
+      -
+        name: broadcast
+        type: binary
+        display-hint: mac
+      -
+        name: ifname
+        type: string
+      -
+        name: mtu
+        type: u32
+      -
+        name: link
+        type: u32
+      -
+        name: qdisc
+        type: string
+      -
+        name: stats
+        type: binary
+        struct: rtnl-link-stats
+      -
+        name: cost
+        type: string
+      -
+        name: priority
+        type: string
+      -
+        name: master
+        type: u32
+      -
+        name: wireless
+        type: string
+      -
+        name: protinfo
+        type: string
+      -
+        name: txqlen
+        type: u32
+      -
+        name: map
+        type: binary
+        struct: rtnl-link-ifmap
+      -
+        name: weight
+        type: u32
+      -
+        name: operstate
+        type: u8
+      -
+        name: linkmode
+        type: u8
+      -
+        name: linkinfo
+        type: nest
+        nested-attributes: linkinfo-attrs
+      -
+        name: net-ns-pid
+        type: u32
+      -
+        name: ifalias
+        type: string
+      -
+        name: num-vf
+        type: u32
+      -
+        name: vfinfo-list
+        type: nest
+        nested-attributes: vfinfo-attrs
+      -
+        name: stats64
+        type: binary
+        struct: rtnl-link-stats64
+      -
+        name: vf-ports
+        type: nest
+        nested-attributes: vf-ports-attrs
+      -
+        name: port-self
+        type: nest
+        nested-attributes: port-self-attrs
+      -
+        name: af-spec
+        type: nest
+        nested-attributes: af-spec-attrs
+      -
+        name: group
+        type: u32
+      -
+        name: net-ns-fd
+        type: u32
+      -
+        name: ext-mask
+        type: u32
+      -
+        name: promiscuity
+        type: u32
+      -
+        name: num-tx-queues
+        type: u32
+      -
+        name: num-rx-queues
+        type: u32
+      -
+        name: carrier
+        type: u8
+      -
+        name: phys-port-id
+        type: binary
+      -
+        name: carrier-changes
+        type: u32
+      -
+        name: phys-switch-id
+        type: binary
+      -
+        name: link-netnsid
+        type: s32
+      -
+        name: phys-port-name
+        type: string
+      -
+        name: proto-down
+        type: u8
+      -
+        name: gso-max-segs
+        type: u32
+      -
+        name: gso-max-size
+        type: u32
+      -
+        name: pad
+        type: pad
+      -
+        name: xdp
+        type: nest
+        nested-attributes: xdp-attrs
+      -
+        name: event
+        type: u32
+      -
+        name: new-netnsid
+        type: s32
+      -
+        name: target-netnsid
+        type: s32
+      -
+        name: carrier-up-count
+        type: u32
+      -
+        name: carrier-down-count
+        type: u32
+      -
+        name: new-ifindex
+        type: s32
+      -
+        name: min-mtu
+        type: u32
+      -
+        name: max-mtu
+        type: u32
+      -
+        name: prop-list
+        type: nest
+        nested-attributes: link-attrs
+      -
+        name: alt-ifname
+        type: string
+        multi-attr: true
+      -
+        name: perm-address
+        type: binary
+        display-hint: mac
+      -
+        name: proto-down-reason
+        type: string
+      -
+        name: parent-dev-name
+        type: string
+      -
+        name: parent-dev-bus-name
+        type: string
+      -
+        name: gro-max-size
+        type: u32
+      -
+        name: tso-max-size
+        type: u32
+      -
+        name: tso-max-segs
+        type: u32
+      -
+        name: allmulti
+        type: u32
+      -
+        name: devlink-port
+        type: binary
+      -
+        name: gso-ipv4-max-size
+        type: u32
+      -
+        name: gro-ipv4-max-size
+        type: u32
+  -
+    name: af-spec-attrs
+    attributes:
+      -
+        name: "inet"
+        type: nest
+        value: 2
+        nested-attributes: ifla-attrs
+      -
+        name: "inet6"
+        type: nest
+        value: 10
+        nested-attributes: ifla6-attrs
+      -
+        name: "mctp"
+        type: nest
+        value: 45
+        nested-attributes: mctp-attrs
+  -
+    name: vfinfo-attrs
+    attributes: []
+  -
+    name: vf-ports-attrs
+    attributes: []
+  -
+    name: port-self-attrs
+    attributes: []
+  -
+    name: linkinfo-attrs
+    attributes:
+      -
+        name: kind
+        type: string
+      -
+        name: data
+        type: binary
+        # kind specific nest, e.g. linkinfo-bridge-attrs
+      -
+        name: xstats
+        type: binary
+      -
+        name: slave-kind
+        type: string
+      -
+        name: slave-data
+        type: binary
+        # kind specific nest
+  -
+    name: linkinfo-bridge-attrs
+    attributes:
+      -
+        name: forward-delay
+        type: u32
+      -
+        name: hello-time
+        type: u32
+      -
+        name: max-age
+        type: u32
+      -
+        name: ageing-time
+        type: u32
+      -
+        name: stp-state
+        type: u32
+      -
+        name: priority
+        type: u16
+      -
+        name: vlan-filtering
+        type: u8
+      -
+        name: vlan-protocol
+        type: u16
+      -
+        name: group-fwd-mask
+        type: u16
+      -
+        name: root-id
+        type: binary
+      -
+        name: bridge-id
+        type: binary
+      -
+        name: root-port
+        type: u16
+      -
+        name: root-path-cost
+        type: u32
+      -
+        name: topology-change
+        type: u8
+      -
+        name: topology-change-detected
+        type: u8
+      -
+        name: hello-timer
+        type: u64
+      -
+        name: tcn-timer
+        type: u64
+      -
+        name: topology-change-timer
+        type: u64
+      -
+        name: gc-timer
+        type: u64
+      -
+        name: group-addr
+        type: binary
+      -
+        name: fdb-flush
+        type: binary
+      -
+        name: mcast-router
+        type: u8
+      -
+        name: mcast-snooping
+        type: u8
+      -
+        name: mcast-query-use-ifaddr
+        type: u8
+      -
+        name: mcast-querier
+        type: u8
+      -
+        name: mcast-hash-elasticity
+        type: u32
+      -
+        name: mcast-hash-max
+        type: u32
+      -
+        name: mcast-last-member-cnt
+        type: u32
+      -
+        name: mcast-startup-query-cnt
+        type: u32
+      -
+        name: mcast-last-member-intvl
+        type: u64
+      -
+        name: mcast-membership-intvl
+        type: u64
+      -
+        name: mcast-querier-intvl
+        type: u64
+      -
+        name: mcast-query-intvl
+        type: u64
+      -
+        name: mcast-query-response-intvl
+        type: u64
+      -
+        name: mcast-startup-query-intvl
+        type: u64
+      -
+        name: nf-call-iptables
+        type: u8
+      -
+        name: nf-call-ip6-tables
+        type: u8
+      -
+        name: nf-call-arptables
+        type: u8
+      -
+        name: vlan-default-pvid
+        type: u16
+      -
+        name: pad
+        type: pad
+      -
+        name: vlan-stats-enabled
+        type: u8
+      -
+        name: mcast-stats-enabled
+        type: u8
+      -
+        name: mcast-igmp-version
+        type: u8
+      -
+        name: mcast-mld-version
+        type: u8
+      -
+        name: vlan-stats-per-port
+        type: u8
+      -
+        name: multi-boolopt
+        type: binary
+        struct: br-boolopt-multi
+      -
+        name: mcast-querier-state
+        type: binary
+  -
+    name: xdp-attrs
+    attributes:
+      -
+        name: fd
+        type: s32
+      -
+        name: attached
+        type: u8
+      -
+        name: flags
+        type: u32
+      -
+        name: prog-id
+        type: u32
+      -
+        name: drv-prog-id
+        type: u32
+      -
+        name: skb-prog-id
+        type: u32
+      -
+        name: hw-prog-id
+        type: u32
+      -
+        name: expected-fd
+        type: s32
+  -
+    name: ifla-attrs
+    attributes:
+      -
+        name: conf
+        type: binary
+        struct: ipv4-devconf
+  -
+    name: ifla6-attrs
+    attributes:
+      -
+        name: flags
+        type: u32
+      -
+        name: conf
+        type: binary
+        struct: ipv6-devconf
+      -
+        name: stats
+        type: binary
+        struct: ifla-inet6-stats
+      -
+        name: mcast
+        type: binary
+      -
+        name: cacheinfo
+        type: binary
+        struct: ifla-cacheinfo
+      -
+        name: icmp6-stats
+        type: binary
+        struct: ifla-icmp6-stats
+      -
+        name: token
+        type: binary
+      -
+        name: addr-gen-mode
+        type: u8
+      -
+        name: ra-mtu
+        type: u32
+  -
+    name: mctp-attrs
+    attributes:
+      -
+        name: mctp-net
+        type: u32
+  -
+    name: stats-attrs
+    name-prefix: ifla-stats-
+    attributes:
+      -
+        name: link-64
+        type: binary
+        struct: rtnl-link-stats64
+      -
+        name: link-xstats
+        type: binary
+      -
+        name: link-xstats-slave
+        type: binary
+      -
+        name: link-offload-xstats
+        type: nest
+        nested-attributes: link-offload-xstats
+      -
+        name: af-spec
+        type: binary
+  -
+    name: link-offload-xstats
+    attributes:
+      -
+        name: cpu-hit
+        type: binary
+      -
+        name: hw-s-info
+        type: array-nest
+        nested-attributes: hw-s-info-one
+      -
+        name: l3-stats
+        type: binary
+  -
+    name: hw-s-info-one
+    attributes:
+      -
+        name: request
+        type: u8
+      -
+        name: used
+        type: u8
+
+operations:
+  enum-model: directional
+  list:
+    -
+      name: newlink
+      doc: Create a new link.
+      attribute-set: link-attrs
+      fixed-header: ifinfomsg
+      do:
+        request:
+          value: 16
+          attributes: &link-new-attrs
+            - ifi-index
+            - ifname
+            - net-ns-pid
+            - net-ns-fd
+            - target-netnsid
+            - link-netnsid
+            - linkinfo
+            - group
+            - num-tx-queues
+            - num-rx-queues
+            - address
+            - broadcast
+            - mtu
+            - txqlen
+            - operstate
+            - linkmode
+            - group
+            - gso-max-size
+            - gso-max-segs
+            - gro-max-size
+            - gso-ipv4-max-size
+            - gro-ipv4-max-size
+            - af-spec
+    -
+      name: dellink
+      doc: Delete an existing link.
+      attribute-set: link-attrs
+      fixed-header: ifinfomsg
+      do:
+        request:
+          value: 17
+          attributes:
+            - ifi-index
+            - ifname
+    -
+      name: getlink
+      doc: Get / dump information about a link.
+      attribute-set: link-attrs
+      fixed-header: ifinfomsg
+      do:
+        request:
+          value: 18
+          attributes:
+            - ifi-index
+            - ifname
+            - alt-ifname
+            - ext-mask
+            - target-netnsid
+        reply:
+          value: 16
+          attributes: &link-all-attrs
+            - ifi-family
+            - ifi-type
+            - ifi-index
+            - ifi-flags
+            - ifi-change
+            - address
+            - broadcast
+            - ifname
+            - mtu
+            - link
+            - qdisc
+            - stats
+            - cost
+            - priority
+            - master
+            - wireless
+            - protinfo
+            - txqlen
+            - map
+            - weight
+            - operstate
+            - linkmode
+            - linkinfo
+            - net-ns-pid
+            - ifalias
+            - num-vf
+            - vfinfo-list
+            - stats64
+            - vf-ports
+            - port-self
+            - af-spec
+            - group
+            - net-ns-fd
+            - ext-mask
+            - promiscuity
+            - num-tx-queues
+            - num-rx-queues
+            - carrier
+            - phys-port-id
+            - carrier-changes
+            - phys-switch-id
+            - link-netnsid
+            - phys-port-name
+            - proto-down
+            - gso-max-segs
+            - gso-max-size
+            - pad
+            - xdp
+            - event
+            - new-netnsid
+            - if-netnsid
+            - target-netnsid
+            - carrier-up-count
+            - carrier-down-count
+            - new-ifindex
+            - min-mtu
+            - max-mtu
+            - prop-list
+            - alt-ifname
+            - perm-address
+            - proto-down-reason
+            - parent-dev-name
+            - parent-dev-bus-name
+            - gro-max-size
+            - tso-max-size
+            - tso-max-segs
+            - allmulti
+            - devlink-port
+            - gso-ipv4-max-size
+            - gro-ipv4-max-size
+      dump:
+        request:
+          value: 18
+          attributes:
+            - target-netnsid
+            - ext-mask
+            - master
+            - linkinfo
+        reply:
+          value: 16
+          attributes: *link-all-attrs
+    -
+      name: setlink
+      doc: Set information about a link.
+      attribute-set: link-attrs
+      fixed-header: ifinfomsg
+      do:
+        request:
+          value: 19
+          attributes: *link-all-attrs
+    -
+      name: getstats
+      doc: Get / dump link stats.
+      attribute-set: stats-attrs
+      fixed-header: if_stats_msg
+      do:
+        request:
+          value: 94
+          attributes:
+            - ifindex
+        reply:
+          value: 92
+          attributes: &link-stats-attrs
+            - family
+            - ifindex
+            - filter-mask
+            - link-64
+            - link-xstats
+            - link-xstats-slave
+            - link-offload-xstats
+            - af-spec
+      dump:
+        request:
+          value: 94
+        reply:
+          value: 92
+          attributes: *link-stats-attrs
+
+mcast-groups:
+  list:
+    -
+      name: rtnlgrp-link
+      value: 1
+    -
+      name: rtnlgrp-stats
+      value: 36

From 023289b4f582820f3124fa7ff42a61959fe7dea6 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Fri, 25 Aug 2023 13:27:55 +0100
Subject: [PATCH 1961/2047] doc/netlink: Add spec for rt route messages

Add schema for rt route with support for getroute, newroute and
delroute.

Routes can be dumped with filter attributes like this:

./tools/net/ynl/cli.py \
    --spec Documentation/netlink/specs/rt_route.yaml \
    --dump getroute --json '{"rtm-family": 2, "rtm-table": 254}'

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20230825122756.7603-13-donald.hunter@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/rt_route.yaml | 327 ++++++++++++++++++++++
 1 file changed, 327 insertions(+)
 create mode 100644 Documentation/netlink/specs/rt_route.yaml

diff --git a/Documentation/netlink/specs/rt_route.yaml b/Documentation/netlink/specs/rt_route.yaml
new file mode 100644
index 000000000000..f4368be0caed
--- /dev/null
+++ b/Documentation/netlink/specs/rt_route.yaml
@@ -0,0 +1,327 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: rt-route
+protocol: netlink-raw
+protonum: 0
+
+doc:
+  Route configuration over rtnetlink.
+
+definitions:
+  -
+    name: rtm-type
+    name-prefix: rtn-
+    type: enum
+    entries:
+      - unspec
+      - unicast
+      - local
+      - broadcast
+      - anycast
+      - multicast
+      - blackhole
+      - unreachable
+      - prohibit
+      - throw
+      - nat
+      - xresolve
+  -
+    name: rtmsg
+    type: struct
+    members:
+      -
+        name: rtm-family
+        type: u8
+      -
+        name: rtm-dst-len
+        type: u8
+      -
+        name: rtm-src-len
+        type: u8
+      -
+        name: rtm-tos
+        type: u8
+      -
+        name: rtm-table
+        type: u8
+      -
+        name: rtm-protocol
+        type: u8
+      -
+        name: rtm-scope
+        type: u8
+      -
+        name: rtm-type
+        type: u8
+        enum: rtm-type
+      -
+        name: rtm-flags
+        type: u32
+  -
+    name: rta-cacheinfo
+    type: struct
+    members:
+      -
+        name: rta-clntref
+        type: u32
+      -
+        name: rta-lastuse
+        type: u32
+      -
+        name: rta-expires
+        type: u32
+      -
+        name: rta-error
+        type: u32
+      -
+        name: rta-used
+        type: u32
+
+attribute-sets:
+  -
+    name: route-attrs
+    attributes:
+      -
+        name: rta-dst
+        type: binary
+        display-hint: ipv4
+      -
+        name: rta-src
+        type: binary
+        display-hint: ipv4
+      -
+        name: rta-iif
+        type: u32
+      -
+        name: rta-oif
+        type: u32
+      -
+        name: rta-gateway
+        type: binary
+        display-hint: ipv4
+      -
+        name: rta-priority
+        type: u32
+      -
+        name: rta-prefsrc
+        type: binary
+        display-hint: ipv4
+      -
+        name: rta-metrics
+        type: nest
+        nested-attributes: rta-metrics
+      -
+        name: rta-multipath
+        type: binary
+      -
+        name: rta-protoinfo # not used
+        type: binary
+      -
+        name: rta-flow
+        type: u32
+      -
+        name: rta-cacheinfo
+        type: binary
+        struct: rta-cacheinfo
+      -
+        name: rta-session # not used
+        type: binary
+      -
+        name: rta-mp-algo # not used
+        type: binary
+      -
+        name: rta-table
+        type: u32
+      -
+        name: rta-mark
+        type: u32
+      -
+        name: rta-mfc-stats
+        type: binary
+      -
+        name: rta-via
+        type: binary
+      -
+        name: rta-newdst
+        type: binary
+      -
+        name: rta-pref
+        type: u8
+      -
+        name: rta-encap-type
+        type: u16
+      -
+        name: rta-encap
+        type: binary # tunnel specific nest
+      -
+        name: rta-expires
+        type: u32
+      -
+        name: rta-pad
+        type: binary
+      -
+        name: rta-uid
+        type: u32
+      -
+        name: rta-ttl-propagate
+        type: u8
+      -
+        name: rta-ip-proto
+        type: u8
+      -
+        name: rta-sport
+        type: u16
+      -
+        name: rta-dport
+        type: u16
+      -
+        name: rta-nh-id
+        type: u32
+  -
+    name: rta-metrics
+    attributes:
+      -
+        name: rtax-unspec
+        type: unused
+        value: 0
+      -
+        name: rtax-lock
+        type: u32
+      -
+        name: rtax-mtu
+        type: u32
+      -
+        name: rtax-window
+        type: u32
+      -
+        name: rtax-rtt
+        type: u32
+      -
+        name: rtax-rttvar
+        type: u32
+      -
+        name: rtax-ssthresh
+        type: u32
+      -
+        name: rtax-cwnd
+        type: u32
+      -
+        name: rtax-advmss
+        type: u32
+      -
+        name: rtax-reordering
+        type: u32
+      -
+        name: rtax-hoplimit
+        type: u32
+      -
+        name: rtax-initcwnd
+        type: u32
+      -
+        name: rtax-features
+        type: u32
+      -
+        name: rtax-rto-min
+        type: u32
+      -
+        name: rtax-initrwnd
+        type: u32
+      -
+        name: rtax-quickack
+        type: u32
+      -
+        name: rtax-cc-algo
+        type: string
+      -
+        name: rtax-fastopen-no-cookie
+        type: u32
+
+operations:
+  enum-model: directional
+  list:
+    -
+      name: getroute
+      doc: Dump route information.
+      attribute-set: route-attrs
+      fixed-header: rtmsg
+      do:
+        request:
+          value: 26
+          attributes:
+            - rtm-family
+            - rta-src
+            - rtm-src-len
+            - rta-dst
+            - rtm-dst-len
+            - rta-iif
+            - rta-oif
+            - rta-ip-proto
+            - rta-sport
+            - rta-dport
+            - rta-mark
+            - rta-uid
+        reply:
+          value: 24
+          attributes: &all-route-attrs
+            - rtm-family
+            - rtm-dst-len
+            - rtm-src-len
+            - rtm-tos
+            - rtm-table
+            - rtm-protocol
+            - rtm-scope
+            - rtm-type
+            - rtm-flags
+            - rta-dst
+            - rta-src
+            - rta-iif
+            - rta-oif
+            - rta-gateway
+            - rta-priority
+            - rta-prefsrc
+            - rta-metrics
+            - rta-multipath
+            - rta-flow
+            - rta-cacheinfo
+            - rta-table
+            - rta-mark
+            - rta-mfc-stats
+            - rta-via
+            - rta-newdst
+            - rta-pref
+            - rta-encap-type
+            - rta-encap
+            - rta-expires
+            - rta-pad
+            - rta-uid
+            - rta-ttl-propagate
+            - rta-ip-proto
+            - rta-sport
+            - rta-dport
+            - rta-nh-id
+      dump:
+        request:
+          value: 26
+          attributes:
+            - rtm-family
+        reply:
+          value: 24
+          attributes: *all-route-attrs
+    -
+      name: newroute
+      doc: Create a new route
+      attribute-set: route-attrs
+      fixed-header: rtmsg
+      do:
+        request:
+          value: 24
+          attributes: *all-route-attrs
+    -
+      name: delroute
+      doc: Delete an existing route
+      attribute-set: route-attrs
+      fixed-header: rtmsg
+      do:
+        request:
+          value: 25
+          attributes: *all-route-attrs

From 84e306b0834077f5bd9fb6b9e37298b45c3f5a09 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:06 +0200
Subject: [PATCH 1962/2047] selftests: tls: add test variants for aria-gcm

Only supported for TLS1.2.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/ccf4a4d3f3820f8ff30431b7629f5210cb33fa89.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/config |  1 +
 tools/testing/selftests/net/tls.c  | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index cd3cc52c59b4..8da562a9ae87 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -51,3 +51,4 @@ CONFIG_AMT=m
 CONFIG_VXLAN=m
 CONFIG_IP_SCTP=m
 CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_CRYPTO_ARIA=y
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 4b63708c6a81..95bef2be48cd 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -36,6 +36,8 @@ struct tls_crypto_info_keys {
 		struct tls12_crypto_info_sm4_ccm sm4ccm;
 		struct tls12_crypto_info_aes_ccm_128 aesccm128;
 		struct tls12_crypto_info_aes_gcm_256 aesgcm256;
+		struct tls12_crypto_info_aria_gcm_128 ariagcm128;
+		struct tls12_crypto_info_aria_gcm_256 ariagcm256;
 	};
 	size_t len;
 };
@@ -76,6 +78,16 @@ static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
 		tls12->aesgcm256.info.version = tls_version;
 		tls12->aesgcm256.info.cipher_type = cipher_type;
 		break;
+	case TLS_CIPHER_ARIA_GCM_128:
+		tls12->len = sizeof(struct tls12_crypto_info_aria_gcm_128);
+		tls12->ariagcm128.info.version = tls_version;
+		tls12->ariagcm128.info.cipher_type = cipher_type;
+		break;
+	case TLS_CIPHER_ARIA_GCM_256:
+		tls12->len = sizeof(struct tls12_crypto_info_aria_gcm_256);
+		tls12->ariagcm256.info.version = tls_version;
+		tls12->ariagcm256.info.cipher_type = cipher_type;
+		break;
 	default:
 		break;
 	}
@@ -312,6 +324,18 @@ FIXTURE_VARIANT_ADD(tls, 13_nopad)
 	.nopad = true,
 };
 
+FIXTURE_VARIANT_ADD(tls, 12_aria_gcm)
+{
+	.tls_version = TLS_1_2_VERSION,
+	.cipher_type = TLS_CIPHER_ARIA_GCM_128,
+};
+
+FIXTURE_VARIANT_ADD(tls, 12_aria_gcm_256)
+{
+	.tls_version = TLS_1_2_VERSION,
+	.cipher_type = TLS_CIPHER_ARIA_GCM_256,
+};
+
 FIXTURE_SETUP(tls)
 {
 	struct tls_crypto_info_keys tls12;

From f27ad62fe38cf7c0a561628734f348217777127b Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:07 +0200
Subject: [PATCH 1963/2047] selftests: tls: add getsockopt test

The kernel accepts fetching either just the version and cipher type,
or exactly the per-cipher struct. Also check that getsockopt returns
what we just passed to the kernel.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/81a007ca13de9a74f4af45635d06682cdb385a54.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/tls.c | 35 +++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 95bef2be48cd..0da6952a047a 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -30,6 +30,7 @@ static int fips_enabled;
 
 struct tls_crypto_info_keys {
 	union {
+		struct tls_crypto_info crypto_info;
 		struct tls12_crypto_info_aes_gcm_128 aes128;
 		struct tls12_crypto_info_chacha20_poly1305 chacha20;
 		struct tls12_crypto_info_sm4_gcm sm4gcm;
@@ -1496,6 +1497,40 @@ TEST_F(tls, shutdown_reuse)
 	EXPECT_EQ(errno, EISCONN);
 }
 
+TEST_F(tls, getsockopt)
+{
+	struct tls_crypto_info_keys expect, get;
+	socklen_t len;
+
+	/* get only the version/cipher */
+	len = sizeof(struct tls_crypto_info);
+	memrnd(&get, sizeof(get));
+	EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0);
+	EXPECT_EQ(len, sizeof(struct tls_crypto_info));
+	EXPECT_EQ(get.crypto_info.version, variant->tls_version);
+	EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type);
+
+	/* get the full crypto_info */
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect);
+	len = expect.len;
+	memrnd(&get, sizeof(get));
+	EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0);
+	EXPECT_EQ(len, expect.len);
+	EXPECT_EQ(get.crypto_info.version, variant->tls_version);
+	EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type);
+	EXPECT_EQ(memcmp(&get, &expect, expect.len), 0);
+
+	/* short get should fail */
+	len = sizeof(struct tls_crypto_info) - 1;
+	EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* partial get of the cipher data should fail */
+	len = expect.len - 1;
+	EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), -1);
+	EXPECT_EQ(errno, EINVAL);
+}
+
 FIXTURE(tls_err)
 {
 	int fd, cfd;

From 4bfb6224ed80ca2df13cff391fecded00cc072ee Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:08 +0200
Subject: [PATCH 1964/2047] selftests: tls: test some invalid inputs for
 setsockopt

This test will need to be updated if new ciphers are added.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/bfcfa9cffda56d2064296ab7c99a05775dd4c28e.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/tls.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 0da6952a047a..297d972558fb 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -241,6 +241,31 @@ TEST_F(tls_basic, base_base)
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 };
 
+TEST_F(tls_basic, bad_cipher)
+{
+	struct tls_crypto_info_keys tls12;
+
+	tls12.crypto_info.version = 200;
+	tls12.crypto_info.cipher_type = TLS_CIPHER_AES_GCM_128;
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+
+	tls12.crypto_info.version = TLS_1_2_VERSION;
+	tls12.crypto_info.cipher_type = 50;
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+
+	tls12.crypto_info.version = TLS_1_2_VERSION;
+	tls12.crypto_info.cipher_type = 59;
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+
+	tls12.crypto_info.version = TLS_1_2_VERSION;
+	tls12.crypto_info.cipher_type = 10;
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+
+	tls12.crypto_info.version = TLS_1_2_VERSION;
+	tls12.crypto_info.cipher_type = 70;
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+}
+
 FIXTURE(tls)
 {
 	int fd, cfd;

From fd0fc6fdd8896a195cb7b0210a5ee46774718fc8 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:09 +0200
Subject: [PATCH 1965/2047] tls: move tls_cipher_size_desc to net/tls/tls.h

It's only used in net/tls/*, no need to bloat include/net/tls.h.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/dd9fad80415e5b3575b41f56b331871038362eab.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tls.h | 10 ----------
 net/tls/tls.h     | 10 ++++++++++
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 06fca9160346..a2b44578dcb7 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -51,16 +51,6 @@
 
 struct tls_rec;
 
-struct tls_cipher_size_desc {
-	unsigned int iv;
-	unsigned int key;
-	unsigned int salt;
-	unsigned int tag;
-	unsigned int rec_seq;
-};
-
-extern const struct tls_cipher_size_desc tls_cipher_size_desc[];
-
 /* Maximum data size carried in a TLS record */
 #define TLS_MAX_PAYLOAD_SIZE		((size_t)1 << 14)
 
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 164d6a955e26..7aae92972e00 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -51,6 +51,16 @@
 #define TLS_DEC_STATS(net, field)				\
 	SNMP_DEC_STATS((net)->mib.tls_statistics, field)
 
+struct tls_cipher_size_desc {
+	unsigned int iv;
+	unsigned int key;
+	unsigned int salt;
+	unsigned int tag;
+	unsigned int rec_seq;
+};
+
+extern const struct tls_cipher_size_desc tls_cipher_size_desc[];
+
 /* TLS records are maintained in 'struct tls_rec'. It stores the memory pages
  * allocated or mapped for each TLS record. After encryption, the records are
  * stores in a linked list.

From 200e23165109a173ffde3310dffa5ef5e502d97f Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:10 +0200
Subject: [PATCH 1966/2047] tls: add TLS_CIPHER_ARIA_GCM_* to
 tls_cipher_size_desc

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/b2e0fb79e6d0a4478be9bf33781dc9c9281c9d56.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index f550c84f3408..9843c2af994f 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -73,6 +73,8 @@ const struct tls_cipher_size_desc tls_cipher_size_desc[] = {
 	CIPHER_SIZE_DESC(TLS_CIPHER_CHACHA20_POLY1305),
 	CIPHER_SIZE_DESC(TLS_CIPHER_SM4_GCM),
 	CIPHER_SIZE_DESC(TLS_CIPHER_SM4_CCM),
+	CIPHER_SIZE_DESC(TLS_CIPHER_ARIA_GCM_128),
+	CIPHER_SIZE_DESC(TLS_CIPHER_ARIA_GCM_256),
 };
 
 static const struct proto *saved_tcpv6_prot;

From 037303d6760751fdb95ba62cf448ecbc1ac29c98 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:11 +0200
Subject: [PATCH 1967/2047] tls: reduce size of tls_cipher_size_desc

tls_cipher_size_desc indexes ciphers by their type, but we're not
using indices 0..50 of the array. Each struct tls_cipher_size_desc is
20B, so that's a lot of unused memory. We can reindex the array
starting at the lowest used cipher_type.

Introduce the get_cipher_size_desc helper to find the right item and
avoid out-of-bounds accesses, and make tls_cipher_size_desc's size
explicit so that gcc reminds us to update TLS_CIPHER_MIN/MAX when we
add a new cipher.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/5e054e370e240247a5d37881a1cd93a67c15f4ca.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls.h                 | 13 ++++++++++++-
 net/tls/tls_device.c          |  4 ++--
 net/tls/tls_device_fallback.c |  8 ++++----
 net/tls/tls_main.c            |  4 ++--
 4 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/net/tls/tls.h b/net/tls/tls.h
index 7aae92972e00..ea799ef77bf8 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -59,7 +59,18 @@ struct tls_cipher_size_desc {
 	unsigned int rec_seq;
 };
 
-extern const struct tls_cipher_size_desc tls_cipher_size_desc[];
+#define TLS_CIPHER_MIN TLS_CIPHER_AES_GCM_128
+#define TLS_CIPHER_MAX TLS_CIPHER_ARIA_GCM_256
+extern const struct tls_cipher_size_desc tls_cipher_size_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN];
+
+static inline const struct tls_cipher_size_desc *get_cipher_size_desc(u16 cipher_type)
+{
+	if (cipher_type < TLS_CIPHER_MIN || cipher_type > TLS_CIPHER_MAX)
+		return NULL;
+
+	return &tls_cipher_size_desc[cipher_type - TLS_CIPHER_MIN];
+}
+
 
 /* TLS records are maintained in 'struct tls_rec'. It stores the memory pages
  * allocated or mapped for each TLS record. After encryption, the records are
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 2392d06845aa..9bc42041c2ce 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -898,7 +898,7 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
 	default:
 		return -EINVAL;
 	}
-	cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_recv.info.cipher_type];
+	cipher_sz = get_cipher_size_desc(tls_ctx->crypto_recv.info.cipher_type);
 
 	rxm = strp_msg(tls_strp_msg(sw_ctx));
 	orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv,
@@ -1094,7 +1094,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 		rc = -EINVAL;
 		goto release_netdev;
 	}
-	cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type];
+	cipher_sz = get_cipher_size_desc(crypto_info->cipher_type);
 
 	/* Sanity-check the rec_seq_size for stack allocations */
 	if (cipher_sz->rec_seq > TLS_MAX_REC_SEQ_SIZE) {
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index b28c5e296dfd..dd21fa4961b6 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -69,7 +69,7 @@ static int tls_enc_record(struct aead_request *aead_req,
 	default:
 		return -EINVAL;
 	}
-	cipher_sz = &tls_cipher_size_desc[prot->cipher_type];
+	cipher_sz = get_cipher_size_desc(prot->cipher_type);
 
 	buf_size = TLS_HEADER_SIZE + cipher_sz->iv;
 	len = min_t(int, *in_len, buf_size);
@@ -310,7 +310,7 @@ static void fill_sg_out(struct scatterlist sg_out[3], void *buf,
 			void *dummy_buf)
 {
 	const struct tls_cipher_size_desc *cipher_sz =
-		&tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type];
+		get_cipher_size_desc(tls_ctx->crypto_send.info.cipher_type);
 
 	sg_set_buf(&sg_out[0], dummy_buf, sync_size);
 	sg_set_buf(&sg_out[1], nskb->data + tcp_payload_offset, payload_len);
@@ -348,7 +348,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
 	default:
 		goto free_req;
 	}
-	cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type];
+	cipher_sz = get_cipher_size_desc(tls_ctx->crypto_send.info.cipher_type);
 	buf_len = cipher_sz->salt + cipher_sz->iv + TLS_AAD_SPACE_SIZE +
 		  sync_size + cipher_sz->tag;
 	buf = kmalloc(buf_len, GFP_ATOMIC);
@@ -495,7 +495,7 @@ int tls_sw_fallback_init(struct sock *sk,
 		rc = -EINVAL;
 		goto free_aead;
 	}
-	cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type];
+	cipher_sz = get_cipher_size_desc(crypto_info->cipher_type);
 
 	rc = crypto_aead_setkey(offload_ctx->aead_send, key, cipher_sz->key);
 	if (rc)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 9843c2af994f..1bf04636948d 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -58,7 +58,7 @@ enum {
 	TLS_NUM_PROTS,
 };
 
-#define CIPHER_SIZE_DESC(cipher) [cipher] = { \
+#define CIPHER_SIZE_DESC(cipher) [cipher - TLS_CIPHER_MIN] = {	\
 	.iv = cipher ## _IV_SIZE, \
 	.key = cipher ## _KEY_SIZE, \
 	.salt = cipher ## _SALT_SIZE, \
@@ -66,7 +66,7 @@ enum {
 	.rec_seq = cipher ## _REC_SEQ_SIZE, \
 }
 
-const struct tls_cipher_size_desc tls_cipher_size_desc[] = {
+const struct tls_cipher_size_desc tls_cipher_size_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN] = {
 	CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_128),
 	CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_256),
 	CIPHER_SIZE_DESC(TLS_CIPHER_AES_CCM_128),

From 8db44ab26bebe969851468bea6072d9a094b2ace Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:12 +0200
Subject: [PATCH 1968/2047] tls: rename tls_cipher_size_desc to tls_cipher_desc

We're going to add other fields to it to fully describe a cipher, so
the "_size" name won't match the contents.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/76ca6c7686bd6d1534dfa188fb0f1f6fabebc791.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls.h                 |  8 +++----
 net/tls/tls_device.c          | 34 ++++++++++++++--------------
 net/tls/tls_device_fallback.c | 42 +++++++++++++++++------------------
 net/tls/tls_main.c            | 20 ++++++++---------
 4 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/net/tls/tls.h b/net/tls/tls.h
index ea799ef77bf8..d4b56ca9d267 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -51,7 +51,7 @@
 #define TLS_DEC_STATS(net, field)				\
 	SNMP_DEC_STATS((net)->mib.tls_statistics, field)
 
-struct tls_cipher_size_desc {
+struct tls_cipher_desc {
 	unsigned int iv;
 	unsigned int key;
 	unsigned int salt;
@@ -61,14 +61,14 @@ struct tls_cipher_size_desc {
 
 #define TLS_CIPHER_MIN TLS_CIPHER_AES_GCM_128
 #define TLS_CIPHER_MAX TLS_CIPHER_ARIA_GCM_256
-extern const struct tls_cipher_size_desc tls_cipher_size_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN];
+extern const struct tls_cipher_desc tls_cipher_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN];
 
-static inline const struct tls_cipher_size_desc *get_cipher_size_desc(u16 cipher_type)
+static inline const struct tls_cipher_desc *get_cipher_desc(u16 cipher_type)
 {
 	if (cipher_type < TLS_CIPHER_MIN || cipher_type > TLS_CIPHER_MAX)
 		return NULL;
 
-	return &tls_cipher_size_desc[cipher_type - TLS_CIPHER_MIN];
+	return &tls_cipher_desc[cipher_type - TLS_CIPHER_MIN];
 }
 
 
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 9bc42041c2ce..98885d872d4c 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -884,7 +884,7 @@ static int
 tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
 {
 	struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx);
-	const struct tls_cipher_size_desc *cipher_sz;
+	const struct tls_cipher_desc *cipher_desc;
 	int err, offset, copy, data_len, pos;
 	struct sk_buff *skb, *skb_iter;
 	struct scatterlist sg[1];
@@ -898,10 +898,10 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
 	default:
 		return -EINVAL;
 	}
-	cipher_sz = get_cipher_size_desc(tls_ctx->crypto_recv.info.cipher_type);
+	cipher_desc = get_cipher_desc(tls_ctx->crypto_recv.info.cipher_type);
 
 	rxm = strp_msg(tls_strp_msg(sw_ctx));
-	orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv,
+	orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv,
 			   sk->sk_allocation);
 	if (!orig_buf)
 		return -ENOMEM;
@@ -917,8 +917,8 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
 
 	sg_init_table(sg, 1);
 	sg_set_buf(&sg[0], buf,
-		   rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv);
-	err = skb_copy_bits(skb, offset, buf, TLS_HEADER_SIZE + cipher_sz->iv);
+		   rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv);
+	err = skb_copy_bits(skb, offset, buf, TLS_HEADER_SIZE + cipher_desc->iv);
 	if (err)
 		goto free_buf;
 
@@ -929,7 +929,7 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
 	else
 		err = 0;
 
-	data_len = rxm->full_len - cipher_sz->tag;
+	data_len = rxm->full_len - cipher_desc->tag;
 
 	if (skb_pagelen(skb) > offset) {
 		copy = min_t(int, skb_pagelen(skb) - offset, data_len);
@@ -1046,7 +1046,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_prot_info *prot = &tls_ctx->prot_info;
-	const struct tls_cipher_size_desc *cipher_sz;
+	const struct tls_cipher_desc *cipher_desc;
 	struct tls_record_info *start_marker_record;
 	struct tls_offload_context_tx *offload_ctx;
 	struct tls_crypto_info *crypto_info;
@@ -1094,31 +1094,31 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 		rc = -EINVAL;
 		goto release_netdev;
 	}
-	cipher_sz = get_cipher_size_desc(crypto_info->cipher_type);
+	cipher_desc = get_cipher_desc(crypto_info->cipher_type);
 
 	/* Sanity-check the rec_seq_size for stack allocations */
-	if (cipher_sz->rec_seq > TLS_MAX_REC_SEQ_SIZE) {
+	if (cipher_desc->rec_seq > TLS_MAX_REC_SEQ_SIZE) {
 		rc = -EINVAL;
 		goto release_netdev;
 	}
 
 	prot->version = crypto_info->version;
 	prot->cipher_type = crypto_info->cipher_type;
-	prot->prepend_size = TLS_HEADER_SIZE + cipher_sz->iv;
-	prot->tag_size = cipher_sz->tag;
+	prot->prepend_size = TLS_HEADER_SIZE + cipher_desc->iv;
+	prot->tag_size = cipher_desc->tag;
 	prot->overhead_size = prot->prepend_size + prot->tag_size;
-	prot->iv_size = cipher_sz->iv;
-	prot->salt_size = cipher_sz->salt;
-	ctx->tx.iv = kmalloc(cipher_sz->iv + cipher_sz->salt, GFP_KERNEL);
+	prot->iv_size = cipher_desc->iv;
+	prot->salt_size = cipher_desc->salt;
+	ctx->tx.iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL);
 	if (!ctx->tx.iv) {
 		rc = -ENOMEM;
 		goto release_netdev;
 	}
 
-	memcpy(ctx->tx.iv + cipher_sz->salt, iv, cipher_sz->iv);
+	memcpy(ctx->tx.iv + cipher_desc->salt, iv, cipher_desc->iv);
 
-	prot->rec_seq_size = cipher_sz->rec_seq;
-	ctx->tx.rec_seq = kmemdup(rec_seq, cipher_sz->rec_seq, GFP_KERNEL);
+	prot->rec_seq_size = cipher_desc->rec_seq;
+	ctx->tx.rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL);
 	if (!ctx->tx.rec_seq) {
 		rc = -ENOMEM;
 		goto free_iv;
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index dd21fa4961b6..cb224fb2a394 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -55,7 +55,7 @@ static int tls_enc_record(struct aead_request *aead_req,
 			  struct tls_prot_info *prot)
 {
 	unsigned char buf[TLS_HEADER_SIZE + MAX_IV_SIZE];
-	const struct tls_cipher_size_desc *cipher_sz;
+	const struct tls_cipher_desc *cipher_desc;
 	struct scatterlist sg_in[3];
 	struct scatterlist sg_out[3];
 	unsigned int buf_size;
@@ -69,9 +69,9 @@ static int tls_enc_record(struct aead_request *aead_req,
 	default:
 		return -EINVAL;
 	}
-	cipher_sz = get_cipher_size_desc(prot->cipher_type);
+	cipher_desc = get_cipher_desc(prot->cipher_type);
 
-	buf_size = TLS_HEADER_SIZE + cipher_sz->iv;
+	buf_size = TLS_HEADER_SIZE + cipher_desc->iv;
 	len = min_t(int, *in_len, buf_size);
 
 	scatterwalk_copychunks(buf, in, len, 0);
@@ -85,11 +85,11 @@ static int tls_enc_record(struct aead_request *aead_req,
 	scatterwalk_pagedone(out, 1, 1);
 
 	len = buf[4] | (buf[3] << 8);
-	len -= cipher_sz->iv;
+	len -= cipher_desc->iv;
 
-	tls_make_aad(aad, len - cipher_sz->tag, (char *)&rcd_sn, buf[0], prot);
+	tls_make_aad(aad, len - cipher_desc->tag, (char *)&rcd_sn, buf[0], prot);
 
-	memcpy(iv + cipher_sz->salt, buf + TLS_HEADER_SIZE, cipher_sz->iv);
+	memcpy(iv + cipher_desc->salt, buf + TLS_HEADER_SIZE, cipher_desc->iv);
 
 	sg_init_table(sg_in, ARRAY_SIZE(sg_in));
 	sg_init_table(sg_out, ARRAY_SIZE(sg_out));
@@ -100,7 +100,7 @@ static int tls_enc_record(struct aead_request *aead_req,
 
 	*in_len -= len;
 	if (*in_len < 0) {
-		*in_len += cipher_sz->tag;
+		*in_len += cipher_desc->tag;
 		/* the input buffer doesn't contain the entire record.
 		 * trim len accordingly. The resulting authentication tag
 		 * will contain garbage, but we don't care, so we won't
@@ -121,7 +121,7 @@ static int tls_enc_record(struct aead_request *aead_req,
 		scatterwalk_pagedone(out, 1, 1);
 	}
 
-	len -= cipher_sz->tag;
+	len -= cipher_desc->tag;
 	aead_request_set_crypt(aead_req, sg_in, sg_out, len, iv);
 
 	rc = crypto_aead_encrypt(aead_req);
@@ -309,14 +309,14 @@ static void fill_sg_out(struct scatterlist sg_out[3], void *buf,
 			int sync_size,
 			void *dummy_buf)
 {
-	const struct tls_cipher_size_desc *cipher_sz =
-		get_cipher_size_desc(tls_ctx->crypto_send.info.cipher_type);
+	const struct tls_cipher_desc *cipher_desc =
+		get_cipher_desc(tls_ctx->crypto_send.info.cipher_type);
 
 	sg_set_buf(&sg_out[0], dummy_buf, sync_size);
 	sg_set_buf(&sg_out[1], nskb->data + tcp_payload_offset, payload_len);
 	/* Add room for authentication tag produced by crypto */
 	dummy_buf += sync_size;
-	sg_set_buf(&sg_out[2], dummy_buf, cipher_sz->tag);
+	sg_set_buf(&sg_out[2], dummy_buf, cipher_desc->tag);
 }
 
 static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
@@ -328,7 +328,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
 	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 	int tcp_payload_offset = skb_tcp_all_headers(skb);
 	int payload_len = skb->len - tcp_payload_offset;
-	const struct tls_cipher_size_desc *cipher_sz;
+	const struct tls_cipher_desc *cipher_desc;
 	void *buf, *iv, *aad, *dummy_buf, *salt;
 	struct aead_request *aead_req;
 	struct sk_buff *nskb = NULL;
@@ -348,16 +348,16 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
 	default:
 		goto free_req;
 	}
-	cipher_sz = get_cipher_size_desc(tls_ctx->crypto_send.info.cipher_type);
-	buf_len = cipher_sz->salt + cipher_sz->iv + TLS_AAD_SPACE_SIZE +
-		  sync_size + cipher_sz->tag;
+	cipher_desc = get_cipher_desc(tls_ctx->crypto_send.info.cipher_type);
+	buf_len = cipher_desc->salt + cipher_desc->iv + TLS_AAD_SPACE_SIZE +
+		  sync_size + cipher_desc->tag;
 	buf = kmalloc(buf_len, GFP_ATOMIC);
 	if (!buf)
 		goto free_req;
 
 	iv = buf;
-	memcpy(iv, salt, cipher_sz->salt);
-	aad = buf + cipher_sz->salt + cipher_sz->iv;
+	memcpy(iv, salt, cipher_desc->salt);
+	aad = buf + cipher_desc->salt + cipher_desc->iv;
 	dummy_buf = aad + TLS_AAD_SPACE_SIZE;
 
 	nskb = alloc_skb(skb_headroom(skb) + skb->len, GFP_ATOMIC);
@@ -471,7 +471,7 @@ int tls_sw_fallback_init(struct sock *sk,
 			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info)
 {
-	const struct tls_cipher_size_desc *cipher_sz;
+	const struct tls_cipher_desc *cipher_desc;
 	const u8 *key;
 	int rc;
 
@@ -495,13 +495,13 @@ int tls_sw_fallback_init(struct sock *sk,
 		rc = -EINVAL;
 		goto free_aead;
 	}
-	cipher_sz = get_cipher_size_desc(crypto_info->cipher_type);
+	cipher_desc = get_cipher_desc(crypto_info->cipher_type);
 
-	rc = crypto_aead_setkey(offload_ctx->aead_send, key, cipher_sz->key);
+	rc = crypto_aead_setkey(offload_ctx->aead_send, key, cipher_desc->key);
 	if (rc)
 		goto free_aead;
 
-	rc = crypto_aead_setauthsize(offload_ctx->aead_send, cipher_sz->tag);
+	rc = crypto_aead_setauthsize(offload_ctx->aead_send, cipher_desc->tag);
 	if (rc)
 		goto free_aead;
 
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 1bf04636948d..217c2aa004dc 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -58,7 +58,7 @@ enum {
 	TLS_NUM_PROTS,
 };
 
-#define CIPHER_SIZE_DESC(cipher) [cipher - TLS_CIPHER_MIN] = {	\
+#define CIPHER_DESC(cipher) [cipher - TLS_CIPHER_MIN] = {	\
 	.iv = cipher ## _IV_SIZE, \
 	.key = cipher ## _KEY_SIZE, \
 	.salt = cipher ## _SALT_SIZE, \
@@ -66,15 +66,15 @@ enum {
 	.rec_seq = cipher ## _REC_SEQ_SIZE, \
 }
 
-const struct tls_cipher_size_desc tls_cipher_size_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN] = {
-	CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_128),
-	CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_256),
-	CIPHER_SIZE_DESC(TLS_CIPHER_AES_CCM_128),
-	CIPHER_SIZE_DESC(TLS_CIPHER_CHACHA20_POLY1305),
-	CIPHER_SIZE_DESC(TLS_CIPHER_SM4_GCM),
-	CIPHER_SIZE_DESC(TLS_CIPHER_SM4_CCM),
-	CIPHER_SIZE_DESC(TLS_CIPHER_ARIA_GCM_128),
-	CIPHER_SIZE_DESC(TLS_CIPHER_ARIA_GCM_256),
+const struct tls_cipher_desc tls_cipher_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN] = {
+	CIPHER_DESC(TLS_CIPHER_AES_GCM_128),
+	CIPHER_DESC(TLS_CIPHER_AES_GCM_256),
+	CIPHER_DESC(TLS_CIPHER_AES_CCM_128),
+	CIPHER_DESC(TLS_CIPHER_CHACHA20_POLY1305),
+	CIPHER_DESC(TLS_CIPHER_SM4_GCM),
+	CIPHER_DESC(TLS_CIPHER_SM4_CCM),
+	CIPHER_DESC(TLS_CIPHER_ARIA_GCM_128),
+	CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256),
 };
 
 static const struct proto *saved_tcpv6_prot;

From 176a3f50bc6a327c82c6b051b0bedd19917081a2 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:13 +0200
Subject: [PATCH 1969/2047] tls: extend tls_cipher_desc to fully describe the
 ciphers

- add nonce, usually equal to iv_size but not for chacha
 - add offsets into the crypto_info for each field
 - add algorithm name
 - add offloadable flag

Also add helpers to access each field of a crypto_info struct
described by a tls_cipher_desc.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/39d5f476d63c171097764e8d38f6f158b7c109ae.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls.h      | 32 ++++++++++++++++++++++++++++++++
 net/tls/tls_main.c | 41 ++++++++++++++++++++++++++++++++---------
 2 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/net/tls/tls.h b/net/tls/tls.h
index d4b56ca9d267..28a8c0e80e3c 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -52,11 +52,19 @@
 	SNMP_DEC_STATS((net)->mib.tls_statistics, field)
 
 struct tls_cipher_desc {
+	unsigned int nonce;
 	unsigned int iv;
 	unsigned int key;
 	unsigned int salt;
 	unsigned int tag;
 	unsigned int rec_seq;
+	unsigned int iv_offset;
+	unsigned int key_offset;
+	unsigned int salt_offset;
+	unsigned int rec_seq_offset;
+	char *cipher_name;
+	bool offloadable;
+	size_t crypto_info;
 };
 
 #define TLS_CIPHER_MIN TLS_CIPHER_AES_GCM_128
@@ -71,6 +79,30 @@ static inline const struct tls_cipher_desc *get_cipher_desc(u16 cipher_type)
 	return &tls_cipher_desc[cipher_type - TLS_CIPHER_MIN];
 }
 
+static inline char *crypto_info_iv(struct tls_crypto_info *crypto_info,
+				   const struct tls_cipher_desc *cipher_desc)
+{
+	return (char *)crypto_info + cipher_desc->iv_offset;
+}
+
+static inline char *crypto_info_key(struct tls_crypto_info *crypto_info,
+				    const struct tls_cipher_desc *cipher_desc)
+{
+	return (char *)crypto_info + cipher_desc->key_offset;
+}
+
+static inline char *crypto_info_salt(struct tls_crypto_info *crypto_info,
+				     const struct tls_cipher_desc *cipher_desc)
+{
+	return (char *)crypto_info + cipher_desc->salt_offset;
+}
+
+static inline char *crypto_info_rec_seq(struct tls_crypto_info *crypto_info,
+					const struct tls_cipher_desc *cipher_desc)
+{
+	return (char *)crypto_info + cipher_desc->rec_seq_offset;
+}
+
 
 /* TLS records are maintained in 'struct tls_rec'. It stores the memory pages
  * allocated or mapped for each TLS record. After encryption, the records are
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 217c2aa004dc..bbdf211cc898 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -58,23 +58,46 @@ enum {
 	TLS_NUM_PROTS,
 };
 
-#define CIPHER_DESC(cipher) [cipher - TLS_CIPHER_MIN] = {	\
+#define __CIPHER_DESC(ci) \
+	.iv_offset = offsetof(struct ci, iv), \
+	.key_offset = offsetof(struct ci, key), \
+	.salt_offset = offsetof(struct ci, salt), \
+	.rec_seq_offset = offsetof(struct ci, rec_seq), \
+	.crypto_info = sizeof(struct ci)
+
+#define CIPHER_DESC(cipher,ci,algname,_offloadable) [cipher - TLS_CIPHER_MIN] = {	\
+	.nonce = cipher ## _IV_SIZE, \
 	.iv = cipher ## _IV_SIZE, \
 	.key = cipher ## _KEY_SIZE, \
 	.salt = cipher ## _SALT_SIZE, \
 	.tag = cipher ## _TAG_SIZE, \
 	.rec_seq = cipher ## _REC_SEQ_SIZE, \
+	.cipher_name = algname,	\
+	.offloadable = _offloadable, \
+	__CIPHER_DESC(ci), \
+}
+
+#define CIPHER_DESC_NONCE0(cipher,ci,algname,_offloadable) [cipher - TLS_CIPHER_MIN] = { \
+	.nonce = 0, \
+	.iv = cipher ## _IV_SIZE, \
+	.key = cipher ## _KEY_SIZE, \
+	.salt = cipher ## _SALT_SIZE, \
+	.tag = cipher ## _TAG_SIZE, \
+	.rec_seq = cipher ## _REC_SEQ_SIZE, \
+	.cipher_name = algname,	\
+	.offloadable = _offloadable, \
+	__CIPHER_DESC(ci), \
 }
 
 const struct tls_cipher_desc tls_cipher_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN] = {
-	CIPHER_DESC(TLS_CIPHER_AES_GCM_128),
-	CIPHER_DESC(TLS_CIPHER_AES_GCM_256),
-	CIPHER_DESC(TLS_CIPHER_AES_CCM_128),
-	CIPHER_DESC(TLS_CIPHER_CHACHA20_POLY1305),
-	CIPHER_DESC(TLS_CIPHER_SM4_GCM),
-	CIPHER_DESC(TLS_CIPHER_SM4_CCM),
-	CIPHER_DESC(TLS_CIPHER_ARIA_GCM_128),
-	CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256),
+	CIPHER_DESC(TLS_CIPHER_AES_GCM_128, tls12_crypto_info_aes_gcm_128, "gcm(aes)", true),
+	CIPHER_DESC(TLS_CIPHER_AES_GCM_256, tls12_crypto_info_aes_gcm_256, "gcm(aes)", true),
+	CIPHER_DESC(TLS_CIPHER_AES_CCM_128, tls12_crypto_info_aes_ccm_128, "ccm(aes)", false),
+	CIPHER_DESC_NONCE0(TLS_CIPHER_CHACHA20_POLY1305, tls12_crypto_info_chacha20_poly1305, "rfc7539(chacha20,poly1305)", false),
+	CIPHER_DESC(TLS_CIPHER_SM4_GCM, tls12_crypto_info_sm4_gcm, "gcm(sm4)", false),
+	CIPHER_DESC(TLS_CIPHER_SM4_CCM, tls12_crypto_info_sm4_ccm, "ccm(sm4)", false),
+	CIPHER_DESC(TLS_CIPHER_ARIA_GCM_128, tls12_crypto_info_aria_gcm_128, "gcm(aria)", false),
+	CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256, tls12_crypto_info_aria_gcm_256, "gcm(aria)", false),
 };
 
 static const struct proto *saved_tcpv6_prot;

From 0d98cc02022d60004f78f6e7e6cc1bd39db80ef9 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:14 +0200
Subject: [PATCH 1970/2047] tls: validate cipher descriptions at compile time

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/b38fb8cf60e099e82ae9979c3c9c92421042417c.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_main.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index bbdf211cc898..9d8629be7017 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -58,6 +58,15 @@ enum {
 	TLS_NUM_PROTS,
 };
 
+#define CHECK_CIPHER_DESC(cipher,ci)				\
+	static_assert(cipher ## _IV_SIZE <= MAX_IV_SIZE);		\
+	static_assert(cipher ## _REC_SEQ_SIZE <= TLS_MAX_REC_SEQ_SIZE);	\
+	static_assert(cipher ## _TAG_SIZE == TLS_TAG_SIZE);		\
+	static_assert(sizeof_field(struct ci, iv) == cipher ## _IV_SIZE);	\
+	static_assert(sizeof_field(struct ci, key) == cipher ## _KEY_SIZE);	\
+	static_assert(sizeof_field(struct ci, salt) == cipher ## _SALT_SIZE);	\
+	static_assert(sizeof_field(struct ci, rec_seq) == cipher ## _REC_SEQ_SIZE);
+
 #define __CIPHER_DESC(ci) \
 	.iv_offset = offsetof(struct ci, iv), \
 	.key_offset = offsetof(struct ci, key), \
@@ -100,6 +109,15 @@ const struct tls_cipher_desc tls_cipher_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN
 	CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256, tls12_crypto_info_aria_gcm_256, "gcm(aria)", false),
 };
 
+CHECK_CIPHER_DESC(TLS_CIPHER_AES_GCM_128, tls12_crypto_info_aes_gcm_128);
+CHECK_CIPHER_DESC(TLS_CIPHER_AES_GCM_256, tls12_crypto_info_aes_gcm_256);
+CHECK_CIPHER_DESC(TLS_CIPHER_AES_CCM_128, tls12_crypto_info_aes_ccm_128);
+CHECK_CIPHER_DESC(TLS_CIPHER_CHACHA20_POLY1305, tls12_crypto_info_chacha20_poly1305);
+CHECK_CIPHER_DESC(TLS_CIPHER_SM4_GCM, tls12_crypto_info_sm4_gcm);
+CHECK_CIPHER_DESC(TLS_CIPHER_SM4_CCM, tls12_crypto_info_sm4_ccm);
+CHECK_CIPHER_DESC(TLS_CIPHER_ARIA_GCM_128, tls12_crypto_info_aria_gcm_128);
+CHECK_CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256, tls12_crypto_info_aria_gcm_256);
+
 static const struct proto *saved_tcpv6_prot;
 static DEFINE_MUTEX(tcpv6_prot_mutex);
 static const struct proto *saved_tcpv4_prot;

From 3524dd4d5f1fb9e75fdfaf280822a34fa82059bd Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:15 +0200
Subject: [PATCH 1971/2047] tls: expand use of tls_cipher_desc in
 tls_set_device_offload

tls_set_device_offload is already getting iv and rec_seq sizes from
tls_cipher_desc. We can now also check if the cipher_type coming from
userspace is valid and can be offloaded.

We can also remove the runtime check on rec_seq, since we validate it
at compile time.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/8ab71b8eca856c7aaf981a45fe91ac649eb0e2e9.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_device.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 98885d872d4c..8c94c926606a 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -1079,29 +1079,15 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 		goto release_netdev;
 	}
 
-	switch (crypto_info->cipher_type) {
-	case TLS_CIPHER_AES_GCM_128:
-		iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv;
-		rec_seq =
-		 ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq;
-		break;
-	case TLS_CIPHER_AES_GCM_256:
-		iv = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->iv;
-		rec_seq =
-		 ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->rec_seq;
-		break;
-	default:
-		rc = -EINVAL;
-		goto release_netdev;
-	}
 	cipher_desc = get_cipher_desc(crypto_info->cipher_type);
-
-	/* Sanity-check the rec_seq_size for stack allocations */
-	if (cipher_desc->rec_seq > TLS_MAX_REC_SEQ_SIZE) {
+	if (!cipher_desc || !cipher_desc->offloadable) {
 		rc = -EINVAL;
 		goto release_netdev;
 	}
 
+	iv = crypto_info_iv(crypto_info, cipher_desc);
+	rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc);
+
 	prot->version = crypto_info->version;
 	prot->cipher_type = crypto_info->cipher_type;
 	prot->prepend_size = TLS_HEADER_SIZE + cipher_desc->iv;

From d2322cf5ed59f084ac86d9339f7c3acccd177bfd Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:16 +0200
Subject: [PATCH 1972/2047] tls: allocate the fallback aead after checking that
 the cipher is valid

No need to allocate the aead if we're going to fail afterwards.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/335e32511ed55a0b30f3f81a78fa8f323b3bdf8f.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_device_fallback.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index cb224fb2a394..4de9061f38f5 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -475,15 +475,6 @@ int tls_sw_fallback_init(struct sock *sk,
 	const u8 *key;
 	int rc;
 
-	offload_ctx->aead_send =
-	    crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(offload_ctx->aead_send)) {
-		rc = PTR_ERR(offload_ctx->aead_send);
-		pr_err_ratelimited("crypto_alloc_aead failed rc=%d\n", rc);
-		offload_ctx->aead_send = NULL;
-		goto err_out;
-	}
-
 	switch (crypto_info->cipher_type) {
 	case TLS_CIPHER_AES_GCM_128:
 		key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key;
@@ -493,10 +484,19 @@ int tls_sw_fallback_init(struct sock *sk,
 		break;
 	default:
 		rc = -EINVAL;
-		goto free_aead;
+		goto err_out;
 	}
 	cipher_desc = get_cipher_desc(crypto_info->cipher_type);
 
+	offload_ctx->aead_send =
+	    crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(offload_ctx->aead_send)) {
+		rc = PTR_ERR(offload_ctx->aead_send);
+		pr_err_ratelimited("crypto_alloc_aead failed rc=%d\n", rc);
+		offload_ctx->aead_send = NULL;
+		goto err_out;
+	}
+
 	rc = crypto_aead_setkey(offload_ctx->aead_send, key, cipher_desc->key);
 	if (rc)
 		goto free_aead;

From e907277aeb6caad1c4be96e20195f24531fcfefc Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:17 +0200
Subject: [PATCH 1973/2047] tls: expand use of tls_cipher_desc in
 tls_sw_fallback_init

tls_sw_fallback_init already gets the key and tag size from
tls_cipher_desc. We can now also check that the cipher type is valid,
and stop hard-coding the algorithm name passed to crypto_alloc_aead.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/c8c94b8fcafbfb558e09589c1f1ad48dbdf92f76.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_device_fallback.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 4de9061f38f5..1d743f310f4f 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -472,24 +472,14 @@ int tls_sw_fallback_init(struct sock *sk,
 			 struct tls_crypto_info *crypto_info)
 {
 	const struct tls_cipher_desc *cipher_desc;
-	const u8 *key;
 	int rc;
 
-	switch (crypto_info->cipher_type) {
-	case TLS_CIPHER_AES_GCM_128:
-		key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key;
-		break;
-	case TLS_CIPHER_AES_GCM_256:
-		key = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->key;
-		break;
-	default:
-		rc = -EINVAL;
-		goto err_out;
-	}
 	cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+	if (!cipher_desc || !cipher_desc->offloadable)
+		return -EINVAL;
 
 	offload_ctx->aead_send =
-	    crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+	    crypto_alloc_aead(cipher_desc->cipher_name, 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(offload_ctx->aead_send)) {
 		rc = PTR_ERR(offload_ctx->aead_send);
 		pr_err_ratelimited("crypto_alloc_aead failed rc=%d\n", rc);
@@ -497,7 +487,9 @@ int tls_sw_fallback_init(struct sock *sk,
 		goto err_out;
 	}
 
-	rc = crypto_aead_setkey(offload_ctx->aead_send, key, cipher_desc->key);
+	rc = crypto_aead_setkey(offload_ctx->aead_send,
+				crypto_info_key(crypto_info, cipher_desc),
+				cipher_desc->key);
 	if (rc)
 		goto free_aead;
 

From 5f309ade49c7068b1149ecf825c4c16e56a3b865 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:18 +0200
Subject: [PATCH 1974/2047] tls: get crypto_info size from tls_cipher_desc in
 do_tls_setsockopt_conf

We can simplify do_tls_setsockopt_conf using tls_cipher_desc. Also use
get_cipher_desc's result to check if the cipher_type coming from
userspace is valid.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/e97658eb4c6a5832f8ba20a06c4f36a77763c59e.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_main.c | 39 ++++++++-------------------------------
 1 file changed, 8 insertions(+), 31 deletions(-)

diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 9d8629be7017..73cae5dec392 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -739,7 +739,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
 	struct tls_crypto_info *crypto_info;
 	struct tls_crypto_info *alt_crypto_info;
 	struct tls_context *ctx = tls_get_ctx(sk);
-	size_t optsize;
+	const struct tls_cipher_desc *cipher_desc;
 	int rc = 0;
 	int conf;
 
@@ -780,46 +780,23 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
 		}
 	}
 
-	switch (crypto_info->cipher_type) {
-	case TLS_CIPHER_AES_GCM_128:
-		optsize = sizeof(struct tls12_crypto_info_aes_gcm_128);
-		break;
-	case TLS_CIPHER_AES_GCM_256: {
-		optsize = sizeof(struct tls12_crypto_info_aes_gcm_256);
-		break;
+	cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+	if (!cipher_desc) {
+		rc = -EINVAL;
+		goto err_crypto_info;
 	}
-	case TLS_CIPHER_AES_CCM_128:
-		optsize = sizeof(struct tls12_crypto_info_aes_ccm_128);
-		break;
-	case TLS_CIPHER_CHACHA20_POLY1305:
-		optsize = sizeof(struct tls12_crypto_info_chacha20_poly1305);
-		break;
-	case TLS_CIPHER_SM4_GCM:
-		optsize = sizeof(struct tls12_crypto_info_sm4_gcm);
-		break;
-	case TLS_CIPHER_SM4_CCM:
-		optsize = sizeof(struct tls12_crypto_info_sm4_ccm);
-		break;
+
+	switch (crypto_info->cipher_type) {
 	case TLS_CIPHER_ARIA_GCM_128:
-		if (crypto_info->version != TLS_1_2_VERSION) {
-			rc = -EINVAL;
-			goto err_crypto_info;
-		}
-		optsize = sizeof(struct tls12_crypto_info_aria_gcm_128);
-		break;
 	case TLS_CIPHER_ARIA_GCM_256:
 		if (crypto_info->version != TLS_1_2_VERSION) {
 			rc = -EINVAL;
 			goto err_crypto_info;
 		}
-		optsize = sizeof(struct tls12_crypto_info_aria_gcm_256);
 		break;
-	default:
-		rc = -EINVAL;
-		goto err_crypto_info;
 	}
 
-	if (optlen != optsize) {
+	if (optlen != cipher_desc->crypto_info) {
 		rc = -EINVAL;
 		goto err_crypto_info;
 	}

From 077e05d135489e144d9e0d01454886bf613d32a4 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:19 +0200
Subject: [PATCH 1975/2047] tls: use tls_cipher_desc to simplify
 do_tls_getsockopt_conf

Every cipher uses the same code to update its crypto_info struct based
on the values contained in the cctx, with only the struct type and
size/offset changing. We can get those  from tls_cipher_desc, and use
a single pair of memcpy and final copy_to_user.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/c21a904b91e972bdbbf9d1c6d2731ccfa1eedf72.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_main.c | 176 ++++-----------------------------------------
 1 file changed, 12 insertions(+), 164 deletions(-)

diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 73cae5dec392..02f583ff9239 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -435,6 +435,7 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
 				  int __user *optlen, int tx)
 {
 	int rc = 0;
+	const struct tls_cipher_desc *cipher_desc;
 	struct tls_context *ctx = tls_get_ctx(sk);
 	struct tls_crypto_info *crypto_info;
 	struct cipher_context *cctx;
@@ -473,173 +474,20 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
 		goto out;
 	}
 
-	switch (crypto_info->cipher_type) {
-	case TLS_CIPHER_AES_GCM_128: {
-		struct tls12_crypto_info_aes_gcm_128 *
-		  crypto_info_aes_gcm_128 =
-		  container_of(crypto_info,
-			       struct tls12_crypto_info_aes_gcm_128,
-			       info);
-
-		if (len != sizeof(*crypto_info_aes_gcm_128)) {
-			rc = -EINVAL;
-			goto out;
-		}
-		memcpy(crypto_info_aes_gcm_128->iv,
-		       cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
-		       TLS_CIPHER_AES_GCM_128_IV_SIZE);
-		memcpy(crypto_info_aes_gcm_128->rec_seq, cctx->rec_seq,
-		       TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
-		if (copy_to_user(optval,
-				 crypto_info_aes_gcm_128,
-				 sizeof(*crypto_info_aes_gcm_128)))
-			rc = -EFAULT;
-		break;
-	}
-	case TLS_CIPHER_AES_GCM_256: {
-		struct tls12_crypto_info_aes_gcm_256 *
-		  crypto_info_aes_gcm_256 =
-		  container_of(crypto_info,
-			       struct tls12_crypto_info_aes_gcm_256,
-			       info);
-
-		if (len != sizeof(*crypto_info_aes_gcm_256)) {
-			rc = -EINVAL;
-			goto out;
-		}
-		memcpy(crypto_info_aes_gcm_256->iv,
-		       cctx->iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE,
-		       TLS_CIPHER_AES_GCM_256_IV_SIZE);
-		memcpy(crypto_info_aes_gcm_256->rec_seq, cctx->rec_seq,
-		       TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE);
-		if (copy_to_user(optval,
-				 crypto_info_aes_gcm_256,
-				 sizeof(*crypto_info_aes_gcm_256)))
-			rc = -EFAULT;
-		break;
-	}
-	case TLS_CIPHER_AES_CCM_128: {
-		struct tls12_crypto_info_aes_ccm_128 *aes_ccm_128 =
-			container_of(crypto_info,
-				struct tls12_crypto_info_aes_ccm_128, info);
-
-		if (len != sizeof(*aes_ccm_128)) {
-			rc = -EINVAL;
-			goto out;
-		}
-		memcpy(aes_ccm_128->iv,
-		       cctx->iv + TLS_CIPHER_AES_CCM_128_SALT_SIZE,
-		       TLS_CIPHER_AES_CCM_128_IV_SIZE);
-		memcpy(aes_ccm_128->rec_seq, cctx->rec_seq,
-		       TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE);
-		if (copy_to_user(optval, aes_ccm_128, sizeof(*aes_ccm_128)))
-			rc = -EFAULT;
-		break;
-	}
-	case TLS_CIPHER_CHACHA20_POLY1305: {
-		struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305 =
-			container_of(crypto_info,
-				struct tls12_crypto_info_chacha20_poly1305,
-				info);
-
-		if (len != sizeof(*chacha20_poly1305)) {
-			rc = -EINVAL;
-			goto out;
-		}
-		memcpy(chacha20_poly1305->iv,
-		       cctx->iv + TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE,
-		       TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE);
-		memcpy(chacha20_poly1305->rec_seq, cctx->rec_seq,
-		       TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE);
-		if (copy_to_user(optval, chacha20_poly1305,
-				sizeof(*chacha20_poly1305)))
-			rc = -EFAULT;
-		break;
-	}
-	case TLS_CIPHER_SM4_GCM: {
-		struct tls12_crypto_info_sm4_gcm *sm4_gcm_info =
-			container_of(crypto_info,
-				struct tls12_crypto_info_sm4_gcm, info);
-
-		if (len != sizeof(*sm4_gcm_info)) {
-			rc = -EINVAL;
-			goto out;
-		}
-		memcpy(sm4_gcm_info->iv,
-		       cctx->iv + TLS_CIPHER_SM4_GCM_SALT_SIZE,
-		       TLS_CIPHER_SM4_GCM_IV_SIZE);
-		memcpy(sm4_gcm_info->rec_seq, cctx->rec_seq,
-		       TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE);
-		if (copy_to_user(optval, sm4_gcm_info, sizeof(*sm4_gcm_info)))
-			rc = -EFAULT;
-		break;
-	}
-	case TLS_CIPHER_SM4_CCM: {
-		struct tls12_crypto_info_sm4_ccm *sm4_ccm_info =
-			container_of(crypto_info,
-				struct tls12_crypto_info_sm4_ccm, info);
-
-		if (len != sizeof(*sm4_ccm_info)) {
-			rc = -EINVAL;
-			goto out;
-		}
-		memcpy(sm4_ccm_info->iv,
-		       cctx->iv + TLS_CIPHER_SM4_CCM_SALT_SIZE,
-		       TLS_CIPHER_SM4_CCM_IV_SIZE);
-		memcpy(sm4_ccm_info->rec_seq, cctx->rec_seq,
-		       TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE);
-		if (copy_to_user(optval, sm4_ccm_info, sizeof(*sm4_ccm_info)))
-			rc = -EFAULT;
-		break;
-	}
-	case TLS_CIPHER_ARIA_GCM_128: {
-		struct tls12_crypto_info_aria_gcm_128 *
-		  crypto_info_aria_gcm_128 =
-		  container_of(crypto_info,
-			       struct tls12_crypto_info_aria_gcm_128,
-			       info);
-
-		if (len != sizeof(*crypto_info_aria_gcm_128)) {
-			rc = -EINVAL;
-			goto out;
-		}
-		memcpy(crypto_info_aria_gcm_128->iv,
-		       cctx->iv + TLS_CIPHER_ARIA_GCM_128_SALT_SIZE,
-		       TLS_CIPHER_ARIA_GCM_128_IV_SIZE);
-		memcpy(crypto_info_aria_gcm_128->rec_seq, cctx->rec_seq,
-		       TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE);
-		if (copy_to_user(optval,
-				 crypto_info_aria_gcm_128,
-				 sizeof(*crypto_info_aria_gcm_128)))
-			rc = -EFAULT;
-		break;
-	}
-	case TLS_CIPHER_ARIA_GCM_256: {
-		struct tls12_crypto_info_aria_gcm_256 *
-		  crypto_info_aria_gcm_256 =
-		  container_of(crypto_info,
-			       struct tls12_crypto_info_aria_gcm_256,
-			       info);
-
-		if (len != sizeof(*crypto_info_aria_gcm_256)) {
-			rc = -EINVAL;
-			goto out;
-		}
-		memcpy(crypto_info_aria_gcm_256->iv,
-		       cctx->iv + TLS_CIPHER_ARIA_GCM_256_SALT_SIZE,
-		       TLS_CIPHER_ARIA_GCM_256_IV_SIZE);
-		memcpy(crypto_info_aria_gcm_256->rec_seq, cctx->rec_seq,
-		       TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE);
-		if (copy_to_user(optval,
-				 crypto_info_aria_gcm_256,
-				 sizeof(*crypto_info_aria_gcm_256)))
-			rc = -EFAULT;
-		break;
-	}
-	default:
+	cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+	if (!cipher_desc || len != cipher_desc->crypto_info) {
 		rc = -EINVAL;
+		goto out;
 	}
 
+	memcpy(crypto_info_iv(crypto_info, cipher_desc),
+	       cctx->iv + cipher_desc->salt, cipher_desc->iv);
+	memcpy(crypto_info_rec_seq(crypto_info, cipher_desc),
+	       cctx->rec_seq, cipher_desc->rec_seq);
+
+	if (copy_to_user(optval, crypto_info, cipher_desc->crypto_info))
+		rc = -EFAULT;
+
 out:
 	return rc;
 }

From d9a6ca1a975839e61c0463a5e29dd8053fd3e8a4 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:20 +0200
Subject: [PATCH 1976/2047] tls: use tls_cipher_desc to get per-cipher sizes in
 tls_set_sw_offload

We can get rid of some local variables, but we have to keep nonce_size
because tls1.3 uses nonce_size = 0 for all ciphers.

We can also drop the runtime sanity checks on iv/rec_seq/tag size,
since we have compile time checks on those values.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/deed9c4430a62c31751a72b8c03ad66ffe710717.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_sw.c | 79 ++++++++++--------------------------------------
 1 file changed, 16 insertions(+), 63 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 5c122d7bb784..85708656dcd4 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2590,10 +2590,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	struct tls_sw_context_rx *sw_ctx_rx = NULL;
 	struct cipher_context *cctx;
 	struct crypto_aead **aead;
-	u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size;
 	struct crypto_tfm *tfm;
 	char *iv, *rec_seq, *key, *salt, *cipher_name;
-	size_t keysize;
+	const struct tls_cipher_desc *cipher_desc;
+	u16 nonce_size;
 	int rc = 0;
 
 	if (!ctx) {
@@ -2652,16 +2652,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
 
 		gcm_128_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
-		tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE;
-		iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
 		iv = gcm_128_info->iv;
-		rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE;
 		rec_seq = gcm_128_info->rec_seq;
-		keysize = TLS_CIPHER_AES_GCM_128_KEY_SIZE;
 		key = gcm_128_info->key;
 		salt = gcm_128_info->salt;
-		salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE;
 		cipher_name = "gcm(aes)";
 		break;
 	}
@@ -2669,16 +2663,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		struct tls12_crypto_info_aes_gcm_256 *gcm_256_info;
 
 		gcm_256_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_AES_GCM_256_IV_SIZE;
-		tag_size = TLS_CIPHER_AES_GCM_256_TAG_SIZE;
-		iv_size = TLS_CIPHER_AES_GCM_256_IV_SIZE;
 		iv = gcm_256_info->iv;
-		rec_seq_size = TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE;
 		rec_seq = gcm_256_info->rec_seq;
-		keysize = TLS_CIPHER_AES_GCM_256_KEY_SIZE;
 		key = gcm_256_info->key;
 		salt = gcm_256_info->salt;
-		salt_size = TLS_CIPHER_AES_GCM_256_SALT_SIZE;
 		cipher_name = "gcm(aes)";
 		break;
 	}
@@ -2686,16 +2674,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		struct tls12_crypto_info_aes_ccm_128 *ccm_128_info;
 
 		ccm_128_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_AES_CCM_128_IV_SIZE;
-		tag_size = TLS_CIPHER_AES_CCM_128_TAG_SIZE;
-		iv_size = TLS_CIPHER_AES_CCM_128_IV_SIZE;
 		iv = ccm_128_info->iv;
-		rec_seq_size = TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE;
 		rec_seq = ccm_128_info->rec_seq;
-		keysize = TLS_CIPHER_AES_CCM_128_KEY_SIZE;
 		key = ccm_128_info->key;
 		salt = ccm_128_info->salt;
-		salt_size = TLS_CIPHER_AES_CCM_128_SALT_SIZE;
 		cipher_name = "ccm(aes)";
 		break;
 	}
@@ -2703,16 +2685,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305_info;
 
 		chacha20_poly1305_info = (void *)crypto_info;
-		nonce_size = 0;
-		tag_size = TLS_CIPHER_CHACHA20_POLY1305_TAG_SIZE;
-		iv_size = TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE;
 		iv = chacha20_poly1305_info->iv;
-		rec_seq_size = TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE;
 		rec_seq = chacha20_poly1305_info->rec_seq;
-		keysize = TLS_CIPHER_CHACHA20_POLY1305_KEY_SIZE;
 		key = chacha20_poly1305_info->key;
 		salt = chacha20_poly1305_info->salt;
-		salt_size = TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE;
 		cipher_name = "rfc7539(chacha20,poly1305)";
 		break;
 	}
@@ -2720,16 +2696,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		struct tls12_crypto_info_sm4_gcm *sm4_gcm_info;
 
 		sm4_gcm_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_SM4_GCM_IV_SIZE;
-		tag_size = TLS_CIPHER_SM4_GCM_TAG_SIZE;
-		iv_size = TLS_CIPHER_SM4_GCM_IV_SIZE;
 		iv = sm4_gcm_info->iv;
-		rec_seq_size = TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE;
 		rec_seq = sm4_gcm_info->rec_seq;
-		keysize = TLS_CIPHER_SM4_GCM_KEY_SIZE;
 		key = sm4_gcm_info->key;
 		salt = sm4_gcm_info->salt;
-		salt_size = TLS_CIPHER_SM4_GCM_SALT_SIZE;
 		cipher_name = "gcm(sm4)";
 		break;
 	}
@@ -2737,16 +2707,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		struct tls12_crypto_info_sm4_ccm *sm4_ccm_info;
 
 		sm4_ccm_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_SM4_CCM_IV_SIZE;
-		tag_size = TLS_CIPHER_SM4_CCM_TAG_SIZE;
-		iv_size = TLS_CIPHER_SM4_CCM_IV_SIZE;
 		iv = sm4_ccm_info->iv;
-		rec_seq_size = TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE;
 		rec_seq = sm4_ccm_info->rec_seq;
-		keysize = TLS_CIPHER_SM4_CCM_KEY_SIZE;
 		key = sm4_ccm_info->key;
 		salt = sm4_ccm_info->salt;
-		salt_size = TLS_CIPHER_SM4_CCM_SALT_SIZE;
 		cipher_name = "ccm(sm4)";
 		break;
 	}
@@ -2754,16 +2718,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		struct tls12_crypto_info_aria_gcm_128 *aria_gcm_128_info;
 
 		aria_gcm_128_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_ARIA_GCM_128_IV_SIZE;
-		tag_size = TLS_CIPHER_ARIA_GCM_128_TAG_SIZE;
-		iv_size = TLS_CIPHER_ARIA_GCM_128_IV_SIZE;
 		iv = aria_gcm_128_info->iv;
-		rec_seq_size = TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE;
 		rec_seq = aria_gcm_128_info->rec_seq;
-		keysize = TLS_CIPHER_ARIA_GCM_128_KEY_SIZE;
 		key = aria_gcm_128_info->key;
 		salt = aria_gcm_128_info->salt;
-		salt_size = TLS_CIPHER_ARIA_GCM_128_SALT_SIZE;
 		cipher_name = "gcm(aria)";
 		break;
 	}
@@ -2771,16 +2729,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		struct tls12_crypto_info_aria_gcm_256 *gcm_256_info;
 
 		gcm_256_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_ARIA_GCM_256_IV_SIZE;
-		tag_size = TLS_CIPHER_ARIA_GCM_256_TAG_SIZE;
-		iv_size = TLS_CIPHER_ARIA_GCM_256_IV_SIZE;
 		iv = gcm_256_info->iv;
-		rec_seq_size = TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE;
 		rec_seq = gcm_256_info->rec_seq;
-		keysize = TLS_CIPHER_ARIA_GCM_256_KEY_SIZE;
 		key = gcm_256_info->key;
 		salt = gcm_256_info->salt;
-		salt_size = TLS_CIPHER_ARIA_GCM_256_SALT_SIZE;
 		cipher_name = "gcm(aria)";
 		break;
 	}
@@ -2789,6 +2741,9 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		goto free_priv;
 	}
 
+	cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+	nonce_size = cipher_desc->nonce;
+
 	if (crypto_info->version == TLS_1_3_VERSION) {
 		nonce_size = 0;
 		prot->aad_size = TLS_HEADER_SIZE;
@@ -2799,9 +2754,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	}
 
 	/* Sanity-check the sizes for stack allocations. */
-	if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE ||
-	    rec_seq_size > TLS_MAX_REC_SEQ_SIZE || tag_size != TLS_TAG_SIZE ||
-	    prot->aad_size > TLS_MAX_AAD_SIZE) {
+	if (nonce_size > MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) {
 		rc = -EINVAL;
 		goto free_priv;
 	}
@@ -2809,21 +2762,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	prot->version = crypto_info->version;
 	prot->cipher_type = crypto_info->cipher_type;
 	prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
-	prot->tag_size = tag_size;
+	prot->tag_size = cipher_desc->tag;
 	prot->overhead_size = prot->prepend_size +
 			      prot->tag_size + prot->tail_size;
-	prot->iv_size = iv_size;
-	prot->salt_size = salt_size;
-	cctx->iv = kmalloc(iv_size + salt_size, GFP_KERNEL);
+	prot->iv_size = cipher_desc->iv;
+	prot->salt_size = cipher_desc->salt;
+	cctx->iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL);
 	if (!cctx->iv) {
 		rc = -ENOMEM;
 		goto free_priv;
 	}
 	/* Note: 128 & 256 bit salt are the same size */
-	prot->rec_seq_size = rec_seq_size;
-	memcpy(cctx->iv, salt, salt_size);
-	memcpy(cctx->iv + salt_size, iv, iv_size);
-	cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
+	prot->rec_seq_size = cipher_desc->rec_seq;
+	memcpy(cctx->iv, salt, cipher_desc->salt);
+	memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv);
+
+	cctx->rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL);
 	if (!cctx->rec_seq) {
 		rc = -ENOMEM;
 		goto free_iv;
@@ -2840,8 +2794,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 
 	ctx->push_pending_record = tls_sw_push_pending_record;
 
-	rc = crypto_aead_setkey(*aead, key, keysize);
-
+	rc = crypto_aead_setkey(*aead, key, cipher_desc->key);
 	if (rc)
 		goto free_aead;
 

From 48dfad27fd4045df82eee54186c2513cbfc54c1d Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:21 +0200
Subject: [PATCH 1977/2047] tls: use tls_cipher_desc to access per-cipher
 crypto_info in tls_set_sw_offload

The crypto_info_* helpers allow us to fetch pointers into the
per-cipher crypto_info's data.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/c23af110caf0af6b68de2f86c58064913e2e902a.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_sw.c | 89 +++++++-----------------------------------------
 1 file changed, 13 insertions(+), 76 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 85708656dcd4..9c18ddf0d568 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2648,94 +2648,26 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	}
 
 	switch (crypto_info->cipher_type) {
-	case TLS_CIPHER_AES_GCM_128: {
-		struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
-
-		gcm_128_info = (void *)crypto_info;
-		iv = gcm_128_info->iv;
-		rec_seq = gcm_128_info->rec_seq;
-		key = gcm_128_info->key;
-		salt = gcm_128_info->salt;
+	case TLS_CIPHER_AES_GCM_128:
+	case TLS_CIPHER_AES_GCM_256:
 		cipher_name = "gcm(aes)";
 		break;
-	}
-	case TLS_CIPHER_AES_GCM_256: {
-		struct tls12_crypto_info_aes_gcm_256 *gcm_256_info;
-
-		gcm_256_info = (void *)crypto_info;
-		iv = gcm_256_info->iv;
-		rec_seq = gcm_256_info->rec_seq;
-		key = gcm_256_info->key;
-		salt = gcm_256_info->salt;
-		cipher_name = "gcm(aes)";
-		break;
-	}
-	case TLS_CIPHER_AES_CCM_128: {
-		struct tls12_crypto_info_aes_ccm_128 *ccm_128_info;
-
-		ccm_128_info = (void *)crypto_info;
-		iv = ccm_128_info->iv;
-		rec_seq = ccm_128_info->rec_seq;
-		key = ccm_128_info->key;
-		salt = ccm_128_info->salt;
+	case TLS_CIPHER_AES_CCM_128:
 		cipher_name = "ccm(aes)";
 		break;
-	}
-	case TLS_CIPHER_CHACHA20_POLY1305: {
-		struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305_info;
-
-		chacha20_poly1305_info = (void *)crypto_info;
-		iv = chacha20_poly1305_info->iv;
-		rec_seq = chacha20_poly1305_info->rec_seq;
-		key = chacha20_poly1305_info->key;
-		salt = chacha20_poly1305_info->salt;
+	case TLS_CIPHER_CHACHA20_POLY1305:
 		cipher_name = "rfc7539(chacha20,poly1305)";
 		break;
-	}
-	case TLS_CIPHER_SM4_GCM: {
-		struct tls12_crypto_info_sm4_gcm *sm4_gcm_info;
-
-		sm4_gcm_info = (void *)crypto_info;
-		iv = sm4_gcm_info->iv;
-		rec_seq = sm4_gcm_info->rec_seq;
-		key = sm4_gcm_info->key;
-		salt = sm4_gcm_info->salt;
+	case TLS_CIPHER_SM4_GCM:
 		cipher_name = "gcm(sm4)";
 		break;
-	}
-	case TLS_CIPHER_SM4_CCM: {
-		struct tls12_crypto_info_sm4_ccm *sm4_ccm_info;
-
-		sm4_ccm_info = (void *)crypto_info;
-		iv = sm4_ccm_info->iv;
-		rec_seq = sm4_ccm_info->rec_seq;
-		key = sm4_ccm_info->key;
-		salt = sm4_ccm_info->salt;
+	case TLS_CIPHER_SM4_CCM:
 		cipher_name = "ccm(sm4)";
 		break;
-	}
-	case TLS_CIPHER_ARIA_GCM_128: {
-		struct tls12_crypto_info_aria_gcm_128 *aria_gcm_128_info;
-
-		aria_gcm_128_info = (void *)crypto_info;
-		iv = aria_gcm_128_info->iv;
-		rec_seq = aria_gcm_128_info->rec_seq;
-		key = aria_gcm_128_info->key;
-		salt = aria_gcm_128_info->salt;
+	case TLS_CIPHER_ARIA_GCM_128:
+	case TLS_CIPHER_ARIA_GCM_256:
 		cipher_name = "gcm(aria)";
 		break;
-	}
-	case TLS_CIPHER_ARIA_GCM_256: {
-		struct tls12_crypto_info_aria_gcm_256 *gcm_256_info;
-
-		gcm_256_info = (void *)crypto_info;
-		iv = gcm_256_info->iv;
-		rec_seq = gcm_256_info->rec_seq;
-		key = gcm_256_info->key;
-		salt = gcm_256_info->salt;
-		cipher_name = "gcm(aria)";
-		break;
-	}
 	default:
 		rc = -EINVAL;
 		goto free_priv;
@@ -2744,6 +2676,11 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	cipher_desc = get_cipher_desc(crypto_info->cipher_type);
 	nonce_size = cipher_desc->nonce;
 
+	iv = crypto_info_iv(crypto_info, cipher_desc);
+	key = crypto_info_key(crypto_info, cipher_desc);
+	salt = crypto_info_salt(crypto_info, cipher_desc);
+	rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc);
+
 	if (crypto_info->version == TLS_1_3_VERSION) {
 		nonce_size = 0;
 		prot->aad_size = TLS_HEADER_SIZE;

From f3e444e31f9fa45ad59cc2d0e0c1bcc86eef4780 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 25 Aug 2023 23:35:22 +0200
Subject: [PATCH 1978/2047] tls: get cipher_name from cipher_desc in
 tls_set_sw_offload

tls_cipher_desc also contains the algorithm name needed by
crypto_alloc_aead, use it.

Finally, use get_cipher_desc to check if the cipher_type coming from
userspace is valid, and remove the cipher_type switch.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://lore.kernel.org/r/53d021d80138aa125a9cef4468aa5ce531975a7b.1692977948.git.sd@queasysnail.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_sw.c | 29 ++++-------------------------
 1 file changed, 4 insertions(+), 25 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9c18ddf0d568..1ed4a611631f 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2591,7 +2591,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	struct cipher_context *cctx;
 	struct crypto_aead **aead;
 	struct crypto_tfm *tfm;
-	char *iv, *rec_seq, *key, *salt, *cipher_name;
+	char *iv, *rec_seq, *key, *salt;
 	const struct tls_cipher_desc *cipher_desc;
 	u16 nonce_size;
 	int rc = 0;
@@ -2647,33 +2647,12 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		aead = &sw_ctx_rx->aead_recv;
 	}
 
-	switch (crypto_info->cipher_type) {
-	case TLS_CIPHER_AES_GCM_128:
-	case TLS_CIPHER_AES_GCM_256:
-		cipher_name = "gcm(aes)";
-		break;
-	case TLS_CIPHER_AES_CCM_128:
-		cipher_name = "ccm(aes)";
-		break;
-	case TLS_CIPHER_CHACHA20_POLY1305:
-		cipher_name = "rfc7539(chacha20,poly1305)";
-		break;
-	case TLS_CIPHER_SM4_GCM:
-		cipher_name = "gcm(sm4)";
-		break;
-	case TLS_CIPHER_SM4_CCM:
-		cipher_name = "ccm(sm4)";
-		break;
-	case TLS_CIPHER_ARIA_GCM_128:
-	case TLS_CIPHER_ARIA_GCM_256:
-		cipher_name = "gcm(aria)";
-		break;
-	default:
+	cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+	if (!cipher_desc) {
 		rc = -EINVAL;
 		goto free_priv;
 	}
 
-	cipher_desc = get_cipher_desc(crypto_info->cipher_type);
 	nonce_size = cipher_desc->nonce;
 
 	iv = crypto_info_iv(crypto_info, cipher_desc);
@@ -2721,7 +2700,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	}
 
 	if (!*aead) {
-		*aead = crypto_alloc_aead(cipher_name, 0, 0);
+		*aead = crypto_alloc_aead(cipher_desc->cipher_name, 0, 0);
 		if (IS_ERR(*aead)) {
 			rc = PTR_ERR(*aead);
 			*aead = NULL;

From c639a708a0b8b69b7d580292a8f0405124488bad Mon Sep 17 00:00:00 2001
From: Pranavi Somisetty <pranavi.somisetty@amd.com>
Date: Thu, 24 Aug 2023 17:14:56 +0530
Subject: [PATCH 1979/2047] dt-bindings: net: xilinx_gmii2rgmii: Convert to
 json schema

Convert the Xilinx GMII to RGMII Converter device tree binding
documentation to json schema.
This converter is usually used as gem <---> gmii2rgmii <---> external phy
and, it's phy-handle should point to the phandle of the external phy.

Signed-off-by: Pranavi Somisetty <pranavi.somisetty@amd.com>
Signed-off-by: Harini Katakam <harini.katakam@amd.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/xilinx_gmii2rgmii.txt        | 35 ------------
 .../bindings/net/xlnx,gmii-to-rgmii.yaml      | 55 +++++++++++++++++++
 2 files changed, 55 insertions(+), 35 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt
 create mode 100644 Documentation/devicetree/bindings/net/xlnx,gmii-to-rgmii.yaml

diff --git a/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt b/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt
deleted file mode 100644
index 038dda48b8e6..000000000000
--- a/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-XILINX GMIITORGMII Converter Driver Device Tree Bindings
---------------------------------------------------------
-
-The Gigabit Media Independent Interface (GMII) to Reduced Gigabit Media
-Independent Interface (RGMII) core provides the RGMII between RGMII-compliant
-Ethernet physical media devices (PHY) and the Gigabit Ethernet controller.
-This core can be used in all three modes of operation(10/100/1000 Mb/s).
-The Management Data Input/Output (MDIO) interface is used to configure the
-Speed of operation. This core can switch dynamically between the three
-Different speed modes by configuring the conveter register through mdio write.
-
-This converter sits between the ethernet MAC and the external phy.
-MAC <==> GMII2RGMII <==> RGMII_PHY
-
-For more details about mdio please refer phy.txt file in the same directory.
-
-Required properties:
-- compatible	: Should be "xlnx,gmii-to-rgmii-1.0"
-- reg		: The ID number for the phy, usually a small integer
-- phy-handle	: Should point to the external phy device.
-		  See ethernet.txt file in the same directory.
-
-Example:
-	mdio {
-		#address-cells = <1>;
-		#size-cells = <0>;
-		phy: ethernet-phy@0 {
-			......
-		};
-		gmiitorgmii: gmiitorgmii@8 {
-			compatible = "xlnx,gmii-to-rgmii-1.0";
-			reg = <8>;
-			phy-handle = <&phy>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/net/xlnx,gmii-to-rgmii.yaml b/Documentation/devicetree/bindings/net/xlnx,gmii-to-rgmii.yaml
new file mode 100644
index 000000000000..0f781dac6717
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/xlnx,gmii-to-rgmii.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/xlnx,gmii-to-rgmii.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx GMII to RGMII Converter
+
+maintainers:
+  - Harini Katakam <harini.katakam@amd.com>
+
+description:
+  The Gigabit Media Independent Interface (GMII) to Reduced Gigabit Media
+  Independent Interface (RGMII) core provides the RGMII between RGMII-compliant
+  ethernet physical media devices (PHY) and the Gigabit Ethernet controller.
+  This core can be used in all three modes of operation(10/100/1000 Mb/s).
+  The Management Data Input/Output (MDIO) interface is used to configure the
+  speed of operation. This core can switch dynamically between the three
+  different speed modes by configuring the converter register through mdio write.
+  The core cannot function without an external phy connected to it.
+
+properties:
+  compatible:
+    const: xlnx,gmii-to-rgmii-1.0
+
+  reg:
+    minimum: 0
+    maximum: 31
+    description: The ID number for the phy.
+
+  phy-handle:
+    $ref: ethernet-controller.yaml#/properties/phy-handle
+
+required:
+  - compatible
+  - reg
+  - phy-handle
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    mdio {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        phy: ethernet-phy@0 {
+            reg = <0>;
+        };
+        gmiitorgmii@8 {
+            compatible = "xlnx,gmii-to-rgmii-1.0";
+            reg = <8>;
+            phy-handle = <&phy>;
+        };
+    };

From c2f8fd7949603efb03908e05abbf7726748c8de3 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 24 Aug 2023 09:50:59 -0700
Subject: [PATCH 1980/2047] netrom: Deny concurrent connect().

syzkaller reported null-ptr-deref [0] related to AF_NETROM.
This is another self-accept issue from the strace log. [1]

syz-executor creates an AF_NETROM socket and calls connect(), which
is blocked at that time.  Then, sk->sk_state is TCP_SYN_SENT and
sock->state is SS_CONNECTING.

  [pid  5059] socket(AF_NETROM, SOCK_SEQPACKET, 0) = 4
  [pid  5059] connect(4, {sa_family=AF_NETROM, sa_data="..." <unfinished ...>

Another thread calls connect() concurrently, which finally fails
with -EINVAL.  However, the problem here is the socket state is
reset even while the first connect() is blocked.

  [pid  5060] connect(4, NULL, 0 <unfinished ...>
  [pid  5060] <... connect resumed>)      = -1 EINVAL (Invalid argument)

As sk->state is TCP_CLOSE and sock->state is SS_UNCONNECTED, the
following listen() succeeds.  Then, the first connect() looks up
itself as a listener and puts skb into the queue with skb->sk itself.
As a result, the next accept() gets another FD of itself as 3, and
the first connect() finishes.

  [pid  5060] listen(4, 0 <unfinished ...>
  [pid  5060] <... listen resumed>)       = 0
  [pid  5060] accept(4, NULL, NULL <unfinished ...>
  [pid  5060] <... accept resumed>)       = 3
  [pid  5059] <... connect resumed>)      = 0

Then, accept4() is called but blocked, which causes the general protection
fault later.

  [pid  5059] accept4(4, NULL, 0x20000400, SOCK_NONBLOCK <unfinished ...>

After that, another self-accept occurs by accept() and writev().

  [pid  5060] accept(4, NULL, NULL <unfinished ...>
  [pid  5061] writev(3, [{iov_base=...}] <unfinished ...>
  [pid  5061] <... writev resumed>)       = 99
  [pid  5060] <... accept resumed>)       = 6

Finally, the leader thread close()s all FDs.  Since the three FDs
reference the same socket, nr_release() does the cleanup for it
three times, and the remaining accept4() causes the following fault.

  [pid  5058] close(3)                    = 0
  [pid  5058] close(4)                    = 0
  [pid  5058] close(5)                    = -1 EBADF (Bad file descriptor)
  [pid  5058] close(6)                    = 0
  [pid  5058] <... exit_group resumed>)   = ?
  [   83.456055][ T5059] general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN

To avoid the issue, we need to return an error for connect() if
another connect() is in progress, as done in __inet_stream_connect().

[0]:
general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f]
CPU: 0 PID: 5059 Comm: syz-executor.0 Not tainted 6.5.0-rc5-syzkaller-00194-gace0ab3a4b54 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
RIP: 0010:__lock_acquire+0x109/0x5de0 kernel/locking/lockdep.c:5012
Code: 45 85 c9 0f 84 cc 0e 00 00 44 8b 05 11 6e 23 0b 45 85 c0 0f 84 be 0d 00 00 48 ba 00 00 00 00 00 fc ff df 4c 89 d1 48 c1 e9 03 <80> 3c 11 00 0f 85 e8 40 00 00 49 81 3a a0 69 48 90 0f 84 96 0d 00
RSP: 0018:ffffc90003d6f9e0 EFLAGS: 00010006
RAX: ffff8880244c8000 RBX: 1ffff920007adf6c RCX: 0000000000000003
RDX: dffffc0000000000 RSI: 0000000000000000 RDI: 0000000000000018
RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000001
R10: 0000000000000018 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
FS:  00007f51d519a6c0(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f51d5158d58 CR3: 000000002943f000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <TASK>
 lock_acquire kernel/locking/lockdep.c:5761 [inline]
 lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726
 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
 _raw_spin_lock_irqsave+0x3a/0x50 kernel/locking/spinlock.c:162
 prepare_to_wait+0x47/0x380 kernel/sched/wait.c:269
 nr_accept+0x20d/0x650 net/netrom/af_netrom.c:798
 do_accept+0x3a6/0x570 net/socket.c:1872
 __sys_accept4_file net/socket.c:1913 [inline]
 __sys_accept4+0x99/0x120 net/socket.c:1943
 __do_sys_accept4 net/socket.c:1954 [inline]
 __se_sys_accept4 net/socket.c:1951 [inline]
 __x64_sys_accept4+0x96/0x100 net/socket.c:1951
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7f51d447cae9
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f51d519a0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000120
RAX: ffffffffffffffda RBX: 00007f51d459bf80 RCX: 00007f51d447cae9
RDX: 0000000020000400 RSI: 0000000000000000 RDI: 0000000000000004
RBP: 00007f51d44c847a R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000800 R11: 0000000000000246 R12: 0000000000000000
R13: 000000000000000b R14: 00007f51d459bf80 R15: 00007ffc25c34e48
 </TASK>

Link: https://syzkaller.appspot.com/text?tag=CrashLog&x=152cdb63a80000 [1]
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+666c97e4686410e79649@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=666c97e4686410e79649
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netrom/af_netrom.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index eb8ccbd58df7..96e91ab71573 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -660,6 +660,11 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 		goto out_release;
 	}
 
+	if (sock->state == SS_CONNECTING) {
+		err = -EALREADY;
+		goto out_release;
+	}
+
 	sk->sk_state   = TCP_CLOSE;
 	sock->state = SS_UNCONNECTED;
 

From bb5ed01cd2428cd25b1c88a3a9cba87055eb289f Mon Sep 17 00:00:00 2001
From: Radoslaw Tyl <radoslawx.tyl@intel.com>
Date: Thu, 24 Aug 2023 13:46:19 -0700
Subject: [PATCH 1981/2047] igb: set max size RX buffer when store bad packet
 is enabled

Increase the RX buffer size to 3K when the SBP bit is on. The size of
the RX buffer determines the number of pages allocated which may not
be sufficient for receive frames larger than the set MTU size.

Cc: stable@vger.kernel.org
Fixes: 89eaefb61dc9 ("igb: Support RX-ALL feature flag.")
Reported-by: Manfred Rudigier <manfred.rudigier@omicronenergy.com>
Signed-off-by: Radoslaw Tyl <radoslawx.tyl@intel.com>
Tested-by: Arpana Arland <arpanax.arland@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 9a2561409b06..08e3df37089f 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -4814,6 +4814,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
 static void igb_set_rx_buffer_len(struct igb_adapter *adapter,
 				  struct igb_ring *rx_ring)
 {
+#if (PAGE_SIZE < 8192)
+	struct e1000_hw *hw = &adapter->hw;
+#endif
+
 	/* set build_skb and buffer size flags */
 	clear_ring_build_skb_enabled(rx_ring);
 	clear_ring_uses_large_buffer(rx_ring);
@@ -4824,10 +4828,9 @@ static void igb_set_rx_buffer_len(struct igb_adapter *adapter,
 	set_ring_build_skb_enabled(rx_ring);
 
 #if (PAGE_SIZE < 8192)
-	if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB)
-		return;
-
-	set_ring_uses_large_buffer(rx_ring);
+	if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB ||
+	    rd32(E1000_RCTL) & E1000_RCTL_SBP)
+		set_ring_uses_large_buffer(rx_ring);
 #endif
 }
 

From a3aa97be69a7cc14ddc2bb0add0b9c51cb74bf83 Mon Sep 17 00:00:00 2001
From: Sumit Gupta <sumitg@nvidia.com>
Date: Fri, 25 Aug 2023 16:49:20 +0530
Subject: [PATCH 1982/2047] cpufreq: tegra194: add online/offline hooks

Implement the light-weight tear down and bring up helpers to reduce the
amount of work to do on CPU offline/online operation.
This change helps to make the hotplugging paths much faster.

Suggested-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
Link: https://lore.kernel.org/lkml/20230816033402.3abmugb5goypvllm@vireshk-i7/
[ Viresh: Fixed rebase conflict ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/tegra194-cpufreq.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
index c90b30469165..88df7e5a0573 100644
--- a/drivers/cpufreq/tegra194-cpufreq.c
+++ b/drivers/cpufreq/tegra194-cpufreq.c
@@ -508,6 +508,21 @@ static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
 	return 0;
 }
 
+static int tegra194_cpufreq_online(struct cpufreq_policy *policy)
+{
+	/* We did light-weight tear down earlier, nothing to do here */
+	return 0;
+}
+
+static int tegra194_cpufreq_offline(struct cpufreq_policy *policy)
+{
+	/*
+	 * Preserve policy->driver_data and don't free resources on light-weight
+	 * tear down.
+	 */
+	return 0;
+}
+
 static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy,
 				       unsigned int index)
 {
@@ -535,6 +550,8 @@ static struct cpufreq_driver tegra194_cpufreq_driver = {
 	.target_index = tegra194_cpufreq_set_target,
 	.get = tegra194_get_speed,
 	.init = tegra194_cpufreq_init,
+	.online = tegra194_cpufreq_online,
+	.offline = tegra194_cpufreq_offline,
 	.attr = cpufreq_generic_attr,
 };
 

From 03997da042dac73c69e60d91942c727c76828b65 Mon Sep 17 00:00:00 2001
From: Liao Chang <liaochang1@huawei.com>
Date: Sat, 26 Aug 2023 09:51:13 +0000
Subject: [PATCH 1983/2047] cpufreq: powernow-k8: Use related_cpus instead of
 cpus in driver.exit()

Since the 'cpus' field of policy structure will become empty in the
cpufreq core API, it is better to use 'related_cpus' in the exit()
callback of driver.

Fixes: c3274763bfc3 ("cpufreq: powernow-k8: Initialize per-cpu data-structures properly")
Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/powernow-k8.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index d289036beff2..b10f7a1b77f1 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -1101,7 +1101,8 @@ static int powernowk8_cpu_exit(struct cpufreq_policy *pol)
 
 	kfree(data->powernow_table);
 	kfree(data);
-	for_each_cpu(cpu, pol->cpus)
+	/* pol->cpus will be empty here, use related_cpus instead. */
+	for_each_cpu(cpu, pol->related_cpus)
 		per_cpu(powernow_data, cpu) = NULL;
 
 	return 0;

From 72dd7e427e16fca901daaa399e4b0893c3bb85c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= <alexis.lothore@bootlin.com>
Date: Fri, 25 Aug 2023 10:20:27 +0200
Subject: [PATCH 1984/2047] dt-bindings: net: dsa: marvell: fix wrong model in
 compatibility list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix wrong switch name in compatibility list. 88E6163 switch does not exist
and is in fact 88E6361

Fixes: 9229a9483d80 ("dt-bindings: net: dsa: marvell: add MV88E6361 switch to compatibility list")
Signed-off-by: Alexis Lothoré <alexis.lothore@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/marvell.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt
index 33726134f5c9..6ec0c181b6db 100644
--- a/Documentation/devicetree/bindings/net/dsa/marvell.txt
+++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt
@@ -20,7 +20,7 @@ which is at a different MDIO base address in different switch families.
 			  6171, 6172, 6175, 6176, 6185, 6240, 6320, 6321,
 			  6341, 6350, 6351, 6352
 - "marvell,mv88e6190"	: Switch has base address 0x00. Use with models:
-			  6163, 6190, 6190X, 6191, 6290, 6390, 6390X
+			  6190, 6190X, 6191, 6290, 6361, 6390, 6390X
 - "marvell,mv88e6250"	: Switch has base address 0x08 or 0x18. Use with model:
 			  6220, 6250
 

From 5266733c798652925aaf556aacc3f3389abc9d32 Mon Sep 17 00:00:00 2001
From: Hariprasad Kelam <hkelam@marvell.com>
Date: Fri, 25 Aug 2023 16:10:19 +0530
Subject: [PATCH 1985/2047] octeontx2-af: CN10KB: Add USGMII LMAC mode

Upon physical link change, firmware reports to the kernel about the
change along with the details like speed, lmac_type_id, etc.
Kernel derives lmac_type based on lmac_type_id received from firmware.

This patch extends current lmac list with new USGMII mode supported
by CN10KB RPM block.

Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 1 +
 drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 988383e20bb8..9392ef95a245 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -55,6 +55,7 @@ static const char *cgx_lmactype_string[LMAC_MODE_MAX] = {
 	[LMAC_MODE_50G_R] = "50G_R",
 	[LMAC_MODE_100G_R] = "100G_R",
 	[LMAC_MODE_USXGMII] = "USXGMII",
+	[LMAC_MODE_USGMII] = "USGMII",
 };
 
 /* CGX PHY management internal APIs */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
index 574114179688..6f7d1dee5830 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -110,6 +110,7 @@ enum LMAC_TYPE {
 	LMAC_MODE_50G_R		= 8,
 	LMAC_MODE_100G_R	= 9,
 	LMAC_MODE_USXGMII	= 10,
+	LMAC_MODE_USGMII	= 11,
 	LMAC_MODE_MAX,
 };
 

From f027fd51edebb326c1edb5d283ceaa7b8f7508df Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@marvell.com>
Date: Fri, 25 Aug 2023 16:10:20 +0530
Subject: [PATCH 1986/2047] octeontx2-af: Don't treat lack of CGX interfaces as
 error

Don't treat lack of CGX LMACs on the system as a error.
Instead ignore it so that LBK VFs are created and can be used.

Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index b3f766b970ca..4e3aec7bdbee 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -345,7 +345,7 @@ int rvu_cgx_init(struct rvu *rvu)
 	rvu->cgx_cnt_max = cgx_get_cgxcnt_max();
 	if (!rvu->cgx_cnt_max) {
 		dev_info(rvu->dev, "No CGX devices found!\n");
-		return -ENODEV;
+		return 0;
 	}
 
 	rvu->cgx_idmap = devm_kzalloc(rvu->dev, rvu->cgx_cnt_max *

From 2f387525d484c0eca841b71842e1cd672220c20c Mon Sep 17 00:00:00 2001
From: Hariprasad Kelam <hkelam@marvell.com>
Date: Fri, 25 Aug 2023 16:10:21 +0530
Subject: [PATCH 1987/2047] octeontx2-af: Add validation of lmac

With the addition of new MAC blocks like CN10K RPM and CN10KB
RPM_USX, LMACs are noncontiguous. Though in most of the functions,
lmac validation checks exist but in few functions they are missing.
This patch adds the same.

Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 9392ef95a245..e06f77ad6106 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -232,6 +232,9 @@ int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr)
 	int index, id;
 	u64 cfg;
 
+	if (!lmac)
+		return -ENODEV;
+
 	/* access mac_ops to know csr_offset */
 	mac_ops = cgx_dev->mac_ops;
 
@@ -550,15 +553,16 @@ void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable)
 {
 	struct cgx *cgx = cgx_get_pdata(cgx_id);
 	struct lmac *lmac = lmac_pdata(lmac_id, cgx);
-	u16 max_dmac = lmac->mac_to_index_bmap.max;
 	struct mac_ops *mac_ops;
+	u16 max_dmac;
 	int index, i;
 	u64 cfg = 0;
 	int id;
 
-	if (!cgx)
+	if (!cgx || !lmac)
 		return;
 
+	max_dmac = lmac->mac_to_index_bmap.max;
 	id = get_sequence_id_of_lmac(cgx, lmac_id);
 
 	mac_ops = cgx->mac_ops;
@@ -731,7 +735,7 @@ int cgx_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp)
 	int corr_reg, uncorr_reg;
 	struct cgx *cgx = cgxd;
 
-	if (!cgx || lmac_id >= cgx->lmac_count)
+	if (!is_lmac_valid(cgx, lmac_id))
 		return -ENODEV;
 
 	if (cgx->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_NONE)

From 17d1368f4f19eec2047c7527e7e606496481f1bf Mon Sep 17 00:00:00 2001
From: Hariprasad Kelam <hkelam@marvell.com>
Date: Fri, 25 Aug 2023 16:10:22 +0530
Subject: [PATCH 1988/2047] octeontx2-af: print error message incase of invalid
 pf mapping

During AF driver initialization, it creates a mapping between pf to
cgx,lmac pair. Whenever there is a physical link change, using this
mapping driver forwards the message to the associated netdev.

This patch prints error message incase of cgx,lmac pair is not
associated with any pf netdev.

Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index 4e3aec7bdbee..f2b1edf1bb43 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -236,6 +236,11 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu)
 
 	linfo = &event->link_uinfo;
 	pfmap = cgxlmac_to_pfmap(rvu, event->cgx_id, event->lmac_id);
+	if (!pfmap) {
+		dev_err(rvu->dev, "CGX port%d:%d not mapped with PF\n",
+			event->cgx_id, event->lmac_id);
+		return;
+	}
 
 	do {
 		pfid = find_first_bit(&pfmap,

From 977ad86c2a1bcaf58f01ab98df5cc145083c489c Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 25 Aug 2023 15:32:41 +0200
Subject: [PATCH 1989/2047] dccp: Fix out of bounds access in DCCP error
 handler

There was a previous attempt to fix an out-of-bounds access in the DCCP
error handlers, but that fix assumed that the error handlers only want
to access the first 8 bytes of the DCCP header. Actually, they also look
at the DCCP sequence number, which is stored beyond 8 bytes, so an
explicit pskb_may_pull() is required.

Fixes: 6706a97fec96 ("dccp: fix out of bound access in dccp_v4_err()")
Fixes: 1aa9d1a0e7ee ("ipv6: dccp: fix out of bound access in dccp_v6_err()")
Cc: stable@vger.kernel.org
Signed-off-by: Jann Horn <jannh@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c | 13 +++++++++----
 net/dccp/ipv6.c | 15 ++++++++++-----
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index a545ad71201c..a5361fb7a415 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -255,12 +255,17 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
 	int err;
 	struct net *net = dev_net(skb->dev);
 
-	/* Only need dccph_dport & dccph_sport which are the first
-	 * 4 bytes in dccp header.
+	/* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
+	 * which is in byte 7 of the dccp header.
 	 * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
+	 *
+	 * Later on, we want to access the sequence number fields, which are
+	 * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
 	 */
-	BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
-	BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+	dh = (struct dccp_hdr *)(skb->data + offset);
+	if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
+		return -EINVAL;
+	iph = (struct iphdr *)skb->data;
 	dh = (struct dccp_hdr *)(skb->data + offset);
 
 	sk = __inet_lookup_established(net, &dccp_hashinfo,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d29d1163203d..25816e790527 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -74,7 +74,7 @@ static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
 static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			u8 type, u8 code, int offset, __be32 info)
 {
-	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+	const struct ipv6hdr *hdr;
 	const struct dccp_hdr *dh;
 	struct dccp_sock *dp;
 	struct ipv6_pinfo *np;
@@ -83,12 +83,17 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	__u64 seq;
 	struct net *net = dev_net(skb->dev);
 
-	/* Only need dccph_dport & dccph_sport which are the first
-	 * 4 bytes in dccp header.
+	/* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
+	 * which is in byte 7 of the dccp header.
 	 * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
+	 *
+	 * Later on, we want to access the sequence number fields, which are
+	 * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
 	 */
-	BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
-	BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+	dh = (struct dccp_hdr *)(skb->data + offset);
+	if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
+		return -EINVAL;
+	hdr = (const struct ipv6hdr *)skb->data;
 	dh = (struct dccp_hdr *)(skb->data + offset);
 
 	sk = __inet6_lookup_established(net, &dccp_hashinfo,

From 28d18b673ffa2d13112ddb6e4c32c60d9b0cda50 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 25 Aug 2023 15:49:45 +0200
Subject: [PATCH 1990/2047] net: Fix skb consume leak in sch_handle_egress

Fix a memory leak for the tc egress path with TC_ACT_{STOLEN,QUEUED,TRAP}:

  [...]
  unreferenced object 0xffff88818bcb4f00 (size 232):
  comm "softirq", pid 0, jiffies 4299085078 (age 134.028s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 80 70 61 81 88 ff ff 00 41 31 14 81 88 ff ff  ..pa.....A1.....
  backtrace:
    [<ffffffff9991b938>] kmem_cache_alloc_node+0x268/0x400
    [<ffffffff9b3d9231>] __alloc_skb+0x211/0x2c0
    [<ffffffff9b3f0c7e>] alloc_skb_with_frags+0xbe/0x6b0
    [<ffffffff9b3bf9a9>] sock_alloc_send_pskb+0x6a9/0x870
    [<ffffffff9b6b3f00>] __ip_append_data+0x14d0/0x3bf0
    [<ffffffff9b6ba24e>] ip_append_data+0xee/0x190
    [<ffffffff9b7e1496>] icmp_push_reply+0xa6/0x470
    [<ffffffff9b7e4030>] icmp_reply+0x900/0xa00
    [<ffffffff9b7e42e3>] icmp_echo.part.0+0x1a3/0x230
    [<ffffffff9b7e444d>] icmp_echo+0xcd/0x190
    [<ffffffff9b7e9566>] icmp_rcv+0x806/0xe10
    [<ffffffff9b699bd1>] ip_protocol_deliver_rcu+0x351/0x3d0
    [<ffffffff9b699f14>] ip_local_deliver_finish+0x2b4/0x450
    [<ffffffff9b69a234>] ip_local_deliver+0x174/0x1f0
    [<ffffffff9b69a4b2>] ip_sublist_rcv_finish+0x1f2/0x420
    [<ffffffff9b69ab56>] ip_sublist_rcv+0x466/0x920
  [...]

I was able to reproduce this via:

  ip link add dev dummy0 type dummy
  ip link set dev dummy0 up
  tc qdisc add dev eth0 clsact
  tc filter add dev eth0 egress protocol ip prio 1 u32 match ip protocol 1 0xff action mirred egress redirect dev dummy0
  ping 1.1.1.1
  <stolen>

After the fix, there are no kmemleak reports with the reproducer. This is
in line with what is also done on the ingress side, and from debugging the
skb_unref(skb) on dummy xmit and sch_handle_egress() side, it is visible
that these are two different skbs with both skb_unref(skb) as true. The two
seen skbs are due to mirred doing a skb_clone() internally as use_reinsert
is false in tcf_mirred_act() for egress. This was initially reported by Gal.

Fixes: e420bed02507 ("bpf: Add fd-based tcx multi-prog infra with link support")
Reported-by: Gal Pressman <gal@nvidia.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/bdfc2640-8f65-5b56-4472-db8e2b161aab@nvidia.com
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 17e6281e408c..9f6ed6d97f89 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4061,6 +4061,7 @@ egress_verdict:
 	case TC_ACT_STOLEN:
 	case TC_ACT_QUEUED:
 	case TC_ACT_TRAP:
+		consume_skb(skb);
 		*ret = NET_XMIT_SUCCESS;
 		return NULL;
 	}

From 3a1e2f43985af0dea5750c6436f8cb979780c084 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 25 Aug 2023 15:49:46 +0200
Subject: [PATCH 1991/2047] net: Make consumed action consistent in
 sch_handle_egress

While looking at TC_ACT_* handling, the TC_ACT_CONSUMED is only handled in
sch_handle_ingress but not sch_handle_egress. This was added via cd11b164073b
("net/tc: introduce TC_ACT_REINSERT.") and e5cf1baf92cb ("act_mirred: use
TC_ACT_REINSERT when possible") and later got renamed into TC_ACT_CONSUMED
via 720f22fed81b ("net: sched: refactor reinsert action").

The initial work was targeted for ovs back then and only needed on ingress,
and the mirred action module also restricts it to only that. However, given
it's an API contract it would still make sense to make this consistent to
sch_handle_ingress and handle it on egress side in the same way, that is,
setting return code to "success" and returning NULL back to the caller as
otherwise an action module sitting on egress returning TC_ACT_CONSUMED could
lead to an UAF when untreated.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 9f6ed6d97f89..ccff2b6ef958 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4062,6 +4062,8 @@ egress_verdict:
 	case TC_ACT_QUEUED:
 	case TC_ACT_TRAP:
 		consume_skb(skb);
+		fallthrough;
+	case TC_ACT_CONSUMED:
 		*ret = NET_XMIT_SUCCESS;
 		return NULL;
 	}

From ec1b90886f3cd7e90d4c0381dffb418e2d3a1473 Mon Sep 17 00:00:00 2001
From: Mikhail Kobuk <m.kobuk@ispras.ru>
Date: Fri, 25 Aug 2023 22:04:41 +0300
Subject: [PATCH 1992/2047] ethernet: tg3: remove unreachable code

'tp->irq_max' value is either 1 [L16336] or 5 [L16354], as indicated in
tg3_get_invariants(). Therefore, 'i' can't exceed 4 in tg3_init_one()
that makes (i <= 4) always true. Moreover, 'intmbx' value set at the
last iteration is not used later in it's scope.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: 78f90dcf184b ("tg3: Move napi_add calls below tg3_get_invariants")
Signed-off-by: Mikhail Kobuk <m.kobuk@ispras.ru>
Reviewed-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/tg3.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index cb2810f175cc..5408ed051772 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -17795,10 +17795,7 @@ static int tg3_init_one(struct pci_dev *pdev,
 		tnapi->tx_pending = TG3_DEF_TX_RING_PENDING;
 
 		tnapi->int_mbox = intmbx;
-		if (i <= 4)
-			intmbx += 0x8;
-		else
-			intmbx += 0x4;
+		intmbx += 0x8;
 
 		tnapi->consmbox = rcvmbx;
 		tnapi->prodmbox = sndmbx;

From 90ca51e8c654699b672ba61aeaa418dfb3252e5e Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 25 Aug 2023 21:44:01 +0200
Subject: [PATCH 1993/2047] r8169: fix ASPM-related issues on a number of
 systems with NIC version from RTL8168h

This effectively reverts 4b5f82f6aaef. On a number of systems ASPM L1
causes tx timeouts with RTL8168h, see referenced bug report.

Fixes: 4b5f82f6aaef ("r8169: enable ASPM L1/L1.1 from RTL8168h")
Cc: stable@vger.kernel.org
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217814
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 5eb50b265c0b..6351a2dc13bc 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5239,13 +5239,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	/* Disable ASPM L1 as that cause random device stop working
 	 * problems as well as full system hangs for some PCIe devices users.
-	 * Chips from RTL8168h partially have issues with L1.2, but seem
-	 * to work fine with L1 and L1.1.
 	 */
 	if (rtl_aspm_is_safe(tp))
 		rc = 0;
-	else if (tp->mac_version >= RTL_GIGA_MAC_VER_46)
-		rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
 	else
 		rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
 	tp->aspm_manageable = !rc;

From bf68583624c56bab26b4394fb4721461ded62f94 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 26 Aug 2023 10:23:30 +0800
Subject: [PATCH 1994/2047] selftests: bonding: create directly devices in the
 target namespaces

If failed to set link1_1 to netns client, we should delete link1_1 in the
cleanup path. But if set link1_1 to netns client successfully, delete
link1_1 will report warning. So it will be safer creating directly the
devices in the target namespaces.

Reported-by: Hangbin Liu <liuhangbin@gmail.com>
Closes: https://lore.kernel.org/all/ZNyJx1HtXaUzOkNA@Laptop-X1/
Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Acked-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/bonding/bond-arp-interval-causes-panic.sh | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh
index 7b2d421f09cf..4917dbb35a44 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh
@@ -22,14 +22,12 @@ server_ip4=192.168.1.254
 echo 180 >/proc/sys/kernel/panic
 
 # build namespaces
-ip link add dev link1_1 type veth peer name link1_2
-
 ip netns add "server"
-ip link set dev link1_2 netns server up name eth0
+ip netns add "client"
+ip -n client link add eth0 type veth peer name eth0 netns server
+ip netns exec server ip link set dev eth0 up
 ip netns exec server ip addr add ${server_ip4}/24 dev eth0
 
-ip netns add "client"
-ip link set dev link1_1 netns client down name eth0
 ip netns exec client ip link add dev bond0 down type bond mode 1 \
 	miimon 100 all_slaves_active 1
 ip netns exec client ip link set dev eth0 down master bond0

From 8be6f88b9d3fe1f6724daec4a70d6023742c9df7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 28 Aug 2023 08:47:32 +0000
Subject: [PATCH 1995/2047] inet: fix IP_TRANSPARENT error handling

My recent patch forgot to change error handling for IP_TRANSPARENT
socket option.

WARNING: bad unlock balance detected!
6.5.0-rc7-syzkaller-01717-g59da9885767a #0 Not tainted
-------------------------------------
syz-executor151/5028 is trying to release lock (sk_lock-AF_INET) at:
[<ffffffff88213983>] sockopt_release_sock+0x53/0x70 net/core/sock.c:1073
but there are no more locks to release!

other info that might help us debug this:
1 lock held by syz-executor151/5028:

stack backtrace:
CPU: 0 PID: 5028 Comm: syz-executor151 Not tainted 6.5.0-rc7-syzkaller-01717-g59da9885767a #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106
__lock_release kernel/locking/lockdep.c:5438 [inline]
lock_release+0x4b5/0x680 kernel/locking/lockdep.c:5781
sock_release_ownership include/net/sock.h:1824 [inline]
release_sock+0x175/0x1b0 net/core/sock.c:3527
sockopt_release_sock+0x53/0x70 net/core/sock.c:1073
do_ip_setsockopt+0x12c1/0x3640 net/ipv4/ip_sockglue.c:1364
ip_setsockopt+0x59/0xe0 net/ipv4/ip_sockglue.c:1419
raw_setsockopt+0x218/0x290 net/ipv4/raw.c:833
__sys_setsockopt+0x2cd/0x5b0 net/socket.c:2305
__do_sys_setsockopt net/socket.c:2316 [inline]
__se_sys_setsockopt net/socket.c:2313 [inline]

Fixes: 4bd0623f04ee ("inet: move inet->transparent to inet->inet_flags")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Simon Horman <horms@kernel.org>
Cc: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 54ad0f0d5c2d..d1c73660b844 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1007,12 +1007,10 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
 		return 0;
 	case IP_TRANSPARENT:
 		if (!!val && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
-		    !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
-			err = -EPERM;
-			break;
-		}
+		    !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+			return -EPERM;
 		if (optlen < 1)
-			goto e_inval;
+			return -EINVAL;
 		inet_assign_bit(TRANSPARENT, sk, val);
 		return 0;
 	case IP_NODEFRAG:

From de0e85b29edfc68046d587c7d67bbd2bdc31b73f Mon Sep 17 00:00:00 2001
From: Sumit Gupta <sumitg@nvidia.com>
Date: Fri, 25 Aug 2023 16:46:17 +0530
Subject: [PATCH 1996/2047] cpufreq: tegra194: remove opp table in exit hook

Add exit hook and remove OPP table when the device gets unregistered.
This will fix the error messages when the CPU FREQ driver module is
removed and then re-inserted. It also fixes these messages while
onlining the first CPU from a policy whose all CPU's were previously
offlined.

 debugfs: File 'cpu5' in directory 'opp' already present!
 debugfs: File 'cpu6' in directory 'opp' already present!
 debugfs: File 'cpu7' in directory 'opp' already present!

Fixes: f41e1442ac5b ("cpufreq: tegra194: add OPP support and set bandwidth")
Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
[ Viresh: Dropped irrelevant change from it ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/tegra194-cpufreq.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
index 88df7e5a0573..88ef5e57ccd0 100644
--- a/drivers/cpufreq/tegra194-cpufreq.c
+++ b/drivers/cpufreq/tegra194-cpufreq.c
@@ -520,6 +520,17 @@ static int tegra194_cpufreq_offline(struct cpufreq_policy *policy)
 	 * Preserve policy->driver_data and don't free resources on light-weight
 	 * tear down.
 	 */
+
+	return 0;
+}
+
+static int tegra194_cpufreq_exit(struct cpufreq_policy *policy)
+{
+	struct device *cpu_dev = get_cpu_device(policy->cpu);
+
+	dev_pm_opp_remove_all_dynamic(cpu_dev);
+	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
+
 	return 0;
 }
 
@@ -550,6 +561,7 @@ static struct cpufreq_driver tegra194_cpufreq_driver = {
 	.target_index = tegra194_cpufreq_set_target,
 	.get = tegra194_get_speed,
 	.init = tegra194_cpufreq_init,
+	.exit = tegra194_cpufreq_exit,
 	.online = tegra194_cpufreq_online,
 	.offline = tegra194_cpufreq_offline,
 	.attr = cpufreq_generic_attr,

From 56e65312830ec45e3ccbc929de6f5b86bc301546 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:43 +0200
Subject: [PATCH 1997/2047] devlink: push object register/unregister
 notifications into separate helpers

In preparations of leftover.c split to individual files, avoid need to
have object structures exposed in devl_internal.h and allow to have them
maintained in object files.

The register/unregister notifications need to know the structures
to iterate lists. To avoid the need, introduce per-object
register/unregister notification helpers and use them.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-2-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/leftover.c | 247 +++++++++++++++++++++++++++--------------
 1 file changed, 163 insertions(+), 84 deletions(-)

diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 149752bc9f2d..890eafac9f8f 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -1058,6 +1058,26 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
 				0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+static void devlink_ports_notify(struct devlink *devlink,
+				 enum devlink_command cmd)
+{
+	struct devlink_port *devlink_port;
+	unsigned long port_index;
+
+	xa_for_each(&devlink->ports, port_index, devlink_port)
+		devlink_port_notify(devlink_port, cmd);
+}
+
+static void devlink_ports_notify_register(struct devlink *devlink)
+{
+	devlink_ports_notify(devlink, DEVLINK_CMD_PORT_NEW);
+}
+
+static void devlink_ports_notify_unregister(struct devlink *devlink)
+{
+	devlink_ports_notify(devlink, DEVLINK_CMD_PORT_DEL);
+}
+
 static void devlink_rate_notify(struct devlink_rate *devlink_rate,
 				enum devlink_command cmd)
 {
@@ -1084,6 +1104,22 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
 				0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+static void devlink_rates_notify_register(struct devlink *devlink)
+{
+	struct devlink_rate *rate_node;
+
+	list_for_each_entry(rate_node, &devlink->rate_list, list)
+		devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+}
+
+static void devlink_rates_notify_unregister(struct devlink *devlink)
+{
+	struct devlink_rate *rate_node;
+
+	list_for_each_entry_reverse(rate_node, &devlink->rate_list, list)
+		devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+}
+
 static int
 devlink_nl_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
 			     struct netlink_callback *cb, int flags)
@@ -1928,6 +1964,22 @@ static void devlink_linecard_notify(struct devlink_linecard *linecard,
 				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+static void devlink_linecards_notify_register(struct devlink *devlink)
+{
+	struct devlink_linecard *linecard;
+
+	list_for_each_entry(linecard, &devlink->linecard_list, list)
+		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+}
+
+static void devlink_linecards_notify_unregister(struct devlink *devlink)
+{
+	struct devlink_linecard *linecard;
+
+	list_for_each_entry_reverse(linecard, &devlink->linecard_list, list)
+		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
+}
+
 int devlink_nl_linecard_get_doit(struct sk_buff *skb, struct genl_info *info)
 {
 	struct devlink *devlink = info->user_ptr[0];
@@ -4285,6 +4337,26 @@ static void devlink_param_notify(struct devlink *devlink,
 				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+static void devlink_params_notify(struct devlink *devlink,
+				  enum devlink_command cmd)
+{
+	struct devlink_param_item *param_item;
+	unsigned long param_id;
+
+	xa_for_each(&devlink->params, param_id, param_item)
+		devlink_param_notify(devlink, 0, param_item, cmd);
+}
+
+static void devlink_params_notify_register(struct devlink *devlink)
+{
+	devlink_params_notify(devlink, DEVLINK_CMD_PARAM_NEW);
+}
+
+static void devlink_params_notify_unregister(struct devlink *devlink)
+{
+	devlink_params_notify(devlink, DEVLINK_CMD_PARAM_DEL);
+}
+
 static int devlink_nl_param_get_dump_one(struct sk_buff *msg,
 					 struct devlink *devlink,
 					 struct netlink_callback *cb,
@@ -4694,6 +4766,22 @@ static void devlink_nl_region_notify(struct devlink_region *region,
 				0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+static void devlink_regions_notify_register(struct devlink *devlink)
+{
+	struct devlink_region *region;
+
+	list_for_each_entry(region, &devlink->region_list, list)
+		devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+}
+
+static void devlink_regions_notify_unregister(struct devlink *devlink)
+{
+	struct devlink_region *region;
+
+	list_for_each_entry_reverse(region, &devlink->region_list, list)
+		devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+}
+
 /**
  * __devlink_snapshot_id_increment - Increment number of snapshots using an id
  *	@devlink: devlink instance
@@ -6662,98 +6750,36 @@ const struct genl_small_ops devlink_nl_small_ops[40] = {
 	/* -- No new ops here! Use split ops going forward! -- */
 };
 
-static void
-devlink_trap_policer_notify(struct devlink *devlink,
-			    const struct devlink_trap_policer_item *policer_item,
-			    enum devlink_command cmd);
-static void
-devlink_trap_group_notify(struct devlink *devlink,
-			  const struct devlink_trap_group_item *group_item,
-			  enum devlink_command cmd);
-static void devlink_trap_notify(struct devlink *devlink,
-				const struct devlink_trap_item *trap_item,
-				enum devlink_command cmd);
+static void devlink_trap_policers_notify_register(struct devlink *devlink);
+static void devlink_trap_policers_notify_unregister(struct devlink *devlink);
+static void devlink_trap_groups_notify_register(struct devlink *devlink);
+static void devlink_trap_groups_notify_unregister(struct devlink *devlink);
+static void devlink_traps_notify_register(struct devlink *devlink);
+static void devlink_traps_notify_unregister(struct devlink *devlink);
 
 void devlink_notify_register(struct devlink *devlink)
 {
-	struct devlink_trap_policer_item *policer_item;
-	struct devlink_trap_group_item *group_item;
-	struct devlink_param_item *param_item;
-	struct devlink_trap_item *trap_item;
-	struct devlink_port *devlink_port;
-	struct devlink_linecard *linecard;
-	struct devlink_rate *rate_node;
-	struct devlink_region *region;
-	unsigned long port_index;
-	unsigned long param_id;
-
 	devlink_notify(devlink, DEVLINK_CMD_NEW);
-	list_for_each_entry(linecard, &devlink->linecard_list, list)
-		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-
-	xa_for_each(&devlink->ports, port_index, devlink_port)
-		devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
-
-	list_for_each_entry(policer_item, &devlink->trap_policer_list, list)
-		devlink_trap_policer_notify(devlink, policer_item,
-					    DEVLINK_CMD_TRAP_POLICER_NEW);
-
-	list_for_each_entry(group_item, &devlink->trap_group_list, list)
-		devlink_trap_group_notify(devlink, group_item,
-					  DEVLINK_CMD_TRAP_GROUP_NEW);
-
-	list_for_each_entry(trap_item, &devlink->trap_list, list)
-		devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
-
-	list_for_each_entry(rate_node, &devlink->rate_list, list)
-		devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
-
-	list_for_each_entry(region, &devlink->region_list, list)
-		devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
-
-	xa_for_each(&devlink->params, param_id, param_item)
-		devlink_param_notify(devlink, 0, param_item,
-				     DEVLINK_CMD_PARAM_NEW);
+	devlink_linecards_notify_register(devlink);
+	devlink_ports_notify_register(devlink);
+	devlink_trap_policers_notify_register(devlink);
+	devlink_trap_groups_notify_register(devlink);
+	devlink_traps_notify_register(devlink);
+	devlink_rates_notify_register(devlink);
+	devlink_regions_notify_register(devlink);
+	devlink_params_notify_register(devlink);
 }
 
 void devlink_notify_unregister(struct devlink *devlink)
 {
-	struct devlink_trap_policer_item *policer_item;
-	struct devlink_trap_group_item *group_item;
-	struct devlink_param_item *param_item;
-	struct devlink_trap_item *trap_item;
-	struct devlink_port *devlink_port;
-	struct devlink_linecard *linecard;
-	struct devlink_rate *rate_node;
-	struct devlink_region *region;
-	unsigned long port_index;
-	unsigned long param_id;
-
-	xa_for_each(&devlink->params, param_id, param_item)
-		devlink_param_notify(devlink, 0, param_item,
-				     DEVLINK_CMD_PARAM_DEL);
-
-	list_for_each_entry_reverse(region, &devlink->region_list, list)
-		devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
-
-	list_for_each_entry_reverse(rate_node, &devlink->rate_list, list)
-		devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
-
-	list_for_each_entry_reverse(trap_item, &devlink->trap_list, list)
-		devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
-
-	list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list)
-		devlink_trap_group_notify(devlink, group_item,
-					  DEVLINK_CMD_TRAP_GROUP_DEL);
-	list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list,
-				    list)
-		devlink_trap_policer_notify(devlink, policer_item,
-					    DEVLINK_CMD_TRAP_POLICER_DEL);
-
-	xa_for_each(&devlink->ports, port_index, devlink_port)
-		devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
-	list_for_each_entry_reverse(linecard, &devlink->linecard_list, list)
-		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
+	devlink_params_notify_unregister(devlink);
+	devlink_regions_notify_unregister(devlink);
+	devlink_rates_notify_unregister(devlink);
+	devlink_traps_notify_unregister(devlink);
+	devlink_trap_groups_notify_unregister(devlink);
+	devlink_trap_policers_notify_unregister(devlink);
+	devlink_ports_notify_unregister(devlink);
+	devlink_linecards_notify_unregister(devlink);
 	devlink_notify(devlink, DEVLINK_CMD_DEL);
 }
 
@@ -8879,6 +8905,24 @@ devlink_trap_group_notify(struct devlink *devlink,
 				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+static void devlink_trap_groups_notify_register(struct devlink *devlink)
+{
+	struct devlink_trap_group_item *group_item;
+
+	list_for_each_entry(group_item, &devlink->trap_group_list, list)
+		devlink_trap_group_notify(devlink, group_item,
+					  DEVLINK_CMD_TRAP_GROUP_NEW);
+}
+
+static void devlink_trap_groups_notify_unregister(struct devlink *devlink)
+{
+	struct devlink_trap_group_item *group_item;
+
+	list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list)
+		devlink_trap_group_notify(devlink, group_item,
+					  DEVLINK_CMD_TRAP_GROUP_DEL);
+}
+
 static int
 devlink_trap_item_group_link(struct devlink *devlink,
 			     struct devlink_trap_item *trap_item)
@@ -8921,6 +8965,22 @@ static void devlink_trap_notify(struct devlink *devlink,
 				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+static void devlink_traps_notify_register(struct devlink *devlink)
+{
+	struct devlink_trap_item *trap_item;
+
+	list_for_each_entry(trap_item, &devlink->trap_list, list)
+		devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+}
+
+static void devlink_traps_notify_unregister(struct devlink *devlink)
+{
+	struct devlink_trap_item *trap_item;
+
+	list_for_each_entry_reverse(trap_item, &devlink->trap_list, list)
+		devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+}
+
 static int
 devlink_trap_register(struct devlink *devlink,
 		      const struct devlink_trap *trap, void *priv)
@@ -9382,6 +9442,25 @@ devlink_trap_policer_notify(struct devlink *devlink,
 				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+static void devlink_trap_policers_notify_register(struct devlink *devlink)
+{
+	struct devlink_trap_policer_item *policer_item;
+
+	list_for_each_entry(policer_item, &devlink->trap_policer_list, list)
+		devlink_trap_policer_notify(devlink, policer_item,
+					    DEVLINK_CMD_TRAP_POLICER_NEW);
+}
+
+static void devlink_trap_policers_notify_unregister(struct devlink *devlink)
+{
+	struct devlink_trap_policer_item *policer_item;
+
+	list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list,
+				    list)
+		devlink_trap_policer_notify(devlink, policer_item,
+					    DEVLINK_CMD_TRAP_POLICER_DEL);
+}
+
 static int
 devlink_trap_policer_register(struct devlink *devlink,
 			      const struct devlink_trap_policer *policer)

From eec1e5ea1d715ce2df8dcdf3dac7112df77a6e17 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:44 +0200
Subject: [PATCH 1998/2047] devlink: push port related code into separate file

Cut out another chunk from leftover.c and put port related code
into a separate file.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-3-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/Makefile        |    2 +-
 net/devlink/devl_internal.h |   32 +-
 net/devlink/leftover.c      | 1528 -----------------------------------
 net/devlink/port.c          | 1515 ++++++++++++++++++++++++++++++++++
 4 files changed, 1547 insertions(+), 1530 deletions(-)
 create mode 100644 net/devlink/port.c

diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index a087af581847..456bfb336540 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o health.o
+obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o port.o health.o
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index eb1d5066c73f..7d01e2060702 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -3,6 +3,7 @@
  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
  */
 
+#include <linux/etherdevice.h>
 #include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/notifier.h>
@@ -11,6 +12,8 @@
 #include <linux/xarray.h>
 #include <net/devlink.h>
 #include <net/net_namespace.h>
+#include <net/rtnetlink.h>
+#include <rdma/ib_verbs.h>
 
 #include "netlink_gen.h"
 
@@ -149,16 +152,37 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
 
 /* Notify */
 void devlink_notify(struct devlink *devlink, enum devlink_command cmd);
+void devlink_ports_notify_register(struct devlink *devlink);
+void devlink_ports_notify_unregister(struct devlink *devlink);
 
 /* Ports */
+#define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port)				\
+	WARN_ON_ONCE(!(devlink_port)->initialized)
+
+struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
+					       unsigned int port_index);
 int devlink_port_netdevice_event(struct notifier_block *nb,
 				 unsigned long event, void *ptr);
-
 struct devlink_port *
 devlink_port_get_from_info(struct devlink *devlink, struct genl_info *info);
 struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
 						 struct nlattr **attrs);
 
+/* Linecards */
+struct devlink_linecard {
+	struct list_head list;
+	struct devlink *devlink;
+	unsigned int index;
+	const struct devlink_linecard_ops *ops;
+	void *priv;
+	enum devlink_linecard_state state;
+	struct mutex state_lock; /* Protects state */
+	const char *type;
+	struct devlink_linecard_type *types;
+	unsigned int types_count;
+	struct devlink *nested_devlink;
+};
+
 /* Reload */
 bool devlink_reload_actions_valid(const struct devlink_ops *ops);
 int devlink_reload(struct devlink *devlink, struct net *dest_net,
@@ -190,6 +214,12 @@ int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info)
 int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
+				     struct genl_info *info);
+int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
 					    struct genl_info *info);
 int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 890eafac9f8f..d14b40fb8fdf 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -33,20 +33,6 @@
 
 #include "devl_internal.h"
 
-struct devlink_linecard {
-	struct list_head list;
-	struct devlink *devlink;
-	unsigned int index;
-	const struct devlink_linecard_ops *ops;
-	void *priv;
-	enum devlink_linecard_state state;
-	struct mutex state_lock; /* Protects state */
-	const char *type;
-	struct devlink_linecard_type *types;
-	unsigned int types_count;
-	struct devlink *nested_devlink;
-};
-
 /**
  * struct devlink_resource - devlink resource
  * @name: name of the resource
@@ -131,52 +117,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
 
-#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
-	(_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
-
-static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
-	[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
-	[DEVLINK_PORT_FN_ATTR_STATE] =
-		NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
-				 DEVLINK_PORT_FN_STATE_ACTIVE),
-	[DEVLINK_PORT_FN_ATTR_CAPS] =
-		NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
-};
-
-#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port)				\
-	WARN_ON_ONCE(!(devlink_port)->registered)
-#define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port)			\
-	WARN_ON_ONCE((devlink_port)->registered)
-#define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port)				\
-	WARN_ON_ONCE(!(devlink_port)->initialized)
-
-static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
-						      unsigned int port_index)
-{
-	return xa_load(&devlink->ports, port_index);
-}
-
-struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
-						 struct nlattr **attrs)
-{
-	if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
-		u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
-		struct devlink_port *devlink_port;
-
-		devlink_port = devlink_port_get_by_index(devlink, port_index);
-		if (!devlink_port)
-			return ERR_PTR(-ENODEV);
-		return devlink_port;
-	}
-	return ERR_PTR(-EINVAL);
-}
-
-struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
-						struct genl_info *info)
-{
-	return devlink_port_get_from_attrs(devlink, info->attrs);
-}
-
 static inline bool
 devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
 {
@@ -439,138 +379,6 @@ devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
 	return 0;
 }
 
-static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
-				     u32 cap, bool is_enable)
-{
-	caps->selector |= cap;
-	if (is_enable)
-		caps->value |= cap;
-}
-
-static int devlink_port_fn_roce_fill(struct devlink_port *devlink_port,
-				     struct nla_bitfield32 *caps,
-				     struct netlink_ext_ack *extack)
-{
-	bool is_enable;
-	int err;
-
-	if (!devlink_port->ops->port_fn_roce_get)
-		return 0;
-
-	err = devlink_port->ops->port_fn_roce_get(devlink_port, &is_enable,
-						  extack);
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			return 0;
-		return err;
-	}
-
-	devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable);
-	return 0;
-}
-
-static int devlink_port_fn_migratable_fill(struct devlink_port *devlink_port,
-					   struct nla_bitfield32 *caps,
-					   struct netlink_ext_ack *extack)
-{
-	bool is_enable;
-	int err;
-
-	if (!devlink_port->ops->port_fn_migratable_get ||
-	    devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
-		return 0;
-
-	err = devlink_port->ops->port_fn_migratable_get(devlink_port,
-							&is_enable, extack);
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			return 0;
-		return err;
-	}
-
-	devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_MIGRATABLE, is_enable);
-	return 0;
-}
-
-static int devlink_port_fn_ipsec_crypto_fill(struct devlink_port *devlink_port,
-					     struct nla_bitfield32 *caps,
-					     struct netlink_ext_ack *extack)
-{
-	bool is_enable;
-	int err;
-
-	if (!devlink_port->ops->port_fn_ipsec_crypto_get ||
-	    devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
-		return 0;
-
-	err = devlink_port->ops->port_fn_ipsec_crypto_get(devlink_port, &is_enable, extack);
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			return 0;
-		return err;
-	}
-
-	devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO, is_enable);
-	return 0;
-}
-
-static int devlink_port_fn_ipsec_packet_fill(struct devlink_port *devlink_port,
-					     struct nla_bitfield32 *caps,
-					     struct netlink_ext_ack *extack)
-{
-	bool is_enable;
-	int err;
-
-	if (!devlink_port->ops->port_fn_ipsec_packet_get ||
-	    devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
-		return 0;
-
-	err = devlink_port->ops->port_fn_ipsec_packet_get(devlink_port, &is_enable, extack);
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			return 0;
-		return err;
-	}
-
-	devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_IPSEC_PACKET, is_enable);
-	return 0;
-}
-
-static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
-				     struct sk_buff *msg,
-				     struct netlink_ext_ack *extack,
-				     bool *msg_updated)
-{
-	struct nla_bitfield32 caps = {};
-	int err;
-
-	err = devlink_port_fn_roce_fill(devlink_port, &caps, extack);
-	if (err)
-		return err;
-
-	err = devlink_port_fn_migratable_fill(devlink_port, &caps, extack);
-	if (err)
-		return err;
-
-	err = devlink_port_fn_ipsec_crypto_fill(devlink_port, &caps, extack);
-	if (err)
-		return err;
-
-	err = devlink_port_fn_ipsec_packet_fill(devlink_port, &caps, extack);
-	if (err)
-		return err;
-
-	if (!caps.selector)
-		return 0;
-	err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
-				 caps.selector);
-	if (err)
-		return err;
-
-	*msg_updated = true;
-	return 0;
-}
-
 static int
 devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
 				  struct genl_info *info,
@@ -661,113 +469,6 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port)
-{
-	if (devlink_nl_put_handle(msg, devlink_port->devlink))
-		return -EMSGSIZE;
-	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
-		return -EMSGSIZE;
-	return 0;
-}
-
-size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port)
-{
-	struct devlink *devlink = devlink_port->devlink;
-
-	return nla_total_size(strlen(devlink->dev->bus->name) + 1) /* DEVLINK_ATTR_BUS_NAME */
-	     + nla_total_size(strlen(dev_name(devlink->dev)) + 1) /* DEVLINK_ATTR_DEV_NAME */
-	     + nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */
-}
-
-static int devlink_nl_port_attrs_put(struct sk_buff *msg,
-				     struct devlink_port *devlink_port)
-{
-	struct devlink_port_attrs *attrs = &devlink_port->attrs;
-
-	if (!devlink_port->attrs_set)
-		return 0;
-	if (attrs->lanes) {
-		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_LANES, attrs->lanes))
-			return -EMSGSIZE;
-	}
-	if (nla_put_u8(msg, DEVLINK_ATTR_PORT_SPLITTABLE, attrs->splittable))
-		return -EMSGSIZE;
-	if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
-		return -EMSGSIZE;
-	switch (devlink_port->attrs.flavour) {
-	case DEVLINK_PORT_FLAVOUR_PCI_PF:
-		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
-				attrs->pci_pf.controller) ||
-		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_pf.pf))
-			return -EMSGSIZE;
-		if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_pf.external))
-			return -EMSGSIZE;
-		break;
-	case DEVLINK_PORT_FLAVOUR_PCI_VF:
-		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
-				attrs->pci_vf.controller) ||
-		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_vf.pf) ||
-		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, attrs->pci_vf.vf))
-			return -EMSGSIZE;
-		if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_vf.external))
-			return -EMSGSIZE;
-		break;
-	case DEVLINK_PORT_FLAVOUR_PCI_SF:
-		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
-				attrs->pci_sf.controller) ||
-		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
-				attrs->pci_sf.pf) ||
-		    nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_SF_NUMBER,
-				attrs->pci_sf.sf))
-			return -EMSGSIZE;
-		break;
-	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
-	case DEVLINK_PORT_FLAVOUR_CPU:
-	case DEVLINK_PORT_FLAVOUR_DSA:
-		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
-				attrs->phys.port_number))
-			return -EMSGSIZE;
-		if (!attrs->split)
-			return 0;
-		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
-				attrs->phys.port_number))
-			return -EMSGSIZE;
-		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
-				attrs->phys.split_subport_number))
-			return -EMSGSIZE;
-		break;
-	default:
-		break;
-	}
-	return 0;
-}
-
-static int devlink_port_fn_hw_addr_fill(struct devlink_port *port,
-					struct sk_buff *msg,
-					struct netlink_ext_ack *extack,
-					bool *msg_updated)
-{
-	u8 hw_addr[MAX_ADDR_LEN];
-	int hw_addr_len;
-	int err;
-
-	if (!port->ops->port_fn_hw_addr_get)
-		return 0;
-
-	err = port->ops->port_fn_hw_addr_get(port, hw_addr, &hw_addr_len,
-					     extack);
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			return 0;
-		return err;
-	}
-	err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr);
-	if (err)
-		return err;
-	*msg_updated = true;
-	return 0;
-}
-
 static int devlink_nl_rate_fill(struct sk_buff *msg,
 				struct devlink_rate *devlink_rate,
 				enum devlink_command cmd, u32 portid, u32 seq,
@@ -825,259 +526,6 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static bool
-devlink_port_fn_state_valid(enum devlink_port_fn_state state)
-{
-	return state == DEVLINK_PORT_FN_STATE_INACTIVE ||
-	       state == DEVLINK_PORT_FN_STATE_ACTIVE;
-}
-
-static bool
-devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
-{
-	return opstate == DEVLINK_PORT_FN_OPSTATE_DETACHED ||
-	       opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
-}
-
-static int devlink_port_fn_state_fill(struct devlink_port *port,
-				      struct sk_buff *msg,
-				      struct netlink_ext_ack *extack,
-				      bool *msg_updated)
-{
-	enum devlink_port_fn_opstate opstate;
-	enum devlink_port_fn_state state;
-	int err;
-
-	if (!port->ops->port_fn_state_get)
-		return 0;
-
-	err = port->ops->port_fn_state_get(port, &state, &opstate, extack);
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			return 0;
-		return err;
-	}
-	if (!devlink_port_fn_state_valid(state)) {
-		WARN_ON_ONCE(1);
-		NL_SET_ERR_MSG(extack, "Invalid state read from driver");
-		return -EINVAL;
-	}
-	if (!devlink_port_fn_opstate_valid(opstate)) {
-		WARN_ON_ONCE(1);
-		NL_SET_ERR_MSG(extack, "Invalid operational state read from driver");
-		return -EINVAL;
-	}
-	if (nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_STATE, state) ||
-	    nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_OPSTATE, opstate))
-		return -EMSGSIZE;
-	*msg_updated = true;
-	return 0;
-}
-
-static int
-devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable,
-			struct netlink_ext_ack *extack)
-{
-	return devlink_port->ops->port_fn_migratable_set(devlink_port, enable,
-							 extack);
-}
-
-static int
-devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
-			 struct netlink_ext_ack *extack)
-{
-	return devlink_port->ops->port_fn_roce_set(devlink_port, enable,
-						   extack);
-}
-
-static int
-devlink_port_fn_ipsec_crypto_set(struct devlink_port *devlink_port, bool enable,
-				 struct netlink_ext_ack *extack)
-{
-	return devlink_port->ops->port_fn_ipsec_crypto_set(devlink_port, enable, extack);
-}
-
-static int
-devlink_port_fn_ipsec_packet_set(struct devlink_port *devlink_port, bool enable,
-				 struct netlink_ext_ack *extack)
-{
-	return devlink_port->ops->port_fn_ipsec_packet_set(devlink_port, enable, extack);
-}
-
-static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
-				    const struct nlattr *attr,
-				    struct netlink_ext_ack *extack)
-{
-	struct nla_bitfield32 caps;
-	u32 caps_value;
-	int err;
-
-	caps = nla_get_bitfield32(attr);
-	caps_value = caps.value & caps.selector;
-	if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
-		err = devlink_port_fn_roce_set(devlink_port,
-					       caps_value & DEVLINK_PORT_FN_CAP_ROCE,
-					       extack);
-		if (err)
-			return err;
-	}
-	if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
-		err = devlink_port_fn_mig_set(devlink_port, caps_value &
-					      DEVLINK_PORT_FN_CAP_MIGRATABLE,
-					      extack);
-		if (err)
-			return err;
-	}
-	if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO) {
-		err = devlink_port_fn_ipsec_crypto_set(devlink_port, caps_value &
-						       DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO,
-						       extack);
-		if (err)
-			return err;
-	}
-	if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_PACKET) {
-		err = devlink_port_fn_ipsec_packet_set(devlink_port, caps_value &
-						       DEVLINK_PORT_FN_CAP_IPSEC_PACKET,
-						       extack);
-		if (err)
-			return err;
-	}
-	return 0;
-}
-
-static int
-devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
-				   struct netlink_ext_ack *extack)
-{
-	struct nlattr *function_attr;
-	bool msg_updated = false;
-	int err;
-
-	function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION);
-	if (!function_attr)
-		return -EMSGSIZE;
-
-	err = devlink_port_fn_hw_addr_fill(port, msg, extack, &msg_updated);
-	if (err)
-		goto out;
-	err = devlink_port_fn_caps_fill(port, msg, extack, &msg_updated);
-	if (err)
-		goto out;
-	err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
-out:
-	if (err || !msg_updated)
-		nla_nest_cancel(msg, function_attr);
-	else
-		nla_nest_end(msg, function_attr);
-	return err;
-}
-
-static int devlink_nl_port_fill(struct sk_buff *msg,
-				struct devlink_port *devlink_port,
-				enum devlink_command cmd, u32 portid, u32 seq,
-				int flags, struct netlink_ext_ack *extack)
-{
-	struct devlink *devlink = devlink_port->devlink;
-	void *hdr;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
-		goto nla_put_failure;
-
-	spin_lock_bh(&devlink_port->type_lock);
-	if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
-		goto nla_put_failure_type_locked;
-	if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
-	    nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE,
-			devlink_port->desired_type))
-		goto nla_put_failure_type_locked;
-	if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
-		if (devlink_port->type_eth.netdev &&
-		    (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
-				 devlink_port->type_eth.ifindex) ||
-		     nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
-				    devlink_port->type_eth.ifname)))
-			goto nla_put_failure_type_locked;
-	}
-	if (devlink_port->type == DEVLINK_PORT_TYPE_IB) {
-		struct ib_device *ibdev = devlink_port->type_ib.ibdev;
-
-		if (ibdev &&
-		    nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME,
-				   ibdev->name))
-			goto nla_put_failure_type_locked;
-	}
-	spin_unlock_bh(&devlink_port->type_lock);
-	if (devlink_nl_port_attrs_put(msg, devlink_port))
-		goto nla_put_failure;
-	if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
-		goto nla_put_failure;
-	if (devlink_port->linecard &&
-	    nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
-			devlink_port->linecard->index))
-		goto nla_put_failure;
-
-	genlmsg_end(msg, hdr);
-	return 0;
-
-nla_put_failure_type_locked:
-	spin_unlock_bh(&devlink_port->type_lock);
-nla_put_failure:
-	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
-}
-
-static void devlink_port_notify(struct devlink_port *devlink_port,
-				enum devlink_command cmd)
-{
-	struct devlink *devlink = devlink_port->devlink;
-	struct sk_buff *msg;
-	int err;
-
-	WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
-
-	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
-		return;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return;
-
-	err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL);
-	if (err) {
-		nlmsg_free(msg);
-		return;
-	}
-
-	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
-				0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static void devlink_ports_notify(struct devlink *devlink,
-				 enum devlink_command cmd)
-{
-	struct devlink_port *devlink_port;
-	unsigned long port_index;
-
-	xa_for_each(&devlink->ports, port_index, devlink_port)
-		devlink_port_notify(devlink_port, cmd);
-}
-
-static void devlink_ports_notify_register(struct devlink *devlink)
-{
-	devlink_ports_notify(devlink, DEVLINK_CMD_PORT_NEW);
-}
-
-static void devlink_ports_notify_unregister(struct devlink *devlink)
-{
-	devlink_ports_notify(devlink, DEVLINK_CMD_PORT_DEL);
-}
-
 static void devlink_rate_notify(struct devlink_rate *devlink_rate,
 				enum devlink_command cmd)
 {
@@ -1192,377 +640,6 @@ devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
 	return false;
 }
 
-int devlink_nl_port_get_doit(struct sk_buff *skb, struct genl_info *info)
-{
-	struct devlink_port *devlink_port = info->user_ptr[1];
-	struct sk_buff *msg;
-	int err;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW,
-				   info->snd_portid, info->snd_seq, 0,
-				   info->extack);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
-
-	return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
-			     struct netlink_callback *cb, int flags)
-{
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct devlink_port *devlink_port;
-	unsigned long port_index;
-	int err = 0;
-
-	xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
-		err = devlink_nl_port_fill(msg, devlink_port,
-					   DEVLINK_CMD_NEW,
-					   NETLINK_CB(cb->skb).portid,
-					   cb->nlh->nlmsg_seq, flags,
-					   cb->extack);
-		if (err) {
-			state->idx = port_index;
-			break;
-		}
-	}
-
-	return err;
-}
-
-int devlink_nl_port_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	return devlink_nl_dumpit(skb, cb, devlink_nl_port_get_dump_one);
-}
-
-static int devlink_port_type_set(struct devlink_port *devlink_port,
-				 enum devlink_port_type port_type)
-
-{
-	int err;
-
-	if (!devlink_port->ops->port_type_set)
-		return -EOPNOTSUPP;
-
-	if (port_type == devlink_port->type)
-		return 0;
-
-	err = devlink_port->ops->port_type_set(devlink_port, port_type);
-	if (err)
-		return err;
-
-	devlink_port->desired_type = port_type;
-	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
-	return 0;
-}
-
-static int devlink_port_function_hw_addr_set(struct devlink_port *port,
-					     const struct nlattr *attr,
-					     struct netlink_ext_ack *extack)
-{
-	const u8 *hw_addr;
-	int hw_addr_len;
-
-	hw_addr = nla_data(attr);
-	hw_addr_len = nla_len(attr);
-	if (hw_addr_len > MAX_ADDR_LEN) {
-		NL_SET_ERR_MSG(extack, "Port function hardware address too long");
-		return -EINVAL;
-	}
-	if (port->type == DEVLINK_PORT_TYPE_ETH) {
-		if (hw_addr_len != ETH_ALEN) {
-			NL_SET_ERR_MSG(extack, "Address must be 6 bytes for Ethernet device");
-			return -EINVAL;
-		}
-		if (!is_unicast_ether_addr(hw_addr)) {
-			NL_SET_ERR_MSG(extack, "Non-unicast hardware address unsupported");
-			return -EINVAL;
-		}
-	}
-
-	return port->ops->port_fn_hw_addr_set(port, hw_addr, hw_addr_len,
-					      extack);
-}
-
-static int devlink_port_fn_state_set(struct devlink_port *port,
-				     const struct nlattr *attr,
-				     struct netlink_ext_ack *extack)
-{
-	enum devlink_port_fn_state state;
-
-	state = nla_get_u8(attr);
-	return port->ops->port_fn_state_set(port, state, extack);
-}
-
-static int devlink_port_function_validate(struct devlink_port *devlink_port,
-					  struct nlattr **tb,
-					  struct netlink_ext_ack *extack)
-{
-	const struct devlink_port_ops *ops = devlink_port->ops;
-	struct nlattr *attr;
-
-	if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
-	    !ops->port_fn_hw_addr_set) {
-		NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
-				    "Port doesn't support function attributes");
-		return -EOPNOTSUPP;
-	}
-	if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
-		NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
-				    "Function does not support state setting");
-		return -EOPNOTSUPP;
-	}
-	attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
-	if (attr) {
-		struct nla_bitfield32 caps;
-
-		caps = nla_get_bitfield32(attr);
-		if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE &&
-		    !ops->port_fn_roce_set) {
-			NL_SET_ERR_MSG_ATTR(extack, attr,
-					    "Port doesn't support RoCE function attribute");
-			return -EOPNOTSUPP;
-		}
-		if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
-			if (!ops->port_fn_migratable_set) {
-				NL_SET_ERR_MSG_ATTR(extack, attr,
-						    "Port doesn't support migratable function attribute");
-				return -EOPNOTSUPP;
-			}
-			if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
-				NL_SET_ERR_MSG_ATTR(extack, attr,
-						    "migratable function attribute supported for VFs only");
-				return -EOPNOTSUPP;
-			}
-		}
-		if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO) {
-			if (!ops->port_fn_ipsec_crypto_set) {
-				NL_SET_ERR_MSG_ATTR(extack, attr,
-						    "Port doesn't support ipsec_crypto function attribute");
-				return -EOPNOTSUPP;
-			}
-			if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
-				NL_SET_ERR_MSG_ATTR(extack, attr,
-						    "ipsec_crypto function attribute supported for VFs only");
-				return -EOPNOTSUPP;
-			}
-		}
-		if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_PACKET) {
-			if (!ops->port_fn_ipsec_packet_set) {
-				NL_SET_ERR_MSG_ATTR(extack, attr,
-						    "Port doesn't support ipsec_packet function attribute");
-				return -EOPNOTSUPP;
-			}
-			if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
-				NL_SET_ERR_MSG_ATTR(extack, attr,
-						    "ipsec_packet function attribute supported for VFs only");
-				return -EOPNOTSUPP;
-			}
-		}
-	}
-	return 0;
-}
-
-static int devlink_port_function_set(struct devlink_port *port,
-				     const struct nlattr *attr,
-				     struct netlink_ext_ack *extack)
-{
-	struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
-	int err;
-
-	err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr,
-			       devlink_function_nl_policy, extack);
-	if (err < 0) {
-		NL_SET_ERR_MSG(extack, "Fail to parse port function attributes");
-		return err;
-	}
-
-	err = devlink_port_function_validate(port, tb, extack);
-	if (err)
-		return err;
-
-	attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
-	if (attr) {
-		err = devlink_port_function_hw_addr_set(port, attr, extack);
-		if (err)
-			return err;
-	}
-
-	attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
-	if (attr) {
-		err = devlink_port_fn_caps_set(port, attr, extack);
-		if (err)
-			return err;
-	}
-
-	/* Keep this as the last function attribute set, so that when
-	 * multiple port function attributes are set along with state,
-	 * Those can be applied first before activating the state.
-	 */
-	attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
-	if (attr)
-		err = devlink_port_fn_state_set(port, attr, extack);
-
-	if (!err)
-		devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
-	return err;
-}
-
-static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
-					struct genl_info *info)
-{
-	struct devlink_port *devlink_port = info->user_ptr[1];
-	int err;
-
-	if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
-		enum devlink_port_type port_type;
-
-		port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
-		err = devlink_port_type_set(devlink_port, port_type);
-		if (err)
-			return err;
-	}
-
-	if (info->attrs[DEVLINK_ATTR_PORT_FUNCTION]) {
-		struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
-		struct netlink_ext_ack *extack = info->extack;
-
-		err = devlink_port_function_set(devlink_port, attr, extack);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
-					  struct genl_info *info)
-{
-	struct devlink_port *devlink_port = info->user_ptr[1];
-	struct devlink *devlink = info->user_ptr[0];
-	u32 count;
-
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_SPLIT_COUNT))
-		return -EINVAL;
-	if (!devlink_port->ops->port_split)
-		return -EOPNOTSUPP;
-
-	count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
-
-	if (!devlink_port->attrs.splittable) {
-		/* Split ports cannot be split. */
-		if (devlink_port->attrs.split)
-			NL_SET_ERR_MSG(info->extack, "Port cannot be split further");
-		else
-			NL_SET_ERR_MSG(info->extack, "Port cannot be split");
-		return -EINVAL;
-	}
-
-	if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) {
-		NL_SET_ERR_MSG(info->extack, "Invalid split count");
-		return -EINVAL;
-	}
-
-	return devlink_port->ops->port_split(devlink, devlink_port, count,
-					     info->extack);
-}
-
-static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
-					    struct genl_info *info)
-{
-	struct devlink_port *devlink_port = info->user_ptr[1];
-	struct devlink *devlink = info->user_ptr[0];
-
-	if (!devlink_port->ops->port_unsplit)
-		return -EOPNOTSUPP;
-	return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack);
-}
-
-static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
-					struct genl_info *info)
-{
-	struct netlink_ext_ack *extack = info->extack;
-	struct devlink_port_new_attrs new_attrs = {};
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_port *devlink_port;
-	struct sk_buff *msg;
-	int err;
-
-	if (!devlink->ops->port_new)
-		return -EOPNOTSUPP;
-
-	if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] ||
-	    !info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) {
-		NL_SET_ERR_MSG(extack, "Port flavour or PCI PF are not specified");
-		return -EINVAL;
-	}
-	new_attrs.flavour = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_FLAVOUR]);
-	new_attrs.pfnum =
-		nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]);
-
-	if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
-		/* Port index of the new port being created by driver. */
-		new_attrs.port_index =
-			nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-		new_attrs.port_index_valid = true;
-	}
-	if (info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]) {
-		new_attrs.controller =
-			nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]);
-		new_attrs.controller_valid = true;
-	}
-	if (new_attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_SF &&
-	    info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]) {
-		new_attrs.sfnum = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]);
-		new_attrs.sfnum_valid = true;
-	}
-
-	err = devlink->ops->port_new(devlink, &new_attrs,
-				     extack, &devlink_port);
-	if (err)
-		return err;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg) {
-		err = -ENOMEM;
-		goto err_out_port_del;
-	}
-	err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
-				   info->snd_portid, info->snd_seq, 0, NULL);
-	if (WARN_ON_ONCE(err))
-		goto err_out_msg_free;
-	err = genlmsg_reply(msg, info);
-	if (err)
-		goto err_out_port_del;
-	return 0;
-
-err_out_msg_free:
-	nlmsg_free(msg);
-err_out_port_del:
-	devlink_port->ops->port_del(devlink, devlink_port, NULL);
-	return err;
-}
-
-static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
-					struct genl_info *info)
-{
-	struct devlink_port *devlink_port = info->user_ptr[1];
-	struct netlink_ext_ack *extack = info->extack;
-	struct devlink *devlink = info->user_ptr[0];
-
-	if (!devlink_port->ops->port_del)
-		return -EOPNOTSUPP;
-
-	return devlink_port->ops->port_del(devlink, devlink_port, extack);
-}
-
 static int
 devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
 				struct genl_info *info,
@@ -6783,489 +5860,6 @@ void devlink_notify_unregister(struct devlink *devlink)
 	devlink_notify(devlink, DEVLINK_CMD_DEL);
 }
 
-static void devlink_port_type_warn(struct work_struct *work)
-{
-	struct devlink_port *port = container_of(to_delayed_work(work),
-						 struct devlink_port,
-						 type_warn_dw);
-	dev_warn(port->devlink->dev, "Type was not set for devlink port.");
-}
-
-static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
-{
-	/* Ignore CPU and DSA flavours. */
-	return devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
-	       devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA &&
-	       devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_UNUSED;
-}
-
-#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 3600)
-
-static void devlink_port_type_warn_schedule(struct devlink_port *devlink_port)
-{
-	if (!devlink_port_type_should_warn(devlink_port))
-		return;
-	/* Schedule a work to WARN in case driver does not set port
-	 * type within timeout.
-	 */
-	schedule_delayed_work(&devlink_port->type_warn_dw,
-			      DEVLINK_PORT_TYPE_WARN_TIMEOUT);
-}
-
-static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
-{
-	if (!devlink_port_type_should_warn(devlink_port))
-		return;
-	cancel_delayed_work_sync(&devlink_port->type_warn_dw);
-}
-
-/**
- * devlink_port_init() - Init devlink port
- *
- * @devlink: devlink
- * @devlink_port: devlink port
- *
- * Initialize essential stuff that is needed for functions
- * that may be called before devlink port registration.
- * Call to this function is optional and not needed
- * in case the driver does not use such functions.
- */
-void devlink_port_init(struct devlink *devlink,
-		       struct devlink_port *devlink_port)
-{
-	if (devlink_port->initialized)
-		return;
-	devlink_port->devlink = devlink;
-	INIT_LIST_HEAD(&devlink_port->region_list);
-	devlink_port->initialized = true;
-}
-EXPORT_SYMBOL_GPL(devlink_port_init);
-
-/**
- * devlink_port_fini() - Deinitialize devlink port
- *
- * @devlink_port: devlink port
- *
- * Deinitialize essential stuff that is in use for functions
- * that may be called after devlink port unregistration.
- * Call to this function is optional and not needed
- * in case the driver does not use such functions.
- */
-void devlink_port_fini(struct devlink_port *devlink_port)
-{
-	WARN_ON(!list_empty(&devlink_port->region_list));
-}
-EXPORT_SYMBOL_GPL(devlink_port_fini);
-
-static const struct devlink_port_ops devlink_port_dummy_ops = {};
-
-/**
- * devl_port_register_with_ops() - Register devlink port
- *
- * @devlink: devlink
- * @devlink_port: devlink port
- * @port_index: driver-specific numerical identifier of the port
- * @ops: port ops
- *
- * Register devlink port with provided port index. User can use
- * any indexing, even hw-related one. devlink_port structure
- * is convenient to be embedded inside user driver private structure.
- * Note that the caller should take care of zeroing the devlink_port
- * structure.
- */
-int devl_port_register_with_ops(struct devlink *devlink,
-				struct devlink_port *devlink_port,
-				unsigned int port_index,
-				const struct devlink_port_ops *ops)
-{
-	int err;
-
-	devl_assert_locked(devlink);
-
-	ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
-	devlink_port_init(devlink, devlink_port);
-	devlink_port->registered = true;
-	devlink_port->index = port_index;
-	devlink_port->ops = ops ? ops : &devlink_port_dummy_ops;
-	spin_lock_init(&devlink_port->type_lock);
-	INIT_LIST_HEAD(&devlink_port->reporter_list);
-	err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL);
-	if (err) {
-		devlink_port->registered = false;
-		return err;
-	}
-
-	INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
-	devlink_port_type_warn_schedule(devlink_port);
-	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devl_port_register_with_ops);
-
-/**
- *	devlink_port_register_with_ops - Register devlink port
- *
- *	@devlink: devlink
- *	@devlink_port: devlink port
- *	@port_index: driver-specific numerical identifier of the port
- *	@ops: port ops
- *
- *	Register devlink port with provided port index. User can use
- *	any indexing, even hw-related one. devlink_port structure
- *	is convenient to be embedded inside user driver private structure.
- *	Note that the caller should take care of zeroing the devlink_port
- *	structure.
- *
- *	Context: Takes and release devlink->lock <mutex>.
- */
-int devlink_port_register_with_ops(struct devlink *devlink,
-				   struct devlink_port *devlink_port,
-				   unsigned int port_index,
-				   const struct devlink_port_ops *ops)
-{
-	int err;
-
-	devl_lock(devlink);
-	err = devl_port_register_with_ops(devlink, devlink_port,
-					  port_index, ops);
-	devl_unlock(devlink);
-	return err;
-}
-EXPORT_SYMBOL_GPL(devlink_port_register_with_ops);
-
-/**
- * devl_port_unregister() - Unregister devlink port
- *
- * @devlink_port: devlink port
- */
-void devl_port_unregister(struct devlink_port *devlink_port)
-{
-	lockdep_assert_held(&devlink_port->devlink->lock);
-	WARN_ON(devlink_port->type != DEVLINK_PORT_TYPE_NOTSET);
-
-	devlink_port_type_warn_cancel(devlink_port);
-	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
-	xa_erase(&devlink_port->devlink->ports, devlink_port->index);
-	WARN_ON(!list_empty(&devlink_port->reporter_list));
-	devlink_port->registered = false;
-}
-EXPORT_SYMBOL_GPL(devl_port_unregister);
-
-/**
- *	devlink_port_unregister - Unregister devlink port
- *
- *	@devlink_port: devlink port
- *
- *	Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_port_unregister(struct devlink_port *devlink_port)
-{
-	struct devlink *devlink = devlink_port->devlink;
-
-	devl_lock(devlink);
-	devl_port_unregister(devlink_port);
-	devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_port_unregister);
-
-static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port,
-					    struct net_device *netdev)
-{
-	const struct net_device_ops *ops = netdev->netdev_ops;
-
-	/* If driver registers devlink port, it should set devlink port
-	 * attributes accordingly so the compat functions are called
-	 * and the original ops are not used.
-	 */
-	if (ops->ndo_get_phys_port_name) {
-		/* Some drivers use the same set of ndos for netdevs
-		 * that have devlink_port registered and also for
-		 * those who don't. Make sure that ndo_get_phys_port_name
-		 * returns -EOPNOTSUPP here in case it is defined.
-		 * Warn if not.
-		 */
-		char name[IFNAMSIZ];
-		int err;
-
-		err = ops->ndo_get_phys_port_name(netdev, name, sizeof(name));
-		WARN_ON(err != -EOPNOTSUPP);
-	}
-	if (ops->ndo_get_port_parent_id) {
-		/* Some drivers use the same set of ndos for netdevs
-		 * that have devlink_port registered and also for
-		 * those who don't. Make sure that ndo_get_port_parent_id
-		 * returns -EOPNOTSUPP here in case it is defined.
-		 * Warn if not.
-		 */
-		struct netdev_phys_item_id ppid;
-		int err;
-
-		err = ops->ndo_get_port_parent_id(netdev, &ppid);
-		WARN_ON(err != -EOPNOTSUPP);
-	}
-}
-
-static void __devlink_port_type_set(struct devlink_port *devlink_port,
-				    enum devlink_port_type type,
-				    void *type_dev)
-{
-	struct net_device *netdev = type_dev;
-
-	ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
-
-	if (type == DEVLINK_PORT_TYPE_NOTSET) {
-		devlink_port_type_warn_schedule(devlink_port);
-	} else {
-		devlink_port_type_warn_cancel(devlink_port);
-		if (type == DEVLINK_PORT_TYPE_ETH && netdev)
-			devlink_port_type_netdev_checks(devlink_port, netdev);
-	}
-
-	spin_lock_bh(&devlink_port->type_lock);
-	devlink_port->type = type;
-	switch (type) {
-	case DEVLINK_PORT_TYPE_ETH:
-		devlink_port->type_eth.netdev = netdev;
-		if (netdev) {
-			ASSERT_RTNL();
-			devlink_port->type_eth.ifindex = netdev->ifindex;
-			BUILD_BUG_ON(sizeof(devlink_port->type_eth.ifname) !=
-				     sizeof(netdev->name));
-			strcpy(devlink_port->type_eth.ifname, netdev->name);
-		}
-		break;
-	case DEVLINK_PORT_TYPE_IB:
-		devlink_port->type_ib.ibdev = type_dev;
-		break;
-	default:
-		break;
-	}
-	spin_unlock_bh(&devlink_port->type_lock);
-	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
-}
-
-/**
- *	devlink_port_type_eth_set - Set port type to Ethernet
- *
- *	@devlink_port: devlink port
- *
- *	If driver is calling this, most likely it is doing something wrong.
- */
-void devlink_port_type_eth_set(struct devlink_port *devlink_port)
-{
-	dev_warn(devlink_port->devlink->dev,
-		 "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n",
-		 devlink_port->index);
-	__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, NULL);
-}
-EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
-
-/**
- *	devlink_port_type_ib_set - Set port type to InfiniBand
- *
- *	@devlink_port: devlink port
- *	@ibdev: related IB device
- */
-void devlink_port_type_ib_set(struct devlink_port *devlink_port,
-			      struct ib_device *ibdev)
-{
-	__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev);
-}
-EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
-
-/**
- *	devlink_port_type_clear - Clear port type
- *
- *	@devlink_port: devlink port
- *
- *	If driver is calling this for clearing Ethernet type, most likely
- *	it is doing something wrong.
- */
-void devlink_port_type_clear(struct devlink_port *devlink_port)
-{
-	if (devlink_port->type == DEVLINK_PORT_TYPE_ETH)
-		dev_warn(devlink_port->devlink->dev,
-			 "devlink port type for port %d cleared without a software interface reference, device type not supported by the kernel?\n",
-			 devlink_port->index);
-	__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL);
-}
-EXPORT_SYMBOL_GPL(devlink_port_type_clear);
-
-int devlink_port_netdevice_event(struct notifier_block *nb,
-				 unsigned long event, void *ptr)
-{
-	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
-	struct devlink_port *devlink_port = netdev->devlink_port;
-	struct devlink *devlink;
-
-	if (!devlink_port)
-		return NOTIFY_OK;
-	devlink = devlink_port->devlink;
-
-	switch (event) {
-	case NETDEV_POST_INIT:
-		/* Set the type but not netdev pointer. It is going to be set
-		 * later on by NETDEV_REGISTER event. Happens once during
-		 * netdevice register
-		 */
-		__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH,
-					NULL);
-		break;
-	case NETDEV_REGISTER:
-	case NETDEV_CHANGENAME:
-		if (devlink_net(devlink) != dev_net(netdev))
-			return NOTIFY_OK;
-		/* Set the netdev on top of previously set type. Note this
-		 * event happens also during net namespace change so here
-		 * we take into account netdev pointer appearing in this
-		 * namespace.
-		 */
-		__devlink_port_type_set(devlink_port, devlink_port->type,
-					netdev);
-		break;
-	case NETDEV_UNREGISTER:
-		if (devlink_net(devlink) != dev_net(netdev))
-			return NOTIFY_OK;
-		/* Clear netdev pointer, but not the type. This event happens
-		 * also during net namespace change so we need to clear
-		 * pointer to netdev that is going to another net namespace.
-		 */
-		__devlink_port_type_set(devlink_port, devlink_port->type,
-					NULL);
-		break;
-	case NETDEV_PRE_UNINIT:
-		/* Clear the type and the netdev pointer. Happens one during
-		 * netdevice unregister.
-		 */
-		__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET,
-					NULL);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
-				    enum devlink_port_flavour flavour)
-{
-	struct devlink_port_attrs *attrs = &devlink_port->attrs;
-
-	devlink_port->attrs_set = true;
-	attrs->flavour = flavour;
-	if (attrs->switch_id.id_len) {
-		devlink_port->switch_port = true;
-		if (WARN_ON(attrs->switch_id.id_len > MAX_PHYS_ITEM_ID_LEN))
-			attrs->switch_id.id_len = MAX_PHYS_ITEM_ID_LEN;
-	} else {
-		devlink_port->switch_port = false;
-	}
-	return 0;
-}
-
-/**
- *	devlink_port_attrs_set - Set port attributes
- *
- *	@devlink_port: devlink port
- *	@attrs: devlink port attrs
- */
-void devlink_port_attrs_set(struct devlink_port *devlink_port,
-			    struct devlink_port_attrs *attrs)
-{
-	int ret;
-
-	ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
-	devlink_port->attrs = *attrs;
-	ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
-	if (ret)
-		return;
-	WARN_ON(attrs->splittable && attrs->split);
-}
-EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
-
-/**
- *	devlink_port_attrs_pci_pf_set - Set PCI PF port attributes
- *
- *	@devlink_port: devlink port
- *	@controller: associated controller number for the devlink port instance
- *	@pf: associated PF for the devlink port instance
- *	@external: indicates if the port is for an external controller
- */
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
-				   u16 pf, bool external)
-{
-	struct devlink_port_attrs *attrs = &devlink_port->attrs;
-	int ret;
-
-	ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
-	ret = __devlink_port_attrs_set(devlink_port,
-				       DEVLINK_PORT_FLAVOUR_PCI_PF);
-	if (ret)
-		return;
-	attrs->pci_pf.controller = controller;
-	attrs->pci_pf.pf = pf;
-	attrs->pci_pf.external = external;
-}
-EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
-
-/**
- *	devlink_port_attrs_pci_vf_set - Set PCI VF port attributes
- *
- *	@devlink_port: devlink port
- *	@controller: associated controller number for the devlink port instance
- *	@pf: associated PF for the devlink port instance
- *	@vf: associated VF of a PF for the devlink port instance
- *	@external: indicates if the port is for an external controller
- */
-void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
-				   u16 pf, u16 vf, bool external)
-{
-	struct devlink_port_attrs *attrs = &devlink_port->attrs;
-	int ret;
-
-	ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
-	ret = __devlink_port_attrs_set(devlink_port,
-				       DEVLINK_PORT_FLAVOUR_PCI_VF);
-	if (ret)
-		return;
-	attrs->pci_vf.controller = controller;
-	attrs->pci_vf.pf = pf;
-	attrs->pci_vf.vf = vf;
-	attrs->pci_vf.external = external;
-}
-EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
-
-/**
- *	devlink_port_attrs_pci_sf_set - Set PCI SF port attributes
- *
- *	@devlink_port: devlink port
- *	@controller: associated controller number for the devlink port instance
- *	@pf: associated PF for the devlink port instance
- *	@sf: associated SF of a PF for the devlink port instance
- *	@external: indicates if the port is for an external controller
- */
-void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller,
-				   u16 pf, u32 sf, bool external)
-{
-	struct devlink_port_attrs *attrs = &devlink_port->attrs;
-	int ret;
-
-	ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
-	ret = __devlink_port_attrs_set(devlink_port,
-				       DEVLINK_PORT_FLAVOUR_PCI_SF);
-	if (ret)
-		return;
-	attrs->pci_sf.controller = controller;
-	attrs->pci_sf.pf = pf;
-	attrs->pci_sf.sf = sf;
-	attrs->pci_sf.external = external;
-}
-EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
-
 /**
  * devl_rate_node_create - create devlink rate node
  * @devlink: devlink instance
@@ -7412,92 +6006,6 @@ void devl_rate_nodes_destroy(struct devlink *devlink)
 }
 EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);
 
-/**
- *	devlink_port_linecard_set - Link port with a linecard
- *
- *	@devlink_port: devlink port
- *	@linecard: devlink linecard
- */
-void devlink_port_linecard_set(struct devlink_port *devlink_port,
-			       struct devlink_linecard *linecard)
-{
-	ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
-	devlink_port->linecard = linecard;
-}
-EXPORT_SYMBOL_GPL(devlink_port_linecard_set);
-
-static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
-					     char *name, size_t len)
-{
-	struct devlink_port_attrs *attrs = &devlink_port->attrs;
-	int n = 0;
-
-	if (!devlink_port->attrs_set)
-		return -EOPNOTSUPP;
-
-	switch (attrs->flavour) {
-	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
-		if (devlink_port->linecard)
-			n = snprintf(name, len, "l%u",
-				     devlink_port->linecard->index);
-		if (n < len)
-			n += snprintf(name + n, len - n, "p%u",
-				      attrs->phys.port_number);
-		if (n < len && attrs->split)
-			n += snprintf(name + n, len - n, "s%u",
-				      attrs->phys.split_subport_number);
-		break;
-	case DEVLINK_PORT_FLAVOUR_CPU:
-	case DEVLINK_PORT_FLAVOUR_DSA:
-	case DEVLINK_PORT_FLAVOUR_UNUSED:
-		/* As CPU and DSA ports do not have a netdevice associated
-		 * case should not ever happen.
-		 */
-		WARN_ON(1);
-		return -EINVAL;
-	case DEVLINK_PORT_FLAVOUR_PCI_PF:
-		if (attrs->pci_pf.external) {
-			n = snprintf(name, len, "c%u", attrs->pci_pf.controller);
-			if (n >= len)
-				return -EINVAL;
-			len -= n;
-			name += n;
-		}
-		n = snprintf(name, len, "pf%u", attrs->pci_pf.pf);
-		break;
-	case DEVLINK_PORT_FLAVOUR_PCI_VF:
-		if (attrs->pci_vf.external) {
-			n = snprintf(name, len, "c%u", attrs->pci_vf.controller);
-			if (n >= len)
-				return -EINVAL;
-			len -= n;
-			name += n;
-		}
-		n = snprintf(name, len, "pf%uvf%u",
-			     attrs->pci_vf.pf, attrs->pci_vf.vf);
-		break;
-	case DEVLINK_PORT_FLAVOUR_PCI_SF:
-		if (attrs->pci_sf.external) {
-			n = snprintf(name, len, "c%u", attrs->pci_sf.controller);
-			if (n >= len)
-				return -EINVAL;
-			len -= n;
-			name += n;
-		}
-		n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
-			     attrs->pci_sf.sf);
-		break;
-	case DEVLINK_PORT_FLAVOUR_VIRTUAL:
-		return -EOPNOTSUPP;
-	}
-
-	if (n >= len)
-		return -EINVAL;
-
-	return 0;
-}
-
 static int devlink_linecard_types_init(struct devlink_linecard *linecard)
 {
 	struct devlink_linecard_type *linecard_type;
@@ -9572,39 +8080,3 @@ devl_trap_policers_unregister(struct devlink *devlink,
 		devlink_trap_policer_unregister(devlink, &policers[i]);
 }
 EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
-
-int devlink_compat_phys_port_name_get(struct net_device *dev,
-				      char *name, size_t len)
-{
-	struct devlink_port *devlink_port;
-
-	/* RTNL mutex is held here which ensures that devlink_port
-	 * instance cannot disappear in the middle. No need to take
-	 * any devlink lock as only permanent values are accessed.
-	 */
-	ASSERT_RTNL();
-
-	devlink_port = dev->devlink_port;
-	if (!devlink_port)
-		return -EOPNOTSUPP;
-
-	return __devlink_port_phys_port_name_get(devlink_port, name, len);
-}
-
-int devlink_compat_switch_id_get(struct net_device *dev,
-				 struct netdev_phys_item_id *ppid)
-{
-	struct devlink_port *devlink_port;
-
-	/* Caller must hold RTNL mutex or reference to dev, which ensures that
-	 * devlink_port instance cannot disappear in the middle. No need to take
-	 * any devlink lock as only permanent values are accessed.
-	 */
-	devlink_port = dev->devlink_port;
-	if (!devlink_port || !devlink_port->switch_port)
-		return -EOPNOTSUPP;
-
-	memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid));
-
-	return 0;
-}
diff --git a/net/devlink/port.c b/net/devlink/port.c
new file mode 100644
index 000000000000..4763b42885fb
--- /dev/null
+++ b/net/devlink/port.c
@@ -0,0 +1,1515 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
+	(_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
+
+static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
+	[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
+	[DEVLINK_PORT_FN_ATTR_STATE] =
+		NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
+				 DEVLINK_PORT_FN_STATE_ACTIVE),
+	[DEVLINK_PORT_FN_ATTR_CAPS] =
+		NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
+};
+
+#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port)				\
+	WARN_ON_ONCE(!(devlink_port)->registered)
+#define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port)			\
+	WARN_ON_ONCE((devlink_port)->registered)
+
+struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
+					       unsigned int port_index)
+{
+	return xa_load(&devlink->ports, port_index);
+}
+
+struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
+						 struct nlattr **attrs)
+{
+	if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
+		u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+		struct devlink_port *devlink_port;
+
+		devlink_port = devlink_port_get_by_index(devlink, port_index);
+		if (!devlink_port)
+			return ERR_PTR(-ENODEV);
+		return devlink_port;
+	}
+	return ERR_PTR(-EINVAL);
+}
+
+struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
+						struct genl_info *info)
+{
+	return devlink_port_get_from_attrs(devlink, info->attrs);
+}
+
+static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
+				     u32 cap, bool is_enable)
+{
+	caps->selector |= cap;
+	if (is_enable)
+		caps->value |= cap;
+}
+
+static int devlink_port_fn_roce_fill(struct devlink_port *devlink_port,
+				     struct nla_bitfield32 *caps,
+				     struct netlink_ext_ack *extack)
+{
+	bool is_enable;
+	int err;
+
+	if (!devlink_port->ops->port_fn_roce_get)
+		return 0;
+
+	err = devlink_port->ops->port_fn_roce_get(devlink_port, &is_enable,
+						  extack);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+
+	devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable);
+	return 0;
+}
+
+static int devlink_port_fn_migratable_fill(struct devlink_port *devlink_port,
+					   struct nla_bitfield32 *caps,
+					   struct netlink_ext_ack *extack)
+{
+	bool is_enable;
+	int err;
+
+	if (!devlink_port->ops->port_fn_migratable_get ||
+	    devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+		return 0;
+
+	err = devlink_port->ops->port_fn_migratable_get(devlink_port,
+							&is_enable, extack);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+
+	devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_MIGRATABLE, is_enable);
+	return 0;
+}
+
+static int devlink_port_fn_ipsec_crypto_fill(struct devlink_port *devlink_port,
+					     struct nla_bitfield32 *caps,
+					     struct netlink_ext_ack *extack)
+{
+	bool is_enable;
+	int err;
+
+	if (!devlink_port->ops->port_fn_ipsec_crypto_get ||
+	    devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+		return 0;
+
+	err = devlink_port->ops->port_fn_ipsec_crypto_get(devlink_port, &is_enable, extack);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+
+	devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO, is_enable);
+	return 0;
+}
+
+static int devlink_port_fn_ipsec_packet_fill(struct devlink_port *devlink_port,
+					     struct nla_bitfield32 *caps,
+					     struct netlink_ext_ack *extack)
+{
+	bool is_enable;
+	int err;
+
+	if (!devlink_port->ops->port_fn_ipsec_packet_get ||
+	    devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+		return 0;
+
+	err = devlink_port->ops->port_fn_ipsec_packet_get(devlink_port, &is_enable, extack);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+
+	devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_IPSEC_PACKET, is_enable);
+	return 0;
+}
+
+static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
+				     struct sk_buff *msg,
+				     struct netlink_ext_ack *extack,
+				     bool *msg_updated)
+{
+	struct nla_bitfield32 caps = {};
+	int err;
+
+	err = devlink_port_fn_roce_fill(devlink_port, &caps, extack);
+	if (err)
+		return err;
+
+	err = devlink_port_fn_migratable_fill(devlink_port, &caps, extack);
+	if (err)
+		return err;
+
+	err = devlink_port_fn_ipsec_crypto_fill(devlink_port, &caps, extack);
+	if (err)
+		return err;
+
+	err = devlink_port_fn_ipsec_packet_fill(devlink_port, &caps, extack);
+	if (err)
+		return err;
+
+	if (!caps.selector)
+		return 0;
+	err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
+				 caps.selector);
+	if (err)
+		return err;
+
+	*msg_updated = true;
+	return 0;
+}
+
+int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port)
+{
+	if (devlink_nl_put_handle(msg, devlink_port->devlink))
+		return -EMSGSIZE;
+	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+		return -EMSGSIZE;
+	return 0;
+}
+
+size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port)
+{
+	struct devlink *devlink = devlink_port->devlink;
+
+	return nla_total_size(strlen(devlink->dev->bus->name) + 1) /* DEVLINK_ATTR_BUS_NAME */
+	     + nla_total_size(strlen(dev_name(devlink->dev)) + 1) /* DEVLINK_ATTR_DEV_NAME */
+	     + nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */
+}
+
+static int devlink_nl_port_attrs_put(struct sk_buff *msg,
+				     struct devlink_port *devlink_port)
+{
+	struct devlink_port_attrs *attrs = &devlink_port->attrs;
+
+	if (!devlink_port->attrs_set)
+		return 0;
+	if (attrs->lanes) {
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_LANES, attrs->lanes))
+			return -EMSGSIZE;
+	}
+	if (nla_put_u8(msg, DEVLINK_ATTR_PORT_SPLITTABLE, attrs->splittable))
+		return -EMSGSIZE;
+	if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
+		return -EMSGSIZE;
+	switch (devlink_port->attrs.flavour) {
+	case DEVLINK_PORT_FLAVOUR_PCI_PF:
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+				attrs->pci_pf.controller) ||
+		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_pf.pf))
+			return -EMSGSIZE;
+		if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_pf.external))
+			return -EMSGSIZE;
+		break;
+	case DEVLINK_PORT_FLAVOUR_PCI_VF:
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+				attrs->pci_vf.controller) ||
+		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_vf.pf) ||
+		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, attrs->pci_vf.vf))
+			return -EMSGSIZE;
+		if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_vf.external))
+			return -EMSGSIZE;
+		break;
+	case DEVLINK_PORT_FLAVOUR_PCI_SF:
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+				attrs->pci_sf.controller) ||
+		    nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
+				attrs->pci_sf.pf) ||
+		    nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_SF_NUMBER,
+				attrs->pci_sf.sf))
+			return -EMSGSIZE;
+		break;
+	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
+	case DEVLINK_PORT_FLAVOUR_CPU:
+	case DEVLINK_PORT_FLAVOUR_DSA:
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
+				attrs->phys.port_number))
+			return -EMSGSIZE;
+		if (!attrs->split)
+			return 0;
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
+				attrs->phys.port_number))
+			return -EMSGSIZE;
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
+				attrs->phys.split_subport_number))
+			return -EMSGSIZE;
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static int devlink_port_fn_hw_addr_fill(struct devlink_port *port,
+					struct sk_buff *msg,
+					struct netlink_ext_ack *extack,
+					bool *msg_updated)
+{
+	u8 hw_addr[MAX_ADDR_LEN];
+	int hw_addr_len;
+	int err;
+
+	if (!port->ops->port_fn_hw_addr_get)
+		return 0;
+
+	err = port->ops->port_fn_hw_addr_get(port, hw_addr, &hw_addr_len,
+					     extack);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+	err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr);
+	if (err)
+		return err;
+	*msg_updated = true;
+	return 0;
+}
+
+static bool
+devlink_port_fn_state_valid(enum devlink_port_fn_state state)
+{
+	return state == DEVLINK_PORT_FN_STATE_INACTIVE ||
+	       state == DEVLINK_PORT_FN_STATE_ACTIVE;
+}
+
+static bool
+devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
+{
+	return opstate == DEVLINK_PORT_FN_OPSTATE_DETACHED ||
+	       opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
+}
+
+static int devlink_port_fn_state_fill(struct devlink_port *port,
+				      struct sk_buff *msg,
+				      struct netlink_ext_ack *extack,
+				      bool *msg_updated)
+{
+	enum devlink_port_fn_opstate opstate;
+	enum devlink_port_fn_state state;
+	int err;
+
+	if (!port->ops->port_fn_state_get)
+		return 0;
+
+	err = port->ops->port_fn_state_get(port, &state, &opstate, extack);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+	if (!devlink_port_fn_state_valid(state)) {
+		WARN_ON_ONCE(1);
+		NL_SET_ERR_MSG(extack, "Invalid state read from driver");
+		return -EINVAL;
+	}
+	if (!devlink_port_fn_opstate_valid(opstate)) {
+		WARN_ON_ONCE(1);
+		NL_SET_ERR_MSG(extack, "Invalid operational state read from driver");
+		return -EINVAL;
+	}
+	if (nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_STATE, state) ||
+	    nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_OPSTATE, opstate))
+		return -EMSGSIZE;
+	*msg_updated = true;
+	return 0;
+}
+
+static int
+devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable,
+			struct netlink_ext_ack *extack)
+{
+	return devlink_port->ops->port_fn_migratable_set(devlink_port, enable,
+							 extack);
+}
+
+static int
+devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
+			 struct netlink_ext_ack *extack)
+{
+	return devlink_port->ops->port_fn_roce_set(devlink_port, enable,
+						   extack);
+}
+
+static int
+devlink_port_fn_ipsec_crypto_set(struct devlink_port *devlink_port, bool enable,
+				 struct netlink_ext_ack *extack)
+{
+	return devlink_port->ops->port_fn_ipsec_crypto_set(devlink_port, enable, extack);
+}
+
+static int
+devlink_port_fn_ipsec_packet_set(struct devlink_port *devlink_port, bool enable,
+				 struct netlink_ext_ack *extack)
+{
+	return devlink_port->ops->port_fn_ipsec_packet_set(devlink_port, enable, extack);
+}
+
+static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
+				    const struct nlattr *attr,
+				    struct netlink_ext_ack *extack)
+{
+	struct nla_bitfield32 caps;
+	u32 caps_value;
+	int err;
+
+	caps = nla_get_bitfield32(attr);
+	caps_value = caps.value & caps.selector;
+	if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
+		err = devlink_port_fn_roce_set(devlink_port,
+					       caps_value & DEVLINK_PORT_FN_CAP_ROCE,
+					       extack);
+		if (err)
+			return err;
+	}
+	if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+		err = devlink_port_fn_mig_set(devlink_port, caps_value &
+					      DEVLINK_PORT_FN_CAP_MIGRATABLE,
+					      extack);
+		if (err)
+			return err;
+	}
+	if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO) {
+		err = devlink_port_fn_ipsec_crypto_set(devlink_port, caps_value &
+						       DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO,
+						       extack);
+		if (err)
+			return err;
+	}
+	if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_PACKET) {
+		err = devlink_port_fn_ipsec_packet_set(devlink_port, caps_value &
+						       DEVLINK_PORT_FN_CAP_IPSEC_PACKET,
+						       extack);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int
+devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
+				   struct netlink_ext_ack *extack)
+{
+	struct nlattr *function_attr;
+	bool msg_updated = false;
+	int err;
+
+	function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION);
+	if (!function_attr)
+		return -EMSGSIZE;
+
+	err = devlink_port_fn_hw_addr_fill(port, msg, extack, &msg_updated);
+	if (err)
+		goto out;
+	err = devlink_port_fn_caps_fill(port, msg, extack, &msg_updated);
+	if (err)
+		goto out;
+	err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
+out:
+	if (err || !msg_updated)
+		nla_nest_cancel(msg, function_attr);
+	else
+		nla_nest_end(msg, function_attr);
+	return err;
+}
+
+static int devlink_nl_port_fill(struct sk_buff *msg,
+				struct devlink_port *devlink_port,
+				enum devlink_command cmd, u32 portid, u32 seq,
+				int flags, struct netlink_ext_ack *extack)
+{
+	struct devlink *devlink = devlink_port->devlink;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+		goto nla_put_failure;
+
+	spin_lock_bh(&devlink_port->type_lock);
+	if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
+		goto nla_put_failure_type_locked;
+	if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
+	    nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE,
+			devlink_port->desired_type))
+		goto nla_put_failure_type_locked;
+	if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
+		if (devlink_port->type_eth.netdev &&
+		    (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
+				 devlink_port->type_eth.ifindex) ||
+		     nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
+				    devlink_port->type_eth.ifname)))
+			goto nla_put_failure_type_locked;
+	}
+	if (devlink_port->type == DEVLINK_PORT_TYPE_IB) {
+		struct ib_device *ibdev = devlink_port->type_ib.ibdev;
+
+		if (ibdev &&
+		    nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME,
+				   ibdev->name))
+			goto nla_put_failure_type_locked;
+	}
+	spin_unlock_bh(&devlink_port->type_lock);
+	if (devlink_nl_port_attrs_put(msg, devlink_port))
+		goto nla_put_failure;
+	if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
+		goto nla_put_failure;
+	if (devlink_port->linecard &&
+	    nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
+			devlink_port->linecard->index))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure_type_locked:
+	spin_unlock_bh(&devlink_port->type_lock);
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static void devlink_port_notify(struct devlink_port *devlink_port,
+				enum devlink_command cmd)
+{
+	struct devlink *devlink = devlink_port->devlink;
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
+
+	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+				0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static void devlink_ports_notify(struct devlink *devlink,
+				 enum devlink_command cmd)
+{
+	struct devlink_port *devlink_port;
+	unsigned long port_index;
+
+	xa_for_each(&devlink->ports, port_index, devlink_port)
+		devlink_port_notify(devlink_port, cmd);
+}
+
+void devlink_ports_notify_register(struct devlink *devlink)
+{
+	devlink_ports_notify(devlink, DEVLINK_CMD_PORT_NEW);
+}
+
+void devlink_ports_notify_unregister(struct devlink *devlink)
+{
+	devlink_ports_notify(devlink, DEVLINK_CMD_PORT_DEL);
+}
+
+int devlink_nl_port_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[1];
+	struct sk_buff *msg;
+	int err;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW,
+				   info->snd_portid, info->snd_seq, 0,
+				   info->extack);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+			     struct netlink_callback *cb, int flags)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_port *devlink_port;
+	unsigned long port_index;
+	int err = 0;
+
+	xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
+		err = devlink_nl_port_fill(msg, devlink_port,
+					   DEVLINK_CMD_NEW,
+					   NETLINK_CB(cb->skb).portid,
+					   cb->nlh->nlmsg_seq, flags,
+					   cb->extack);
+		if (err) {
+			state->idx = port_index;
+			break;
+		}
+	}
+
+	return err;
+}
+
+int devlink_nl_port_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return devlink_nl_dumpit(skb, cb, devlink_nl_port_get_dump_one);
+}
+
+static int devlink_port_type_set(struct devlink_port *devlink_port,
+				 enum devlink_port_type port_type)
+
+{
+	int err;
+
+	if (!devlink_port->ops->port_type_set)
+		return -EOPNOTSUPP;
+
+	if (port_type == devlink_port->type)
+		return 0;
+
+	err = devlink_port->ops->port_type_set(devlink_port, port_type);
+	if (err)
+		return err;
+
+	devlink_port->desired_type = port_type;
+	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+	return 0;
+}
+
+static int devlink_port_function_hw_addr_set(struct devlink_port *port,
+					     const struct nlattr *attr,
+					     struct netlink_ext_ack *extack)
+{
+	const u8 *hw_addr;
+	int hw_addr_len;
+
+	hw_addr = nla_data(attr);
+	hw_addr_len = nla_len(attr);
+	if (hw_addr_len > MAX_ADDR_LEN) {
+		NL_SET_ERR_MSG(extack, "Port function hardware address too long");
+		return -EINVAL;
+	}
+	if (port->type == DEVLINK_PORT_TYPE_ETH) {
+		if (hw_addr_len != ETH_ALEN) {
+			NL_SET_ERR_MSG(extack, "Address must be 6 bytes for Ethernet device");
+			return -EINVAL;
+		}
+		if (!is_unicast_ether_addr(hw_addr)) {
+			NL_SET_ERR_MSG(extack, "Non-unicast hardware address unsupported");
+			return -EINVAL;
+		}
+	}
+
+	return port->ops->port_fn_hw_addr_set(port, hw_addr, hw_addr_len,
+					      extack);
+}
+
+static int devlink_port_fn_state_set(struct devlink_port *port,
+				     const struct nlattr *attr,
+				     struct netlink_ext_ack *extack)
+{
+	enum devlink_port_fn_state state;
+
+	state = nla_get_u8(attr);
+	return port->ops->port_fn_state_set(port, state, extack);
+}
+
+static int devlink_port_function_validate(struct devlink_port *devlink_port,
+					  struct nlattr **tb,
+					  struct netlink_ext_ack *extack)
+{
+	const struct devlink_port_ops *ops = devlink_port->ops;
+	struct nlattr *attr;
+
+	if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
+	    !ops->port_fn_hw_addr_set) {
+		NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+				    "Port doesn't support function attributes");
+		return -EOPNOTSUPP;
+	}
+	if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
+		NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+				    "Function does not support state setting");
+		return -EOPNOTSUPP;
+	}
+	attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+	if (attr) {
+		struct nla_bitfield32 caps;
+
+		caps = nla_get_bitfield32(attr);
+		if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE &&
+		    !ops->port_fn_roce_set) {
+			NL_SET_ERR_MSG_ATTR(extack, attr,
+					    "Port doesn't support RoCE function attribute");
+			return -EOPNOTSUPP;
+		}
+		if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+			if (!ops->port_fn_migratable_set) {
+				NL_SET_ERR_MSG_ATTR(extack, attr,
+						    "Port doesn't support migratable function attribute");
+				return -EOPNOTSUPP;
+			}
+			if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+				NL_SET_ERR_MSG_ATTR(extack, attr,
+						    "migratable function attribute supported for VFs only");
+				return -EOPNOTSUPP;
+			}
+		}
+		if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO) {
+			if (!ops->port_fn_ipsec_crypto_set) {
+				NL_SET_ERR_MSG_ATTR(extack, attr,
+						    "Port doesn't support ipsec_crypto function attribute");
+				return -EOPNOTSUPP;
+			}
+			if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+				NL_SET_ERR_MSG_ATTR(extack, attr,
+						    "ipsec_crypto function attribute supported for VFs only");
+				return -EOPNOTSUPP;
+			}
+		}
+		if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_PACKET) {
+			if (!ops->port_fn_ipsec_packet_set) {
+				NL_SET_ERR_MSG_ATTR(extack, attr,
+						    "Port doesn't support ipsec_packet function attribute");
+				return -EOPNOTSUPP;
+			}
+			if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+				NL_SET_ERR_MSG_ATTR(extack, attr,
+						    "ipsec_packet function attribute supported for VFs only");
+				return -EOPNOTSUPP;
+			}
+		}
+	}
+	return 0;
+}
+
+static int devlink_port_function_set(struct devlink_port *port,
+				     const struct nlattr *attr,
+				     struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr,
+			       devlink_function_nl_policy, extack);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack, "Fail to parse port function attributes");
+		return err;
+	}
+
+	err = devlink_port_function_validate(port, tb, extack);
+	if (err)
+		return err;
+
+	attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
+	if (attr) {
+		err = devlink_port_function_hw_addr_set(port, attr, extack);
+		if (err)
+			return err;
+	}
+
+	attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+	if (attr) {
+		err = devlink_port_fn_caps_set(port, attr, extack);
+		if (err)
+			return err;
+	}
+
+	/* Keep this as the last function attribute set, so that when
+	 * multiple port function attributes are set along with state,
+	 * Those can be applied first before activating the state.
+	 */
+	attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
+	if (attr)
+		err = devlink_port_fn_state_set(port, attr, extack);
+
+	if (!err)
+		devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
+	return err;
+}
+
+int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[1];
+	int err;
+
+	if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
+		enum devlink_port_type port_type;
+
+		port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
+		err = devlink_port_type_set(devlink_port, port_type);
+		if (err)
+			return err;
+	}
+
+	if (info->attrs[DEVLINK_ATTR_PORT_FUNCTION]) {
+		struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
+		struct netlink_ext_ack *extack = info->extack;
+
+		err = devlink_port_function_set(devlink_port, attr, extack);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[1];
+	struct devlink *devlink = info->user_ptr[0];
+	u32 count;
+
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_SPLIT_COUNT))
+		return -EINVAL;
+	if (!devlink_port->ops->port_split)
+		return -EOPNOTSUPP;
+
+	count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
+
+	if (!devlink_port->attrs.splittable) {
+		/* Split ports cannot be split. */
+		if (devlink_port->attrs.split)
+			NL_SET_ERR_MSG(info->extack, "Port cannot be split further");
+		else
+			NL_SET_ERR_MSG(info->extack, "Port cannot be split");
+		return -EINVAL;
+	}
+
+	if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) {
+		NL_SET_ERR_MSG(info->extack, "Invalid split count");
+		return -EINVAL;
+	}
+
+	return devlink_port->ops->port_split(devlink, devlink_port, count,
+					     info->extack);
+}
+
+int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[1];
+	struct devlink *devlink = info->user_ptr[0];
+
+	if (!devlink_port->ops->port_unsplit)
+		return -EOPNOTSUPP;
+	return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack);
+}
+
+int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink_port_new_attrs new_attrs = {};
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_port *devlink_port;
+	struct sk_buff *msg;
+	int err;
+
+	if (!devlink->ops->port_new)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] ||
+	    !info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) {
+		NL_SET_ERR_MSG(extack, "Port flavour or PCI PF are not specified");
+		return -EINVAL;
+	}
+	new_attrs.flavour = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_FLAVOUR]);
+	new_attrs.pfnum =
+		nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]);
+
+	if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+		/* Port index of the new port being created by driver. */
+		new_attrs.port_index =
+			nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+		new_attrs.port_index_valid = true;
+	}
+	if (info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]) {
+		new_attrs.controller =
+			nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]);
+		new_attrs.controller_valid = true;
+	}
+	if (new_attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_SF &&
+	    info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]) {
+		new_attrs.sfnum = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]);
+		new_attrs.sfnum_valid = true;
+	}
+
+	err = devlink->ops->port_new(devlink, &new_attrs,
+				     extack, &devlink_port);
+	if (err)
+		return err;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOMEM;
+		goto err_out_port_del;
+	}
+	err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
+				   info->snd_portid, info->snd_seq, 0, NULL);
+	if (WARN_ON_ONCE(err))
+		goto err_out_msg_free;
+	err = genlmsg_reply(msg, info);
+	if (err)
+		goto err_out_port_del;
+	return 0;
+
+err_out_msg_free:
+	nlmsg_free(msg);
+err_out_port_del:
+	devlink_port->ops->port_del(devlink, devlink_port, NULL);
+	return err;
+}
+
+int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[1];
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+
+	if (!devlink_port->ops->port_del)
+		return -EOPNOTSUPP;
+
+	return devlink_port->ops->port_del(devlink, devlink_port, extack);
+}
+
+static void devlink_port_type_warn(struct work_struct *work)
+{
+	struct devlink_port *port = container_of(to_delayed_work(work),
+						 struct devlink_port,
+						 type_warn_dw);
+	dev_warn(port->devlink->dev, "Type was not set for devlink port.");
+}
+
+static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
+{
+	/* Ignore CPU and DSA flavours. */
+	return devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
+	       devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA &&
+	       devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_UNUSED;
+}
+
+#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 3600)
+
+static void devlink_port_type_warn_schedule(struct devlink_port *devlink_port)
+{
+	if (!devlink_port_type_should_warn(devlink_port))
+		return;
+	/* Schedule a work to WARN in case driver does not set port
+	 * type within timeout.
+	 */
+	schedule_delayed_work(&devlink_port->type_warn_dw,
+			      DEVLINK_PORT_TYPE_WARN_TIMEOUT);
+}
+
+static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
+{
+	if (!devlink_port_type_should_warn(devlink_port))
+		return;
+	cancel_delayed_work_sync(&devlink_port->type_warn_dw);
+}
+
+/**
+ * devlink_port_init() - Init devlink port
+ *
+ * @devlink: devlink
+ * @devlink_port: devlink port
+ *
+ * Initialize essential stuff that is needed for functions
+ * that may be called before devlink port registration.
+ * Call to this function is optional and not needed
+ * in case the driver does not use such functions.
+ */
+void devlink_port_init(struct devlink *devlink,
+		       struct devlink_port *devlink_port)
+{
+	if (devlink_port->initialized)
+		return;
+	devlink_port->devlink = devlink;
+	INIT_LIST_HEAD(&devlink_port->region_list);
+	devlink_port->initialized = true;
+}
+EXPORT_SYMBOL_GPL(devlink_port_init);
+
+/**
+ * devlink_port_fini() - Deinitialize devlink port
+ *
+ * @devlink_port: devlink port
+ *
+ * Deinitialize essential stuff that is in use for functions
+ * that may be called after devlink port unregistration.
+ * Call to this function is optional and not needed
+ * in case the driver does not use such functions.
+ */
+void devlink_port_fini(struct devlink_port *devlink_port)
+{
+	WARN_ON(!list_empty(&devlink_port->region_list));
+}
+EXPORT_SYMBOL_GPL(devlink_port_fini);
+
+static const struct devlink_port_ops devlink_port_dummy_ops = {};
+
+/**
+ * devl_port_register_with_ops() - Register devlink port
+ *
+ * @devlink: devlink
+ * @devlink_port: devlink port
+ * @port_index: driver-specific numerical identifier of the port
+ * @ops: port ops
+ *
+ * Register devlink port with provided port index. User can use
+ * any indexing, even hw-related one. devlink_port structure
+ * is convenient to be embedded inside user driver private structure.
+ * Note that the caller should take care of zeroing the devlink_port
+ * structure.
+ */
+int devl_port_register_with_ops(struct devlink *devlink,
+				struct devlink_port *devlink_port,
+				unsigned int port_index,
+				const struct devlink_port_ops *ops)
+{
+	int err;
+
+	devl_assert_locked(devlink);
+
+	ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+	devlink_port_init(devlink, devlink_port);
+	devlink_port->registered = true;
+	devlink_port->index = port_index;
+	devlink_port->ops = ops ? ops : &devlink_port_dummy_ops;
+	spin_lock_init(&devlink_port->type_lock);
+	INIT_LIST_HEAD(&devlink_port->reporter_list);
+	err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL);
+	if (err) {
+		devlink_port->registered = false;
+		return err;
+	}
+
+	INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
+	devlink_port_type_warn_schedule(devlink_port);
+	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devl_port_register_with_ops);
+
+/**
+ *	devlink_port_register_with_ops - Register devlink port
+ *
+ *	@devlink: devlink
+ *	@devlink_port: devlink port
+ *	@port_index: driver-specific numerical identifier of the port
+ *	@ops: port ops
+ *
+ *	Register devlink port with provided port index. User can use
+ *	any indexing, even hw-related one. devlink_port structure
+ *	is convenient to be embedded inside user driver private structure.
+ *	Note that the caller should take care of zeroing the devlink_port
+ *	structure.
+ *
+ *	Context: Takes and release devlink->lock <mutex>.
+ */
+int devlink_port_register_with_ops(struct devlink *devlink,
+				   struct devlink_port *devlink_port,
+				   unsigned int port_index,
+				   const struct devlink_port_ops *ops)
+{
+	int err;
+
+	devl_lock(devlink);
+	err = devl_port_register_with_ops(devlink, devlink_port,
+					  port_index, ops);
+	devl_unlock(devlink);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_port_register_with_ops);
+
+/**
+ * devl_port_unregister() - Unregister devlink port
+ *
+ * @devlink_port: devlink port
+ */
+void devl_port_unregister(struct devlink_port *devlink_port)
+{
+	lockdep_assert_held(&devlink_port->devlink->lock);
+	WARN_ON(devlink_port->type != DEVLINK_PORT_TYPE_NOTSET);
+
+	devlink_port_type_warn_cancel(devlink_port);
+	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
+	xa_erase(&devlink_port->devlink->ports, devlink_port->index);
+	WARN_ON(!list_empty(&devlink_port->reporter_list));
+	devlink_port->registered = false;
+}
+EXPORT_SYMBOL_GPL(devl_port_unregister);
+
+/**
+ *	devlink_port_unregister - Unregister devlink port
+ *
+ *	@devlink_port: devlink port
+ *
+ *	Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_port_unregister(struct devlink_port *devlink_port)
+{
+	struct devlink *devlink = devlink_port->devlink;
+
+	devl_lock(devlink);
+	devl_port_unregister(devlink_port);
+	devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_port_unregister);
+
+static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port,
+					    struct net_device *netdev)
+{
+	const struct net_device_ops *ops = netdev->netdev_ops;
+
+	/* If driver registers devlink port, it should set devlink port
+	 * attributes accordingly so the compat functions are called
+	 * and the original ops are not used.
+	 */
+	if (ops->ndo_get_phys_port_name) {
+		/* Some drivers use the same set of ndos for netdevs
+		 * that have devlink_port registered and also for
+		 * those who don't. Make sure that ndo_get_phys_port_name
+		 * returns -EOPNOTSUPP here in case it is defined.
+		 * Warn if not.
+		 */
+		char name[IFNAMSIZ];
+		int err;
+
+		err = ops->ndo_get_phys_port_name(netdev, name, sizeof(name));
+		WARN_ON(err != -EOPNOTSUPP);
+	}
+	if (ops->ndo_get_port_parent_id) {
+		/* Some drivers use the same set of ndos for netdevs
+		 * that have devlink_port registered and also for
+		 * those who don't. Make sure that ndo_get_port_parent_id
+		 * returns -EOPNOTSUPP here in case it is defined.
+		 * Warn if not.
+		 */
+		struct netdev_phys_item_id ppid;
+		int err;
+
+		err = ops->ndo_get_port_parent_id(netdev, &ppid);
+		WARN_ON(err != -EOPNOTSUPP);
+	}
+}
+
+static void __devlink_port_type_set(struct devlink_port *devlink_port,
+				    enum devlink_port_type type,
+				    void *type_dev)
+{
+	struct net_device *netdev = type_dev;
+
+	ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
+
+	if (type == DEVLINK_PORT_TYPE_NOTSET) {
+		devlink_port_type_warn_schedule(devlink_port);
+	} else {
+		devlink_port_type_warn_cancel(devlink_port);
+		if (type == DEVLINK_PORT_TYPE_ETH && netdev)
+			devlink_port_type_netdev_checks(devlink_port, netdev);
+	}
+
+	spin_lock_bh(&devlink_port->type_lock);
+	devlink_port->type = type;
+	switch (type) {
+	case DEVLINK_PORT_TYPE_ETH:
+		devlink_port->type_eth.netdev = netdev;
+		if (netdev) {
+			ASSERT_RTNL();
+			devlink_port->type_eth.ifindex = netdev->ifindex;
+			BUILD_BUG_ON(sizeof(devlink_port->type_eth.ifname) !=
+				     sizeof(netdev->name));
+			strcpy(devlink_port->type_eth.ifname, netdev->name);
+		}
+		break;
+	case DEVLINK_PORT_TYPE_IB:
+		devlink_port->type_ib.ibdev = type_dev;
+		break;
+	default:
+		break;
+	}
+	spin_unlock_bh(&devlink_port->type_lock);
+	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+}
+
+/**
+ *	devlink_port_type_eth_set - Set port type to Ethernet
+ *
+ *	@devlink_port: devlink port
+ *
+ *	If driver is calling this, most likely it is doing something wrong.
+ */
+void devlink_port_type_eth_set(struct devlink_port *devlink_port)
+{
+	dev_warn(devlink_port->devlink->dev,
+		 "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n",
+		 devlink_port->index);
+	__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, NULL);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
+
+/**
+ *	devlink_port_type_ib_set - Set port type to InfiniBand
+ *
+ *	@devlink_port: devlink port
+ *	@ibdev: related IB device
+ */
+void devlink_port_type_ib_set(struct devlink_port *devlink_port,
+			      struct ib_device *ibdev)
+{
+	__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
+
+/**
+ *	devlink_port_type_clear - Clear port type
+ *
+ *	@devlink_port: devlink port
+ *
+ *	If driver is calling this for clearing Ethernet type, most likely
+ *	it is doing something wrong.
+ */
+void devlink_port_type_clear(struct devlink_port *devlink_port)
+{
+	if (devlink_port->type == DEVLINK_PORT_TYPE_ETH)
+		dev_warn(devlink_port->devlink->dev,
+			 "devlink port type for port %d cleared without a software interface reference, device type not supported by the kernel?\n",
+			 devlink_port->index);
+	__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_clear);
+
+int devlink_port_netdevice_event(struct notifier_block *nb,
+				 unsigned long event, void *ptr)
+{
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct devlink_port *devlink_port = netdev->devlink_port;
+	struct devlink *devlink;
+
+	if (!devlink_port)
+		return NOTIFY_OK;
+	devlink = devlink_port->devlink;
+
+	switch (event) {
+	case NETDEV_POST_INIT:
+		/* Set the type but not netdev pointer. It is going to be set
+		 * later on by NETDEV_REGISTER event. Happens once during
+		 * netdevice register
+		 */
+		__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH,
+					NULL);
+		break;
+	case NETDEV_REGISTER:
+	case NETDEV_CHANGENAME:
+		if (devlink_net(devlink) != dev_net(netdev))
+			return NOTIFY_OK;
+		/* Set the netdev on top of previously set type. Note this
+		 * event happens also during net namespace change so here
+		 * we take into account netdev pointer appearing in this
+		 * namespace.
+		 */
+		__devlink_port_type_set(devlink_port, devlink_port->type,
+					netdev);
+		break;
+	case NETDEV_UNREGISTER:
+		if (devlink_net(devlink) != dev_net(netdev))
+			return NOTIFY_OK;
+		/* Clear netdev pointer, but not the type. This event happens
+		 * also during net namespace change so we need to clear
+		 * pointer to netdev that is going to another net namespace.
+		 */
+		__devlink_port_type_set(devlink_port, devlink_port->type,
+					NULL);
+		break;
+	case NETDEV_PRE_UNINIT:
+		/* Clear the type and the netdev pointer. Happens one during
+		 * netdevice unregister.
+		 */
+		__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET,
+					NULL);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
+				    enum devlink_port_flavour flavour)
+{
+	struct devlink_port_attrs *attrs = &devlink_port->attrs;
+
+	devlink_port->attrs_set = true;
+	attrs->flavour = flavour;
+	if (attrs->switch_id.id_len) {
+		devlink_port->switch_port = true;
+		if (WARN_ON(attrs->switch_id.id_len > MAX_PHYS_ITEM_ID_LEN))
+			attrs->switch_id.id_len = MAX_PHYS_ITEM_ID_LEN;
+	} else {
+		devlink_port->switch_port = false;
+	}
+	return 0;
+}
+
+/**
+ *	devlink_port_attrs_set - Set port attributes
+ *
+ *	@devlink_port: devlink port
+ *	@attrs: devlink port attrs
+ */
+void devlink_port_attrs_set(struct devlink_port *devlink_port,
+			    struct devlink_port_attrs *attrs)
+{
+	int ret;
+
+	ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+	devlink_port->attrs = *attrs;
+	ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
+	if (ret)
+		return;
+	WARN_ON(attrs->splittable && attrs->split);
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
+
+/**
+ *	devlink_port_attrs_pci_pf_set - Set PCI PF port attributes
+ *
+ *	@devlink_port: devlink port
+ *	@controller: associated controller number for the devlink port instance
+ *	@pf: associated PF for the devlink port instance
+ *	@external: indicates if the port is for an external controller
+ */
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
+				   u16 pf, bool external)
+{
+	struct devlink_port_attrs *attrs = &devlink_port->attrs;
+	int ret;
+
+	ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+	ret = __devlink_port_attrs_set(devlink_port,
+				       DEVLINK_PORT_FLAVOUR_PCI_PF);
+	if (ret)
+		return;
+	attrs->pci_pf.controller = controller;
+	attrs->pci_pf.pf = pf;
+	attrs->pci_pf.external = external;
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
+
+/**
+ *	devlink_port_attrs_pci_vf_set - Set PCI VF port attributes
+ *
+ *	@devlink_port: devlink port
+ *	@controller: associated controller number for the devlink port instance
+ *	@pf: associated PF for the devlink port instance
+ *	@vf: associated VF of a PF for the devlink port instance
+ *	@external: indicates if the port is for an external controller
+ */
+void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
+				   u16 pf, u16 vf, bool external)
+{
+	struct devlink_port_attrs *attrs = &devlink_port->attrs;
+	int ret;
+
+	ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+	ret = __devlink_port_attrs_set(devlink_port,
+				       DEVLINK_PORT_FLAVOUR_PCI_VF);
+	if (ret)
+		return;
+	attrs->pci_vf.controller = controller;
+	attrs->pci_vf.pf = pf;
+	attrs->pci_vf.vf = vf;
+	attrs->pci_vf.external = external;
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
+
+/**
+ *	devlink_port_attrs_pci_sf_set - Set PCI SF port attributes
+ *
+ *	@devlink_port: devlink port
+ *	@controller: associated controller number for the devlink port instance
+ *	@pf: associated PF for the devlink port instance
+ *	@sf: associated SF of a PF for the devlink port instance
+ *	@external: indicates if the port is for an external controller
+ */
+void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller,
+				   u16 pf, u32 sf, bool external)
+{
+	struct devlink_port_attrs *attrs = &devlink_port->attrs;
+	int ret;
+
+	ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+	ret = __devlink_port_attrs_set(devlink_port,
+				       DEVLINK_PORT_FLAVOUR_PCI_SF);
+	if (ret)
+		return;
+	attrs->pci_sf.controller = controller;
+	attrs->pci_sf.pf = pf;
+	attrs->pci_sf.sf = sf;
+	attrs->pci_sf.external = external;
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
+
+/**
+ *	devlink_port_linecard_set - Link port with a linecard
+ *
+ *	@devlink_port: devlink port
+ *	@linecard: devlink linecard
+ */
+void devlink_port_linecard_set(struct devlink_port *devlink_port,
+			       struct devlink_linecard *linecard)
+{
+	ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+	devlink_port->linecard = linecard;
+}
+EXPORT_SYMBOL_GPL(devlink_port_linecard_set);
+
+static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
+					     char *name, size_t len)
+{
+	struct devlink_port_attrs *attrs = &devlink_port->attrs;
+	int n = 0;
+
+	if (!devlink_port->attrs_set)
+		return -EOPNOTSUPP;
+
+	switch (attrs->flavour) {
+	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
+		if (devlink_port->linecard)
+			n = snprintf(name, len, "l%u",
+				     devlink_port->linecard->index);
+		if (n < len)
+			n += snprintf(name + n, len - n, "p%u",
+				      attrs->phys.port_number);
+		if (n < len && attrs->split)
+			n += snprintf(name + n, len - n, "s%u",
+				      attrs->phys.split_subport_number);
+		break;
+	case DEVLINK_PORT_FLAVOUR_CPU:
+	case DEVLINK_PORT_FLAVOUR_DSA:
+	case DEVLINK_PORT_FLAVOUR_UNUSED:
+		/* As CPU and DSA ports do not have a netdevice associated
+		 * case should not ever happen.
+		 */
+		WARN_ON(1);
+		return -EINVAL;
+	case DEVLINK_PORT_FLAVOUR_PCI_PF:
+		if (attrs->pci_pf.external) {
+			n = snprintf(name, len, "c%u", attrs->pci_pf.controller);
+			if (n >= len)
+				return -EINVAL;
+			len -= n;
+			name += n;
+		}
+		n = snprintf(name, len, "pf%u", attrs->pci_pf.pf);
+		break;
+	case DEVLINK_PORT_FLAVOUR_PCI_VF:
+		if (attrs->pci_vf.external) {
+			n = snprintf(name, len, "c%u", attrs->pci_vf.controller);
+			if (n >= len)
+				return -EINVAL;
+			len -= n;
+			name += n;
+		}
+		n = snprintf(name, len, "pf%uvf%u",
+			     attrs->pci_vf.pf, attrs->pci_vf.vf);
+		break;
+	case DEVLINK_PORT_FLAVOUR_PCI_SF:
+		if (attrs->pci_sf.external) {
+			n = snprintf(name, len, "c%u", attrs->pci_sf.controller);
+			if (n >= len)
+				return -EINVAL;
+			len -= n;
+			name += n;
+		}
+		n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
+			     attrs->pci_sf.sf);
+		break;
+	case DEVLINK_PORT_FLAVOUR_VIRTUAL:
+		return -EOPNOTSUPP;
+	}
+
+	if (n >= len)
+		return -EINVAL;
+
+	return 0;
+}
+
+int devlink_compat_phys_port_name_get(struct net_device *dev,
+				      char *name, size_t len)
+{
+	struct devlink_port *devlink_port;
+
+	/* RTNL mutex is held here which ensures that devlink_port
+	 * instance cannot disappear in the middle. No need to take
+	 * any devlink lock as only permanent values are accessed.
+	 */
+	ASSERT_RTNL();
+
+	devlink_port = dev->devlink_port;
+	if (!devlink_port)
+		return -EOPNOTSUPP;
+
+	return __devlink_port_phys_port_name_get(devlink_port, name, len);
+}
+
+int devlink_compat_switch_id_get(struct net_device *dev,
+				 struct netdev_phys_item_id *ppid)
+{
+	struct devlink_port *devlink_port;
+
+	/* Caller must hold RTNL mutex or reference to dev, which ensures that
+	 * devlink_port instance cannot disappear in the middle. No need to take
+	 * any devlink lock as only permanent values are accessed.
+	 */
+	devlink_port = dev->devlink_port;
+	if (!devlink_port || !devlink_port->switch_port)
+		return -EOPNOTSUPP;
+
+	memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid));
+
+	return 0;
+}

From 2b4d8bb0888930b4a26cd46ec067753228877488 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:45 +0200
Subject: [PATCH 1999/2047] devlink: push shared buffer related code into
 separate file

Cut out another chunk from leftover.c and put sb related code
into a separate file.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-4-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/Makefile        |   2 +-
 net/devlink/devl_internal.h |   9 +
 net/devlink/leftover.c      | 990 -----------------------------------
 net/devlink/sb.c            | 996 ++++++++++++++++++++++++++++++++++++
 4 files changed, 1006 insertions(+), 991 deletions(-)
 create mode 100644 net/devlink/sb.c

diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index 456bfb336540..dcc508af48a9 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o port.o health.o
+obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o port.o sb.o health.o \
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 7d01e2060702..e1a6b7a763b8 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -220,6 +220,15 @@ int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
 				     struct genl_info *info);
 int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
+					 struct genl_info *info);
+int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
+					    struct genl_info *info);
+int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
+					struct genl_info *info);
+int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
+					 struct genl_info *info);
 int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
 					    struct genl_info *info);
 int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index d14b40fb8fdf..795bfdd41103 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -231,164 +231,6 @@ devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
 	return devlink_linecard_get_from_attrs(devlink, info->attrs);
 }
 
-struct devlink_sb {
-	struct list_head list;
-	unsigned int index;
-	u32 size;
-	u16 ingress_pools_count;
-	u16 egress_pools_count;
-	u16 ingress_tc_count;
-	u16 egress_tc_count;
-};
-
-static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb)
-{
-	return devlink_sb->ingress_pools_count + devlink_sb->egress_pools_count;
-}
-
-static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink,
-						  unsigned int sb_index)
-{
-	struct devlink_sb *devlink_sb;
-
-	list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
-		if (devlink_sb->index == sb_index)
-			return devlink_sb;
-	}
-	return NULL;
-}
-
-static bool devlink_sb_index_exists(struct devlink *devlink,
-				    unsigned int sb_index)
-{
-	return devlink_sb_get_by_index(devlink, sb_index);
-}
-
-static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink,
-						    struct nlattr **attrs)
-{
-	if (attrs[DEVLINK_ATTR_SB_INDEX]) {
-		u32 sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]);
-		struct devlink_sb *devlink_sb;
-
-		devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
-		if (!devlink_sb)
-			return ERR_PTR(-ENODEV);
-		return devlink_sb;
-	}
-	return ERR_PTR(-EINVAL);
-}
-
-static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink,
-						   struct genl_info *info)
-{
-	return devlink_sb_get_from_attrs(devlink, info->attrs);
-}
-
-static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb,
-						struct nlattr **attrs,
-						u16 *p_pool_index)
-{
-	u16 val;
-
-	if (!attrs[DEVLINK_ATTR_SB_POOL_INDEX])
-		return -EINVAL;
-
-	val = nla_get_u16(attrs[DEVLINK_ATTR_SB_POOL_INDEX]);
-	if (val >= devlink_sb_pool_count(devlink_sb))
-		return -EINVAL;
-	*p_pool_index = val;
-	return 0;
-}
-
-static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb,
-					       struct genl_info *info,
-					       u16 *p_pool_index)
-{
-	return devlink_sb_pool_index_get_from_attrs(devlink_sb, info->attrs,
-						    p_pool_index);
-}
-
-static int
-devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs,
-				    enum devlink_sb_pool_type *p_pool_type)
-{
-	u8 val;
-
-	if (!attrs[DEVLINK_ATTR_SB_POOL_TYPE])
-		return -EINVAL;
-
-	val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_TYPE]);
-	if (val != DEVLINK_SB_POOL_TYPE_INGRESS &&
-	    val != DEVLINK_SB_POOL_TYPE_EGRESS)
-		return -EINVAL;
-	*p_pool_type = val;
-	return 0;
-}
-
-static int
-devlink_sb_pool_type_get_from_info(struct genl_info *info,
-				   enum devlink_sb_pool_type *p_pool_type)
-{
-	return devlink_sb_pool_type_get_from_attrs(info->attrs, p_pool_type);
-}
-
-static int
-devlink_sb_th_type_get_from_attrs(struct nlattr **attrs,
-				  enum devlink_sb_threshold_type *p_th_type)
-{
-	u8 val;
-
-	if (!attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE])
-		return -EINVAL;
-
-	val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]);
-	if (val != DEVLINK_SB_THRESHOLD_TYPE_STATIC &&
-	    val != DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC)
-		return -EINVAL;
-	*p_th_type = val;
-	return 0;
-}
-
-static int
-devlink_sb_th_type_get_from_info(struct genl_info *info,
-				 enum devlink_sb_threshold_type *p_th_type)
-{
-	return devlink_sb_th_type_get_from_attrs(info->attrs, p_th_type);
-}
-
-static int
-devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
-				   struct nlattr **attrs,
-				   enum devlink_sb_pool_type pool_type,
-				   u16 *p_tc_index)
-{
-	u16 val;
-
-	if (!attrs[DEVLINK_ATTR_SB_TC_INDEX])
-		return -EINVAL;
-
-	val = nla_get_u16(attrs[DEVLINK_ATTR_SB_TC_INDEX]);
-	if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS &&
-	    val >= devlink_sb->ingress_tc_count)
-		return -EINVAL;
-	if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS &&
-	    val >= devlink_sb->egress_tc_count)
-		return -EINVAL;
-	*p_tc_index = val;
-	return 0;
-}
-
-static int
-devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
-				  struct genl_info *info,
-				  enum devlink_sb_pool_type pool_type,
-				  u16 *p_tc_index)
-{
-	return devlink_sb_tc_index_get_from_attrs(devlink_sb, info->attrs,
-						  pool_type, p_tc_index);
-}
-
 struct devlink_region {
 	struct devlink *devlink;
 	struct devlink_port *port;
@@ -1286,775 +1128,6 @@ static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
 	return 0;
 }
 
-static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
-			      struct devlink_sb *devlink_sb,
-			      enum devlink_command cmd, u32 portid,
-			      u32 seq, int flags)
-{
-	void *hdr;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size))
-		goto nla_put_failure;
-	if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT,
-			devlink_sb->ingress_pools_count))
-		goto nla_put_failure;
-	if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT,
-			devlink_sb->egress_pools_count))
-		goto nla_put_failure;
-	if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT,
-			devlink_sb->ingress_tc_count))
-		goto nla_put_failure;
-	if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT,
-			devlink_sb->egress_tc_count))
-		goto nla_put_failure;
-
-	genlmsg_end(msg, hdr);
-	return 0;
-
-nla_put_failure:
-	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
-}
-
-int devlink_nl_sb_get_doit(struct sk_buff *skb, struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_sb *devlink_sb;
-	struct sk_buff *msg;
-	int err;
-
-	devlink_sb = devlink_sb_get_from_info(devlink, info);
-	if (IS_ERR(devlink_sb))
-		return PTR_ERR(devlink_sb);
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
-				 DEVLINK_CMD_SB_NEW,
-				 info->snd_portid, info->snd_seq, 0);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
-
-	return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
-			   struct netlink_callback *cb, int flags)
-{
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct devlink_sb *devlink_sb;
-	int idx = 0;
-	int err = 0;
-
-	list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
-		if (idx < state->idx) {
-			idx++;
-			continue;
-		}
-		err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
-					 DEVLINK_CMD_SB_NEW,
-					 NETLINK_CB(cb->skb).portid,
-					 cb->nlh->nlmsg_seq, flags);
-		if (err) {
-			state->idx = idx;
-			break;
-		}
-		idx++;
-	}
-
-	return err;
-}
-
-int devlink_nl_sb_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	return devlink_nl_dumpit(skb, cb, devlink_nl_sb_get_dump_one);
-}
-
-static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
-				   struct devlink_sb *devlink_sb,
-				   u16 pool_index, enum devlink_command cmd,
-				   u32 portid, u32 seq, int flags)
-{
-	struct devlink_sb_pool_info pool_info;
-	void *hdr;
-	int err;
-
-	err = devlink->ops->sb_pool_get(devlink, devlink_sb->index,
-					pool_index, &pool_info);
-	if (err)
-		return err;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
-		goto nla_put_failure;
-	if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
-		goto nla_put_failure;
-	if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_info.pool_type))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, pool_info.size))
-		goto nla_put_failure;
-	if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,
-		       pool_info.threshold_type))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_CELL_SIZE,
-			pool_info.cell_size))
-		goto nla_put_failure;
-
-	genlmsg_end(msg, hdr);
-	return 0;
-
-nla_put_failure:
-	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
-}
-
-int devlink_nl_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_sb *devlink_sb;
-	struct sk_buff *msg;
-	u16 pool_index;
-	int err;
-
-	devlink_sb = devlink_sb_get_from_info(devlink, info);
-	if (IS_ERR(devlink_sb))
-		return PTR_ERR(devlink_sb);
-
-	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
-						  &pool_index);
-	if (err)
-		return err;
-
-	if (!devlink->ops->sb_pool_get)
-		return -EOPNOTSUPP;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index,
-				      DEVLINK_CMD_SB_POOL_NEW,
-				      info->snd_portid, info->snd_seq, 0);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
-
-	return genlmsg_reply(msg, info);
-}
-
-static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
-				struct devlink *devlink,
-				struct devlink_sb *devlink_sb,
-				u32 portid, u32 seq, int flags)
-{
-	u16 pool_count = devlink_sb_pool_count(devlink_sb);
-	u16 pool_index;
-	int err;
-
-	for (pool_index = 0; pool_index < pool_count; pool_index++) {
-		if (*p_idx < start) {
-			(*p_idx)++;
-			continue;
-		}
-		err = devlink_nl_sb_pool_fill(msg, devlink,
-					      devlink_sb,
-					      pool_index,
-					      DEVLINK_CMD_SB_POOL_NEW,
-					      portid, seq, flags);
-		if (err)
-			return err;
-		(*p_idx)++;
-	}
-	return 0;
-}
-
-static int
-devlink_nl_sb_pool_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
-				struct netlink_callback *cb, int flags)
-{
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct devlink_sb *devlink_sb;
-	int err = 0;
-	int idx = 0;
-
-	if (!devlink->ops->sb_pool_get)
-		return 0;
-
-	list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
-		err = __sb_pool_get_dumpit(msg, state->idx, &idx,
-					   devlink, devlink_sb,
-					   NETLINK_CB(cb->skb).portid,
-					   cb->nlh->nlmsg_seq, flags);
-		if (err == -EOPNOTSUPP) {
-			err = 0;
-		} else if (err) {
-			state->idx = idx;
-			break;
-		}
-	}
-
-	return err;
-}
-
-int devlink_nl_sb_pool_get_dumpit(struct sk_buff *skb,
-				  struct netlink_callback *cb)
-{
-	return devlink_nl_dumpit(skb, cb, devlink_nl_sb_pool_get_dump_one);
-}
-
-static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
-			       u16 pool_index, u32 size,
-			       enum devlink_sb_threshold_type threshold_type,
-			       struct netlink_ext_ack *extack)
-
-{
-	const struct devlink_ops *ops = devlink->ops;
-
-	if (ops->sb_pool_set)
-		return ops->sb_pool_set(devlink, sb_index, pool_index,
-					size, threshold_type, extack);
-	return -EOPNOTSUPP;
-}
-
-static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb,
-					   struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	enum devlink_sb_threshold_type threshold_type;
-	struct devlink_sb *devlink_sb;
-	u16 pool_index;
-	u32 size;
-	int err;
-
-	devlink_sb = devlink_sb_get_from_info(devlink, info);
-	if (IS_ERR(devlink_sb))
-		return PTR_ERR(devlink_sb);
-
-	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
-						  &pool_index);
-	if (err)
-		return err;
-
-	err = devlink_sb_th_type_get_from_info(info, &threshold_type);
-	if (err)
-		return err;
-
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_POOL_SIZE))
-		return -EINVAL;
-
-	size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]);
-	return devlink_sb_pool_set(devlink, devlink_sb->index,
-				   pool_index, size, threshold_type,
-				   info->extack);
-}
-
-static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
-					struct devlink *devlink,
-					struct devlink_port *devlink_port,
-					struct devlink_sb *devlink_sb,
-					u16 pool_index,
-					enum devlink_command cmd,
-					u32 portid, u32 seq, int flags)
-{
-	const struct devlink_ops *ops = devlink->ops;
-	u32 threshold;
-	void *hdr;
-	int err;
-
-	err = ops->sb_port_pool_get(devlink_port, devlink_sb->index,
-				    pool_index, &threshold);
-	if (err)
-		return err;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
-		goto nla_put_failure;
-	if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
-		goto nla_put_failure;
-
-	if (ops->sb_occ_port_pool_get) {
-		u32 cur;
-		u32 max;
-
-		err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index,
-						pool_index, &cur, &max);
-		if (err && err != -EOPNOTSUPP)
-			goto sb_occ_get_failure;
-		if (!err) {
-			if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
-				goto nla_put_failure;
-			if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
-				goto nla_put_failure;
-		}
-	}
-
-	genlmsg_end(msg, hdr);
-	return 0;
-
-nla_put_failure:
-	err = -EMSGSIZE;
-sb_occ_get_failure:
-	genlmsg_cancel(msg, hdr);
-	return err;
-}
-
-int devlink_nl_sb_port_pool_get_doit(struct sk_buff *skb,
-				     struct genl_info *info)
-{
-	struct devlink_port *devlink_port = info->user_ptr[1];
-	struct devlink *devlink = devlink_port->devlink;
-	struct devlink_sb *devlink_sb;
-	struct sk_buff *msg;
-	u16 pool_index;
-	int err;
-
-	devlink_sb = devlink_sb_get_from_info(devlink, info);
-	if (IS_ERR(devlink_sb))
-		return PTR_ERR(devlink_sb);
-
-	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
-						  &pool_index);
-	if (err)
-		return err;
-
-	if (!devlink->ops->sb_port_pool_get)
-		return -EOPNOTSUPP;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port,
-					   devlink_sb, pool_index,
-					   DEVLINK_CMD_SB_PORT_POOL_NEW,
-					   info->snd_portid, info->snd_seq, 0);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
-
-	return genlmsg_reply(msg, info);
-}
-
-static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
-				     struct devlink *devlink,
-				     struct devlink_sb *devlink_sb,
-				     u32 portid, u32 seq, int flags)
-{
-	struct devlink_port *devlink_port;
-	u16 pool_count = devlink_sb_pool_count(devlink_sb);
-	unsigned long port_index;
-	u16 pool_index;
-	int err;
-
-	xa_for_each(&devlink->ports, port_index, devlink_port) {
-		for (pool_index = 0; pool_index < pool_count; pool_index++) {
-			if (*p_idx < start) {
-				(*p_idx)++;
-				continue;
-			}
-			err = devlink_nl_sb_port_pool_fill(msg, devlink,
-							   devlink_port,
-							   devlink_sb,
-							   pool_index,
-							   DEVLINK_CMD_SB_PORT_POOL_NEW,
-							   portid, seq, flags);
-			if (err)
-				return err;
-			(*p_idx)++;
-		}
-	}
-	return 0;
-}
-
-static int
-devlink_nl_sb_port_pool_get_dump_one(struct sk_buff *msg,
-				     struct devlink *devlink,
-				     struct netlink_callback *cb, int flags)
-{
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct devlink_sb *devlink_sb;
-	int idx = 0;
-	int err = 0;
-
-	if (!devlink->ops->sb_port_pool_get)
-		return 0;
-
-	list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
-		err = __sb_port_pool_get_dumpit(msg, state->idx, &idx,
-						devlink, devlink_sb,
-						NETLINK_CB(cb->skb).portid,
-						cb->nlh->nlmsg_seq, flags);
-		if (err == -EOPNOTSUPP) {
-			err = 0;
-		} else if (err) {
-			state->idx = idx;
-			break;
-		}
-	}
-
-	return err;
-}
-
-int devlink_nl_sb_port_pool_get_dumpit(struct sk_buff *skb,
-				       struct netlink_callback *cb)
-{
-	return devlink_nl_dumpit(skb, cb, devlink_nl_sb_port_pool_get_dump_one);
-}
-
-static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
-				    unsigned int sb_index, u16 pool_index,
-				    u32 threshold,
-				    struct netlink_ext_ack *extack)
-
-{
-	const struct devlink_ops *ops = devlink_port->devlink->ops;
-
-	if (ops->sb_port_pool_set)
-		return ops->sb_port_pool_set(devlink_port, sb_index,
-					     pool_index, threshold, extack);
-	return -EOPNOTSUPP;
-}
-
-static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
-						struct genl_info *info)
-{
-	struct devlink_port *devlink_port = info->user_ptr[1];
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_sb *devlink_sb;
-	u16 pool_index;
-	u32 threshold;
-	int err;
-
-	devlink_sb = devlink_sb_get_from_info(devlink, info);
-	if (IS_ERR(devlink_sb))
-		return PTR_ERR(devlink_sb);
-
-	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
-						  &pool_index);
-	if (err)
-		return err;
-
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
-		return -EINVAL;
-
-	threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
-	return devlink_sb_port_pool_set(devlink_port, devlink_sb->index,
-					pool_index, threshold, info->extack);
-}
-
-static int
-devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink,
-				struct devlink_port *devlink_port,
-				struct devlink_sb *devlink_sb, u16 tc_index,
-				enum devlink_sb_pool_type pool_type,
-				enum devlink_command cmd,
-				u32 portid, u32 seq, int flags)
-{
-	const struct devlink_ops *ops = devlink->ops;
-	u16 pool_index;
-	u32 threshold;
-	void *hdr;
-	int err;
-
-	err = ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index,
-				       tc_index, pool_type,
-				       &pool_index, &threshold);
-	if (err)
-		return err;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
-		goto nla_put_failure;
-	if (nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index))
-		goto nla_put_failure;
-	if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type))
-		goto nla_put_failure;
-	if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
-		goto nla_put_failure;
-
-	if (ops->sb_occ_tc_port_bind_get) {
-		u32 cur;
-		u32 max;
-
-		err = ops->sb_occ_tc_port_bind_get(devlink_port,
-						   devlink_sb->index,
-						   tc_index, pool_type,
-						   &cur, &max);
-		if (err && err != -EOPNOTSUPP)
-			return err;
-		if (!err) {
-			if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
-				goto nla_put_failure;
-			if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
-				goto nla_put_failure;
-		}
-	}
-
-	genlmsg_end(msg, hdr);
-	return 0;
-
-nla_put_failure:
-	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
-}
-
-int devlink_nl_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
-					struct genl_info *info)
-{
-	struct devlink_port *devlink_port = info->user_ptr[1];
-	struct devlink *devlink = devlink_port->devlink;
-	struct devlink_sb *devlink_sb;
-	struct sk_buff *msg;
-	enum devlink_sb_pool_type pool_type;
-	u16 tc_index;
-	int err;
-
-	devlink_sb = devlink_sb_get_from_info(devlink, info);
-	if (IS_ERR(devlink_sb))
-		return PTR_ERR(devlink_sb);
-
-	err = devlink_sb_pool_type_get_from_info(info, &pool_type);
-	if (err)
-		return err;
-
-	err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
-						pool_type, &tc_index);
-	if (err)
-		return err;
-
-	if (!devlink->ops->sb_tc_pool_bind_get)
-		return -EOPNOTSUPP;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port,
-					      devlink_sb, tc_index, pool_type,
-					      DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
-					      info->snd_portid,
-					      info->snd_seq, 0);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
-
-	return genlmsg_reply(msg, info);
-}
-
-static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
-					int start, int *p_idx,
-					struct devlink *devlink,
-					struct devlink_sb *devlink_sb,
-					u32 portid, u32 seq, int flags)
-{
-	struct devlink_port *devlink_port;
-	unsigned long port_index;
-	u16 tc_index;
-	int err;
-
-	xa_for_each(&devlink->ports, port_index, devlink_port) {
-		for (tc_index = 0;
-		     tc_index < devlink_sb->ingress_tc_count; tc_index++) {
-			if (*p_idx < start) {
-				(*p_idx)++;
-				continue;
-			}
-			err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
-							      devlink_port,
-							      devlink_sb,
-							      tc_index,
-							      DEVLINK_SB_POOL_TYPE_INGRESS,
-							      DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
-							      portid, seq,
-							      flags);
-			if (err)
-				return err;
-			(*p_idx)++;
-		}
-		for (tc_index = 0;
-		     tc_index < devlink_sb->egress_tc_count; tc_index++) {
-			if (*p_idx < start) {
-				(*p_idx)++;
-				continue;
-			}
-			err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
-							      devlink_port,
-							      devlink_sb,
-							      tc_index,
-							      DEVLINK_SB_POOL_TYPE_EGRESS,
-							      DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
-							      portid, seq,
-							      flags);
-			if (err)
-				return err;
-			(*p_idx)++;
-		}
-	}
-	return 0;
-}
-
-static int devlink_nl_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg,
-						   struct devlink *devlink,
-						   struct netlink_callback *cb,
-						   int flags)
-{
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct devlink_sb *devlink_sb;
-	int idx = 0;
-	int err = 0;
-
-	if (!devlink->ops->sb_tc_pool_bind_get)
-		return 0;
-
-	list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
-		err = __sb_tc_pool_bind_get_dumpit(msg, state->idx, &idx,
-						   devlink, devlink_sb,
-						   NETLINK_CB(cb->skb).portid,
-						   cb->nlh->nlmsg_seq, flags);
-		if (err == -EOPNOTSUPP) {
-			err = 0;
-		} else if (err) {
-			state->idx = idx;
-			break;
-		}
-	}
-
-	return err;
-}
-
-int devlink_nl_sb_tc_pool_bind_get_dumpit(struct sk_buff *skb,
-					  struct netlink_callback *cb)
-{
-	return devlink_nl_dumpit(skb, cb,
-				 devlink_nl_sb_tc_pool_bind_get_dump_one);
-}
-
-static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
-				       unsigned int sb_index, u16 tc_index,
-				       enum devlink_sb_pool_type pool_type,
-				       u16 pool_index, u32 threshold,
-				       struct netlink_ext_ack *extack)
-
-{
-	const struct devlink_ops *ops = devlink_port->devlink->ops;
-
-	if (ops->sb_tc_pool_bind_set)
-		return ops->sb_tc_pool_bind_set(devlink_port, sb_index,
-						tc_index, pool_type,
-						pool_index, threshold, extack);
-	return -EOPNOTSUPP;
-}
-
-static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
-						   struct genl_info *info)
-{
-	struct devlink_port *devlink_port = info->user_ptr[1];
-	struct devlink *devlink = info->user_ptr[0];
-	enum devlink_sb_pool_type pool_type;
-	struct devlink_sb *devlink_sb;
-	u16 tc_index;
-	u16 pool_index;
-	u32 threshold;
-	int err;
-
-	devlink_sb = devlink_sb_get_from_info(devlink, info);
-	if (IS_ERR(devlink_sb))
-		return PTR_ERR(devlink_sb);
-
-	err = devlink_sb_pool_type_get_from_info(info, &pool_type);
-	if (err)
-		return err;
-
-	err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
-						pool_type, &tc_index);
-	if (err)
-		return err;
-
-	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
-						  &pool_index);
-	if (err)
-		return err;
-
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
-		return -EINVAL;
-
-	threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
-	return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index,
-					   tc_index, pool_type,
-					   pool_index, threshold, info->extack);
-}
-
-static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
-					       struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	const struct devlink_ops *ops = devlink->ops;
-	struct devlink_sb *devlink_sb;
-
-	devlink_sb = devlink_sb_get_from_info(devlink, info);
-	if (IS_ERR(devlink_sb))
-		return PTR_ERR(devlink_sb);
-
-	if (ops->sb_occ_snapshot)
-		return ops->sb_occ_snapshot(devlink, devlink_sb->index);
-	return -EOPNOTSUPP;
-}
-
-static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
-						struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	const struct devlink_ops *ops = devlink->ops;
-	struct devlink_sb *devlink_sb;
-
-	devlink_sb = devlink_sb_get_from_info(devlink, info);
-	if (IS_ERR(devlink_sb))
-		return PTR_ERR(devlink_sb);
-
-	if (ops->sb_occ_max_clear)
-		return ops->sb_occ_max_clear(devlink, devlink_sb->index);
-	return -EOPNOTSUPP;
-}
-
 int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
 			     struct netlink_ext_ack *extack)
 {
@@ -6215,69 +5288,6 @@ void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
 }
 EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
 
-int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
-		     u32 size, u16 ingress_pools_count,
-		     u16 egress_pools_count, u16 ingress_tc_count,
-		     u16 egress_tc_count)
-{
-	struct devlink_sb *devlink_sb;
-
-	lockdep_assert_held(&devlink->lock);
-
-	if (devlink_sb_index_exists(devlink, sb_index))
-		return -EEXIST;
-
-	devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL);
-	if (!devlink_sb)
-		return -ENOMEM;
-	devlink_sb->index = sb_index;
-	devlink_sb->size = size;
-	devlink_sb->ingress_pools_count = ingress_pools_count;
-	devlink_sb->egress_pools_count = egress_pools_count;
-	devlink_sb->ingress_tc_count = ingress_tc_count;
-	devlink_sb->egress_tc_count = egress_tc_count;
-	list_add_tail(&devlink_sb->list, &devlink->sb_list);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devl_sb_register);
-
-int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
-			u32 size, u16 ingress_pools_count,
-			u16 egress_pools_count, u16 ingress_tc_count,
-			u16 egress_tc_count)
-{
-	int err;
-
-	devl_lock(devlink);
-	err = devl_sb_register(devlink, sb_index, size, ingress_pools_count,
-			       egress_pools_count, ingress_tc_count,
-			       egress_tc_count);
-	devl_unlock(devlink);
-	return err;
-}
-EXPORT_SYMBOL_GPL(devlink_sb_register);
-
-void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index)
-{
-	struct devlink_sb *devlink_sb;
-
-	lockdep_assert_held(&devlink->lock);
-
-	devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
-	WARN_ON(!devlink_sb);
-	list_del(&devlink_sb->list);
-	kfree(devlink_sb);
-}
-EXPORT_SYMBOL_GPL(devl_sb_unregister);
-
-void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
-{
-	devl_lock(devlink);
-	devl_sb_unregister(devlink, sb_index);
-	devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_sb_unregister);
-
 /**
  * devl_dpipe_headers_register - register dpipe headers
  *
diff --git a/net/devlink/sb.c b/net/devlink/sb.c
new file mode 100644
index 000000000000..bd677fff5ec8
--- /dev/null
+++ b/net/devlink/sb.c
@@ -0,0 +1,996 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+struct devlink_sb {
+	struct list_head list;
+	unsigned int index;
+	u32 size;
+	u16 ingress_pools_count;
+	u16 egress_pools_count;
+	u16 ingress_tc_count;
+	u16 egress_tc_count;
+};
+
+static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb)
+{
+	return devlink_sb->ingress_pools_count + devlink_sb->egress_pools_count;
+}
+
+static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink,
+						  unsigned int sb_index)
+{
+	struct devlink_sb *devlink_sb;
+
+	list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+		if (devlink_sb->index == sb_index)
+			return devlink_sb;
+	}
+	return NULL;
+}
+
+static bool devlink_sb_index_exists(struct devlink *devlink,
+				    unsigned int sb_index)
+{
+	return devlink_sb_get_by_index(devlink, sb_index);
+}
+
+static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink,
+						    struct nlattr **attrs)
+{
+	if (attrs[DEVLINK_ATTR_SB_INDEX]) {
+		u32 sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]);
+		struct devlink_sb *devlink_sb;
+
+		devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
+		if (!devlink_sb)
+			return ERR_PTR(-ENODEV);
+		return devlink_sb;
+	}
+	return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink,
+						   struct genl_info *info)
+{
+	return devlink_sb_get_from_attrs(devlink, info->attrs);
+}
+
+static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb,
+						struct nlattr **attrs,
+						u16 *p_pool_index)
+{
+	u16 val;
+
+	if (!attrs[DEVLINK_ATTR_SB_POOL_INDEX])
+		return -EINVAL;
+
+	val = nla_get_u16(attrs[DEVLINK_ATTR_SB_POOL_INDEX]);
+	if (val >= devlink_sb_pool_count(devlink_sb))
+		return -EINVAL;
+	*p_pool_index = val;
+	return 0;
+}
+
+static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb,
+					       struct genl_info *info,
+					       u16 *p_pool_index)
+{
+	return devlink_sb_pool_index_get_from_attrs(devlink_sb, info->attrs,
+						    p_pool_index);
+}
+
+static int
+devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs,
+				    enum devlink_sb_pool_type *p_pool_type)
+{
+	u8 val;
+
+	if (!attrs[DEVLINK_ATTR_SB_POOL_TYPE])
+		return -EINVAL;
+
+	val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_TYPE]);
+	if (val != DEVLINK_SB_POOL_TYPE_INGRESS &&
+	    val != DEVLINK_SB_POOL_TYPE_EGRESS)
+		return -EINVAL;
+	*p_pool_type = val;
+	return 0;
+}
+
+static int
+devlink_sb_pool_type_get_from_info(struct genl_info *info,
+				   enum devlink_sb_pool_type *p_pool_type)
+{
+	return devlink_sb_pool_type_get_from_attrs(info->attrs, p_pool_type);
+}
+
+static int
+devlink_sb_th_type_get_from_attrs(struct nlattr **attrs,
+				  enum devlink_sb_threshold_type *p_th_type)
+{
+	u8 val;
+
+	if (!attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE])
+		return -EINVAL;
+
+	val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]);
+	if (val != DEVLINK_SB_THRESHOLD_TYPE_STATIC &&
+	    val != DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC)
+		return -EINVAL;
+	*p_th_type = val;
+	return 0;
+}
+
+static int
+devlink_sb_th_type_get_from_info(struct genl_info *info,
+				 enum devlink_sb_threshold_type *p_th_type)
+{
+	return devlink_sb_th_type_get_from_attrs(info->attrs, p_th_type);
+}
+
+static int
+devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
+				   struct nlattr **attrs,
+				   enum devlink_sb_pool_type pool_type,
+				   u16 *p_tc_index)
+{
+	u16 val;
+
+	if (!attrs[DEVLINK_ATTR_SB_TC_INDEX])
+		return -EINVAL;
+
+	val = nla_get_u16(attrs[DEVLINK_ATTR_SB_TC_INDEX]);
+	if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS &&
+	    val >= devlink_sb->ingress_tc_count)
+		return -EINVAL;
+	if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS &&
+	    val >= devlink_sb->egress_tc_count)
+		return -EINVAL;
+	*p_tc_index = val;
+	return 0;
+}
+
+static int
+devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
+				  struct genl_info *info,
+				  enum devlink_sb_pool_type pool_type,
+				  u16 *p_tc_index)
+{
+	return devlink_sb_tc_index_get_from_attrs(devlink_sb, info->attrs,
+						  pool_type, p_tc_index);
+}
+
+static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
+			      struct devlink_sb *devlink_sb,
+			      enum devlink_command cmd, u32 portid,
+			      u32 seq, int flags)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT,
+			devlink_sb->ingress_pools_count))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT,
+			devlink_sb->egress_pools_count))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT,
+			devlink_sb->ingress_tc_count))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT,
+			devlink_sb->egress_tc_count))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+int devlink_nl_sb_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_sb *devlink_sb;
+	struct sk_buff *msg;
+	int err;
+
+	devlink_sb = devlink_sb_get_from_info(devlink, info);
+	if (IS_ERR(devlink_sb))
+		return PTR_ERR(devlink_sb);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
+				 DEVLINK_CMD_SB_NEW,
+				 info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+			   struct netlink_callback *cb, int flags)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_sb *devlink_sb;
+	int idx = 0;
+	int err = 0;
+
+	list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+		if (idx < state->idx) {
+			idx++;
+			continue;
+		}
+		err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
+					 DEVLINK_CMD_SB_NEW,
+					 NETLINK_CB(cb->skb).portid,
+					 cb->nlh->nlmsg_seq, flags);
+		if (err) {
+			state->idx = idx;
+			break;
+		}
+		idx++;
+	}
+
+	return err;
+}
+
+int devlink_nl_sb_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return devlink_nl_dumpit(skb, cb, devlink_nl_sb_get_dump_one);
+}
+
+static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
+				   struct devlink_sb *devlink_sb,
+				   u16 pool_index, enum devlink_command cmd,
+				   u32 portid, u32 seq, int flags)
+{
+	struct devlink_sb_pool_info pool_info;
+	void *hdr;
+	int err;
+
+	err = devlink->ops->sb_pool_get(devlink, devlink_sb->index,
+					pool_index, &pool_info);
+	if (err)
+		return err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+		goto nla_put_failure;
+	if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_info.pool_type))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, pool_info.size))
+		goto nla_put_failure;
+	if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,
+		       pool_info.threshold_type))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_CELL_SIZE,
+			pool_info.cell_size))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+int devlink_nl_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_sb *devlink_sb;
+	struct sk_buff *msg;
+	u16 pool_index;
+	int err;
+
+	devlink_sb = devlink_sb_get_from_info(devlink, info);
+	if (IS_ERR(devlink_sb))
+		return PTR_ERR(devlink_sb);
+
+	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+						  &pool_index);
+	if (err)
+		return err;
+
+	if (!devlink->ops->sb_pool_get)
+		return -EOPNOTSUPP;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index,
+				      DEVLINK_CMD_SB_POOL_NEW,
+				      info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
+				struct devlink *devlink,
+				struct devlink_sb *devlink_sb,
+				u32 portid, u32 seq, int flags)
+{
+	u16 pool_count = devlink_sb_pool_count(devlink_sb);
+	u16 pool_index;
+	int err;
+
+	for (pool_index = 0; pool_index < pool_count; pool_index++) {
+		if (*p_idx < start) {
+			(*p_idx)++;
+			continue;
+		}
+		err = devlink_nl_sb_pool_fill(msg, devlink,
+					      devlink_sb,
+					      pool_index,
+					      DEVLINK_CMD_SB_POOL_NEW,
+					      portid, seq, flags);
+		if (err)
+			return err;
+		(*p_idx)++;
+	}
+	return 0;
+}
+
+static int
+devlink_nl_sb_pool_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+				struct netlink_callback *cb, int flags)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_sb *devlink_sb;
+	int err = 0;
+	int idx = 0;
+
+	if (!devlink->ops->sb_pool_get)
+		return 0;
+
+	list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+		err = __sb_pool_get_dumpit(msg, state->idx, &idx,
+					   devlink, devlink_sb,
+					   NETLINK_CB(cb->skb).portid,
+					   cb->nlh->nlmsg_seq, flags);
+		if (err == -EOPNOTSUPP) {
+			err = 0;
+		} else if (err) {
+			state->idx = idx;
+			break;
+		}
+	}
+
+	return err;
+}
+
+int devlink_nl_sb_pool_get_dumpit(struct sk_buff *skb,
+				  struct netlink_callback *cb)
+{
+	return devlink_nl_dumpit(skb, cb, devlink_nl_sb_pool_get_dump_one);
+}
+
+static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
+			       u16 pool_index, u32 size,
+			       enum devlink_sb_threshold_type threshold_type,
+			       struct netlink_ext_ack *extack)
+
+{
+	const struct devlink_ops *ops = devlink->ops;
+
+	if (ops->sb_pool_set)
+		return ops->sb_pool_set(devlink, sb_index, pool_index,
+					size, threshold_type, extack);
+	return -EOPNOTSUPP;
+}
+
+int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	enum devlink_sb_threshold_type threshold_type;
+	struct devlink_sb *devlink_sb;
+	u16 pool_index;
+	u32 size;
+	int err;
+
+	devlink_sb = devlink_sb_get_from_info(devlink, info);
+	if (IS_ERR(devlink_sb))
+		return PTR_ERR(devlink_sb);
+
+	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+						  &pool_index);
+	if (err)
+		return err;
+
+	err = devlink_sb_th_type_get_from_info(info, &threshold_type);
+	if (err)
+		return err;
+
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_POOL_SIZE))
+		return -EINVAL;
+
+	size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]);
+	return devlink_sb_pool_set(devlink, devlink_sb->index,
+				   pool_index, size, threshold_type,
+				   info->extack);
+}
+
+static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
+					struct devlink *devlink,
+					struct devlink_port *devlink_port,
+					struct devlink_sb *devlink_sb,
+					u16 pool_index,
+					enum devlink_command cmd,
+					u32 portid, u32 seq, int flags)
+{
+	const struct devlink_ops *ops = devlink->ops;
+	u32 threshold;
+	void *hdr;
+	int err;
+
+	err = ops->sb_port_pool_get(devlink_port, devlink_sb->index,
+				    pool_index, &threshold);
+	if (err)
+		return err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
+		goto nla_put_failure;
+
+	if (ops->sb_occ_port_pool_get) {
+		u32 cur;
+		u32 max;
+
+		err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index,
+						pool_index, &cur, &max);
+		if (err && err != -EOPNOTSUPP)
+			goto sb_occ_get_failure;
+		if (!err) {
+			if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
+				goto nla_put_failure;
+			if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
+				goto nla_put_failure;
+		}
+	}
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	err = -EMSGSIZE;
+sb_occ_get_failure:
+	genlmsg_cancel(msg, hdr);
+	return err;
+}
+
+int devlink_nl_sb_port_pool_get_doit(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[1];
+	struct devlink *devlink = devlink_port->devlink;
+	struct devlink_sb *devlink_sb;
+	struct sk_buff *msg;
+	u16 pool_index;
+	int err;
+
+	devlink_sb = devlink_sb_get_from_info(devlink, info);
+	if (IS_ERR(devlink_sb))
+		return PTR_ERR(devlink_sb);
+
+	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+						  &pool_index);
+	if (err)
+		return err;
+
+	if (!devlink->ops->sb_port_pool_get)
+		return -EOPNOTSUPP;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port,
+					   devlink_sb, pool_index,
+					   DEVLINK_CMD_SB_PORT_POOL_NEW,
+					   info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
+				     struct devlink *devlink,
+				     struct devlink_sb *devlink_sb,
+				     u32 portid, u32 seq, int flags)
+{
+	struct devlink_port *devlink_port;
+	u16 pool_count = devlink_sb_pool_count(devlink_sb);
+	unsigned long port_index;
+	u16 pool_index;
+	int err;
+
+	xa_for_each(&devlink->ports, port_index, devlink_port) {
+		for (pool_index = 0; pool_index < pool_count; pool_index++) {
+			if (*p_idx < start) {
+				(*p_idx)++;
+				continue;
+			}
+			err = devlink_nl_sb_port_pool_fill(msg, devlink,
+							   devlink_port,
+							   devlink_sb,
+							   pool_index,
+							   DEVLINK_CMD_SB_PORT_POOL_NEW,
+							   portid, seq, flags);
+			if (err)
+				return err;
+			(*p_idx)++;
+		}
+	}
+	return 0;
+}
+
+static int
+devlink_nl_sb_port_pool_get_dump_one(struct sk_buff *msg,
+				     struct devlink *devlink,
+				     struct netlink_callback *cb, int flags)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_sb *devlink_sb;
+	int idx = 0;
+	int err = 0;
+
+	if (!devlink->ops->sb_port_pool_get)
+		return 0;
+
+	list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+		err = __sb_port_pool_get_dumpit(msg, state->idx, &idx,
+						devlink, devlink_sb,
+						NETLINK_CB(cb->skb).portid,
+						cb->nlh->nlmsg_seq, flags);
+		if (err == -EOPNOTSUPP) {
+			err = 0;
+		} else if (err) {
+			state->idx = idx;
+			break;
+		}
+	}
+
+	return err;
+}
+
+int devlink_nl_sb_port_pool_get_dumpit(struct sk_buff *skb,
+				       struct netlink_callback *cb)
+{
+	return devlink_nl_dumpit(skb, cb, devlink_nl_sb_port_pool_get_dump_one);
+}
+
+static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
+				    unsigned int sb_index, u16 pool_index,
+				    u32 threshold,
+				    struct netlink_ext_ack *extack)
+
+{
+	const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+	if (ops->sb_port_pool_set)
+		return ops->sb_port_pool_set(devlink_port, sb_index,
+					     pool_index, threshold, extack);
+	return -EOPNOTSUPP;
+}
+
+int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
+					 struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[1];
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_sb *devlink_sb;
+	u16 pool_index;
+	u32 threshold;
+	int err;
+
+	devlink_sb = devlink_sb_get_from_info(devlink, info);
+	if (IS_ERR(devlink_sb))
+		return PTR_ERR(devlink_sb);
+
+	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+						  &pool_index);
+	if (err)
+		return err;
+
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
+		return -EINVAL;
+
+	threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
+	return devlink_sb_port_pool_set(devlink_port, devlink_sb->index,
+					pool_index, threshold, info->extack);
+}
+
+static int
+devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink,
+				struct devlink_port *devlink_port,
+				struct devlink_sb *devlink_sb, u16 tc_index,
+				enum devlink_sb_pool_type pool_type,
+				enum devlink_command cmd,
+				u32 portid, u32 seq, int flags)
+{
+	const struct devlink_ops *ops = devlink->ops;
+	u16 pool_index;
+	u32 threshold;
+	void *hdr;
+	int err;
+
+	err = ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index,
+				       tc_index, pool_type,
+				       &pool_index, &threshold);
+	if (err)
+		return err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index))
+		goto nla_put_failure;
+	if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type))
+		goto nla_put_failure;
+	if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
+		goto nla_put_failure;
+
+	if (ops->sb_occ_tc_port_bind_get) {
+		u32 cur;
+		u32 max;
+
+		err = ops->sb_occ_tc_port_bind_get(devlink_port,
+						   devlink_sb->index,
+						   tc_index, pool_type,
+						   &cur, &max);
+		if (err && err != -EOPNOTSUPP)
+			return err;
+		if (!err) {
+			if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
+				goto nla_put_failure;
+			if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
+				goto nla_put_failure;
+		}
+	}
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+int devlink_nl_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[1];
+	struct devlink *devlink = devlink_port->devlink;
+	struct devlink_sb *devlink_sb;
+	struct sk_buff *msg;
+	enum devlink_sb_pool_type pool_type;
+	u16 tc_index;
+	int err;
+
+	devlink_sb = devlink_sb_get_from_info(devlink, info);
+	if (IS_ERR(devlink_sb))
+		return PTR_ERR(devlink_sb);
+
+	err = devlink_sb_pool_type_get_from_info(info, &pool_type);
+	if (err)
+		return err;
+
+	err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
+						pool_type, &tc_index);
+	if (err)
+		return err;
+
+	if (!devlink->ops->sb_tc_pool_bind_get)
+		return -EOPNOTSUPP;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port,
+					      devlink_sb, tc_index, pool_type,
+					      DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+					      info->snd_portid,
+					      info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
+					int start, int *p_idx,
+					struct devlink *devlink,
+					struct devlink_sb *devlink_sb,
+					u32 portid, u32 seq, int flags)
+{
+	struct devlink_port *devlink_port;
+	unsigned long port_index;
+	u16 tc_index;
+	int err;
+
+	xa_for_each(&devlink->ports, port_index, devlink_port) {
+		for (tc_index = 0;
+		     tc_index < devlink_sb->ingress_tc_count; tc_index++) {
+			if (*p_idx < start) {
+				(*p_idx)++;
+				continue;
+			}
+			err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
+							      devlink_port,
+							      devlink_sb,
+							      tc_index,
+							      DEVLINK_SB_POOL_TYPE_INGRESS,
+							      DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+							      portid, seq,
+							      flags);
+			if (err)
+				return err;
+			(*p_idx)++;
+		}
+		for (tc_index = 0;
+		     tc_index < devlink_sb->egress_tc_count; tc_index++) {
+			if (*p_idx < start) {
+				(*p_idx)++;
+				continue;
+			}
+			err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
+							      devlink_port,
+							      devlink_sb,
+							      tc_index,
+							      DEVLINK_SB_POOL_TYPE_EGRESS,
+							      DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+							      portid, seq,
+							      flags);
+			if (err)
+				return err;
+			(*p_idx)++;
+		}
+	}
+	return 0;
+}
+
+static int devlink_nl_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg,
+						   struct devlink *devlink,
+						   struct netlink_callback *cb,
+						   int flags)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_sb *devlink_sb;
+	int idx = 0;
+	int err = 0;
+
+	if (!devlink->ops->sb_tc_pool_bind_get)
+		return 0;
+
+	list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+		err = __sb_tc_pool_bind_get_dumpit(msg, state->idx, &idx,
+						   devlink, devlink_sb,
+						   NETLINK_CB(cb->skb).portid,
+						   cb->nlh->nlmsg_seq, flags);
+		if (err == -EOPNOTSUPP) {
+			err = 0;
+		} else if (err) {
+			state->idx = idx;
+			break;
+		}
+	}
+
+	return err;
+}
+
+int devlink_nl_sb_tc_pool_bind_get_dumpit(struct sk_buff *skb,
+					  struct netlink_callback *cb)
+{
+	return devlink_nl_dumpit(skb, cb,
+				 devlink_nl_sb_tc_pool_bind_get_dump_one);
+}
+
+static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
+				       unsigned int sb_index, u16 tc_index,
+				       enum devlink_sb_pool_type pool_type,
+				       u16 pool_index, u32 threshold,
+				       struct netlink_ext_ack *extack)
+
+{
+	const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+	if (ops->sb_tc_pool_bind_set)
+		return ops->sb_tc_pool_bind_set(devlink_port, sb_index,
+						tc_index, pool_type,
+						pool_index, threshold, extack);
+	return -EOPNOTSUPP;
+}
+
+int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
+					    struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[1];
+	struct devlink *devlink = info->user_ptr[0];
+	enum devlink_sb_pool_type pool_type;
+	struct devlink_sb *devlink_sb;
+	u16 tc_index;
+	u16 pool_index;
+	u32 threshold;
+	int err;
+
+	devlink_sb = devlink_sb_get_from_info(devlink, info);
+	if (IS_ERR(devlink_sb))
+		return PTR_ERR(devlink_sb);
+
+	err = devlink_sb_pool_type_get_from_info(info, &pool_type);
+	if (err)
+		return err;
+
+	err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
+						pool_type, &tc_index);
+	if (err)
+		return err;
+
+	err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+						  &pool_index);
+	if (err)
+		return err;
+
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
+		return -EINVAL;
+
+	threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
+	return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index,
+					   tc_index, pool_type,
+					   pool_index, threshold, info->extack);
+}
+
+int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	const struct devlink_ops *ops = devlink->ops;
+	struct devlink_sb *devlink_sb;
+
+	devlink_sb = devlink_sb_get_from_info(devlink, info);
+	if (IS_ERR(devlink_sb))
+		return PTR_ERR(devlink_sb);
+
+	if (ops->sb_occ_snapshot)
+		return ops->sb_occ_snapshot(devlink, devlink_sb->index);
+	return -EOPNOTSUPP;
+}
+
+int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
+					 struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	const struct devlink_ops *ops = devlink->ops;
+	struct devlink_sb *devlink_sb;
+
+	devlink_sb = devlink_sb_get_from_info(devlink, info);
+	if (IS_ERR(devlink_sb))
+		return PTR_ERR(devlink_sb);
+
+	if (ops->sb_occ_max_clear)
+		return ops->sb_occ_max_clear(devlink, devlink_sb->index);
+	return -EOPNOTSUPP;
+}
+
+int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
+		     u32 size, u16 ingress_pools_count,
+		     u16 egress_pools_count, u16 ingress_tc_count,
+		     u16 egress_tc_count)
+{
+	struct devlink_sb *devlink_sb;
+
+	lockdep_assert_held(&devlink->lock);
+
+	if (devlink_sb_index_exists(devlink, sb_index))
+		return -EEXIST;
+
+	devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL);
+	if (!devlink_sb)
+		return -ENOMEM;
+	devlink_sb->index = sb_index;
+	devlink_sb->size = size;
+	devlink_sb->ingress_pools_count = ingress_pools_count;
+	devlink_sb->egress_pools_count = egress_pools_count;
+	devlink_sb->ingress_tc_count = ingress_tc_count;
+	devlink_sb->egress_tc_count = egress_tc_count;
+	list_add_tail(&devlink_sb->list, &devlink->sb_list);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devl_sb_register);
+
+int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
+			u32 size, u16 ingress_pools_count,
+			u16 egress_pools_count, u16 ingress_tc_count,
+			u16 egress_tc_count)
+{
+	int err;
+
+	devl_lock(devlink);
+	err = devl_sb_register(devlink, sb_index, size, ingress_pools_count,
+			       egress_pools_count, ingress_tc_count,
+			       egress_tc_count);
+	devl_unlock(devlink);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_sb_register);
+
+void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+{
+	struct devlink_sb *devlink_sb;
+
+	lockdep_assert_held(&devlink->lock);
+
+	devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
+	WARN_ON(!devlink_sb);
+	list_del(&devlink_sb->list);
+	kfree(devlink_sb);
+}
+EXPORT_SYMBOL_GPL(devl_sb_unregister);
+
+void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+{
+	devl_lock(devlink);
+	devl_sb_unregister(devlink, sb_index);
+	devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_sb_unregister);

From 2475ed158c478c624d8fbc8d639d344a960c1ad8 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:46 +0200
Subject: [PATCH 2000/2047] devlink: move and rename
 devlink_dpipe_send_and_alloc_skb() helper

Since both dpipe and resource code is using this helper, in preparation
for code split to separate files, move
devlink_dpipe_send_and_alloc_skb() helper into netlink.c. Rename it on
the way.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-5-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/devl_internal.h |  2 ++
 net/devlink/leftover.c      | 34 +++++++++-------------------------
 net/devlink/netlink.c       | 15 +++++++++++++++
 3 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index e1a6b7a763b8..66c94957e96c 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -150,6 +150,8 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
 	return 0;
 }
 
+int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info);
+
 /* Notify */
 void devlink_notify(struct devlink *devlink, enum devlink_command cmd);
 void devlink_ports_notify_register(struct devlink *devlink);
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 795bfdd41103..1bcd4192099e 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -1277,22 +1277,6 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb,
-					    struct genl_info *info)
-{
-	int err;
-
-	if (*pskb) {
-		err = genlmsg_reply(*pskb, info);
-		if (err)
-			return err;
-	}
-	*pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!*pskb)
-		return -ENOMEM;
-	return 0;
-}
-
 static int devlink_dpipe_tables_fill(struct genl_info *info,
 				     enum devlink_command cmd, int flags,
 				     struct list_head *dpipe_tables,
@@ -1311,7 +1295,7 @@ static int devlink_dpipe_tables_fill(struct genl_info *info,
 	table = list_first_entry(dpipe_tables,
 				 struct devlink_dpipe_table, list);
 start_again:
-	err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+	err = devlink_nl_msg_reply_and_new(&skb, info);
 	if (err)
 		return err;
 
@@ -1358,7 +1342,7 @@ send_done:
 	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
 			NLMSG_DONE, 0, flags | NLM_F_MULTI);
 	if (!nlh) {
-		err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+		err = devlink_nl_msg_reply_and_new(&skb, info);
 		if (err)
 			return err;
 		goto send_done;
@@ -1551,8 +1535,8 @@ int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
 	struct devlink *devlink;
 	int err;
 
-	err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb,
-					       dump_ctx->info);
+	err = devlink_nl_msg_reply_and_new(&dump_ctx->skb,
+					   dump_ctx->info);
 	if (err)
 		return err;
 
@@ -1638,7 +1622,7 @@ send_done:
 	nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
 			NLMSG_DONE, 0, flags | NLM_F_MULTI);
 	if (!nlh) {
-		err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
+		err = devlink_nl_msg_reply_and_new(&dump_ctx.skb, info);
 		if (err)
 			return err;
 		goto send_done;
@@ -1746,7 +1730,7 @@ static int devlink_dpipe_headers_fill(struct genl_info *info,
 
 	i = 0;
 start_again:
-	err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+	err = devlink_nl_msg_reply_and_new(&skb, info);
 	if (err)
 		return err;
 
@@ -1782,7 +1766,7 @@ send_done:
 	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
 			NLMSG_DONE, 0, flags | NLM_F_MULTI);
 	if (!nlh) {
-		err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+		err = devlink_nl_msg_reply_and_new(&skb, info);
 		if (err)
 			return err;
 		goto send_done;
@@ -2047,7 +2031,7 @@ static int devlink_resource_fill(struct genl_info *info,
 	resource = list_first_entry(&devlink->resource_list,
 				    struct devlink_resource, list);
 start_again:
-	err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+	err = devlink_nl_msg_reply_and_new(&skb, info);
 	if (err)
 		return err;
 
@@ -2086,7 +2070,7 @@ send_done:
 	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
 			NLMSG_DONE, 0, flags | NLM_F_MULTI);
 	if (!nlh) {
-		err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+		err = devlink_nl_msg_reply_and_new(&skb, info);
 		if (err)
 			return err;
 		goto send_done;
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
index 72a5005a64cd..5f57afd31dea 100644
--- a/net/devlink/netlink.c
+++ b/net/devlink/netlink.c
@@ -82,6 +82,21 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG },
 };
 
+int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info)
+{
+	int err;
+
+	if (*msg) {
+		err = genlmsg_reply(*msg, info);
+		if (err)
+			return err;
+	}
+	*msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!*msg)
+		return -ENOMEM;
+	return 0;
+}
+
 struct devlink *
 devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
 {

From a9fd44b15fc5625ee0a38661e773444f6bf31b3d Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:47 +0200
Subject: [PATCH 2001/2047] devlink: push dpipe related code into separate file

Cut out another chunk from leftover.c and put dpipe related code
into a separate file.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-6-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/Makefile        |   3 +-
 net/devlink/devl_internal.h |   7 +
 net/devlink/dpipe.c         | 917 ++++++++++++++++++++++++++++++++++++
 net/devlink/leftover.c      | 894 -----------------------------------
 4 files changed, 926 insertions(+), 895 deletions(-)
 create mode 100644 net/devlink/dpipe.c

diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index dcc508af48a9..122de1d565d2 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o port.o sb.o health.o \
+obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
+	 health.o
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 66c94957e96c..72653e1dae80 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -231,6 +231,13 @@ int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
 					struct genl_info *info);
 int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
 					 struct genl_info *info);
+int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
+				     struct genl_info *info);
+int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
+				     struct genl_info *info);
+int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
+					    struct genl_info *info);
 int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
 					    struct genl_info *info);
 int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c
new file mode 100644
index 000000000000..431227c412e5
--- /dev/null
+++ b/net/devlink/dpipe.c
@@ -0,0 +1,917 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
+	{
+		.name = "destination mac",
+		.id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
+		.bitwidth = 48,
+	},
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
+	.name = "ethernet",
+	.id = DEVLINK_DPIPE_HEADER_ETHERNET,
+	.fields = devlink_dpipe_fields_ethernet,
+	.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
+	.global = true,
+};
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ethernet);
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
+	{
+		.name = "destination ip",
+		.id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
+		.bitwidth = 32,
+	},
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
+	.name = "ipv4",
+	.id = DEVLINK_DPIPE_HEADER_IPV4,
+	.fields = devlink_dpipe_fields_ipv4,
+	.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
+	.global = true,
+};
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv4);
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
+	{
+		.name = "destination ip",
+		.id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
+		.bitwidth = 128,
+	},
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
+	.name = "ipv6",
+	.id = DEVLINK_DPIPE_HEADER_IPV6,
+	.fields = devlink_dpipe_fields_ipv6,
+	.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
+	.global = true,
+};
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv6);
+
+int devlink_dpipe_match_put(struct sk_buff *skb,
+			    struct devlink_dpipe_match *match)
+{
+	struct devlink_dpipe_header *header = match->header;
+	struct devlink_dpipe_field *field = &header->fields[match->field_id];
+	struct nlattr *match_attr;
+
+	match_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_MATCH);
+	if (!match_attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) ||
+	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) ||
+	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+	    nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, match_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, match_attr);
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_match_put);
+
+static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table,
+				     struct sk_buff *skb)
+{
+	struct nlattr *matches_attr;
+
+	matches_attr = nla_nest_start_noflag(skb,
+					     DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
+	if (!matches_attr)
+		return -EMSGSIZE;
+
+	if (table->table_ops->matches_dump(table->priv, skb))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, matches_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, matches_attr);
+	return -EMSGSIZE;
+}
+
+int devlink_dpipe_action_put(struct sk_buff *skb,
+			     struct devlink_dpipe_action *action)
+{
+	struct devlink_dpipe_header *header = action->header;
+	struct devlink_dpipe_field *field = &header->fields[action->field_id];
+	struct nlattr *action_attr;
+
+	action_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ACTION);
+	if (!action_attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) ||
+	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) ||
+	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+	    nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, action_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, action_attr);
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_action_put);
+
+static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table,
+				     struct sk_buff *skb)
+{
+	struct nlattr *actions_attr;
+
+	actions_attr = nla_nest_start_noflag(skb,
+					     DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
+	if (!actions_attr)
+		return -EMSGSIZE;
+
+	if (table->table_ops->actions_dump(table->priv, skb))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, actions_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, actions_attr);
+	return -EMSGSIZE;
+}
+
+static int devlink_dpipe_table_put(struct sk_buff *skb,
+				   struct devlink_dpipe_table *table)
+{
+	struct nlattr *table_attr;
+	u64 table_size;
+
+	table_size = table->table_ops->size_get(table->priv);
+	table_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLE);
+	if (!table_attr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
+	    nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size,
+			      DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
+		       table->counters_enabled))
+		goto nla_put_failure;
+
+	if (table->resource_valid) {
+		if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
+				      table->resource_id, DEVLINK_ATTR_PAD) ||
+		    nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
+				      table->resource_units, DEVLINK_ATTR_PAD))
+			goto nla_put_failure;
+	}
+	if (devlink_dpipe_matches_put(table, skb))
+		goto nla_put_failure;
+
+	if (devlink_dpipe_actions_put(table, skb))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, table_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, table_attr);
+	return -EMSGSIZE;
+}
+
+static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb,
+					    struct genl_info *info)
+{
+	int err;
+
+	if (*pskb) {
+		err = genlmsg_reply(*pskb, info);
+		if (err)
+			return err;
+	}
+	*pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!*pskb)
+		return -ENOMEM;
+	return 0;
+}
+
+static int devlink_dpipe_tables_fill(struct genl_info *info,
+				     enum devlink_command cmd, int flags,
+				     struct list_head *dpipe_tables,
+				     const char *table_name)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_dpipe_table *table;
+	struct nlattr *tables_attr;
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh;
+	bool incomplete;
+	void *hdr;
+	int i;
+	int err;
+
+	table = list_first_entry(dpipe_tables,
+				 struct devlink_dpipe_table, list);
+start_again:
+	err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+	if (err)
+		return err;
+
+	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+			  &devlink_nl_family, NLM_F_MULTI, cmd);
+	if (!hdr) {
+		nlmsg_free(skb);
+		return -EMSGSIZE;
+	}
+
+	if (devlink_nl_put_handle(skb, devlink))
+		goto nla_put_failure;
+	tables_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLES);
+	if (!tables_attr)
+		goto nla_put_failure;
+
+	i = 0;
+	incomplete = false;
+	list_for_each_entry_from(table, dpipe_tables, list) {
+		if (!table_name) {
+			err = devlink_dpipe_table_put(skb, table);
+			if (err) {
+				if (!i)
+					goto err_table_put;
+				incomplete = true;
+				break;
+			}
+		} else {
+			if (!strcmp(table->name, table_name)) {
+				err = devlink_dpipe_table_put(skb, table);
+				if (err)
+					break;
+			}
+		}
+		i++;
+	}
+
+	nla_nest_end(skb, tables_attr);
+	genlmsg_end(skb, hdr);
+	if (incomplete)
+		goto start_again;
+
+send_done:
+	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+			NLMSG_DONE, 0, flags | NLM_F_MULTI);
+	if (!nlh) {
+		err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+		if (err)
+			return err;
+		goto send_done;
+	}
+
+	return genlmsg_reply(skb, info);
+
+nla_put_failure:
+	err = -EMSGSIZE;
+err_table_put:
+	nlmsg_free(skb);
+	return err;
+}
+
+int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	const char *table_name =  NULL;
+
+	if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+		table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+
+	return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0,
+					 &devlink->dpipe_table_list,
+					 table_name);
+}
+
+static int devlink_dpipe_value_put(struct sk_buff *skb,
+				   struct devlink_dpipe_value *value)
+{
+	if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE,
+		    value->value_size, value->value))
+		return -EMSGSIZE;
+	if (value->mask)
+		if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK,
+			    value->value_size, value->mask))
+			return -EMSGSIZE;
+	if (value->mapping_valid)
+		if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING,
+				value->mapping_value))
+			return -EMSGSIZE;
+	return 0;
+}
+
+static int devlink_dpipe_action_value_put(struct sk_buff *skb,
+					  struct devlink_dpipe_value *value)
+{
+	if (!value->action)
+		return -EINVAL;
+	if (devlink_dpipe_action_put(skb, value->action))
+		return -EMSGSIZE;
+	if (devlink_dpipe_value_put(skb, value))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static int devlink_dpipe_action_values_put(struct sk_buff *skb,
+					   struct devlink_dpipe_value *values,
+					   unsigned int values_count)
+{
+	struct nlattr *action_attr;
+	int i;
+	int err;
+
+	for (i = 0; i < values_count; i++) {
+		action_attr = nla_nest_start_noflag(skb,
+						    DEVLINK_ATTR_DPIPE_ACTION_VALUE);
+		if (!action_attr)
+			return -EMSGSIZE;
+		err = devlink_dpipe_action_value_put(skb, &values[i]);
+		if (err)
+			goto err_action_value_put;
+		nla_nest_end(skb, action_attr);
+	}
+	return 0;
+
+err_action_value_put:
+	nla_nest_cancel(skb, action_attr);
+	return err;
+}
+
+static int devlink_dpipe_match_value_put(struct sk_buff *skb,
+					 struct devlink_dpipe_value *value)
+{
+	if (!value->match)
+		return -EINVAL;
+	if (devlink_dpipe_match_put(skb, value->match))
+		return -EMSGSIZE;
+	if (devlink_dpipe_value_put(skb, value))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static int devlink_dpipe_match_values_put(struct sk_buff *skb,
+					  struct devlink_dpipe_value *values,
+					  unsigned int values_count)
+{
+	struct nlattr *match_attr;
+	int i;
+	int err;
+
+	for (i = 0; i < values_count; i++) {
+		match_attr = nla_nest_start_noflag(skb,
+						   DEVLINK_ATTR_DPIPE_MATCH_VALUE);
+		if (!match_attr)
+			return -EMSGSIZE;
+		err = devlink_dpipe_match_value_put(skb, &values[i]);
+		if (err)
+			goto err_match_value_put;
+		nla_nest_end(skb, match_attr);
+	}
+	return 0;
+
+err_match_value_put:
+	nla_nest_cancel(skb, match_attr);
+	return err;
+}
+
+static int devlink_dpipe_entry_put(struct sk_buff *skb,
+				   struct devlink_dpipe_entry *entry)
+{
+	struct nlattr *entry_attr, *matches_attr, *actions_attr;
+	int err;
+
+	entry_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ENTRY);
+	if (!entry_attr)
+		return  -EMSGSIZE;
+
+	if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
+			      DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+	if (entry->counter_valid)
+		if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
+				      entry->counter, DEVLINK_ATTR_PAD))
+			goto nla_put_failure;
+
+	matches_attr = nla_nest_start_noflag(skb,
+					     DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
+	if (!matches_attr)
+		goto nla_put_failure;
+
+	err = devlink_dpipe_match_values_put(skb, entry->match_values,
+					     entry->match_values_count);
+	if (err) {
+		nla_nest_cancel(skb, matches_attr);
+		goto err_match_values_put;
+	}
+	nla_nest_end(skb, matches_attr);
+
+	actions_attr = nla_nest_start_noflag(skb,
+					     DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
+	if (!actions_attr)
+		goto nla_put_failure;
+
+	err = devlink_dpipe_action_values_put(skb, entry->action_values,
+					      entry->action_values_count);
+	if (err) {
+		nla_nest_cancel(skb, actions_attr);
+		goto err_action_values_put;
+	}
+	nla_nest_end(skb, actions_attr);
+
+	nla_nest_end(skb, entry_attr);
+	return 0;
+
+nla_put_failure:
+	err = -EMSGSIZE;
+err_match_values_put:
+err_action_values_put:
+	nla_nest_cancel(skb, entry_attr);
+	return err;
+}
+
+static struct devlink_dpipe_table *
+devlink_dpipe_table_find(struct list_head *dpipe_tables,
+			 const char *table_name, struct devlink *devlink)
+{
+	struct devlink_dpipe_table *table;
+
+	list_for_each_entry_rcu(table, dpipe_tables, list,
+				lockdep_is_held(&devlink->lock)) {
+		if (!strcmp(table->name, table_name))
+			return table;
+	}
+	return NULL;
+}
+
+int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+	struct devlink *devlink;
+	int err;
+
+	err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb,
+					       dump_ctx->info);
+	if (err)
+		return err;
+
+	dump_ctx->hdr = genlmsg_put(dump_ctx->skb,
+				    dump_ctx->info->snd_portid,
+				    dump_ctx->info->snd_seq,
+				    &devlink_nl_family, NLM_F_MULTI,
+				    dump_ctx->cmd);
+	if (!dump_ctx->hdr)
+		goto nla_put_failure;
+
+	devlink = dump_ctx->info->user_ptr[0];
+	if (devlink_nl_put_handle(dump_ctx->skb, devlink))
+		goto nla_put_failure;
+	dump_ctx->nest = nla_nest_start_noflag(dump_ctx->skb,
+					       DEVLINK_ATTR_DPIPE_ENTRIES);
+	if (!dump_ctx->nest)
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	nlmsg_free(dump_ctx->skb);
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare);
+
+int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
+				   struct devlink_dpipe_entry *entry)
+{
+	return devlink_dpipe_entry_put(dump_ctx->skb, entry);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append);
+
+int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+	nla_nest_end(dump_ctx->skb, dump_ctx->nest);
+	genlmsg_end(dump_ctx->skb, dump_ctx->hdr);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
+
+void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
+
+{
+	unsigned int value_count, value_index;
+	struct devlink_dpipe_value *value;
+
+	value = entry->action_values;
+	value_count = entry->action_values_count;
+	for (value_index = 0; value_index < value_count; value_index++) {
+		kfree(value[value_index].value);
+		kfree(value[value_index].mask);
+	}
+
+	value = entry->match_values;
+	value_count = entry->match_values_count;
+	for (value_index = 0; value_index < value_count; value_index++) {
+		kfree(value[value_index].value);
+		kfree(value[value_index].mask);
+	}
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_clear);
+
+static int devlink_dpipe_entries_fill(struct genl_info *info,
+				      enum devlink_command cmd, int flags,
+				      struct devlink_dpipe_table *table)
+{
+	struct devlink_dpipe_dump_ctx dump_ctx;
+	struct nlmsghdr *nlh;
+	int err;
+
+	dump_ctx.skb = NULL;
+	dump_ctx.cmd = cmd;
+	dump_ctx.info = info;
+
+	err = table->table_ops->entries_dump(table->priv,
+					     table->counters_enabled,
+					     &dump_ctx);
+	if (err)
+		return err;
+
+send_done:
+	nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
+			NLMSG_DONE, 0, flags | NLM_F_MULTI);
+	if (!nlh) {
+		err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
+		if (err)
+			return err;
+		goto send_done;
+	}
+	return genlmsg_reply(dump_ctx.skb, info);
+}
+
+int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_dpipe_table *table;
+	const char *table_name;
+
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME))
+		return -EINVAL;
+
+	table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+					 table_name, devlink);
+	if (!table)
+		return -EINVAL;
+
+	if (!table->table_ops->entries_dump)
+		return -EINVAL;
+
+	return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET,
+					  0, table);
+}
+
+static int devlink_dpipe_fields_put(struct sk_buff *skb,
+				    const struct devlink_dpipe_header *header)
+{
+	struct devlink_dpipe_field *field;
+	struct nlattr *field_attr;
+	int i;
+
+	for (i = 0; i < header->fields_count; i++) {
+		field = &header->fields[i];
+		field_attr = nla_nest_start_noflag(skb,
+						   DEVLINK_ATTR_DPIPE_FIELD);
+		if (!field_attr)
+			return -EMSGSIZE;
+		if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) ||
+		    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+		    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) ||
+		    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type))
+			goto nla_put_failure;
+		nla_nest_end(skb, field_attr);
+	}
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, field_attr);
+	return -EMSGSIZE;
+}
+
+static int devlink_dpipe_header_put(struct sk_buff *skb,
+				    struct devlink_dpipe_header *header)
+{
+	struct nlattr *fields_attr, *header_attr;
+	int err;
+
+	header_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADER);
+	if (!header_attr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) ||
+	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+	    nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+		goto nla_put_failure;
+
+	fields_attr = nla_nest_start_noflag(skb,
+					    DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
+	if (!fields_attr)
+		goto nla_put_failure;
+
+	err = devlink_dpipe_fields_put(skb, header);
+	if (err) {
+		nla_nest_cancel(skb, fields_attr);
+		goto nla_put_failure;
+	}
+	nla_nest_end(skb, fields_attr);
+	nla_nest_end(skb, header_attr);
+	return 0;
+
+nla_put_failure:
+	err = -EMSGSIZE;
+	nla_nest_cancel(skb, header_attr);
+	return err;
+}
+
+static int devlink_dpipe_headers_fill(struct genl_info *info,
+				      enum devlink_command cmd, int flags,
+				      struct devlink_dpipe_headers *
+				      dpipe_headers)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct nlattr *headers_attr;
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh;
+	void *hdr;
+	int i, j;
+	int err;
+
+	i = 0;
+start_again:
+	err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+	if (err)
+		return err;
+
+	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+			  &devlink_nl_family, NLM_F_MULTI, cmd);
+	if (!hdr) {
+		nlmsg_free(skb);
+		return -EMSGSIZE;
+	}
+
+	if (devlink_nl_put_handle(skb, devlink))
+		goto nla_put_failure;
+	headers_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADERS);
+	if (!headers_attr)
+		goto nla_put_failure;
+
+	j = 0;
+	for (; i < dpipe_headers->headers_count; i++) {
+		err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]);
+		if (err) {
+			if (!j)
+				goto err_table_put;
+			break;
+		}
+		j++;
+	}
+	nla_nest_end(skb, headers_attr);
+	genlmsg_end(skb, hdr);
+	if (i != dpipe_headers->headers_count)
+		goto start_again;
+
+send_done:
+	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+			NLMSG_DONE, 0, flags | NLM_F_MULTI);
+	if (!nlh) {
+		err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+		if (err)
+			return err;
+		goto send_done;
+	}
+	return genlmsg_reply(skb, info);
+
+nla_put_failure:
+	err = -EMSGSIZE;
+err_table_put:
+	nlmsg_free(skb);
+	return err;
+}
+
+int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+
+	if (!devlink->dpipe_headers)
+		return -EOPNOTSUPP;
+	return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET,
+					  0, devlink->dpipe_headers);
+}
+
+static int devlink_dpipe_table_counters_set(struct devlink *devlink,
+					    const char *table_name,
+					    bool enable)
+{
+	struct devlink_dpipe_table *table;
+
+	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+					 table_name, devlink);
+	if (!table)
+		return -EINVAL;
+
+	if (table->counter_control_extern)
+		return -EOPNOTSUPP;
+
+	if (!(table->counters_enabled ^ enable))
+		return 0;
+
+	table->counters_enabled = enable;
+	if (table->table_ops->counters_set_update)
+		table->table_ops->counters_set_update(table->priv, enable);
+	return 0;
+}
+
+int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
+					    struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	const char *table_name;
+	bool counters_enable;
+
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME) ||
+	    GENL_REQ_ATTR_CHECK(info,
+				DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED))
+		return -EINVAL;
+
+	table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+	counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]);
+
+	return devlink_dpipe_table_counters_set(devlink, table_name,
+						counters_enable);
+}
+
+/**
+ * devl_dpipe_headers_register - register dpipe headers
+ *
+ * @devlink: devlink
+ * @dpipe_headers: dpipe header array
+ *
+ * Register the headers supported by hardware.
+ */
+void devl_dpipe_headers_register(struct devlink *devlink,
+				 struct devlink_dpipe_headers *dpipe_headers)
+{
+	lockdep_assert_held(&devlink->lock);
+
+	devlink->dpipe_headers = dpipe_headers;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_headers_register);
+
+/**
+ * devl_dpipe_headers_unregister - unregister dpipe headers
+ *
+ * @devlink: devlink
+ *
+ * Unregister the headers supported by hardware.
+ */
+void devl_dpipe_headers_unregister(struct devlink *devlink)
+{
+	lockdep_assert_held(&devlink->lock);
+
+	devlink->dpipe_headers = NULL;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_headers_unregister);
+
+/**
+ *	devlink_dpipe_table_counter_enabled - check if counter allocation
+ *					      required
+ *	@devlink: devlink
+ *	@table_name: tables name
+ *
+ *	Used by driver to check if counter allocation is required.
+ *	After counter allocation is turned on the table entries
+ *	are updated to include counter statistics.
+ *
+ *	After that point on the driver must respect the counter
+ *	state so that each entry added to the table is added
+ *	with a counter.
+ */
+bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
+					 const char *table_name)
+{
+	struct devlink_dpipe_table *table;
+	bool enabled;
+
+	rcu_read_lock();
+	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+					 table_name, devlink);
+	enabled = false;
+	if (table)
+		enabled = table->counters_enabled;
+	rcu_read_unlock();
+	return enabled;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
+
+/**
+ * devl_dpipe_table_register - register dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ * @table_ops: table ops
+ * @priv: priv
+ * @counter_control_extern: external control for counters
+ */
+int devl_dpipe_table_register(struct devlink *devlink,
+			      const char *table_name,
+			      struct devlink_dpipe_table_ops *table_ops,
+			      void *priv, bool counter_control_extern)
+{
+	struct devlink_dpipe_table *table;
+
+	lockdep_assert_held(&devlink->lock);
+
+	if (WARN_ON(!table_ops->size_get))
+		return -EINVAL;
+
+	if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
+				     devlink))
+		return -EEXIST;
+
+	table = kzalloc(sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	table->name = table_name;
+	table->table_ops = table_ops;
+	table->priv = priv;
+	table->counter_control_extern = counter_control_extern;
+
+	list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_table_register);
+
+/**
+ * devl_dpipe_table_unregister - unregister dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ */
+void devl_dpipe_table_unregister(struct devlink *devlink,
+				 const char *table_name)
+{
+	struct devlink_dpipe_table *table;
+
+	lockdep_assert_held(&devlink->lock);
+
+	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+					 table_name, devlink);
+	if (!table)
+		return;
+	list_del_rcu(&table->list);
+	kfree_rcu(table, rcu);
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_table_unregister);
+
+/**
+ * devl_dpipe_table_resource_set - set the resource id
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ * @resource_id: resource id
+ * @resource_units: number of resource's units consumed per table's entry
+ */
+int devl_dpipe_table_resource_set(struct devlink *devlink,
+				  const char *table_name, u64 resource_id,
+				  u64 resource_units)
+{
+	struct devlink_dpipe_table *table;
+
+	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+					 table_name, devlink);
+	if (!table)
+		return -EINVAL;
+
+	table->resource_id = resource_id;
+	table->resource_units = resource_units;
+	table->resource_valid = true;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_table_resource_set);
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 1bcd4192099e..f71201e6b5e1 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -62,57 +62,6 @@ struct devlink_resource {
 	void *occ_get_priv;
 };
 
-static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
-	{
-		.name = "destination mac",
-		.id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
-		.bitwidth = 48,
-	},
-};
-
-struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
-	.name = "ethernet",
-	.id = DEVLINK_DPIPE_HEADER_ETHERNET,
-	.fields = devlink_dpipe_fields_ethernet,
-	.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
-	.global = true,
-};
-EXPORT_SYMBOL_GPL(devlink_dpipe_header_ethernet);
-
-static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
-	{
-		.name = "destination ip",
-		.id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
-		.bitwidth = 32,
-	},
-};
-
-struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
-	.name = "ipv4",
-	.id = DEVLINK_DPIPE_HEADER_IPV4,
-	.fields = devlink_dpipe_fields_ipv4,
-	.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
-	.global = true,
-};
-EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv4);
-
-static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
-	{
-		.name = "destination ip",
-		.id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
-		.bitwidth = 128,
-	},
-};
-
-struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
-	.name = "ipv6",
-	.id = DEVLINK_DPIPE_HEADER_IPV6,
-	.fields = devlink_dpipe_fields_ipv6,
-	.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
-	.global = true,
-};
-EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv6);
-
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
@@ -1141,698 +1090,6 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
 	return 0;
 }
 
-int devlink_dpipe_match_put(struct sk_buff *skb,
-			    struct devlink_dpipe_match *match)
-{
-	struct devlink_dpipe_header *header = match->header;
-	struct devlink_dpipe_field *field = &header->fields[match->field_id];
-	struct nlattr *match_attr;
-
-	match_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_MATCH);
-	if (!match_attr)
-		return -EMSGSIZE;
-
-	if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) ||
-	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) ||
-	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
-	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
-	    nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
-		goto nla_put_failure;
-
-	nla_nest_end(skb, match_attr);
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(skb, match_attr);
-	return -EMSGSIZE;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_match_put);
-
-static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table,
-				     struct sk_buff *skb)
-{
-	struct nlattr *matches_attr;
-
-	matches_attr = nla_nest_start_noflag(skb,
-					     DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
-	if (!matches_attr)
-		return -EMSGSIZE;
-
-	if (table->table_ops->matches_dump(table->priv, skb))
-		goto nla_put_failure;
-
-	nla_nest_end(skb, matches_attr);
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(skb, matches_attr);
-	return -EMSGSIZE;
-}
-
-int devlink_dpipe_action_put(struct sk_buff *skb,
-			     struct devlink_dpipe_action *action)
-{
-	struct devlink_dpipe_header *header = action->header;
-	struct devlink_dpipe_field *field = &header->fields[action->field_id];
-	struct nlattr *action_attr;
-
-	action_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ACTION);
-	if (!action_attr)
-		return -EMSGSIZE;
-
-	if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) ||
-	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) ||
-	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
-	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
-	    nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
-		goto nla_put_failure;
-
-	nla_nest_end(skb, action_attr);
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(skb, action_attr);
-	return -EMSGSIZE;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_action_put);
-
-static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table,
-				     struct sk_buff *skb)
-{
-	struct nlattr *actions_attr;
-
-	actions_attr = nla_nest_start_noflag(skb,
-					     DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
-	if (!actions_attr)
-		return -EMSGSIZE;
-
-	if (table->table_ops->actions_dump(table->priv, skb))
-		goto nla_put_failure;
-
-	nla_nest_end(skb, actions_attr);
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(skb, actions_attr);
-	return -EMSGSIZE;
-}
-
-static int devlink_dpipe_table_put(struct sk_buff *skb,
-				   struct devlink_dpipe_table *table)
-{
-	struct nlattr *table_attr;
-	u64 table_size;
-
-	table_size = table->table_ops->size_get(table->priv);
-	table_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLE);
-	if (!table_attr)
-		return -EMSGSIZE;
-
-	if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
-	    nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size,
-			      DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-	if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
-		       table->counters_enabled))
-		goto nla_put_failure;
-
-	if (table->resource_valid) {
-		if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
-				      table->resource_id, DEVLINK_ATTR_PAD) ||
-		    nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
-				      table->resource_units, DEVLINK_ATTR_PAD))
-			goto nla_put_failure;
-	}
-	if (devlink_dpipe_matches_put(table, skb))
-		goto nla_put_failure;
-
-	if (devlink_dpipe_actions_put(table, skb))
-		goto nla_put_failure;
-
-	nla_nest_end(skb, table_attr);
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(skb, table_attr);
-	return -EMSGSIZE;
-}
-
-static int devlink_dpipe_tables_fill(struct genl_info *info,
-				     enum devlink_command cmd, int flags,
-				     struct list_head *dpipe_tables,
-				     const char *table_name)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_dpipe_table *table;
-	struct nlattr *tables_attr;
-	struct sk_buff *skb = NULL;
-	struct nlmsghdr *nlh;
-	bool incomplete;
-	void *hdr;
-	int i;
-	int err;
-
-	table = list_first_entry(dpipe_tables,
-				 struct devlink_dpipe_table, list);
-start_again:
-	err = devlink_nl_msg_reply_and_new(&skb, info);
-	if (err)
-		return err;
-
-	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
-			  &devlink_nl_family, NLM_F_MULTI, cmd);
-	if (!hdr) {
-		nlmsg_free(skb);
-		return -EMSGSIZE;
-	}
-
-	if (devlink_nl_put_handle(skb, devlink))
-		goto nla_put_failure;
-	tables_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLES);
-	if (!tables_attr)
-		goto nla_put_failure;
-
-	i = 0;
-	incomplete = false;
-	list_for_each_entry_from(table, dpipe_tables, list) {
-		if (!table_name) {
-			err = devlink_dpipe_table_put(skb, table);
-			if (err) {
-				if (!i)
-					goto err_table_put;
-				incomplete = true;
-				break;
-			}
-		} else {
-			if (!strcmp(table->name, table_name)) {
-				err = devlink_dpipe_table_put(skb, table);
-				if (err)
-					break;
-			}
-		}
-		i++;
-	}
-
-	nla_nest_end(skb, tables_attr);
-	genlmsg_end(skb, hdr);
-	if (incomplete)
-		goto start_again;
-
-send_done:
-	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
-			NLMSG_DONE, 0, flags | NLM_F_MULTI);
-	if (!nlh) {
-		err = devlink_nl_msg_reply_and_new(&skb, info);
-		if (err)
-			return err;
-		goto send_done;
-	}
-
-	return genlmsg_reply(skb, info);
-
-nla_put_failure:
-	err = -EMSGSIZE;
-err_table_put:
-	nlmsg_free(skb);
-	return err;
-}
-
-static int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb,
-					  struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	const char *table_name =  NULL;
-
-	if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
-		table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
-
-	return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0,
-					 &devlink->dpipe_table_list,
-					 table_name);
-}
-
-static int devlink_dpipe_value_put(struct sk_buff *skb,
-				   struct devlink_dpipe_value *value)
-{
-	if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE,
-		    value->value_size, value->value))
-		return -EMSGSIZE;
-	if (value->mask)
-		if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK,
-			    value->value_size, value->mask))
-			return -EMSGSIZE;
-	if (value->mapping_valid)
-		if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING,
-				value->mapping_value))
-			return -EMSGSIZE;
-	return 0;
-}
-
-static int devlink_dpipe_action_value_put(struct sk_buff *skb,
-					  struct devlink_dpipe_value *value)
-{
-	if (!value->action)
-		return -EINVAL;
-	if (devlink_dpipe_action_put(skb, value->action))
-		return -EMSGSIZE;
-	if (devlink_dpipe_value_put(skb, value))
-		return -EMSGSIZE;
-	return 0;
-}
-
-static int devlink_dpipe_action_values_put(struct sk_buff *skb,
-					   struct devlink_dpipe_value *values,
-					   unsigned int values_count)
-{
-	struct nlattr *action_attr;
-	int i;
-	int err;
-
-	for (i = 0; i < values_count; i++) {
-		action_attr = nla_nest_start_noflag(skb,
-						    DEVLINK_ATTR_DPIPE_ACTION_VALUE);
-		if (!action_attr)
-			return -EMSGSIZE;
-		err = devlink_dpipe_action_value_put(skb, &values[i]);
-		if (err)
-			goto err_action_value_put;
-		nla_nest_end(skb, action_attr);
-	}
-	return 0;
-
-err_action_value_put:
-	nla_nest_cancel(skb, action_attr);
-	return err;
-}
-
-static int devlink_dpipe_match_value_put(struct sk_buff *skb,
-					 struct devlink_dpipe_value *value)
-{
-	if (!value->match)
-		return -EINVAL;
-	if (devlink_dpipe_match_put(skb, value->match))
-		return -EMSGSIZE;
-	if (devlink_dpipe_value_put(skb, value))
-		return -EMSGSIZE;
-	return 0;
-}
-
-static int devlink_dpipe_match_values_put(struct sk_buff *skb,
-					  struct devlink_dpipe_value *values,
-					  unsigned int values_count)
-{
-	struct nlattr *match_attr;
-	int i;
-	int err;
-
-	for (i = 0; i < values_count; i++) {
-		match_attr = nla_nest_start_noflag(skb,
-						   DEVLINK_ATTR_DPIPE_MATCH_VALUE);
-		if (!match_attr)
-			return -EMSGSIZE;
-		err = devlink_dpipe_match_value_put(skb, &values[i]);
-		if (err)
-			goto err_match_value_put;
-		nla_nest_end(skb, match_attr);
-	}
-	return 0;
-
-err_match_value_put:
-	nla_nest_cancel(skb, match_attr);
-	return err;
-}
-
-static int devlink_dpipe_entry_put(struct sk_buff *skb,
-				   struct devlink_dpipe_entry *entry)
-{
-	struct nlattr *entry_attr, *matches_attr, *actions_attr;
-	int err;
-
-	entry_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ENTRY);
-	if (!entry_attr)
-		return  -EMSGSIZE;
-
-	if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
-			      DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-	if (entry->counter_valid)
-		if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
-				      entry->counter, DEVLINK_ATTR_PAD))
-			goto nla_put_failure;
-
-	matches_attr = nla_nest_start_noflag(skb,
-					     DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
-	if (!matches_attr)
-		goto nla_put_failure;
-
-	err = devlink_dpipe_match_values_put(skb, entry->match_values,
-					     entry->match_values_count);
-	if (err) {
-		nla_nest_cancel(skb, matches_attr);
-		goto err_match_values_put;
-	}
-	nla_nest_end(skb, matches_attr);
-
-	actions_attr = nla_nest_start_noflag(skb,
-					     DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
-	if (!actions_attr)
-		goto nla_put_failure;
-
-	err = devlink_dpipe_action_values_put(skb, entry->action_values,
-					      entry->action_values_count);
-	if (err) {
-		nla_nest_cancel(skb, actions_attr);
-		goto err_action_values_put;
-	}
-	nla_nest_end(skb, actions_attr);
-
-	nla_nest_end(skb, entry_attr);
-	return 0;
-
-nla_put_failure:
-	err = -EMSGSIZE;
-err_match_values_put:
-err_action_values_put:
-	nla_nest_cancel(skb, entry_attr);
-	return err;
-}
-
-static struct devlink_dpipe_table *
-devlink_dpipe_table_find(struct list_head *dpipe_tables,
-			 const char *table_name, struct devlink *devlink)
-{
-	struct devlink_dpipe_table *table;
-	list_for_each_entry_rcu(table, dpipe_tables, list,
-				lockdep_is_held(&devlink->lock)) {
-		if (!strcmp(table->name, table_name))
-			return table;
-	}
-	return NULL;
-}
-
-int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
-{
-	struct devlink *devlink;
-	int err;
-
-	err = devlink_nl_msg_reply_and_new(&dump_ctx->skb,
-					   dump_ctx->info);
-	if (err)
-		return err;
-
-	dump_ctx->hdr = genlmsg_put(dump_ctx->skb,
-				    dump_ctx->info->snd_portid,
-				    dump_ctx->info->snd_seq,
-				    &devlink_nl_family, NLM_F_MULTI,
-				    dump_ctx->cmd);
-	if (!dump_ctx->hdr)
-		goto nla_put_failure;
-
-	devlink = dump_ctx->info->user_ptr[0];
-	if (devlink_nl_put_handle(dump_ctx->skb, devlink))
-		goto nla_put_failure;
-	dump_ctx->nest = nla_nest_start_noflag(dump_ctx->skb,
-					       DEVLINK_ATTR_DPIPE_ENTRIES);
-	if (!dump_ctx->nest)
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	nlmsg_free(dump_ctx->skb);
-	return -EMSGSIZE;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare);
-
-int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
-				   struct devlink_dpipe_entry *entry)
-{
-	return devlink_dpipe_entry_put(dump_ctx->skb, entry);
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append);
-
-int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
-{
-	nla_nest_end(dump_ctx->skb, dump_ctx->nest);
-	genlmsg_end(dump_ctx->skb, dump_ctx->hdr);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
-
-void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
-
-{
-	unsigned int value_count, value_index;
-	struct devlink_dpipe_value *value;
-
-	value = entry->action_values;
-	value_count = entry->action_values_count;
-	for (value_index = 0; value_index < value_count; value_index++) {
-		kfree(value[value_index].value);
-		kfree(value[value_index].mask);
-	}
-
-	value = entry->match_values;
-	value_count = entry->match_values_count;
-	for (value_index = 0; value_index < value_count; value_index++) {
-		kfree(value[value_index].value);
-		kfree(value[value_index].mask);
-	}
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_entry_clear);
-
-static int devlink_dpipe_entries_fill(struct genl_info *info,
-				      enum devlink_command cmd, int flags,
-				      struct devlink_dpipe_table *table)
-{
-	struct devlink_dpipe_dump_ctx dump_ctx;
-	struct nlmsghdr *nlh;
-	int err;
-
-	dump_ctx.skb = NULL;
-	dump_ctx.cmd = cmd;
-	dump_ctx.info = info;
-
-	err = table->table_ops->entries_dump(table->priv,
-					     table->counters_enabled,
-					     &dump_ctx);
-	if (err)
-		return err;
-
-send_done:
-	nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
-			NLMSG_DONE, 0, flags | NLM_F_MULTI);
-	if (!nlh) {
-		err = devlink_nl_msg_reply_and_new(&dump_ctx.skb, info);
-		if (err)
-			return err;
-		goto send_done;
-	}
-	return genlmsg_reply(dump_ctx.skb, info);
-}
-
-static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
-					    struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_dpipe_table *table;
-	const char *table_name;
-
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME))
-		return -EINVAL;
-
-	table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
-	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
-					 table_name, devlink);
-	if (!table)
-		return -EINVAL;
-
-	if (!table->table_ops->entries_dump)
-		return -EINVAL;
-
-	return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET,
-					  0, table);
-}
-
-static int devlink_dpipe_fields_put(struct sk_buff *skb,
-				    const struct devlink_dpipe_header *header)
-{
-	struct devlink_dpipe_field *field;
-	struct nlattr *field_attr;
-	int i;
-
-	for (i = 0; i < header->fields_count; i++) {
-		field = &header->fields[i];
-		field_attr = nla_nest_start_noflag(skb,
-						   DEVLINK_ATTR_DPIPE_FIELD);
-		if (!field_attr)
-			return -EMSGSIZE;
-		if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) ||
-		    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
-		    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) ||
-		    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type))
-			goto nla_put_failure;
-		nla_nest_end(skb, field_attr);
-	}
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(skb, field_attr);
-	return -EMSGSIZE;
-}
-
-static int devlink_dpipe_header_put(struct sk_buff *skb,
-				    struct devlink_dpipe_header *header)
-{
-	struct nlattr *fields_attr, *header_attr;
-	int err;
-
-	header_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADER);
-	if (!header_attr)
-		return -EMSGSIZE;
-
-	if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) ||
-	    nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
-	    nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
-		goto nla_put_failure;
-
-	fields_attr = nla_nest_start_noflag(skb,
-					    DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
-	if (!fields_attr)
-		goto nla_put_failure;
-
-	err = devlink_dpipe_fields_put(skb, header);
-	if (err) {
-		nla_nest_cancel(skb, fields_attr);
-		goto nla_put_failure;
-	}
-	nla_nest_end(skb, fields_attr);
-	nla_nest_end(skb, header_attr);
-	return 0;
-
-nla_put_failure:
-	err = -EMSGSIZE;
-	nla_nest_cancel(skb, header_attr);
-	return err;
-}
-
-static int devlink_dpipe_headers_fill(struct genl_info *info,
-				      enum devlink_command cmd, int flags,
-				      struct devlink_dpipe_headers *
-				      dpipe_headers)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct nlattr *headers_attr;
-	struct sk_buff *skb = NULL;
-	struct nlmsghdr *nlh;
-	void *hdr;
-	int i, j;
-	int err;
-
-	i = 0;
-start_again:
-	err = devlink_nl_msg_reply_and_new(&skb, info);
-	if (err)
-		return err;
-
-	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
-			  &devlink_nl_family, NLM_F_MULTI, cmd);
-	if (!hdr) {
-		nlmsg_free(skb);
-		return -EMSGSIZE;
-	}
-
-	if (devlink_nl_put_handle(skb, devlink))
-		goto nla_put_failure;
-	headers_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADERS);
-	if (!headers_attr)
-		goto nla_put_failure;
-
-	j = 0;
-	for (; i < dpipe_headers->headers_count; i++) {
-		err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]);
-		if (err) {
-			if (!j)
-				goto err_table_put;
-			break;
-		}
-		j++;
-	}
-	nla_nest_end(skb, headers_attr);
-	genlmsg_end(skb, hdr);
-	if (i != dpipe_headers->headers_count)
-		goto start_again;
-
-send_done:
-	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
-			NLMSG_DONE, 0, flags | NLM_F_MULTI);
-	if (!nlh) {
-		err = devlink_nl_msg_reply_and_new(&skb, info);
-		if (err)
-			return err;
-		goto send_done;
-	}
-	return genlmsg_reply(skb, info);
-
-nla_put_failure:
-	err = -EMSGSIZE;
-err_table_put:
-	nlmsg_free(skb);
-	return err;
-}
-
-static int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
-					    struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-
-	if (!devlink->dpipe_headers)
-		return -EOPNOTSUPP;
-	return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET,
-					  0, devlink->dpipe_headers);
-}
-
-static int devlink_dpipe_table_counters_set(struct devlink *devlink,
-					    const char *table_name,
-					    bool enable)
-{
-	struct devlink_dpipe_table *table;
-
-	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
-					 table_name, devlink);
-	if (!table)
-		return -EINVAL;
-
-	if (table->counter_control_extern)
-		return -EOPNOTSUPP;
-
-	if (!(table->counters_enabled ^ enable))
-		return 0;
-
-	table->counters_enabled = enable;
-	if (table->table_ops->counters_set_update)
-		table->table_ops->counters_set_update(table->priv, enable);
-	return 0;
-}
-
-static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
-						   struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	const char *table_name;
-	bool counters_enable;
-
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME) ||
-	    GENL_REQ_ATTR_CHECK(info,
-				DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED))
-		return -EINVAL;
-
-	table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
-	counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]);
-
-	return devlink_dpipe_table_counters_set(devlink, table_name,
-						counters_enable);
-}
-
 static struct devlink_resource *
 devlink_resource_find(struct devlink *devlink,
 		      struct devlink_resource *resource, u64 resource_id)
@@ -5272,131 +4529,6 @@ void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
 }
 EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
 
-/**
- * devl_dpipe_headers_register - register dpipe headers
- *
- * @devlink: devlink
- * @dpipe_headers: dpipe header array
- *
- * Register the headers supported by hardware.
- */
-void devl_dpipe_headers_register(struct devlink *devlink,
-				 struct devlink_dpipe_headers *dpipe_headers)
-{
-	lockdep_assert_held(&devlink->lock);
-
-	devlink->dpipe_headers = dpipe_headers;
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_headers_register);
-
-/**
- * devl_dpipe_headers_unregister - unregister dpipe headers
- *
- * @devlink: devlink
- *
- * Unregister the headers supported by hardware.
- */
-void devl_dpipe_headers_unregister(struct devlink *devlink)
-{
-	lockdep_assert_held(&devlink->lock);
-
-	devlink->dpipe_headers = NULL;
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_headers_unregister);
-
-/**
- *	devlink_dpipe_table_counter_enabled - check if counter allocation
- *					      required
- *	@devlink: devlink
- *	@table_name: tables name
- *
- *	Used by driver to check if counter allocation is required.
- *	After counter allocation is turned on the table entries
- *	are updated to include counter statistics.
- *
- *	After that point on the driver must respect the counter
- *	state so that each entry added to the table is added
- *	with a counter.
- */
-bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
-					 const char *table_name)
-{
-	struct devlink_dpipe_table *table;
-	bool enabled;
-
-	rcu_read_lock();
-	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
-					 table_name, devlink);
-	enabled = false;
-	if (table)
-		enabled = table->counters_enabled;
-	rcu_read_unlock();
-	return enabled;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
-
-/**
- * devl_dpipe_table_register - register dpipe table
- *
- * @devlink: devlink
- * @table_name: table name
- * @table_ops: table ops
- * @priv: priv
- * @counter_control_extern: external control for counters
- */
-int devl_dpipe_table_register(struct devlink *devlink,
-			      const char *table_name,
-			      struct devlink_dpipe_table_ops *table_ops,
-			      void *priv, bool counter_control_extern)
-{
-	struct devlink_dpipe_table *table;
-
-	lockdep_assert_held(&devlink->lock);
-
-	if (WARN_ON(!table_ops->size_get))
-		return -EINVAL;
-
-	if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
-				     devlink))
-		return -EEXIST;
-
-	table = kzalloc(sizeof(*table), GFP_KERNEL);
-	if (!table)
-		return -ENOMEM;
-
-	table->name = table_name;
-	table->table_ops = table_ops;
-	table->priv = priv;
-	table->counter_control_extern = counter_control_extern;
-
-	list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_table_register);
-
-/**
- * devl_dpipe_table_unregister - unregister dpipe table
- *
- * @devlink: devlink
- * @table_name: table name
- */
-void devl_dpipe_table_unregister(struct devlink *devlink,
-				 const char *table_name)
-{
-	struct devlink_dpipe_table *table;
-
-	lockdep_assert_held(&devlink->lock);
-
-	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
-					 table_name, devlink);
-	if (!table)
-		return;
-	list_del_rcu(&table->list);
-	kfree_rcu(table, rcu);
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_table_unregister);
-
 /**
  * devl_resource_register - devlink resource register
  *
@@ -5569,32 +4701,6 @@ int devl_resource_size_get(struct devlink *devlink,
 }
 EXPORT_SYMBOL_GPL(devl_resource_size_get);
 
-/**
- * devl_dpipe_table_resource_set - set the resource id
- *
- * @devlink: devlink
- * @table_name: table name
- * @resource_id: resource id
- * @resource_units: number of resource's units consumed per table's entry
- */
-int devl_dpipe_table_resource_set(struct devlink *devlink,
-				  const char *table_name, u64 resource_id,
-				  u64 resource_units)
-{
-	struct devlink_dpipe_table *table;
-
-	table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
-					 table_name, devlink);
-	if (!table)
-		return -EINVAL;
-
-	table->resource_id = resource_id;
-	table->resource_units = resource_units;
-	table->resource_valid = true;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_table_resource_set);
-
 /**
  * devl_resource_occ_get_register - register occupancy getter
  *

From a9f960074ecd9db292744db2491a0cbbd354e24b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:48 +0200
Subject: [PATCH 2002/2047] devlink: push resource related code into separate
 file

Cut out another chunk from leftover.c and put resource related code
into a separate file.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-7-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/Makefile        |   2 +-
 net/devlink/devl_internal.h |   2 +
 net/devlink/leftover.c      | 574 -----------------------------------
 net/devlink/resource.c      | 579 ++++++++++++++++++++++++++++++++++++
 4 files changed, 582 insertions(+), 575 deletions(-)
 create mode 100644 net/devlink/resource.c

diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index 122de1d565d2..8864cd933acb 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
-	 health.o
+	 resource.o health.o
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 72653e1dae80..e5c0acf7a758 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -238,6 +238,8 @@ int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
 				     struct genl_info *info);
 int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
 					    struct genl_info *info);
+int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
 					    struct genl_info *info);
 int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index f71201e6b5e1..1c65449f2252 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -33,35 +33,6 @@
 
 #include "devl_internal.h"
 
-/**
- * struct devlink_resource - devlink resource
- * @name: name of the resource
- * @id: id, per devlink instance
- * @size: size of the resource
- * @size_new: updated size of the resource, reload is needed
- * @size_valid: valid in case the total size of the resource is valid
- *              including its children
- * @parent: parent resource
- * @size_params: size parameters
- * @list: parent list
- * @resource_list: list of child resources
- * @occ_get: occupancy getter callback
- * @occ_get_priv: occupancy getter callback priv
- */
-struct devlink_resource {
-	const char *name;
-	u64 id;
-	u64 size;
-	u64 size_new;
-	bool size_valid;
-	struct devlink_resource *parent;
-	struct devlink_resource_size_params size_params;
-	struct list_head list;
-	struct list_head resource_list;
-	devlink_resource_occ_get_t *occ_get;
-	void *occ_get_priv;
-};
-
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
@@ -1090,290 +1061,6 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
 	return 0;
 }
 
-static struct devlink_resource *
-devlink_resource_find(struct devlink *devlink,
-		      struct devlink_resource *resource, u64 resource_id)
-{
-	struct list_head *resource_list;
-
-	if (resource)
-		resource_list = &resource->resource_list;
-	else
-		resource_list = &devlink->resource_list;
-
-	list_for_each_entry(resource, resource_list, list) {
-		struct devlink_resource *child_resource;
-
-		if (resource->id == resource_id)
-			return resource;
-
-		child_resource = devlink_resource_find(devlink, resource,
-						       resource_id);
-		if (child_resource)
-			return child_resource;
-	}
-	return NULL;
-}
-
-static void
-devlink_resource_validate_children(struct devlink_resource *resource)
-{
-	struct devlink_resource *child_resource;
-	bool size_valid = true;
-	u64 parts_size = 0;
-
-	if (list_empty(&resource->resource_list))
-		goto out;
-
-	list_for_each_entry(child_resource, &resource->resource_list, list)
-		parts_size += child_resource->size_new;
-
-	if (parts_size > resource->size_new)
-		size_valid = false;
-out:
-	resource->size_valid = size_valid;
-}
-
-static int
-devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
-			       struct netlink_ext_ack *extack)
-{
-	u64 reminder;
-	int err = 0;
-
-	if (size > resource->size_params.size_max) {
-		NL_SET_ERR_MSG(extack, "Size larger than maximum");
-		err = -EINVAL;
-	}
-
-	if (size < resource->size_params.size_min) {
-		NL_SET_ERR_MSG(extack, "Size smaller than minimum");
-		err = -EINVAL;
-	}
-
-	div64_u64_rem(size, resource->size_params.size_granularity, &reminder);
-	if (reminder) {
-		NL_SET_ERR_MSG(extack, "Wrong granularity");
-		err = -EINVAL;
-	}
-
-	return err;
-}
-
-static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
-				       struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_resource *resource;
-	u64 resource_id;
-	u64 size;
-	int err;
-
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_ID) ||
-	    GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_SIZE))
-		return -EINVAL;
-	resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]);
-
-	resource = devlink_resource_find(devlink, NULL, resource_id);
-	if (!resource)
-		return -EINVAL;
-
-	size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
-	err = devlink_resource_validate_size(resource, size, info->extack);
-	if (err)
-		return err;
-
-	resource->size_new = size;
-	devlink_resource_validate_children(resource);
-	if (resource->parent)
-		devlink_resource_validate_children(resource->parent);
-	return 0;
-}
-
-static int
-devlink_resource_size_params_put(struct devlink_resource *resource,
-				 struct sk_buff *skb)
-{
-	struct devlink_resource_size_params *size_params;
-
-	size_params = &resource->size_params;
-	if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
-			      size_params->size_granularity, DEVLINK_ATTR_PAD) ||
-	    nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
-			      size_params->size_max, DEVLINK_ATTR_PAD) ||
-	    nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
-			      size_params->size_min, DEVLINK_ATTR_PAD) ||
-	    nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit))
-		return -EMSGSIZE;
-	return 0;
-}
-
-static int devlink_resource_occ_put(struct devlink_resource *resource,
-				    struct sk_buff *skb)
-{
-	if (!resource->occ_get)
-		return 0;
-	return nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
-				 resource->occ_get(resource->occ_get_priv),
-				 DEVLINK_ATTR_PAD);
-}
-
-static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
-				struct devlink_resource *resource)
-{
-	struct devlink_resource *child_resource;
-	struct nlattr *child_resource_attr;
-	struct nlattr *resource_attr;
-
-	resource_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE);
-	if (!resource_attr)
-		return -EMSGSIZE;
-
-	if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) ||
-	    nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size,
-			      DEVLINK_ATTR_PAD) ||
-	    nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
-			      DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-	if (resource->size != resource->size_new &&
-	    nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
-			      resource->size_new, DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-	if (devlink_resource_occ_put(resource, skb))
-		goto nla_put_failure;
-	if (devlink_resource_size_params_put(resource, skb))
-		goto nla_put_failure;
-	if (list_empty(&resource->resource_list))
-		goto out;
-
-	if (nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_SIZE_VALID,
-		       resource->size_valid))
-		goto nla_put_failure;
-
-	child_resource_attr = nla_nest_start_noflag(skb,
-						    DEVLINK_ATTR_RESOURCE_LIST);
-	if (!child_resource_attr)
-		goto nla_put_failure;
-
-	list_for_each_entry(child_resource, &resource->resource_list, list) {
-		if (devlink_resource_put(devlink, skb, child_resource))
-			goto resource_put_failure;
-	}
-
-	nla_nest_end(skb, child_resource_attr);
-out:
-	nla_nest_end(skb, resource_attr);
-	return 0;
-
-resource_put_failure:
-	nla_nest_cancel(skb, child_resource_attr);
-nla_put_failure:
-	nla_nest_cancel(skb, resource_attr);
-	return -EMSGSIZE;
-}
-
-static int devlink_resource_fill(struct genl_info *info,
-				 enum devlink_command cmd, int flags)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_resource *resource;
-	struct nlattr *resources_attr;
-	struct sk_buff *skb = NULL;
-	struct nlmsghdr *nlh;
-	bool incomplete;
-	void *hdr;
-	int i;
-	int err;
-
-	resource = list_first_entry(&devlink->resource_list,
-				    struct devlink_resource, list);
-start_again:
-	err = devlink_nl_msg_reply_and_new(&skb, info);
-	if (err)
-		return err;
-
-	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
-			  &devlink_nl_family, NLM_F_MULTI, cmd);
-	if (!hdr) {
-		nlmsg_free(skb);
-		return -EMSGSIZE;
-	}
-
-	if (devlink_nl_put_handle(skb, devlink))
-		goto nla_put_failure;
-
-	resources_attr = nla_nest_start_noflag(skb,
-					       DEVLINK_ATTR_RESOURCE_LIST);
-	if (!resources_attr)
-		goto nla_put_failure;
-
-	incomplete = false;
-	i = 0;
-	list_for_each_entry_from(resource, &devlink->resource_list, list) {
-		err = devlink_resource_put(devlink, skb, resource);
-		if (err) {
-			if (!i)
-				goto err_resource_put;
-			incomplete = true;
-			break;
-		}
-		i++;
-	}
-	nla_nest_end(skb, resources_attr);
-	genlmsg_end(skb, hdr);
-	if (incomplete)
-		goto start_again;
-send_done:
-	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
-			NLMSG_DONE, 0, flags | NLM_F_MULTI);
-	if (!nlh) {
-		err = devlink_nl_msg_reply_and_new(&skb, info);
-		if (err)
-			return err;
-		goto send_done;
-	}
-	return genlmsg_reply(skb, info);
-
-nla_put_failure:
-	err = -EMSGSIZE;
-err_resource_put:
-	nlmsg_free(skb);
-	return err;
-}
-
-static int devlink_nl_cmd_resource_dump(struct sk_buff *skb,
-					struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-
-	if (list_empty(&devlink->resource_list))
-		return -EOPNOTSUPP;
-
-	return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
-}
-
-int devlink_resources_validate(struct devlink *devlink,
-			       struct devlink_resource *resource,
-			       struct genl_info *info)
-{
-	struct list_head *resource_list;
-	int err = 0;
-
-	if (resource)
-		resource_list = &resource->resource_list;
-	else
-		resource_list = &devlink->resource_list;
-
-	list_for_each_entry(resource, resource_list, list) {
-		if (!resource->size_valid)
-			return -EINVAL;
-		err = devlink_resources_validate(devlink, resource, info);
-		if (err)
-			return err;
-	}
-	return err;
-}
-
 static const struct devlink_param devlink_param_generic[] = {
 	{
 		.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
@@ -4529,267 +4216,6 @@ void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
 }
 EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
 
-/**
- * devl_resource_register - devlink resource register
- *
- * @devlink: devlink
- * @resource_name: resource's name
- * @resource_size: resource's size
- * @resource_id: resource's id
- * @parent_resource_id: resource's parent id
- * @size_params: size parameters
- *
- * Generic resources should reuse the same names across drivers.
- * Please see the generic resources list at:
- * Documentation/networking/devlink/devlink-resource.rst
- */
-int devl_resource_register(struct devlink *devlink,
-			   const char *resource_name,
-			   u64 resource_size,
-			   u64 resource_id,
-			   u64 parent_resource_id,
-			   const struct devlink_resource_size_params *size_params)
-{
-	struct devlink_resource *resource;
-	struct list_head *resource_list;
-	bool top_hierarchy;
-
-	lockdep_assert_held(&devlink->lock);
-
-	top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
-
-	resource = devlink_resource_find(devlink, NULL, resource_id);
-	if (resource)
-		return -EINVAL;
-
-	resource = kzalloc(sizeof(*resource), GFP_KERNEL);
-	if (!resource)
-		return -ENOMEM;
-
-	if (top_hierarchy) {
-		resource_list = &devlink->resource_list;
-	} else {
-		struct devlink_resource *parent_resource;
-
-		parent_resource = devlink_resource_find(devlink, NULL,
-							parent_resource_id);
-		if (parent_resource) {
-			resource_list = &parent_resource->resource_list;
-			resource->parent = parent_resource;
-		} else {
-			kfree(resource);
-			return -EINVAL;
-		}
-	}
-
-	resource->name = resource_name;
-	resource->size = resource_size;
-	resource->size_new = resource_size;
-	resource->id = resource_id;
-	resource->size_valid = true;
-	memcpy(&resource->size_params, size_params,
-	       sizeof(resource->size_params));
-	INIT_LIST_HEAD(&resource->resource_list);
-	list_add_tail(&resource->list, resource_list);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devl_resource_register);
-
-/**
- *	devlink_resource_register - devlink resource register
- *
- *	@devlink: devlink
- *	@resource_name: resource's name
- *	@resource_size: resource's size
- *	@resource_id: resource's id
- *	@parent_resource_id: resource's parent id
- *	@size_params: size parameters
- *
- *	Generic resources should reuse the same names across drivers.
- *	Please see the generic resources list at:
- *	Documentation/networking/devlink/devlink-resource.rst
- *
- *	Context: Takes and release devlink->lock <mutex>.
- */
-int devlink_resource_register(struct devlink *devlink,
-			      const char *resource_name,
-			      u64 resource_size,
-			      u64 resource_id,
-			      u64 parent_resource_id,
-			      const struct devlink_resource_size_params *size_params)
-{
-	int err;
-
-	devl_lock(devlink);
-	err = devl_resource_register(devlink, resource_name, resource_size,
-				     resource_id, parent_resource_id, size_params);
-	devl_unlock(devlink);
-	return err;
-}
-EXPORT_SYMBOL_GPL(devlink_resource_register);
-
-static void devlink_resource_unregister(struct devlink *devlink,
-					struct devlink_resource *resource)
-{
-	struct devlink_resource *tmp, *child_resource;
-
-	list_for_each_entry_safe(child_resource, tmp, &resource->resource_list,
-				 list) {
-		devlink_resource_unregister(devlink, child_resource);
-		list_del(&child_resource->list);
-		kfree(child_resource);
-	}
-}
-
-/**
- * devl_resources_unregister - free all resources
- *
- * @devlink: devlink
- */
-void devl_resources_unregister(struct devlink *devlink)
-{
-	struct devlink_resource *tmp, *child_resource;
-
-	lockdep_assert_held(&devlink->lock);
-
-	list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list,
-				 list) {
-		devlink_resource_unregister(devlink, child_resource);
-		list_del(&child_resource->list);
-		kfree(child_resource);
-	}
-}
-EXPORT_SYMBOL_GPL(devl_resources_unregister);
-
-/**
- *	devlink_resources_unregister - free all resources
- *
- *	@devlink: devlink
- *
- *	Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_resources_unregister(struct devlink *devlink)
-{
-	devl_lock(devlink);
-	devl_resources_unregister(devlink);
-	devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resources_unregister);
-
-/**
- * devl_resource_size_get - get and update size
- *
- * @devlink: devlink
- * @resource_id: the requested resource id
- * @p_resource_size: ptr to update
- */
-int devl_resource_size_get(struct devlink *devlink,
-			   u64 resource_id,
-			   u64 *p_resource_size)
-{
-	struct devlink_resource *resource;
-
-	lockdep_assert_held(&devlink->lock);
-
-	resource = devlink_resource_find(devlink, NULL, resource_id);
-	if (!resource)
-		return -EINVAL;
-	*p_resource_size = resource->size_new;
-	resource->size = resource->size_new;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devl_resource_size_get);
-
-/**
- * devl_resource_occ_get_register - register occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- * @occ_get: occupancy getter callback
- * @occ_get_priv: occupancy getter callback priv
- */
-void devl_resource_occ_get_register(struct devlink *devlink,
-				    u64 resource_id,
-				    devlink_resource_occ_get_t *occ_get,
-				    void *occ_get_priv)
-{
-	struct devlink_resource *resource;
-
-	lockdep_assert_held(&devlink->lock);
-
-	resource = devlink_resource_find(devlink, NULL, resource_id);
-	if (WARN_ON(!resource))
-		return;
-	WARN_ON(resource->occ_get);
-
-	resource->occ_get = occ_get;
-	resource->occ_get_priv = occ_get_priv;
-}
-EXPORT_SYMBOL_GPL(devl_resource_occ_get_register);
-
-/**
- *	devlink_resource_occ_get_register - register occupancy getter
- *
- *	@devlink: devlink
- *	@resource_id: resource id
- *	@occ_get: occupancy getter callback
- *	@occ_get_priv: occupancy getter callback priv
- *
- *	Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_resource_occ_get_register(struct devlink *devlink,
-				       u64 resource_id,
-				       devlink_resource_occ_get_t *occ_get,
-				       void *occ_get_priv)
-{
-	devl_lock(devlink);
-	devl_resource_occ_get_register(devlink, resource_id,
-				       occ_get, occ_get_priv);
-	devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
-
-/**
- * devl_resource_occ_get_unregister - unregister occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- */
-void devl_resource_occ_get_unregister(struct devlink *devlink,
-				      u64 resource_id)
-{
-	struct devlink_resource *resource;
-
-	lockdep_assert_held(&devlink->lock);
-
-	resource = devlink_resource_find(devlink, NULL, resource_id);
-	if (WARN_ON(!resource))
-		return;
-	WARN_ON(!resource->occ_get);
-
-	resource->occ_get = NULL;
-	resource->occ_get_priv = NULL;
-}
-EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister);
-
-/**
- *	devlink_resource_occ_get_unregister - unregister occupancy getter
- *
- *	@devlink: devlink
- *	@resource_id: resource id
- *
- *	Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_resource_occ_get_unregister(struct devlink *devlink,
-					 u64 resource_id)
-{
-	devl_lock(devlink);
-	devl_resource_occ_get_unregister(devlink, resource_id);
-	devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
-
 static int devlink_param_verify(const struct devlink_param *param)
 {
 	if (!param || !param->name || !param->supported_cmodes)
diff --git a/net/devlink/resource.c b/net/devlink/resource.c
new file mode 100644
index 000000000000..c8b615e4c385
--- /dev/null
+++ b/net/devlink/resource.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+/**
+ * struct devlink_resource - devlink resource
+ * @name: name of the resource
+ * @id: id, per devlink instance
+ * @size: size of the resource
+ * @size_new: updated size of the resource, reload is needed
+ * @size_valid: valid in case the total size of the resource is valid
+ *              including its children
+ * @parent: parent resource
+ * @size_params: size parameters
+ * @list: parent list
+ * @resource_list: list of child resources
+ * @occ_get: occupancy getter callback
+ * @occ_get_priv: occupancy getter callback priv
+ */
+struct devlink_resource {
+	const char *name;
+	u64 id;
+	u64 size;
+	u64 size_new;
+	bool size_valid;
+	struct devlink_resource *parent;
+	struct devlink_resource_size_params size_params;
+	struct list_head list;
+	struct list_head resource_list;
+	devlink_resource_occ_get_t *occ_get;
+	void *occ_get_priv;
+};
+
+static struct devlink_resource *
+devlink_resource_find(struct devlink *devlink,
+		      struct devlink_resource *resource, u64 resource_id)
+{
+	struct list_head *resource_list;
+
+	if (resource)
+		resource_list = &resource->resource_list;
+	else
+		resource_list = &devlink->resource_list;
+
+	list_for_each_entry(resource, resource_list, list) {
+		struct devlink_resource *child_resource;
+
+		if (resource->id == resource_id)
+			return resource;
+
+		child_resource = devlink_resource_find(devlink, resource,
+						       resource_id);
+		if (child_resource)
+			return child_resource;
+	}
+	return NULL;
+}
+
+static void
+devlink_resource_validate_children(struct devlink_resource *resource)
+{
+	struct devlink_resource *child_resource;
+	bool size_valid = true;
+	u64 parts_size = 0;
+
+	if (list_empty(&resource->resource_list))
+		goto out;
+
+	list_for_each_entry(child_resource, &resource->resource_list, list)
+		parts_size += child_resource->size_new;
+
+	if (parts_size > resource->size_new)
+		size_valid = false;
+out:
+	resource->size_valid = size_valid;
+}
+
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+			       struct netlink_ext_ack *extack)
+{
+	u64 reminder;
+	int err = 0;
+
+	if (size > resource->size_params.size_max) {
+		NL_SET_ERR_MSG(extack, "Size larger than maximum");
+		err = -EINVAL;
+	}
+
+	if (size < resource->size_params.size_min) {
+		NL_SET_ERR_MSG(extack, "Size smaller than minimum");
+		err = -EINVAL;
+	}
+
+	div64_u64_rem(size, resource->size_params.size_granularity, &reminder);
+	if (reminder) {
+		NL_SET_ERR_MSG(extack, "Wrong granularity");
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_resource *resource;
+	u64 resource_id;
+	u64 size;
+	int err;
+
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_ID) ||
+	    GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_SIZE))
+		return -EINVAL;
+	resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]);
+
+	resource = devlink_resource_find(devlink, NULL, resource_id);
+	if (!resource)
+		return -EINVAL;
+
+	size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
+	err = devlink_resource_validate_size(resource, size, info->extack);
+	if (err)
+		return err;
+
+	resource->size_new = size;
+	devlink_resource_validate_children(resource);
+	if (resource->parent)
+		devlink_resource_validate_children(resource->parent);
+	return 0;
+}
+
+static int
+devlink_resource_size_params_put(struct devlink_resource *resource,
+				 struct sk_buff *skb)
+{
+	struct devlink_resource_size_params *size_params;
+
+	size_params = &resource->size_params;
+	if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
+			      size_params->size_granularity, DEVLINK_ATTR_PAD) ||
+	    nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
+			      size_params->size_max, DEVLINK_ATTR_PAD) ||
+	    nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
+			      size_params->size_min, DEVLINK_ATTR_PAD) ||
+	    nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static int devlink_resource_occ_put(struct devlink_resource *resource,
+				    struct sk_buff *skb)
+{
+	if (!resource->occ_get)
+		return 0;
+	return nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
+				 resource->occ_get(resource->occ_get_priv),
+				 DEVLINK_ATTR_PAD);
+}
+
+static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
+				struct devlink_resource *resource)
+{
+	struct devlink_resource *child_resource;
+	struct nlattr *child_resource_attr;
+	struct nlattr *resource_attr;
+
+	resource_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE);
+	if (!resource_attr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) ||
+	    nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size,
+			      DEVLINK_ATTR_PAD) ||
+	    nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
+			      DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+	if (resource->size != resource->size_new &&
+	    nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
+			      resource->size_new, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+	if (devlink_resource_occ_put(resource, skb))
+		goto nla_put_failure;
+	if (devlink_resource_size_params_put(resource, skb))
+		goto nla_put_failure;
+	if (list_empty(&resource->resource_list))
+		goto out;
+
+	if (nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_SIZE_VALID,
+		       resource->size_valid))
+		goto nla_put_failure;
+
+	child_resource_attr = nla_nest_start_noflag(skb,
+						    DEVLINK_ATTR_RESOURCE_LIST);
+	if (!child_resource_attr)
+		goto nla_put_failure;
+
+	list_for_each_entry(child_resource, &resource->resource_list, list) {
+		if (devlink_resource_put(devlink, skb, child_resource))
+			goto resource_put_failure;
+	}
+
+	nla_nest_end(skb, child_resource_attr);
+out:
+	nla_nest_end(skb, resource_attr);
+	return 0;
+
+resource_put_failure:
+	nla_nest_cancel(skb, child_resource_attr);
+nla_put_failure:
+	nla_nest_cancel(skb, resource_attr);
+	return -EMSGSIZE;
+}
+
+static int devlink_resource_fill(struct genl_info *info,
+				 enum devlink_command cmd, int flags)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_resource *resource;
+	struct nlattr *resources_attr;
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh;
+	bool incomplete;
+	void *hdr;
+	int i;
+	int err;
+
+	resource = list_first_entry(&devlink->resource_list,
+				    struct devlink_resource, list);
+start_again:
+	err = devlink_nl_msg_reply_and_new(&skb, info);
+	if (err)
+		return err;
+
+	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+			  &devlink_nl_family, NLM_F_MULTI, cmd);
+	if (!hdr) {
+		nlmsg_free(skb);
+		return -EMSGSIZE;
+	}
+
+	if (devlink_nl_put_handle(skb, devlink))
+		goto nla_put_failure;
+
+	resources_attr = nla_nest_start_noflag(skb,
+					       DEVLINK_ATTR_RESOURCE_LIST);
+	if (!resources_attr)
+		goto nla_put_failure;
+
+	incomplete = false;
+	i = 0;
+	list_for_each_entry_from(resource, &devlink->resource_list, list) {
+		err = devlink_resource_put(devlink, skb, resource);
+		if (err) {
+			if (!i)
+				goto err_resource_put;
+			incomplete = true;
+			break;
+		}
+		i++;
+	}
+	nla_nest_end(skb, resources_attr);
+	genlmsg_end(skb, hdr);
+	if (incomplete)
+		goto start_again;
+send_done:
+	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+			NLMSG_DONE, 0, flags | NLM_F_MULTI);
+	if (!nlh) {
+		err = devlink_nl_msg_reply_and_new(&skb, info);
+		if (err)
+			return err;
+		goto send_done;
+	}
+	return genlmsg_reply(skb, info);
+
+nla_put_failure:
+	err = -EMSGSIZE;
+err_resource_put:
+	nlmsg_free(skb);
+	return err;
+}
+
+int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+
+	if (list_empty(&devlink->resource_list))
+		return -EOPNOTSUPP;
+
+	return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
+}
+
+int devlink_resources_validate(struct devlink *devlink,
+			       struct devlink_resource *resource,
+			       struct genl_info *info)
+{
+	struct list_head *resource_list;
+	int err = 0;
+
+	if (resource)
+		resource_list = &resource->resource_list;
+	else
+		resource_list = &devlink->resource_list;
+
+	list_for_each_entry(resource, resource_list, list) {
+		if (!resource->size_valid)
+			return -EINVAL;
+		err = devlink_resources_validate(devlink, resource, info);
+		if (err)
+			return err;
+	}
+	return err;
+}
+
+/**
+ * devl_resource_register - devlink resource register
+ *
+ * @devlink: devlink
+ * @resource_name: resource's name
+ * @resource_size: resource's size
+ * @resource_id: resource's id
+ * @parent_resource_id: resource's parent id
+ * @size_params: size parameters
+ *
+ * Generic resources should reuse the same names across drivers.
+ * Please see the generic resources list at:
+ * Documentation/networking/devlink/devlink-resource.rst
+ */
+int devl_resource_register(struct devlink *devlink,
+			   const char *resource_name,
+			   u64 resource_size,
+			   u64 resource_id,
+			   u64 parent_resource_id,
+			   const struct devlink_resource_size_params *size_params)
+{
+	struct devlink_resource *resource;
+	struct list_head *resource_list;
+	bool top_hierarchy;
+
+	lockdep_assert_held(&devlink->lock);
+
+	top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
+
+	resource = devlink_resource_find(devlink, NULL, resource_id);
+	if (resource)
+		return -EINVAL;
+
+	resource = kzalloc(sizeof(*resource), GFP_KERNEL);
+	if (!resource)
+		return -ENOMEM;
+
+	if (top_hierarchy) {
+		resource_list = &devlink->resource_list;
+	} else {
+		struct devlink_resource *parent_resource;
+
+		parent_resource = devlink_resource_find(devlink, NULL,
+							parent_resource_id);
+		if (parent_resource) {
+			resource_list = &parent_resource->resource_list;
+			resource->parent = parent_resource;
+		} else {
+			kfree(resource);
+			return -EINVAL;
+		}
+	}
+
+	resource->name = resource_name;
+	resource->size = resource_size;
+	resource->size_new = resource_size;
+	resource->id = resource_id;
+	resource->size_valid = true;
+	memcpy(&resource->size_params, size_params,
+	       sizeof(resource->size_params));
+	INIT_LIST_HEAD(&resource->resource_list);
+	list_add_tail(&resource->list, resource_list);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devl_resource_register);
+
+/**
+ *	devlink_resource_register - devlink resource register
+ *
+ *	@devlink: devlink
+ *	@resource_name: resource's name
+ *	@resource_size: resource's size
+ *	@resource_id: resource's id
+ *	@parent_resource_id: resource's parent id
+ *	@size_params: size parameters
+ *
+ *	Generic resources should reuse the same names across drivers.
+ *	Please see the generic resources list at:
+ *	Documentation/networking/devlink/devlink-resource.rst
+ *
+ *	Context: Takes and release devlink->lock <mutex>.
+ */
+int devlink_resource_register(struct devlink *devlink,
+			      const char *resource_name,
+			      u64 resource_size,
+			      u64 resource_id,
+			      u64 parent_resource_id,
+			      const struct devlink_resource_size_params *size_params)
+{
+	int err;
+
+	devl_lock(devlink);
+	err = devl_resource_register(devlink, resource_name, resource_size,
+				     resource_id, parent_resource_id, size_params);
+	devl_unlock(devlink);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_resource_register);
+
+static void devlink_resource_unregister(struct devlink *devlink,
+					struct devlink_resource *resource)
+{
+	struct devlink_resource *tmp, *child_resource;
+
+	list_for_each_entry_safe(child_resource, tmp, &resource->resource_list,
+				 list) {
+		devlink_resource_unregister(devlink, child_resource);
+		list_del(&child_resource->list);
+		kfree(child_resource);
+	}
+}
+
+/**
+ * devl_resources_unregister - free all resources
+ *
+ * @devlink: devlink
+ */
+void devl_resources_unregister(struct devlink *devlink)
+{
+	struct devlink_resource *tmp, *child_resource;
+
+	lockdep_assert_held(&devlink->lock);
+
+	list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list,
+				 list) {
+		devlink_resource_unregister(devlink, child_resource);
+		list_del(&child_resource->list);
+		kfree(child_resource);
+	}
+}
+EXPORT_SYMBOL_GPL(devl_resources_unregister);
+
+/**
+ *	devlink_resources_unregister - free all resources
+ *
+ *	@devlink: devlink
+ *
+ *	Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_resources_unregister(struct devlink *devlink)
+{
+	devl_lock(devlink);
+	devl_resources_unregister(devlink);
+	devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_resources_unregister);
+
+/**
+ * devl_resource_size_get - get and update size
+ *
+ * @devlink: devlink
+ * @resource_id: the requested resource id
+ * @p_resource_size: ptr to update
+ */
+int devl_resource_size_get(struct devlink *devlink,
+			   u64 resource_id,
+			   u64 *p_resource_size)
+{
+	struct devlink_resource *resource;
+
+	lockdep_assert_held(&devlink->lock);
+
+	resource = devlink_resource_find(devlink, NULL, resource_id);
+	if (!resource)
+		return -EINVAL;
+	*p_resource_size = resource->size_new;
+	resource->size = resource->size_new;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devl_resource_size_get);
+
+/**
+ * devl_resource_occ_get_register - register occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ * @occ_get: occupancy getter callback
+ * @occ_get_priv: occupancy getter callback priv
+ */
+void devl_resource_occ_get_register(struct devlink *devlink,
+				    u64 resource_id,
+				    devlink_resource_occ_get_t *occ_get,
+				    void *occ_get_priv)
+{
+	struct devlink_resource *resource;
+
+	lockdep_assert_held(&devlink->lock);
+
+	resource = devlink_resource_find(devlink, NULL, resource_id);
+	if (WARN_ON(!resource))
+		return;
+	WARN_ON(resource->occ_get);
+
+	resource->occ_get = occ_get;
+	resource->occ_get_priv = occ_get_priv;
+}
+EXPORT_SYMBOL_GPL(devl_resource_occ_get_register);
+
+/**
+ *	devlink_resource_occ_get_register - register occupancy getter
+ *
+ *	@devlink: devlink
+ *	@resource_id: resource id
+ *	@occ_get: occupancy getter callback
+ *	@occ_get_priv: occupancy getter callback priv
+ *
+ *	Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_resource_occ_get_register(struct devlink *devlink,
+				       u64 resource_id,
+				       devlink_resource_occ_get_t *occ_get,
+				       void *occ_get_priv)
+{
+	devl_lock(devlink);
+	devl_resource_occ_get_register(devlink, resource_id,
+				       occ_get, occ_get_priv);
+	devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
+
+/**
+ * devl_resource_occ_get_unregister - unregister occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ */
+void devl_resource_occ_get_unregister(struct devlink *devlink,
+				      u64 resource_id)
+{
+	struct devlink_resource *resource;
+
+	lockdep_assert_held(&devlink->lock);
+
+	resource = devlink_resource_find(devlink, NULL, resource_id);
+	if (WARN_ON(!resource))
+		return;
+	WARN_ON(!resource->occ_get);
+
+	resource->occ_get = NULL;
+	resource->occ_get_priv = NULL;
+}
+EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister);
+
+/**
+ *	devlink_resource_occ_get_unregister - unregister occupancy getter
+ *
+ *	@devlink: devlink
+ *	@resource_id: resource id
+ *
+ *	Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_resource_occ_get_unregister(struct devlink *devlink,
+					 u64 resource_id)
+{
+	devl_lock(devlink);
+	devl_resource_occ_get_unregister(devlink, resource_id);
+	devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);

From 830c41e1e987d9745a01b06c5f86434df8bd5f58 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:49 +0200
Subject: [PATCH 2003/2047] devlink: push param related code into separate file

Cut out another chunk from leftover.c and put param related code
into a separate file.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-8-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/Makefile        |   2 +-
 net/devlink/devl_internal.h |   9 +
 net/devlink/leftover.c      | 859 -----------------------------------
 net/devlink/param.c         | 865 ++++++++++++++++++++++++++++++++++++
 4 files changed, 875 insertions(+), 860 deletions(-)
 create mode 100644 net/devlink/param.c

diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index 8864cd933acb..c4b29f603a1f 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
-	 resource.o health.o
+	 resource.o param.o health.o
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index e5c0acf7a758..e0304d607978 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -156,6 +156,8 @@ int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info);
 void devlink_notify(struct devlink *devlink, enum devlink_command cmd);
 void devlink_ports_notify_register(struct devlink *devlink);
 void devlink_ports_notify_unregister(struct devlink *devlink);
+void devlink_params_notify_register(struct devlink *devlink);
+void devlink_params_notify_unregister(struct devlink *devlink);
 
 /* Ports */
 #define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port)				\
@@ -240,6 +242,13 @@ int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
 					    struct genl_info *info);
 int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
+					 struct netlink_callback *cb);
+int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
+				       struct genl_info *info);
+int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
+				       struct genl_info *info);
 int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
 					    struct genl_info *info);
 int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 1c65449f2252..2f1dfd27d1c8 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -1061,611 +1061,6 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
 	return 0;
 }
 
-static const struct devlink_param devlink_param_generic[] = {
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
-		.name = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME,
-		.type = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
-		.name = DEVLINK_PARAM_GENERIC_MAX_MACS_NAME,
-		.type = DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
-		.name = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME,
-		.type = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
-		.name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
-		.type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
-		.name = DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME,
-		.type = DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
-		.name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME,
-		.type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
-		.name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME,
-		.type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
-		.name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
-		.type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
-		.name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME,
-		.type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
-		.name = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME,
-		.type = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET,
-		.name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME,
-		.type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
-		.name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME,
-		.type = DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
-		.name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME,
-		.type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
-		.name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME,
-		.type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
-		.name = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME,
-		.type = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
-		.name = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME,
-		.type = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE,
-	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
-		.name = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME,
-		.type = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE,
-	},
-};
-
-static int devlink_param_generic_verify(const struct devlink_param *param)
-{
-	/* verify it match generic parameter by id and name */
-	if (param->id > DEVLINK_PARAM_GENERIC_ID_MAX)
-		return -EINVAL;
-	if (strcmp(param->name, devlink_param_generic[param->id].name))
-		return -ENOENT;
-
-	WARN_ON(param->type != devlink_param_generic[param->id].type);
-
-	return 0;
-}
-
-static int devlink_param_driver_verify(const struct devlink_param *param)
-{
-	int i;
-
-	if (param->id <= DEVLINK_PARAM_GENERIC_ID_MAX)
-		return -EINVAL;
-	/* verify no such name in generic params */
-	for (i = 0; i <= DEVLINK_PARAM_GENERIC_ID_MAX; i++)
-		if (!strcmp(param->name, devlink_param_generic[i].name))
-			return -EEXIST;
-
-	return 0;
-}
-
-static struct devlink_param_item *
-devlink_param_find_by_name(struct xarray *params, const char *param_name)
-{
-	struct devlink_param_item *param_item;
-	unsigned long param_id;
-
-	xa_for_each(params, param_id, param_item) {
-		if (!strcmp(param_item->param->name, param_name))
-			return param_item;
-	}
-	return NULL;
-}
-
-static struct devlink_param_item *
-devlink_param_find_by_id(struct xarray *params, u32 param_id)
-{
-	return xa_load(params, param_id);
-}
-
-static bool
-devlink_param_cmode_is_supported(const struct devlink_param *param,
-				 enum devlink_param_cmode cmode)
-{
-	return test_bit(cmode, &param->supported_cmodes);
-}
-
-static int devlink_param_get(struct devlink *devlink,
-			     const struct devlink_param *param,
-			     struct devlink_param_gset_ctx *ctx)
-{
-	if (!param->get)
-		return -EOPNOTSUPP;
-	return param->get(devlink, param->id, ctx);
-}
-
-static int devlink_param_set(struct devlink *devlink,
-			     const struct devlink_param *param,
-			     struct devlink_param_gset_ctx *ctx)
-{
-	if (!param->set)
-		return -EOPNOTSUPP;
-	return param->set(devlink, param->id, ctx);
-}
-
-static int
-devlink_param_type_to_nla_type(enum devlink_param_type param_type)
-{
-	switch (param_type) {
-	case DEVLINK_PARAM_TYPE_U8:
-		return NLA_U8;
-	case DEVLINK_PARAM_TYPE_U16:
-		return NLA_U16;
-	case DEVLINK_PARAM_TYPE_U32:
-		return NLA_U32;
-	case DEVLINK_PARAM_TYPE_STRING:
-		return NLA_STRING;
-	case DEVLINK_PARAM_TYPE_BOOL:
-		return NLA_FLAG;
-	default:
-		return -EINVAL;
-	}
-}
-
-static int
-devlink_nl_param_value_fill_one(struct sk_buff *msg,
-				enum devlink_param_type type,
-				enum devlink_param_cmode cmode,
-				union devlink_param_value val)
-{
-	struct nlattr *param_value_attr;
-
-	param_value_attr = nla_nest_start_noflag(msg,
-						 DEVLINK_ATTR_PARAM_VALUE);
-	if (!param_value_attr)
-		goto nla_put_failure;
-
-	if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_CMODE, cmode))
-		goto value_nest_cancel;
-
-	switch (type) {
-	case DEVLINK_PARAM_TYPE_U8:
-		if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu8))
-			goto value_nest_cancel;
-		break;
-	case DEVLINK_PARAM_TYPE_U16:
-		if (nla_put_u16(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu16))
-			goto value_nest_cancel;
-		break;
-	case DEVLINK_PARAM_TYPE_U32:
-		if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32))
-			goto value_nest_cancel;
-		break;
-	case DEVLINK_PARAM_TYPE_STRING:
-		if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA,
-				   val.vstr))
-			goto value_nest_cancel;
-		break;
-	case DEVLINK_PARAM_TYPE_BOOL:
-		if (val.vbool &&
-		    nla_put_flag(msg, DEVLINK_ATTR_PARAM_VALUE_DATA))
-			goto value_nest_cancel;
-		break;
-	}
-
-	nla_nest_end(msg, param_value_attr);
-	return 0;
-
-value_nest_cancel:
-	nla_nest_cancel(msg, param_value_attr);
-nla_put_failure:
-	return -EMSGSIZE;
-}
-
-static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
-				 unsigned int port_index,
-				 struct devlink_param_item *param_item,
-				 enum devlink_command cmd,
-				 u32 portid, u32 seq, int flags)
-{
-	union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
-	bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {};
-	const struct devlink_param *param = param_item->param;
-	struct devlink_param_gset_ctx ctx;
-	struct nlattr *param_values_list;
-	struct nlattr *param_attr;
-	int nla_type;
-	void *hdr;
-	int err;
-	int i;
-
-	/* Get value from driver part to driverinit configuration mode */
-	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
-		if (!devlink_param_cmode_is_supported(param, i))
-			continue;
-		if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) {
-			if (param_item->driverinit_value_new_valid)
-				param_value[i] = param_item->driverinit_value_new;
-			else if (param_item->driverinit_value_valid)
-				param_value[i] = param_item->driverinit_value;
-			else
-				return -EOPNOTSUPP;
-		} else {
-			ctx.cmode = i;
-			err = devlink_param_get(devlink, param, &ctx);
-			if (err)
-				return err;
-			param_value[i] = ctx.val;
-		}
-		param_value_set[i] = true;
-	}
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (devlink_nl_put_handle(msg, devlink))
-		goto genlmsg_cancel;
-
-	if (cmd == DEVLINK_CMD_PORT_PARAM_GET ||
-	    cmd == DEVLINK_CMD_PORT_PARAM_NEW ||
-	    cmd == DEVLINK_CMD_PORT_PARAM_DEL)
-		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index))
-			goto genlmsg_cancel;
-
-	param_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PARAM);
-	if (!param_attr)
-		goto genlmsg_cancel;
-	if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name))
-		goto param_nest_cancel;
-	if (param->generic && nla_put_flag(msg, DEVLINK_ATTR_PARAM_GENERIC))
-		goto param_nest_cancel;
-
-	nla_type = devlink_param_type_to_nla_type(param->type);
-	if (nla_type < 0)
-		goto param_nest_cancel;
-	if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type))
-		goto param_nest_cancel;
-
-	param_values_list = nla_nest_start_noflag(msg,
-						  DEVLINK_ATTR_PARAM_VALUES_LIST);
-	if (!param_values_list)
-		goto param_nest_cancel;
-
-	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
-		if (!param_value_set[i])
-			continue;
-		err = devlink_nl_param_value_fill_one(msg, param->type,
-						      i, param_value[i]);
-		if (err)
-			goto values_list_nest_cancel;
-	}
-
-	nla_nest_end(msg, param_values_list);
-	nla_nest_end(msg, param_attr);
-	genlmsg_end(msg, hdr);
-	return 0;
-
-values_list_nest_cancel:
-	nla_nest_end(msg, param_values_list);
-param_nest_cancel:
-	nla_nest_cancel(msg, param_attr);
-genlmsg_cancel:
-	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
-}
-
-static void devlink_param_notify(struct devlink *devlink,
-				 unsigned int port_index,
-				 struct devlink_param_item *param_item,
-				 enum devlink_command cmd)
-{
-	struct sk_buff *msg;
-	int err;
-
-	WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL &&
-		cmd != DEVLINK_CMD_PORT_PARAM_NEW &&
-		cmd != DEVLINK_CMD_PORT_PARAM_DEL);
-
-	/* devlink_notify_register() / devlink_notify_unregister()
-	 * will replay the notifications if the params are added/removed
-	 * outside of the lifetime of the instance.
-	 */
-	if (!devl_is_registered(devlink))
-		return;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return;
-	err = devlink_nl_param_fill(msg, devlink, port_index, param_item, cmd,
-				    0, 0, 0);
-	if (err) {
-		nlmsg_free(msg);
-		return;
-	}
-
-	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
-				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static void devlink_params_notify(struct devlink *devlink,
-				  enum devlink_command cmd)
-{
-	struct devlink_param_item *param_item;
-	unsigned long param_id;
-
-	xa_for_each(&devlink->params, param_id, param_item)
-		devlink_param_notify(devlink, 0, param_item, cmd);
-}
-
-static void devlink_params_notify_register(struct devlink *devlink)
-{
-	devlink_params_notify(devlink, DEVLINK_CMD_PARAM_NEW);
-}
-
-static void devlink_params_notify_unregister(struct devlink *devlink)
-{
-	devlink_params_notify(devlink, DEVLINK_CMD_PARAM_DEL);
-}
-
-static int devlink_nl_param_get_dump_one(struct sk_buff *msg,
-					 struct devlink *devlink,
-					 struct netlink_callback *cb,
-					 int flags)
-{
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct devlink_param_item *param_item;
-	unsigned long param_id;
-	int err = 0;
-
-	xa_for_each_start(&devlink->params, param_id, param_item, state->idx) {
-		err = devlink_nl_param_fill(msg, devlink, 0, param_item,
-					    DEVLINK_CMD_PARAM_GET,
-					    NETLINK_CB(cb->skb).portid,
-					    cb->nlh->nlmsg_seq, flags);
-		if (err == -EOPNOTSUPP) {
-			err = 0;
-		} else if (err) {
-			state->idx = param_id;
-			break;
-		}
-	}
-
-	return err;
-}
-
-int devlink_nl_param_get_dumpit(struct sk_buff *skb,
-				struct netlink_callback *cb)
-{
-	return devlink_nl_dumpit(skb, cb, devlink_nl_param_get_dump_one);
-}
-
-static int
-devlink_param_type_get_from_info(struct genl_info *info,
-				 enum devlink_param_type *param_type)
-{
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_TYPE))
-		return -EINVAL;
-
-	switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) {
-	case NLA_U8:
-		*param_type = DEVLINK_PARAM_TYPE_U8;
-		break;
-	case NLA_U16:
-		*param_type = DEVLINK_PARAM_TYPE_U16;
-		break;
-	case NLA_U32:
-		*param_type = DEVLINK_PARAM_TYPE_U32;
-		break;
-	case NLA_STRING:
-		*param_type = DEVLINK_PARAM_TYPE_STRING;
-		break;
-	case NLA_FLAG:
-		*param_type = DEVLINK_PARAM_TYPE_BOOL;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int
-devlink_param_value_get_from_info(const struct devlink_param *param,
-				  struct genl_info *info,
-				  union devlink_param_value *value)
-{
-	struct nlattr *param_data;
-	int len;
-
-	param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA];
-
-	if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data)
-		return -EINVAL;
-
-	switch (param->type) {
-	case DEVLINK_PARAM_TYPE_U8:
-		if (nla_len(param_data) != sizeof(u8))
-			return -EINVAL;
-		value->vu8 = nla_get_u8(param_data);
-		break;
-	case DEVLINK_PARAM_TYPE_U16:
-		if (nla_len(param_data) != sizeof(u16))
-			return -EINVAL;
-		value->vu16 = nla_get_u16(param_data);
-		break;
-	case DEVLINK_PARAM_TYPE_U32:
-		if (nla_len(param_data) != sizeof(u32))
-			return -EINVAL;
-		value->vu32 = nla_get_u32(param_data);
-		break;
-	case DEVLINK_PARAM_TYPE_STRING:
-		len = strnlen(nla_data(param_data), nla_len(param_data));
-		if (len == nla_len(param_data) ||
-		    len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
-			return -EINVAL;
-		strcpy(value->vstr, nla_data(param_data));
-		break;
-	case DEVLINK_PARAM_TYPE_BOOL:
-		if (param_data && nla_len(param_data))
-			return -EINVAL;
-		value->vbool = nla_get_flag(param_data);
-		break;
-	}
-	return 0;
-}
-
-static struct devlink_param_item *
-devlink_param_get_from_info(struct xarray *params, struct genl_info *info)
-{
-	char *param_name;
-
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_NAME))
-		return NULL;
-
-	param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
-	return devlink_param_find_by_name(params, param_name);
-}
-
-int devlink_nl_param_get_doit(struct sk_buff *skb,
-			      struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_param_item *param_item;
-	struct sk_buff *msg;
-	int err;
-
-	param_item = devlink_param_get_from_info(&devlink->params, info);
-	if (!param_item)
-		return -EINVAL;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	err = devlink_nl_param_fill(msg, devlink, 0, param_item,
-				    DEVLINK_CMD_PARAM_GET,
-				    info->snd_portid, info->snd_seq, 0);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
-
-	return genlmsg_reply(msg, info);
-}
-
-static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
-					   unsigned int port_index,
-					   struct xarray *params,
-					   struct genl_info *info,
-					   enum devlink_command cmd)
-{
-	enum devlink_param_type param_type;
-	struct devlink_param_gset_ctx ctx;
-	enum devlink_param_cmode cmode;
-	struct devlink_param_item *param_item;
-	const struct devlink_param *param;
-	union devlink_param_value value;
-	int err = 0;
-
-	param_item = devlink_param_get_from_info(params, info);
-	if (!param_item)
-		return -EINVAL;
-	param = param_item->param;
-	err = devlink_param_type_get_from_info(info, &param_type);
-	if (err)
-		return err;
-	if (param_type != param->type)
-		return -EINVAL;
-	err = devlink_param_value_get_from_info(param, info, &value);
-	if (err)
-		return err;
-	if (param->validate) {
-		err = param->validate(devlink, param->id, value, info->extack);
-		if (err)
-			return err;
-	}
-
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_VALUE_CMODE))
-		return -EINVAL;
-	cmode = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]);
-	if (!devlink_param_cmode_is_supported(param, cmode))
-		return -EOPNOTSUPP;
-
-	if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) {
-		param_item->driverinit_value_new = value;
-		param_item->driverinit_value_new_valid = true;
-	} else {
-		if (!param->set)
-			return -EOPNOTSUPP;
-		ctx.val = value;
-		ctx.cmode = cmode;
-		err = devlink_param_set(devlink, param, &ctx);
-		if (err)
-			return err;
-	}
-
-	devlink_param_notify(devlink, port_index, param_item, cmd);
-	return 0;
-}
-
-static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
-					 struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-
-	return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->params,
-					       info, DEVLINK_CMD_PARAM_NEW);
-}
-
-static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
-						struct netlink_callback *cb)
-{
-	NL_SET_ERR_MSG(cb->extack, "Port params are not supported");
-	return msg->len;
-}
-
-static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
-					      struct genl_info *info)
-{
-	NL_SET_ERR_MSG(info->extack, "Port params are not supported");
-	return -EINVAL;
-}
-
-static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
-					      struct genl_info *info)
-{
-	NL_SET_ERR_MSG(info->extack, "Port params are not supported");
-	return -EINVAL;
-}
-
 static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
 					     struct devlink *devlink,
 					     struct devlink_snapshot *snapshot)
@@ -4216,260 +3611,6 @@ void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
 }
 EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
 
-static int devlink_param_verify(const struct devlink_param *param)
-{
-	if (!param || !param->name || !param->supported_cmodes)
-		return -EINVAL;
-	if (param->generic)
-		return devlink_param_generic_verify(param);
-	else
-		return devlink_param_driver_verify(param);
-}
-
-static int devlink_param_register(struct devlink *devlink,
-				  const struct devlink_param *param)
-{
-	struct devlink_param_item *param_item;
-	int err;
-
-	WARN_ON(devlink_param_verify(param));
-	WARN_ON(devlink_param_find_by_name(&devlink->params, param->name));
-
-	if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
-		WARN_ON(param->get || param->set);
-	else
-		WARN_ON(!param->get || !param->set);
-
-	param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
-	if (!param_item)
-		return -ENOMEM;
-
-	param_item->param = param;
-
-	err = xa_insert(&devlink->params, param->id, param_item, GFP_KERNEL);
-	if (err)
-		goto err_xa_insert;
-
-	devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
-	return 0;
-
-err_xa_insert:
-	kfree(param_item);
-	return err;
-}
-
-static void devlink_param_unregister(struct devlink *devlink,
-				     const struct devlink_param *param)
-{
-	struct devlink_param_item *param_item;
-
-	param_item = devlink_param_find_by_id(&devlink->params, param->id);
-	if (WARN_ON(!param_item))
-		return;
-	devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL);
-	xa_erase(&devlink->params, param->id);
-	kfree(param_item);
-}
-
-/**
- *	devl_params_register - register configuration parameters
- *
- *	@devlink: devlink
- *	@params: configuration parameters array
- *	@params_count: number of parameters provided
- *
- *	Register the configuration parameters supported by the driver.
- */
-int devl_params_register(struct devlink *devlink,
-			 const struct devlink_param *params,
-			 size_t params_count)
-{
-	const struct devlink_param *param = params;
-	int i, err;
-
-	lockdep_assert_held(&devlink->lock);
-
-	for (i = 0; i < params_count; i++, param++) {
-		err = devlink_param_register(devlink, param);
-		if (err)
-			goto rollback;
-	}
-	return 0;
-
-rollback:
-	if (!i)
-		return err;
-
-	for (param--; i > 0; i--, param--)
-		devlink_param_unregister(devlink, param);
-	return err;
-}
-EXPORT_SYMBOL_GPL(devl_params_register);
-
-int devlink_params_register(struct devlink *devlink,
-			    const struct devlink_param *params,
-			    size_t params_count)
-{
-	int err;
-
-	devl_lock(devlink);
-	err = devl_params_register(devlink, params, params_count);
-	devl_unlock(devlink);
-	return err;
-}
-EXPORT_SYMBOL_GPL(devlink_params_register);
-
-/**
- *	devl_params_unregister - unregister configuration parameters
- *	@devlink: devlink
- *	@params: configuration parameters to unregister
- *	@params_count: number of parameters provided
- */
-void devl_params_unregister(struct devlink *devlink,
-			    const struct devlink_param *params,
-			    size_t params_count)
-{
-	const struct devlink_param *param = params;
-	int i;
-
-	lockdep_assert_held(&devlink->lock);
-
-	for (i = 0; i < params_count; i++, param++)
-		devlink_param_unregister(devlink, param);
-}
-EXPORT_SYMBOL_GPL(devl_params_unregister);
-
-void devlink_params_unregister(struct devlink *devlink,
-			       const struct devlink_param *params,
-			       size_t params_count)
-{
-	devl_lock(devlink);
-	devl_params_unregister(devlink, params, params_count);
-	devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_params_unregister);
-
-/**
- *	devl_param_driverinit_value_get - get configuration parameter
- *					  value for driver initializing
- *
- *	@devlink: devlink
- *	@param_id: parameter ID
- *	@val: pointer to store the value of parameter in driverinit
- *	      configuration mode
- *
- *	This function should be used by the driver to get driverinit
- *	configuration for initialization after reload command.
- *
- *	Note that lockless call of this function relies on the
- *	driver to maintain following basic sane behavior:
- *	1) Driver ensures a call to this function cannot race with
- *	   registering/unregistering the parameter with the same parameter ID.
- *	2) Driver ensures a call to this function cannot race with
- *	   devl_param_driverinit_value_set() call with the same parameter ID.
- *	3) Driver ensures a call to this function cannot race with
- *	   reload operation.
- *	If the driver is not able to comply, it has to take the devlink->lock
- *	while calling this.
- */
-int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
-				    union devlink_param_value *val)
-{
-	struct devlink_param_item *param_item;
-
-	if (WARN_ON(!devlink_reload_supported(devlink->ops)))
-		return -EOPNOTSUPP;
-
-	param_item = devlink_param_find_by_id(&devlink->params, param_id);
-	if (!param_item)
-		return -EINVAL;
-
-	if (!param_item->driverinit_value_valid)
-		return -EOPNOTSUPP;
-
-	if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
-						      DEVLINK_PARAM_CMODE_DRIVERINIT)))
-		return -EOPNOTSUPP;
-
-	*val = param_item->driverinit_value;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devl_param_driverinit_value_get);
-
-/**
- *	devl_param_driverinit_value_set - set value of configuration
- *					  parameter for driverinit
- *					  configuration mode
- *
- *	@devlink: devlink
- *	@param_id: parameter ID
- *	@init_val: value of parameter to set for driverinit configuration mode
- *
- *	This function should be used by the driver to set driverinit
- *	configuration mode default value.
- */
-void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
-				     union devlink_param_value init_val)
-{
-	struct devlink_param_item *param_item;
-
-	devl_assert_locked(devlink);
-
-	param_item = devlink_param_find_by_id(&devlink->params, param_id);
-	if (WARN_ON(!param_item))
-		return;
-
-	if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
-						      DEVLINK_PARAM_CMODE_DRIVERINIT)))
-		return;
-
-	param_item->driverinit_value = init_val;
-	param_item->driverinit_value_valid = true;
-
-	devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
-}
-EXPORT_SYMBOL_GPL(devl_param_driverinit_value_set);
-
-void devlink_params_driverinit_load_new(struct devlink *devlink)
-{
-	struct devlink_param_item *param_item;
-	unsigned long param_id;
-
-	xa_for_each(&devlink->params, param_id, param_item) {
-		if (!devlink_param_cmode_is_supported(param_item->param,
-						      DEVLINK_PARAM_CMODE_DRIVERINIT) ||
-		    !param_item->driverinit_value_new_valid)
-			continue;
-		param_item->driverinit_value = param_item->driverinit_value_new;
-		param_item->driverinit_value_valid = true;
-		param_item->driverinit_value_new_valid = false;
-	}
-}
-
-/**
- *	devl_param_value_changed - notify devlink on a parameter's value
- *				   change. Should be called by the driver
- *				   right after the change.
- *
- *	@devlink: devlink
- *	@param_id: parameter ID
- *
- *	This function should be used by the driver to notify devlink on value
- *	change, excluding driverinit configuration mode.
- *	For driverinit configuration mode driver should use the function
- */
-void devl_param_value_changed(struct devlink *devlink, u32 param_id)
-{
-	struct devlink_param_item *param_item;
-
-	param_item = devlink_param_find_by_id(&devlink->params, param_id);
-	WARN_ON(!param_item);
-
-	devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
-}
-EXPORT_SYMBOL_GPL(devl_param_value_changed);
-
 /**
  * devl_region_create - create a new address region
  *
diff --git a/net/devlink/param.c b/net/devlink/param.c
new file mode 100644
index 000000000000..31275f9d4cb7
--- /dev/null
+++ b/net/devlink/param.c
@@ -0,0 +1,865 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+static const struct devlink_param devlink_param_generic[] = {
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+		.name = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME,
+		.type = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+		.name = DEVLINK_PARAM_GENERIC_MAX_MACS_NAME,
+		.type = DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
+		.name = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME,
+		.type = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+		.name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
+		.type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
+		.name = DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME,
+		.type = DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
+		.name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME,
+		.type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
+		.name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME,
+		.type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
+		.name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
+		.type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
+		.name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME,
+		.type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+		.name = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME,
+		.type = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET,
+		.name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME,
+		.type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+		.name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME,
+		.type = DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+		.name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME,
+		.type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+		.name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME,
+		.type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
+		.name = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME,
+		.type = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+		.name = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME,
+		.type = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+		.name = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME,
+		.type = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE,
+	},
+};
+
+static int devlink_param_generic_verify(const struct devlink_param *param)
+{
+	/* verify it match generic parameter by id and name */
+	if (param->id > DEVLINK_PARAM_GENERIC_ID_MAX)
+		return -EINVAL;
+	if (strcmp(param->name, devlink_param_generic[param->id].name))
+		return -ENOENT;
+
+	WARN_ON(param->type != devlink_param_generic[param->id].type);
+
+	return 0;
+}
+
+static int devlink_param_driver_verify(const struct devlink_param *param)
+{
+	int i;
+
+	if (param->id <= DEVLINK_PARAM_GENERIC_ID_MAX)
+		return -EINVAL;
+	/* verify no such name in generic params */
+	for (i = 0; i <= DEVLINK_PARAM_GENERIC_ID_MAX; i++)
+		if (!strcmp(param->name, devlink_param_generic[i].name))
+			return -EEXIST;
+
+	return 0;
+}
+
+static struct devlink_param_item *
+devlink_param_find_by_name(struct xarray *params, const char *param_name)
+{
+	struct devlink_param_item *param_item;
+	unsigned long param_id;
+
+	xa_for_each(params, param_id, param_item) {
+		if (!strcmp(param_item->param->name, param_name))
+			return param_item;
+	}
+	return NULL;
+}
+
+static struct devlink_param_item *
+devlink_param_find_by_id(struct xarray *params, u32 param_id)
+{
+	return xa_load(params, param_id);
+}
+
+static bool
+devlink_param_cmode_is_supported(const struct devlink_param *param,
+				 enum devlink_param_cmode cmode)
+{
+	return test_bit(cmode, &param->supported_cmodes);
+}
+
+static int devlink_param_get(struct devlink *devlink,
+			     const struct devlink_param *param,
+			     struct devlink_param_gset_ctx *ctx)
+{
+	if (!param->get)
+		return -EOPNOTSUPP;
+	return param->get(devlink, param->id, ctx);
+}
+
+static int devlink_param_set(struct devlink *devlink,
+			     const struct devlink_param *param,
+			     struct devlink_param_gset_ctx *ctx)
+{
+	if (!param->set)
+		return -EOPNOTSUPP;
+	return param->set(devlink, param->id, ctx);
+}
+
+static int
+devlink_param_type_to_nla_type(enum devlink_param_type param_type)
+{
+	switch (param_type) {
+	case DEVLINK_PARAM_TYPE_U8:
+		return NLA_U8;
+	case DEVLINK_PARAM_TYPE_U16:
+		return NLA_U16;
+	case DEVLINK_PARAM_TYPE_U32:
+		return NLA_U32;
+	case DEVLINK_PARAM_TYPE_STRING:
+		return NLA_STRING;
+	case DEVLINK_PARAM_TYPE_BOOL:
+		return NLA_FLAG;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int
+devlink_nl_param_value_fill_one(struct sk_buff *msg,
+				enum devlink_param_type type,
+				enum devlink_param_cmode cmode,
+				union devlink_param_value val)
+{
+	struct nlattr *param_value_attr;
+
+	param_value_attr = nla_nest_start_noflag(msg,
+						 DEVLINK_ATTR_PARAM_VALUE);
+	if (!param_value_attr)
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_CMODE, cmode))
+		goto value_nest_cancel;
+
+	switch (type) {
+	case DEVLINK_PARAM_TYPE_U8:
+		if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu8))
+			goto value_nest_cancel;
+		break;
+	case DEVLINK_PARAM_TYPE_U16:
+		if (nla_put_u16(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu16))
+			goto value_nest_cancel;
+		break;
+	case DEVLINK_PARAM_TYPE_U32:
+		if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32))
+			goto value_nest_cancel;
+		break;
+	case DEVLINK_PARAM_TYPE_STRING:
+		if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA,
+				   val.vstr))
+			goto value_nest_cancel;
+		break;
+	case DEVLINK_PARAM_TYPE_BOOL:
+		if (val.vbool &&
+		    nla_put_flag(msg, DEVLINK_ATTR_PARAM_VALUE_DATA))
+			goto value_nest_cancel;
+		break;
+	}
+
+	nla_nest_end(msg, param_value_attr);
+	return 0;
+
+value_nest_cancel:
+	nla_nest_cancel(msg, param_value_attr);
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
+				 unsigned int port_index,
+				 struct devlink_param_item *param_item,
+				 enum devlink_command cmd,
+				 u32 portid, u32 seq, int flags)
+{
+	union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
+	bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {};
+	const struct devlink_param *param = param_item->param;
+	struct devlink_param_gset_ctx ctx;
+	struct nlattr *param_values_list;
+	struct nlattr *param_attr;
+	int nla_type;
+	void *hdr;
+	int err;
+	int i;
+
+	/* Get value from driver part to driverinit configuration mode */
+	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
+		if (!devlink_param_cmode_is_supported(param, i))
+			continue;
+		if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) {
+			if (param_item->driverinit_value_new_valid)
+				param_value[i] = param_item->driverinit_value_new;
+			else if (param_item->driverinit_value_valid)
+				param_value[i] = param_item->driverinit_value;
+			else
+				return -EOPNOTSUPP;
+		} else {
+			ctx.cmode = i;
+			err = devlink_param_get(devlink, param, &ctx);
+			if (err)
+				return err;
+			param_value[i] = ctx.val;
+		}
+		param_value_set[i] = true;
+	}
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto genlmsg_cancel;
+
+	if (cmd == DEVLINK_CMD_PORT_PARAM_GET ||
+	    cmd == DEVLINK_CMD_PORT_PARAM_NEW ||
+	    cmd == DEVLINK_CMD_PORT_PARAM_DEL)
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index))
+			goto genlmsg_cancel;
+
+	param_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PARAM);
+	if (!param_attr)
+		goto genlmsg_cancel;
+	if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name))
+		goto param_nest_cancel;
+	if (param->generic && nla_put_flag(msg, DEVLINK_ATTR_PARAM_GENERIC))
+		goto param_nest_cancel;
+
+	nla_type = devlink_param_type_to_nla_type(param->type);
+	if (nla_type < 0)
+		goto param_nest_cancel;
+	if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type))
+		goto param_nest_cancel;
+
+	param_values_list = nla_nest_start_noflag(msg,
+						  DEVLINK_ATTR_PARAM_VALUES_LIST);
+	if (!param_values_list)
+		goto param_nest_cancel;
+
+	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
+		if (!param_value_set[i])
+			continue;
+		err = devlink_nl_param_value_fill_one(msg, param->type,
+						      i, param_value[i]);
+		if (err)
+			goto values_list_nest_cancel;
+	}
+
+	nla_nest_end(msg, param_values_list);
+	nla_nest_end(msg, param_attr);
+	genlmsg_end(msg, hdr);
+	return 0;
+
+values_list_nest_cancel:
+	nla_nest_end(msg, param_values_list);
+param_nest_cancel:
+	nla_nest_cancel(msg, param_attr);
+genlmsg_cancel:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static void devlink_param_notify(struct devlink *devlink,
+				 unsigned int port_index,
+				 struct devlink_param_item *param_item,
+				 enum devlink_command cmd)
+{
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL &&
+		cmd != DEVLINK_CMD_PORT_PARAM_NEW &&
+		cmd != DEVLINK_CMD_PORT_PARAM_DEL);
+
+	/* devlink_notify_register() / devlink_notify_unregister()
+	 * will replay the notifications if the params are added/removed
+	 * outside of the lifetime of the instance.
+	 */
+	if (!devl_is_registered(devlink))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+	err = devlink_nl_param_fill(msg, devlink, port_index, param_item, cmd,
+				    0, 0, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static void devlink_params_notify(struct devlink *devlink,
+				  enum devlink_command cmd)
+{
+	struct devlink_param_item *param_item;
+	unsigned long param_id;
+
+	xa_for_each(&devlink->params, param_id, param_item)
+		devlink_param_notify(devlink, 0, param_item, cmd);
+}
+
+void devlink_params_notify_register(struct devlink *devlink)
+{
+	devlink_params_notify(devlink, DEVLINK_CMD_PARAM_NEW);
+}
+
+void devlink_params_notify_unregister(struct devlink *devlink)
+{
+	devlink_params_notify(devlink, DEVLINK_CMD_PARAM_DEL);
+}
+
+static int devlink_nl_param_get_dump_one(struct sk_buff *msg,
+					 struct devlink *devlink,
+					 struct netlink_callback *cb,
+					 int flags)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_param_item *param_item;
+	unsigned long param_id;
+	int err = 0;
+
+	xa_for_each_start(&devlink->params, param_id, param_item, state->idx) {
+		err = devlink_nl_param_fill(msg, devlink, 0, param_item,
+					    DEVLINK_CMD_PARAM_GET,
+					    NETLINK_CB(cb->skb).portid,
+					    cb->nlh->nlmsg_seq, flags);
+		if (err == -EOPNOTSUPP) {
+			err = 0;
+		} else if (err) {
+			state->idx = param_id;
+			break;
+		}
+	}
+
+	return err;
+}
+
+int devlink_nl_param_get_dumpit(struct sk_buff *skb,
+				struct netlink_callback *cb)
+{
+	return devlink_nl_dumpit(skb, cb, devlink_nl_param_get_dump_one);
+}
+
+static int
+devlink_param_type_get_from_info(struct genl_info *info,
+				 enum devlink_param_type *param_type)
+{
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_TYPE))
+		return -EINVAL;
+
+	switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) {
+	case NLA_U8:
+		*param_type = DEVLINK_PARAM_TYPE_U8;
+		break;
+	case NLA_U16:
+		*param_type = DEVLINK_PARAM_TYPE_U16;
+		break;
+	case NLA_U32:
+		*param_type = DEVLINK_PARAM_TYPE_U32;
+		break;
+	case NLA_STRING:
+		*param_type = DEVLINK_PARAM_TYPE_STRING;
+		break;
+	case NLA_FLAG:
+		*param_type = DEVLINK_PARAM_TYPE_BOOL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+devlink_param_value_get_from_info(const struct devlink_param *param,
+				  struct genl_info *info,
+				  union devlink_param_value *value)
+{
+	struct nlattr *param_data;
+	int len;
+
+	param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA];
+
+	if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data)
+		return -EINVAL;
+
+	switch (param->type) {
+	case DEVLINK_PARAM_TYPE_U8:
+		if (nla_len(param_data) != sizeof(u8))
+			return -EINVAL;
+		value->vu8 = nla_get_u8(param_data);
+		break;
+	case DEVLINK_PARAM_TYPE_U16:
+		if (nla_len(param_data) != sizeof(u16))
+			return -EINVAL;
+		value->vu16 = nla_get_u16(param_data);
+		break;
+	case DEVLINK_PARAM_TYPE_U32:
+		if (nla_len(param_data) != sizeof(u32))
+			return -EINVAL;
+		value->vu32 = nla_get_u32(param_data);
+		break;
+	case DEVLINK_PARAM_TYPE_STRING:
+		len = strnlen(nla_data(param_data), nla_len(param_data));
+		if (len == nla_len(param_data) ||
+		    len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
+			return -EINVAL;
+		strcpy(value->vstr, nla_data(param_data));
+		break;
+	case DEVLINK_PARAM_TYPE_BOOL:
+		if (param_data && nla_len(param_data))
+			return -EINVAL;
+		value->vbool = nla_get_flag(param_data);
+		break;
+	}
+	return 0;
+}
+
+static struct devlink_param_item *
+devlink_param_get_from_info(struct xarray *params, struct genl_info *info)
+{
+	char *param_name;
+
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_NAME))
+		return NULL;
+
+	param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
+	return devlink_param_find_by_name(params, param_name);
+}
+
+int devlink_nl_param_get_doit(struct sk_buff *skb,
+			      struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_param_item *param_item;
+	struct sk_buff *msg;
+	int err;
+
+	param_item = devlink_param_get_from_info(&devlink->params, info);
+	if (!param_item)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_param_fill(msg, devlink, 0, param_item,
+				    DEVLINK_CMD_PARAM_GET,
+				    info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
+					   unsigned int port_index,
+					   struct xarray *params,
+					   struct genl_info *info,
+					   enum devlink_command cmd)
+{
+	enum devlink_param_type param_type;
+	struct devlink_param_gset_ctx ctx;
+	enum devlink_param_cmode cmode;
+	struct devlink_param_item *param_item;
+	const struct devlink_param *param;
+	union devlink_param_value value;
+	int err = 0;
+
+	param_item = devlink_param_get_from_info(params, info);
+	if (!param_item)
+		return -EINVAL;
+	param = param_item->param;
+	err = devlink_param_type_get_from_info(info, &param_type);
+	if (err)
+		return err;
+	if (param_type != param->type)
+		return -EINVAL;
+	err = devlink_param_value_get_from_info(param, info, &value);
+	if (err)
+		return err;
+	if (param->validate) {
+		err = param->validate(devlink, param->id, value, info->extack);
+		if (err)
+			return err;
+	}
+
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_VALUE_CMODE))
+		return -EINVAL;
+	cmode = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]);
+	if (!devlink_param_cmode_is_supported(param, cmode))
+		return -EOPNOTSUPP;
+
+	if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) {
+		param_item->driverinit_value_new = value;
+		param_item->driverinit_value_new_valid = true;
+	} else {
+		if (!param->set)
+			return -EOPNOTSUPP;
+		ctx.val = value;
+		ctx.cmode = cmode;
+		err = devlink_param_set(devlink, param, &ctx);
+		if (err)
+			return err;
+	}
+
+	devlink_param_notify(devlink, port_index, param_item, cmd);
+	return 0;
+}
+
+int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+
+	return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->params,
+					       info, DEVLINK_CMD_PARAM_NEW);
+}
+
+int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
+					 struct netlink_callback *cb)
+{
+	NL_SET_ERR_MSG(cb->extack, "Port params are not supported");
+	return msg->len;
+}
+
+int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	NL_SET_ERR_MSG(info->extack, "Port params are not supported");
+	return -EINVAL;
+}
+
+int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	NL_SET_ERR_MSG(info->extack, "Port params are not supported");
+	return -EINVAL;
+}
+
+static int devlink_param_verify(const struct devlink_param *param)
+{
+	if (!param || !param->name || !param->supported_cmodes)
+		return -EINVAL;
+	if (param->generic)
+		return devlink_param_generic_verify(param);
+	else
+		return devlink_param_driver_verify(param);
+}
+
+static int devlink_param_register(struct devlink *devlink,
+				  const struct devlink_param *param)
+{
+	struct devlink_param_item *param_item;
+	int err;
+
+	WARN_ON(devlink_param_verify(param));
+	WARN_ON(devlink_param_find_by_name(&devlink->params, param->name));
+
+	if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
+		WARN_ON(param->get || param->set);
+	else
+		WARN_ON(!param->get || !param->set);
+
+	param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
+	if (!param_item)
+		return -ENOMEM;
+
+	param_item->param = param;
+
+	err = xa_insert(&devlink->params, param->id, param_item, GFP_KERNEL);
+	if (err)
+		goto err_xa_insert;
+
+	devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+	return 0;
+
+err_xa_insert:
+	kfree(param_item);
+	return err;
+}
+
+static void devlink_param_unregister(struct devlink *devlink,
+				     const struct devlink_param *param)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_id(&devlink->params, param->id);
+	if (WARN_ON(!param_item))
+		return;
+	devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL);
+	xa_erase(&devlink->params, param->id);
+	kfree(param_item);
+}
+
+/**
+ *	devl_params_register - register configuration parameters
+ *
+ *	@devlink: devlink
+ *	@params: configuration parameters array
+ *	@params_count: number of parameters provided
+ *
+ *	Register the configuration parameters supported by the driver.
+ */
+int devl_params_register(struct devlink *devlink,
+			 const struct devlink_param *params,
+			 size_t params_count)
+{
+	const struct devlink_param *param = params;
+	int i, err;
+
+	lockdep_assert_held(&devlink->lock);
+
+	for (i = 0; i < params_count; i++, param++) {
+		err = devlink_param_register(devlink, param);
+		if (err)
+			goto rollback;
+	}
+	return 0;
+
+rollback:
+	if (!i)
+		return err;
+
+	for (param--; i > 0; i--, param--)
+		devlink_param_unregister(devlink, param);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devl_params_register);
+
+int devlink_params_register(struct devlink *devlink,
+			    const struct devlink_param *params,
+			    size_t params_count)
+{
+	int err;
+
+	devl_lock(devlink);
+	err = devl_params_register(devlink, params, params_count);
+	devl_unlock(devlink);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_params_register);
+
+/**
+ *	devl_params_unregister - unregister configuration parameters
+ *	@devlink: devlink
+ *	@params: configuration parameters to unregister
+ *	@params_count: number of parameters provided
+ */
+void devl_params_unregister(struct devlink *devlink,
+			    const struct devlink_param *params,
+			    size_t params_count)
+{
+	const struct devlink_param *param = params;
+	int i;
+
+	lockdep_assert_held(&devlink->lock);
+
+	for (i = 0; i < params_count; i++, param++)
+		devlink_param_unregister(devlink, param);
+}
+EXPORT_SYMBOL_GPL(devl_params_unregister);
+
+void devlink_params_unregister(struct devlink *devlink,
+			       const struct devlink_param *params,
+			       size_t params_count)
+{
+	devl_lock(devlink);
+	devl_params_unregister(devlink, params, params_count);
+	devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_params_unregister);
+
+/**
+ *	devl_param_driverinit_value_get - get configuration parameter
+ *					  value for driver initializing
+ *
+ *	@devlink: devlink
+ *	@param_id: parameter ID
+ *	@val: pointer to store the value of parameter in driverinit
+ *	      configuration mode
+ *
+ *	This function should be used by the driver to get driverinit
+ *	configuration for initialization after reload command.
+ *
+ *	Note that lockless call of this function relies on the
+ *	driver to maintain following basic sane behavior:
+ *	1) Driver ensures a call to this function cannot race with
+ *	   registering/unregistering the parameter with the same parameter ID.
+ *	2) Driver ensures a call to this function cannot race with
+ *	   devl_param_driverinit_value_set() call with the same parameter ID.
+ *	3) Driver ensures a call to this function cannot race with
+ *	   reload operation.
+ *	If the driver is not able to comply, it has to take the devlink->lock
+ *	while calling this.
+ */
+int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+				    union devlink_param_value *val)
+{
+	struct devlink_param_item *param_item;
+
+	if (WARN_ON(!devlink_reload_supported(devlink->ops)))
+		return -EOPNOTSUPP;
+
+	param_item = devlink_param_find_by_id(&devlink->params, param_id);
+	if (!param_item)
+		return -EINVAL;
+
+	if (!param_item->driverinit_value_valid)
+		return -EOPNOTSUPP;
+
+	if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
+						      DEVLINK_PARAM_CMODE_DRIVERINIT)))
+		return -EOPNOTSUPP;
+
+	*val = param_item->driverinit_value;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devl_param_driverinit_value_get);
+
+/**
+ *	devl_param_driverinit_value_set - set value of configuration
+ *					  parameter for driverinit
+ *					  configuration mode
+ *
+ *	@devlink: devlink
+ *	@param_id: parameter ID
+ *	@init_val: value of parameter to set for driverinit configuration mode
+ *
+ *	This function should be used by the driver to set driverinit
+ *	configuration mode default value.
+ */
+void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+				     union devlink_param_value init_val)
+{
+	struct devlink_param_item *param_item;
+
+	devl_assert_locked(devlink);
+
+	param_item = devlink_param_find_by_id(&devlink->params, param_id);
+	if (WARN_ON(!param_item))
+		return;
+
+	if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
+						      DEVLINK_PARAM_CMODE_DRIVERINIT)))
+		return;
+
+	param_item->driverinit_value = init_val;
+	param_item->driverinit_value_valid = true;
+
+	devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devl_param_driverinit_value_set);
+
+void devlink_params_driverinit_load_new(struct devlink *devlink)
+{
+	struct devlink_param_item *param_item;
+	unsigned long param_id;
+
+	xa_for_each(&devlink->params, param_id, param_item) {
+		if (!devlink_param_cmode_is_supported(param_item->param,
+						      DEVLINK_PARAM_CMODE_DRIVERINIT) ||
+		    !param_item->driverinit_value_new_valid)
+			continue;
+		param_item->driverinit_value = param_item->driverinit_value_new;
+		param_item->driverinit_value_valid = true;
+		param_item->driverinit_value_new_valid = false;
+	}
+}
+
+/**
+ *	devl_param_value_changed - notify devlink on a parameter's value
+ *				   change. Should be called by the driver
+ *				   right after the change.
+ *
+ *	@devlink: devlink
+ *	@param_id: parameter ID
+ *
+ *	This function should be used by the driver to notify devlink on value
+ *	change, excluding driverinit configuration mode.
+ *	For driverinit configuration mode driver should use the function
+ */
+void devl_param_value_changed(struct devlink *devlink, u32 param_id)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_id(&devlink->params, param_id);
+	WARN_ON(!param_item);
+
+	devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devl_param_value_changed);

From 1aa47ca1f52ee4ef5f553e7ee8fb6749b6c96b9f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:50 +0200
Subject: [PATCH 2004/2047] devlink: push region related code into separate
 file

Cut out another chunk from leftover.c and put region related code
into a separate file.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-9-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/Makefile        |    2 +-
 net/devlink/devl_internal.h |    6 +
 net/devlink/leftover.c      | 1256 ----------------------------------
 net/devlink/region.c        | 1260 +++++++++++++++++++++++++++++++++++
 4 files changed, 1267 insertions(+), 1257 deletions(-)
 create mode 100644 net/devlink/region.c

diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index c4b29f603a1f..744f64e467b7 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
-	 resource.o param.o health.o
+	 resource.o param.o region.o health.o
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index e0304d607978..32b66233240f 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -158,6 +158,8 @@ void devlink_ports_notify_register(struct devlink *devlink);
 void devlink_ports_notify_unregister(struct devlink *devlink);
 void devlink_params_notify_register(struct devlink *devlink);
 void devlink_params_notify_unregister(struct devlink *devlink);
+void devlink_regions_notify_register(struct devlink *devlink);
+void devlink_regions_notify_unregister(struct devlink *devlink);
 
 /* Ports */
 #define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port)				\
@@ -249,6 +251,10 @@ int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
 				       struct genl_info *info);
 int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
 				       struct genl_info *info);
+int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
+				      struct netlink_callback *cb);
 int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
 					    struct genl_info *info);
 int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 2f1dfd27d1c8..6d31aaee8ce8 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -151,68 +151,6 @@ devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
 	return devlink_linecard_get_from_attrs(devlink, info->attrs);
 }
 
-struct devlink_region {
-	struct devlink *devlink;
-	struct devlink_port *port;
-	struct list_head list;
-	union {
-		const struct devlink_region_ops *ops;
-		const struct devlink_port_region_ops *port_ops;
-	};
-	struct mutex snapshot_lock; /* protects snapshot_list,
-				     * max_snapshots and cur_snapshots
-				     * consistency.
-				     */
-	struct list_head snapshot_list;
-	u32 max_snapshots;
-	u32 cur_snapshots;
-	u64 size;
-};
-
-struct devlink_snapshot {
-	struct list_head list;
-	struct devlink_region *region;
-	u8 *data;
-	u32 id;
-};
-
-static struct devlink_region *
-devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
-{
-	struct devlink_region *region;
-
-	list_for_each_entry(region, &devlink->region_list, list)
-		if (!strcmp(region->ops->name, region_name))
-			return region;
-
-	return NULL;
-}
-
-static struct devlink_region *
-devlink_port_region_get_by_name(struct devlink_port *port,
-				const char *region_name)
-{
-	struct devlink_region *region;
-
-	list_for_each_entry(region, &port->region_list, list)
-		if (!strcmp(region->ops->name, region_name))
-			return region;
-
-	return NULL;
-}
-
-static struct devlink_snapshot *
-devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
-{
-	struct devlink_snapshot *snapshot;
-
-	list_for_each_entry(snapshot, &region->snapshot_list, list)
-		if (snapshot->id == id)
-			return snapshot;
-
-	return NULL;
-}
-
 static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
 {
 	struct nlattr *nested_attr;
@@ -1061,975 +999,6 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
 	return 0;
 }
 
-static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
-					     struct devlink *devlink,
-					     struct devlink_snapshot *snapshot)
-{
-	struct nlattr *snap_attr;
-	int err;
-
-	snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
-	if (!snap_attr)
-		return -EINVAL;
-
-	err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id);
-	if (err)
-		goto nla_put_failure;
-
-	nla_nest_end(msg, snap_attr);
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(msg, snap_attr);
-	return err;
-}
-
-static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg,
-					      struct devlink *devlink,
-					      struct devlink_region *region)
-{
-	struct devlink_snapshot *snapshot;
-	struct nlattr *snapshots_attr;
-	int err;
-
-	snapshots_attr = nla_nest_start_noflag(msg,
-					       DEVLINK_ATTR_REGION_SNAPSHOTS);
-	if (!snapshots_attr)
-		return -EINVAL;
-
-	list_for_each_entry(snapshot, &region->snapshot_list, list) {
-		err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot);
-		if (err)
-			goto nla_put_failure;
-	}
-
-	nla_nest_end(msg, snapshots_attr);
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(msg, snapshots_attr);
-	return err;
-}
-
-static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
-				  enum devlink_command cmd, u32 portid,
-				  u32 seq, int flags,
-				  struct devlink_region *region)
-{
-	void *hdr;
-	int err;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	err = devlink_nl_put_handle(msg, devlink);
-	if (err)
-		goto nla_put_failure;
-
-	if (region->port) {
-		err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
-				  region->port->index);
-		if (err)
-			goto nla_put_failure;
-	}
-
-	err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->ops->name);
-	if (err)
-		goto nla_put_failure;
-
-	err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
-				region->size,
-				DEVLINK_ATTR_PAD);
-	if (err)
-		goto nla_put_failure;
-
-	err = nla_put_u32(msg, DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,
-			  region->max_snapshots);
-	if (err)
-		goto nla_put_failure;
-
-	err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
-	if (err)
-		goto nla_put_failure;
-
-	genlmsg_end(msg, hdr);
-	return 0;
-
-nla_put_failure:
-	genlmsg_cancel(msg, hdr);
-	return err;
-}
-
-static struct sk_buff *
-devlink_nl_region_notify_build(struct devlink_region *region,
-			       struct devlink_snapshot *snapshot,
-			       enum devlink_command cmd, u32 portid, u32 seq)
-{
-	struct devlink *devlink = region->devlink;
-	struct sk_buff *msg;
-	void *hdr;
-	int err;
-
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return ERR_PTR(-ENOMEM);
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, 0, cmd);
-	if (!hdr) {
-		err = -EMSGSIZE;
-		goto out_free_msg;
-	}
-
-	err = devlink_nl_put_handle(msg, devlink);
-	if (err)
-		goto out_cancel_msg;
-
-	if (region->port) {
-		err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
-				  region->port->index);
-		if (err)
-			goto out_cancel_msg;
-	}
-
-	err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME,
-			     region->ops->name);
-	if (err)
-		goto out_cancel_msg;
-
-	if (snapshot) {
-		err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID,
-				  snapshot->id);
-		if (err)
-			goto out_cancel_msg;
-	} else {
-		err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
-					region->size, DEVLINK_ATTR_PAD);
-		if (err)
-			goto out_cancel_msg;
-	}
-	genlmsg_end(msg, hdr);
-
-	return msg;
-
-out_cancel_msg:
-	genlmsg_cancel(msg, hdr);
-out_free_msg:
-	nlmsg_free(msg);
-	return ERR_PTR(err);
-}
-
-static void devlink_nl_region_notify(struct devlink_region *region,
-				     struct devlink_snapshot *snapshot,
-				     enum devlink_command cmd)
-{
-	struct devlink *devlink = region->devlink;
-	struct sk_buff *msg;
-
-	WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
-	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
-		return;
-
-	msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0);
-	if (IS_ERR(msg))
-		return;
-
-	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
-				0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static void devlink_regions_notify_register(struct devlink *devlink)
-{
-	struct devlink_region *region;
-
-	list_for_each_entry(region, &devlink->region_list, list)
-		devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
-}
-
-static void devlink_regions_notify_unregister(struct devlink *devlink)
-{
-	struct devlink_region *region;
-
-	list_for_each_entry_reverse(region, &devlink->region_list, list)
-		devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
-}
-
-/**
- * __devlink_snapshot_id_increment - Increment number of snapshots using an id
- *	@devlink: devlink instance
- *	@id: the snapshot id
- *
- *	Track when a new snapshot begins using an id. Load the count for the
- *	given id from the snapshot xarray, increment it, and store it back.
- *
- *	Called when a new snapshot is created with the given id.
- *
- *	The id *must* have been previously allocated by
- *	devlink_region_snapshot_id_get().
- *
- *	Returns 0 on success, or an error on failure.
- */
-static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id)
-{
-	unsigned long count;
-	void *p;
-	int err;
-
-	xa_lock(&devlink->snapshot_ids);
-	p = xa_load(&devlink->snapshot_ids, id);
-	if (WARN_ON(!p)) {
-		err = -EINVAL;
-		goto unlock;
-	}
-
-	if (WARN_ON(!xa_is_value(p))) {
-		err = -EINVAL;
-		goto unlock;
-	}
-
-	count = xa_to_value(p);
-	count++;
-
-	err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
-				GFP_ATOMIC));
-unlock:
-	xa_unlock(&devlink->snapshot_ids);
-	return err;
-}
-
-/**
- * __devlink_snapshot_id_decrement - Decrease number of snapshots using an id
- *	@devlink: devlink instance
- *	@id: the snapshot id
- *
- *	Track when a snapshot is deleted and stops using an id. Load the count
- *	for the given id from the snapshot xarray, decrement it, and store it
- *	back.
- *
- *	If the count reaches zero, erase this id from the xarray, freeing it
- *	up for future re-use by devlink_region_snapshot_id_get().
- *
- *	Called when a snapshot using the given id is deleted, and when the
- *	initial allocator of the id is finished using it.
- */
-static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id)
-{
-	unsigned long count;
-	void *p;
-
-	xa_lock(&devlink->snapshot_ids);
-	p = xa_load(&devlink->snapshot_ids, id);
-	if (WARN_ON(!p))
-		goto unlock;
-
-	if (WARN_ON(!xa_is_value(p)))
-		goto unlock;
-
-	count = xa_to_value(p);
-
-	if (count > 1) {
-		count--;
-		__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
-			   GFP_ATOMIC);
-	} else {
-		/* If this was the last user, we can erase this id */
-		__xa_erase(&devlink->snapshot_ids, id);
-	}
-unlock:
-	xa_unlock(&devlink->snapshot_ids);
-}
-
-/**
- *	__devlink_snapshot_id_insert - Insert a specific snapshot ID
- *	@devlink: devlink instance
- *	@id: the snapshot id
- *
- *	Mark the given snapshot id as used by inserting a zero value into the
- *	snapshot xarray.
- *
- *	This must be called while holding the devlink instance lock. Unlike
- *	devlink_snapshot_id_get, the initial reference count is zero, not one.
- *	It is expected that the id will immediately be used before
- *	releasing the devlink instance lock.
- *
- *	Returns zero on success, or an error code if the snapshot id could not
- *	be inserted.
- */
-static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id)
-{
-	int err;
-
-	xa_lock(&devlink->snapshot_ids);
-	if (xa_load(&devlink->snapshot_ids, id)) {
-		xa_unlock(&devlink->snapshot_ids);
-		return -EEXIST;
-	}
-	err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(0),
-				GFP_ATOMIC));
-	xa_unlock(&devlink->snapshot_ids);
-	return err;
-}
-
-/**
- *	__devlink_region_snapshot_id_get - get snapshot ID
- *	@devlink: devlink instance
- *	@id: storage to return snapshot id
- *
- *	Allocates a new snapshot id. Returns zero on success, or a negative
- *	error on failure. Must be called while holding the devlink instance
- *	lock.
- *
- *	Snapshot IDs are tracked using an xarray which stores the number of
- *	users of the snapshot id.
- *
- *	Note that the caller of this function counts as a 'user', in order to
- *	avoid race conditions. The caller must release its hold on the
- *	snapshot by using devlink_region_snapshot_id_put.
- */
-static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
-{
-	return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1),
-			xa_limit_32b, GFP_KERNEL);
-}
-
-/**
- *	__devlink_region_snapshot_create - create a new snapshot
- *	This will add a new snapshot of a region. The snapshot
- *	will be stored on the region struct and can be accessed
- *	from devlink. This is useful for future analyses of snapshots.
- *	Multiple snapshots can be created on a region.
- *	The @snapshot_id should be obtained using the getter function.
- *
- *	Must be called only while holding the region snapshot lock.
- *
- *	@region: devlink region of the snapshot
- *	@data: snapshot data
- *	@snapshot_id: snapshot id to be created
- */
-static int
-__devlink_region_snapshot_create(struct devlink_region *region,
-				 u8 *data, u32 snapshot_id)
-{
-	struct devlink *devlink = region->devlink;
-	struct devlink_snapshot *snapshot;
-	int err;
-
-	lockdep_assert_held(&region->snapshot_lock);
-
-	/* check if region can hold one more snapshot */
-	if (region->cur_snapshots == region->max_snapshots)
-		return -ENOSPC;
-
-	if (devlink_region_snapshot_get_by_id(region, snapshot_id))
-		return -EEXIST;
-
-	snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL);
-	if (!snapshot)
-		return -ENOMEM;
-
-	err = __devlink_snapshot_id_increment(devlink, snapshot_id);
-	if (err)
-		goto err_snapshot_id_increment;
-
-	snapshot->id = snapshot_id;
-	snapshot->region = region;
-	snapshot->data = data;
-
-	list_add_tail(&snapshot->list, &region->snapshot_list);
-
-	region->cur_snapshots++;
-
-	devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_NEW);
-	return 0;
-
-err_snapshot_id_increment:
-	kfree(snapshot);
-	return err;
-}
-
-static void devlink_region_snapshot_del(struct devlink_region *region,
-					struct devlink_snapshot *snapshot)
-{
-	struct devlink *devlink = region->devlink;
-
-	lockdep_assert_held(&region->snapshot_lock);
-
-	devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
-	region->cur_snapshots--;
-	list_del(&snapshot->list);
-	region->ops->destructor(snapshot->data);
-	__devlink_snapshot_id_decrement(devlink, snapshot->id);
-	kfree(snapshot);
-}
-
-int devlink_nl_region_get_doit(struct sk_buff *skb, struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_port *port = NULL;
-	struct devlink_region *region;
-	const char *region_name;
-	struct sk_buff *msg;
-	unsigned int index;
-	int err;
-
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME))
-		return -EINVAL;
-
-	if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
-		index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
-		port = devlink_port_get_by_index(devlink, index);
-		if (!port)
-			return -ENODEV;
-	}
-
-	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
-	if (port)
-		region = devlink_port_region_get_by_name(port, region_name);
-	else
-		region = devlink_region_get_by_name(devlink, region_name);
-
-	if (!region)
-		return -EINVAL;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	err = devlink_nl_region_fill(msg, devlink, DEVLINK_CMD_REGION_GET,
-				     info->snd_portid, info->snd_seq, 0,
-				     region);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
-
-	return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_region_get_port_dumpit(struct sk_buff *msg,
-						 struct netlink_callback *cb,
-						 struct devlink_port *port,
-						 int *idx, int start, int flags)
-{
-	struct devlink_region *region;
-	int err = 0;
-
-	list_for_each_entry(region, &port->region_list, list) {
-		if (*idx < start) {
-			(*idx)++;
-			continue;
-		}
-		err = devlink_nl_region_fill(msg, port->devlink,
-					     DEVLINK_CMD_REGION_GET,
-					     NETLINK_CB(cb->skb).portid,
-					     cb->nlh->nlmsg_seq,
-					     flags, region);
-		if (err)
-			goto out;
-		(*idx)++;
-	}
-
-out:
-	return err;
-}
-
-static int devlink_nl_region_get_dump_one(struct sk_buff *msg,
-					  struct devlink *devlink,
-					  struct netlink_callback *cb,
-					  int flags)
-{
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct devlink_region *region;
-	struct devlink_port *port;
-	unsigned long port_index;
-	int idx = 0;
-	int err;
-
-	list_for_each_entry(region, &devlink->region_list, list) {
-		if (idx < state->idx) {
-			idx++;
-			continue;
-		}
-		err = devlink_nl_region_fill(msg, devlink,
-					     DEVLINK_CMD_REGION_GET,
-					     NETLINK_CB(cb->skb).portid,
-					     cb->nlh->nlmsg_seq, flags,
-					     region);
-		if (err) {
-			state->idx = idx;
-			return err;
-		}
-		idx++;
-	}
-
-	xa_for_each(&devlink->ports, port_index, port) {
-		err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, &idx,
-							    state->idx, flags);
-		if (err) {
-			state->idx = idx;
-			return err;
-		}
-	}
-
-	return 0;
-}
-
-int devlink_nl_region_get_dumpit(struct sk_buff *skb,
-				 struct netlink_callback *cb)
-{
-	return devlink_nl_dumpit(skb, cb, devlink_nl_region_get_dump_one);
-}
-
-static int devlink_nl_cmd_region_del(struct sk_buff *skb,
-				     struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_snapshot *snapshot;
-	struct devlink_port *port = NULL;
-	struct devlink_region *region;
-	const char *region_name;
-	unsigned int index;
-	u32 snapshot_id;
-
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME) ||
-	    GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_SNAPSHOT_ID))
-		return -EINVAL;
-
-	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
-	snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
-
-	if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
-		index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
-		port = devlink_port_get_by_index(devlink, index);
-		if (!port)
-			return -ENODEV;
-	}
-
-	if (port)
-		region = devlink_port_region_get_by_name(port, region_name);
-	else
-		region = devlink_region_get_by_name(devlink, region_name);
-
-	if (!region)
-		return -EINVAL;
-
-	mutex_lock(&region->snapshot_lock);
-	snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
-	if (!snapshot) {
-		mutex_unlock(&region->snapshot_lock);
-		return -EINVAL;
-	}
-
-	devlink_region_snapshot_del(region, snapshot);
-	mutex_unlock(&region->snapshot_lock);
-	return 0;
-}
-
-static int
-devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_snapshot *snapshot;
-	struct devlink_port *port = NULL;
-	struct nlattr *snapshot_id_attr;
-	struct devlink_region *region;
-	const char *region_name;
-	unsigned int index;
-	u32 snapshot_id;
-	u8 *data;
-	int err;
-
-	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) {
-		NL_SET_ERR_MSG(info->extack, "No region name provided");
-		return -EINVAL;
-	}
-
-	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
-
-	if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
-		index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
-		port = devlink_port_get_by_index(devlink, index);
-		if (!port)
-			return -ENODEV;
-	}
-
-	if (port)
-		region = devlink_port_region_get_by_name(port, region_name);
-	else
-		region = devlink_region_get_by_name(devlink, region_name);
-
-	if (!region) {
-		NL_SET_ERR_MSG(info->extack, "The requested region does not exist");
-		return -EINVAL;
-	}
-
-	if (!region->ops->snapshot) {
-		NL_SET_ERR_MSG(info->extack, "The requested region does not support taking an immediate snapshot");
-		return -EOPNOTSUPP;
-	}
-
-	mutex_lock(&region->snapshot_lock);
-
-	if (region->cur_snapshots == region->max_snapshots) {
-		NL_SET_ERR_MSG(info->extack, "The region has reached the maximum number of stored snapshots");
-		err = -ENOSPC;
-		goto unlock;
-	}
-
-	snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
-	if (snapshot_id_attr) {
-		snapshot_id = nla_get_u32(snapshot_id_attr);
-
-		if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
-			NL_SET_ERR_MSG(info->extack, "The requested snapshot id is already in use");
-			err = -EEXIST;
-			goto unlock;
-		}
-
-		err = __devlink_snapshot_id_insert(devlink, snapshot_id);
-		if (err)
-			goto unlock;
-	} else {
-		err = __devlink_region_snapshot_id_get(devlink, &snapshot_id);
-		if (err) {
-			NL_SET_ERR_MSG(info->extack, "Failed to allocate a new snapshot id");
-			goto unlock;
-		}
-	}
-
-	if (port)
-		err = region->port_ops->snapshot(port, region->port_ops,
-						 info->extack, &data);
-	else
-		err = region->ops->snapshot(devlink, region->ops,
-					    info->extack, &data);
-	if (err)
-		goto err_snapshot_capture;
-
-	err = __devlink_region_snapshot_create(region, data, snapshot_id);
-	if (err)
-		goto err_snapshot_create;
-
-	if (!snapshot_id_attr) {
-		struct sk_buff *msg;
-
-		snapshot = devlink_region_snapshot_get_by_id(region,
-							     snapshot_id);
-		if (WARN_ON(!snapshot)) {
-			err = -EINVAL;
-			goto unlock;
-		}
-
-		msg = devlink_nl_region_notify_build(region, snapshot,
-						     DEVLINK_CMD_REGION_NEW,
-						     info->snd_portid,
-						     info->snd_seq);
-		err = PTR_ERR_OR_ZERO(msg);
-		if (err)
-			goto err_notify;
-
-		err = genlmsg_reply(msg, info);
-		if (err)
-			goto err_notify;
-	}
-
-	mutex_unlock(&region->snapshot_lock);
-	return 0;
-
-err_snapshot_create:
-	region->ops->destructor(data);
-err_snapshot_capture:
-	__devlink_snapshot_id_decrement(devlink, snapshot_id);
-	mutex_unlock(&region->snapshot_lock);
-	return err;
-
-err_notify:
-	devlink_region_snapshot_del(region, snapshot);
-unlock:
-	mutex_unlock(&region->snapshot_lock);
-	return err;
-}
-
-static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
-						 u8 *chunk, u32 chunk_size,
-						 u64 addr)
-{
-	struct nlattr *chunk_attr;
-	int err;
-
-	chunk_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_CHUNK);
-	if (!chunk_attr)
-		return -EINVAL;
-
-	err = nla_put(msg, DEVLINK_ATTR_REGION_CHUNK_DATA, chunk_size, chunk);
-	if (err)
-		goto nla_put_failure;
-
-	err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr,
-				DEVLINK_ATTR_PAD);
-	if (err)
-		goto nla_put_failure;
-
-	nla_nest_end(msg, chunk_attr);
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(msg, chunk_attr);
-	return err;
-}
-
-#define DEVLINK_REGION_READ_CHUNK_SIZE 256
-
-typedef int devlink_chunk_fill_t(void *cb_priv, u8 *chunk, u32 chunk_size,
-				 u64 curr_offset,
-				 struct netlink_ext_ack *extack);
-
-static int
-devlink_nl_region_read_fill(struct sk_buff *skb, devlink_chunk_fill_t *cb,
-			    void *cb_priv, u64 start_offset, u64 end_offset,
-			    u64 *new_offset, struct netlink_ext_ack *extack)
-{
-	u64 curr_offset = start_offset;
-	int err = 0;
-	u8 *data;
-
-	/* Allocate and re-use a single buffer */
-	data = kmalloc(DEVLINK_REGION_READ_CHUNK_SIZE, GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	*new_offset = start_offset;
-
-	while (curr_offset < end_offset) {
-		u32 data_size;
-
-		data_size = min_t(u32, end_offset - curr_offset,
-				  DEVLINK_REGION_READ_CHUNK_SIZE);
-
-		err = cb(cb_priv, data, data_size, curr_offset, extack);
-		if (err)
-			break;
-
-		err = devlink_nl_cmd_region_read_chunk_fill(skb, data, data_size, curr_offset);
-		if (err)
-			break;
-
-		curr_offset += data_size;
-	}
-	*new_offset = curr_offset;
-
-	kfree(data);
-
-	return err;
-}
-
-static int
-devlink_region_snapshot_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
-			     u64 curr_offset,
-			     struct netlink_ext_ack __always_unused *extack)
-{
-	struct devlink_snapshot *snapshot = cb_priv;
-
-	memcpy(chunk, &snapshot->data[curr_offset], chunk_size);
-
-	return 0;
-}
-
-static int
-devlink_region_port_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
-				u64 curr_offset, struct netlink_ext_ack *extack)
-{
-	struct devlink_region *region = cb_priv;
-
-	return region->port_ops->read(region->port, region->port_ops, extack,
-				      curr_offset, chunk_size, chunk);
-}
-
-static int
-devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
-			   u64 curr_offset, struct netlink_ext_ack *extack)
-{
-	struct devlink_region *region = cb_priv;
-
-	return region->ops->read(region->devlink, region->ops, extack,
-				 curr_offset, chunk_size, chunk);
-}
-
-static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
-					     struct netlink_callback *cb)
-{
-	const struct genl_dumpit_info *info = genl_dumpit_info(cb);
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct nlattr *chunks_attr, *region_attr, *snapshot_attr;
-	u64 ret_offset, start_offset, end_offset = U64_MAX;
-	struct nlattr **attrs = info->info.attrs;
-	struct devlink_port *port = NULL;
-	devlink_chunk_fill_t *region_cb;
-	struct devlink_region *region;
-	const char *region_name;
-	struct devlink *devlink;
-	unsigned int index;
-	void *region_cb_priv;
-	void *hdr;
-	int err;
-
-	start_offset = state->start_offset;
-
-	devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
-	if (IS_ERR(devlink))
-		return PTR_ERR(devlink);
-
-	if (!attrs[DEVLINK_ATTR_REGION_NAME]) {
-		NL_SET_ERR_MSG(cb->extack, "No region name provided");
-		err = -EINVAL;
-		goto out_unlock;
-	}
-
-	if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
-		index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
-
-		port = devlink_port_get_by_index(devlink, index);
-		if (!port) {
-			err = -ENODEV;
-			goto out_unlock;
-		}
-	}
-
-	region_attr = attrs[DEVLINK_ATTR_REGION_NAME];
-	region_name = nla_data(region_attr);
-
-	if (port)
-		region = devlink_port_region_get_by_name(port, region_name);
-	else
-		region = devlink_region_get_by_name(devlink, region_name);
-
-	if (!region) {
-		NL_SET_ERR_MSG_ATTR(cb->extack, region_attr, "Requested region does not exist");
-		err = -EINVAL;
-		goto out_unlock;
-	}
-
-	snapshot_attr = attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
-	if (!snapshot_attr) {
-		if (!nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
-			NL_SET_ERR_MSG(cb->extack, "No snapshot id provided");
-			err = -EINVAL;
-			goto out_unlock;
-		}
-
-		if (!region->ops->read) {
-			NL_SET_ERR_MSG(cb->extack, "Requested region does not support direct read");
-			err = -EOPNOTSUPP;
-			goto out_unlock;
-		}
-
-		if (port)
-			region_cb = &devlink_region_port_direct_fill;
-		else
-			region_cb = &devlink_region_direct_fill;
-		region_cb_priv = region;
-	} else {
-		struct devlink_snapshot *snapshot;
-		u32 snapshot_id;
-
-		if (nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
-			NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Direct region read does not use snapshot");
-			err = -EINVAL;
-			goto out_unlock;
-		}
-
-		snapshot_id = nla_get_u32(snapshot_attr);
-		snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
-		if (!snapshot) {
-			NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist");
-			err = -EINVAL;
-			goto out_unlock;
-		}
-		region_cb = &devlink_region_snapshot_fill;
-		region_cb_priv = snapshot;
-	}
-
-	if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
-	    attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
-		if (!start_offset)
-			start_offset =
-				nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
-
-		end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
-		end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
-	}
-
-	if (end_offset > region->size)
-		end_offset = region->size;
-
-	/* return 0 if there is no further data to read */
-	if (start_offset == end_offset) {
-		err = 0;
-		goto out_unlock;
-	}
-
-	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
-			  &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
-			  DEVLINK_CMD_REGION_READ);
-	if (!hdr) {
-		err = -EMSGSIZE;
-		goto out_unlock;
-	}
-
-	err = devlink_nl_put_handle(skb, devlink);
-	if (err)
-		goto nla_put_failure;
-
-	if (region->port) {
-		err = nla_put_u32(skb, DEVLINK_ATTR_PORT_INDEX,
-				  region->port->index);
-		if (err)
-			goto nla_put_failure;
-	}
-
-	err = nla_put_string(skb, DEVLINK_ATTR_REGION_NAME, region_name);
-	if (err)
-		goto nla_put_failure;
-
-	chunks_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_REGION_CHUNKS);
-	if (!chunks_attr) {
-		err = -EMSGSIZE;
-		goto nla_put_failure;
-	}
-
-	err = devlink_nl_region_read_fill(skb, region_cb, region_cb_priv,
-					  start_offset, end_offset, &ret_offset,
-					  cb->extack);
-
-	if (err && err != -EMSGSIZE)
-		goto nla_put_failure;
-
-	/* Check if there was any progress done to prevent infinite loop */
-	if (ret_offset == start_offset) {
-		err = -EINVAL;
-		goto nla_put_failure;
-	}
-
-	state->start_offset = ret_offset;
-
-	nla_nest_end(skb, chunks_attr);
-	genlmsg_end(skb, hdr);
-	devl_unlock(devlink);
-	devlink_put(devlink);
-	return skb->len;
-
-nla_put_failure:
-	genlmsg_cancel(skb, hdr);
-out_unlock:
-	devl_unlock(devlink);
-	devlink_put(devlink);
-	return err;
-}
-
 struct devlink_stats {
 	u64_stats_t rx_bytes;
 	u64_stats_t rx_packets;
@@ -3611,231 +2580,6 @@ void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
 }
 EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
 
-/**
- * devl_region_create - create a new address region
- *
- * @devlink: devlink
- * @ops: region operations and name
- * @region_max_snapshots: Maximum supported number of snapshots for region
- * @region_size: size of region
- */
-struct devlink_region *devl_region_create(struct devlink *devlink,
-					  const struct devlink_region_ops *ops,
-					  u32 region_max_snapshots,
-					  u64 region_size)
-{
-	struct devlink_region *region;
-
-	devl_assert_locked(devlink);
-
-	if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
-		return ERR_PTR(-EINVAL);
-
-	if (devlink_region_get_by_name(devlink, ops->name))
-		return ERR_PTR(-EEXIST);
-
-	region = kzalloc(sizeof(*region), GFP_KERNEL);
-	if (!region)
-		return ERR_PTR(-ENOMEM);
-
-	region->devlink = devlink;
-	region->max_snapshots = region_max_snapshots;
-	region->ops = ops;
-	region->size = region_size;
-	INIT_LIST_HEAD(&region->snapshot_list);
-	mutex_init(&region->snapshot_lock);
-	list_add_tail(&region->list, &devlink->region_list);
-	devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
-
-	return region;
-}
-EXPORT_SYMBOL_GPL(devl_region_create);
-
-/**
- *	devlink_region_create - create a new address region
- *
- *	@devlink: devlink
- *	@ops: region operations and name
- *	@region_max_snapshots: Maximum supported number of snapshots for region
- *	@region_size: size of region
- *
- *	Context: Takes and release devlink->lock <mutex>.
- */
-struct devlink_region *
-devlink_region_create(struct devlink *devlink,
-		      const struct devlink_region_ops *ops,
-		      u32 region_max_snapshots, u64 region_size)
-{
-	struct devlink_region *region;
-
-	devl_lock(devlink);
-	region = devl_region_create(devlink, ops, region_max_snapshots,
-				    region_size);
-	devl_unlock(devlink);
-	return region;
-}
-EXPORT_SYMBOL_GPL(devlink_region_create);
-
-/**
- *	devlink_port_region_create - create a new address region for a port
- *
- *	@port: devlink port
- *	@ops: region operations and name
- *	@region_max_snapshots: Maximum supported number of snapshots for region
- *	@region_size: size of region
- *
- *	Context: Takes and release devlink->lock <mutex>.
- */
-struct devlink_region *
-devlink_port_region_create(struct devlink_port *port,
-			   const struct devlink_port_region_ops *ops,
-			   u32 region_max_snapshots, u64 region_size)
-{
-	struct devlink *devlink = port->devlink;
-	struct devlink_region *region;
-	int err = 0;
-
-	ASSERT_DEVLINK_PORT_INITIALIZED(port);
-
-	if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
-		return ERR_PTR(-EINVAL);
-
-	devl_lock(devlink);
-
-	if (devlink_port_region_get_by_name(port, ops->name)) {
-		err = -EEXIST;
-		goto unlock;
-	}
-
-	region = kzalloc(sizeof(*region), GFP_KERNEL);
-	if (!region) {
-		err = -ENOMEM;
-		goto unlock;
-	}
-
-	region->devlink = devlink;
-	region->port = port;
-	region->max_snapshots = region_max_snapshots;
-	region->port_ops = ops;
-	region->size = region_size;
-	INIT_LIST_HEAD(&region->snapshot_list);
-	mutex_init(&region->snapshot_lock);
-	list_add_tail(&region->list, &port->region_list);
-	devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
-
-	devl_unlock(devlink);
-	return region;
-
-unlock:
-	devl_unlock(devlink);
-	return ERR_PTR(err);
-}
-EXPORT_SYMBOL_GPL(devlink_port_region_create);
-
-/**
- * devl_region_destroy - destroy address region
- *
- * @region: devlink region to destroy
- */
-void devl_region_destroy(struct devlink_region *region)
-{
-	struct devlink *devlink = region->devlink;
-	struct devlink_snapshot *snapshot, *ts;
-
-	devl_assert_locked(devlink);
-
-	/* Free all snapshots of region */
-	mutex_lock(&region->snapshot_lock);
-	list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
-		devlink_region_snapshot_del(region, snapshot);
-	mutex_unlock(&region->snapshot_lock);
-
-	list_del(&region->list);
-	mutex_destroy(&region->snapshot_lock);
-
-	devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
-	kfree(region);
-}
-EXPORT_SYMBOL_GPL(devl_region_destroy);
-
-/**
- *	devlink_region_destroy - destroy address region
- *
- *	@region: devlink region to destroy
- *
- *	Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_region_destroy(struct devlink_region *region)
-{
-	struct devlink *devlink = region->devlink;
-
-	devl_lock(devlink);
-	devl_region_destroy(region);
-	devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_region_destroy);
-
-/**
- *	devlink_region_snapshot_id_get - get snapshot ID
- *
- *	This callback should be called when adding a new snapshot,
- *	Driver should use the same id for multiple snapshots taken
- *	on multiple regions at the same time/by the same trigger.
- *
- *	The caller of this function must use devlink_region_snapshot_id_put
- *	when finished creating regions using this id.
- *
- *	Returns zero on success, or a negative error code on failure.
- *
- *	@devlink: devlink
- *	@id: storage to return id
- */
-int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
-{
-	return __devlink_region_snapshot_id_get(devlink, id);
-}
-EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get);
-
-/**
- *	devlink_region_snapshot_id_put - put snapshot ID reference
- *
- *	This should be called by a driver after finishing creating snapshots
- *	with an id. Doing so ensures that the ID can later be released in the
- *	event that all snapshots using it have been destroyed.
- *
- *	@devlink: devlink
- *	@id: id to release reference on
- */
-void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id)
-{
-	__devlink_snapshot_id_decrement(devlink, id);
-}
-EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put);
-
-/**
- *	devlink_region_snapshot_create - create a new snapshot
- *	This will add a new snapshot of a region. The snapshot
- *	will be stored on the region struct and can be accessed
- *	from devlink. This is useful for future analyses of snapshots.
- *	Multiple snapshots can be created on a region.
- *	The @snapshot_id should be obtained using the getter function.
- *
- *	@region: devlink region of the snapshot
- *	@data: snapshot data
- *	@snapshot_id: snapshot id to be created
- */
-int devlink_region_snapshot_create(struct devlink_region *region,
-				   u8 *data, u32 snapshot_id)
-{
-	int err;
-
-	mutex_lock(&region->snapshot_lock);
-	err = __devlink_region_snapshot_create(region, data, snapshot_id);
-	mutex_unlock(&region->snapshot_lock);
-	return err;
-}
-EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
-
 #define DEVLINK_TRAP(_id, _type)					      \
 	{								      \
 		.type = DEVLINK_TRAP_TYPE_##_type,			      \
diff --git a/net/devlink/region.c b/net/devlink/region.c
new file mode 100644
index 000000000000..d197cdb662db
--- /dev/null
+++ b/net/devlink/region.c
@@ -0,0 +1,1260 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+struct devlink_region {
+	struct devlink *devlink;
+	struct devlink_port *port;
+	struct list_head list;
+	union {
+		const struct devlink_region_ops *ops;
+		const struct devlink_port_region_ops *port_ops;
+	};
+	struct mutex snapshot_lock; /* protects snapshot_list,
+				     * max_snapshots and cur_snapshots
+				     * consistency.
+				     */
+	struct list_head snapshot_list;
+	u32 max_snapshots;
+	u32 cur_snapshots;
+	u64 size;
+};
+
+struct devlink_snapshot {
+	struct list_head list;
+	struct devlink_region *region;
+	u8 *data;
+	u32 id;
+};
+
+static struct devlink_region *
+devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
+{
+	struct devlink_region *region;
+
+	list_for_each_entry(region, &devlink->region_list, list)
+		if (!strcmp(region->ops->name, region_name))
+			return region;
+
+	return NULL;
+}
+
+static struct devlink_region *
+devlink_port_region_get_by_name(struct devlink_port *port,
+				const char *region_name)
+{
+	struct devlink_region *region;
+
+	list_for_each_entry(region, &port->region_list, list)
+		if (!strcmp(region->ops->name, region_name))
+			return region;
+
+	return NULL;
+}
+
+static struct devlink_snapshot *
+devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
+{
+	struct devlink_snapshot *snapshot;
+
+	list_for_each_entry(snapshot, &region->snapshot_list, list)
+		if (snapshot->id == id)
+			return snapshot;
+
+	return NULL;
+}
+
+static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
+					     struct devlink *devlink,
+					     struct devlink_snapshot *snapshot)
+{
+	struct nlattr *snap_attr;
+	int err;
+
+	snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
+	if (!snap_attr)
+		return -EINVAL;
+
+	err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id);
+	if (err)
+		goto nla_put_failure;
+
+	nla_nest_end(msg, snap_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, snap_attr);
+	return err;
+}
+
+static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg,
+					      struct devlink *devlink,
+					      struct devlink_region *region)
+{
+	struct devlink_snapshot *snapshot;
+	struct nlattr *snapshots_attr;
+	int err;
+
+	snapshots_attr = nla_nest_start_noflag(msg,
+					       DEVLINK_ATTR_REGION_SNAPSHOTS);
+	if (!snapshots_attr)
+		return -EINVAL;
+
+	list_for_each_entry(snapshot, &region->snapshot_list, list) {
+		err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot);
+		if (err)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(msg, snapshots_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, snapshots_attr);
+	return err;
+}
+
+static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
+				  enum devlink_command cmd, u32 portid,
+				  u32 seq, int flags,
+				  struct devlink_region *region)
+{
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	err = devlink_nl_put_handle(msg, devlink);
+	if (err)
+		goto nla_put_failure;
+
+	if (region->port) {
+		err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+				  region->port->index);
+		if (err)
+			goto nla_put_failure;
+	}
+
+	err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->ops->name);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+				region->size,
+				DEVLINK_ATTR_PAD);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_u32(msg, DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,
+			  region->max_snapshots);
+	if (err)
+		goto nla_put_failure;
+
+	err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
+	if (err)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return err;
+}
+
+static struct sk_buff *
+devlink_nl_region_notify_build(struct devlink_region *region,
+			       struct devlink_snapshot *snapshot,
+			       enum devlink_command cmd, u32 portid, u32 seq)
+{
+	struct devlink *devlink = region->devlink;
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return ERR_PTR(-ENOMEM);
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, 0, cmd);
+	if (!hdr) {
+		err = -EMSGSIZE;
+		goto out_free_msg;
+	}
+
+	err = devlink_nl_put_handle(msg, devlink);
+	if (err)
+		goto out_cancel_msg;
+
+	if (region->port) {
+		err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+				  region->port->index);
+		if (err)
+			goto out_cancel_msg;
+	}
+
+	err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME,
+			     region->ops->name);
+	if (err)
+		goto out_cancel_msg;
+
+	if (snapshot) {
+		err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID,
+				  snapshot->id);
+		if (err)
+			goto out_cancel_msg;
+	} else {
+		err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+					region->size, DEVLINK_ATTR_PAD);
+		if (err)
+			goto out_cancel_msg;
+	}
+	genlmsg_end(msg, hdr);
+
+	return msg;
+
+out_cancel_msg:
+	genlmsg_cancel(msg, hdr);
+out_free_msg:
+	nlmsg_free(msg);
+	return ERR_PTR(err);
+}
+
+static void devlink_nl_region_notify(struct devlink_region *region,
+				     struct devlink_snapshot *snapshot,
+				     enum devlink_command cmd)
+{
+	struct devlink *devlink = region->devlink;
+	struct sk_buff *msg;
+
+	WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
+	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+		return;
+
+	msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0);
+	if (IS_ERR(msg))
+		return;
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+				0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_regions_notify_register(struct devlink *devlink)
+{
+	struct devlink_region *region;
+
+	list_for_each_entry(region, &devlink->region_list, list)
+		devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+}
+
+void devlink_regions_notify_unregister(struct devlink *devlink)
+{
+	struct devlink_region *region;
+
+	list_for_each_entry_reverse(region, &devlink->region_list, list)
+		devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+}
+
+/**
+ * __devlink_snapshot_id_increment - Increment number of snapshots using an id
+ *	@devlink: devlink instance
+ *	@id: the snapshot id
+ *
+ *	Track when a new snapshot begins using an id. Load the count for the
+ *	given id from the snapshot xarray, increment it, and store it back.
+ *
+ *	Called when a new snapshot is created with the given id.
+ *
+ *	The id *must* have been previously allocated by
+ *	devlink_region_snapshot_id_get().
+ *
+ *	Returns 0 on success, or an error on failure.
+ */
+static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id)
+{
+	unsigned long count;
+	void *p;
+	int err;
+
+	xa_lock(&devlink->snapshot_ids);
+	p = xa_load(&devlink->snapshot_ids, id);
+	if (WARN_ON(!p)) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	if (WARN_ON(!xa_is_value(p))) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	count = xa_to_value(p);
+	count++;
+
+	err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
+				GFP_ATOMIC));
+unlock:
+	xa_unlock(&devlink->snapshot_ids);
+	return err;
+}
+
+/**
+ * __devlink_snapshot_id_decrement - Decrease number of snapshots using an id
+ *	@devlink: devlink instance
+ *	@id: the snapshot id
+ *
+ *	Track when a snapshot is deleted and stops using an id. Load the count
+ *	for the given id from the snapshot xarray, decrement it, and store it
+ *	back.
+ *
+ *	If the count reaches zero, erase this id from the xarray, freeing it
+ *	up for future re-use by devlink_region_snapshot_id_get().
+ *
+ *	Called when a snapshot using the given id is deleted, and when the
+ *	initial allocator of the id is finished using it.
+ */
+static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id)
+{
+	unsigned long count;
+	void *p;
+
+	xa_lock(&devlink->snapshot_ids);
+	p = xa_load(&devlink->snapshot_ids, id);
+	if (WARN_ON(!p))
+		goto unlock;
+
+	if (WARN_ON(!xa_is_value(p)))
+		goto unlock;
+
+	count = xa_to_value(p);
+
+	if (count > 1) {
+		count--;
+		__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
+			   GFP_ATOMIC);
+	} else {
+		/* If this was the last user, we can erase this id */
+		__xa_erase(&devlink->snapshot_ids, id);
+	}
+unlock:
+	xa_unlock(&devlink->snapshot_ids);
+}
+
+/**
+ *	__devlink_snapshot_id_insert - Insert a specific snapshot ID
+ *	@devlink: devlink instance
+ *	@id: the snapshot id
+ *
+ *	Mark the given snapshot id as used by inserting a zero value into the
+ *	snapshot xarray.
+ *
+ *	This must be called while holding the devlink instance lock. Unlike
+ *	devlink_snapshot_id_get, the initial reference count is zero, not one.
+ *	It is expected that the id will immediately be used before
+ *	releasing the devlink instance lock.
+ *
+ *	Returns zero on success, or an error code if the snapshot id could not
+ *	be inserted.
+ */
+static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id)
+{
+	int err;
+
+	xa_lock(&devlink->snapshot_ids);
+	if (xa_load(&devlink->snapshot_ids, id)) {
+		xa_unlock(&devlink->snapshot_ids);
+		return -EEXIST;
+	}
+	err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(0),
+				GFP_ATOMIC));
+	xa_unlock(&devlink->snapshot_ids);
+	return err;
+}
+
+/**
+ *	__devlink_region_snapshot_id_get - get snapshot ID
+ *	@devlink: devlink instance
+ *	@id: storage to return snapshot id
+ *
+ *	Allocates a new snapshot id. Returns zero on success, or a negative
+ *	error on failure. Must be called while holding the devlink instance
+ *	lock.
+ *
+ *	Snapshot IDs are tracked using an xarray which stores the number of
+ *	users of the snapshot id.
+ *
+ *	Note that the caller of this function counts as a 'user', in order to
+ *	avoid race conditions. The caller must release its hold on the
+ *	snapshot by using devlink_region_snapshot_id_put.
+ */
+static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
+{
+	return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1),
+			xa_limit_32b, GFP_KERNEL);
+}
+
+/**
+ *	__devlink_region_snapshot_create - create a new snapshot
+ *	This will add a new snapshot of a region. The snapshot
+ *	will be stored on the region struct and can be accessed
+ *	from devlink. This is useful for future analyses of snapshots.
+ *	Multiple snapshots can be created on a region.
+ *	The @snapshot_id should be obtained using the getter function.
+ *
+ *	Must be called only while holding the region snapshot lock.
+ *
+ *	@region: devlink region of the snapshot
+ *	@data: snapshot data
+ *	@snapshot_id: snapshot id to be created
+ */
+static int
+__devlink_region_snapshot_create(struct devlink_region *region,
+				 u8 *data, u32 snapshot_id)
+{
+	struct devlink *devlink = region->devlink;
+	struct devlink_snapshot *snapshot;
+	int err;
+
+	lockdep_assert_held(&region->snapshot_lock);
+
+	/* check if region can hold one more snapshot */
+	if (region->cur_snapshots == region->max_snapshots)
+		return -ENOSPC;
+
+	if (devlink_region_snapshot_get_by_id(region, snapshot_id))
+		return -EEXIST;
+
+	snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL);
+	if (!snapshot)
+		return -ENOMEM;
+
+	err = __devlink_snapshot_id_increment(devlink, snapshot_id);
+	if (err)
+		goto err_snapshot_id_increment;
+
+	snapshot->id = snapshot_id;
+	snapshot->region = region;
+	snapshot->data = data;
+
+	list_add_tail(&snapshot->list, &region->snapshot_list);
+
+	region->cur_snapshots++;
+
+	devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_NEW);
+	return 0;
+
+err_snapshot_id_increment:
+	kfree(snapshot);
+	return err;
+}
+
+static void devlink_region_snapshot_del(struct devlink_region *region,
+					struct devlink_snapshot *snapshot)
+{
+	struct devlink *devlink = region->devlink;
+
+	lockdep_assert_held(&region->snapshot_lock);
+
+	devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
+	region->cur_snapshots--;
+	list_del(&snapshot->list);
+	region->ops->destructor(snapshot->data);
+	__devlink_snapshot_id_decrement(devlink, snapshot->id);
+	kfree(snapshot);
+}
+
+int devlink_nl_region_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_port *port = NULL;
+	struct devlink_region *region;
+	const char *region_name;
+	struct sk_buff *msg;
+	unsigned int index;
+	int err;
+
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME))
+		return -EINVAL;
+
+	if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+		index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+		port = devlink_port_get_by_index(devlink, index);
+		if (!port)
+			return -ENODEV;
+	}
+
+	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+	if (port)
+		region = devlink_port_region_get_by_name(port, region_name);
+	else
+		region = devlink_region_get_by_name(devlink, region_name);
+
+	if (!region)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_region_fill(msg, devlink, DEVLINK_CMD_REGION_GET,
+				     info->snd_portid, info->snd_seq, 0,
+				     region);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_region_get_port_dumpit(struct sk_buff *msg,
+						 struct netlink_callback *cb,
+						 struct devlink_port *port,
+						 int *idx, int start, int flags)
+{
+	struct devlink_region *region;
+	int err = 0;
+
+	list_for_each_entry(region, &port->region_list, list) {
+		if (*idx < start) {
+			(*idx)++;
+			continue;
+		}
+		err = devlink_nl_region_fill(msg, port->devlink,
+					     DEVLINK_CMD_REGION_GET,
+					     NETLINK_CB(cb->skb).portid,
+					     cb->nlh->nlmsg_seq,
+					     flags, region);
+		if (err)
+			goto out;
+		(*idx)++;
+	}
+
+out:
+	return err;
+}
+
+static int devlink_nl_region_get_dump_one(struct sk_buff *msg,
+					  struct devlink *devlink,
+					  struct netlink_callback *cb,
+					  int flags)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_region *region;
+	struct devlink_port *port;
+	unsigned long port_index;
+	int idx = 0;
+	int err;
+
+	list_for_each_entry(region, &devlink->region_list, list) {
+		if (idx < state->idx) {
+			idx++;
+			continue;
+		}
+		err = devlink_nl_region_fill(msg, devlink,
+					     DEVLINK_CMD_REGION_GET,
+					     NETLINK_CB(cb->skb).portid,
+					     cb->nlh->nlmsg_seq, flags,
+					     region);
+		if (err) {
+			state->idx = idx;
+			return err;
+		}
+		idx++;
+	}
+
+	xa_for_each(&devlink->ports, port_index, port) {
+		err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, &idx,
+							    state->idx, flags);
+		if (err) {
+			state->idx = idx;
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int devlink_nl_region_get_dumpit(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	return devlink_nl_dumpit(skb, cb, devlink_nl_region_get_dump_one);
+}
+
+int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_snapshot *snapshot;
+	struct devlink_port *port = NULL;
+	struct devlink_region *region;
+	const char *region_name;
+	unsigned int index;
+	u32 snapshot_id;
+
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME) ||
+	    GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_SNAPSHOT_ID))
+		return -EINVAL;
+
+	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+	snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
+
+	if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+		index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+		port = devlink_port_get_by_index(devlink, index);
+		if (!port)
+			return -ENODEV;
+	}
+
+	if (port)
+		region = devlink_port_region_get_by_name(port, region_name);
+	else
+		region = devlink_region_get_by_name(devlink, region_name);
+
+	if (!region)
+		return -EINVAL;
+
+	mutex_lock(&region->snapshot_lock);
+	snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+	if (!snapshot) {
+		mutex_unlock(&region->snapshot_lock);
+		return -EINVAL;
+	}
+
+	devlink_region_snapshot_del(region, snapshot);
+	mutex_unlock(&region->snapshot_lock);
+	return 0;
+}
+
+int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_snapshot *snapshot;
+	struct devlink_port *port = NULL;
+	struct nlattr *snapshot_id_attr;
+	struct devlink_region *region;
+	const char *region_name;
+	unsigned int index;
+	u32 snapshot_id;
+	u8 *data;
+	int err;
+
+	if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) {
+		NL_SET_ERR_MSG(info->extack, "No region name provided");
+		return -EINVAL;
+	}
+
+	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+
+	if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+		index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+		port = devlink_port_get_by_index(devlink, index);
+		if (!port)
+			return -ENODEV;
+	}
+
+	if (port)
+		region = devlink_port_region_get_by_name(port, region_name);
+	else
+		region = devlink_region_get_by_name(devlink, region_name);
+
+	if (!region) {
+		NL_SET_ERR_MSG(info->extack, "The requested region does not exist");
+		return -EINVAL;
+	}
+
+	if (!region->ops->snapshot) {
+		NL_SET_ERR_MSG(info->extack, "The requested region does not support taking an immediate snapshot");
+		return -EOPNOTSUPP;
+	}
+
+	mutex_lock(&region->snapshot_lock);
+
+	if (region->cur_snapshots == region->max_snapshots) {
+		NL_SET_ERR_MSG(info->extack, "The region has reached the maximum number of stored snapshots");
+		err = -ENOSPC;
+		goto unlock;
+	}
+
+	snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
+	if (snapshot_id_attr) {
+		snapshot_id = nla_get_u32(snapshot_id_attr);
+
+		if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
+			NL_SET_ERR_MSG(info->extack, "The requested snapshot id is already in use");
+			err = -EEXIST;
+			goto unlock;
+		}
+
+		err = __devlink_snapshot_id_insert(devlink, snapshot_id);
+		if (err)
+			goto unlock;
+	} else {
+		err = __devlink_region_snapshot_id_get(devlink, &snapshot_id);
+		if (err) {
+			NL_SET_ERR_MSG(info->extack, "Failed to allocate a new snapshot id");
+			goto unlock;
+		}
+	}
+
+	if (port)
+		err = region->port_ops->snapshot(port, region->port_ops,
+						 info->extack, &data);
+	else
+		err = region->ops->snapshot(devlink, region->ops,
+					    info->extack, &data);
+	if (err)
+		goto err_snapshot_capture;
+
+	err = __devlink_region_snapshot_create(region, data, snapshot_id);
+	if (err)
+		goto err_snapshot_create;
+
+	if (!snapshot_id_attr) {
+		struct sk_buff *msg;
+
+		snapshot = devlink_region_snapshot_get_by_id(region,
+							     snapshot_id);
+		if (WARN_ON(!snapshot)) {
+			err = -EINVAL;
+			goto unlock;
+		}
+
+		msg = devlink_nl_region_notify_build(region, snapshot,
+						     DEVLINK_CMD_REGION_NEW,
+						     info->snd_portid,
+						     info->snd_seq);
+		err = PTR_ERR_OR_ZERO(msg);
+		if (err)
+			goto err_notify;
+
+		err = genlmsg_reply(msg, info);
+		if (err)
+			goto err_notify;
+	}
+
+	mutex_unlock(&region->snapshot_lock);
+	return 0;
+
+err_snapshot_create:
+	region->ops->destructor(data);
+err_snapshot_capture:
+	__devlink_snapshot_id_decrement(devlink, snapshot_id);
+	mutex_unlock(&region->snapshot_lock);
+	return err;
+
+err_notify:
+	devlink_region_snapshot_del(region, snapshot);
+unlock:
+	mutex_unlock(&region->snapshot_lock);
+	return err;
+}
+
+static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
+						 u8 *chunk, u32 chunk_size,
+						 u64 addr)
+{
+	struct nlattr *chunk_attr;
+	int err;
+
+	chunk_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_CHUNK);
+	if (!chunk_attr)
+		return -EINVAL;
+
+	err = nla_put(msg, DEVLINK_ATTR_REGION_CHUNK_DATA, chunk_size, chunk);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr,
+				DEVLINK_ATTR_PAD);
+	if (err)
+		goto nla_put_failure;
+
+	nla_nest_end(msg, chunk_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, chunk_attr);
+	return err;
+}
+
+#define DEVLINK_REGION_READ_CHUNK_SIZE 256
+
+typedef int devlink_chunk_fill_t(void *cb_priv, u8 *chunk, u32 chunk_size,
+				 u64 curr_offset,
+				 struct netlink_ext_ack *extack);
+
+static int
+devlink_nl_region_read_fill(struct sk_buff *skb, devlink_chunk_fill_t *cb,
+			    void *cb_priv, u64 start_offset, u64 end_offset,
+			    u64 *new_offset, struct netlink_ext_ack *extack)
+{
+	u64 curr_offset = start_offset;
+	int err = 0;
+	u8 *data;
+
+	/* Allocate and re-use a single buffer */
+	data = kmalloc(DEVLINK_REGION_READ_CHUNK_SIZE, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	*new_offset = start_offset;
+
+	while (curr_offset < end_offset) {
+		u32 data_size;
+
+		data_size = min_t(u32, end_offset - curr_offset,
+				  DEVLINK_REGION_READ_CHUNK_SIZE);
+
+		err = cb(cb_priv, data, data_size, curr_offset, extack);
+		if (err)
+			break;
+
+		err = devlink_nl_cmd_region_read_chunk_fill(skb, data, data_size, curr_offset);
+		if (err)
+			break;
+
+		curr_offset += data_size;
+	}
+	*new_offset = curr_offset;
+
+	kfree(data);
+
+	return err;
+}
+
+static int
+devlink_region_snapshot_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+			     u64 curr_offset,
+			     struct netlink_ext_ack __always_unused *extack)
+{
+	struct devlink_snapshot *snapshot = cb_priv;
+
+	memcpy(chunk, &snapshot->data[curr_offset], chunk_size);
+
+	return 0;
+}
+
+static int
+devlink_region_port_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+				u64 curr_offset, struct netlink_ext_ack *extack)
+{
+	struct devlink_region *region = cb_priv;
+
+	return region->port_ops->read(region->port, region->port_ops, extack,
+				      curr_offset, chunk_size, chunk);
+}
+
+static int
+devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+			   u64 curr_offset, struct netlink_ext_ack *extack)
+{
+	struct devlink_region *region = cb_priv;
+
+	return region->ops->read(region->devlink, region->ops, extack,
+				 curr_offset, chunk_size, chunk);
+}
+
+int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
+				      struct netlink_callback *cb)
+{
+	const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct nlattr *chunks_attr, *region_attr, *snapshot_attr;
+	u64 ret_offset, start_offset, end_offset = U64_MAX;
+	struct nlattr **attrs = info->info.attrs;
+	struct devlink_port *port = NULL;
+	devlink_chunk_fill_t *region_cb;
+	struct devlink_region *region;
+	const char *region_name;
+	struct devlink *devlink;
+	unsigned int index;
+	void *region_cb_priv;
+	void *hdr;
+	int err;
+
+	start_offset = state->start_offset;
+
+	devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+	if (IS_ERR(devlink))
+		return PTR_ERR(devlink);
+
+	if (!attrs[DEVLINK_ATTR_REGION_NAME]) {
+		NL_SET_ERR_MSG(cb->extack, "No region name provided");
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
+		index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+		port = devlink_port_get_by_index(devlink, index);
+		if (!port) {
+			err = -ENODEV;
+			goto out_unlock;
+		}
+	}
+
+	region_attr = attrs[DEVLINK_ATTR_REGION_NAME];
+	region_name = nla_data(region_attr);
+
+	if (port)
+		region = devlink_port_region_get_by_name(port, region_name);
+	else
+		region = devlink_region_get_by_name(devlink, region_name);
+
+	if (!region) {
+		NL_SET_ERR_MSG_ATTR(cb->extack, region_attr, "Requested region does not exist");
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	snapshot_attr = attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
+	if (!snapshot_attr) {
+		if (!nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
+			NL_SET_ERR_MSG(cb->extack, "No snapshot id provided");
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		if (!region->ops->read) {
+			NL_SET_ERR_MSG(cb->extack, "Requested region does not support direct read");
+			err = -EOPNOTSUPP;
+			goto out_unlock;
+		}
+
+		if (port)
+			region_cb = &devlink_region_port_direct_fill;
+		else
+			region_cb = &devlink_region_direct_fill;
+		region_cb_priv = region;
+	} else {
+		struct devlink_snapshot *snapshot;
+		u32 snapshot_id;
+
+		if (nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
+			NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Direct region read does not use snapshot");
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
+		snapshot_id = nla_get_u32(snapshot_attr);
+		snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+		if (!snapshot) {
+			NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist");
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		region_cb = &devlink_region_snapshot_fill;
+		region_cb_priv = snapshot;
+	}
+
+	if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
+	    attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
+		if (!start_offset)
+			start_offset =
+				nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+
+		end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+		end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
+	}
+
+	if (end_offset > region->size)
+		end_offset = region->size;
+
+	/* return 0 if there is no further data to read */
+	if (start_offset == end_offset) {
+		err = 0;
+		goto out_unlock;
+	}
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
+			  DEVLINK_CMD_REGION_READ);
+	if (!hdr) {
+		err = -EMSGSIZE;
+		goto out_unlock;
+	}
+
+	err = devlink_nl_put_handle(skb, devlink);
+	if (err)
+		goto nla_put_failure;
+
+	if (region->port) {
+		err = nla_put_u32(skb, DEVLINK_ATTR_PORT_INDEX,
+				  region->port->index);
+		if (err)
+			goto nla_put_failure;
+	}
+
+	err = nla_put_string(skb, DEVLINK_ATTR_REGION_NAME, region_name);
+	if (err)
+		goto nla_put_failure;
+
+	chunks_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_REGION_CHUNKS);
+	if (!chunks_attr) {
+		err = -EMSGSIZE;
+		goto nla_put_failure;
+	}
+
+	err = devlink_nl_region_read_fill(skb, region_cb, region_cb_priv,
+					  start_offset, end_offset, &ret_offset,
+					  cb->extack);
+
+	if (err && err != -EMSGSIZE)
+		goto nla_put_failure;
+
+	/* Check if there was any progress done to prevent infinite loop */
+	if (ret_offset == start_offset) {
+		err = -EINVAL;
+		goto nla_put_failure;
+	}
+
+	state->start_offset = ret_offset;
+
+	nla_nest_end(skb, chunks_attr);
+	genlmsg_end(skb, hdr);
+	devl_unlock(devlink);
+	devlink_put(devlink);
+	return skb->len;
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+out_unlock:
+	devl_unlock(devlink);
+	devlink_put(devlink);
+	return err;
+}
+
+/**
+ * devl_region_create - create a new address region
+ *
+ * @devlink: devlink
+ * @ops: region operations and name
+ * @region_max_snapshots: Maximum supported number of snapshots for region
+ * @region_size: size of region
+ */
+struct devlink_region *devl_region_create(struct devlink *devlink,
+					  const struct devlink_region_ops *ops,
+					  u32 region_max_snapshots,
+					  u64 region_size)
+{
+	struct devlink_region *region;
+
+	devl_assert_locked(devlink);
+
+	if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
+		return ERR_PTR(-EINVAL);
+
+	if (devlink_region_get_by_name(devlink, ops->name))
+		return ERR_PTR(-EEXIST);
+
+	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	if (!region)
+		return ERR_PTR(-ENOMEM);
+
+	region->devlink = devlink;
+	region->max_snapshots = region_max_snapshots;
+	region->ops = ops;
+	region->size = region_size;
+	INIT_LIST_HEAD(&region->snapshot_list);
+	mutex_init(&region->snapshot_lock);
+	list_add_tail(&region->list, &devlink->region_list);
+	devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+	return region;
+}
+EXPORT_SYMBOL_GPL(devl_region_create);
+
+/**
+ *	devlink_region_create - create a new address region
+ *
+ *	@devlink: devlink
+ *	@ops: region operations and name
+ *	@region_max_snapshots: Maximum supported number of snapshots for region
+ *	@region_size: size of region
+ *
+ *	Context: Takes and release devlink->lock <mutex>.
+ */
+struct devlink_region *
+devlink_region_create(struct devlink *devlink,
+		      const struct devlink_region_ops *ops,
+		      u32 region_max_snapshots, u64 region_size)
+{
+	struct devlink_region *region;
+
+	devl_lock(devlink);
+	region = devl_region_create(devlink, ops, region_max_snapshots,
+				    region_size);
+	devl_unlock(devlink);
+	return region;
+}
+EXPORT_SYMBOL_GPL(devlink_region_create);
+
+/**
+ *	devlink_port_region_create - create a new address region for a port
+ *
+ *	@port: devlink port
+ *	@ops: region operations and name
+ *	@region_max_snapshots: Maximum supported number of snapshots for region
+ *	@region_size: size of region
+ *
+ *	Context: Takes and release devlink->lock <mutex>.
+ */
+struct devlink_region *
+devlink_port_region_create(struct devlink_port *port,
+			   const struct devlink_port_region_ops *ops,
+			   u32 region_max_snapshots, u64 region_size)
+{
+	struct devlink *devlink = port->devlink;
+	struct devlink_region *region;
+	int err = 0;
+
+	ASSERT_DEVLINK_PORT_INITIALIZED(port);
+
+	if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
+		return ERR_PTR(-EINVAL);
+
+	devl_lock(devlink);
+
+	if (devlink_port_region_get_by_name(port, ops->name)) {
+		err = -EEXIST;
+		goto unlock;
+	}
+
+	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	if (!region) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	region->devlink = devlink;
+	region->port = port;
+	region->max_snapshots = region_max_snapshots;
+	region->port_ops = ops;
+	region->size = region_size;
+	INIT_LIST_HEAD(&region->snapshot_list);
+	mutex_init(&region->snapshot_lock);
+	list_add_tail(&region->list, &port->region_list);
+	devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+	devl_unlock(devlink);
+	return region;
+
+unlock:
+	devl_unlock(devlink);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(devlink_port_region_create);
+
+/**
+ * devl_region_destroy - destroy address region
+ *
+ * @region: devlink region to destroy
+ */
+void devl_region_destroy(struct devlink_region *region)
+{
+	struct devlink *devlink = region->devlink;
+	struct devlink_snapshot *snapshot, *ts;
+
+	devl_assert_locked(devlink);
+
+	/* Free all snapshots of region */
+	mutex_lock(&region->snapshot_lock);
+	list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
+		devlink_region_snapshot_del(region, snapshot);
+	mutex_unlock(&region->snapshot_lock);
+
+	list_del(&region->list);
+	mutex_destroy(&region->snapshot_lock);
+
+	devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+	kfree(region);
+}
+EXPORT_SYMBOL_GPL(devl_region_destroy);
+
+/**
+ *	devlink_region_destroy - destroy address region
+ *
+ *	@region: devlink region to destroy
+ *
+ *	Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_region_destroy(struct devlink_region *region)
+{
+	struct devlink *devlink = region->devlink;
+
+	devl_lock(devlink);
+	devl_region_destroy(region);
+	devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_region_destroy);
+
+/**
+ *	devlink_region_snapshot_id_get - get snapshot ID
+ *
+ *	This callback should be called when adding a new snapshot,
+ *	Driver should use the same id for multiple snapshots taken
+ *	on multiple regions at the same time/by the same trigger.
+ *
+ *	The caller of this function must use devlink_region_snapshot_id_put
+ *	when finished creating regions using this id.
+ *
+ *	Returns zero on success, or a negative error code on failure.
+ *
+ *	@devlink: devlink
+ *	@id: storage to return id
+ */
+int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
+{
+	return __devlink_region_snapshot_id_get(devlink, id);
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get);
+
+/**
+ *	devlink_region_snapshot_id_put - put snapshot ID reference
+ *
+ *	This should be called by a driver after finishing creating snapshots
+ *	with an id. Doing so ensures that the ID can later be released in the
+ *	event that all snapshots using it have been destroyed.
+ *
+ *	@devlink: devlink
+ *	@id: id to release reference on
+ */
+void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id)
+{
+	__devlink_snapshot_id_decrement(devlink, id);
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put);
+
+/**
+ *	devlink_region_snapshot_create - create a new snapshot
+ *	This will add a new snapshot of a region. The snapshot
+ *	will be stored on the region struct and can be accessed
+ *	from devlink. This is useful for future analyses of snapshots.
+ *	Multiple snapshots can be created on a region.
+ *	The @snapshot_id should be obtained using the getter function.
+ *
+ *	@region: devlink region of the snapshot
+ *	@data: snapshot data
+ *	@snapshot_id: snapshot id to be created
+ */
+int devlink_region_snapshot_create(struct devlink_region *region,
+				   u8 *data, u32 snapshot_id)
+{
+	int err;
+
+	mutex_lock(&region->snapshot_lock);
+	err = __devlink_region_snapshot_create(region, data, snapshot_id);
+	mutex_unlock(&region->snapshot_lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);

From 85facf94fd804ef557d5d844b5253859adef10d5 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:51 +0200
Subject: [PATCH 2005/2047] devlink: use tracepoint_enabled() helper

In preparation for the trap code move, use tracepoint_enabled() helper
instead of trace_devlink_trap_report_enabled() which would not be
defined in that scope.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-10-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/leftover.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 6d31aaee8ce8..1db3a7928465 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -3131,7 +3131,7 @@ void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
 	devlink_trap_stats_update(trap_item->stats, skb->len);
 	devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
 
-	if (trace_devlink_trap_report_enabled()) {
+	if (tracepoint_enabled(devlink_trap_report)) {
 		struct devlink_trap_metadata metadata = {};
 
 		devlink_trap_report_metadata_set(&metadata, trap_item,

From 4bbdec80ff270a69b6b61644c597ad6657f2a58b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:52 +0200
Subject: [PATCH 2006/2047] devlink: push trap related code into separate file

Cut out another chunk from leftover.c and put trap related code
into a separate file.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-11-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/Makefile        |    2 +-
 net/devlink/devl_internal.h |   11 +
 net/devlink/leftover.c      | 1861 -----------------------------------
 net/devlink/trap.c          | 1861 +++++++++++++++++++++++++++++++++++
 4 files changed, 1873 insertions(+), 1862 deletions(-)
 create mode 100644 net/devlink/trap.c

diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index 744f64e467b7..523efffe522c 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
-	 resource.o param.o region.o health.o
+	 resource.o param.o region.o health.o trap.o
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 32b66233240f..148525aa2847 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -160,6 +160,12 @@ void devlink_params_notify_register(struct devlink *devlink);
 void devlink_params_notify_unregister(struct devlink *devlink);
 void devlink_regions_notify_register(struct devlink *devlink);
 void devlink_regions_notify_unregister(struct devlink *devlink);
+void devlink_trap_policers_notify_register(struct devlink *devlink);
+void devlink_trap_policers_notify_unregister(struct devlink *devlink);
+void devlink_trap_groups_notify_register(struct devlink *devlink);
+void devlink_trap_groups_notify_unregister(struct devlink *devlink);
+void devlink_traps_notify_register(struct devlink *devlink);
+void devlink_traps_notify_unregister(struct devlink *devlink);
 
 /* Ports */
 #define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port)				\
@@ -267,3 +273,8 @@ int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
 						   struct genl_info *info);
 int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
 					     struct genl_info *info);
+int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
+				       struct genl_info *info);
+int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
+					 struct genl_info *info);
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 1db3a7928465..5b153ce097ab 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -999,948 +999,6 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
 	return 0;
 }
 
-struct devlink_stats {
-	u64_stats_t rx_bytes;
-	u64_stats_t rx_packets;
-	struct u64_stats_sync syncp;
-};
-
-/**
- * struct devlink_trap_policer_item - Packet trap policer attributes.
- * @policer: Immutable packet trap policer attributes.
- * @rate: Rate in packets / sec.
- * @burst: Burst size in packets.
- * @list: trap_policer_list member.
- *
- * Describes packet trap policer attributes. Created by devlink during trap
- * policer registration.
- */
-struct devlink_trap_policer_item {
-	const struct devlink_trap_policer *policer;
-	u64 rate;
-	u64 burst;
-	struct list_head list;
-};
-
-/**
- * struct devlink_trap_group_item - Packet trap group attributes.
- * @group: Immutable packet trap group attributes.
- * @policer_item: Associated policer item. Can be NULL.
- * @list: trap_group_list member.
- * @stats: Trap group statistics.
- *
- * Describes packet trap group attributes. Created by devlink during trap
- * group registration.
- */
-struct devlink_trap_group_item {
-	const struct devlink_trap_group *group;
-	struct devlink_trap_policer_item *policer_item;
-	struct list_head list;
-	struct devlink_stats __percpu *stats;
-};
-
-/**
- * struct devlink_trap_item - Packet trap attributes.
- * @trap: Immutable packet trap attributes.
- * @group_item: Associated group item.
- * @list: trap_list member.
- * @action: Trap action.
- * @stats: Trap statistics.
- * @priv: Driver private information.
- *
- * Describes both mutable and immutable packet trap attributes. Created by
- * devlink during trap registration and used for all trap related operations.
- */
-struct devlink_trap_item {
-	const struct devlink_trap *trap;
-	struct devlink_trap_group_item *group_item;
-	struct list_head list;
-	enum devlink_trap_action action;
-	struct devlink_stats __percpu *stats;
-	void *priv;
-};
-
-static struct devlink_trap_policer_item *
-devlink_trap_policer_item_lookup(struct devlink *devlink, u32 id)
-{
-	struct devlink_trap_policer_item *policer_item;
-
-	list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
-		if (policer_item->policer->id == id)
-			return policer_item;
-	}
-
-	return NULL;
-}
-
-static struct devlink_trap_item *
-devlink_trap_item_lookup(struct devlink *devlink, const char *name)
-{
-	struct devlink_trap_item *trap_item;
-
-	list_for_each_entry(trap_item, &devlink->trap_list, list) {
-		if (!strcmp(trap_item->trap->name, name))
-			return trap_item;
-	}
-
-	return NULL;
-}
-
-static struct devlink_trap_item *
-devlink_trap_item_get_from_info(struct devlink *devlink,
-				struct genl_info *info)
-{
-	struct nlattr *attr;
-
-	if (!info->attrs[DEVLINK_ATTR_TRAP_NAME])
-		return NULL;
-	attr = info->attrs[DEVLINK_ATTR_TRAP_NAME];
-
-	return devlink_trap_item_lookup(devlink, nla_data(attr));
-}
-
-static int
-devlink_trap_action_get_from_info(struct genl_info *info,
-				  enum devlink_trap_action *p_trap_action)
-{
-	u8 val;
-
-	val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]);
-	switch (val) {
-	case DEVLINK_TRAP_ACTION_DROP:
-	case DEVLINK_TRAP_ACTION_TRAP:
-	case DEVLINK_TRAP_ACTION_MIRROR:
-		*p_trap_action = val;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int devlink_trap_metadata_put(struct sk_buff *msg,
-				     const struct devlink_trap *trap)
-{
-	struct nlattr *attr;
-
-	attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA);
-	if (!attr)
-		return -EMSGSIZE;
-
-	if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) &&
-	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT))
-		goto nla_put_failure;
-	if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE) &&
-	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE))
-		goto nla_put_failure;
-
-	nla_nest_end(msg, attr);
-
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(msg, attr);
-	return -EMSGSIZE;
-}
-
-static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
-				    struct devlink_stats *stats)
-{
-	int i;
-
-	memset(stats, 0, sizeof(*stats));
-	for_each_possible_cpu(i) {
-		struct devlink_stats *cpu_stats;
-		u64 rx_packets, rx_bytes;
-		unsigned int start;
-
-		cpu_stats = per_cpu_ptr(trap_stats, i);
-		do {
-			start = u64_stats_fetch_begin(&cpu_stats->syncp);
-			rx_packets = u64_stats_read(&cpu_stats->rx_packets);
-			rx_bytes = u64_stats_read(&cpu_stats->rx_bytes);
-		} while (u64_stats_fetch_retry(&cpu_stats->syncp, start));
-
-		u64_stats_add(&stats->rx_packets, rx_packets);
-		u64_stats_add(&stats->rx_bytes, rx_bytes);
-	}
-}
-
-static int
-devlink_trap_group_stats_put(struct sk_buff *msg,
-			     struct devlink_stats __percpu *trap_stats)
-{
-	struct devlink_stats stats;
-	struct nlattr *attr;
-
-	devlink_trap_stats_read(trap_stats, &stats);
-
-	attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
-	if (!attr)
-		return -EMSGSIZE;
-
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
-			      u64_stats_read(&stats.rx_packets),
-			      DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
-			      u64_stats_read(&stats.rx_bytes),
-			      DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-
-	nla_nest_end(msg, attr);
-
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(msg, attr);
-	return -EMSGSIZE;
-}
-
-static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
-				  const struct devlink_trap_item *trap_item)
-{
-	struct devlink_stats stats;
-	struct nlattr *attr;
-	u64 drops = 0;
-	int err;
-
-	if (devlink->ops->trap_drop_counter_get) {
-		err = devlink->ops->trap_drop_counter_get(devlink,
-							  trap_item->trap,
-							  &drops);
-		if (err)
-			return err;
-	}
-
-	devlink_trap_stats_read(trap_item->stats, &stats);
-
-	attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
-	if (!attr)
-		return -EMSGSIZE;
-
-	if (devlink->ops->trap_drop_counter_get &&
-	    nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
-			      DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
-			      u64_stats_read(&stats.rx_packets),
-			      DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
-			      u64_stats_read(&stats.rx_bytes),
-			      DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-
-	nla_nest_end(msg, attr);
-
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(msg, attr);
-	return -EMSGSIZE;
-}
-
-static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
-				const struct devlink_trap_item *trap_item,
-				enum devlink_command cmd, u32 portid, u32 seq,
-				int flags)
-{
-	struct devlink_trap_group_item *group_item = trap_item->group_item;
-	void *hdr;
-	int err;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-
-	if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
-			   group_item->group->name))
-		goto nla_put_failure;
-
-	if (nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name))
-		goto nla_put_failure;
-
-	if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type))
-		goto nla_put_failure;
-
-	if (trap_item->trap->generic &&
-	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
-		goto nla_put_failure;
-
-	if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action))
-		goto nla_put_failure;
-
-	err = devlink_trap_metadata_put(msg, trap_item->trap);
-	if (err)
-		goto nla_put_failure;
-
-	err = devlink_trap_stats_put(msg, devlink, trap_item);
-	if (err)
-		goto nla_put_failure;
-
-	genlmsg_end(msg, hdr);
-
-	return 0;
-
-nla_put_failure:
-	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
-}
-
-int devlink_nl_trap_get_doit(struct sk_buff *skb, struct genl_info *info)
-{
-	struct netlink_ext_ack *extack = info->extack;
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_trap_item *trap_item;
-	struct sk_buff *msg;
-	int err;
-
-	if (list_empty(&devlink->trap_list))
-		return -EOPNOTSUPP;
-
-	trap_item = devlink_trap_item_get_from_info(devlink, info);
-	if (!trap_item) {
-		NL_SET_ERR_MSG(extack, "Device did not register this trap");
-		return -ENOENT;
-	}
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	err = devlink_nl_trap_fill(msg, devlink, trap_item,
-				   DEVLINK_CMD_TRAP_NEW, info->snd_portid,
-				   info->snd_seq, 0);
-	if (err)
-		goto err_trap_fill;
-
-	return genlmsg_reply(msg, info);
-
-err_trap_fill:
-	nlmsg_free(msg);
-	return err;
-}
-
-static int devlink_nl_trap_get_dump_one(struct sk_buff *msg,
-					struct devlink *devlink,
-					struct netlink_callback *cb, int flags)
-{
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct devlink_trap_item *trap_item;
-	int idx = 0;
-	int err = 0;
-
-	list_for_each_entry(trap_item, &devlink->trap_list, list) {
-		if (idx < state->idx) {
-			idx++;
-			continue;
-		}
-		err = devlink_nl_trap_fill(msg, devlink, trap_item,
-					   DEVLINK_CMD_TRAP_NEW,
-					   NETLINK_CB(cb->skb).portid,
-					   cb->nlh->nlmsg_seq, flags);
-		if (err) {
-			state->idx = idx;
-			break;
-		}
-		idx++;
-	}
-
-	return err;
-}
-
-int devlink_nl_trap_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	return devlink_nl_dumpit(skb, cb, devlink_nl_trap_get_dump_one);
-}
-
-static int __devlink_trap_action_set(struct devlink *devlink,
-				     struct devlink_trap_item *trap_item,
-				     enum devlink_trap_action trap_action,
-				     struct netlink_ext_ack *extack)
-{
-	int err;
-
-	if (trap_item->action != trap_action &&
-	    trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
-		NL_SET_ERR_MSG(extack, "Cannot change action of non-drop traps. Skipping");
-		return 0;
-	}
-
-	err = devlink->ops->trap_action_set(devlink, trap_item->trap,
-					    trap_action, extack);
-	if (err)
-		return err;
-
-	trap_item->action = trap_action;
-
-	return 0;
-}
-
-static int devlink_trap_action_set(struct devlink *devlink,
-				   struct devlink_trap_item *trap_item,
-				   struct genl_info *info)
-{
-	enum devlink_trap_action trap_action;
-	int err;
-
-	if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
-		return 0;
-
-	err = devlink_trap_action_get_from_info(info, &trap_action);
-	if (err) {
-		NL_SET_ERR_MSG(info->extack, "Invalid trap action");
-		return -EINVAL;
-	}
-
-	return __devlink_trap_action_set(devlink, trap_item, trap_action,
-					 info->extack);
-}
-
-static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb,
-					struct genl_info *info)
-{
-	struct netlink_ext_ack *extack = info->extack;
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_trap_item *trap_item;
-
-	if (list_empty(&devlink->trap_list))
-		return -EOPNOTSUPP;
-
-	trap_item = devlink_trap_item_get_from_info(devlink, info);
-	if (!trap_item) {
-		NL_SET_ERR_MSG(extack, "Device did not register this trap");
-		return -ENOENT;
-	}
-
-	return devlink_trap_action_set(devlink, trap_item, info);
-}
-
-static struct devlink_trap_group_item *
-devlink_trap_group_item_lookup(struct devlink *devlink, const char *name)
-{
-	struct devlink_trap_group_item *group_item;
-
-	list_for_each_entry(group_item, &devlink->trap_group_list, list) {
-		if (!strcmp(group_item->group->name, name))
-			return group_item;
-	}
-
-	return NULL;
-}
-
-static struct devlink_trap_group_item *
-devlink_trap_group_item_lookup_by_id(struct devlink *devlink, u16 id)
-{
-	struct devlink_trap_group_item *group_item;
-
-	list_for_each_entry(group_item, &devlink->trap_group_list, list) {
-		if (group_item->group->id == id)
-			return group_item;
-	}
-
-	return NULL;
-}
-
-static struct devlink_trap_group_item *
-devlink_trap_group_item_get_from_info(struct devlink *devlink,
-				      struct genl_info *info)
-{
-	char *name;
-
-	if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME])
-		return NULL;
-	name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]);
-
-	return devlink_trap_group_item_lookup(devlink, name);
-}
-
-static int
-devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
-			   const struct devlink_trap_group_item *group_item,
-			   enum devlink_command cmd, u32 portid, u32 seq,
-			   int flags)
-{
-	void *hdr;
-	int err;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-
-	if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
-			   group_item->group->name))
-		goto nla_put_failure;
-
-	if (group_item->group->generic &&
-	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
-		goto nla_put_failure;
-
-	if (group_item->policer_item &&
-	    nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
-			group_item->policer_item->policer->id))
-		goto nla_put_failure;
-
-	err = devlink_trap_group_stats_put(msg, group_item->stats);
-	if (err)
-		goto nla_put_failure;
-
-	genlmsg_end(msg, hdr);
-
-	return 0;
-
-nla_put_failure:
-	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
-}
-
-int devlink_nl_trap_group_get_doit(struct sk_buff *skb, struct genl_info *info)
-{
-	struct netlink_ext_ack *extack = info->extack;
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_trap_group_item *group_item;
-	struct sk_buff *msg;
-	int err;
-
-	if (list_empty(&devlink->trap_group_list))
-		return -EOPNOTSUPP;
-
-	group_item = devlink_trap_group_item_get_from_info(devlink, info);
-	if (!group_item) {
-		NL_SET_ERR_MSG(extack, "Device did not register this trap group");
-		return -ENOENT;
-	}
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	err = devlink_nl_trap_group_fill(msg, devlink, group_item,
-					 DEVLINK_CMD_TRAP_GROUP_NEW,
-					 info->snd_portid, info->snd_seq, 0);
-	if (err)
-		goto err_trap_group_fill;
-
-	return genlmsg_reply(msg, info);
-
-err_trap_group_fill:
-	nlmsg_free(msg);
-	return err;
-}
-
-static int devlink_nl_trap_group_get_dump_one(struct sk_buff *msg,
-					      struct devlink *devlink,
-					      struct netlink_callback *cb,
-					      int flags)
-{
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct devlink_trap_group_item *group_item;
-	int idx = 0;
-	int err = 0;
-
-
-	list_for_each_entry(group_item, &devlink->trap_group_list, list) {
-		if (idx < state->idx) {
-			idx++;
-			continue;
-		}
-		err = devlink_nl_trap_group_fill(msg, devlink, group_item,
-						 DEVLINK_CMD_TRAP_GROUP_NEW,
-						 NETLINK_CB(cb->skb).portid,
-						 cb->nlh->nlmsg_seq, flags);
-		if (err) {
-			state->idx = idx;
-			break;
-		}
-		idx++;
-	}
-
-	return err;
-}
-
-int devlink_nl_trap_group_get_dumpit(struct sk_buff *skb,
-				     struct netlink_callback *cb)
-{
-	return devlink_nl_dumpit(skb, cb, devlink_nl_trap_group_get_dump_one);
-}
-
-static int
-__devlink_trap_group_action_set(struct devlink *devlink,
-				struct devlink_trap_group_item *group_item,
-				enum devlink_trap_action trap_action,
-				struct netlink_ext_ack *extack)
-{
-	const char *group_name = group_item->group->name;
-	struct devlink_trap_item *trap_item;
-	int err;
-
-	if (devlink->ops->trap_group_action_set) {
-		err = devlink->ops->trap_group_action_set(devlink, group_item->group,
-							  trap_action, extack);
-		if (err)
-			return err;
-
-		list_for_each_entry(trap_item, &devlink->trap_list, list) {
-			if (strcmp(trap_item->group_item->group->name, group_name))
-				continue;
-			if (trap_item->action != trap_action &&
-			    trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP)
-				continue;
-			trap_item->action = trap_action;
-		}
-
-		return 0;
-	}
-
-	list_for_each_entry(trap_item, &devlink->trap_list, list) {
-		if (strcmp(trap_item->group_item->group->name, group_name))
-			continue;
-		err = __devlink_trap_action_set(devlink, trap_item,
-						trap_action, extack);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int
-devlink_trap_group_action_set(struct devlink *devlink,
-			      struct devlink_trap_group_item *group_item,
-			      struct genl_info *info, bool *p_modified)
-{
-	enum devlink_trap_action trap_action;
-	int err;
-
-	if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
-		return 0;
-
-	err = devlink_trap_action_get_from_info(info, &trap_action);
-	if (err) {
-		NL_SET_ERR_MSG(info->extack, "Invalid trap action");
-		return -EINVAL;
-	}
-
-	err = __devlink_trap_group_action_set(devlink, group_item, trap_action,
-					      info->extack);
-	if (err)
-		return err;
-
-	*p_modified = true;
-
-	return 0;
-}
-
-static int devlink_trap_group_set(struct devlink *devlink,
-				  struct devlink_trap_group_item *group_item,
-				  struct genl_info *info)
-{
-	struct devlink_trap_policer_item *policer_item;
-	struct netlink_ext_ack *extack = info->extack;
-	const struct devlink_trap_policer *policer;
-	struct nlattr **attrs = info->attrs;
-	u32 policer_id;
-	int err;
-
-	if (!attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
-		return 0;
-
-	if (!devlink->ops->trap_group_set)
-		return -EOPNOTSUPP;
-
-	policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
-	policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
-	if (policer_id && !policer_item) {
-		NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
-		return -ENOENT;
-	}
-	policer = policer_item ? policer_item->policer : NULL;
-
-	err = devlink->ops->trap_group_set(devlink, group_item->group, policer,
-					   extack);
-	if (err)
-		return err;
-
-	group_item->policer_item = policer_item;
-
-	return 0;
-}
-
-static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
-					      struct genl_info *info)
-{
-	struct netlink_ext_ack *extack = info->extack;
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_trap_group_item *group_item;
-	bool modified = false;
-	int err;
-
-	if (list_empty(&devlink->trap_group_list))
-		return -EOPNOTSUPP;
-
-	group_item = devlink_trap_group_item_get_from_info(devlink, info);
-	if (!group_item) {
-		NL_SET_ERR_MSG(extack, "Device did not register this trap group");
-		return -ENOENT;
-	}
-
-	err = devlink_trap_group_action_set(devlink, group_item, info,
-					    &modified);
-	if (err)
-		return err;
-
-	err = devlink_trap_group_set(devlink, group_item, info);
-	if (err)
-		goto err_trap_group_set;
-
-	return 0;
-
-err_trap_group_set:
-	if (modified)
-		NL_SET_ERR_MSG(extack, "Trap group set failed, but some changes were committed already");
-	return err;
-}
-
-static struct devlink_trap_policer_item *
-devlink_trap_policer_item_get_from_info(struct devlink *devlink,
-					struct genl_info *info)
-{
-	u32 id;
-
-	if (!info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
-		return NULL;
-	id = nla_get_u32(info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
-
-	return devlink_trap_policer_item_lookup(devlink, id);
-}
-
-static int
-devlink_trap_policer_stats_put(struct sk_buff *msg, struct devlink *devlink,
-			       const struct devlink_trap_policer *policer)
-{
-	struct nlattr *attr;
-	u64 drops;
-	int err;
-
-	if (!devlink->ops->trap_policer_counter_get)
-		return 0;
-
-	err = devlink->ops->trap_policer_counter_get(devlink, policer, &drops);
-	if (err)
-		return err;
-
-	attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
-	if (!attr)
-		return -EMSGSIZE;
-
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
-			      DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-
-	nla_nest_end(msg, attr);
-
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(msg, attr);
-	return -EMSGSIZE;
-}
-
-static int
-devlink_nl_trap_policer_fill(struct sk_buff *msg, struct devlink *devlink,
-			     const struct devlink_trap_policer_item *policer_item,
-			     enum devlink_command cmd, u32 portid, u32 seq,
-			     int flags)
-{
-	void *hdr;
-	int err;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-
-	if (nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
-			policer_item->policer->id))
-		goto nla_put_failure;
-
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_RATE,
-			      policer_item->rate, DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_BURST,
-			      policer_item->burst, DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-
-	err = devlink_trap_policer_stats_put(msg, devlink,
-					     policer_item->policer);
-	if (err)
-		goto nla_put_failure;
-
-	genlmsg_end(msg, hdr);
-
-	return 0;
-
-nla_put_failure:
-	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
-}
-
-int devlink_nl_trap_policer_get_doit(struct sk_buff *skb,
-				     struct genl_info *info)
-{
-	struct devlink_trap_policer_item *policer_item;
-	struct netlink_ext_ack *extack = info->extack;
-	struct devlink *devlink = info->user_ptr[0];
-	struct sk_buff *msg;
-	int err;
-
-	if (list_empty(&devlink->trap_policer_list))
-		return -EOPNOTSUPP;
-
-	policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
-	if (!policer_item) {
-		NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
-		return -ENOENT;
-	}
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
-					   DEVLINK_CMD_TRAP_POLICER_NEW,
-					   info->snd_portid, info->snd_seq, 0);
-	if (err)
-		goto err_trap_policer_fill;
-
-	return genlmsg_reply(msg, info);
-
-err_trap_policer_fill:
-	nlmsg_free(msg);
-	return err;
-}
-
-static int devlink_nl_trap_policer_get_dump_one(struct sk_buff *msg,
-						struct devlink *devlink,
-						struct netlink_callback *cb,
-						int flags)
-{
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct devlink_trap_policer_item *policer_item;
-	int idx = 0;
-	int err = 0;
-
-	list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
-		if (idx < state->idx) {
-			idx++;
-			continue;
-		}
-		err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
-						   DEVLINK_CMD_TRAP_POLICER_NEW,
-						   NETLINK_CB(cb->skb).portid,
-						   cb->nlh->nlmsg_seq, flags);
-		if (err) {
-			state->idx = idx;
-			break;
-		}
-		idx++;
-	}
-
-	return err;
-}
-
-int devlink_nl_trap_policer_get_dumpit(struct sk_buff *skb,
-				       struct netlink_callback *cb)
-{
-	return devlink_nl_dumpit(skb, cb, devlink_nl_trap_policer_get_dump_one);
-}
-
-static int
-devlink_trap_policer_set(struct devlink *devlink,
-			 struct devlink_trap_policer_item *policer_item,
-			 struct genl_info *info)
-{
-	struct netlink_ext_ack *extack = info->extack;
-	struct nlattr **attrs = info->attrs;
-	u64 rate, burst;
-	int err;
-
-	rate = policer_item->rate;
-	burst = policer_item->burst;
-
-	if (attrs[DEVLINK_ATTR_TRAP_POLICER_RATE])
-		rate = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_RATE]);
-
-	if (attrs[DEVLINK_ATTR_TRAP_POLICER_BURST])
-		burst = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_BURST]);
-
-	if (rate < policer_item->policer->min_rate) {
-		NL_SET_ERR_MSG(extack, "Policer rate lower than limit");
-		return -EINVAL;
-	}
-
-	if (rate > policer_item->policer->max_rate) {
-		NL_SET_ERR_MSG(extack, "Policer rate higher than limit");
-		return -EINVAL;
-	}
-
-	if (burst < policer_item->policer->min_burst) {
-		NL_SET_ERR_MSG(extack, "Policer burst size lower than limit");
-		return -EINVAL;
-	}
-
-	if (burst > policer_item->policer->max_burst) {
-		NL_SET_ERR_MSG(extack, "Policer burst size higher than limit");
-		return -EINVAL;
-	}
-
-	err = devlink->ops->trap_policer_set(devlink, policer_item->policer,
-					     rate, burst, info->extack);
-	if (err)
-		return err;
-
-	policer_item->rate = rate;
-	policer_item->burst = burst;
-
-	return 0;
-}
-
-static int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
-						struct genl_info *info)
-{
-	struct devlink_trap_policer_item *policer_item;
-	struct netlink_ext_ack *extack = info->extack;
-	struct devlink *devlink = info->user_ptr[0];
-
-	if (list_empty(&devlink->trap_policer_list))
-		return -EOPNOTSUPP;
-
-	if (!devlink->ops->trap_policer_set)
-		return -EOPNOTSUPP;
-
-	policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
-	if (!policer_item) {
-		NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
-		return -ENOENT;
-	}
-
-	return devlink_trap_policer_set(devlink, policer_item, info);
-}
-
 const struct genl_small_ops devlink_nl_small_ops[40] = {
 	{
 		.cmd = DEVLINK_CMD_PORT_SET,
@@ -2192,13 +1250,6 @@ const struct genl_small_ops devlink_nl_small_ops[40] = {
 	/* -- No new ops here! Use split ops going forward! -- */
 };
 
-static void devlink_trap_policers_notify_register(struct devlink *devlink);
-static void devlink_trap_policers_notify_unregister(struct devlink *devlink);
-static void devlink_trap_groups_notify_register(struct devlink *devlink);
-static void devlink_trap_groups_notify_unregister(struct devlink *devlink);
-static void devlink_traps_notify_register(struct devlink *devlink);
-static void devlink_traps_notify_unregister(struct devlink *devlink);
-
 void devlink_notify_register(struct devlink *devlink)
 {
 	devlink_notify(devlink, DEVLINK_CMD_NEW);
@@ -2579,915 +1630,3 @@ void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
 	mutex_unlock(&linecard->state_lock);
 }
 EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
-
-#define DEVLINK_TRAP(_id, _type)					      \
-	{								      \
-		.type = DEVLINK_TRAP_TYPE_##_type,			      \
-		.id = DEVLINK_TRAP_GENERIC_ID_##_id,			      \
-		.name = DEVLINK_TRAP_GENERIC_NAME_##_id,		      \
-	}
-
-static const struct devlink_trap devlink_trap_generic[] = {
-	DEVLINK_TRAP(SMAC_MC, DROP),
-	DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP),
-	DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP),
-	DEVLINK_TRAP(INGRESS_STP_FILTER, DROP),
-	DEVLINK_TRAP(EMPTY_TX_LIST, DROP),
-	DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP),
-	DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP),
-	DEVLINK_TRAP(TTL_ERROR, EXCEPTION),
-	DEVLINK_TRAP(TAIL_DROP, DROP),
-	DEVLINK_TRAP(NON_IP_PACKET, DROP),
-	DEVLINK_TRAP(UC_DIP_MC_DMAC, DROP),
-	DEVLINK_TRAP(DIP_LB, DROP),
-	DEVLINK_TRAP(SIP_MC, DROP),
-	DEVLINK_TRAP(SIP_LB, DROP),
-	DEVLINK_TRAP(CORRUPTED_IP_HDR, DROP),
-	DEVLINK_TRAP(IPV4_SIP_BC, DROP),
-	DEVLINK_TRAP(IPV6_MC_DIP_RESERVED_SCOPE, DROP),
-	DEVLINK_TRAP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, DROP),
-	DEVLINK_TRAP(MTU_ERROR, EXCEPTION),
-	DEVLINK_TRAP(UNRESOLVED_NEIGH, EXCEPTION),
-	DEVLINK_TRAP(RPF, EXCEPTION),
-	DEVLINK_TRAP(REJECT_ROUTE, EXCEPTION),
-	DEVLINK_TRAP(IPV4_LPM_UNICAST_MISS, EXCEPTION),
-	DEVLINK_TRAP(IPV6_LPM_UNICAST_MISS, EXCEPTION),
-	DEVLINK_TRAP(NON_ROUTABLE, DROP),
-	DEVLINK_TRAP(DECAP_ERROR, EXCEPTION),
-	DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP),
-	DEVLINK_TRAP(INGRESS_FLOW_ACTION_DROP, DROP),
-	DEVLINK_TRAP(EGRESS_FLOW_ACTION_DROP, DROP),
-	DEVLINK_TRAP(STP, CONTROL),
-	DEVLINK_TRAP(LACP, CONTROL),
-	DEVLINK_TRAP(LLDP, CONTROL),
-	DEVLINK_TRAP(IGMP_QUERY, CONTROL),
-	DEVLINK_TRAP(IGMP_V1_REPORT, CONTROL),
-	DEVLINK_TRAP(IGMP_V2_REPORT, CONTROL),
-	DEVLINK_TRAP(IGMP_V3_REPORT, CONTROL),
-	DEVLINK_TRAP(IGMP_V2_LEAVE, CONTROL),
-	DEVLINK_TRAP(MLD_QUERY, CONTROL),
-	DEVLINK_TRAP(MLD_V1_REPORT, CONTROL),
-	DEVLINK_TRAP(MLD_V2_REPORT, CONTROL),
-	DEVLINK_TRAP(MLD_V1_DONE, CONTROL),
-	DEVLINK_TRAP(IPV4_DHCP, CONTROL),
-	DEVLINK_TRAP(IPV6_DHCP, CONTROL),
-	DEVLINK_TRAP(ARP_REQUEST, CONTROL),
-	DEVLINK_TRAP(ARP_RESPONSE, CONTROL),
-	DEVLINK_TRAP(ARP_OVERLAY, CONTROL),
-	DEVLINK_TRAP(IPV6_NEIGH_SOLICIT, CONTROL),
-	DEVLINK_TRAP(IPV6_NEIGH_ADVERT, CONTROL),
-	DEVLINK_TRAP(IPV4_BFD, CONTROL),
-	DEVLINK_TRAP(IPV6_BFD, CONTROL),
-	DEVLINK_TRAP(IPV4_OSPF, CONTROL),
-	DEVLINK_TRAP(IPV6_OSPF, CONTROL),
-	DEVLINK_TRAP(IPV4_BGP, CONTROL),
-	DEVLINK_TRAP(IPV6_BGP, CONTROL),
-	DEVLINK_TRAP(IPV4_VRRP, CONTROL),
-	DEVLINK_TRAP(IPV6_VRRP, CONTROL),
-	DEVLINK_TRAP(IPV4_PIM, CONTROL),
-	DEVLINK_TRAP(IPV6_PIM, CONTROL),
-	DEVLINK_TRAP(UC_LB, CONTROL),
-	DEVLINK_TRAP(LOCAL_ROUTE, CONTROL),
-	DEVLINK_TRAP(EXTERNAL_ROUTE, CONTROL),
-	DEVLINK_TRAP(IPV6_UC_DIP_LINK_LOCAL_SCOPE, CONTROL),
-	DEVLINK_TRAP(IPV6_DIP_ALL_NODES, CONTROL),
-	DEVLINK_TRAP(IPV6_DIP_ALL_ROUTERS, CONTROL),
-	DEVLINK_TRAP(IPV6_ROUTER_SOLICIT, CONTROL),
-	DEVLINK_TRAP(IPV6_ROUTER_ADVERT, CONTROL),
-	DEVLINK_TRAP(IPV6_REDIRECT, CONTROL),
-	DEVLINK_TRAP(IPV4_ROUTER_ALERT, CONTROL),
-	DEVLINK_TRAP(IPV6_ROUTER_ALERT, CONTROL),
-	DEVLINK_TRAP(PTP_EVENT, CONTROL),
-	DEVLINK_TRAP(PTP_GENERAL, CONTROL),
-	DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL),
-	DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL),
-	DEVLINK_TRAP(EARLY_DROP, DROP),
-	DEVLINK_TRAP(VXLAN_PARSING, DROP),
-	DEVLINK_TRAP(LLC_SNAP_PARSING, DROP),
-	DEVLINK_TRAP(VLAN_PARSING, DROP),
-	DEVLINK_TRAP(PPPOE_PPP_PARSING, DROP),
-	DEVLINK_TRAP(MPLS_PARSING, DROP),
-	DEVLINK_TRAP(ARP_PARSING, DROP),
-	DEVLINK_TRAP(IP_1_PARSING, DROP),
-	DEVLINK_TRAP(IP_N_PARSING, DROP),
-	DEVLINK_TRAP(GRE_PARSING, DROP),
-	DEVLINK_TRAP(UDP_PARSING, DROP),
-	DEVLINK_TRAP(TCP_PARSING, DROP),
-	DEVLINK_TRAP(IPSEC_PARSING, DROP),
-	DEVLINK_TRAP(SCTP_PARSING, DROP),
-	DEVLINK_TRAP(DCCP_PARSING, DROP),
-	DEVLINK_TRAP(GTP_PARSING, DROP),
-	DEVLINK_TRAP(ESP_PARSING, DROP),
-	DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP),
-	DEVLINK_TRAP(DMAC_FILTER, DROP),
-	DEVLINK_TRAP(EAPOL, CONTROL),
-	DEVLINK_TRAP(LOCKED_PORT, DROP),
-};
-
-#define DEVLINK_TRAP_GROUP(_id)						      \
-	{								      \
-		.id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id,		      \
-		.name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id,		      \
-	}
-
-static const struct devlink_trap_group devlink_trap_group_generic[] = {
-	DEVLINK_TRAP_GROUP(L2_DROPS),
-	DEVLINK_TRAP_GROUP(L3_DROPS),
-	DEVLINK_TRAP_GROUP(L3_EXCEPTIONS),
-	DEVLINK_TRAP_GROUP(BUFFER_DROPS),
-	DEVLINK_TRAP_GROUP(TUNNEL_DROPS),
-	DEVLINK_TRAP_GROUP(ACL_DROPS),
-	DEVLINK_TRAP_GROUP(STP),
-	DEVLINK_TRAP_GROUP(LACP),
-	DEVLINK_TRAP_GROUP(LLDP),
-	DEVLINK_TRAP_GROUP(MC_SNOOPING),
-	DEVLINK_TRAP_GROUP(DHCP),
-	DEVLINK_TRAP_GROUP(NEIGH_DISCOVERY),
-	DEVLINK_TRAP_GROUP(BFD),
-	DEVLINK_TRAP_GROUP(OSPF),
-	DEVLINK_TRAP_GROUP(BGP),
-	DEVLINK_TRAP_GROUP(VRRP),
-	DEVLINK_TRAP_GROUP(PIM),
-	DEVLINK_TRAP_GROUP(UC_LB),
-	DEVLINK_TRAP_GROUP(LOCAL_DELIVERY),
-	DEVLINK_TRAP_GROUP(EXTERNAL_DELIVERY),
-	DEVLINK_TRAP_GROUP(IPV6),
-	DEVLINK_TRAP_GROUP(PTP_EVENT),
-	DEVLINK_TRAP_GROUP(PTP_GENERAL),
-	DEVLINK_TRAP_GROUP(ACL_SAMPLE),
-	DEVLINK_TRAP_GROUP(ACL_TRAP),
-	DEVLINK_TRAP_GROUP(PARSER_ERROR_DROPS),
-	DEVLINK_TRAP_GROUP(EAPOL),
-};
-
-static int devlink_trap_generic_verify(const struct devlink_trap *trap)
-{
-	if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX)
-		return -EINVAL;
-
-	if (strcmp(trap->name, devlink_trap_generic[trap->id].name))
-		return -EINVAL;
-
-	if (trap->type != devlink_trap_generic[trap->id].type)
-		return -EINVAL;
-
-	return 0;
-}
-
-static int devlink_trap_driver_verify(const struct devlink_trap *trap)
-{
-	int i;
-
-	if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX)
-		return -EINVAL;
-
-	for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) {
-		if (!strcmp(trap->name, devlink_trap_generic[i].name))
-			return -EEXIST;
-	}
-
-	return 0;
-}
-
-static int devlink_trap_verify(const struct devlink_trap *trap)
-{
-	if (!trap || !trap->name)
-		return -EINVAL;
-
-	if (trap->generic)
-		return devlink_trap_generic_verify(trap);
-	else
-		return devlink_trap_driver_verify(trap);
-}
-
-static int
-devlink_trap_group_generic_verify(const struct devlink_trap_group *group)
-{
-	if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
-		return -EINVAL;
-
-	if (strcmp(group->name, devlink_trap_group_generic[group->id].name))
-		return -EINVAL;
-
-	return 0;
-}
-
-static int
-devlink_trap_group_driver_verify(const struct devlink_trap_group *group)
-{
-	int i;
-
-	if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
-		return -EINVAL;
-
-	for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) {
-		if (!strcmp(group->name, devlink_trap_group_generic[i].name))
-			return -EEXIST;
-	}
-
-	return 0;
-}
-
-static int devlink_trap_group_verify(const struct devlink_trap_group *group)
-{
-	if (group->generic)
-		return devlink_trap_group_generic_verify(group);
-	else
-		return devlink_trap_group_driver_verify(group);
-}
-
-static void
-devlink_trap_group_notify(struct devlink *devlink,
-			  const struct devlink_trap_group_item *group_item,
-			  enum devlink_command cmd)
-{
-	struct sk_buff *msg;
-	int err;
-
-	WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
-		     cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
-	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
-		return;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return;
-
-	err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0,
-					 0);
-	if (err) {
-		nlmsg_free(msg);
-		return;
-	}
-
-	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
-				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static void devlink_trap_groups_notify_register(struct devlink *devlink)
-{
-	struct devlink_trap_group_item *group_item;
-
-	list_for_each_entry(group_item, &devlink->trap_group_list, list)
-		devlink_trap_group_notify(devlink, group_item,
-					  DEVLINK_CMD_TRAP_GROUP_NEW);
-}
-
-static void devlink_trap_groups_notify_unregister(struct devlink *devlink)
-{
-	struct devlink_trap_group_item *group_item;
-
-	list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list)
-		devlink_trap_group_notify(devlink, group_item,
-					  DEVLINK_CMD_TRAP_GROUP_DEL);
-}
-
-static int
-devlink_trap_item_group_link(struct devlink *devlink,
-			     struct devlink_trap_item *trap_item)
-{
-	u16 group_id = trap_item->trap->init_group_id;
-	struct devlink_trap_group_item *group_item;
-
-	group_item = devlink_trap_group_item_lookup_by_id(devlink, group_id);
-	if (WARN_ON_ONCE(!group_item))
-		return -EINVAL;
-
-	trap_item->group_item = group_item;
-
-	return 0;
-}
-
-static void devlink_trap_notify(struct devlink *devlink,
-				const struct devlink_trap_item *trap_item,
-				enum devlink_command cmd)
-{
-	struct sk_buff *msg;
-	int err;
-
-	WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
-		     cmd != DEVLINK_CMD_TRAP_DEL);
-	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
-		return;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return;
-
-	err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0);
-	if (err) {
-		nlmsg_free(msg);
-		return;
-	}
-
-	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
-				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static void devlink_traps_notify_register(struct devlink *devlink)
-{
-	struct devlink_trap_item *trap_item;
-
-	list_for_each_entry(trap_item, &devlink->trap_list, list)
-		devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
-}
-
-static void devlink_traps_notify_unregister(struct devlink *devlink)
-{
-	struct devlink_trap_item *trap_item;
-
-	list_for_each_entry_reverse(trap_item, &devlink->trap_list, list)
-		devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
-}
-
-static int
-devlink_trap_register(struct devlink *devlink,
-		      const struct devlink_trap *trap, void *priv)
-{
-	struct devlink_trap_item *trap_item;
-	int err;
-
-	if (devlink_trap_item_lookup(devlink, trap->name))
-		return -EEXIST;
-
-	trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL);
-	if (!trap_item)
-		return -ENOMEM;
-
-	trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
-	if (!trap_item->stats) {
-		err = -ENOMEM;
-		goto err_stats_alloc;
-	}
-
-	trap_item->trap = trap;
-	trap_item->action = trap->init_action;
-	trap_item->priv = priv;
-
-	err = devlink_trap_item_group_link(devlink, trap_item);
-	if (err)
-		goto err_group_link;
-
-	err = devlink->ops->trap_init(devlink, trap, trap_item);
-	if (err)
-		goto err_trap_init;
-
-	list_add_tail(&trap_item->list, &devlink->trap_list);
-	devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
-
-	return 0;
-
-err_trap_init:
-err_group_link:
-	free_percpu(trap_item->stats);
-err_stats_alloc:
-	kfree(trap_item);
-	return err;
-}
-
-static void devlink_trap_unregister(struct devlink *devlink,
-				    const struct devlink_trap *trap)
-{
-	struct devlink_trap_item *trap_item;
-
-	trap_item = devlink_trap_item_lookup(devlink, trap->name);
-	if (WARN_ON_ONCE(!trap_item))
-		return;
-
-	devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
-	list_del(&trap_item->list);
-	if (devlink->ops->trap_fini)
-		devlink->ops->trap_fini(devlink, trap, trap_item);
-	free_percpu(trap_item->stats);
-	kfree(trap_item);
-}
-
-static void devlink_trap_disable(struct devlink *devlink,
-				 const struct devlink_trap *trap)
-{
-	struct devlink_trap_item *trap_item;
-
-	trap_item = devlink_trap_item_lookup(devlink, trap->name);
-	if (WARN_ON_ONCE(!trap_item))
-		return;
-
-	devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP,
-				      NULL);
-	trap_item->action = DEVLINK_TRAP_ACTION_DROP;
-}
-
-/**
- * devl_traps_register - Register packet traps with devlink.
- * @devlink: devlink.
- * @traps: Packet traps.
- * @traps_count: Count of provided packet traps.
- * @priv: Driver private information.
- *
- * Return: Non-zero value on failure.
- */
-int devl_traps_register(struct devlink *devlink,
-			const struct devlink_trap *traps,
-			size_t traps_count, void *priv)
-{
-	int i, err;
-
-	if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
-		return -EINVAL;
-
-	devl_assert_locked(devlink);
-	for (i = 0; i < traps_count; i++) {
-		const struct devlink_trap *trap = &traps[i];
-
-		err = devlink_trap_verify(trap);
-		if (err)
-			goto err_trap_verify;
-
-		err = devlink_trap_register(devlink, trap, priv);
-		if (err)
-			goto err_trap_register;
-	}
-
-	return 0;
-
-err_trap_register:
-err_trap_verify:
-	for (i--; i >= 0; i--)
-		devlink_trap_unregister(devlink, &traps[i]);
-	return err;
-}
-EXPORT_SYMBOL_GPL(devl_traps_register);
-
-/**
- * devlink_traps_register - Register packet traps with devlink.
- * @devlink: devlink.
- * @traps: Packet traps.
- * @traps_count: Count of provided packet traps.
- * @priv: Driver private information.
- *
- * Context: Takes and release devlink->lock <mutex>.
- *
- * Return: Non-zero value on failure.
- */
-int devlink_traps_register(struct devlink *devlink,
-			   const struct devlink_trap *traps,
-			   size_t traps_count, void *priv)
-{
-	int err;
-
-	devl_lock(devlink);
-	err = devl_traps_register(devlink, traps, traps_count, priv);
-	devl_unlock(devlink);
-	return err;
-}
-EXPORT_SYMBOL_GPL(devlink_traps_register);
-
-/**
- * devl_traps_unregister - Unregister packet traps from devlink.
- * @devlink: devlink.
- * @traps: Packet traps.
- * @traps_count: Count of provided packet traps.
- */
-void devl_traps_unregister(struct devlink *devlink,
-			   const struct devlink_trap *traps,
-			   size_t traps_count)
-{
-	int i;
-
-	devl_assert_locked(devlink);
-	/* Make sure we do not have any packets in-flight while unregistering
-	 * traps by disabling all of them and waiting for a grace period.
-	 */
-	for (i = traps_count - 1; i >= 0; i--)
-		devlink_trap_disable(devlink, &traps[i]);
-	synchronize_rcu();
-	for (i = traps_count - 1; i >= 0; i--)
-		devlink_trap_unregister(devlink, &traps[i]);
-}
-EXPORT_SYMBOL_GPL(devl_traps_unregister);
-
-/**
- * devlink_traps_unregister - Unregister packet traps from devlink.
- * @devlink: devlink.
- * @traps: Packet traps.
- * @traps_count: Count of provided packet traps.
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_traps_unregister(struct devlink *devlink,
-			      const struct devlink_trap *traps,
-			      size_t traps_count)
-{
-	devl_lock(devlink);
-	devl_traps_unregister(devlink, traps, traps_count);
-	devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_traps_unregister);
-
-static void
-devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
-			  size_t skb_len)
-{
-	struct devlink_stats *stats;
-
-	stats = this_cpu_ptr(trap_stats);
-	u64_stats_update_begin(&stats->syncp);
-	u64_stats_add(&stats->rx_bytes, skb_len);
-	u64_stats_inc(&stats->rx_packets);
-	u64_stats_update_end(&stats->syncp);
-}
-
-static void
-devlink_trap_report_metadata_set(struct devlink_trap_metadata *metadata,
-				 const struct devlink_trap_item *trap_item,
-				 struct devlink_port *in_devlink_port,
-				 const struct flow_action_cookie *fa_cookie)
-{
-	metadata->trap_name = trap_item->trap->name;
-	metadata->trap_group_name = trap_item->group_item->group->name;
-	metadata->fa_cookie = fa_cookie;
-	metadata->trap_type = trap_item->trap->type;
-
-	spin_lock(&in_devlink_port->type_lock);
-	if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
-		metadata->input_dev = in_devlink_port->type_eth.netdev;
-	spin_unlock(&in_devlink_port->type_lock);
-}
-
-/**
- * devlink_trap_report - Report trapped packet to drop monitor.
- * @devlink: devlink.
- * @skb: Trapped packet.
- * @trap_ctx: Trap context.
- * @in_devlink_port: Input devlink port.
- * @fa_cookie: Flow action cookie. Could be NULL.
- */
-void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
-			 void *trap_ctx, struct devlink_port *in_devlink_port,
-			 const struct flow_action_cookie *fa_cookie)
-
-{
-	struct devlink_trap_item *trap_item = trap_ctx;
-
-	devlink_trap_stats_update(trap_item->stats, skb->len);
-	devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
-
-	if (tracepoint_enabled(devlink_trap_report)) {
-		struct devlink_trap_metadata metadata = {};
-
-		devlink_trap_report_metadata_set(&metadata, trap_item,
-						 in_devlink_port, fa_cookie);
-		trace_devlink_trap_report(devlink, skb, &metadata);
-	}
-}
-EXPORT_SYMBOL_GPL(devlink_trap_report);
-
-/**
- * devlink_trap_ctx_priv - Trap context to driver private information.
- * @trap_ctx: Trap context.
- *
- * Return: Driver private information passed during registration.
- */
-void *devlink_trap_ctx_priv(void *trap_ctx)
-{
-	struct devlink_trap_item *trap_item = trap_ctx;
-
-	return trap_item->priv;
-}
-EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv);
-
-static int
-devlink_trap_group_item_policer_link(struct devlink *devlink,
-				     struct devlink_trap_group_item *group_item)
-{
-	u32 policer_id = group_item->group->init_policer_id;
-	struct devlink_trap_policer_item *policer_item;
-
-	if (policer_id == 0)
-		return 0;
-
-	policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
-	if (WARN_ON_ONCE(!policer_item))
-		return -EINVAL;
-
-	group_item->policer_item = policer_item;
-
-	return 0;
-}
-
-static int
-devlink_trap_group_register(struct devlink *devlink,
-			    const struct devlink_trap_group *group)
-{
-	struct devlink_trap_group_item *group_item;
-	int err;
-
-	if (devlink_trap_group_item_lookup(devlink, group->name))
-		return -EEXIST;
-
-	group_item = kzalloc(sizeof(*group_item), GFP_KERNEL);
-	if (!group_item)
-		return -ENOMEM;
-
-	group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
-	if (!group_item->stats) {
-		err = -ENOMEM;
-		goto err_stats_alloc;
-	}
-
-	group_item->group = group;
-
-	err = devlink_trap_group_item_policer_link(devlink, group_item);
-	if (err)
-		goto err_policer_link;
-
-	if (devlink->ops->trap_group_init) {
-		err = devlink->ops->trap_group_init(devlink, group);
-		if (err)
-			goto err_group_init;
-	}
-
-	list_add_tail(&group_item->list, &devlink->trap_group_list);
-	devlink_trap_group_notify(devlink, group_item,
-				  DEVLINK_CMD_TRAP_GROUP_NEW);
-
-	return 0;
-
-err_group_init:
-err_policer_link:
-	free_percpu(group_item->stats);
-err_stats_alloc:
-	kfree(group_item);
-	return err;
-}
-
-static void
-devlink_trap_group_unregister(struct devlink *devlink,
-			      const struct devlink_trap_group *group)
-{
-	struct devlink_trap_group_item *group_item;
-
-	group_item = devlink_trap_group_item_lookup(devlink, group->name);
-	if (WARN_ON_ONCE(!group_item))
-		return;
-
-	devlink_trap_group_notify(devlink, group_item,
-				  DEVLINK_CMD_TRAP_GROUP_DEL);
-	list_del(&group_item->list);
-	free_percpu(group_item->stats);
-	kfree(group_item);
-}
-
-/**
- * devl_trap_groups_register - Register packet trap groups with devlink.
- * @devlink: devlink.
- * @groups: Packet trap groups.
- * @groups_count: Count of provided packet trap groups.
- *
- * Return: Non-zero value on failure.
- */
-int devl_trap_groups_register(struct devlink *devlink,
-			      const struct devlink_trap_group *groups,
-			      size_t groups_count)
-{
-	int i, err;
-
-	devl_assert_locked(devlink);
-	for (i = 0; i < groups_count; i++) {
-		const struct devlink_trap_group *group = &groups[i];
-
-		err = devlink_trap_group_verify(group);
-		if (err)
-			goto err_trap_group_verify;
-
-		err = devlink_trap_group_register(devlink, group);
-		if (err)
-			goto err_trap_group_register;
-	}
-
-	return 0;
-
-err_trap_group_register:
-err_trap_group_verify:
-	for (i--; i >= 0; i--)
-		devlink_trap_group_unregister(devlink, &groups[i]);
-	return err;
-}
-EXPORT_SYMBOL_GPL(devl_trap_groups_register);
-
-/**
- * devlink_trap_groups_register - Register packet trap groups with devlink.
- * @devlink: devlink.
- * @groups: Packet trap groups.
- * @groups_count: Count of provided packet trap groups.
- *
- * Context: Takes and release devlink->lock <mutex>.
- *
- * Return: Non-zero value on failure.
- */
-int devlink_trap_groups_register(struct devlink *devlink,
-				 const struct devlink_trap_group *groups,
-				 size_t groups_count)
-{
-	int err;
-
-	devl_lock(devlink);
-	err = devl_trap_groups_register(devlink, groups, groups_count);
-	devl_unlock(devlink);
-	return err;
-}
-EXPORT_SYMBOL_GPL(devlink_trap_groups_register);
-
-/**
- * devl_trap_groups_unregister - Unregister packet trap groups from devlink.
- * @devlink: devlink.
- * @groups: Packet trap groups.
- * @groups_count: Count of provided packet trap groups.
- */
-void devl_trap_groups_unregister(struct devlink *devlink,
-				 const struct devlink_trap_group *groups,
-				 size_t groups_count)
-{
-	int i;
-
-	devl_assert_locked(devlink);
-	for (i = groups_count - 1; i >= 0; i--)
-		devlink_trap_group_unregister(devlink, &groups[i]);
-}
-EXPORT_SYMBOL_GPL(devl_trap_groups_unregister);
-
-/**
- * devlink_trap_groups_unregister - Unregister packet trap groups from devlink.
- * @devlink: devlink.
- * @groups: Packet trap groups.
- * @groups_count: Count of provided packet trap groups.
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_trap_groups_unregister(struct devlink *devlink,
-				    const struct devlink_trap_group *groups,
-				    size_t groups_count)
-{
-	devl_lock(devlink);
-	devl_trap_groups_unregister(devlink, groups, groups_count);
-	devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister);
-
-static void
-devlink_trap_policer_notify(struct devlink *devlink,
-			    const struct devlink_trap_policer_item *policer_item,
-			    enum devlink_command cmd)
-{
-	struct sk_buff *msg;
-	int err;
-
-	WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
-		     cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
-	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
-		return;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return;
-
-	err = devlink_nl_trap_policer_fill(msg, devlink, policer_item, cmd, 0,
-					   0, 0);
-	if (err) {
-		nlmsg_free(msg);
-		return;
-	}
-
-	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
-				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static void devlink_trap_policers_notify_register(struct devlink *devlink)
-{
-	struct devlink_trap_policer_item *policer_item;
-
-	list_for_each_entry(policer_item, &devlink->trap_policer_list, list)
-		devlink_trap_policer_notify(devlink, policer_item,
-					    DEVLINK_CMD_TRAP_POLICER_NEW);
-}
-
-static void devlink_trap_policers_notify_unregister(struct devlink *devlink)
-{
-	struct devlink_trap_policer_item *policer_item;
-
-	list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list,
-				    list)
-		devlink_trap_policer_notify(devlink, policer_item,
-					    DEVLINK_CMD_TRAP_POLICER_DEL);
-}
-
-static int
-devlink_trap_policer_register(struct devlink *devlink,
-			      const struct devlink_trap_policer *policer)
-{
-	struct devlink_trap_policer_item *policer_item;
-	int err;
-
-	if (devlink_trap_policer_item_lookup(devlink, policer->id))
-		return -EEXIST;
-
-	policer_item = kzalloc(sizeof(*policer_item), GFP_KERNEL);
-	if (!policer_item)
-		return -ENOMEM;
-
-	policer_item->policer = policer;
-	policer_item->rate = policer->init_rate;
-	policer_item->burst = policer->init_burst;
-
-	if (devlink->ops->trap_policer_init) {
-		err = devlink->ops->trap_policer_init(devlink, policer);
-		if (err)
-			goto err_policer_init;
-	}
-
-	list_add_tail(&policer_item->list, &devlink->trap_policer_list);
-	devlink_trap_policer_notify(devlink, policer_item,
-				    DEVLINK_CMD_TRAP_POLICER_NEW);
-
-	return 0;
-
-err_policer_init:
-	kfree(policer_item);
-	return err;
-}
-
-static void
-devlink_trap_policer_unregister(struct devlink *devlink,
-				const struct devlink_trap_policer *policer)
-{
-	struct devlink_trap_policer_item *policer_item;
-
-	policer_item = devlink_trap_policer_item_lookup(devlink, policer->id);
-	if (WARN_ON_ONCE(!policer_item))
-		return;
-
-	devlink_trap_policer_notify(devlink, policer_item,
-				    DEVLINK_CMD_TRAP_POLICER_DEL);
-	list_del(&policer_item->list);
-	if (devlink->ops->trap_policer_fini)
-		devlink->ops->trap_policer_fini(devlink, policer);
-	kfree(policer_item);
-}
-
-/**
- * devl_trap_policers_register - Register packet trap policers with devlink.
- * @devlink: devlink.
- * @policers: Packet trap policers.
- * @policers_count: Count of provided packet trap policers.
- *
- * Return: Non-zero value on failure.
- */
-int
-devl_trap_policers_register(struct devlink *devlink,
-			    const struct devlink_trap_policer *policers,
-			    size_t policers_count)
-{
-	int i, err;
-
-	devl_assert_locked(devlink);
-	for (i = 0; i < policers_count; i++) {
-		const struct devlink_trap_policer *policer = &policers[i];
-
-		if (WARN_ON(policer->id == 0 ||
-			    policer->max_rate < policer->min_rate ||
-			    policer->max_burst < policer->min_burst)) {
-			err = -EINVAL;
-			goto err_trap_policer_verify;
-		}
-
-		err = devlink_trap_policer_register(devlink, policer);
-		if (err)
-			goto err_trap_policer_register;
-	}
-	return 0;
-
-err_trap_policer_register:
-err_trap_policer_verify:
-	for (i--; i >= 0; i--)
-		devlink_trap_policer_unregister(devlink, &policers[i]);
-	return err;
-}
-EXPORT_SYMBOL_GPL(devl_trap_policers_register);
-
-/**
- * devl_trap_policers_unregister - Unregister packet trap policers from devlink.
- * @devlink: devlink.
- * @policers: Packet trap policers.
- * @policers_count: Count of provided packet trap policers.
- */
-void
-devl_trap_policers_unregister(struct devlink *devlink,
-			      const struct devlink_trap_policer *policers,
-			      size_t policers_count)
-{
-	int i;
-
-	devl_assert_locked(devlink);
-	for (i = policers_count - 1; i >= 0; i--)
-		devlink_trap_policer_unregister(devlink, &policers[i]);
-}
-EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
diff --git a/net/devlink/trap.c b/net/devlink/trap.c
new file mode 100644
index 000000000000..c26bf9b29bca
--- /dev/null
+++ b/net/devlink/trap.c
@@ -0,0 +1,1861 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <trace/events/devlink.h>
+
+#include "devl_internal.h"
+
+struct devlink_stats {
+	u64_stats_t rx_bytes;
+	u64_stats_t rx_packets;
+	struct u64_stats_sync syncp;
+};
+
+/**
+ * struct devlink_trap_policer_item - Packet trap policer attributes.
+ * @policer: Immutable packet trap policer attributes.
+ * @rate: Rate in packets / sec.
+ * @burst: Burst size in packets.
+ * @list: trap_policer_list member.
+ *
+ * Describes packet trap policer attributes. Created by devlink during trap
+ * policer registration.
+ */
+struct devlink_trap_policer_item {
+	const struct devlink_trap_policer *policer;
+	u64 rate;
+	u64 burst;
+	struct list_head list;
+};
+
+/**
+ * struct devlink_trap_group_item - Packet trap group attributes.
+ * @group: Immutable packet trap group attributes.
+ * @policer_item: Associated policer item. Can be NULL.
+ * @list: trap_group_list member.
+ * @stats: Trap group statistics.
+ *
+ * Describes packet trap group attributes. Created by devlink during trap
+ * group registration.
+ */
+struct devlink_trap_group_item {
+	const struct devlink_trap_group *group;
+	struct devlink_trap_policer_item *policer_item;
+	struct list_head list;
+	struct devlink_stats __percpu *stats;
+};
+
+/**
+ * struct devlink_trap_item - Packet trap attributes.
+ * @trap: Immutable packet trap attributes.
+ * @group_item: Associated group item.
+ * @list: trap_list member.
+ * @action: Trap action.
+ * @stats: Trap statistics.
+ * @priv: Driver private information.
+ *
+ * Describes both mutable and immutable packet trap attributes. Created by
+ * devlink during trap registration and used for all trap related operations.
+ */
+struct devlink_trap_item {
+	const struct devlink_trap *trap;
+	struct devlink_trap_group_item *group_item;
+	struct list_head list;
+	enum devlink_trap_action action;
+	struct devlink_stats __percpu *stats;
+	void *priv;
+};
+
+static struct devlink_trap_policer_item *
+devlink_trap_policer_item_lookup(struct devlink *devlink, u32 id)
+{
+	struct devlink_trap_policer_item *policer_item;
+
+	list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
+		if (policer_item->policer->id == id)
+			return policer_item;
+	}
+
+	return NULL;
+}
+
+static struct devlink_trap_item *
+devlink_trap_item_lookup(struct devlink *devlink, const char *name)
+{
+	struct devlink_trap_item *trap_item;
+
+	list_for_each_entry(trap_item, &devlink->trap_list, list) {
+		if (!strcmp(trap_item->trap->name, name))
+			return trap_item;
+	}
+
+	return NULL;
+}
+
+static struct devlink_trap_item *
+devlink_trap_item_get_from_info(struct devlink *devlink,
+				struct genl_info *info)
+{
+	struct nlattr *attr;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_NAME])
+		return NULL;
+	attr = info->attrs[DEVLINK_ATTR_TRAP_NAME];
+
+	return devlink_trap_item_lookup(devlink, nla_data(attr));
+}
+
+static int
+devlink_trap_action_get_from_info(struct genl_info *info,
+				  enum devlink_trap_action *p_trap_action)
+{
+	u8 val;
+
+	val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]);
+	switch (val) {
+	case DEVLINK_TRAP_ACTION_DROP:
+	case DEVLINK_TRAP_ACTION_TRAP:
+	case DEVLINK_TRAP_ACTION_MIRROR:
+		*p_trap_action = val;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int devlink_trap_metadata_put(struct sk_buff *msg,
+				     const struct devlink_trap *trap)
+{
+	struct nlattr *attr;
+
+	attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) &&
+	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT))
+		goto nla_put_failure;
+	if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE) &&
+	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
+				    struct devlink_stats *stats)
+{
+	int i;
+
+	memset(stats, 0, sizeof(*stats));
+	for_each_possible_cpu(i) {
+		struct devlink_stats *cpu_stats;
+		u64 rx_packets, rx_bytes;
+		unsigned int start;
+
+		cpu_stats = per_cpu_ptr(trap_stats, i);
+		do {
+			start = u64_stats_fetch_begin(&cpu_stats->syncp);
+			rx_packets = u64_stats_read(&cpu_stats->rx_packets);
+			rx_bytes = u64_stats_read(&cpu_stats->rx_bytes);
+		} while (u64_stats_fetch_retry(&cpu_stats->syncp, start));
+
+		u64_stats_add(&stats->rx_packets, rx_packets);
+		u64_stats_add(&stats->rx_bytes, rx_bytes);
+	}
+}
+
+static int
+devlink_trap_group_stats_put(struct sk_buff *msg,
+			     struct devlink_stats __percpu *trap_stats)
+{
+	struct devlink_stats stats;
+	struct nlattr *attr;
+
+	devlink_trap_stats_read(trap_stats, &stats);
+
+	attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+			      u64_stats_read(&stats.rx_packets),
+			      DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+			      u64_stats_read(&stats.rx_bytes),
+			      DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
+				  const struct devlink_trap_item *trap_item)
+{
+	struct devlink_stats stats;
+	struct nlattr *attr;
+	u64 drops = 0;
+	int err;
+
+	if (devlink->ops->trap_drop_counter_get) {
+		err = devlink->ops->trap_drop_counter_get(devlink,
+							  trap_item->trap,
+							  &drops);
+		if (err)
+			return err;
+	}
+
+	devlink_trap_stats_read(trap_item->stats, &stats);
+
+	attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (devlink->ops->trap_drop_counter_get &&
+	    nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
+			      DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+			      u64_stats_read(&stats.rx_packets),
+			      DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+			      u64_stats_read(&stats.rx_bytes),
+			      DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
+				const struct devlink_trap_item *trap_item,
+				enum devlink_command cmd, u32 portid, u32 seq,
+				int flags)
+{
+	struct devlink_trap_group_item *group_item = trap_item->group_item;
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+			   group_item->group->name))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type))
+		goto nla_put_failure;
+
+	if (trap_item->trap->generic &&
+	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action))
+		goto nla_put_failure;
+
+	err = devlink_trap_metadata_put(msg, trap_item->trap);
+	if (err)
+		goto nla_put_failure;
+
+	err = devlink_trap_stats_put(msg, devlink, trap_item);
+	if (err)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+int devlink_nl_trap_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_trap_item *trap_item;
+	struct sk_buff *msg;
+	int err;
+
+	if (list_empty(&devlink->trap_list))
+		return -EOPNOTSUPP;
+
+	trap_item = devlink_trap_item_get_from_info(devlink, info);
+	if (!trap_item) {
+		NL_SET_ERR_MSG(extack, "Device did not register this trap");
+		return -ENOENT;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_trap_fill(msg, devlink, trap_item,
+				   DEVLINK_CMD_TRAP_NEW, info->snd_portid,
+				   info->snd_seq, 0);
+	if (err)
+		goto err_trap_fill;
+
+	return genlmsg_reply(msg, info);
+
+err_trap_fill:
+	nlmsg_free(msg);
+	return err;
+}
+
+static int devlink_nl_trap_get_dump_one(struct sk_buff *msg,
+					struct devlink *devlink,
+					struct netlink_callback *cb, int flags)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_trap_item *trap_item;
+	int idx = 0;
+	int err = 0;
+
+	list_for_each_entry(trap_item, &devlink->trap_list, list) {
+		if (idx < state->idx) {
+			idx++;
+			continue;
+		}
+		err = devlink_nl_trap_fill(msg, devlink, trap_item,
+					   DEVLINK_CMD_TRAP_NEW,
+					   NETLINK_CB(cb->skb).portid,
+					   cb->nlh->nlmsg_seq, flags);
+		if (err) {
+			state->idx = idx;
+			break;
+		}
+		idx++;
+	}
+
+	return err;
+}
+
+int devlink_nl_trap_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return devlink_nl_dumpit(skb, cb, devlink_nl_trap_get_dump_one);
+}
+
+static int __devlink_trap_action_set(struct devlink *devlink,
+				     struct devlink_trap_item *trap_item,
+				     enum devlink_trap_action trap_action,
+				     struct netlink_ext_ack *extack)
+{
+	int err;
+
+	if (trap_item->action != trap_action &&
+	    trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
+		NL_SET_ERR_MSG(extack, "Cannot change action of non-drop traps. Skipping");
+		return 0;
+	}
+
+	err = devlink->ops->trap_action_set(devlink, trap_item->trap,
+					    trap_action, extack);
+	if (err)
+		return err;
+
+	trap_item->action = trap_action;
+
+	return 0;
+}
+
+static int devlink_trap_action_set(struct devlink *devlink,
+				   struct devlink_trap_item *trap_item,
+				   struct genl_info *info)
+{
+	enum devlink_trap_action trap_action;
+	int err;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+		return 0;
+
+	err = devlink_trap_action_get_from_info(info, &trap_action);
+	if (err) {
+		NL_SET_ERR_MSG(info->extack, "Invalid trap action");
+		return -EINVAL;
+	}
+
+	return __devlink_trap_action_set(devlink, trap_item, trap_action,
+					 info->extack);
+}
+
+int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_trap_item *trap_item;
+
+	if (list_empty(&devlink->trap_list))
+		return -EOPNOTSUPP;
+
+	trap_item = devlink_trap_item_get_from_info(devlink, info);
+	if (!trap_item) {
+		NL_SET_ERR_MSG(extack, "Device did not register this trap");
+		return -ENOENT;
+	}
+
+	return devlink_trap_action_set(devlink, trap_item, info);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_lookup(struct devlink *devlink, const char *name)
+{
+	struct devlink_trap_group_item *group_item;
+
+	list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+		if (!strcmp(group_item->group->name, name))
+			return group_item;
+	}
+
+	return NULL;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_lookup_by_id(struct devlink *devlink, u16 id)
+{
+	struct devlink_trap_group_item *group_item;
+
+	list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+		if (group_item->group->id == id)
+			return group_item;
+	}
+
+	return NULL;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get_from_info(struct devlink *devlink,
+				      struct genl_info *info)
+{
+	char *name;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME])
+		return NULL;
+	name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]);
+
+	return devlink_trap_group_item_lookup(devlink, name);
+}
+
+static int
+devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
+			   const struct devlink_trap_group_item *group_item,
+			   enum devlink_command cmd, u32 portid, u32 seq,
+			   int flags)
+{
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+			   group_item->group->name))
+		goto nla_put_failure;
+
+	if (group_item->group->generic &&
+	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+		goto nla_put_failure;
+
+	if (group_item->policer_item &&
+	    nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
+			group_item->policer_item->policer->id))
+		goto nla_put_failure;
+
+	err = devlink_trap_group_stats_put(msg, group_item->stats);
+	if (err)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+int devlink_nl_trap_group_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_trap_group_item *group_item;
+	struct sk_buff *msg;
+	int err;
+
+	if (list_empty(&devlink->trap_group_list))
+		return -EOPNOTSUPP;
+
+	group_item = devlink_trap_group_item_get_from_info(devlink, info);
+	if (!group_item) {
+		NL_SET_ERR_MSG(extack, "Device did not register this trap group");
+		return -ENOENT;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+					 DEVLINK_CMD_TRAP_GROUP_NEW,
+					 info->snd_portid, info->snd_seq, 0);
+	if (err)
+		goto err_trap_group_fill;
+
+	return genlmsg_reply(msg, info);
+
+err_trap_group_fill:
+	nlmsg_free(msg);
+	return err;
+}
+
+static int devlink_nl_trap_group_get_dump_one(struct sk_buff *msg,
+					      struct devlink *devlink,
+					      struct netlink_callback *cb,
+					      int flags)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_trap_group_item *group_item;
+	int idx = 0;
+	int err = 0;
+
+	list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+		if (idx < state->idx) {
+			idx++;
+			continue;
+		}
+		err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+						 DEVLINK_CMD_TRAP_GROUP_NEW,
+						 NETLINK_CB(cb->skb).portid,
+						 cb->nlh->nlmsg_seq, flags);
+		if (err) {
+			state->idx = idx;
+			break;
+		}
+		idx++;
+	}
+
+	return err;
+}
+
+int devlink_nl_trap_group_get_dumpit(struct sk_buff *skb,
+				     struct netlink_callback *cb)
+{
+	return devlink_nl_dumpit(skb, cb, devlink_nl_trap_group_get_dump_one);
+}
+
+static int
+__devlink_trap_group_action_set(struct devlink *devlink,
+				struct devlink_trap_group_item *group_item,
+				enum devlink_trap_action trap_action,
+				struct netlink_ext_ack *extack)
+{
+	const char *group_name = group_item->group->name;
+	struct devlink_trap_item *trap_item;
+	int err;
+
+	if (devlink->ops->trap_group_action_set) {
+		err = devlink->ops->trap_group_action_set(devlink, group_item->group,
+							  trap_action, extack);
+		if (err)
+			return err;
+
+		list_for_each_entry(trap_item, &devlink->trap_list, list) {
+			if (strcmp(trap_item->group_item->group->name, group_name))
+				continue;
+			if (trap_item->action != trap_action &&
+			    trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP)
+				continue;
+			trap_item->action = trap_action;
+		}
+
+		return 0;
+	}
+
+	list_for_each_entry(trap_item, &devlink->trap_list, list) {
+		if (strcmp(trap_item->group_item->group->name, group_name))
+			continue;
+		err = __devlink_trap_action_set(devlink, trap_item,
+						trap_action, extack);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int
+devlink_trap_group_action_set(struct devlink *devlink,
+			      struct devlink_trap_group_item *group_item,
+			      struct genl_info *info, bool *p_modified)
+{
+	enum devlink_trap_action trap_action;
+	int err;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+		return 0;
+
+	err = devlink_trap_action_get_from_info(info, &trap_action);
+	if (err) {
+		NL_SET_ERR_MSG(info->extack, "Invalid trap action");
+		return -EINVAL;
+	}
+
+	err = __devlink_trap_group_action_set(devlink, group_item, trap_action,
+					      info->extack);
+	if (err)
+		return err;
+
+	*p_modified = true;
+
+	return 0;
+}
+
+static int devlink_trap_group_set(struct devlink *devlink,
+				  struct devlink_trap_group_item *group_item,
+				  struct genl_info *info)
+{
+	struct devlink_trap_policer_item *policer_item;
+	struct netlink_ext_ack *extack = info->extack;
+	const struct devlink_trap_policer *policer;
+	struct nlattr **attrs = info->attrs;
+	u32 policer_id;
+	int err;
+
+	if (!attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
+		return 0;
+
+	if (!devlink->ops->trap_group_set)
+		return -EOPNOTSUPP;
+
+	policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
+	policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
+	if (policer_id && !policer_item) {
+		NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
+		return -ENOENT;
+	}
+	policer = policer_item ? policer_item->policer : NULL;
+
+	err = devlink->ops->trap_group_set(devlink, group_item->group, policer,
+					   extack);
+	if (err)
+		return err;
+
+	group_item->policer_item = policer_item;
+
+	return 0;
+}
+
+int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_trap_group_item *group_item;
+	bool modified = false;
+	int err;
+
+	if (list_empty(&devlink->trap_group_list))
+		return -EOPNOTSUPP;
+
+	group_item = devlink_trap_group_item_get_from_info(devlink, info);
+	if (!group_item) {
+		NL_SET_ERR_MSG(extack, "Device did not register this trap group");
+		return -ENOENT;
+	}
+
+	err = devlink_trap_group_action_set(devlink, group_item, info,
+					    &modified);
+	if (err)
+		return err;
+
+	err = devlink_trap_group_set(devlink, group_item, info);
+	if (err)
+		goto err_trap_group_set;
+
+	return 0;
+
+err_trap_group_set:
+	if (modified)
+		NL_SET_ERR_MSG(extack, "Trap group set failed, but some changes were committed already");
+	return err;
+}
+
+static struct devlink_trap_policer_item *
+devlink_trap_policer_item_get_from_info(struct devlink *devlink,
+					struct genl_info *info)
+{
+	u32 id;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
+		return NULL;
+	id = nla_get_u32(info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
+
+	return devlink_trap_policer_item_lookup(devlink, id);
+}
+
+static int
+devlink_trap_policer_stats_put(struct sk_buff *msg, struct devlink *devlink,
+			       const struct devlink_trap_policer *policer)
+{
+	struct nlattr *attr;
+	u64 drops;
+	int err;
+
+	if (!devlink->ops->trap_policer_counter_get)
+		return 0;
+
+	err = devlink->ops->trap_policer_counter_get(devlink, policer, &drops);
+	if (err)
+		return err;
+
+	attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
+			      DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int
+devlink_nl_trap_policer_fill(struct sk_buff *msg, struct devlink *devlink,
+			     const struct devlink_trap_policer_item *policer_item,
+			     enum devlink_command cmd, u32 portid, u32 seq,
+			     int flags)
+{
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
+			policer_item->policer->id))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_RATE,
+			      policer_item->rate, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_BURST,
+			      policer_item->burst, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	err = devlink_trap_policer_stats_put(msg, devlink,
+					     policer_item->policer);
+	if (err)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+int devlink_nl_trap_policer_get_doit(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct devlink_trap_policer_item *policer_item;
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct sk_buff *msg;
+	int err;
+
+	if (list_empty(&devlink->trap_policer_list))
+		return -EOPNOTSUPP;
+
+	policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
+	if (!policer_item) {
+		NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
+		return -ENOENT;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
+					   DEVLINK_CMD_TRAP_POLICER_NEW,
+					   info->snd_portid, info->snd_seq, 0);
+	if (err)
+		goto err_trap_policer_fill;
+
+	return genlmsg_reply(msg, info);
+
+err_trap_policer_fill:
+	nlmsg_free(msg);
+	return err;
+}
+
+static int devlink_nl_trap_policer_get_dump_one(struct sk_buff *msg,
+						struct devlink *devlink,
+						struct netlink_callback *cb,
+						int flags)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_trap_policer_item *policer_item;
+	int idx = 0;
+	int err = 0;
+
+	list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
+		if (idx < state->idx) {
+			idx++;
+			continue;
+		}
+		err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
+						   DEVLINK_CMD_TRAP_POLICER_NEW,
+						   NETLINK_CB(cb->skb).portid,
+						   cb->nlh->nlmsg_seq, flags);
+		if (err) {
+			state->idx = idx;
+			break;
+		}
+		idx++;
+	}
+
+	return err;
+}
+
+int devlink_nl_trap_policer_get_dumpit(struct sk_buff *skb,
+				       struct netlink_callback *cb)
+{
+	return devlink_nl_dumpit(skb, cb, devlink_nl_trap_policer_get_dump_one);
+}
+
+static int
+devlink_trap_policer_set(struct devlink *devlink,
+			 struct devlink_trap_policer_item *policer_item,
+			 struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct nlattr **attrs = info->attrs;
+	u64 rate, burst;
+	int err;
+
+	rate = policer_item->rate;
+	burst = policer_item->burst;
+
+	if (attrs[DEVLINK_ATTR_TRAP_POLICER_RATE])
+		rate = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_RATE]);
+
+	if (attrs[DEVLINK_ATTR_TRAP_POLICER_BURST])
+		burst = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_BURST]);
+
+	if (rate < policer_item->policer->min_rate) {
+		NL_SET_ERR_MSG(extack, "Policer rate lower than limit");
+		return -EINVAL;
+	}
+
+	if (rate > policer_item->policer->max_rate) {
+		NL_SET_ERR_MSG(extack, "Policer rate higher than limit");
+		return -EINVAL;
+	}
+
+	if (burst < policer_item->policer->min_burst) {
+		NL_SET_ERR_MSG(extack, "Policer burst size lower than limit");
+		return -EINVAL;
+	}
+
+	if (burst > policer_item->policer->max_burst) {
+		NL_SET_ERR_MSG(extack, "Policer burst size higher than limit");
+		return -EINVAL;
+	}
+
+	err = devlink->ops->trap_policer_set(devlink, policer_item->policer,
+					     rate, burst, info->extack);
+	if (err)
+		return err;
+
+	policer_item->rate = rate;
+	policer_item->burst = burst;
+
+	return 0;
+}
+
+int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
+					 struct genl_info *info)
+{
+	struct devlink_trap_policer_item *policer_item;
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+
+	if (list_empty(&devlink->trap_policer_list))
+		return -EOPNOTSUPP;
+
+	if (!devlink->ops->trap_policer_set)
+		return -EOPNOTSUPP;
+
+	policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
+	if (!policer_item) {
+		NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
+		return -ENOENT;
+	}
+
+	return devlink_trap_policer_set(devlink, policer_item, info);
+}
+
+#define DEVLINK_TRAP(_id, _type)					      \
+	{								      \
+		.type = DEVLINK_TRAP_TYPE_##_type,			      \
+		.id = DEVLINK_TRAP_GENERIC_ID_##_id,			      \
+		.name = DEVLINK_TRAP_GENERIC_NAME_##_id,		      \
+	}
+
+static const struct devlink_trap devlink_trap_generic[] = {
+	DEVLINK_TRAP(SMAC_MC, DROP),
+	DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP),
+	DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP),
+	DEVLINK_TRAP(INGRESS_STP_FILTER, DROP),
+	DEVLINK_TRAP(EMPTY_TX_LIST, DROP),
+	DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP),
+	DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP),
+	DEVLINK_TRAP(TTL_ERROR, EXCEPTION),
+	DEVLINK_TRAP(TAIL_DROP, DROP),
+	DEVLINK_TRAP(NON_IP_PACKET, DROP),
+	DEVLINK_TRAP(UC_DIP_MC_DMAC, DROP),
+	DEVLINK_TRAP(DIP_LB, DROP),
+	DEVLINK_TRAP(SIP_MC, DROP),
+	DEVLINK_TRAP(SIP_LB, DROP),
+	DEVLINK_TRAP(CORRUPTED_IP_HDR, DROP),
+	DEVLINK_TRAP(IPV4_SIP_BC, DROP),
+	DEVLINK_TRAP(IPV6_MC_DIP_RESERVED_SCOPE, DROP),
+	DEVLINK_TRAP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, DROP),
+	DEVLINK_TRAP(MTU_ERROR, EXCEPTION),
+	DEVLINK_TRAP(UNRESOLVED_NEIGH, EXCEPTION),
+	DEVLINK_TRAP(RPF, EXCEPTION),
+	DEVLINK_TRAP(REJECT_ROUTE, EXCEPTION),
+	DEVLINK_TRAP(IPV4_LPM_UNICAST_MISS, EXCEPTION),
+	DEVLINK_TRAP(IPV6_LPM_UNICAST_MISS, EXCEPTION),
+	DEVLINK_TRAP(NON_ROUTABLE, DROP),
+	DEVLINK_TRAP(DECAP_ERROR, EXCEPTION),
+	DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP),
+	DEVLINK_TRAP(INGRESS_FLOW_ACTION_DROP, DROP),
+	DEVLINK_TRAP(EGRESS_FLOW_ACTION_DROP, DROP),
+	DEVLINK_TRAP(STP, CONTROL),
+	DEVLINK_TRAP(LACP, CONTROL),
+	DEVLINK_TRAP(LLDP, CONTROL),
+	DEVLINK_TRAP(IGMP_QUERY, CONTROL),
+	DEVLINK_TRAP(IGMP_V1_REPORT, CONTROL),
+	DEVLINK_TRAP(IGMP_V2_REPORT, CONTROL),
+	DEVLINK_TRAP(IGMP_V3_REPORT, CONTROL),
+	DEVLINK_TRAP(IGMP_V2_LEAVE, CONTROL),
+	DEVLINK_TRAP(MLD_QUERY, CONTROL),
+	DEVLINK_TRAP(MLD_V1_REPORT, CONTROL),
+	DEVLINK_TRAP(MLD_V2_REPORT, CONTROL),
+	DEVLINK_TRAP(MLD_V1_DONE, CONTROL),
+	DEVLINK_TRAP(IPV4_DHCP, CONTROL),
+	DEVLINK_TRAP(IPV6_DHCP, CONTROL),
+	DEVLINK_TRAP(ARP_REQUEST, CONTROL),
+	DEVLINK_TRAP(ARP_RESPONSE, CONTROL),
+	DEVLINK_TRAP(ARP_OVERLAY, CONTROL),
+	DEVLINK_TRAP(IPV6_NEIGH_SOLICIT, CONTROL),
+	DEVLINK_TRAP(IPV6_NEIGH_ADVERT, CONTROL),
+	DEVLINK_TRAP(IPV4_BFD, CONTROL),
+	DEVLINK_TRAP(IPV6_BFD, CONTROL),
+	DEVLINK_TRAP(IPV4_OSPF, CONTROL),
+	DEVLINK_TRAP(IPV6_OSPF, CONTROL),
+	DEVLINK_TRAP(IPV4_BGP, CONTROL),
+	DEVLINK_TRAP(IPV6_BGP, CONTROL),
+	DEVLINK_TRAP(IPV4_VRRP, CONTROL),
+	DEVLINK_TRAP(IPV6_VRRP, CONTROL),
+	DEVLINK_TRAP(IPV4_PIM, CONTROL),
+	DEVLINK_TRAP(IPV6_PIM, CONTROL),
+	DEVLINK_TRAP(UC_LB, CONTROL),
+	DEVLINK_TRAP(LOCAL_ROUTE, CONTROL),
+	DEVLINK_TRAP(EXTERNAL_ROUTE, CONTROL),
+	DEVLINK_TRAP(IPV6_UC_DIP_LINK_LOCAL_SCOPE, CONTROL),
+	DEVLINK_TRAP(IPV6_DIP_ALL_NODES, CONTROL),
+	DEVLINK_TRAP(IPV6_DIP_ALL_ROUTERS, CONTROL),
+	DEVLINK_TRAP(IPV6_ROUTER_SOLICIT, CONTROL),
+	DEVLINK_TRAP(IPV6_ROUTER_ADVERT, CONTROL),
+	DEVLINK_TRAP(IPV6_REDIRECT, CONTROL),
+	DEVLINK_TRAP(IPV4_ROUTER_ALERT, CONTROL),
+	DEVLINK_TRAP(IPV6_ROUTER_ALERT, CONTROL),
+	DEVLINK_TRAP(PTP_EVENT, CONTROL),
+	DEVLINK_TRAP(PTP_GENERAL, CONTROL),
+	DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL),
+	DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL),
+	DEVLINK_TRAP(EARLY_DROP, DROP),
+	DEVLINK_TRAP(VXLAN_PARSING, DROP),
+	DEVLINK_TRAP(LLC_SNAP_PARSING, DROP),
+	DEVLINK_TRAP(VLAN_PARSING, DROP),
+	DEVLINK_TRAP(PPPOE_PPP_PARSING, DROP),
+	DEVLINK_TRAP(MPLS_PARSING, DROP),
+	DEVLINK_TRAP(ARP_PARSING, DROP),
+	DEVLINK_TRAP(IP_1_PARSING, DROP),
+	DEVLINK_TRAP(IP_N_PARSING, DROP),
+	DEVLINK_TRAP(GRE_PARSING, DROP),
+	DEVLINK_TRAP(UDP_PARSING, DROP),
+	DEVLINK_TRAP(TCP_PARSING, DROP),
+	DEVLINK_TRAP(IPSEC_PARSING, DROP),
+	DEVLINK_TRAP(SCTP_PARSING, DROP),
+	DEVLINK_TRAP(DCCP_PARSING, DROP),
+	DEVLINK_TRAP(GTP_PARSING, DROP),
+	DEVLINK_TRAP(ESP_PARSING, DROP),
+	DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP),
+	DEVLINK_TRAP(DMAC_FILTER, DROP),
+	DEVLINK_TRAP(EAPOL, CONTROL),
+	DEVLINK_TRAP(LOCKED_PORT, DROP),
+};
+
+#define DEVLINK_TRAP_GROUP(_id)						      \
+	{								      \
+		.id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id,		      \
+		.name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id,		      \
+	}
+
+static const struct devlink_trap_group devlink_trap_group_generic[] = {
+	DEVLINK_TRAP_GROUP(L2_DROPS),
+	DEVLINK_TRAP_GROUP(L3_DROPS),
+	DEVLINK_TRAP_GROUP(L3_EXCEPTIONS),
+	DEVLINK_TRAP_GROUP(BUFFER_DROPS),
+	DEVLINK_TRAP_GROUP(TUNNEL_DROPS),
+	DEVLINK_TRAP_GROUP(ACL_DROPS),
+	DEVLINK_TRAP_GROUP(STP),
+	DEVLINK_TRAP_GROUP(LACP),
+	DEVLINK_TRAP_GROUP(LLDP),
+	DEVLINK_TRAP_GROUP(MC_SNOOPING),
+	DEVLINK_TRAP_GROUP(DHCP),
+	DEVLINK_TRAP_GROUP(NEIGH_DISCOVERY),
+	DEVLINK_TRAP_GROUP(BFD),
+	DEVLINK_TRAP_GROUP(OSPF),
+	DEVLINK_TRAP_GROUP(BGP),
+	DEVLINK_TRAP_GROUP(VRRP),
+	DEVLINK_TRAP_GROUP(PIM),
+	DEVLINK_TRAP_GROUP(UC_LB),
+	DEVLINK_TRAP_GROUP(LOCAL_DELIVERY),
+	DEVLINK_TRAP_GROUP(EXTERNAL_DELIVERY),
+	DEVLINK_TRAP_GROUP(IPV6),
+	DEVLINK_TRAP_GROUP(PTP_EVENT),
+	DEVLINK_TRAP_GROUP(PTP_GENERAL),
+	DEVLINK_TRAP_GROUP(ACL_SAMPLE),
+	DEVLINK_TRAP_GROUP(ACL_TRAP),
+	DEVLINK_TRAP_GROUP(PARSER_ERROR_DROPS),
+	DEVLINK_TRAP_GROUP(EAPOL),
+};
+
+static int devlink_trap_generic_verify(const struct devlink_trap *trap)
+{
+	if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX)
+		return -EINVAL;
+
+	if (strcmp(trap->name, devlink_trap_generic[trap->id].name))
+		return -EINVAL;
+
+	if (trap->type != devlink_trap_generic[trap->id].type)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int devlink_trap_driver_verify(const struct devlink_trap *trap)
+{
+	int i;
+
+	if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) {
+		if (!strcmp(trap->name, devlink_trap_generic[i].name))
+			return -EEXIST;
+	}
+
+	return 0;
+}
+
+static int devlink_trap_verify(const struct devlink_trap *trap)
+{
+	if (!trap || !trap->name)
+		return -EINVAL;
+
+	if (trap->generic)
+		return devlink_trap_generic_verify(trap);
+	else
+		return devlink_trap_driver_verify(trap);
+}
+
+static int
+devlink_trap_group_generic_verify(const struct devlink_trap_group *group)
+{
+	if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+		return -EINVAL;
+
+	if (strcmp(group->name, devlink_trap_group_generic[group->id].name))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int
+devlink_trap_group_driver_verify(const struct devlink_trap_group *group)
+{
+	int i;
+
+	if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) {
+		if (!strcmp(group->name, devlink_trap_group_generic[i].name))
+			return -EEXIST;
+	}
+
+	return 0;
+}
+
+static int devlink_trap_group_verify(const struct devlink_trap_group *group)
+{
+	if (group->generic)
+		return devlink_trap_group_generic_verify(group);
+	else
+		return devlink_trap_group_driver_verify(group);
+}
+
+static void
+devlink_trap_group_notify(struct devlink *devlink,
+			  const struct devlink_trap_group_item *group_item,
+			  enum devlink_command cmd)
+{
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
+		     cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
+	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0,
+					 0);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_trap_groups_notify_register(struct devlink *devlink)
+{
+	struct devlink_trap_group_item *group_item;
+
+	list_for_each_entry(group_item, &devlink->trap_group_list, list)
+		devlink_trap_group_notify(devlink, group_item,
+					  DEVLINK_CMD_TRAP_GROUP_NEW);
+}
+
+void devlink_trap_groups_notify_unregister(struct devlink *devlink)
+{
+	struct devlink_trap_group_item *group_item;
+
+	list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list)
+		devlink_trap_group_notify(devlink, group_item,
+					  DEVLINK_CMD_TRAP_GROUP_DEL);
+}
+
+static int
+devlink_trap_item_group_link(struct devlink *devlink,
+			     struct devlink_trap_item *trap_item)
+{
+	u16 group_id = trap_item->trap->init_group_id;
+	struct devlink_trap_group_item *group_item;
+
+	group_item = devlink_trap_group_item_lookup_by_id(devlink, group_id);
+	if (WARN_ON_ONCE(!group_item))
+		return -EINVAL;
+
+	trap_item->group_item = group_item;
+
+	return 0;
+}
+
+static void devlink_trap_notify(struct devlink *devlink,
+				const struct devlink_trap_item *trap_item,
+				enum devlink_command cmd)
+{
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
+		     cmd != DEVLINK_CMD_TRAP_DEL);
+	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_traps_notify_register(struct devlink *devlink)
+{
+	struct devlink_trap_item *trap_item;
+
+	list_for_each_entry(trap_item, &devlink->trap_list, list)
+		devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+}
+
+void devlink_traps_notify_unregister(struct devlink *devlink)
+{
+	struct devlink_trap_item *trap_item;
+
+	list_for_each_entry_reverse(trap_item, &devlink->trap_list, list)
+		devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+}
+
+static int
+devlink_trap_register(struct devlink *devlink,
+		      const struct devlink_trap *trap, void *priv)
+{
+	struct devlink_trap_item *trap_item;
+	int err;
+
+	if (devlink_trap_item_lookup(devlink, trap->name))
+		return -EEXIST;
+
+	trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL);
+	if (!trap_item)
+		return -ENOMEM;
+
+	trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+	if (!trap_item->stats) {
+		err = -ENOMEM;
+		goto err_stats_alloc;
+	}
+
+	trap_item->trap = trap;
+	trap_item->action = trap->init_action;
+	trap_item->priv = priv;
+
+	err = devlink_trap_item_group_link(devlink, trap_item);
+	if (err)
+		goto err_group_link;
+
+	err = devlink->ops->trap_init(devlink, trap, trap_item);
+	if (err)
+		goto err_trap_init;
+
+	list_add_tail(&trap_item->list, &devlink->trap_list);
+	devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+
+	return 0;
+
+err_trap_init:
+err_group_link:
+	free_percpu(trap_item->stats);
+err_stats_alloc:
+	kfree(trap_item);
+	return err;
+}
+
+static void devlink_trap_unregister(struct devlink *devlink,
+				    const struct devlink_trap *trap)
+{
+	struct devlink_trap_item *trap_item;
+
+	trap_item = devlink_trap_item_lookup(devlink, trap->name);
+	if (WARN_ON_ONCE(!trap_item))
+		return;
+
+	devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+	list_del(&trap_item->list);
+	if (devlink->ops->trap_fini)
+		devlink->ops->trap_fini(devlink, trap, trap_item);
+	free_percpu(trap_item->stats);
+	kfree(trap_item);
+}
+
+static void devlink_trap_disable(struct devlink *devlink,
+				 const struct devlink_trap *trap)
+{
+	struct devlink_trap_item *trap_item;
+
+	trap_item = devlink_trap_item_lookup(devlink, trap->name);
+	if (WARN_ON_ONCE(!trap_item))
+		return;
+
+	devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP,
+				      NULL);
+	trap_item->action = DEVLINK_TRAP_ACTION_DROP;
+}
+
+/**
+ * devl_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devl_traps_register(struct devlink *devlink,
+			const struct devlink_trap *traps,
+			size_t traps_count, void *priv)
+{
+	int i, err;
+
+	if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
+		return -EINVAL;
+
+	devl_assert_locked(devlink);
+	for (i = 0; i < traps_count; i++) {
+		const struct devlink_trap *trap = &traps[i];
+
+		err = devlink_trap_verify(trap);
+		if (err)
+			goto err_trap_verify;
+
+		err = devlink_trap_register(devlink, trap, priv);
+		if (err)
+			goto err_trap_register;
+	}
+
+	return 0;
+
+err_trap_register:
+err_trap_verify:
+	for (i--; i >= 0; i--)
+		devlink_trap_unregister(devlink, &traps[i]);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devl_traps_register);
+
+/**
+ * devlink_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_traps_register(struct devlink *devlink,
+			   const struct devlink_trap *traps,
+			   size_t traps_count, void *priv)
+{
+	int err;
+
+	devl_lock(devlink);
+	err = devl_traps_register(devlink, traps, traps_count, priv);
+	devl_unlock(devlink);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_traps_register);
+
+/**
+ * devl_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ */
+void devl_traps_unregister(struct devlink *devlink,
+			   const struct devlink_trap *traps,
+			   size_t traps_count)
+{
+	int i;
+
+	devl_assert_locked(devlink);
+	/* Make sure we do not have any packets in-flight while unregistering
+	 * traps by disabling all of them and waiting for a grace period.
+	 */
+	for (i = traps_count - 1; i >= 0; i--)
+		devlink_trap_disable(devlink, &traps[i]);
+	synchronize_rcu();
+	for (i = traps_count - 1; i >= 0; i--)
+		devlink_trap_unregister(devlink, &traps[i]);
+}
+EXPORT_SYMBOL_GPL(devl_traps_unregister);
+
+/**
+ * devlink_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_traps_unregister(struct devlink *devlink,
+			      const struct devlink_trap *traps,
+			      size_t traps_count)
+{
+	devl_lock(devlink);
+	devl_traps_unregister(devlink, traps, traps_count);
+	devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_traps_unregister);
+
+static void
+devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
+			  size_t skb_len)
+{
+	struct devlink_stats *stats;
+
+	stats = this_cpu_ptr(trap_stats);
+	u64_stats_update_begin(&stats->syncp);
+	u64_stats_add(&stats->rx_bytes, skb_len);
+	u64_stats_inc(&stats->rx_packets);
+	u64_stats_update_end(&stats->syncp);
+}
+
+static void
+devlink_trap_report_metadata_set(struct devlink_trap_metadata *metadata,
+				 const struct devlink_trap_item *trap_item,
+				 struct devlink_port *in_devlink_port,
+				 const struct flow_action_cookie *fa_cookie)
+{
+	metadata->trap_name = trap_item->trap->name;
+	metadata->trap_group_name = trap_item->group_item->group->name;
+	metadata->fa_cookie = fa_cookie;
+	metadata->trap_type = trap_item->trap->type;
+
+	spin_lock(&in_devlink_port->type_lock);
+	if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
+		metadata->input_dev = in_devlink_port->type_eth.netdev;
+	spin_unlock(&in_devlink_port->type_lock);
+}
+
+/**
+ * devlink_trap_report - Report trapped packet to drop monitor.
+ * @devlink: devlink.
+ * @skb: Trapped packet.
+ * @trap_ctx: Trap context.
+ * @in_devlink_port: Input devlink port.
+ * @fa_cookie: Flow action cookie. Could be NULL.
+ */
+void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
+			 void *trap_ctx, struct devlink_port *in_devlink_port,
+			 const struct flow_action_cookie *fa_cookie)
+
+{
+	struct devlink_trap_item *trap_item = trap_ctx;
+
+	devlink_trap_stats_update(trap_item->stats, skb->len);
+	devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
+
+	if (tracepoint_enabled(devlink_trap_report)) {
+		struct devlink_trap_metadata metadata = {};
+
+		devlink_trap_report_metadata_set(&metadata, trap_item,
+						 in_devlink_port, fa_cookie);
+		trace_devlink_trap_report(devlink, skb, &metadata);
+	}
+}
+EXPORT_SYMBOL_GPL(devlink_trap_report);
+
+/**
+ * devlink_trap_ctx_priv - Trap context to driver private information.
+ * @trap_ctx: Trap context.
+ *
+ * Return: Driver private information passed during registration.
+ */
+void *devlink_trap_ctx_priv(void *trap_ctx)
+{
+	struct devlink_trap_item *trap_item = trap_ctx;
+
+	return trap_item->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv);
+
+static int
+devlink_trap_group_item_policer_link(struct devlink *devlink,
+				     struct devlink_trap_group_item *group_item)
+{
+	u32 policer_id = group_item->group->init_policer_id;
+	struct devlink_trap_policer_item *policer_item;
+
+	if (policer_id == 0)
+		return 0;
+
+	policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
+	if (WARN_ON_ONCE(!policer_item))
+		return -EINVAL;
+
+	group_item->policer_item = policer_item;
+
+	return 0;
+}
+
+static int
+devlink_trap_group_register(struct devlink *devlink,
+			    const struct devlink_trap_group *group)
+{
+	struct devlink_trap_group_item *group_item;
+	int err;
+
+	if (devlink_trap_group_item_lookup(devlink, group->name))
+		return -EEXIST;
+
+	group_item = kzalloc(sizeof(*group_item), GFP_KERNEL);
+	if (!group_item)
+		return -ENOMEM;
+
+	group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+	if (!group_item->stats) {
+		err = -ENOMEM;
+		goto err_stats_alloc;
+	}
+
+	group_item->group = group;
+
+	err = devlink_trap_group_item_policer_link(devlink, group_item);
+	if (err)
+		goto err_policer_link;
+
+	if (devlink->ops->trap_group_init) {
+		err = devlink->ops->trap_group_init(devlink, group);
+		if (err)
+			goto err_group_init;
+	}
+
+	list_add_tail(&group_item->list, &devlink->trap_group_list);
+	devlink_trap_group_notify(devlink, group_item,
+				  DEVLINK_CMD_TRAP_GROUP_NEW);
+
+	return 0;
+
+err_group_init:
+err_policer_link:
+	free_percpu(group_item->stats);
+err_stats_alloc:
+	kfree(group_item);
+	return err;
+}
+
+static void
+devlink_trap_group_unregister(struct devlink *devlink,
+			      const struct devlink_trap_group *group)
+{
+	struct devlink_trap_group_item *group_item;
+
+	group_item = devlink_trap_group_item_lookup(devlink, group->name);
+	if (WARN_ON_ONCE(!group_item))
+		return;
+
+	devlink_trap_group_notify(devlink, group_item,
+				  DEVLINK_CMD_TRAP_GROUP_DEL);
+	list_del(&group_item->list);
+	free_percpu(group_item->stats);
+	kfree(group_item);
+}
+
+/**
+ * devl_trap_groups_register - Register packet trap groups with devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devl_trap_groups_register(struct devlink *devlink,
+			      const struct devlink_trap_group *groups,
+			      size_t groups_count)
+{
+	int i, err;
+
+	devl_assert_locked(devlink);
+	for (i = 0; i < groups_count; i++) {
+		const struct devlink_trap_group *group = &groups[i];
+
+		err = devlink_trap_group_verify(group);
+		if (err)
+			goto err_trap_group_verify;
+
+		err = devlink_trap_group_register(devlink, group);
+		if (err)
+			goto err_trap_group_register;
+	}
+
+	return 0;
+
+err_trap_group_register:
+err_trap_group_verify:
+	for (i--; i >= 0; i--)
+		devlink_trap_group_unregister(devlink, &groups[i]);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devl_trap_groups_register);
+
+/**
+ * devlink_trap_groups_register - Register packet trap groups with devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_trap_groups_register(struct devlink *devlink,
+				 const struct devlink_trap_group *groups,
+				 size_t groups_count)
+{
+	int err;
+
+	devl_lock(devlink);
+	err = devl_trap_groups_register(devlink, groups, groups_count);
+	devl_unlock(devlink);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_trap_groups_register);
+
+/**
+ * devl_trap_groups_unregister - Unregister packet trap groups from devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ */
+void devl_trap_groups_unregister(struct devlink *devlink,
+				 const struct devlink_trap_group *groups,
+				 size_t groups_count)
+{
+	int i;
+
+	devl_assert_locked(devlink);
+	for (i = groups_count - 1; i >= 0; i--)
+		devlink_trap_group_unregister(devlink, &groups[i]);
+}
+EXPORT_SYMBOL_GPL(devl_trap_groups_unregister);
+
+/**
+ * devlink_trap_groups_unregister - Unregister packet trap groups from devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_trap_groups_unregister(struct devlink *devlink,
+				    const struct devlink_trap_group *groups,
+				    size_t groups_count)
+{
+	devl_lock(devlink);
+	devl_trap_groups_unregister(devlink, groups, groups_count);
+	devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister);
+
+static void
+devlink_trap_policer_notify(struct devlink *devlink,
+			    const struct devlink_trap_policer_item *policer_item,
+			    enum devlink_command cmd)
+{
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
+		     cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
+	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_trap_policer_fill(msg, devlink, policer_item, cmd, 0,
+					   0, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_trap_policers_notify_register(struct devlink *devlink)
+{
+	struct devlink_trap_policer_item *policer_item;
+
+	list_for_each_entry(policer_item, &devlink->trap_policer_list, list)
+		devlink_trap_policer_notify(devlink, policer_item,
+					    DEVLINK_CMD_TRAP_POLICER_NEW);
+}
+
+void devlink_trap_policers_notify_unregister(struct devlink *devlink)
+{
+	struct devlink_trap_policer_item *policer_item;
+
+	list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list,
+				    list)
+		devlink_trap_policer_notify(devlink, policer_item,
+					    DEVLINK_CMD_TRAP_POLICER_DEL);
+}
+
+static int
+devlink_trap_policer_register(struct devlink *devlink,
+			      const struct devlink_trap_policer *policer)
+{
+	struct devlink_trap_policer_item *policer_item;
+	int err;
+
+	if (devlink_trap_policer_item_lookup(devlink, policer->id))
+		return -EEXIST;
+
+	policer_item = kzalloc(sizeof(*policer_item), GFP_KERNEL);
+	if (!policer_item)
+		return -ENOMEM;
+
+	policer_item->policer = policer;
+	policer_item->rate = policer->init_rate;
+	policer_item->burst = policer->init_burst;
+
+	if (devlink->ops->trap_policer_init) {
+		err = devlink->ops->trap_policer_init(devlink, policer);
+		if (err)
+			goto err_policer_init;
+	}
+
+	list_add_tail(&policer_item->list, &devlink->trap_policer_list);
+	devlink_trap_policer_notify(devlink, policer_item,
+				    DEVLINK_CMD_TRAP_POLICER_NEW);
+
+	return 0;
+
+err_policer_init:
+	kfree(policer_item);
+	return err;
+}
+
+static void
+devlink_trap_policer_unregister(struct devlink *devlink,
+				const struct devlink_trap_policer *policer)
+{
+	struct devlink_trap_policer_item *policer_item;
+
+	policer_item = devlink_trap_policer_item_lookup(devlink, policer->id);
+	if (WARN_ON_ONCE(!policer_item))
+		return;
+
+	devlink_trap_policer_notify(devlink, policer_item,
+				    DEVLINK_CMD_TRAP_POLICER_DEL);
+	list_del(&policer_item->list);
+	if (devlink->ops->trap_policer_fini)
+		devlink->ops->trap_policer_fini(devlink, policer);
+	kfree(policer_item);
+}
+
+/**
+ * devl_trap_policers_register - Register packet trap policers with devlink.
+ * @devlink: devlink.
+ * @policers: Packet trap policers.
+ * @policers_count: Count of provided packet trap policers.
+ *
+ * Return: Non-zero value on failure.
+ */
+int
+devl_trap_policers_register(struct devlink *devlink,
+			    const struct devlink_trap_policer *policers,
+			    size_t policers_count)
+{
+	int i, err;
+
+	devl_assert_locked(devlink);
+	for (i = 0; i < policers_count; i++) {
+		const struct devlink_trap_policer *policer = &policers[i];
+
+		if (WARN_ON(policer->id == 0 ||
+			    policer->max_rate < policer->min_rate ||
+			    policer->max_burst < policer->min_burst)) {
+			err = -EINVAL;
+			goto err_trap_policer_verify;
+		}
+
+		err = devlink_trap_policer_register(devlink, policer);
+		if (err)
+			goto err_trap_policer_register;
+	}
+	return 0;
+
+err_trap_policer_register:
+err_trap_policer_verify:
+	for (i--; i >= 0; i--)
+		devlink_trap_policer_unregister(devlink, &policers[i]);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devl_trap_policers_register);
+
+/**
+ * devl_trap_policers_unregister - Unregister packet trap policers from devlink.
+ * @devlink: devlink.
+ * @policers: Packet trap policers.
+ * @policers_count: Count of provided packet trap policers.
+ */
+void
+devl_trap_policers_unregister(struct devlink *devlink,
+			      const struct devlink_trap_policer *policers,
+			      size_t policers_count)
+{
+	int i;
+
+	devl_assert_locked(devlink);
+	for (i = policers_count - 1; i >= 0; i--)
+		devlink_trap_policer_unregister(devlink, &policers[i]);
+}
+EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);

From 7cc7194e85ca01185f9d123e68189ef0a40f0c6a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:53 +0200
Subject: [PATCH 2007/2047] devlink: push rate related code into separate file

Cut out another chunk from leftover.c and put rate related code
into a separate file.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-12-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/Makefile        |   2 +-
 net/devlink/devl_internal.h |   5 +
 net/devlink/leftover.c      | 718 -----------------------------------
 net/devlink/rate.c          | 722 ++++++++++++++++++++++++++++++++++++
 4 files changed, 728 insertions(+), 719 deletions(-)
 create mode 100644 net/devlink/rate.c

diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index 523efffe522c..f61aa97688f5 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
-	 resource.o param.o region.o health.o trap.o
+	 resource.o param.o region.o health.o trap.o rate.o
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 148525aa2847..8b530f15c439 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -166,6 +166,8 @@ void devlink_trap_groups_notify_register(struct devlink *devlink);
 void devlink_trap_groups_notify_unregister(struct devlink *devlink);
 void devlink_traps_notify_register(struct devlink *devlink);
 void devlink_traps_notify_unregister(struct devlink *devlink);
+void devlink_rates_notify_register(struct devlink *devlink);
+void devlink_rates_notify_unregister(struct devlink *devlink);
 
 /* Ports */
 #define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port)				\
@@ -278,3 +280,6 @@ int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
 				       struct genl_info *info);
 int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
 					 struct genl_info *info);
+int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 5b153ce097ab..13958c3da59a 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -37,80 +37,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
 
-static inline bool
-devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
-{
-	return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF;
-}
-
-static inline bool
-devlink_rate_is_node(struct devlink_rate *devlink_rate)
-{
-	return devlink_rate->type == DEVLINK_RATE_TYPE_NODE;
-}
-
-static struct devlink_rate *
-devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
-{
-	struct devlink_rate *devlink_rate;
-	struct devlink_port *devlink_port;
-
-	devlink_port = devlink_port_get_from_attrs(devlink, info->attrs);
-	if (IS_ERR(devlink_port))
-		return ERR_CAST(devlink_port);
-	devlink_rate = devlink_port->devlink_rate;
-	return devlink_rate ?: ERR_PTR(-ENODEV);
-}
-
-static struct devlink_rate *
-devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name)
-{
-	static struct devlink_rate *devlink_rate;
-
-	list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
-		if (devlink_rate_is_node(devlink_rate) &&
-		    !strcmp(node_name, devlink_rate->name))
-			return devlink_rate;
-	}
-	return ERR_PTR(-ENODEV);
-}
-
-static struct devlink_rate *
-devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
-{
-	const char *rate_node_name;
-	size_t len;
-
-	if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME])
-		return ERR_PTR(-EINVAL);
-	rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]);
-	len = strlen(rate_node_name);
-	/* Name cannot be empty or decimal number */
-	if (!len || strspn(rate_node_name, "0123456789") == len)
-		return ERR_PTR(-EINVAL);
-
-	return devlink_rate_node_get_by_name(devlink, rate_node_name);
-}
-
-static struct devlink_rate *
-devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
-{
-	return devlink_rate_node_get_from_attrs(devlink, info->attrs);
-}
-
-static struct devlink_rate *
-devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
-{
-	struct nlattr **attrs = info->attrs;
-
-	if (attrs[DEVLINK_ATTR_PORT_INDEX])
-		return devlink_rate_leaf_get_from_info(devlink, info);
-	else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME])
-		return devlink_rate_node_get_from_info(devlink, info);
-	else
-		return ERR_PTR(-EINVAL);
-}
-
 static struct devlink_linecard *
 devlink_linecard_get_by_index(struct devlink *devlink,
 			      unsigned int linecard_index)
@@ -169,491 +95,6 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int devlink_nl_rate_fill(struct sk_buff *msg,
-				struct devlink_rate *devlink_rate,
-				enum devlink_command cmd, u32 portid, u32 seq,
-				int flags, struct netlink_ext_ack *extack)
-{
-	struct devlink *devlink = devlink_rate->devlink;
-	void *hdr;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-
-	if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type))
-		goto nla_put_failure;
-
-	if (devlink_rate_is_leaf(devlink_rate)) {
-		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
-				devlink_rate->devlink_port->index))
-			goto nla_put_failure;
-	} else if (devlink_rate_is_node(devlink_rate)) {
-		if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME,
-				   devlink_rate->name))
-			goto nla_put_failure;
-	}
-
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
-			      devlink_rate->tx_share, DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX,
-			      devlink_rate->tx_max, DEVLINK_ATTR_PAD))
-		goto nla_put_failure;
-
-	if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY,
-			devlink_rate->tx_priority))
-		goto nla_put_failure;
-
-	if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_WEIGHT,
-			devlink_rate->tx_weight))
-		goto nla_put_failure;
-
-	if (devlink_rate->parent)
-		if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME,
-				   devlink_rate->parent->name))
-			goto nla_put_failure;
-
-	genlmsg_end(msg, hdr);
-	return 0;
-
-nla_put_failure:
-	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
-}
-
-static void devlink_rate_notify(struct devlink_rate *devlink_rate,
-				enum devlink_command cmd)
-{
-	struct devlink *devlink = devlink_rate->devlink;
-	struct sk_buff *msg;
-	int err;
-
-	WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
-
-	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
-		return;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return;
-
-	err = devlink_nl_rate_fill(msg, devlink_rate, cmd, 0, 0, 0, NULL);
-	if (err) {
-		nlmsg_free(msg);
-		return;
-	}
-
-	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
-				0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static void devlink_rates_notify_register(struct devlink *devlink)
-{
-	struct devlink_rate *rate_node;
-
-	list_for_each_entry(rate_node, &devlink->rate_list, list)
-		devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
-}
-
-static void devlink_rates_notify_unregister(struct devlink *devlink)
-{
-	struct devlink_rate *rate_node;
-
-	list_for_each_entry_reverse(rate_node, &devlink->rate_list, list)
-		devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
-}
-
-static int
-devlink_nl_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
-			     struct netlink_callback *cb, int flags)
-{
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct devlink_rate *devlink_rate;
-	int idx = 0;
-	int err = 0;
-
-	list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
-		enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
-		u32 id = NETLINK_CB(cb->skb).portid;
-
-		if (idx < state->idx) {
-			idx++;
-			continue;
-		}
-		err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
-					   cb->nlh->nlmsg_seq, flags, NULL);
-		if (err) {
-			state->idx = idx;
-			break;
-		}
-		idx++;
-	}
-
-	return err;
-}
-
-int devlink_nl_rate_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	return devlink_nl_dumpit(skb, cb, devlink_nl_rate_get_dump_one);
-}
-
-int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_rate *devlink_rate;
-	struct sk_buff *msg;
-	int err;
-
-	devlink_rate = devlink_rate_get_from_info(devlink, info);
-	if (IS_ERR(devlink_rate))
-		return PTR_ERR(devlink_rate);
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW,
-				   info->snd_portid, info->snd_seq, 0,
-				   info->extack);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
-
-	return genlmsg_reply(msg, info);
-}
-
-static bool
-devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
-			    struct devlink_rate *parent)
-{
-	while (parent) {
-		if (parent == devlink_rate)
-			return true;
-		parent = parent->parent;
-	}
-	return false;
-}
-
-static int
-devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
-				struct genl_info *info,
-				struct nlattr *nla_parent)
-{
-	struct devlink *devlink = devlink_rate->devlink;
-	const char *parent_name = nla_data(nla_parent);
-	const struct devlink_ops *ops = devlink->ops;
-	size_t len = strlen(parent_name);
-	struct devlink_rate *parent;
-	int err = -EOPNOTSUPP;
-
-	parent = devlink_rate->parent;
-
-	if (parent && !len) {
-		if (devlink_rate_is_leaf(devlink_rate))
-			err = ops->rate_leaf_parent_set(devlink_rate, NULL,
-							devlink_rate->priv, NULL,
-							info->extack);
-		else if (devlink_rate_is_node(devlink_rate))
-			err = ops->rate_node_parent_set(devlink_rate, NULL,
-							devlink_rate->priv, NULL,
-							info->extack);
-		if (err)
-			return err;
-
-		refcount_dec(&parent->refcnt);
-		devlink_rate->parent = NULL;
-	} else if (len) {
-		parent = devlink_rate_node_get_by_name(devlink, parent_name);
-		if (IS_ERR(parent))
-			return -ENODEV;
-
-		if (parent == devlink_rate) {
-			NL_SET_ERR_MSG(info->extack, "Parent to self is not allowed");
-			return -EINVAL;
-		}
-
-		if (devlink_rate_is_node(devlink_rate) &&
-		    devlink_rate_is_parent_node(devlink_rate, parent->parent)) {
-			NL_SET_ERR_MSG(info->extack, "Node is already a parent of parent node.");
-			return -EEXIST;
-		}
-
-		if (devlink_rate_is_leaf(devlink_rate))
-			err = ops->rate_leaf_parent_set(devlink_rate, parent,
-							devlink_rate->priv, parent->priv,
-							info->extack);
-		else if (devlink_rate_is_node(devlink_rate))
-			err = ops->rate_node_parent_set(devlink_rate, parent,
-							devlink_rate->priv, parent->priv,
-							info->extack);
-		if (err)
-			return err;
-
-		if (devlink_rate->parent)
-			/* we're reassigning to other parent in this case */
-			refcount_dec(&devlink_rate->parent->refcnt);
-
-		refcount_inc(&parent->refcnt);
-		devlink_rate->parent = parent;
-	}
-
-	return 0;
-}
-
-static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
-			       const struct devlink_ops *ops,
-			       struct genl_info *info)
-{
-	struct nlattr *nla_parent, **attrs = info->attrs;
-	int err = -EOPNOTSUPP;
-	u32 priority;
-	u32 weight;
-	u64 rate;
-
-	if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) {
-		rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]);
-		if (devlink_rate_is_leaf(devlink_rate))
-			err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv,
-							  rate, info->extack);
-		else if (devlink_rate_is_node(devlink_rate))
-			err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv,
-							  rate, info->extack);
-		if (err)
-			return err;
-		devlink_rate->tx_share = rate;
-	}
-
-	if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) {
-		rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]);
-		if (devlink_rate_is_leaf(devlink_rate))
-			err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv,
-							rate, info->extack);
-		else if (devlink_rate_is_node(devlink_rate))
-			err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv,
-							rate, info->extack);
-		if (err)
-			return err;
-		devlink_rate->tx_max = rate;
-	}
-
-	if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]) {
-		priority = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]);
-		if (devlink_rate_is_leaf(devlink_rate))
-			err = ops->rate_leaf_tx_priority_set(devlink_rate, devlink_rate->priv,
-							     priority, info->extack);
-		else if (devlink_rate_is_node(devlink_rate))
-			err = ops->rate_node_tx_priority_set(devlink_rate, devlink_rate->priv,
-							     priority, info->extack);
-
-		if (err)
-			return err;
-		devlink_rate->tx_priority = priority;
-	}
-
-	if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]) {
-		weight = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]);
-		if (devlink_rate_is_leaf(devlink_rate))
-			err = ops->rate_leaf_tx_weight_set(devlink_rate, devlink_rate->priv,
-							   weight, info->extack);
-		else if (devlink_rate_is_node(devlink_rate))
-			err = ops->rate_node_tx_weight_set(devlink_rate, devlink_rate->priv,
-							   weight, info->extack);
-
-		if (err)
-			return err;
-		devlink_rate->tx_weight = weight;
-	}
-
-	nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME];
-	if (nla_parent) {
-		err = devlink_nl_rate_parent_node_set(devlink_rate, info,
-						      nla_parent);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
-					   struct genl_info *info,
-					   enum devlink_rate_type type)
-{
-	struct nlattr **attrs = info->attrs;
-
-	if (type == DEVLINK_RATE_TYPE_LEAF) {
-		if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) {
-			NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the leafs");
-			return false;
-		}
-		if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) {
-			NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the leafs");
-			return false;
-		}
-		if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
-		    !ops->rate_leaf_parent_set) {
-			NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the leafs");
-			return false;
-		}
-		if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_leaf_tx_priority_set) {
-			NL_SET_ERR_MSG_ATTR(info->extack,
-					    attrs[DEVLINK_ATTR_RATE_TX_PRIORITY],
-					    "TX priority set isn't supported for the leafs");
-			return false;
-		}
-		if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_leaf_tx_weight_set) {
-			NL_SET_ERR_MSG_ATTR(info->extack,
-					    attrs[DEVLINK_ATTR_RATE_TX_WEIGHT],
-					    "TX weight set isn't supported for the leafs");
-			return false;
-		}
-	} else if (type == DEVLINK_RATE_TYPE_NODE) {
-		if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
-			NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes");
-			return false;
-		}
-		if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) {
-			NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the nodes");
-			return false;
-		}
-		if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
-		    !ops->rate_node_parent_set) {
-			NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the nodes");
-			return false;
-		}
-		if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_node_tx_priority_set) {
-			NL_SET_ERR_MSG_ATTR(info->extack,
-					    attrs[DEVLINK_ATTR_RATE_TX_PRIORITY],
-					    "TX priority set isn't supported for the nodes");
-			return false;
-		}
-		if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_node_tx_weight_set) {
-			NL_SET_ERR_MSG_ATTR(info->extack,
-					    attrs[DEVLINK_ATTR_RATE_TX_WEIGHT],
-					    "TX weight set isn't supported for the nodes");
-			return false;
-		}
-	} else {
-		WARN(1, "Unknown type of rate object");
-		return false;
-	}
-
-	return true;
-}
-
-static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb,
-					struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_rate *devlink_rate;
-	const struct devlink_ops *ops;
-	int err;
-
-	devlink_rate = devlink_rate_get_from_info(devlink, info);
-	if (IS_ERR(devlink_rate))
-		return PTR_ERR(devlink_rate);
-
-	ops = devlink->ops;
-	if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
-		return -EOPNOTSUPP;
-
-	err = devlink_nl_rate_set(devlink_rate, ops, info);
-
-	if (!err)
-		devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
-	return err;
-}
-
-static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb,
-					struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_rate *rate_node;
-	const struct devlink_ops *ops;
-	int err;
-
-	ops = devlink->ops;
-	if (!ops || !ops->rate_node_new || !ops->rate_node_del) {
-		NL_SET_ERR_MSG(info->extack, "Rate nodes aren't supported");
-		return -EOPNOTSUPP;
-	}
-
-	if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE))
-		return -EOPNOTSUPP;
-
-	rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs);
-	if (!IS_ERR(rate_node))
-		return -EEXIST;
-	else if (rate_node == ERR_PTR(-EINVAL))
-		return -EINVAL;
-
-	rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
-	if (!rate_node)
-		return -ENOMEM;
-
-	rate_node->devlink = devlink;
-	rate_node->type = DEVLINK_RATE_TYPE_NODE;
-	rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL);
-	if (!rate_node->name) {
-		err = -ENOMEM;
-		goto err_strdup;
-	}
-
-	err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack);
-	if (err)
-		goto err_node_new;
-
-	err = devlink_nl_rate_set(rate_node, ops, info);
-	if (err)
-		goto err_rate_set;
-
-	refcount_set(&rate_node->refcnt, 1);
-	list_add(&rate_node->list, &devlink->rate_list);
-	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
-	return 0;
-
-err_rate_set:
-	ops->rate_node_del(rate_node, rate_node->priv, info->extack);
-err_node_new:
-	kfree(rate_node->name);
-err_strdup:
-	kfree(rate_node);
-	return err;
-}
-
-static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
-					struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_rate *rate_node;
-	int err;
-
-	rate_node = devlink_rate_node_get_from_info(devlink, info);
-	if (IS_ERR(rate_node))
-		return PTR_ERR(rate_node);
-
-	if (refcount_read(&rate_node->refcnt) > 1) {
-		NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node.");
-		return -EBUSY;
-	}
-
-	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
-	err = devlink->ops->rate_node_del(rate_node, rate_node->priv,
-					  info->extack);
-	if (rate_node->parent)
-		refcount_dec(&rate_node->parent->refcnt);
-	list_del(&rate_node->list);
-	kfree(rate_node->name);
-	kfree(rate_node);
-	return err;
-}
-
 struct devlink_linecard_type {
 	const char *type;
 	const void *priv;
@@ -986,19 +427,6 @@ static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
 	return 0;
 }
 
-int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
-			     struct netlink_ext_ack *extack)
-{
-	struct devlink_rate *devlink_rate;
-
-	list_for_each_entry(devlink_rate, &devlink->rate_list, list)
-		if (devlink_rate_is_node(devlink_rate)) {
-			NL_SET_ERR_MSG(extack, "Rate node(s) exists.");
-			return -EBUSY;
-		}
-	return 0;
-}
-
 const struct genl_small_ops devlink_nl_small_ops[40] = {
 	{
 		.cmd = DEVLINK_CMD_PORT_SET,
@@ -1276,152 +704,6 @@ void devlink_notify_unregister(struct devlink *devlink)
 	devlink_notify(devlink, DEVLINK_CMD_DEL);
 }
 
-/**
- * devl_rate_node_create - create devlink rate node
- * @devlink: devlink instance
- * @priv: driver private data
- * @node_name: name of the resulting node
- * @parent: parent devlink_rate struct
- *
- * Create devlink rate object of type node
- */
-struct devlink_rate *
-devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
-		      struct devlink_rate *parent)
-{
-	struct devlink_rate *rate_node;
-
-	rate_node = devlink_rate_node_get_by_name(devlink, node_name);
-	if (!IS_ERR(rate_node))
-		return ERR_PTR(-EEXIST);
-
-	rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
-	if (!rate_node)
-		return ERR_PTR(-ENOMEM);
-
-	if (parent) {
-		rate_node->parent = parent;
-		refcount_inc(&rate_node->parent->refcnt);
-	}
-
-	rate_node->type = DEVLINK_RATE_TYPE_NODE;
-	rate_node->devlink = devlink;
-	rate_node->priv = priv;
-
-	rate_node->name = kstrdup(node_name, GFP_KERNEL);
-	if (!rate_node->name) {
-		kfree(rate_node);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	refcount_set(&rate_node->refcnt, 1);
-	list_add(&rate_node->list, &devlink->rate_list);
-	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
-	return rate_node;
-}
-EXPORT_SYMBOL_GPL(devl_rate_node_create);
-
-/**
- * devl_rate_leaf_create - create devlink rate leaf
- * @devlink_port: devlink port object to create rate object on
- * @priv: driver private data
- * @parent: parent devlink_rate struct
- *
- * Create devlink rate object of type leaf on provided @devlink_port.
- */
-int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
-			  struct devlink_rate *parent)
-{
-	struct devlink *devlink = devlink_port->devlink;
-	struct devlink_rate *devlink_rate;
-
-	devl_assert_locked(devlink_port->devlink);
-
-	if (WARN_ON(devlink_port->devlink_rate))
-		return -EBUSY;
-
-	devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
-	if (!devlink_rate)
-		return -ENOMEM;
-
-	if (parent) {
-		devlink_rate->parent = parent;
-		refcount_inc(&devlink_rate->parent->refcnt);
-	}
-
-	devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
-	devlink_rate->devlink = devlink;
-	devlink_rate->devlink_port = devlink_port;
-	devlink_rate->priv = priv;
-	list_add_tail(&devlink_rate->list, &devlink->rate_list);
-	devlink_port->devlink_rate = devlink_rate;
-	devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devl_rate_leaf_create);
-
-/**
- * devl_rate_leaf_destroy - destroy devlink rate leaf
- *
- * @devlink_port: devlink port linked to the rate object
- *
- * Destroy the devlink rate object of type leaf on provided @devlink_port.
- */
-void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
-{
-	struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
-
-	devl_assert_locked(devlink_port->devlink);
-	if (!devlink_rate)
-		return;
-
-	devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
-	if (devlink_rate->parent)
-		refcount_dec(&devlink_rate->parent->refcnt);
-	list_del(&devlink_rate->list);
-	devlink_port->devlink_rate = NULL;
-	kfree(devlink_rate);
-}
-EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
-
-/**
- * devl_rate_nodes_destroy - destroy all devlink rate nodes on device
- * @devlink: devlink instance
- *
- * Unset parent for all rate objects and destroy all rate nodes
- * on specified device.
- */
-void devl_rate_nodes_destroy(struct devlink *devlink)
-{
-	static struct devlink_rate *devlink_rate, *tmp;
-	const struct devlink_ops *ops = devlink->ops;
-
-	devl_assert_locked(devlink);
-
-	list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
-		if (!devlink_rate->parent)
-			continue;
-
-		refcount_dec(&devlink_rate->parent->refcnt);
-		if (devlink_rate_is_leaf(devlink_rate))
-			ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv,
-						  NULL, NULL);
-		else if (devlink_rate_is_node(devlink_rate))
-			ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv,
-						  NULL, NULL);
-	}
-	list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) {
-		if (devlink_rate_is_node(devlink_rate)) {
-			ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL);
-			list_del(&devlink_rate->list);
-			kfree(devlink_rate->name);
-			kfree(devlink_rate);
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);
-
 static int devlink_linecard_types_init(struct devlink_linecard *linecard)
 {
 	struct devlink_linecard_type *linecard_type;
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
new file mode 100644
index 000000000000..dff1593b8406
--- /dev/null
+++ b/net/devlink/rate.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+static inline bool
+devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
+{
+	return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF;
+}
+
+static inline bool
+devlink_rate_is_node(struct devlink_rate *devlink_rate)
+{
+	return devlink_rate->type == DEVLINK_RATE_TYPE_NODE;
+}
+
+static struct devlink_rate *
+devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+	struct devlink_rate *devlink_rate;
+	struct devlink_port *devlink_port;
+
+	devlink_port = devlink_port_get_from_attrs(devlink, info->attrs);
+	if (IS_ERR(devlink_port))
+		return ERR_CAST(devlink_port);
+	devlink_rate = devlink_port->devlink_rate;
+	return devlink_rate ?: ERR_PTR(-ENODEV);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name)
+{
+	static struct devlink_rate *devlink_rate;
+
+	list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+		if (devlink_rate_is_node(devlink_rate) &&
+		    !strcmp(node_name, devlink_rate->name))
+			return devlink_rate;
+	}
+	return ERR_PTR(-ENODEV);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
+{
+	const char *rate_node_name;
+	size_t len;
+
+	if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME])
+		return ERR_PTR(-EINVAL);
+	rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]);
+	len = strlen(rate_node_name);
+	/* Name cannot be empty or decimal number */
+	if (!len || strspn(rate_node_name, "0123456789") == len)
+		return ERR_PTR(-EINVAL);
+
+	return devlink_rate_node_get_by_name(devlink, rate_node_name);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+	return devlink_rate_node_get_from_attrs(devlink, info->attrs);
+}
+
+static struct devlink_rate *
+devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+	struct nlattr **attrs = info->attrs;
+
+	if (attrs[DEVLINK_ATTR_PORT_INDEX])
+		return devlink_rate_leaf_get_from_info(devlink, info);
+	else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME])
+		return devlink_rate_node_get_from_info(devlink, info);
+	else
+		return ERR_PTR(-EINVAL);
+}
+
+static int devlink_nl_rate_fill(struct sk_buff *msg,
+				struct devlink_rate *devlink_rate,
+				enum devlink_command cmd, u32 portid, u32 seq,
+				int flags, struct netlink_ext_ack *extack)
+{
+	struct devlink *devlink = devlink_rate->devlink;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type))
+		goto nla_put_failure;
+
+	if (devlink_rate_is_leaf(devlink_rate)) {
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+				devlink_rate->devlink_port->index))
+			goto nla_put_failure;
+	} else if (devlink_rate_is_node(devlink_rate)) {
+		if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME,
+				   devlink_rate->name))
+			goto nla_put_failure;
+	}
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
+			      devlink_rate->tx_share, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX,
+			      devlink_rate->tx_max, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY,
+			devlink_rate->tx_priority))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_WEIGHT,
+			devlink_rate->tx_weight))
+		goto nla_put_failure;
+
+	if (devlink_rate->parent)
+		if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME,
+				   devlink_rate->parent->name))
+			goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static void devlink_rate_notify(struct devlink_rate *devlink_rate,
+				enum devlink_command cmd)
+{
+	struct devlink *devlink = devlink_rate->devlink;
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
+
+	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_rate_fill(msg, devlink_rate, cmd, 0, 0, 0, NULL);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+				0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_rates_notify_register(struct devlink *devlink)
+{
+	struct devlink_rate *rate_node;
+
+	list_for_each_entry(rate_node, &devlink->rate_list, list)
+		devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+}
+
+void devlink_rates_notify_unregister(struct devlink *devlink)
+{
+	struct devlink_rate *rate_node;
+
+	list_for_each_entry_reverse(rate_node, &devlink->rate_list, list)
+		devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+}
+
+static int
+devlink_nl_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+			     struct netlink_callback *cb, int flags)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_rate *devlink_rate;
+	int idx = 0;
+	int err = 0;
+
+	list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+		enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
+		u32 id = NETLINK_CB(cb->skb).portid;
+
+		if (idx < state->idx) {
+			idx++;
+			continue;
+		}
+		err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
+					   cb->nlh->nlmsg_seq, flags, NULL);
+		if (err) {
+			state->idx = idx;
+			break;
+		}
+		idx++;
+	}
+
+	return err;
+}
+
+int devlink_nl_rate_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return devlink_nl_dumpit(skb, cb, devlink_nl_rate_get_dump_one);
+}
+
+int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_rate *devlink_rate;
+	struct sk_buff *msg;
+	int err;
+
+	devlink_rate = devlink_rate_get_from_info(devlink, info);
+	if (IS_ERR(devlink_rate))
+		return PTR_ERR(devlink_rate);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW,
+				   info->snd_portid, info->snd_seq, 0,
+				   info->extack);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static bool
+devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
+			    struct devlink_rate *parent)
+{
+	while (parent) {
+		if (parent == devlink_rate)
+			return true;
+		parent = parent->parent;
+	}
+	return false;
+}
+
+static int
+devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
+				struct genl_info *info,
+				struct nlattr *nla_parent)
+{
+	struct devlink *devlink = devlink_rate->devlink;
+	const char *parent_name = nla_data(nla_parent);
+	const struct devlink_ops *ops = devlink->ops;
+	size_t len = strlen(parent_name);
+	struct devlink_rate *parent;
+	int err = -EOPNOTSUPP;
+
+	parent = devlink_rate->parent;
+
+	if (parent && !len) {
+		if (devlink_rate_is_leaf(devlink_rate))
+			err = ops->rate_leaf_parent_set(devlink_rate, NULL,
+							devlink_rate->priv, NULL,
+							info->extack);
+		else if (devlink_rate_is_node(devlink_rate))
+			err = ops->rate_node_parent_set(devlink_rate, NULL,
+							devlink_rate->priv, NULL,
+							info->extack);
+		if (err)
+			return err;
+
+		refcount_dec(&parent->refcnt);
+		devlink_rate->parent = NULL;
+	} else if (len) {
+		parent = devlink_rate_node_get_by_name(devlink, parent_name);
+		if (IS_ERR(parent))
+			return -ENODEV;
+
+		if (parent == devlink_rate) {
+			NL_SET_ERR_MSG(info->extack, "Parent to self is not allowed");
+			return -EINVAL;
+		}
+
+		if (devlink_rate_is_node(devlink_rate) &&
+		    devlink_rate_is_parent_node(devlink_rate, parent->parent)) {
+			NL_SET_ERR_MSG(info->extack, "Node is already a parent of parent node.");
+			return -EEXIST;
+		}
+
+		if (devlink_rate_is_leaf(devlink_rate))
+			err = ops->rate_leaf_parent_set(devlink_rate, parent,
+							devlink_rate->priv, parent->priv,
+							info->extack);
+		else if (devlink_rate_is_node(devlink_rate))
+			err = ops->rate_node_parent_set(devlink_rate, parent,
+							devlink_rate->priv, parent->priv,
+							info->extack);
+		if (err)
+			return err;
+
+		if (devlink_rate->parent)
+			/* we're reassigning to other parent in this case */
+			refcount_dec(&devlink_rate->parent->refcnt);
+
+		refcount_inc(&parent->refcnt);
+		devlink_rate->parent = parent;
+	}
+
+	return 0;
+}
+
+static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
+			       const struct devlink_ops *ops,
+			       struct genl_info *info)
+{
+	struct nlattr *nla_parent, **attrs = info->attrs;
+	int err = -EOPNOTSUPP;
+	u32 priority;
+	u32 weight;
+	u64 rate;
+
+	if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) {
+		rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]);
+		if (devlink_rate_is_leaf(devlink_rate))
+			err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv,
+							  rate, info->extack);
+		else if (devlink_rate_is_node(devlink_rate))
+			err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv,
+							  rate, info->extack);
+		if (err)
+			return err;
+		devlink_rate->tx_share = rate;
+	}
+
+	if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) {
+		rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]);
+		if (devlink_rate_is_leaf(devlink_rate))
+			err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv,
+							rate, info->extack);
+		else if (devlink_rate_is_node(devlink_rate))
+			err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv,
+							rate, info->extack);
+		if (err)
+			return err;
+		devlink_rate->tx_max = rate;
+	}
+
+	if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]) {
+		priority = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]);
+		if (devlink_rate_is_leaf(devlink_rate))
+			err = ops->rate_leaf_tx_priority_set(devlink_rate, devlink_rate->priv,
+							     priority, info->extack);
+		else if (devlink_rate_is_node(devlink_rate))
+			err = ops->rate_node_tx_priority_set(devlink_rate, devlink_rate->priv,
+							     priority, info->extack);
+
+		if (err)
+			return err;
+		devlink_rate->tx_priority = priority;
+	}
+
+	if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]) {
+		weight = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]);
+		if (devlink_rate_is_leaf(devlink_rate))
+			err = ops->rate_leaf_tx_weight_set(devlink_rate, devlink_rate->priv,
+							   weight, info->extack);
+		else if (devlink_rate_is_node(devlink_rate))
+			err = ops->rate_node_tx_weight_set(devlink_rate, devlink_rate->priv,
+							   weight, info->extack);
+
+		if (err)
+			return err;
+		devlink_rate->tx_weight = weight;
+	}
+
+	nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME];
+	if (nla_parent) {
+		err = devlink_nl_rate_parent_node_set(devlink_rate, info,
+						      nla_parent);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
+					   struct genl_info *info,
+					   enum devlink_rate_type type)
+{
+	struct nlattr **attrs = info->attrs;
+
+	if (type == DEVLINK_RATE_TYPE_LEAF) {
+		if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) {
+			NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the leafs");
+			return false;
+		}
+		if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) {
+			NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the leafs");
+			return false;
+		}
+		if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
+		    !ops->rate_leaf_parent_set) {
+			NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the leafs");
+			return false;
+		}
+		if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_leaf_tx_priority_set) {
+			NL_SET_ERR_MSG_ATTR(info->extack,
+					    attrs[DEVLINK_ATTR_RATE_TX_PRIORITY],
+					    "TX priority set isn't supported for the leafs");
+			return false;
+		}
+		if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_leaf_tx_weight_set) {
+			NL_SET_ERR_MSG_ATTR(info->extack,
+					    attrs[DEVLINK_ATTR_RATE_TX_WEIGHT],
+					    "TX weight set isn't supported for the leafs");
+			return false;
+		}
+	} else if (type == DEVLINK_RATE_TYPE_NODE) {
+		if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
+			NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes");
+			return false;
+		}
+		if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) {
+			NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the nodes");
+			return false;
+		}
+		if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
+		    !ops->rate_node_parent_set) {
+			NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the nodes");
+			return false;
+		}
+		if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_node_tx_priority_set) {
+			NL_SET_ERR_MSG_ATTR(info->extack,
+					    attrs[DEVLINK_ATTR_RATE_TX_PRIORITY],
+					    "TX priority set isn't supported for the nodes");
+			return false;
+		}
+		if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_node_tx_weight_set) {
+			NL_SET_ERR_MSG_ATTR(info->extack,
+					    attrs[DEVLINK_ATTR_RATE_TX_WEIGHT],
+					    "TX weight set isn't supported for the nodes");
+			return false;
+		}
+	} else {
+		WARN(1, "Unknown type of rate object");
+		return false;
+	}
+
+	return true;
+}
+
+int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_rate *devlink_rate;
+	const struct devlink_ops *ops;
+	int err;
+
+	devlink_rate = devlink_rate_get_from_info(devlink, info);
+	if (IS_ERR(devlink_rate))
+		return PTR_ERR(devlink_rate);
+
+	ops = devlink->ops;
+	if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
+		return -EOPNOTSUPP;
+
+	err = devlink_nl_rate_set(devlink_rate, ops, info);
+
+	if (!err)
+		devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+	return err;
+}
+
+int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_rate *rate_node;
+	const struct devlink_ops *ops;
+	int err;
+
+	ops = devlink->ops;
+	if (!ops || !ops->rate_node_new || !ops->rate_node_del) {
+		NL_SET_ERR_MSG(info->extack, "Rate nodes aren't supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE))
+		return -EOPNOTSUPP;
+
+	rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs);
+	if (!IS_ERR(rate_node))
+		return -EEXIST;
+	else if (rate_node == ERR_PTR(-EINVAL))
+		return -EINVAL;
+
+	rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
+	if (!rate_node)
+		return -ENOMEM;
+
+	rate_node->devlink = devlink;
+	rate_node->type = DEVLINK_RATE_TYPE_NODE;
+	rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL);
+	if (!rate_node->name) {
+		err = -ENOMEM;
+		goto err_strdup;
+	}
+
+	err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack);
+	if (err)
+		goto err_node_new;
+
+	err = devlink_nl_rate_set(rate_node, ops, info);
+	if (err)
+		goto err_rate_set;
+
+	refcount_set(&rate_node->refcnt, 1);
+	list_add(&rate_node->list, &devlink->rate_list);
+	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+	return 0;
+
+err_rate_set:
+	ops->rate_node_del(rate_node, rate_node->priv, info->extack);
+err_node_new:
+	kfree(rate_node->name);
+err_strdup:
+	kfree(rate_node);
+	return err;
+}
+
+int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_rate *rate_node;
+	int err;
+
+	rate_node = devlink_rate_node_get_from_info(devlink, info);
+	if (IS_ERR(rate_node))
+		return PTR_ERR(rate_node);
+
+	if (refcount_read(&rate_node->refcnt) > 1) {
+		NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node.");
+		return -EBUSY;
+	}
+
+	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+	err = devlink->ops->rate_node_del(rate_node, rate_node->priv,
+					  info->extack);
+	if (rate_node->parent)
+		refcount_dec(&rate_node->parent->refcnt);
+	list_del(&rate_node->list);
+	kfree(rate_node->name);
+	kfree(rate_node);
+	return err;
+}
+
+int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+			     struct netlink_ext_ack *extack)
+{
+	struct devlink_rate *devlink_rate;
+
+	list_for_each_entry(devlink_rate, &devlink->rate_list, list)
+		if (devlink_rate_is_node(devlink_rate)) {
+			NL_SET_ERR_MSG(extack, "Rate node(s) exists.");
+			return -EBUSY;
+		}
+	return 0;
+}
+
+/**
+ * devl_rate_node_create - create devlink rate node
+ * @devlink: devlink instance
+ * @priv: driver private data
+ * @node_name: name of the resulting node
+ * @parent: parent devlink_rate struct
+ *
+ * Create devlink rate object of type node
+ */
+struct devlink_rate *
+devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
+		      struct devlink_rate *parent)
+{
+	struct devlink_rate *rate_node;
+
+	rate_node = devlink_rate_node_get_by_name(devlink, node_name);
+	if (!IS_ERR(rate_node))
+		return ERR_PTR(-EEXIST);
+
+	rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
+	if (!rate_node)
+		return ERR_PTR(-ENOMEM);
+
+	if (parent) {
+		rate_node->parent = parent;
+		refcount_inc(&rate_node->parent->refcnt);
+	}
+
+	rate_node->type = DEVLINK_RATE_TYPE_NODE;
+	rate_node->devlink = devlink;
+	rate_node->priv = priv;
+
+	rate_node->name = kstrdup(node_name, GFP_KERNEL);
+	if (!rate_node->name) {
+		kfree(rate_node);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	refcount_set(&rate_node->refcnt, 1);
+	list_add(&rate_node->list, &devlink->rate_list);
+	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+	return rate_node;
+}
+EXPORT_SYMBOL_GPL(devl_rate_node_create);
+
+/**
+ * devl_rate_leaf_create - create devlink rate leaf
+ * @devlink_port: devlink port object to create rate object on
+ * @priv: driver private data
+ * @parent: parent devlink_rate struct
+ *
+ * Create devlink rate object of type leaf on provided @devlink_port.
+ */
+int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
+			  struct devlink_rate *parent)
+{
+	struct devlink *devlink = devlink_port->devlink;
+	struct devlink_rate *devlink_rate;
+
+	devl_assert_locked(devlink_port->devlink);
+
+	if (WARN_ON(devlink_port->devlink_rate))
+		return -EBUSY;
+
+	devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
+	if (!devlink_rate)
+		return -ENOMEM;
+
+	if (parent) {
+		devlink_rate->parent = parent;
+		refcount_inc(&devlink_rate->parent->refcnt);
+	}
+
+	devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
+	devlink_rate->devlink = devlink;
+	devlink_rate->devlink_port = devlink_port;
+	devlink_rate->priv = priv;
+	list_add_tail(&devlink_rate->list, &devlink->rate_list);
+	devlink_port->devlink_rate = devlink_rate;
+	devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devl_rate_leaf_create);
+
+/**
+ * devl_rate_leaf_destroy - destroy devlink rate leaf
+ *
+ * @devlink_port: devlink port linked to the rate object
+ *
+ * Destroy the devlink rate object of type leaf on provided @devlink_port.
+ */
+void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
+{
+	struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
+
+	devl_assert_locked(devlink_port->devlink);
+	if (!devlink_rate)
+		return;
+
+	devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
+	if (devlink_rate->parent)
+		refcount_dec(&devlink_rate->parent->refcnt);
+	list_del(&devlink_rate->list);
+	devlink_port->devlink_rate = NULL;
+	kfree(devlink_rate);
+}
+EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
+
+/**
+ * devl_rate_nodes_destroy - destroy all devlink rate nodes on device
+ * @devlink: devlink instance
+ *
+ * Unset parent for all rate objects and destroy all rate nodes
+ * on specified device.
+ */
+void devl_rate_nodes_destroy(struct devlink *devlink)
+{
+	static struct devlink_rate *devlink_rate, *tmp;
+	const struct devlink_ops *ops = devlink->ops;
+
+	devl_assert_locked(devlink);
+
+	list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+		if (!devlink_rate->parent)
+			continue;
+
+		refcount_dec(&devlink_rate->parent->refcnt);
+		if (devlink_rate_is_leaf(devlink_rate))
+			ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv,
+						  NULL, NULL);
+		else if (devlink_rate_is_node(devlink_rate))
+			ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv,
+						  NULL, NULL);
+	}
+	list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) {
+		if (devlink_rate_is_node(devlink_rate)) {
+			ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL);
+			list_del(&devlink_rate->list);
+			kfree(devlink_rate->name);
+			kfree(devlink_rate);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);

From 9edbe6f36c5f86776cc1c6ba0f546a4aefe2767f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:54 +0200
Subject: [PATCH 2008/2047] devlink: push linecard related code into separate
 file

Cut out another chunk from leftover.c and put linecard related code
into a separate file.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-13-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/Makefile        |   2 +-
 net/devlink/devl_internal.h |  34 +-
 net/devlink/leftover.c      | 599 -----------------------------------
 net/devlink/linecard.c      | 606 ++++++++++++++++++++++++++++++++++++
 4 files changed, 626 insertions(+), 615 deletions(-)
 create mode 100644 net/devlink/linecard.c

diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index f61aa97688f5..71f490d301d7 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
-	 resource.o param.o region.o health.o trap.o rate.o
+	 resource.o param.o region.o health.o trap.o rate.o linecard.o
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 8b530f15c439..8c81f731a4c7 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -168,6 +168,8 @@ void devlink_traps_notify_register(struct devlink *devlink);
 void devlink_traps_notify_unregister(struct devlink *devlink);
 void devlink_rates_notify_register(struct devlink *devlink);
 void devlink_rates_notify_unregister(struct devlink *devlink);
+void devlink_linecards_notify_register(struct devlink *devlink);
+void devlink_linecards_notify_unregister(struct devlink *devlink);
 
 /* Ports */
 #define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port)				\
@@ -182,21 +184,6 @@ devlink_port_get_from_info(struct devlink *devlink, struct genl_info *info);
 struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
 						 struct nlattr **attrs);
 
-/* Linecards */
-struct devlink_linecard {
-	struct list_head list;
-	struct devlink *devlink;
-	unsigned int index;
-	const struct devlink_linecard_ops *ops;
-	void *priv;
-	enum devlink_linecard_state state;
-	struct mutex state_lock; /* Protects state */
-	const char *type;
-	struct devlink_linecard_type *types;
-	unsigned int types_count;
-	struct devlink *nested_devlink;
-};
-
 /* Reload */
 bool devlink_reload_actions_valid(const struct devlink_ops *ops);
 int devlink_reload(struct devlink *devlink, struct net *dest_net,
@@ -222,6 +209,21 @@ int devlink_resources_validate(struct devlink *devlink,
 int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
 			     struct netlink_ext_ack *extack);
 
+/* Linecards */
+struct devlink_linecard {
+	struct list_head list;
+	struct devlink *devlink;
+	unsigned int index;
+	const struct devlink_linecard_ops *ops;
+	void *priv;
+	enum devlink_linecard_state state;
+	struct mutex state_lock; /* Protects state */
+	const char *type;
+	struct devlink_linecard_type *types;
+	unsigned int types_count;
+	struct devlink *nested_devlink;
+};
+
 /* Devlink nl cmds */
 int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info);
@@ -283,3 +285,5 @@ int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
 int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
+				     struct genl_info *info);
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 13958c3da59a..98ccb3a8393d 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -37,396 +37,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
 
-static struct devlink_linecard *
-devlink_linecard_get_by_index(struct devlink *devlink,
-			      unsigned int linecard_index)
-{
-	struct devlink_linecard *devlink_linecard;
-
-	list_for_each_entry(devlink_linecard, &devlink->linecard_list, list) {
-		if (devlink_linecard->index == linecard_index)
-			return devlink_linecard;
-	}
-	return NULL;
-}
-
-static bool devlink_linecard_index_exists(struct devlink *devlink,
-					  unsigned int linecard_index)
-{
-	return devlink_linecard_get_by_index(devlink, linecard_index);
-}
-
-static struct devlink_linecard *
-devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
-{
-	if (attrs[DEVLINK_ATTR_LINECARD_INDEX]) {
-		u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]);
-		struct devlink_linecard *linecard;
-
-		linecard = devlink_linecard_get_by_index(devlink, linecard_index);
-		if (!linecard)
-			return ERR_PTR(-ENODEV);
-		return linecard;
-	}
-	return ERR_PTR(-EINVAL);
-}
-
-static struct devlink_linecard *
-devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
-{
-	return devlink_linecard_get_from_attrs(devlink, info->attrs);
-}
-
-static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
-{
-	struct nlattr *nested_attr;
-
-	nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK);
-	if (!nested_attr)
-		return -EMSGSIZE;
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-
-	nla_nest_end(msg, nested_attr);
-	return 0;
-
-nla_put_failure:
-	nla_nest_cancel(msg, nested_attr);
-	return -EMSGSIZE;
-}
-
-struct devlink_linecard_type {
-	const char *type;
-	const void *priv;
-};
-
-static int devlink_nl_linecard_fill(struct sk_buff *msg,
-				    struct devlink *devlink,
-				    struct devlink_linecard *linecard,
-				    enum devlink_command cmd, u32 portid,
-				    u32 seq, int flags,
-				    struct netlink_ext_ack *extack)
-{
-	struct devlink_linecard_type *linecard_type;
-	struct nlattr *attr;
-	void *hdr;
-	int i;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index))
-		goto nla_put_failure;
-	if (nla_put_u8(msg, DEVLINK_ATTR_LINECARD_STATE, linecard->state))
-		goto nla_put_failure;
-	if (linecard->type &&
-	    nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, linecard->type))
-		goto nla_put_failure;
-
-	if (linecard->types_count) {
-		attr = nla_nest_start(msg,
-				      DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES);
-		if (!attr)
-			goto nla_put_failure;
-		for (i = 0; i < linecard->types_count; i++) {
-			linecard_type = &linecard->types[i];
-			if (nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE,
-					   linecard_type->type)) {
-				nla_nest_cancel(msg, attr);
-				goto nla_put_failure;
-			}
-		}
-		nla_nest_end(msg, attr);
-	}
-
-	if (linecard->nested_devlink &&
-	    devlink_nl_put_nested_handle(msg, linecard->nested_devlink))
-		goto nla_put_failure;
-
-	genlmsg_end(msg, hdr);
-	return 0;
-
-nla_put_failure:
-	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
-}
-
-static void devlink_linecard_notify(struct devlink_linecard *linecard,
-				    enum devlink_command cmd)
-{
-	struct devlink *devlink = linecard->devlink;
-	struct sk_buff *msg;
-	int err;
-
-	WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW &&
-		cmd != DEVLINK_CMD_LINECARD_DEL);
-
-	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
-		return;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return;
-
-	err = devlink_nl_linecard_fill(msg, devlink, linecard, cmd, 0, 0, 0,
-				       NULL);
-	if (err) {
-		nlmsg_free(msg);
-		return;
-	}
-
-	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
-				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static void devlink_linecards_notify_register(struct devlink *devlink)
-{
-	struct devlink_linecard *linecard;
-
-	list_for_each_entry(linecard, &devlink->linecard_list, list)
-		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-}
-
-static void devlink_linecards_notify_unregister(struct devlink *devlink)
-{
-	struct devlink_linecard *linecard;
-
-	list_for_each_entry_reverse(linecard, &devlink->linecard_list, list)
-		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
-}
-
-int devlink_nl_linecard_get_doit(struct sk_buff *skb, struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_linecard *linecard;
-	struct sk_buff *msg;
-	int err;
-
-	linecard = devlink_linecard_get_from_info(devlink, info);
-	if (IS_ERR(linecard))
-		return PTR_ERR(linecard);
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	mutex_lock(&linecard->state_lock);
-	err = devlink_nl_linecard_fill(msg, devlink, linecard,
-				       DEVLINK_CMD_LINECARD_NEW,
-				       info->snd_portid, info->snd_seq, 0,
-				       info->extack);
-	mutex_unlock(&linecard->state_lock);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
-
-	return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_linecard_get_dump_one(struct sk_buff *msg,
-					    struct devlink *devlink,
-					    struct netlink_callback *cb,
-					    int flags)
-{
-	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
-	struct devlink_linecard *linecard;
-	int idx = 0;
-	int err = 0;
-
-	list_for_each_entry(linecard, &devlink->linecard_list, list) {
-		if (idx < state->idx) {
-			idx++;
-			continue;
-		}
-		mutex_lock(&linecard->state_lock);
-		err = devlink_nl_linecard_fill(msg, devlink, linecard,
-					       DEVLINK_CMD_LINECARD_NEW,
-					       NETLINK_CB(cb->skb).portid,
-					       cb->nlh->nlmsg_seq, flags,
-					       cb->extack);
-		mutex_unlock(&linecard->state_lock);
-		if (err) {
-			state->idx = idx;
-			break;
-		}
-		idx++;
-	}
-
-	return err;
-}
-
-int devlink_nl_linecard_get_dumpit(struct sk_buff *skb,
-				   struct netlink_callback *cb)
-{
-	return devlink_nl_dumpit(skb, cb, devlink_nl_linecard_get_dump_one);
-}
-
-static struct devlink_linecard_type *
-devlink_linecard_type_lookup(struct devlink_linecard *linecard,
-			     const char *type)
-{
-	struct devlink_linecard_type *linecard_type;
-	int i;
-
-	for (i = 0; i < linecard->types_count; i++) {
-		linecard_type = &linecard->types[i];
-		if (!strcmp(type, linecard_type->type))
-			return linecard_type;
-	}
-	return NULL;
-}
-
-static int devlink_linecard_type_set(struct devlink_linecard *linecard,
-				     const char *type,
-				     struct netlink_ext_ack *extack)
-{
-	const struct devlink_linecard_ops *ops = linecard->ops;
-	struct devlink_linecard_type *linecard_type;
-	int err;
-
-	mutex_lock(&linecard->state_lock);
-	if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
-		NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
-		err = -EBUSY;
-		goto out;
-	}
-	if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
-		NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
-		err = -EBUSY;
-		goto out;
-	}
-
-	linecard_type = devlink_linecard_type_lookup(linecard, type);
-	if (!linecard_type) {
-		NL_SET_ERR_MSG(extack, "Unsupported line card type provided");
-		err = -EINVAL;
-		goto out;
-	}
-
-	if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED &&
-	    linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
-		NL_SET_ERR_MSG(extack, "Line card already provisioned");
-		err = -EBUSY;
-		/* Check if the line card is provisioned in the same
-		 * way the user asks. In case it is, make the operation
-		 * to return success.
-		 */
-		if (ops->same_provision &&
-		    ops->same_provision(linecard, linecard->priv,
-					linecard_type->type,
-					linecard_type->priv))
-			err = 0;
-		goto out;
-	}
-
-	linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING;
-	linecard->type = linecard_type->type;
-	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-	mutex_unlock(&linecard->state_lock);
-	err = ops->provision(linecard, linecard->priv, linecard_type->type,
-			     linecard_type->priv, extack);
-	if (err) {
-		/* Provisioning failed. Assume the linecard is unprovisioned
-		 * for future operations.
-		 */
-		mutex_lock(&linecard->state_lock);
-		linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
-		linecard->type = NULL;
-		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-		mutex_unlock(&linecard->state_lock);
-	}
-	return err;
-
-out:
-	mutex_unlock(&linecard->state_lock);
-	return err;
-}
-
-static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
-				       struct netlink_ext_ack *extack)
-{
-	int err;
-
-	mutex_lock(&linecard->state_lock);
-	if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
-		NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
-		err = -EBUSY;
-		goto out;
-	}
-	if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
-		NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
-		err = -EBUSY;
-		goto out;
-	}
-	if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
-		linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
-		linecard->type = NULL;
-		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-		err = 0;
-		goto out;
-	}
-
-	if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) {
-		NL_SET_ERR_MSG(extack, "Line card is not provisioned");
-		err = 0;
-		goto out;
-	}
-	linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONING;
-	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-	mutex_unlock(&linecard->state_lock);
-	err = linecard->ops->unprovision(linecard, linecard->priv,
-					 extack);
-	if (err) {
-		/* Unprovisioning failed. Assume the linecard is unprovisioned
-		 * for future operations.
-		 */
-		mutex_lock(&linecard->state_lock);
-		linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
-		linecard->type = NULL;
-		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-		mutex_unlock(&linecard->state_lock);
-	}
-	return err;
-
-out:
-	mutex_unlock(&linecard->state_lock);
-	return err;
-}
-
-static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
-					    struct genl_info *info)
-{
-	struct netlink_ext_ack *extack = info->extack;
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_linecard *linecard;
-	int err;
-
-	linecard = devlink_linecard_get_from_info(devlink, info);
-	if (IS_ERR(linecard))
-		return PTR_ERR(linecard);
-
-	if (info->attrs[DEVLINK_ATTR_LINECARD_TYPE]) {
-		const char *type;
-
-		type = nla_data(info->attrs[DEVLINK_ATTR_LINECARD_TYPE]);
-		if (strcmp(type, "")) {
-			err = devlink_linecard_type_set(linecard, type, extack);
-			if (err)
-				return err;
-		} else {
-			err = devlink_linecard_type_unset(linecard, extack);
-			if (err)
-				return err;
-		}
-	}
-
-	return 0;
-}
-
 const struct genl_small_ops devlink_nl_small_ops[40] = {
 	{
 		.cmd = DEVLINK_CMD_PORT_SET,
@@ -703,212 +313,3 @@ void devlink_notify_unregister(struct devlink *devlink)
 	devlink_linecards_notify_unregister(devlink);
 	devlink_notify(devlink, DEVLINK_CMD_DEL);
 }
-
-static int devlink_linecard_types_init(struct devlink_linecard *linecard)
-{
-	struct devlink_linecard_type *linecard_type;
-	unsigned int count;
-	int i;
-
-	count = linecard->ops->types_count(linecard, linecard->priv);
-	linecard->types = kmalloc_array(count, sizeof(*linecard_type),
-					GFP_KERNEL);
-	if (!linecard->types)
-		return -ENOMEM;
-	linecard->types_count = count;
-
-	for (i = 0; i < count; i++) {
-		linecard_type = &linecard->types[i];
-		linecard->ops->types_get(linecard, linecard->priv, i,
-					 &linecard_type->type,
-					 &linecard_type->priv);
-	}
-	return 0;
-}
-
-static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
-{
-	kfree(linecard->types);
-}
-
-/**
- *	devl_linecard_create - Create devlink linecard
- *
- *	@devlink: devlink
- *	@linecard_index: driver-specific numerical identifier of the linecard
- *	@ops: linecards ops
- *	@priv: user priv pointer
- *
- *	Create devlink linecard instance with provided linecard index.
- *	Caller can use any indexing, even hw-related one.
- *
- *	Return: Line card structure or an ERR_PTR() encoded error code.
- */
-struct devlink_linecard *
-devl_linecard_create(struct devlink *devlink, unsigned int linecard_index,
-		     const struct devlink_linecard_ops *ops, void *priv)
-{
-	struct devlink_linecard *linecard;
-	int err;
-
-	if (WARN_ON(!ops || !ops->provision || !ops->unprovision ||
-		    !ops->types_count || !ops->types_get))
-		return ERR_PTR(-EINVAL);
-
-	if (devlink_linecard_index_exists(devlink, linecard_index))
-		return ERR_PTR(-EEXIST);
-
-	linecard = kzalloc(sizeof(*linecard), GFP_KERNEL);
-	if (!linecard)
-		return ERR_PTR(-ENOMEM);
-
-	linecard->devlink = devlink;
-	linecard->index = linecard_index;
-	linecard->ops = ops;
-	linecard->priv = priv;
-	linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
-	mutex_init(&linecard->state_lock);
-
-	err = devlink_linecard_types_init(linecard);
-	if (err) {
-		mutex_destroy(&linecard->state_lock);
-		kfree(linecard);
-		return ERR_PTR(err);
-	}
-
-	list_add_tail(&linecard->list, &devlink->linecard_list);
-	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-	return linecard;
-}
-EXPORT_SYMBOL_GPL(devl_linecard_create);
-
-/**
- *	devl_linecard_destroy - Destroy devlink linecard
- *
- *	@linecard: devlink linecard
- */
-void devl_linecard_destroy(struct devlink_linecard *linecard)
-{
-	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
-	list_del(&linecard->list);
-	devlink_linecard_types_fini(linecard);
-	mutex_destroy(&linecard->state_lock);
-	kfree(linecard);
-}
-EXPORT_SYMBOL_GPL(devl_linecard_destroy);
-
-/**
- *	devlink_linecard_provision_set - Set provisioning on linecard
- *
- *	@linecard: devlink linecard
- *	@type: linecard type
- *
- *	This is either called directly from the provision() op call or
- *	as a result of the provision() op call asynchronously.
- */
-void devlink_linecard_provision_set(struct devlink_linecard *linecard,
-				    const char *type)
-{
-	mutex_lock(&linecard->state_lock);
-	WARN_ON(linecard->type && strcmp(linecard->type, type));
-	linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
-	linecard->type = type;
-	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-	mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
-
-/**
- *	devlink_linecard_provision_clear - Clear provisioning on linecard
- *
- *	@linecard: devlink linecard
- *
- *	This is either called directly from the unprovision() op call or
- *	as a result of the unprovision() op call asynchronously.
- */
-void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
-{
-	mutex_lock(&linecard->state_lock);
-	WARN_ON(linecard->nested_devlink);
-	linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
-	linecard->type = NULL;
-	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-	mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
-
-/**
- *	devlink_linecard_provision_fail - Fail provisioning on linecard
- *
- *	@linecard: devlink linecard
- *
- *	This is either called directly from the provision() op call or
- *	as a result of the provision() op call asynchronously.
- */
-void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
-{
-	mutex_lock(&linecard->state_lock);
-	WARN_ON(linecard->nested_devlink);
-	linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
-	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-	mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_provision_fail);
-
-/**
- *	devlink_linecard_activate - Set linecard active
- *
- *	@linecard: devlink linecard
- */
-void devlink_linecard_activate(struct devlink_linecard *linecard)
-{
-	mutex_lock(&linecard->state_lock);
-	WARN_ON(linecard->state != DEVLINK_LINECARD_STATE_PROVISIONED);
-	linecard->state = DEVLINK_LINECARD_STATE_ACTIVE;
-	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-	mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_activate);
-
-/**
- *	devlink_linecard_deactivate - Set linecard inactive
- *
- *	@linecard: devlink linecard
- */
-void devlink_linecard_deactivate(struct devlink_linecard *linecard)
-{
-	mutex_lock(&linecard->state_lock);
-	switch (linecard->state) {
-	case DEVLINK_LINECARD_STATE_ACTIVE:
-		linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
-		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-		break;
-	case DEVLINK_LINECARD_STATE_UNPROVISIONING:
-		/* Line card is being deactivated as part
-		 * of unprovisioning flow.
-		 */
-		break;
-	default:
-		WARN_ON(1);
-		break;
-	}
-	mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
-
-/**
- *	devlink_linecard_nested_dl_set - Attach/detach nested devlink
- *					 instance to linecard.
- *
- *	@linecard: devlink linecard
- *	@nested_devlink: devlink instance to attach or NULL to detach
- */
-void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
-				    struct devlink *nested_devlink)
-{
-	mutex_lock(&linecard->state_lock);
-	linecard->nested_devlink = nested_devlink;
-	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-	mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c
new file mode 100644
index 000000000000..85c32c314b0f
--- /dev/null
+++ b/net/devlink/linecard.c
@@ -0,0 +1,606 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+static struct devlink_linecard *
+devlink_linecard_get_by_index(struct devlink *devlink,
+			      unsigned int linecard_index)
+{
+	struct devlink_linecard *devlink_linecard;
+
+	list_for_each_entry(devlink_linecard, &devlink->linecard_list, list) {
+		if (devlink_linecard->index == linecard_index)
+			return devlink_linecard;
+	}
+	return NULL;
+}
+
+static bool devlink_linecard_index_exists(struct devlink *devlink,
+					  unsigned int linecard_index)
+{
+	return devlink_linecard_get_by_index(devlink, linecard_index);
+}
+
+static struct devlink_linecard *
+devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
+{
+	if (attrs[DEVLINK_ATTR_LINECARD_INDEX]) {
+		u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]);
+		struct devlink_linecard *linecard;
+
+		linecard = devlink_linecard_get_by_index(devlink, linecard_index);
+		if (!linecard)
+			return ERR_PTR(-ENODEV);
+		return linecard;
+	}
+	return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_linecard *
+devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+	return devlink_linecard_get_from_attrs(devlink, info->attrs);
+}
+
+static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
+{
+	struct nlattr *nested_attr;
+
+	nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK);
+	if (!nested_attr)
+		return -EMSGSIZE;
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, nested_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, nested_attr);
+	return -EMSGSIZE;
+}
+
+struct devlink_linecard_type {
+	const char *type;
+	const void *priv;
+};
+
+static int devlink_nl_linecard_fill(struct sk_buff *msg,
+				    struct devlink *devlink,
+				    struct devlink_linecard *linecard,
+				    enum devlink_command cmd, u32 portid,
+				    u32 seq, int flags,
+				    struct netlink_ext_ack *extack)
+{
+	struct devlink_linecard_type *linecard_type;
+	struct nlattr *attr;
+	void *hdr;
+	int i;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index))
+		goto nla_put_failure;
+	if (nla_put_u8(msg, DEVLINK_ATTR_LINECARD_STATE, linecard->state))
+		goto nla_put_failure;
+	if (linecard->type &&
+	    nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, linecard->type))
+		goto nla_put_failure;
+
+	if (linecard->types_count) {
+		attr = nla_nest_start(msg,
+				      DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES);
+		if (!attr)
+			goto nla_put_failure;
+		for (i = 0; i < linecard->types_count; i++) {
+			linecard_type = &linecard->types[i];
+			if (nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE,
+					   linecard_type->type)) {
+				nla_nest_cancel(msg, attr);
+				goto nla_put_failure;
+			}
+		}
+		nla_nest_end(msg, attr);
+	}
+
+	if (linecard->nested_devlink &&
+	    devlink_nl_put_nested_handle(msg, linecard->nested_devlink))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static void devlink_linecard_notify(struct devlink_linecard *linecard,
+				    enum devlink_command cmd)
+{
+	struct devlink *devlink = linecard->devlink;
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW &&
+		cmd != DEVLINK_CMD_LINECARD_DEL);
+
+	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_linecard_fill(msg, devlink, linecard, cmd, 0, 0, 0,
+				       NULL);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_linecards_notify_register(struct devlink *devlink)
+{
+	struct devlink_linecard *linecard;
+
+	list_for_each_entry(linecard, &devlink->linecard_list, list)
+		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+}
+
+void devlink_linecards_notify_unregister(struct devlink *devlink)
+{
+	struct devlink_linecard *linecard;
+
+	list_for_each_entry_reverse(linecard, &devlink->linecard_list, list)
+		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
+}
+
+int devlink_nl_linecard_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_linecard *linecard;
+	struct sk_buff *msg;
+	int err;
+
+	linecard = devlink_linecard_get_from_info(devlink, info);
+	if (IS_ERR(linecard))
+		return PTR_ERR(linecard);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	mutex_lock(&linecard->state_lock);
+	err = devlink_nl_linecard_fill(msg, devlink, linecard,
+				       DEVLINK_CMD_LINECARD_NEW,
+				       info->snd_portid, info->snd_seq, 0,
+				       info->extack);
+	mutex_unlock(&linecard->state_lock);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_linecard_get_dump_one(struct sk_buff *msg,
+					    struct devlink *devlink,
+					    struct netlink_callback *cb,
+					    int flags)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_linecard *linecard;
+	int idx = 0;
+	int err = 0;
+
+	list_for_each_entry(linecard, &devlink->linecard_list, list) {
+		if (idx < state->idx) {
+			idx++;
+			continue;
+		}
+		mutex_lock(&linecard->state_lock);
+		err = devlink_nl_linecard_fill(msg, devlink, linecard,
+					       DEVLINK_CMD_LINECARD_NEW,
+					       NETLINK_CB(cb->skb).portid,
+					       cb->nlh->nlmsg_seq, flags,
+					       cb->extack);
+		mutex_unlock(&linecard->state_lock);
+		if (err) {
+			state->idx = idx;
+			break;
+		}
+		idx++;
+	}
+
+	return err;
+}
+
+int devlink_nl_linecard_get_dumpit(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	return devlink_nl_dumpit(skb, cb, devlink_nl_linecard_get_dump_one);
+}
+
+static struct devlink_linecard_type *
+devlink_linecard_type_lookup(struct devlink_linecard *linecard,
+			     const char *type)
+{
+	struct devlink_linecard_type *linecard_type;
+	int i;
+
+	for (i = 0; i < linecard->types_count; i++) {
+		linecard_type = &linecard->types[i];
+		if (!strcmp(type, linecard_type->type))
+			return linecard_type;
+	}
+	return NULL;
+}
+
+static int devlink_linecard_type_set(struct devlink_linecard *linecard,
+				     const char *type,
+				     struct netlink_ext_ack *extack)
+{
+	const struct devlink_linecard_ops *ops = linecard->ops;
+	struct devlink_linecard_type *linecard_type;
+	int err;
+
+	mutex_lock(&linecard->state_lock);
+	if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
+		NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
+		err = -EBUSY;
+		goto out;
+	}
+	if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
+		NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
+		err = -EBUSY;
+		goto out;
+	}
+
+	linecard_type = devlink_linecard_type_lookup(linecard, type);
+	if (!linecard_type) {
+		NL_SET_ERR_MSG(extack, "Unsupported line card type provided");
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED &&
+	    linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
+		NL_SET_ERR_MSG(extack, "Line card already provisioned");
+		err = -EBUSY;
+		/* Check if the line card is provisioned in the same
+		 * way the user asks. In case it is, make the operation
+		 * to return success.
+		 */
+		if (ops->same_provision &&
+		    ops->same_provision(linecard, linecard->priv,
+					linecard_type->type,
+					linecard_type->priv))
+			err = 0;
+		goto out;
+	}
+
+	linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING;
+	linecard->type = linecard_type->type;
+	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+	mutex_unlock(&linecard->state_lock);
+	err = ops->provision(linecard, linecard->priv, linecard_type->type,
+			     linecard_type->priv, extack);
+	if (err) {
+		/* Provisioning failed. Assume the linecard is unprovisioned
+		 * for future operations.
+		 */
+		mutex_lock(&linecard->state_lock);
+		linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+		linecard->type = NULL;
+		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+		mutex_unlock(&linecard->state_lock);
+	}
+	return err;
+
+out:
+	mutex_unlock(&linecard->state_lock);
+	return err;
+}
+
+static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
+				       struct netlink_ext_ack *extack)
+{
+	int err;
+
+	mutex_lock(&linecard->state_lock);
+	if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
+		NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
+		err = -EBUSY;
+		goto out;
+	}
+	if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
+		NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
+		err = -EBUSY;
+		goto out;
+	}
+	if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
+		linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+		linecard->type = NULL;
+		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+		err = 0;
+		goto out;
+	}
+
+	if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) {
+		NL_SET_ERR_MSG(extack, "Line card is not provisioned");
+		err = 0;
+		goto out;
+	}
+	linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONING;
+	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+	mutex_unlock(&linecard->state_lock);
+	err = linecard->ops->unprovision(linecard, linecard->priv,
+					 extack);
+	if (err) {
+		/* Unprovisioning failed. Assume the linecard is unprovisioned
+		 * for future operations.
+		 */
+		mutex_lock(&linecard->state_lock);
+		linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+		linecard->type = NULL;
+		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+		mutex_unlock(&linecard->state_lock);
+	}
+	return err;
+
+out:
+	mutex_unlock(&linecard->state_lock);
+	return err;
+}
+
+int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_linecard *linecard;
+	int err;
+
+	linecard = devlink_linecard_get_from_info(devlink, info);
+	if (IS_ERR(linecard))
+		return PTR_ERR(linecard);
+
+	if (info->attrs[DEVLINK_ATTR_LINECARD_TYPE]) {
+		const char *type;
+
+		type = nla_data(info->attrs[DEVLINK_ATTR_LINECARD_TYPE]);
+		if (strcmp(type, "")) {
+			err = devlink_linecard_type_set(linecard, type, extack);
+			if (err)
+				return err;
+		} else {
+			err = devlink_linecard_type_unset(linecard, extack);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static int devlink_linecard_types_init(struct devlink_linecard *linecard)
+{
+	struct devlink_linecard_type *linecard_type;
+	unsigned int count;
+	int i;
+
+	count = linecard->ops->types_count(linecard, linecard->priv);
+	linecard->types = kmalloc_array(count, sizeof(*linecard_type),
+					GFP_KERNEL);
+	if (!linecard->types)
+		return -ENOMEM;
+	linecard->types_count = count;
+
+	for (i = 0; i < count; i++) {
+		linecard_type = &linecard->types[i];
+		linecard->ops->types_get(linecard, linecard->priv, i,
+					 &linecard_type->type,
+					 &linecard_type->priv);
+	}
+	return 0;
+}
+
+static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
+{
+	kfree(linecard->types);
+}
+
+/**
+ *	devl_linecard_create - Create devlink linecard
+ *
+ *	@devlink: devlink
+ *	@linecard_index: driver-specific numerical identifier of the linecard
+ *	@ops: linecards ops
+ *	@priv: user priv pointer
+ *
+ *	Create devlink linecard instance with provided linecard index.
+ *	Caller can use any indexing, even hw-related one.
+ *
+ *	Return: Line card structure or an ERR_PTR() encoded error code.
+ */
+struct devlink_linecard *
+devl_linecard_create(struct devlink *devlink, unsigned int linecard_index,
+		     const struct devlink_linecard_ops *ops, void *priv)
+{
+	struct devlink_linecard *linecard;
+	int err;
+
+	if (WARN_ON(!ops || !ops->provision || !ops->unprovision ||
+		    !ops->types_count || !ops->types_get))
+		return ERR_PTR(-EINVAL);
+
+	if (devlink_linecard_index_exists(devlink, linecard_index))
+		return ERR_PTR(-EEXIST);
+
+	linecard = kzalloc(sizeof(*linecard), GFP_KERNEL);
+	if (!linecard)
+		return ERR_PTR(-ENOMEM);
+
+	linecard->devlink = devlink;
+	linecard->index = linecard_index;
+	linecard->ops = ops;
+	linecard->priv = priv;
+	linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+	mutex_init(&linecard->state_lock);
+
+	err = devlink_linecard_types_init(linecard);
+	if (err) {
+		mutex_destroy(&linecard->state_lock);
+		kfree(linecard);
+		return ERR_PTR(err);
+	}
+
+	list_add_tail(&linecard->list, &devlink->linecard_list);
+	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+	return linecard;
+}
+EXPORT_SYMBOL_GPL(devl_linecard_create);
+
+/**
+ *	devl_linecard_destroy - Destroy devlink linecard
+ *
+ *	@linecard: devlink linecard
+ */
+void devl_linecard_destroy(struct devlink_linecard *linecard)
+{
+	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
+	list_del(&linecard->list);
+	devlink_linecard_types_fini(linecard);
+	mutex_destroy(&linecard->state_lock);
+	kfree(linecard);
+}
+EXPORT_SYMBOL_GPL(devl_linecard_destroy);
+
+/**
+ *	devlink_linecard_provision_set - Set provisioning on linecard
+ *
+ *	@linecard: devlink linecard
+ *	@type: linecard type
+ *
+ *	This is either called directly from the provision() op call or
+ *	as a result of the provision() op call asynchronously.
+ */
+void devlink_linecard_provision_set(struct devlink_linecard *linecard,
+				    const char *type)
+{
+	mutex_lock(&linecard->state_lock);
+	WARN_ON(linecard->type && strcmp(linecard->type, type));
+	linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
+	linecard->type = type;
+	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+	mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
+
+/**
+ *	devlink_linecard_provision_clear - Clear provisioning on linecard
+ *
+ *	@linecard: devlink linecard
+ *
+ *	This is either called directly from the unprovision() op call or
+ *	as a result of the unprovision() op call asynchronously.
+ */
+void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
+{
+	mutex_lock(&linecard->state_lock);
+	WARN_ON(linecard->nested_devlink);
+	linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+	linecard->type = NULL;
+	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+	mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
+
+/**
+ *	devlink_linecard_provision_fail - Fail provisioning on linecard
+ *
+ *	@linecard: devlink linecard
+ *
+ *	This is either called directly from the provision() op call or
+ *	as a result of the provision() op call asynchronously.
+ */
+void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
+{
+	mutex_lock(&linecard->state_lock);
+	WARN_ON(linecard->nested_devlink);
+	linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
+	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+	mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_fail);
+
+/**
+ *	devlink_linecard_activate - Set linecard active
+ *
+ *	@linecard: devlink linecard
+ */
+void devlink_linecard_activate(struct devlink_linecard *linecard)
+{
+	mutex_lock(&linecard->state_lock);
+	WARN_ON(linecard->state != DEVLINK_LINECARD_STATE_PROVISIONED);
+	linecard->state = DEVLINK_LINECARD_STATE_ACTIVE;
+	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+	mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_activate);
+
+/**
+ *	devlink_linecard_deactivate - Set linecard inactive
+ *
+ *	@linecard: devlink linecard
+ */
+void devlink_linecard_deactivate(struct devlink_linecard *linecard)
+{
+	mutex_lock(&linecard->state_lock);
+	switch (linecard->state) {
+	case DEVLINK_LINECARD_STATE_ACTIVE:
+		linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
+		devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+		break;
+	case DEVLINK_LINECARD_STATE_UNPROVISIONING:
+		/* Line card is being deactivated as part
+		 * of unprovisioning flow.
+		 */
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+	mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
+
+/**
+ *	devlink_linecard_nested_dl_set - Attach/detach nested devlink
+ *					 instance to linecard.
+ *
+ *	@linecard: devlink linecard
+ *	@nested_devlink: devlink instance to attach or NULL to detach
+ */
+void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+				    struct devlink *nested_devlink)
+{
+	mutex_lock(&linecard->state_lock);
+	linecard->nested_devlink = nested_devlink;
+	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+	mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);

From 890c556674377c0abba4ab91ff6f1962175d578c Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:55 +0200
Subject: [PATCH 2009/2047] devlink: move tracepoint definitions into core.c

Move remaining tracepoint definitions to most suitable file core.c.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-14-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/core.c     | 6 ++++++
 net/devlink/leftover.c | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/devlink/core.c b/net/devlink/core.c
index c23ebabadc52..6cec4afb01fb 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -5,9 +5,15 @@
  */
 
 #include <net/genetlink.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/devlink.h>
 
 #include "devl_internal.h"
 
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
+
 DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
 
 void *devlink_priv(struct devlink *devlink)
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 98ccb3a8393d..a477cdbab940 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -28,15 +28,9 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/devlink.h>
-#define CREATE_TRACE_POINTS
-#include <trace/events/devlink.h>
 
 #include "devl_internal.h"
 
-EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
-EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
-EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
-
 const struct genl_small_ops devlink_nl_small_ops[40] = {
 	{
 		.cmd = DEVLINK_CMD_PORT_SET,

From 29a390d17748d93f9e6bc6fb0e09d89571aa25f6 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:56 +0200
Subject: [PATCH 2010/2047] devlink: move small_ops definition into netlink.c

Move the generic netlink small_ops definition where they are consumed,
into netlink.c

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-15-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/devl_internal.h |   2 -
 net/devlink/leftover.c      | 251 ------------------------------------
 net/devlink/netlink.c       | 251 ++++++++++++++++++++++++++++++++++++
 3 files changed, 251 insertions(+), 253 deletions(-)

diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 8c81f731a4c7..efca6abf7af7 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -121,8 +121,6 @@ typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg,
 				       struct netlink_callback *cb,
 				       int flags);
 
-extern const struct genl_small_ops devlink_nl_small_ops[40];
-
 struct devlink *
 devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
 
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index a477cdbab940..05e056d6d5ea 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -31,257 +31,6 @@
 
 #include "devl_internal.h"
 
-const struct genl_small_ops devlink_nl_small_ops[40] = {
-	{
-		.cmd = DEVLINK_CMD_PORT_SET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_port_set_doit,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
-	},
-	{
-		.cmd = DEVLINK_CMD_RATE_SET,
-		.doit = devlink_nl_cmd_rate_set_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_RATE_NEW,
-		.doit = devlink_nl_cmd_rate_new_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_RATE_DEL,
-		.doit = devlink_nl_cmd_rate_del_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_PORT_SPLIT,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_port_split_doit,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
-	},
-	{
-		.cmd = DEVLINK_CMD_PORT_UNSPLIT,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_port_unsplit_doit,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
-	},
-	{
-		.cmd = DEVLINK_CMD_PORT_NEW,
-		.doit = devlink_nl_cmd_port_new_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_PORT_DEL,
-		.doit = devlink_nl_cmd_port_del_doit,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
-	},
-
-	{
-		.cmd = DEVLINK_CMD_LINECARD_SET,
-		.doit = devlink_nl_cmd_linecard_set_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_SB_POOL_SET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_sb_pool_set_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_sb_port_pool_set_doit,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
-	},
-	{
-		.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
-	},
-	{
-		.cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_sb_occ_snapshot_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_sb_occ_max_clear_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_ESWITCH_GET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_eswitch_get_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_ESWITCH_SET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_eswitch_set_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_dpipe_table_get,
-		/* can be retrieved by unprivileged users */
-	},
-	{
-		.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_dpipe_entries_get,
-		/* can be retrieved by unprivileged users */
-	},
-	{
-		.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_dpipe_headers_get,
-		/* can be retrieved by unprivileged users */
-	},
-	{
-		.cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_dpipe_table_counters_set,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_RESOURCE_SET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_resource_set,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_RESOURCE_DUMP,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_resource_dump,
-		/* can be retrieved by unprivileged users */
-	},
-	{
-		.cmd = DEVLINK_CMD_RELOAD,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_reload,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_PARAM_SET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_param_set_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_PORT_PARAM_GET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_port_param_get_doit,
-		.dumpit = devlink_nl_cmd_port_param_get_dumpit,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
-		/* can be retrieved by unprivileged users */
-	},
-	{
-		.cmd = DEVLINK_CMD_PORT_PARAM_SET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_port_param_set_doit,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
-	},
-	{
-		.cmd = DEVLINK_CMD_REGION_NEW,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_region_new,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_REGION_DEL,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_region_del,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_REGION_READ,
-		.validate = GENL_DONT_VALIDATE_STRICT |
-			    GENL_DONT_VALIDATE_DUMP_STRICT,
-		.dumpit = devlink_nl_cmd_region_read_dumpit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_health_reporter_set_doit,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
-	},
-	{
-		.cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_health_reporter_recover_doit,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
-	},
-	{
-		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_health_reporter_diagnose_doit,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
-	},
-	{
-		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
-		.validate = GENL_DONT_VALIDATE_STRICT |
-			    GENL_DONT_VALIDATE_DUMP_STRICT,
-		.dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
-	},
-	{
-		.cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_health_reporter_test_doit,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
-	},
-	{
-		.cmd = DEVLINK_CMD_FLASH_UPDATE,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
-		.doit = devlink_nl_cmd_flash_update,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_TRAP_SET,
-		.doit = devlink_nl_cmd_trap_set_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_TRAP_GROUP_SET,
-		.doit = devlink_nl_cmd_trap_group_set_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_TRAP_POLICER_SET,
-		.doit = devlink_nl_cmd_trap_policer_set_doit,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = DEVLINK_CMD_SELFTESTS_RUN,
-		.doit = devlink_nl_cmd_selftests_run,
-		.flags = GENL_ADMIN_PERM,
-	},
-	/* -- No new ops here! Use split ops going forward! -- */
-};
-
 void devlink_notify_register(struct devlink *devlink)
 {
 	devlink_notify(devlink, DEVLINK_CMD_NEW);
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
index 5f57afd31dea..fc3e7c029a3b 100644
--- a/net/devlink/netlink.c
+++ b/net/devlink/netlink.c
@@ -255,6 +255,257 @@ int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
 		return devlink_nl_inst_iter_dumpit(msg, cb, flags, dump_one);
 }
 
+static const struct genl_small_ops devlink_nl_small_ops[40] = {
+	{
+		.cmd = DEVLINK_CMD_PORT_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_port_set_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+	},
+	{
+		.cmd = DEVLINK_CMD_RATE_SET,
+		.doit = devlink_nl_cmd_rate_set_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_RATE_NEW,
+		.doit = devlink_nl_cmd_rate_new_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_RATE_DEL,
+		.doit = devlink_nl_cmd_rate_del_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_PORT_SPLIT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_port_split_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+	},
+	{
+		.cmd = DEVLINK_CMD_PORT_UNSPLIT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_port_unsplit_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+	},
+	{
+		.cmd = DEVLINK_CMD_PORT_NEW,
+		.doit = devlink_nl_cmd_port_new_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_PORT_DEL,
+		.doit = devlink_nl_cmd_port_del_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+	},
+
+	{
+		.cmd = DEVLINK_CMD_LINECARD_SET,
+		.doit = devlink_nl_cmd_linecard_set_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_SB_POOL_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_sb_pool_set_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_sb_port_pool_set_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+	},
+	{
+		.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+	},
+	{
+		.cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_sb_occ_snapshot_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_sb_occ_max_clear_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_ESWITCH_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_eswitch_get_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_ESWITCH_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_eswitch_set_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_dpipe_table_get,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_dpipe_entries_get,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_dpipe_headers_get,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_dpipe_table_counters_set,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_RESOURCE_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_resource_set,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_RESOURCE_DUMP,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_resource_dump,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_RELOAD,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_reload,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_PARAM_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_param_set_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_PORT_PARAM_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_port_param_get_doit,
+		.dumpit = devlink_nl_cmd_port_param_get_dumpit,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_PORT_PARAM_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_port_param_set_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+	},
+	{
+		.cmd = DEVLINK_CMD_REGION_NEW,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_region_new,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_REGION_DEL,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_region_del,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_REGION_READ,
+		.validate = GENL_DONT_VALIDATE_STRICT |
+			    GENL_DONT_VALIDATE_DUMP_STRICT,
+		.dumpit = devlink_nl_cmd_region_read_dumpit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_health_reporter_set_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_health_reporter_recover_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_health_reporter_diagnose_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT |
+			    GENL_DONT_VALIDATE_DUMP_STRICT,
+		.dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_health_reporter_test_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+	},
+	{
+		.cmd = DEVLINK_CMD_FLASH_UPDATE,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = devlink_nl_cmd_flash_update,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_TRAP_SET,
+		.doit = devlink_nl_cmd_trap_set_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_TRAP_GROUP_SET,
+		.doit = devlink_nl_cmd_trap_group_set_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_TRAP_POLICER_SET,
+		.doit = devlink_nl_cmd_trap_policer_set_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_SELFTESTS_RUN,
+		.doit = devlink_nl_cmd_selftests_run,
+		.flags = GENL_ADMIN_PERM,
+	},
+	/* -- No new ops here! Use split ops going forward! -- */
+};
+
 struct genl_family devlink_nl_family __ro_after_init = {
 	.name		= DEVLINK_GENL_NAME,
 	.version	= DEVLINK_GENL_VERSION,

From 71179ac5c21185171556bc438d5f22d566948d7f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 28 Aug 2023 08:16:57 +0200
Subject: [PATCH 2011/2047] devlink: move devlink_notify_register/unregister()
 to dev.c

At last, move the last bits out of leftover.c,
the devlink_notify_register/unregister() functions to dev.c

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20230828061657.300667-16-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/devlink/Makefile        |  2 +-
 net/devlink/dev.c           | 28 +++++++++++++++++-
 net/devlink/devl_internal.h |  6 ++--
 net/devlink/leftover.c      | 58 -------------------------------------
 4 files changed, 30 insertions(+), 64 deletions(-)
 delete mode 100644 net/devlink/leftover.c

diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index 71f490d301d7..000da622116a 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-y := leftover.o core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
+obj-y := core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
 	 resource.o param.o region.o health.o trap.o rate.o linecard.o
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index abf3393a7a17..bba4ace7d22b 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -174,7 +174,7 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
+static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
 {
 	struct sk_buff *msg;
 	int err;
@@ -230,6 +230,32 @@ int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
 	return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one);
 }
 
+void devlink_notify_register(struct devlink *devlink)
+{
+	devlink_notify(devlink, DEVLINK_CMD_NEW);
+	devlink_linecards_notify_register(devlink);
+	devlink_ports_notify_register(devlink);
+	devlink_trap_policers_notify_register(devlink);
+	devlink_trap_groups_notify_register(devlink);
+	devlink_traps_notify_register(devlink);
+	devlink_rates_notify_register(devlink);
+	devlink_regions_notify_register(devlink);
+	devlink_params_notify_register(devlink);
+}
+
+void devlink_notify_unregister(struct devlink *devlink)
+{
+	devlink_params_notify_unregister(devlink);
+	devlink_regions_notify_unregister(devlink);
+	devlink_rates_notify_unregister(devlink);
+	devlink_traps_notify_unregister(devlink);
+	devlink_trap_groups_notify_unregister(devlink);
+	devlink_trap_policers_notify_unregister(devlink);
+	devlink_ports_notify_unregister(devlink);
+	devlink_linecards_notify_unregister(devlink);
+	devlink_notify(devlink, DEVLINK_CMD_DEL);
+}
+
 static void devlink_reload_failed_set(struct devlink *devlink,
 				      bool reload_failed)
 {
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index efca6abf7af7..f6b5fea2e13c 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -124,9 +124,6 @@ typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg,
 struct devlink *
 devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
 
-void devlink_notify_unregister(struct devlink *devlink);
-void devlink_notify_register(struct devlink *devlink);
-
 int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
 		      devlink_nl_dump_one_func_t *dump_one);
 
@@ -151,7 +148,8 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
 int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info);
 
 /* Notify */
-void devlink_notify(struct devlink *devlink, enum devlink_command cmd);
+void devlink_notify_register(struct devlink *devlink);
+void devlink_notify_unregister(struct devlink *devlink);
 void devlink_ports_notify_register(struct devlink *devlink);
 void devlink_ports_notify_unregister(struct devlink *devlink);
 void devlink_params_notify_register(struct devlink *devlink);
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
deleted file mode 100644
index 05e056d6d5ea..000000000000
--- a/net/devlink/leftover.c
+++ /dev/null
@@ -1,58 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/core/devlink.c - Network physical/parent device Netlink interface
- *
- * Heavily inspired by net/wireless/
- * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
- */
-
-#include <linux/etherdevice.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/gfp.h>
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/refcount.h>
-#include <linux/workqueue.h>
-#include <linux/u64_stats_sync.h>
-#include <linux/timekeeping.h>
-#include <rdma/ib_verbs.h>
-#include <net/netlink.h>
-#include <net/genetlink.h>
-#include <net/rtnetlink.h>
-#include <net/net_namespace.h>
-#include <net/sock.h>
-#include <net/devlink.h>
-
-#include "devl_internal.h"
-
-void devlink_notify_register(struct devlink *devlink)
-{
-	devlink_notify(devlink, DEVLINK_CMD_NEW);
-	devlink_linecards_notify_register(devlink);
-	devlink_ports_notify_register(devlink);
-	devlink_trap_policers_notify_register(devlink);
-	devlink_trap_groups_notify_register(devlink);
-	devlink_traps_notify_register(devlink);
-	devlink_rates_notify_register(devlink);
-	devlink_regions_notify_register(devlink);
-	devlink_params_notify_register(devlink);
-}
-
-void devlink_notify_unregister(struct devlink *devlink)
-{
-	devlink_params_notify_unregister(devlink);
-	devlink_regions_notify_unregister(devlink);
-	devlink_rates_notify_unregister(devlink);
-	devlink_traps_notify_unregister(devlink);
-	devlink_trap_groups_notify_unregister(devlink);
-	devlink_trap_policers_notify_unregister(devlink);
-	devlink_ports_notify_unregister(devlink);
-	devlink_linecards_notify_unregister(devlink);
-	devlink_notify(devlink, DEVLINK_CMD_DEL);
-}

From 72f93a3136ee18fd59fa6579f84c07e93424681e Mon Sep 17 00:00:00 2001
From: Antonio Napolitano <anton@polit.no>
Date: Sat, 26 Aug 2023 01:05:50 +0200
Subject: [PATCH 2012/2047] r8152: add vendor/device ID pair for D-Link
 DUB-E250

The D-Link DUB-E250 is an RTL8156 based 2.5G Ethernet controller.

Add the vendor and product ID values to the driver. This makes Ethernet
work with the adapter.

Signed-off-by: Antonio Napolitano <anton@polit.no>
Link: https://lore.kernel.org/r/CV200KJEEUPC.WPKAHXCQJ05I@mercurius
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/usb/r8152.c   | 1 +
 include/linux/usb/r8152.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index bb234cf0cea0..332c853ca99b 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -9832,6 +9832,7 @@ static const struct usb_device_id rtl8152_table[] = {
 	{ USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041) },
 	{ USB_DEVICE(VENDOR_ID_NVIDIA,  0x09ff) },
 	{ USB_DEVICE(VENDOR_ID_TPLINK,  0x0601) },
+	{ USB_DEVICE(VENDOR_ID_DLINK,   0xb301) },
 	{}
 };
 
diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h
index 20d88b1defc3..287e9d83fb8b 100644
--- a/include/linux/usb/r8152.h
+++ b/include/linux/usb/r8152.h
@@ -29,6 +29,7 @@
 #define VENDOR_ID_LINKSYS		0x13b1
 #define VENDOR_ID_NVIDIA		0x0955
 #define VENDOR_ID_TPLINK		0x2357
+#define VENDOR_ID_DLINK			0x2001
 
 #if IS_REACHABLE(CONFIG_USB_RTL8152)
 extern u8 rtl8152_get_version(struct usb_interface *intf);

From a014c35556b9045ece8426df2b38eb3c5e1c1aa0 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Sat, 26 Aug 2023 11:02:51 +0100
Subject: [PATCH 2013/2047] net: stmmac: clarify difference between "interface"
 and "phy_interface"

Clarify the difference between "interface" and "phy_interface" in
struct plat_stmmacenet_data, both by adding a comment, and also
renaming "interface" to be "mac_interface". The difference between
these are:

 MAC ----- optional PCS ----- SerDes ----- optional PHY ----- Media
       ^                               ^
 mac_interface                   phy_interface

Note that phylink currently only deals with phy_interface.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://lore.kernel.org/r/E1qZq83-005tts-6K@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/stmicro/stmmac/dwmac-imx.c   | 14 ++++++-------
 .../ethernet/stmicro/stmmac/dwmac-ingenic.c   | 20 +++++++++----------
 .../ethernet/stmicro/stmmac/dwmac-loongson.c  |  2 +-
 .../ethernet/stmicro/stmmac/dwmac-lpc18xx.c   |  4 ++--
 .../ethernet/stmicro/stmmac/dwmac-mediatek.c  |  2 +-
 .../ethernet/stmicro/stmmac/dwmac-socfpga.c   |  2 +-
 .../ethernet/stmicro/stmmac/dwmac-starfive.c  |  4 ++--
 .../net/ethernet/stmicro/stmmac/dwmac-stm32.c |  8 ++++----
 .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c |  6 +++---
 .../net/ethernet/stmicro/stmmac/stmmac_main.c |  6 ++++--
 .../ethernet/stmicro/stmmac/stmmac_platform.c |  6 +++---
 include/linux/stmmac.h                        | 15 +++++++++++++-
 12 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index 535856fffaea..df34e34cc14f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -70,7 +70,7 @@ static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat)
 	struct imx_priv_data *dwmac = plat_dat->bsp_priv;
 	int val;
 
-	switch (plat_dat->interface) {
+	switch (plat_dat->mac_interface) {
 	case PHY_INTERFACE_MODE_MII:
 		val = GPR_ENET_QOS_INTF_SEL_MII;
 		break;
@@ -87,7 +87,7 @@ static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat)
 		break;
 	default:
 		pr_debug("imx dwmac doesn't support %d interface\n",
-			 plat_dat->interface);
+			 plat_dat->mac_interface);
 		return -EINVAL;
 	}
 
@@ -110,7 +110,7 @@ static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat)
 	struct imx_priv_data *dwmac = plat_dat->bsp_priv;
 	int val;
 
-	switch (plat_dat->interface) {
+	switch (plat_dat->mac_interface) {
 	case PHY_INTERFACE_MODE_MII:
 		val = MX93_GPR_ENET_QOS_INTF_SEL_MII;
 		break;
@@ -125,7 +125,7 @@ static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat)
 		break;
 	default:
 		dev_dbg(dwmac->dev, "imx dwmac doesn't support %d interface\n",
-			 plat_dat->interface);
+			 plat_dat->mac_interface);
 		return -EINVAL;
 	}
 
@@ -192,8 +192,8 @@ static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mod
 	plat_dat = dwmac->plat_dat;
 
 	if (dwmac->ops->mac_rgmii_txclk_auto_adj ||
-	    (plat_dat->interface == PHY_INTERFACE_MODE_RMII) ||
-	    (plat_dat->interface == PHY_INTERFACE_MODE_MII))
+	    (plat_dat->mac_interface == PHY_INTERFACE_MODE_RMII) ||
+	    (plat_dat->mac_interface == PHY_INTERFACE_MODE_MII))
 		return;
 
 	switch (speed) {
@@ -260,7 +260,7 @@ static int imx_dwmac_mx93_reset(void *priv, void __iomem *ioaddr)
 	value |= DMA_BUS_MODE_SFT_RESET;
 	writel(value, ioaddr + DMA_BUS_MODE);
 
-	if (plat_dat->interface == PHY_INTERFACE_MODE_RMII) {
+	if (plat_dat->mac_interface == PHY_INTERFACE_MODE_RMII) {
 		usleep_range(100, 200);
 		writel(RMII_RESET_SPEED, ioaddr + MAC_CTRL_REG);
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
index e22ef0d6bc73..0a20c3d24722 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
@@ -89,7 +89,7 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
 	struct ingenic_mac *mac = plat_dat->bsp_priv;
 	unsigned int val;
 
-	switch (plat_dat->interface) {
+	switch (plat_dat->mac_interface) {
 	case PHY_INTERFACE_MODE_MII:
 		val = FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT) |
 			  FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_MII);
@@ -118,7 +118,7 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
 		break;
 
 	default:
-		dev_err(mac->dev, "Unsupported interface %d", plat_dat->interface);
+		dev_err(mac->dev, "Unsupported interface %d", plat_dat->mac_interface);
 		return -EINVAL;
 	}
 
@@ -130,13 +130,13 @@ static int x1000_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
 {
 	struct ingenic_mac *mac = plat_dat->bsp_priv;
 
-	switch (plat_dat->interface) {
+	switch (plat_dat->mac_interface) {
 	case PHY_INTERFACE_MODE_RMII:
 		dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n");
 		break;
 
 	default:
-		dev_err(mac->dev, "Unsupported interface %d", plat_dat->interface);
+		dev_err(mac->dev, "Unsupported interface %d", plat_dat->mac_interface);
 		return -EINVAL;
 	}
 
@@ -149,14 +149,14 @@ static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
 	struct ingenic_mac *mac = plat_dat->bsp_priv;
 	unsigned int val;
 
-	switch (plat_dat->interface) {
+	switch (plat_dat->mac_interface) {
 	case PHY_INTERFACE_MODE_RMII:
 		val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII);
 		dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n");
 		break;
 
 	default:
-		dev_err(mac->dev, "Unsupported interface %d", plat_dat->interface);
+		dev_err(mac->dev, "Unsupported interface %d", plat_dat->mac_interface);
 		return -EINVAL;
 	}
 
@@ -169,7 +169,7 @@ static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
 	struct ingenic_mac *mac = plat_dat->bsp_priv;
 	unsigned int val;
 
-	switch (plat_dat->interface) {
+	switch (plat_dat->mac_interface) {
 	case PHY_INTERFACE_MODE_RMII:
 		val = FIELD_PREP(MACPHYC_MODE_SEL_MASK, MACPHYC_MODE_SEL_RMII) |
 			  FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII);
@@ -177,7 +177,7 @@ static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
 		break;
 
 	default:
-		dev_err(mac->dev, "Unsupported interface %d", plat_dat->interface);
+		dev_err(mac->dev, "Unsupported interface %d", plat_dat->mac_interface);
 		return -EINVAL;
 	}
 
@@ -190,7 +190,7 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
 	struct ingenic_mac *mac = plat_dat->bsp_priv;
 	unsigned int val;
 
-	switch (plat_dat->interface) {
+	switch (plat_dat->mac_interface) {
 	case PHY_INTERFACE_MODE_RMII:
 		val = FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN) |
 			  FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_ORIGIN) |
@@ -220,7 +220,7 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
 		break;
 
 	default:
-		dev_err(mac->dev, "Unsupported interface %d", plat_dat->interface);
+		dev_err(mac->dev, "Unsupported interface %d", plat_dat->mac_interface);
 		return -EINVAL;
 	}
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
index a25c187d3185..2cd6fce5c993 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -117,7 +117,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
 	}
 
 	plat->phy_interface = phy_mode;
-	plat->interface = PHY_INTERFACE_MODE_GMII;
+	plat->mac_interface = PHY_INTERFACE_MODE_GMII;
 
 	pci_set_master(pdev);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
index 18e84ba693a6..d0aa674ce705 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
@@ -50,9 +50,9 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev)
 		goto err_remove_config_dt;
 	}
 
-	if (plat_dat->interface == PHY_INTERFACE_MODE_MII) {
+	if (plat_dat->mac_interface == PHY_INTERFACE_MODE_MII) {
 		ethmode = LPC18XX_CREG_CREG6_ETHMODE_MII;
-	} else if (plat_dat->interface == PHY_INTERFACE_MODE_RMII) {
+	} else if (plat_dat->mac_interface == PHY_INTERFACE_MODE_RMII) {
 		ethmode = LPC18XX_CREG_CREG6_ETHMODE_RMII;
 	} else {
 		dev_err(&pdev->dev, "Only MII and RMII mode supported\n");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index 7580077383c0..cd796ec04132 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -587,7 +587,7 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev,
 {
 	int i;
 
-	plat->interface = priv_plat->phy_mode;
+	plat->mac_interface = priv_plat->phy_mode;
 	if (priv_plat->mac_wol)
 		plat->flags |= STMMAC_FLAG_USE_PHY_WOL;
 	else
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index 7db176e8691f..9bf102bbc6a0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -236,7 +236,7 @@ static int socfpga_get_plat_phymode(struct socfpga_dwmac *dwmac)
 	struct net_device *ndev = dev_get_drvdata(dwmac->dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
 
-	return priv->plat->interface;
+	return priv->plat->mac_interface;
 }
 
 static void socfpga_sgmii_config(struct socfpga_dwmac *dwmac, bool enable)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
index 892612564694..9289bb87c3e3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
@@ -60,7 +60,7 @@ static int starfive_dwmac_set_mode(struct plat_stmmacenet_data *plat_dat)
 	unsigned int mode;
 	int err;
 
-	switch (plat_dat->interface) {
+	switch (plat_dat->mac_interface) {
 	case PHY_INTERFACE_MODE_RMII:
 		mode = STARFIVE_DWMAC_PHY_INFT_RMII;
 		break;
@@ -72,7 +72,7 @@ static int starfive_dwmac_set_mode(struct plat_stmmacenet_data *plat_dat)
 
 	default:
 		dev_err(dwmac->dev, "unsupported interface %d\n",
-			plat_dat->interface);
+			plat_dat->mac_interface);
 		return -EINVAL;
 	}
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index 3a09085819dc..26ea8c687881 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -171,7 +171,7 @@ static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat)
 
 	clk_rate = clk_get_rate(dwmac->clk_eth_ck);
 	dwmac->enable_eth_ck = false;
-	switch (plat_dat->interface) {
+	switch (plat_dat->mac_interface) {
 	case PHY_INTERFACE_MODE_MII:
 		if (clk_rate == ETH_CK_F_25M && dwmac->ext_phyclk)
 			dwmac->enable_eth_ck = true;
@@ -210,7 +210,7 @@ static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat)
 		break;
 	default:
 		pr_debug("SYSCFG init :  Do not manage %d interface\n",
-			 plat_dat->interface);
+			 plat_dat->mac_interface);
 		/* Do not manage others interfaces */
 		return -EINVAL;
 	}
@@ -230,7 +230,7 @@ static int stm32mcu_set_mode(struct plat_stmmacenet_data *plat_dat)
 	u32 reg = dwmac->mode_reg;
 	int val;
 
-	switch (plat_dat->interface) {
+	switch (plat_dat->mac_interface) {
 	case PHY_INTERFACE_MODE_MII:
 		val = SYSCFG_MCU_ETH_SEL_MII;
 		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_MII\n");
@@ -241,7 +241,7 @@ static int stm32mcu_set_mode(struct plat_stmmacenet_data *plat_dat)
 		break;
 	default:
 		pr_debug("SYSCFG init :  Do not manage %d interface\n",
-			 plat_dat->interface);
+			 plat_dat->mac_interface);
 		/* Do not manage others interfaces */
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index c23420863a8d..01e77368eef1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1016,7 +1016,7 @@ static int sun8i_dwmac_set_syscon(struct device *dev,
 	if (gmac->variant->support_rmii)
 		reg &= ~SYSCON_RMII_EN;
 
-	switch (plat->interface) {
+	switch (plat->mac_interface) {
 	case PHY_INTERFACE_MODE_MII:
 		/* default */
 		break;
@@ -1031,7 +1031,7 @@ static int sun8i_dwmac_set_syscon(struct device *dev,
 		break;
 	default:
 		dev_err(dev, "Unsupported interface mode: %s",
-			phy_modes(plat->interface));
+			phy_modes(plat->mac_interface));
 		return -EINVAL;
 	}
 
@@ -1231,7 +1231,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	/* platform data specifying hardware features and callbacks.
 	 * hardware features were copied from Allwinner drivers.
 	 */
-	plat_dat->interface = interface;
+	plat_dat->mac_interface = interface;
 	plat_dat->rx_coe = STMMAC_RX_COE_TYPE2;
 	plat_dat->tx_coe = 1;
 	plat_dat->flags |= STMMAC_FLAG_HAS_SUN8I;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 33ca5c50bdcd..9a3182b9e767 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1119,7 +1119,7 @@ static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
  */
 static void stmmac_check_pcs_mode(struct stmmac_priv *priv)
 {
-	int interface = priv->plat->interface;
+	int interface = priv->plat->mac_interface;
 
 	if (priv->dma_cap.pcs) {
 		if ((interface == PHY_INTERFACE_MODE_RGMII) ||
@@ -1214,7 +1214,9 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
 		priv->phylink_config.ovr_an_inband =
 			mdio_bus_data->xpcs_an_inband;
 
-	/* Set the platform/firmware specified interface mode */
+	/* Set the platform/firmware specified interface mode. Note, phylink
+	 * deals with the PHY interface mode, not the MAC interface mode.
+	 */
 	__set_bit(mode, priv->phylink_config.supported_interfaces);
 
 	/* If we have an xpcs, it defines which PHY interfaces are supported. */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index ff330423ee66..35f4b1484029 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -419,9 +419,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
 		return ERR_PTR(phy_mode);
 
 	plat->phy_interface = phy_mode;
-	plat->interface = stmmac_of_get_mac_mode(np);
-	if (plat->interface < 0)
-		plat->interface = plat->phy_interface;
+	plat->mac_interface = stmmac_of_get_mac_mode(np);
+	if (plat->mac_interface < 0)
+		plat->mac_interface = plat->phy_interface;
 
 	/* Some wrapper drivers still rely on phy_node. Let's save it while
 	 * they are not converted to phylink. */
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index b2ccd827bb80..ce89cc3e4913 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -223,7 +223,20 @@ struct dwmac4_addrs {
 struct plat_stmmacenet_data {
 	int bus_id;
 	int phy_addr;
-	int interface;
+	/* MAC ----- optional PCS ----- SerDes ----- optional PHY ----- Media
+	 *       ^                               ^
+	 * mac_interface                   phy_interface
+	 *
+	 * mac_interface is the MAC-side interface, which may be the same
+	 * as phy_interface if there is no intervening PCS. If there is a
+	 * PCS, then mac_interface describes the interface mode between the
+	 * MAC and PCS, and phy_interface describes the interface mode
+	 * between the PCS and PHY.
+	 */
+	phy_interface_t mac_interface;
+	/* phy_interface is the PHY-side interface - the interface used by
+	 * an attached PHY.
+	 */
 	phy_interface_t phy_interface;
 	struct stmmac_mdio_bus_data *mdio_bus_data;
 	struct device_node *phy_node;

From 042bf24ac98702e0b240034f6a8b219104095b06 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 27 Aug 2023 19:31:41 +0200
Subject: [PATCH 2014/2047] net: ethernet: mtk_wed: add some more info in
 wed_txinfo_show handler

Add some new info in Wireless Ethernet Dispatcher wed_txinfo_show
debugfs handler useful during debugging.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/3390292655d568180b73d2a25576f61aa63310e5.1693157377.git.lorenzo@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mediatek/mtk_wed_debugfs.c | 11 ++++++++++-
 drivers/net/ethernet/mediatek/mtk_wed_regs.h    |  2 ++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
index b244c02c5b51..5446d3be1c3a 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
@@ -127,8 +127,17 @@ wed_txinfo_show(struct seq_file *s, void *data)
 		DUMP_WDMA_RING(WDMA_RING_RX(0)),
 		DUMP_WDMA_RING(WDMA_RING_RX(1)),
 
-		DUMP_STR("TX FREE"),
+		DUMP_STR("WED TX FREE"),
 		DUMP_WED(WED_RX_MIB(0)),
+		DUMP_WED_RING(WED_RING_RX(0)),
+		DUMP_WED(WED_WPDMA_RX_COHERENT_MIB(0)),
+		DUMP_WED(WED_RX_MIB(1)),
+		DUMP_WED_RING(WED_RING_RX(1)),
+		DUMP_WED(WED_WPDMA_RX_COHERENT_MIB(1)),
+
+		DUMP_STR("WED WPDMA TX FREE"),
+		DUMP_WED_RING(WED_WPDMA_RING_RX(0)),
+		DUMP_WED_RING(WED_WPDMA_RING_RX(1)),
 	};
 	struct mtk_wed_hw *hw = s->private;
 	struct mtk_wed_device *dev = hw->wed_dev;
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
index 0a50bb98c5ea..47ea69feb3b2 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_regs.h
+++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
@@ -266,6 +266,8 @@ struct mtk_wdma_desc {
 
 #define MTK_WED_WPDMA_TX_MIB(_n)			(0x5a0 + (_n) * 4)
 #define MTK_WED_WPDMA_TX_COHERENT_MIB(_n)		(0x5d0 + (_n) * 4)
+#define MTK_WED_WPDMA_RX_MIB(_n)			(0x5e0 + (_n) * 4)
+#define MTK_WED_WPDMA_RX_COHERENT_MIB(_n)		(0x5f0 + (_n) * 4)
 
 #define MTK_WED_WPDMA_RING_TX(_n)			(0x600 + (_n) * 0x10)
 #define MTK_WED_WPDMA_RING_RX(_n)			(0x700 + (_n) * 0x10)

From 6c9cfb853063f317b2953c5e852b6bac1eb0cade Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 27 Aug 2023 19:33:47 +0200
Subject: [PATCH 2015/2047] net: ethernet: mtk_wed: minor change in
 wed_{tx,rx}info_show

No functional changes, just cosmetic ones.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/71e046c72a978745f0435af265dda610aa9bfbcf.1693157578.git.lorenzo@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mediatek/mtk_wed_debugfs.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
index 5446d3be1c3a..e24afeaea0da 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
@@ -84,7 +84,6 @@ dump_wed_regs(struct seq_file *s, struct mtk_wed_device *dev,
 	}
 }
 
-
 static int
 wed_txinfo_show(struct seq_file *s, void *data)
 {
@@ -142,10 +141,8 @@ wed_txinfo_show(struct seq_file *s, void *data)
 	struct mtk_wed_hw *hw = s->private;
 	struct mtk_wed_device *dev = hw->wed_dev;
 
-	if (!dev)
-		return 0;
-
-	dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs));
+	if (dev)
+		dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs));
 
 	return 0;
 }
@@ -217,10 +214,8 @@ wed_rxinfo_show(struct seq_file *s, void *data)
 	struct mtk_wed_hw *hw = s->private;
 	struct mtk_wed_device *dev = hw->wed_dev;
 
-	if (!dev)
-		return 0;
-
-	dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs));
+	if (dev)
+		dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs));
 
 	return 0;
 }

From 195273147e520844c1aae9fbf85cb6eb0bc0fdd7 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 28 Aug 2023 15:16:11 +0200
Subject: [PATCH 2016/2047] wifi: mt76: fix lock dependency problem for
 wed_lock

Fix the following kernel depency lock holding wed_lock with BH disabled.

[   40.579696] mt798x-wmac 18000000.wifi: attaching wed device 0 version 2
[   40.604648] platform 15010000.wed: MTK WED WO Firmware Version: DEV_000000, Build Time: 20221208202138
[   40.613972] platform 15010000.wed: MTK WED WO Chip ID 00 Region 3
[   40.943617]
[   40.945118] ========================================================
[   40.951457] WARNING: possible irq lock inversion dependency detected
[   40.957797] 5.15.127 #0 Not tainted
[   40.961276] --------------------------------------------------------
[   40.967614] insmod/2329 just changed the state of lock:
[   40.972827] ffffff8004003b08 (&dev->wed_lock){+.+.}-{2:2}, at: mt76_get_rxwi+0x1c/0xac [mt76]
[   40.981387] but this lock was taken by another, SOFTIRQ-safe lock in the past:
[   40.988592]  (&q->lock){+.-.}-{2:2}
[   40.988602]
[   40.988602]
[   40.988602] and interrupts could create inverse lock ordering between them.
[   40.988602]
[   41.003445]
[   41.003445] other info that might help us debug this:
[   41.009957]  Possible interrupt unsafe locking scenario:
[   41.009957]
[   41.016729]        CPU0                    CPU1
[   41.021245]        ----                    ----
[   41.025761]   lock(&dev->wed_lock);
[   41.029241]                                local_irq_disable();
[   41.035145]                                lock(&q->lock);
[   41.040620]                                lock(&dev->wed_lock);
[   41.046616]   <Interrupt>
[   41.049223]     lock(&q->lock);
[   41.052356]
[   41.052356]  *** DEADLOCK ***
[   41.052356]
[   41.058260] 1 lock held by insmod/2329:
[   41.062085]  #0: ffffff80003b9988 (&dev->mutex){....}-{3:3}, at: __driver_attach+0x88/0x190
[   41.070442]
[   41.070442] the shortest dependencies between 2nd lock and 1st lock:
[   41.078257]  -> (&q->lock){+.-.}-{2:2} {
[   41.082177]     HARDIRQ-ON-W at:
[   41.085396]                       lock_acquire+0xfc/0x2c0
[   41.090787]                       _raw_spin_lock_bh+0x84/0xa0
[   41.096525]                       mt76_dma_cleanup+0x24c/0x650 [mt76]
[   41.102977]                       mt76_dma_cleanup+0x614/0x650 [mt76]
[   41.109428]                       mt7915_eeprom_get_power_delta+0x1168/0x2464 [mt7915e]
[   41.117435]                       mt7915_eeprom_init+0x40/0x340 [mt7915e]
[   41.124222]                       cleanup_module+0x94/0xb28 [mt7915e]
[   41.130662]                       platform_probe+0x64/0xbc
[   41.136139]                       really_probe.part.0+0x98/0x2f4
[   41.142134]                       __driver_probe_device+0x94/0x16c
[   41.148303]                       driver_probe_device+0x40/0x120
[   41.154299]                       __driver_attach+0x94/0x190
[   41.159947]                       bus_for_each_dev+0x5c/0x94
[   41.165594]                       driver_attach+0x20/0x30
[   41.170983]                       bus_add_driver+0x104/0x1f4
[   41.176631]                       driver_register+0x74/0x120
[   41.182280]                       __platform_driver_register+0x24/0x30
[   41.188797]                       0xffffffc000cb1074
[   41.193754]                       do_one_initcall+0x70/0x2cc
[   41.199403]                       do_init_module+0x44/0x240
[   41.204968]                       load_module+0x1f5c/0x2874
[   41.210532]                       __do_sys_init_module+0x1d8/0x2ac
[   41.216702]                       __arm64_sys_init_module+0x18/0x20
[   41.222958]                       invoke_syscall.constprop.0+0x4c/0xe0
[   41.229474]                       do_el0_svc+0x50/0xf0
[   41.234602]                       el0_svc+0x4c/0xcc
[   41.239471]                       el0t_64_sync_handler+0xe0/0x110
[   41.245556]                       el0t_64_sync+0x15c/0x160
[   41.251029]     IN-SOFTIRQ-W at:
[   41.254249]                       lock_acquire+0xfc/0x2c0
[   41.259638]                       _raw_spin_lock_bh+0x84/0xa0
[   41.265372]                       mt76_queue_tx_complete+0x34/0x70 [mt76]
[   41.272170]                       mt76_free_pending_rxwi+0x36c/0x5d0 [mt76]
[   41.279140]                       mt76_free_pending_rxwi+0x5c0/0x5d0 [mt76]
[   41.286111]                       mt7915_eeprom_get_power_delta+0x620/0x2464 [mt7915e]
[   41.294026]                       __napi_poll.constprop.0+0x5c/0x230
[   41.300372]                       net_rx_action+0xe4/0x294
[   41.305847]                       _stext+0x154/0x4cc
[   41.310801]                       do_softirq+0xa4/0xbc
[   41.315930]                       __local_bh_enable_ip+0x168/0x174
[   41.322097]                       napi_threaded_poll+0xbc/0x140
[   41.328007]                       kthread+0x13c/0x150
[   41.333049]                       ret_from_fork+0x10/0x20
[   41.338437]     INITIAL USE at:
[   41.341568]                      lock_acquire+0xfc/0x2c0
[   41.346869]                      _raw_spin_lock_bh+0x84/0xa0
[   41.352519]                      mt76_dma_cleanup+0x24c/0x650 [mt76]
[   41.358882]                      mt76_dma_cleanup+0x614/0x650 [mt76]
[   41.365245]                      mt7915_eeprom_get_power_delta+0x1168/0x2464 [mt7915e]
[   41.373160]                      mt7915_eeprom_init+0x40/0x340 [mt7915e]
[   41.379860]                      cleanup_module+0x94/0xb28 [mt7915e]
[   41.386213]                      platform_probe+0x64/0xbc
[   41.391602]                      really_probe.part.0+0x98/0x2f4
[   41.397511]                      __driver_probe_device+0x94/0x16c
[   41.403594]                      driver_probe_device+0x40/0x120
[   41.409502]                      __driver_attach+0x94/0x190
[   41.415063]                      bus_for_each_dev+0x5c/0x94
[   41.420625]                      driver_attach+0x20/0x30
[   41.425926]                      bus_add_driver+0x104/0x1f4
[   41.431487]                      driver_register+0x74/0x120
[   41.437049]                      __platform_driver_register+0x24/0x30
[   41.443479]                      0xffffffc000cb1074
[   41.448346]                      do_one_initcall+0x70/0x2cc
[   41.453907]                      do_init_module+0x44/0x240
[   41.459383]                      load_module+0x1f5c/0x2874
[   41.464860]                      __do_sys_init_module+0x1d8/0x2ac
[   41.470944]                      __arm64_sys_init_module+0x18/0x20
[   41.477113]                      invoke_syscall.constprop.0+0x4c/0xe0
[   41.483542]                      do_el0_svc+0x50/0xf0
[   41.488582]                      el0_svc+0x4c/0xcc
[   41.493364]                      el0t_64_sync_handler+0xe0/0x110
[   41.499361]                      el0t_64_sync+0x15c/0x160
[   41.504748]   }
[   41.506489]   ... key      at: [<ffffffc000c65ba0>] __this_module+0x3e0/0xffffffffffffa840 [mt76]
[   41.515371]   ... acquired at:
[   41.518413]    _raw_spin_lock+0x60/0x74
[   41.522240]    mt76_get_rxwi+0x1c/0xac [mt76]
[   41.526608]    mt76_dma_cleanup+0x3e0/0x650 [mt76]
[   41.531410]    mt76_dma_cleanup+0x614/0x650 [mt76]
[   41.536211]    mt7915_dma_init+0x408/0x7b0 [mt7915e]
[   41.541177]    mt7915_register_device+0x310/0x620 [mt7915e]
[   41.546749]    mt7915_mmio_probe+0xcec/0x1d44 [mt7915e]
[   41.551973]    platform_probe+0x64/0xbc
[   41.555802]    really_probe.part.0+0x98/0x2f4
[   41.560149]    __driver_probe_device+0x94/0x16c
[   41.564670]    driver_probe_device+0x40/0x120
[   41.569017]    __driver_attach+0x94/0x190
[   41.573019]    bus_for_each_dev+0x5c/0x94
[   41.577018]    driver_attach+0x20/0x30
[   41.580758]    bus_add_driver+0x104/0x1f4
[   41.584758]    driver_register+0x74/0x120
[   41.588759]    __platform_driver_register+0x24/0x30
[   41.593628]    init_module+0x74/0x1000 [mt7915e]
[   41.598248]    do_one_initcall+0x70/0x2cc
[   41.602248]    do_init_module+0x44/0x240
[   41.606162]    load_module+0x1f5c/0x2874
[   41.610078]    __do_sys_init_module+0x1d8/0x2ac
[   41.614600]    __arm64_sys_init_module+0x18/0x20
[   41.619209]    invoke_syscall.constprop.0+0x4c/0xe0
[   41.624076]    do_el0_svc+0x50/0xf0
[   41.627555]    el0_svc+0x4c/0xcc
[   41.630776]    el0t_64_sync_handler+0xe0/0x110
[   41.635211]    el0t_64_sync+0x15c/0x160
[   41.639037]
[   41.640517] -> (&dev->wed_lock){+.+.}-{2:2} {
[   41.644872]    HARDIRQ-ON-W at:
[   41.648003]                     lock_acquire+0xfc/0x2c0
[   41.653219]                     _raw_spin_lock+0x60/0x74
[   41.658520]                     mt76_free_pending_rxwi+0xc0/0x5d0 [mt76]
[   41.665232]                     mt76_dma_cleanup+0x1dc/0x650 [mt76]
[   41.671508]                     mt7915_eeprom_get_power_delta+0x1830/0x2464 [mt7915e]
[   41.679336]                     mt7915_unregister_device+0x5b4/0x910 [mt7915e]
[   41.686555]                     mt7915_eeprom_get_target_power+0xb8/0x230 [mt7915e]
[   41.694209]                     mt7986_wmac_enable+0xc30/0xcd0 [mt7915e]
[   41.700909]                     platform_remove+0x4c/0x64
[   41.706298]                     __device_release_driver+0x194/0x240
[   41.712554]                     driver_detach+0xc0/0x100
[   41.717857]                     bus_remove_driver+0x54/0xac
[   41.723418]                     driver_unregister+0x2c/0x54
[   41.728980]                     platform_driver_unregister+0x10/0x20
[   41.735323]                     mt7915_ops+0x244/0xffffffffffffed58 [mt7915e]
[   41.742457]                     __arm64_sys_delete_module+0x170/0x23c
[   41.748887]                     invoke_syscall.constprop.0+0x4c/0xe0
[   41.755229]                     do_el0_svc+0x50/0xf0
[   41.760183]                     el0_svc+0x4c/0xcc
[   41.764878]                     el0t_64_sync_handler+0xe0/0x110
[   41.770788]                     el0t_64_sync+0x15c/0x160
[   41.776088]    SOFTIRQ-ON-W at:
[   41.779220]                     lock_acquire+0xfc/0x2c0
[   41.784435]                     _raw_spin_lock+0x60/0x74
[   41.789737]                     mt76_get_rxwi+0x1c/0xac [mt76]
[   41.795580]                     mt7915_debugfs_rx_log+0x804/0xb74 [mt7915e]
[   41.802540]                     mtk_wed_start+0x970/0xaa0
[   41.807929]                     mt7915_dma_start+0x26c/0x630 [mt7915e]
[   41.814455]                     mt7915_dma_start+0x5a4/0x630 [mt7915e]
[   41.820981]                     mt7915_dma_init+0x45c/0x7b0 [mt7915e]
[   41.827420]                     mt7915_register_device+0x310/0x620 [mt7915e]
[   41.834467]                     mt7915_mmio_probe+0xcec/0x1d44 [mt7915e]
[   41.841167]                     platform_probe+0x64/0xbc
[   41.846469]                     really_probe.part.0+0x98/0x2f4
[   41.852291]                     __driver_probe_device+0x94/0x16c
[   41.858286]                     driver_probe_device+0x40/0x120
[   41.864107]                     __driver_attach+0x94/0x190
[   41.869582]                     bus_for_each_dev+0x5c/0x94
[   41.875056]                     driver_attach+0x20/0x30
[   41.880270]                     bus_add_driver+0x104/0x1f4
[   41.885745]                     driver_register+0x74/0x120
[   41.891221]                     __platform_driver_register+0x24/0x30
[   41.897564]                     init_module+0x74/0x1000 [mt7915e]
[   41.903657]                     do_one_initcall+0x70/0x2cc
[   41.909130]                     do_init_module+0x44/0x240
[   41.914520]                     load_module+0x1f5c/0x2874
[   41.919909]                     __do_sys_init_module+0x1d8/0x2ac
[   41.925905]                     __arm64_sys_init_module+0x18/0x20
[   41.931989]                     invoke_syscall.constprop.0+0x4c/0xe0
[   41.938331]                     do_el0_svc+0x50/0xf0
[   41.943285]                     el0_svc+0x4c/0xcc
[   41.947981]                     el0t_64_sync_handler+0xe0/0x110
[   41.953892]                     el0t_64_sync+0x15c/0x160
[   41.959192]    INITIAL USE at:
[   41.962238]                    lock_acquire+0xfc/0x2c0
[   41.967365]                    _raw_spin_lock+0x60/0x74
[   41.972580]                    mt76_free_pending_rxwi+0xc0/0x5d0 [mt76]
[   41.979206]                    mt76_dma_cleanup+0x1dc/0x650 [mt76]
[   41.985395]                    mt7915_eeprom_get_power_delta+0x1830/0x2464 [mt7915e]
[   41.993137]                    mt7915_unregister_device+0x5b4/0x910 [mt7915e]
[   42.000270]                    mt7915_eeprom_get_target_power+0xb8/0x230 [mt7915e]
[   42.007837]                    mt7986_wmac_enable+0xc30/0xcd0 [mt7915e]
[   42.014450]                    platform_remove+0x4c/0x64
[   42.019753]                    __device_release_driver+0x194/0x240
[   42.025922]                    driver_detach+0xc0/0x100
[   42.031137]                    bus_remove_driver+0x54/0xac
[   42.036612]                    driver_unregister+0x2c/0x54
[   42.042087]                    platform_driver_unregister+0x10/0x20
[   42.048344]                    mt7915_ops+0x244/0xffffffffffffed58 [mt7915e]
[   42.055391]                    __arm64_sys_delete_module+0x170/0x23c
[   42.061735]                    invoke_syscall.constprop.0+0x4c/0xe0
[   42.067990]                    do_el0_svc+0x50/0xf0
[   42.072857]                    el0_svc+0x4c/0xcc
[   42.077466]                    el0t_64_sync_handler+0xe0/0x110
[   42.083289]                    el0t_64_sync+0x15c/0x160
[   42.088503]  }
[   42.090157]  ... key      at: [<ffffffc000c65c10>] __this_module+0x450/0xffffffffffffa840 [mt76]
[   42.098951]  ... acquired at:
[   42.101907]    __lock_acquire+0x718/0x1df0
[   42.105994]    lock_acquire+0xfc/0x2c0
[   42.109734]    _raw_spin_lock+0x60/0x74
[   42.113561]    mt76_get_rxwi+0x1c/0xac [mt76]
[   42.117929]    mt7915_debugfs_rx_log+0x804/0xb74 [mt7915e]
[   42.123415]    mtk_wed_start+0x970/0xaa0
[   42.127328]    mt7915_dma_start+0x26c/0x630 [mt7915e]
[   42.132379]    mt7915_dma_start+0x5a4/0x630 [mt7915e]
[   42.137430]    mt7915_dma_init+0x45c/0x7b0 [mt7915e]
[   42.142395]    mt7915_register_device+0x310/0x620 [mt7915e]
[   42.147967]    mt7915_mmio_probe+0xcec/0x1d44 [mt7915e]
[   42.153192]    platform_probe+0x64/0xbc
[   42.157019]    really_probe.part.0+0x98/0x2f4
[   42.161367]    __driver_probe_device+0x94/0x16c
[   42.165887]    driver_probe_device+0x40/0x120
[   42.170234]    __driver_attach+0x94/0x190
[   42.174235]    bus_for_each_dev+0x5c/0x94
[   42.178235]    driver_attach+0x20/0x30
[   42.181974]    bus_add_driver+0x104/0x1f4
[   42.185974]    driver_register+0x74/0x120
[   42.189974]    __platform_driver_register+0x24/0x30
[   42.194842]    init_module+0x74/0x1000 [mt7915e]
[   42.199460]    do_one_initcall+0x70/0x2cc
[   42.203460]    do_init_module+0x44/0x240
[   42.207376]    load_module+0x1f5c/0x2874
[   42.211290]    __do_sys_init_module+0x1d8/0x2ac
[   42.215813]    __arm64_sys_init_module+0x18/0x20
[   42.220421]    invoke_syscall.constprop.0+0x4c/0xe0
[   42.225288]    do_el0_svc+0x50/0xf0
[   42.228768]    el0_svc+0x4c/0xcc
[   42.231989]    el0t_64_sync_handler+0xe0/0x110
[   42.236424]    el0t_64_sync+0x15c/0x160
[   42.240249]
[   42.241730]
[   42.241730] stack backtrace:
[   42.246074] CPU: 1 PID: 2329 Comm: insmod Not tainted 5.15.127 #0
[   42.252157] Hardware name: GainStrong Oolite-MT7981B V1 Dev Board (NAND boot) (DT)
[   42.259712] Call trace:
[   42.262147]  dump_backtrace+0x0/0x174
[   42.265802]  show_stack+0x14/0x20
[   42.269108]  dump_stack_lvl+0x84/0xac
[   42.272761]  dump_stack+0x14/0x2c
[   42.276066]  print_irq_inversion_bug.part.0+0x1b0/0x1c4
[   42.281285]  mark_lock+0x8b8/0x8bc
[   42.284678]  __lock_acquire+0x718/0x1df0
[   42.288592]  lock_acquire+0xfc/0x2c0
[   42.292158]  _raw_spin_lock+0x60/0x74
[   42.295811]  mt76_get_rxwi+0x1c/0xac [mt76]
[   42.300008]  mt7915_debugfs_rx_log+0x804/0xb74 [mt7915e]
[   42.305320]  mtk_wed_start+0x970/0xaa0
[   42.309059]  mt7915_dma_start+0x26c/0x630 [mt7915e]
[   42.313937]  mt7915_dma_start+0x5a4/0x630 [mt7915e]
[   42.318815]  mt7915_dma_init+0x45c/0x7b0 [mt7915e]
[   42.323606]  mt7915_register_device+0x310/0x620 [mt7915e]
[   42.329005]  mt7915_mmio_probe+0xcec/0x1d44 [mt7915e]
[   42.334056]  platform_probe+0x64/0xbc
[   42.337711]  really_probe.part.0+0x98/0x2f4
[   42.341885]  __driver_probe_device+0x94/0x16c
[   42.346232]  driver_probe_device+0x40/0x120
[   42.350407]  __driver_attach+0x94/0x190
[   42.354234]  bus_for_each_dev+0x5c/0x94
[   42.358061]  driver_attach+0x20/0x30
[   42.361627]  bus_add_driver+0x104/0x1f4
[   42.365454]  driver_register+0x74/0x120
[   42.369282]  __platform_driver_register+0x24/0x30
[   42.373977]  init_module+0x74/0x1000 [mt7915e]
[   42.378423]  do_one_initcall+0x70/0x2cc
[   42.382249]  do_init_module+0x44/0x240
[   42.385990]  load_module+0x1f5c/0x2874
[   42.389733]  __do_sys_init_module+0x1d8/0x2ac
[   42.394082]  __arm64_sys_init_module+0x18/0x20
[   42.398518]  invoke_syscall.constprop.0+0x4c/0xe0
[   42.403211]  do_el0_svc+0x50/0xf0
[   42.406517]  el0_svc+0x4c/0xcc
[   42.409565]  el0t_64_sync_handler+0xe0/0x110
[   42.413827]  el0t_64_sync+0x15c/0x160
[   42.674858] mt798x-wmac 18000000.wifi: HW/SW Version: 0x8a108a10, Build Time: 20221208201745a
[   42.674858]
[   42.692078] mt798x-wmac 18000000.wifi: WM Firmware Version: ____000000, Build Time: 20221208201806
[   42.735606] mt798x-wmac 18000000.wifi: WA Firmware Version: DEV_000000, Build Time: 20221208202048

Tested-by: Daniel Golle <daniel@makrotopia.org>
Fixes: 2666bece0905 ("wifi: mt76: introduce rxwi and rx token utility routines")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Acked-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/ee80be41c2a8d8749d83c6950a272a5e77aadd45.1693228333.git.lorenzo@kernel.org
---
 drivers/net/wireless/mediatek/mt76/dma.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 05d9ab3ce819..dc8f4e157eb2 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -93,13 +93,13 @@ __mt76_get_rxwi(struct mt76_dev *dev)
 {
 	struct mt76_txwi_cache *t = NULL;
 
-	spin_lock(&dev->wed_lock);
+	spin_lock_bh(&dev->wed_lock);
 	if (!list_empty(&dev->rxwi_cache)) {
 		t = list_first_entry(&dev->rxwi_cache, struct mt76_txwi_cache,
 				     list);
 		list_del(&t->list);
 	}
-	spin_unlock(&dev->wed_lock);
+	spin_unlock_bh(&dev->wed_lock);
 
 	return t;
 }
@@ -145,9 +145,9 @@ mt76_put_rxwi(struct mt76_dev *dev, struct mt76_txwi_cache *t)
 	if (!t)
 		return;
 
-	spin_lock(&dev->wed_lock);
+	spin_lock_bh(&dev->wed_lock);
 	list_add(&t->list, &dev->rxwi_cache);
-	spin_unlock(&dev->wed_lock);
+	spin_unlock_bh(&dev->wed_lock);
 }
 EXPORT_SYMBOL_GPL(mt76_put_rxwi);
 

From 84727c5727fed0ea8b0bf5ef0cbffdb0cabb2539 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@kernel.org>
Date: Mon, 28 Aug 2023 15:35:59 +0300
Subject: [PATCH 2017/2047] MAINTAINERS: wifi: remove generic wiki links from
 drivers

The top level entry for wireless drivers already contains to the wiki so no
need to duplicate that in driver entries:

NETWORKING DRIVERS (WIRELESS)
[...]
W:	https://wireless.wiki.kernel.org/

Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230828123603.87621-1-kvalo@kernel.org
---
 MAINTAINERS | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 612d6d1dbf36..8a1f16870604 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -470,7 +470,6 @@ F:	drivers/hwmon/adm1029.c
 ADM8211 WIRELESS DRIVER
 L:	linux-wireless@vger.kernel.org
 S:	Orphan
-W:	https://wireless.wiki.kernel.org/
 F:	drivers/net/wireless/admtek/adm8211.*
 
 ADP1653 FLASH CONTROLLER DRIVER
@@ -18062,7 +18061,6 @@ REALTEK WIRELESS DRIVER (rtlwifi family)
 M:	Ping-Ke Shih <pkshih@realtek.com>
 L:	linux-wireless@vger.kernel.org
 S:	Maintained
-W:	https://wireless.wiki.kernel.org/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
 F:	drivers/net/wireless/realtek/rtlwifi/
 
@@ -18591,7 +18589,6 @@ F:	drivers/media/dvb-frontends/rtl2832_sdr*
 RTL8180 WIRELESS DRIVER
 L:	linux-wireless@vger.kernel.org
 S:	Orphan
-W:	https://wireless.wiki.kernel.org/
 F:	drivers/net/wireless/realtek/rtl818x/rtl8180/
 
 RTL8187 WIRELESS DRIVER
@@ -18599,7 +18596,6 @@ M:	Hin-Tak Leung <hintak.leung@gmail.com>
 M:	Larry Finger <Larry.Finger@lwfinger.net>
 L:	linux-wireless@vger.kernel.org
 S:	Maintained
-W:	https://wireless.wiki.kernel.org/
 F:	drivers/net/wireless/realtek/rtl818x/rtl8187/
 
 RTL8XXXU WIRELESS DRIVER (rtl8xxxu)

From 0b9480da79e070481bd4c352ad555abf9ec7598f Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@kernel.org>
Date: Mon, 28 Aug 2023 15:36:00 +0300
Subject: [PATCH 2018/2047] MAINTAINERS: wifi: rtlwifi: remove git tree

Linville's tree hasn't been used for something like 10 years so remove it.

Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230828123603.87621-2-kvalo@kernel.org
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8a1f16870604..5074df0b1861 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18061,7 +18061,6 @@ REALTEK WIRELESS DRIVER (rtlwifi family)
 M:	Ping-Ke Shih <pkshih@realtek.com>
 L:	linux-wireless@vger.kernel.org
 S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
 F:	drivers/net/wireless/realtek/rtlwifi/
 
 REALTEK WIRELESS DRIVER (rtw88)

From 42c5f0e20ec9b0371fbdc6de69a73a932d72a0ef Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@kernel.org>
Date: Mon, 28 Aug 2023 15:36:01 +0300
Subject: [PATCH 2019/2047] MAINTAINERS: wifi: rtl8xxxu: remove git tree

Jes' tree hasn't been used for six years so remove it.

Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230828123603.87621-3-kvalo@kernel.org
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5074df0b1861..386ec00eff63 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18601,7 +18601,6 @@ RTL8XXXU WIRELESS DRIVER (rtl8xxxu)
 M:	Jes Sorensen <Jes.Sorensen@gmail.com>
 L:	linux-wireless@vger.kernel.org
 S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jes/linux.git rtl8xxxu-devel
 F:	drivers/net/wireless/realtek/rtl8xxxu/
 
 RTRS TRANSPORT DRIVERS

From b8c713c13482a37e55379b54e9d4535d50bc0577 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@kernel.org>
Date: Mon, 28 Aug 2023 15:36:02 +0300
Subject: [PATCH 2020/2047] MAINTAINERS: wifi: wl12xx: remove git tree

Luca's tree hasn't been used for 10 years so remove it.

Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230828123603.87621-4-kvalo@kernel.org
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 386ec00eff63..f7c7f50ca055 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21512,7 +21512,6 @@ L:	linux-wireless@vger.kernel.org
 S:	Orphan
 W:	https://wireless.wiki.kernel.org/en/users/Drivers/wl12xx
 W:	https://wireless.wiki.kernel.org/en/users/Drivers/wl1251
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/luca/wl12xx.git
 F:	drivers/net/wireless/ti/
 
 TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER

From d253fb3705b37fd4afd05eb615310be4e9c3042b Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@kernel.org>
Date: Mon, 28 Aug 2023 15:36:03 +0300
Subject: [PATCH 2021/2047] MAINTAINERS: wifi: hostap: remove maintainer and
 web page

As hostap is marked as obsolete there's no need to have a maintainer. Also
remove the link to the web page.

Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230828123603.87621-5-kvalo@kernel.org
---
 MAINTAINERS | 2 --
 1 file changed, 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index f7c7f50ca055..b52a7def4063 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9502,10 +9502,8 @@ F:	Documentation/devicetree/bindings/iio/pressure/honeywell,mprls0025pa.yaml
 F:	drivers/iio/pressure/mprls0025pa.c
 
 HOST AP DRIVER
-M:	Jouni Malinen <j@w1.fi>
 L:	linux-wireless@vger.kernel.org
 S:	Obsolete
-W:	http://w1.fi/hostap-driver.html
 F:	drivers/net/wireless/intersil/hostap/
 
 HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER

From af5ff4b789956e9ef43e0bb80429f24ec44b2063 Mon Sep 17 00:00:00 2001
From: Jeff Johnson <quic_jjohnson@quicinc.com>
Date: Tue, 29 Aug 2023 08:03:33 -0700
Subject: [PATCH 2022/2047] MAINTAINERS: wifi: ath12k: add wiki link

The ath12k wireless driver now has a wiki, so advertise it.

Signed-off-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230829-ath12kwiki-v1-1-df37127527a1@quicinc.com
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index b52a7def4063..d8927aadd094 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17528,6 +17528,7 @@ M:	Kalle Valo <kvalo@kernel.org>
 M:	Jeff Johnson <quic_jjohnson@quicinc.com>
 L:	ath12k@lists.infradead.org
 S:	Supported
+W:	https://wireless.wiki.kernel.org/en/users/Drivers/ath12k
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
 F:	drivers/net/wireless/ath/ath12k/
 

From eec679e4ac5f47507774956fb3479c206e761af7 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 24 Aug 2023 21:06:51 -0600
Subject: [PATCH 2023/2047] wifi: mwifiex: Fix tlv_buf_left calculation

In a TLV encoding scheme, the Length part represents the length after
the header containing the values for type and length. In this case,
`tlv_len` should be:

tlv_len == (sizeof(*tlv_rxba) - 1) - sizeof(tlv_rxba->header) + tlv_bitmap_len

Notice that the `- 1` accounts for the one-element array `bitmap`, which
1-byte size is already included in `sizeof(*tlv_rxba)`.

So, if the above is correct, there is a double-counting of some members
in `struct mwifiex_ie_types_rxba_sync`, when `tlv_buf_left` and `tmp`
are calculated:

968                 tlv_buf_left -= (sizeof(*tlv_rxba) + tlv_len);
969                 tmp = (u8 *)tlv_rxba + tlv_len + sizeof(*tlv_rxba);

in specific, members:

drivers/net/wireless/marvell/mwifiex/fw.h:777
 777         u8 mac[ETH_ALEN];
 778         u8 tid;
 779         u8 reserved;
 780         __le16 seq_num;
 781         __le16 bitmap_len;

This is clearly wrong, and affects the subsequent decoding of data in
`event_buf` through `tlv_rxba`:

970                 tlv_rxba = (struct mwifiex_ie_types_rxba_sync *)tmp;

Fix this by using `sizeof(tlv_rxba->header)` instead of `sizeof(*tlv_rxba)`
in the calculation of `tlv_buf_left` and `tmp`.

This results in the following binary differences before/after changes:

| drivers/net/wireless/marvell/mwifiex/11n_rxreorder.o
| @@ -4698,11 +4698,11 @@
|  drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c:968
|                 tlv_buf_left -= (sizeof(tlv_rxba->header) + tlv_len);
| -    1da7:      lea    -0x11(%rbx),%edx
| +    1da7:      lea    -0x4(%rbx),%edx
|      1daa:      movzwl %bp,%eax
|  drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c:969
|                 tmp = (u8 *)tlv_rxba  + sizeof(tlv_rxba->header) + tlv_len;
| -    1dad:      lea    0x11(%r15,%rbp,1),%r15
| +    1dad:      lea    0x4(%r15,%rbp,1),%r15

The above reflects the desired change: avoid counting 13 too many bytes;
which is the total size of the double-counted members in
`struct mwifiex_ie_types_rxba_sync`:

$ pahole -C mwifiex_ie_types_rxba_sync drivers/net/wireless/marvell/mwifiex/11n_rxreorder.o
struct mwifiex_ie_types_rxba_sync {
	struct mwifiex_ie_types_header header;           /*     0     4 */

     |-----------------------------------------------------------------------
     |  u8                         mac[6];               /*     4     6 */  |
     |	u8                         tid;                  /*    10     1 */  |
     |  u8                         reserved;             /*    11     1 */  |
     | 	__le16                     seq_num;              /*    12     2 */  |
     | 	__le16                     bitmap_len;           /*    14     2 */  |
     |  u8                         bitmap[1];            /*    16     1 */  |
     |----------------------------------------------------------------------|
								  | 13 bytes|
								  -----------

	/* size: 17, cachelines: 1, members: 7 */
	/* last cacheline: 17 bytes */
} __attribute__((__packed__));

Fixes: 99ffe72cdae4 ("mwifiex: process rxba_sync event")
Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/06668edd68e7a26bbfeebd1201ae077a2a7a8bce.1692931954.git.gustavoars@kernel.org
---
 drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
index 391793a16adc..d1d3632a3ed7 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
@@ -965,8 +965,8 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv,
 			}
 		}
 
-		tlv_buf_left -= (sizeof(*tlv_rxba) + tlv_len);
-		tmp = (u8 *)tlv_rxba + tlv_len + sizeof(*tlv_rxba);
+		tlv_buf_left -= (sizeof(tlv_rxba->header) + tlv_len);
+		tmp = (u8 *)tlv_rxba  + sizeof(tlv_rxba->header) + tlv_len;
 		tlv_rxba = (struct mwifiex_ie_types_rxba_sync *)tmp;
 	}
 }

From c7847241de28c718285d0e1bd97d1061a4a806c8 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 24 Aug 2023 21:07:43 -0600
Subject: [PATCH 2024/2047] wifi: mwifiex: Replace one-element array with
 flexible-array member in struct mwifiex_ie_types_rxba_sync

One-element and zero-length arrays are deprecated. So, replace
one-element array in struct mwifiex_ie_types_rxba_sync with
flexible-array member, and refactor the rest of the code, accordingly.

This results in no differences in binary output.

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/79c801c69c8beece2f80502c60166036d3c047cc.1692931954.git.gustavoars@kernel.org
---
 drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c | 2 +-
 drivers/net/wireless/marvell/mwifiex/fw.h            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
index d1d3632a3ed7..735aac52bdc4 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
@@ -918,7 +918,7 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv,
 
 	mwifiex_dbg_dump(priv->adapter, EVT_D, "RXBA_SYNC event:",
 			 event_buf, len);
-	while (tlv_buf_left >= sizeof(*tlv_rxba)) {
+	while (tlv_buf_left > sizeof(*tlv_rxba)) {
 		tlv_type = le16_to_cpu(tlv_rxba->header.type);
 		tlv_len  = le16_to_cpu(tlv_rxba->header.len);
 		if (tlv_type != TLV_TYPE_RXBA_SYNC) {
diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h
index f2168fac95ed..8e6db904e5b2 100644
--- a/drivers/net/wireless/marvell/mwifiex/fw.h
+++ b/drivers/net/wireless/marvell/mwifiex/fw.h
@@ -779,7 +779,7 @@ struct mwifiex_ie_types_rxba_sync {
 	u8 reserved;
 	__le16 seq_num;
 	__le16 bitmap_len;
-	u8 bitmap[1];
+	u8 bitmap[];
 } __packed;
 
 struct chan_band_param_set {

From d5a93b7d2877aae4ba7590ad6cb65f8d33079489 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 24 Aug 2023 21:10:45 -0600
Subject: [PATCH 2025/2047] wifi: mwifiex: Sanity check tlv_len and
 tlv_bitmap_len

Add sanity checks for both `tlv_len` and `tlv_bitmap_len` before
decoding data from `event_buf`.

This prevents any malicious or buggy firmware from overflowing
`event_buf` through large values for `tlv_len` and `tlv_bitmap_len`.

Suggested-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/d4f8780527d551552ee96f17a0229e02e1c200d1.1692931954.git.gustavoars@kernel.org
---
 .../net/wireless/marvell/mwifiex/11n_rxreorder.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
index 735aac52bdc4..10690e82358b 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
@@ -921,6 +921,14 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv,
 	while (tlv_buf_left > sizeof(*tlv_rxba)) {
 		tlv_type = le16_to_cpu(tlv_rxba->header.type);
 		tlv_len  = le16_to_cpu(tlv_rxba->header.len);
+		if (size_add(sizeof(tlv_rxba->header), tlv_len) > tlv_buf_left) {
+			mwifiex_dbg(priv->adapter, WARN,
+				    "TLV size (%zu) overflows event_buf buf_left=%d\n",
+				    size_add(sizeof(tlv_rxba->header), tlv_len),
+				    tlv_buf_left);
+			return;
+		}
+
 		if (tlv_type != TLV_TYPE_RXBA_SYNC) {
 			mwifiex_dbg(priv->adapter, ERROR,
 				    "Wrong TLV id=0x%x\n", tlv_type);
@@ -929,6 +937,14 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv,
 
 		tlv_seq_num = le16_to_cpu(tlv_rxba->seq_num);
 		tlv_bitmap_len = le16_to_cpu(tlv_rxba->bitmap_len);
+		if (size_add(sizeof(*tlv_rxba), tlv_bitmap_len) > tlv_buf_left) {
+			mwifiex_dbg(priv->adapter, WARN,
+				    "TLV size (%zu) overflows event_buf buf_left=%d\n",
+				    size_add(sizeof(*tlv_rxba), tlv_bitmap_len),
+				    tlv_buf_left);
+			return;
+		}
+
 		mwifiex_dbg(priv->adapter, INFO,
 			    "%pM tid=%d seq_num=%d bitmap_len=%d\n",
 			    tlv_rxba->mac, tlv_rxba->tid, tlv_seq_num,

From 234249d88b091d006b82f8d570343aae5f383736 Mon Sep 17 00:00:00 2001
From: Wen Gong <quic_wgong@quicinc.com>
Date: Fri, 25 Aug 2023 03:00:55 -0400
Subject: [PATCH 2026/2047] wifi: cfg80211/mac80211: hold link BSSes when assoc
 fails for MLO connection

When connect to MLO AP with more than one link, and the assoc response of
AP is not success, then cfg80211_unhold_bss() is not called for all the
links' cfg80211_bss except the primary link which means the link used by
the latest successful association request. Thus the hold value of the
cfg80211_bss is not reset to 0 after the assoc fail, and then the
__cfg80211_unlink_bss() will not be called for the cfg80211_bss by
__cfg80211_bss_expire().

Then the AP always looks exist even the AP is shutdown or reconfigured
to another type, then it will lead error while connecting it again.

The detail info are as below.

When connect with muti-links AP, cfg80211_hold_bss() is called by
cfg80211_mlme_assoc() for each cfg80211_bss of all the links. When
assoc response from AP is not success(such as status_code==1), the
ieee80211_link_data of non-primary link(sdata->link[link_id]) is NULL
because ieee80211_assoc_success()->ieee80211_vif_update_links() is
not called for the links.

Then struct cfg80211_rx_assoc_resp resp in cfg80211_rx_assoc_resp() and
struct cfg80211_connect_resp_params cr in __cfg80211_connect_result()
will only have the data of the primary link, and finally function
cfg80211_connect_result_release_bsses() only call cfg80211_unhold_bss()
for the primary link. Then cfg80211_bss of the other links will never free
because its hold is always > 0 now.

Hence assign value for the bss and status from assoc_data since it is
valid for this case. Also assign value of addr from assoc_data when the
link is NULL because the addrs of assoc_data and link both represent the
local link addr and they are same value for success connection.

Fixes: 81151ce462e5 ("wifi: mac80211: support MLO authentication/association with one link")
Signed-off-by: Wen Gong <quic_wgong@quicinc.com>
Link: https://lore.kernel.org/r/20230825070055.28164-1-quic_wgong@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h |  2 +-
 net/mac80211/mlme.c    | 11 ++++++-----
 net/wireless/mlme.c    |  3 ++-
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 3a4b684f89bf..ed3bc2a78d82 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7231,7 +7231,7 @@ struct cfg80211_rx_assoc_resp {
 	int uapsd_queues;
 	const u8 *ap_mld_addr;
 	struct {
-		const u8 *addr;
+		u8 addr[ETH_ALEN] __aligned(2);
 		struct cfg80211_bss *bss;
 		u16 status;
 	} links[IEEE80211_MLD_MAX_NUM_LINKS];
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f93eb38ae0b8..46d46cfab6c8 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -5429,17 +5429,18 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
 		struct ieee80211_link_data *link;
 
-		link = sdata_dereference(sdata->link[link_id], sdata);
-		if (!link)
-			continue;
-
 		if (!assoc_data->link[link_id].bss)
 			continue;
 
 		resp.links[link_id].bss = assoc_data->link[link_id].bss;
-		resp.links[link_id].addr = link->conf->addr;
+		ether_addr_copy(resp.links[link_id].addr,
+				assoc_data->link[link_id].addr);
 		resp.links[link_id].status = assoc_data->link[link_id].status;
 
+		link = sdata_dereference(sdata->link[link_id], sdata);
+		if (!link)
+			continue;
+
 		/* get uapsd queues configuration - same for all links */
 		resp.uapsd_queues = 0;
 		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 775cac4d6100..3e2c398abddc 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -52,7 +52,8 @@ void cfg80211_rx_assoc_resp(struct net_device *dev,
 		cr.links[link_id].bssid = data->links[link_id].bss->bssid;
 		cr.links[link_id].addr = data->links[link_id].addr;
 		/* need to have local link addresses for MLO connections */
-		WARN_ON(cr.ap_mld_addr && !cr.links[link_id].addr);
+		WARN_ON(cr.ap_mld_addr &&
+			!is_valid_ether_addr(cr.links[link_id].addr));
 
 		BUG_ON(!cr.links[link_id].bss->channel);
 

From 5112fa502708aaaf80acb78273fc8625f221eb11 Mon Sep 17 00:00:00 2001
From: Aditya Kumar Singh <quic_adisi@quicinc.com>
Date: Tue, 5 Sep 2023 12:18:57 +0530
Subject: [PATCH 2027/2047] wifi: cfg80211: validate AP phy operation before
 starting it

Many regulatories can have HE/EHT Operation as not permitted. In such
cases, AP should not be allowed to start if it is using a channel
having the no operation flag set. However, currently there is no such
check in place.

Fix this issue by validating such IEs sent during start AP against the
channel flags.

Signed-off-by: Aditya Kumar Singh <quic_adisi@quicinc.com>
Reviewed-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Link: https://lore.kernel.org/r/20230905064857.1503-1-quic_adisi@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index de47838aca4f..0c989a839e56 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5909,6 +5909,21 @@ out:
 	nlmsg_free(msg);
 }
 
+static int nl80211_validate_ap_phy_operation(struct cfg80211_ap_settings *params)
+{
+	struct ieee80211_channel *channel = params->chandef.chan;
+
+	if ((params->he_cap ||  params->he_oper) &&
+	    (channel->flags & IEEE80211_CHAN_NO_HE))
+		return -EOPNOTSUPP;
+
+	if ((params->eht_cap || params->eht_oper) &&
+	    (channel->flags & IEEE80211_CHAN_NO_EHT))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
 static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -6178,6 +6193,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		goto out_unlock;
 
+	err = nl80211_validate_ap_phy_operation(params);
+	if (err)
+		goto out_unlock;
+
 	if (info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS])
 		params->flags = nla_get_u32(
 			info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]);

From 2d4caa1dbe915654d0e8845758d9c96e721377a8 Mon Sep 17 00:00:00 2001
From: Gregory Greenman <gregory.greenman@intel.com>
Date: Tue, 5 Sep 2023 16:29:57 +0300
Subject: [PATCH 2028/2047] iwlwifi: mvm: handle PS changes in vif_cfg_changed

Handling of BSS_CHANGED_PS was missing in vif_cfg_changed
callback. Fix it.

Fixes: 22c588343529 ("wifi: iwlwifi: mvm: replace bss_info_changed() with vif_cfg/link_info_changed()")
Reported-by: Sultan Alsawaf <sultan@kerneltoast.com>
Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
Link: https://lore.kernel.org/r/20230905162939.5ef0c8230de6.Ieed265014988c50ec68fbff6d33821e4215f987f@changeid
[note: patch looks bigger than it is due to reindentation]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 .../wireless/intel/iwlwifi/mvm/mld-mac80211.c | 117 +++++++++---------
 1 file changed, 61 insertions(+), 56 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
index 8b6c641772ee..b719843e9457 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
@@ -731,73 +731,78 @@ static void iwl_mvm_mld_vif_cfg_changed_station(struct iwl_mvm *mvm,
 
 	mvmvif->associated = vif->cfg.assoc;
 
-	if (!(changes & BSS_CHANGED_ASSOC))
-		return;
+	if (changes & BSS_CHANGED_ASSOC) {
+		if (vif->cfg.assoc) {
+			/* clear statistics to get clean beacon counter */
+			iwl_mvm_request_statistics(mvm, true);
+			iwl_mvm_sf_update(mvm, vif, false);
+			iwl_mvm_power_vif_assoc(mvm, vif);
 
-	if (vif->cfg.assoc) {
-		/* clear statistics to get clean beacon counter */
-		iwl_mvm_request_statistics(mvm, true);
-		iwl_mvm_sf_update(mvm, vif, false);
-		iwl_mvm_power_vif_assoc(mvm, vif);
+			for_each_mvm_vif_valid_link(mvmvif, i) {
+				memset(&mvmvif->link[i]->beacon_stats, 0,
+				       sizeof(mvmvif->link[i]->beacon_stats));
 
-		for_each_mvm_vif_valid_link(mvmvif, i) {
-			memset(&mvmvif->link[i]->beacon_stats, 0,
-			       sizeof(mvmvif->link[i]->beacon_stats));
+				if (vif->p2p) {
+					iwl_mvm_update_smps(mvm, vif,
+							    IWL_MVM_SMPS_REQ_PROT,
+							    IEEE80211_SMPS_DYNAMIC, i);
+				}
 
-			if (vif->p2p) {
-				iwl_mvm_update_smps(mvm, vif,
-						    IWL_MVM_SMPS_REQ_PROT,
-						    IEEE80211_SMPS_DYNAMIC, i);
+				rcu_read_lock();
+				link_conf = rcu_dereference(vif->link_conf[i]);
+				if (link_conf && !link_conf->dtim_period)
+					protect = true;
+				rcu_read_unlock();
 			}
 
-			rcu_read_lock();
-			link_conf = rcu_dereference(vif->link_conf[i]);
-			if (link_conf && !link_conf->dtim_period)
-				protect = true;
-			rcu_read_unlock();
-		}
+			if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) &&
+			    protect) {
+				/* If we're not restarting and still haven't
+				 * heard a beacon (dtim period unknown) then
+				 * make sure we still have enough minimum time
+				 * remaining in the time event, since the auth
+				 * might actually have taken quite a while
+				 * (especially for SAE) and so the remaining
+				 * time could be small without us having heard
+				 * a beacon yet.
+				 */
+				iwl_mvm_protect_assoc(mvm, vif, 0);
+			}
 
-		if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) &&
-		    protect) {
-			/* If we're not restarting and still haven't
-			 * heard a beacon (dtim period unknown) then
-			 * make sure we still have enough minimum time
-			 * remaining in the time event, since the auth
-			 * might actually have taken quite a while
-			 * (especially for SAE) and so the remaining
-			 * time could be small without us having heard
-			 * a beacon yet.
+			iwl_mvm_sf_update(mvm, vif, false);
+
+			/* FIXME: need to decide about misbehaving AP handling */
+			iwl_mvm_power_vif_assoc(mvm, vif);
+		} else if (iwl_mvm_mld_vif_have_valid_ap_sta(mvmvif)) {
+			iwl_mvm_mei_host_disassociated(mvm);
+
+			/* If update fails - SF might be running in associated
+			 * mode while disassociated - which is forbidden.
 			 */
-			iwl_mvm_protect_assoc(mvm, vif, 0);
+			ret = iwl_mvm_sf_update(mvm, vif, false);
+			WARN_ONCE(ret &&
+				  !test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED,
+					    &mvm->status),
+				  "Failed to update SF upon disassociation\n");
+
+			/* If we get an assert during the connection (after the
+			 * station has been added, but before the vif is set
+			 * to associated), mac80211 will re-add the station and
+			 * then configure the vif. Since the vif is not
+			 * associated, we would remove the station here and
+			 * this would fail the recovery.
+			 */
+			iwl_mvm_mld_vif_delete_all_stas(mvm, vif);
 		}
 
-		iwl_mvm_sf_update(mvm, vif, false);
-
-		/* FIXME: need to decide about misbehaving AP handling */
-		iwl_mvm_power_vif_assoc(mvm, vif);
-	} else if (iwl_mvm_mld_vif_have_valid_ap_sta(mvmvif)) {
-		iwl_mvm_mei_host_disassociated(mvm);
-
-		/* If update fails - SF might be running in associated
-		 * mode while disassociated - which is forbidden.
-		 */
-		ret = iwl_mvm_sf_update(mvm, vif, false);
-		WARN_ONCE(ret &&
-			  !test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED,
-				    &mvm->status),
-			  "Failed to update SF upon disassociation\n");
-
-		/* If we get an assert during the connection (after the
-		 * station has been added, but before the vif is set
-		 * to associated), mac80211 will re-add the station and
-		 * then configure the vif. Since the vif is not
-		 * associated, we would remove the station here and
-		 * this would fail the recovery.
-		 */
-		iwl_mvm_mld_vif_delete_all_stas(mvm, vif);
+		iwl_mvm_bss_info_changed_station_assoc(mvm, vif, changes);
 	}
 
-	iwl_mvm_bss_info_changed_station_assoc(mvm, vif, changes);
+	if (changes & BSS_CHANGED_PS) {
+		ret = iwl_mvm_power_update_mac(mvm);
+		if (ret)
+			IWL_ERR(mvm, "failed to update power mode\n");
+	}
 }
 
 static void

From 424c82e8ad56756bb98b08268ffcf68d12d183eb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 16 Jun 2023 11:03:34 +0200
Subject: [PATCH 2029/2047] wifi: iwlwifi: dbg_ini: fix structure packing

The iwl_fw_ini_error_dump_range structure has conflicting alignment
requirements for the inner union and the outer struct:

In file included from drivers/net/wireless/intel/iwlwifi/fw/dbg.c:9:
drivers/net/wireless/intel/iwlwifi/fw/error-dump.h:312:2: error: field  within 'struct iwl_fw_ini_error_dump_range' is less aligned than 'union iwl_fw_ini_error_dump_range::(anonymous at drivers/net/wireless/intel/iwlwifi/fw/error-dump.h:312:2)' and is usually due to 'struct iwl_fw_ini_error_dump_range' being packed, which can lead to unaligned accesses [-Werror,-Wunaligned-access]
        union {

As the original intention was apparently to make the entire structure
unaligned, mark the innermost members the same way so the union
becomes packed as well.

Fixes: 973193554cae6 ("iwlwifi: dbg_ini: dump headers cleanup")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Gregory Greenman <gregory.greenman@intel.com>
Link: https://lore.kernel.org/r/20230616090343.2454061-1-arnd@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/error-dump.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
index f5e08988dc7b..06d6f7f66430 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
@@ -310,9 +310,9 @@ struct iwl_fw_ini_fifo_hdr {
 struct iwl_fw_ini_error_dump_range {
 	__le32 range_data_size;
 	union {
-		__le32 internal_base_addr;
-		__le64 dram_base_addr;
-		__le32 page_num;
+		__le32 internal_base_addr __packed;
+		__le64 dram_base_addr __packed;
+		__le32 page_num __packed;
 		struct iwl_fw_ini_fifo_hdr fifo_hdr;
 		struct iwl_cmd_header fw_pkt_hdr;
 	};

From e8fbe99e87877f0412655f40d7c45bf8471470ac Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Tue, 8 Aug 2023 13:56:05 -0700
Subject: [PATCH 2030/2047] wifi: iwlwifi: Ensure ack flag is properly cleared.

Debugging indicates that nothing else is clearing the info->flags,
so some frames were flagged as ACKed when they should not be.
Explicitly clear the ack flag to ensure this does not happen.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Acked-by: Gregory Greenman <gregory.greenman@intel.com>
Link: https://lore.kernel.org/r/20230808205605.4105670-1-greearb@candelatech.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 36d70d589aed..898dca393643 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -1612,6 +1612,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
 		iwl_trans_free_tx_cmd(mvm->trans, info->driver_data[1]);
 
 		memset(&info->status, 0, sizeof(info->status));
+		info->flags &= ~(IEEE80211_TX_STAT_ACK | IEEE80211_TX_STAT_TX_FILTERED);
 
 		/* inform mac80211 about what happened with the frame */
 		switch (status & TX_STATUS_MSK) {
@@ -1964,6 +1965,8 @@ static void iwl_mvm_tx_reclaim(struct iwl_mvm *mvm, int sta_id, int tid,
 		 */
 		if (!is_flush)
 			info->flags |= IEEE80211_TX_STAT_ACK;
+		else
+			info->flags &= ~IEEE80211_TX_STAT_ACK;
 	}
 
 	/*

From 8ba438ef3cacc4808a63ed0ce24d4f0942cfe55d Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 23 Jul 2023 22:24:59 +0200
Subject: [PATCH 2031/2047] wifi: iwlwifi: mvm: Fix a memory corruption issue

A few lines above, space is kzalloc()'ed for:
	sizeof(struct iwl_nvm_data) +
	sizeof(struct ieee80211_channel) +
	sizeof(struct ieee80211_rate)

'mvm->nvm_data' is a 'struct iwl_nvm_data', so it is fine.

At the end of this structure, there is the 'channels' flex array.
Each element is of type 'struct ieee80211_channel'.
So only 1 element is allocated in this array.

When doing:
  mvm->nvm_data->bands[0].channels = mvm->nvm_data->channels;
We point at the first element of the 'channels' flex array.
So this is fine.

However, when doing:
  mvm->nvm_data->bands[0].bitrates =
			(void *)((u8 *)mvm->nvm_data->channels + 1);
because of the "(u8 *)" cast, we add only 1 to the address of the beginning
of the flex array.

It is likely that we want point at the 'struct ieee80211_rate' allocated
just after.

Remove the spurious casting so that the pointer arithmetic works as
expected.

Fixes: 8ca151b568b6 ("iwlwifi: add the MVM driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Gregory Greenman <gregory.greenman@intel.com>
Link: https://lore.kernel.org/r/23f0ec986ef1529055f4f93dcb3940a6cf8d9a94.1690143750.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 1f5db65a088d..1d5ee4330f29 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -802,7 +802,7 @@ out:
 		mvm->nvm_data->bands[0].n_channels = 1;
 		mvm->nvm_data->bands[0].n_bitrates = 1;
 		mvm->nvm_data->bands[0].bitrates =
-			(void *)((u8 *)mvm->nvm_data->channels + 1);
+			(void *)(mvm->nvm_data->channels + 1);
 		mvm->nvm_data->bands[0].bitrates->hw_value = 10;
 	}
 

From 37c20b2effe987b806c8de6d12978e4ffeff026f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 16 Aug 2023 15:38:04 +0200
Subject: [PATCH 2032/2047] wifi: cfg80211: fix cqm_config access race

Max Schulze reports crashes with brcmfmac. The reason seems
to be a race between userspace removing the CQM config and
the driver calling cfg80211_cqm_rssi_notify(), where if the
data is freed while cfg80211_cqm_rssi_notify() runs it will
crash since it assumes wdev->cqm_config is set. This can't
be fixed with a simple non-NULL check since there's nothing
we can do for locking easily, so use RCU instead to protect
the pointer, but that requires pulling the updates out into
an asynchronous worker so they can sleep and call back into
the driver.

Since we need to change the free anyway, also change it to
go back to the old settings if changing the settings fails.

Reported-and-tested-by: Max Schulze <max.schulze@online.de>
Closes: https://lore.kernel.org/r/ac96309a-8d8d-4435-36e6-6d152eb31876@online.de
Fixes: 4a4b8169501b ("cfg80211: Accept multiple RSSI thresholds for CQM")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h |  3 +-
 net/wireless/core.c    | 14 +++----
 net/wireless/core.h    |  7 +++-
 net/wireless/nl80211.c | 95 +++++++++++++++++++++++++++---------------
 4 files changed, 76 insertions(+), 43 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ed3bc2a78d82..aebfa54d547a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6013,7 +6013,8 @@ struct wireless_dev {
 	} wext;
 #endif
 
-	struct cfg80211_cqm_config *cqm_config;
+	struct wiphy_work cqm_rssi_work;
+	struct cfg80211_cqm_config __rcu *cqm_config;
 
 	struct list_head pmsr_list;
 	spinlock_t pmsr_lock;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 25bc2e50a061..64e861617110 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1181,16 +1181,11 @@ void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked,
 }
 EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason);
 
-void cfg80211_cqm_config_free(struct wireless_dev *wdev)
-{
-	kfree(wdev->cqm_config);
-	wdev->cqm_config = NULL;
-}
-
 static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
 				      bool unregister_netdev)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct cfg80211_cqm_config *cqm_config;
 	unsigned int link_id;
 
 	ASSERT_RTNL();
@@ -1227,7 +1222,10 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
 	kfree_sensitive(wdev->wext.keys);
 	wdev->wext.keys = NULL;
 #endif
-	cfg80211_cqm_config_free(wdev);
+	wiphy_work_cancel(wdev->wiphy, &wdev->cqm_rssi_work);
+	/* deleted from the list, so can't be found from nl80211 any more */
+	cqm_config = rcu_access_pointer(wdev->cqm_config);
+	kfree_rcu(cqm_config, rcu_head);
 
 	/*
 	 * Ensure that all events have been processed and
@@ -1379,6 +1377,8 @@ void cfg80211_init_wdev(struct wireless_dev *wdev)
 	wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
 #endif
 
+	wiphy_work_init(&wdev->cqm_rssi_work, cfg80211_cqm_rssi_notify_work);
+
 	if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT)
 		wdev->ps = true;
 	else
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 507d184b8b40..ba9c7170afa4 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -295,12 +295,17 @@ struct cfg80211_beacon_registration {
 };
 
 struct cfg80211_cqm_config {
+	struct rcu_head rcu_head;
 	u32 rssi_hyst;
 	s32 last_rssi_event_value;
+	enum nl80211_cqm_rssi_threshold_event last_rssi_event_type;
 	int n_rssi_thresholds;
 	s32 rssi_thresholds[] __counted_by(n_rssi_thresholds);
 };
 
+void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy,
+				   struct wiphy_work *work);
+
 void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);
 
 /* free object */
@@ -566,8 +571,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 #define CFG80211_DEV_WARN_ON(cond)	({bool __r = (cond); __r; })
 #endif
 
-void cfg80211_cqm_config_free(struct wireless_dev *wdev);
-
 void cfg80211_release_pmsr(struct wireless_dev *wdev, u32 portid);
 void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev);
 void cfg80211_pmsr_free_wk(struct work_struct *work);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0c989a839e56..7a88361b3414 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -12815,7 +12815,8 @@ static int nl80211_set_cqm_txe(struct genl_info *info,
 }
 
 static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
-				    struct net_device *dev)
+				    struct net_device *dev,
+				    struct cfg80211_cqm_config *cqm_config)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	s32 last, low, high;
@@ -12824,7 +12825,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
 	int err;
 
 	/* RSSI reporting disabled? */
-	if (!wdev->cqm_config)
+	if (!cqm_config)
 		return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);
 
 	/*
@@ -12833,7 +12834,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
 	 * connection is established and enough beacons received to calculate
 	 * the average.
 	 */
-	if (!wdev->cqm_config->last_rssi_event_value &&
+	if (!cqm_config->last_rssi_event_value &&
 	    wdev->links[0].client.current_bss &&
 	    rdev->ops->get_station) {
 		struct station_info sinfo = {};
@@ -12847,30 +12848,30 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
 
 		cfg80211_sinfo_release_content(&sinfo);
 		if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG))
-			wdev->cqm_config->last_rssi_event_value =
+			cqm_config->last_rssi_event_value =
 				(s8) sinfo.rx_beacon_signal_avg;
 	}
 
-	last = wdev->cqm_config->last_rssi_event_value;
-	hyst = wdev->cqm_config->rssi_hyst;
-	n = wdev->cqm_config->n_rssi_thresholds;
+	last = cqm_config->last_rssi_event_value;
+	hyst = cqm_config->rssi_hyst;
+	n = cqm_config->n_rssi_thresholds;
 
 	for (i = 0; i < n; i++) {
 		i = array_index_nospec(i, n);
-		if (last < wdev->cqm_config->rssi_thresholds[i])
+		if (last < cqm_config->rssi_thresholds[i])
 			break;
 	}
 
 	low_index = i - 1;
 	if (low_index >= 0) {
 		low_index = array_index_nospec(low_index, n);
-		low = wdev->cqm_config->rssi_thresholds[low_index] - hyst;
+		low = cqm_config->rssi_thresholds[low_index] - hyst;
 	} else {
 		low = S32_MIN;
 	}
 	if (i < n) {
 		i = array_index_nospec(i, n);
-		high = wdev->cqm_config->rssi_thresholds[i] + hyst - 1;
+		high = cqm_config->rssi_thresholds[i] + hyst - 1;
 	} else {
 		high = S32_MAX;
 	}
@@ -12883,6 +12884,7 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
 				u32 hysteresis)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct cfg80211_cqm_config *cqm_config = NULL, *old;
 	struct net_device *dev = info->user_ptr[1];
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int i, err;
@@ -12900,10 +12902,6 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
 	    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
 		return -EOPNOTSUPP;
 
-	wdev_lock(wdev);
-	cfg80211_cqm_config_free(wdev);
-	wdev_unlock(wdev);
-
 	if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) {
 		if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */
 			return rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
@@ -12920,9 +12918,10 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
 		n_thresholds = 0;
 
 	wdev_lock(wdev);
-	if (n_thresholds) {
-		struct cfg80211_cqm_config *cqm_config;
+	old = rcu_dereference_protected(wdev->cqm_config,
+					lockdep_is_held(&wdev->mtx));
 
+	if (n_thresholds) {
 		cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds,
 						 n_thresholds),
 				     GFP_KERNEL);
@@ -12937,11 +12936,18 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
 		       flex_array_size(cqm_config, rssi_thresholds,
 				       n_thresholds));
 
-		wdev->cqm_config = cqm_config;
+		rcu_assign_pointer(wdev->cqm_config, cqm_config);
+	} else {
+		RCU_INIT_POINTER(wdev->cqm_config, NULL);
 	}
 
-	err = cfg80211_cqm_rssi_update(rdev, dev);
-
+	err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config);
+	if (err) {
+		rcu_assign_pointer(wdev->cqm_config, old);
+		kfree_rcu(cqm_config, rcu_head);
+	} else {
+		kfree_rcu(old, rcu_head);
+	}
 unlock:
 	wdev_unlock(wdev);
 
@@ -19092,9 +19098,8 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
 			      enum nl80211_cqm_rssi_threshold_event rssi_event,
 			      s32 rssi_level, gfp_t gfp)
 {
-	struct sk_buff *msg;
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct cfg80211_cqm_config *cqm_config;
 
 	trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level);
 
@@ -19102,18 +19107,41 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
 		    rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH))
 		return;
 
-	if (wdev->cqm_config) {
-		wdev->cqm_config->last_rssi_event_value = rssi_level;
-
-		cfg80211_cqm_rssi_update(rdev, dev);
-
-		if (rssi_level == 0)
-			rssi_level = wdev->cqm_config->last_rssi_event_value;
+	rcu_read_lock();
+	cqm_config = rcu_dereference(wdev->cqm_config);
+	if (cqm_config) {
+		cqm_config->last_rssi_event_value = rssi_level;
+		cqm_config->last_rssi_event_type = rssi_event;
+		wiphy_work_queue(wdev->wiphy, &wdev->cqm_rssi_work);
 	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
 
-	msg = cfg80211_prepare_cqm(dev, NULL, gfp);
+void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work)
+{
+	struct wireless_dev *wdev = container_of(work, struct wireless_dev,
+						 cqm_rssi_work);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	enum nl80211_cqm_rssi_threshold_event rssi_event;
+	struct cfg80211_cqm_config *cqm_config;
+	struct sk_buff *msg;
+	s32 rssi_level;
+
+	wdev_lock(wdev);
+	cqm_config = rcu_dereference_protected(wdev->cqm_config,
+					       lockdep_is_held(&wdev->mtx));
+	if (!wdev->cqm_config)
+		goto unlock;
+
+	cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config);
+
+	rssi_level = cqm_config->last_rssi_event_value;
+	rssi_event = cqm_config->last_rssi_event_type;
+
+	msg = cfg80211_prepare_cqm(wdev->netdev, NULL, GFP_KERNEL);
 	if (!msg)
-		return;
+		goto unlock;
 
 	if (nla_put_u32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
 			rssi_event))
@@ -19123,14 +19151,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
 				      rssi_level))
 		goto nla_put_failure;
 
-	cfg80211_send_cqm(msg, gfp);
+	cfg80211_send_cqm(msg, GFP_KERNEL);
 
-	return;
+	goto unlock;
 
  nla_put_failure:
 	nlmsg_free(msg);
+ unlock:
+	wdev_unlock(wdev);
 }
-EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
 
 void cfg80211_cqm_txe_notify(struct net_device *dev,
 			     const u8 *peer, u32 num_packets,

From d1383077c225ceb87ac7a3b56b2c505193f77ed7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 13 Sep 2023 09:36:57 +0200
Subject: [PATCH 2033/2047] wifi: cfg80211: add missing kernel-doc for
 cqm_rssi_work

As reported by Stephen, I neglected to add the kernel-doc
for the new struct member. Fix that.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Fixes: 37c20b2effe9 ("wifi: cfg80211: fix cqm_config access race")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index aebfa54d547a..7192346e4a22 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5941,6 +5941,7 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy,
  * @event_lock: (private) lock for event list
  * @owner_nlportid: (private) owner socket port ID
  * @nl_owner_dead: (private) owner socket went away
+ * @cqm_rssi_work: (private) CQM RSSI reporting work
  * @cqm_config: (private) nl80211 RSSI monitor state
  * @pmsr_list: (private) peer measurement requests
  * @pmsr_lock: (private) peer measurements requests/results lock

From 6e48ebffc2db5419b3a51cfc509bde442252b356 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 13 Sep 2023 07:01:34 +0200
Subject: [PATCH 2034/2047] wifi: mac80211: fix mesh id corruption on 32 bit
 systems
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since the changed field size was increased to u64, mesh_bss_info_changed
pulls invalid bits from the first 3 bytes of the mesh id, clears them, and
passes them on to ieee80211_link_info_change_notify, because
ifmsh->mbss_changed was not updated to match its size.
Fix this by turning into ifmsh->mbss_changed into an unsigned long array with
64 bit size.

Fixes: 15ddba5f4311 ("wifi: mac80211: consistently use u64 for BSS changes")
Reported-by: Thomas Hühn <thomas.huehn@hs-nordhausen.de>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20230913050134.53536-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 2 +-
 net/mac80211/mesh.c        | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 06bd406846d2..b3d00259e1d6 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -676,7 +676,7 @@ struct ieee80211_if_mesh {
 	struct timer_list mesh_path_root_timer;
 
 	unsigned long wrkq_flags;
-	unsigned long mbss_changed;
+	unsigned long mbss_changed[64 / BITS_PER_LONG];
 
 	bool userspace_handles_dfs;
 
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index af8c5fc2db14..e31c312c124a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1175,7 +1175,7 @@ void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 
 	/* if we race with running work, worst case this work becomes a noop */
 	for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE)
-		set_bit(bit, &ifmsh->mbss_changed);
+		set_bit(bit, ifmsh->mbss_changed);
 	set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags);
 	wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
 }
@@ -1257,7 +1257,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 
 	/* clear any mesh work (for next join) we may have accrued */
 	ifmsh->wrkq_flags = 0;
-	ifmsh->mbss_changed = 0;
+	memset(ifmsh->mbss_changed, 0, sizeof(ifmsh->mbss_changed));
 
 	local->fif_other_bss--;
 	atomic_dec(&local->iff_allmultis);
@@ -1724,9 +1724,9 @@ static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata)
 	u32 bit;
 	u64 changed = 0;
 
-	for_each_set_bit(bit, &ifmsh->mbss_changed,
+	for_each_set_bit(bit, ifmsh->mbss_changed,
 			 sizeof(changed) * BITS_PER_BYTE) {
-		clear_bit(bit, &ifmsh->mbss_changed);
+		clear_bit(bit, ifmsh->mbss_changed);
 		changed |= BIT(bit);
 	}
 

From 2c3dfba4cf84ac4f306cc6653b37b6dd6859ae9d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 14 Sep 2023 15:45:17 +0200
Subject: [PATCH 2035/2047] rfkill: sync before userspace visibility/changes

If userspace quickly opens /dev/rfkill after a new
instance was created, it might see the old state of
the instance from before the sync work runs and may
even _change_ the state, only to have the sync work
change it again.

Fix this by doing the sync inline where needed, not
just for /dev/rfkill but also for sysfs.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/rfkill/core.c | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 01fca7a10b4b..08630896b6c8 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -48,6 +48,7 @@ struct rfkill {
 	bool			persistent;
 	bool			polling_paused;
 	bool			suspended;
+	bool			need_sync;
 
 	const struct rfkill_ops	*ops;
 	void			*data;
@@ -368,6 +369,17 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
 		rfkill_event(rfkill);
 }
 
+static void rfkill_sync(struct rfkill *rfkill)
+{
+	lockdep_assert_held(&rfkill_global_mutex);
+
+	if (!rfkill->need_sync)
+		return;
+
+	rfkill_set_block(rfkill, rfkill_global_states[rfkill->type].cur);
+	rfkill->need_sync = false;
+}
+
 static void rfkill_update_global_state(enum rfkill_type type, bool blocked)
 {
 	int i;
@@ -730,6 +742,10 @@ static ssize_t soft_show(struct device *dev, struct device_attribute *attr,
 {
 	struct rfkill *rfkill = to_rfkill(dev);
 
+	mutex_lock(&rfkill_global_mutex);
+	rfkill_sync(rfkill);
+	mutex_unlock(&rfkill_global_mutex);
+
 	return sysfs_emit(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0);
 }
 
@@ -751,6 +767,7 @@ static ssize_t soft_store(struct device *dev, struct device_attribute *attr,
 		return -EINVAL;
 
 	mutex_lock(&rfkill_global_mutex);
+	rfkill_sync(rfkill);
 	rfkill_set_block(rfkill, state);
 	mutex_unlock(&rfkill_global_mutex);
 
@@ -783,6 +800,10 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr,
 {
 	struct rfkill *rfkill = to_rfkill(dev);
 
+	mutex_lock(&rfkill_global_mutex);
+	rfkill_sync(rfkill);
+	mutex_unlock(&rfkill_global_mutex);
+
 	return sysfs_emit(buf, "%d\n", user_state_from_blocked(rfkill->state));
 }
 
@@ -805,6 +826,7 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr,
 		return -EINVAL;
 
 	mutex_lock(&rfkill_global_mutex);
+	rfkill_sync(rfkill);
 	rfkill_set_block(rfkill, state == RFKILL_USER_STATE_SOFT_BLOCKED);
 	mutex_unlock(&rfkill_global_mutex);
 
@@ -1032,14 +1054,10 @@ static void rfkill_uevent_work(struct work_struct *work)
 
 static void rfkill_sync_work(struct work_struct *work)
 {
-	struct rfkill *rfkill;
-	bool cur;
-
-	rfkill = container_of(work, struct rfkill, sync_work);
+	struct rfkill *rfkill = container_of(work, struct rfkill, sync_work);
 
 	mutex_lock(&rfkill_global_mutex);
-	cur = rfkill_global_states[rfkill->type].cur;
-	rfkill_set_block(rfkill, cur);
+	rfkill_sync(rfkill);
 	mutex_unlock(&rfkill_global_mutex);
 }
 
@@ -1087,6 +1105,7 @@ int __must_check rfkill_register(struct rfkill *rfkill)
 			round_jiffies_relative(POLL_INTERVAL));
 
 	if (!rfkill->persistent || rfkill_epo_lock_active) {
+		rfkill->need_sync = true;
 		schedule_work(&rfkill->sync_work);
 	} else {
 #ifdef CONFIG_RFKILL_INPUT
@@ -1171,6 +1190,7 @@ static int rfkill_fop_open(struct inode *inode, struct file *file)
 		ev = kzalloc(sizeof(*ev), GFP_KERNEL);
 		if (!ev)
 			goto free;
+		rfkill_sync(rfkill);
 		rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD);
 		list_add_tail(&ev->list, &data->events);
 	}

From 2e1b3ae3e1f2cf5a3c9c05d5f961d7d4257b489f Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Thu, 7 Sep 2023 09:16:14 +0200
Subject: [PATCH 2036/2047] wifi: rtw88: rtw8723d: Fix MAC address offset in
 EEPROM

The MAC address is stored at offset 0x107 in the EEPROM, like correctly
stated in the comment. Add a two bytes reserved field right before the
MAC address to shift it from offset 0x105 to 0x107.

With this the MAC address returned from my RTL8723du wifi stick can be
correctly decoded as "Shenzhen Four Seas Global Link Network Technology
Co., Ltd."

Fixes: 87caeef032fc ("wifi: rtw88: Add rtw8723du chipset support")
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reported-by: Yanik Fuchs <Yanik.fuchs@mbv.ch>
Cc: stable@vger.kernel.org
Acked-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230907071614.2032404-1-s.hauer@pengutronix.de
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index 3642a2c7f80c..2434e2480cbe 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -46,6 +46,7 @@ struct rtw8723du_efuse {
 	u8 vender_id[2];                /* 0x100 */
 	u8 product_id[2];               /* 0x102 */
 	u8 usb_option;                  /* 0x104 */
+	u8 res5[2];			/* 0x105 */
 	u8 mac_addr[ETH_ALEN];          /* 0x107 */
 };
 

From aef7a0300047e7b4707ea0411dc9597cba108fc8 Mon Sep 17 00:00:00 2001
From: Pin-yen Lin <treapking@chromium.org>
Date: Fri, 8 Sep 2023 18:41:12 +0800
Subject: [PATCH 2037/2047] wifi: mwifiex: Fix oob check condition in
 mwifiex_process_rx_packet

Only skip the code path trying to access the rfc1042 headers when the
buffer is too small, so the driver can still process packets without
rfc1042 headers.

Fixes: 119585281617 ("wifi: mwifiex: Fix OOB and integer underflow when rx packets")
Signed-off-by: Pin-yen Lin <treapking@chromium.org>
Acked-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Matthew Wang <matthewmwang@chromium.org>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230908104308.1546501-1-treapking@chromium.org
---
 drivers/net/wireless/marvell/mwifiex/sta_rx.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/sta_rx.c b/drivers/net/wireless/marvell/mwifiex/sta_rx.c
index 65420ad67416..257737137cd7 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_rx.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_rx.c
@@ -86,7 +86,8 @@ int mwifiex_process_rx_packet(struct mwifiex_private *priv,
 	rx_pkt_len = le16_to_cpu(local_rx_pd->rx_pkt_length);
 	rx_pkt_hdr = (void *)local_rx_pd + rx_pkt_off;
 
-	if (sizeof(*rx_pkt_hdr) + rx_pkt_off > skb->len) {
+	if (sizeof(rx_pkt_hdr->eth803_hdr) + sizeof(rfc1042_header) +
+	    rx_pkt_off > skb->len) {
 		mwifiex_dbg(priv->adapter, ERROR,
 			    "wrong rx packet offset: len=%d, rx_pkt_off=%d\n",
 			    skb->len, rx_pkt_off);
@@ -95,12 +96,13 @@ int mwifiex_process_rx_packet(struct mwifiex_private *priv,
 		return -1;
 	}
 
-	if ((!memcmp(&rx_pkt_hdr->rfc1042_hdr, bridge_tunnel_header,
-		     sizeof(bridge_tunnel_header))) ||
-	    (!memcmp(&rx_pkt_hdr->rfc1042_hdr, rfc1042_header,
-		     sizeof(rfc1042_header)) &&
-	     ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_AARP &&
-	     ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_IPX)) {
+	if (sizeof(*rx_pkt_hdr) + rx_pkt_off <= skb->len &&
+	    ((!memcmp(&rx_pkt_hdr->rfc1042_hdr, bridge_tunnel_header,
+		      sizeof(bridge_tunnel_header))) ||
+	     (!memcmp(&rx_pkt_hdr->rfc1042_hdr, rfc1042_header,
+		      sizeof(rfc1042_header)) &&
+	      ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_AARP &&
+	      ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_IPX))) {
 		/*
 		 *  Replace the 803 header and rfc1042 header (llc/snap) with an
 		 *    EthernetII header, keep the src/dst and snap_type

From 4fed494abcd4fde5c24de19160e93814f912fdb3 Mon Sep 17 00:00:00 2001
From: Juerg Haefliger <juerg.haefliger@canonical.com>
Date: Thu, 14 Sep 2023 09:02:27 +0200
Subject: [PATCH 2038/2047] wifi: brcmfmac: Replace 1-element arrays with
 flexible arrays

Since commit 2d47c6956ab3 ("ubsan: Tighten UBSAN_BOUNDS on GCC"),
UBSAN_BOUNDS no longer pretends 1-element arrays are unbounded. Walking
'element' and 'channel_list' will trigger warnings, so make them proper
flexible arrays.

False positive warnings were:

  UBSAN: array-index-out-of-bounds in drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c:6984:20
  index 1 is out of range for type '__le32 [1]'

  UBSAN: array-index-out-of-bounds in drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c:1126:27
  index 1 is out of range for type '__le16 [1]'

for these lines of code:

  6884  ch.chspec = (u16)le32_to_cpu(list->element[i]);

  1126  params_le->channel_list[i] = cpu_to_le16(chanspec);

Cc: stable@vger.kernel.org # 6.5+
Signed-off-by: Juerg Haefliger <juerg.haefliger@canonical.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230914070227.12028-1-juerg.haefliger@canonical.com
---
 .../wireless/broadcom/brcm80211/brcmfmac/fwil_types.h    | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
index bece26741d3a..611d1a6aabb9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
@@ -442,7 +442,12 @@ struct brcmf_scan_params_v2_le {
 				 * fixed parameter portion is assumed, otherwise
 				 * ssid in the fixed portion is ignored
 				 */
-	__le16 channel_list[1];	/* list of chanspecs */
+	union {
+		__le16 padding;	/* Reserve space for at least 1 entry for abort
+				 * which uses an on stack brcmf_scan_params_v2_le
+				 */
+		DECLARE_FLEX_ARRAY(__le16, channel_list);	/* chanspecs */
+	};
 };
 
 struct brcmf_scan_results {
@@ -702,7 +707,7 @@ struct brcmf_sta_info_le {
 
 struct brcmf_chanspec_list {
 	__le32	count;		/* # of entries */
-	__le32	element[1];	/* variable length uint32 list */
+	__le32  element[];	/* variable length uint32 list */
 };
 
 /*

From 684e45e120b82deccaf8b85633905304a3bbf56d Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 19 Sep 2023 21:47:47 +0200
Subject: [PATCH 2039/2047] wifi: mt76: mt76x02: fix MT76x0 external LNA gain
 handling

On MT76x0, LNA gain should be applied for both external and internal LNA.
On MT76x2, LNA gain should be treated as 0 for external LNA.
Move the LNA type based logic to mt76x2 in order to fix mt76x0.

Fixes: 2daa67588f34 ("mt76x0: unify lna_gain parsing")
Reported-by: Shiji Yang <yangshiji66@outlook.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230919194747.31647-1-nbd@nbd.name
---
 drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c |  7 -------
 drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c  | 13 +++++++++++--
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c
index 0acabba2d1a5..5d402cf2951c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c
@@ -131,15 +131,8 @@ u8 mt76x02_get_lna_gain(struct mt76x02_dev *dev,
 			s8 *lna_2g, s8 *lna_5g,
 			struct ieee80211_channel *chan)
 {
-	u16 val;
 	u8 lna;
 
-	val = mt76x02_eeprom_get(dev, MT_EE_NIC_CONF_1);
-	if (val & MT_EE_NIC_CONF_1_LNA_EXT_2G)
-		*lna_2g = 0;
-	if (val & MT_EE_NIC_CONF_1_LNA_EXT_5G)
-		memset(lna_5g, 0, sizeof(s8) * 3);
-
 	if (chan->band == NL80211_BAND_2GHZ)
 		lna = *lna_2g;
 	else if (chan->hw_value <= 64)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
index d5809408d1d3..8c01855885ce 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
@@ -256,7 +256,8 @@ void mt76x2_read_rx_gain(struct mt76x02_dev *dev)
 	struct ieee80211_channel *chan = dev->mphy.chandef.chan;
 	int channel = chan->hw_value;
 	s8 lna_5g[3], lna_2g;
-	u8 lna;
+	bool use_lna;
+	u8 lna = 0;
 	u16 val;
 
 	if (chan->band == NL80211_BAND_2GHZ)
@@ -275,7 +276,15 @@ void mt76x2_read_rx_gain(struct mt76x02_dev *dev)
 	dev->cal.rx.mcu_gain |= (lna_5g[1] & 0xff) << 16;
 	dev->cal.rx.mcu_gain |= (lna_5g[2] & 0xff) << 24;
 
-	lna = mt76x02_get_lna_gain(dev, &lna_2g, lna_5g, chan);
+	val = mt76x02_eeprom_get(dev, MT_EE_NIC_CONF_1);
+	if (chan->band == NL80211_BAND_2GHZ)
+		use_lna = !(val & MT_EE_NIC_CONF_1_LNA_EXT_2G);
+	else
+		use_lna = !(val & MT_EE_NIC_CONF_1_LNA_EXT_5G);
+
+	if (use_lna)
+		lna = mt76x02_get_lna_gain(dev, &lna_2g, lna_5g, chan);
+
 	dev->cal.rx.lna_gain = mt76x02_sign_extend(lna, 8);
 }
 EXPORT_SYMBOL_GPL(mt76x2_read_rx_gain);

From 31db78a4923ef5e2008f2eed321811ca79e7f71b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 19 Sep 2023 08:34:15 +0200
Subject: [PATCH 2040/2047] wifi: mac80211: fix potential key use-after-free

When ieee80211_key_link() is called by ieee80211_gtk_rekey_add()
but returns 0 due to KRACK protection (identical key reinstall),
ieee80211_gtk_rekey_add() will still return a pointer into the
key, in a potential use-after-free. This normally doesn't happen
since it's only called by iwlwifi in case of WoWLAN rekey offload
which has its own KRACK protection, but still better to fix, do
that by returning an error code and converting that to success on
the cfg80211 boundary only, leaving the error for bad callers of
ieee80211_gtk_rekey_add().

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Fixes: fdf7cb4185b6 ("mac80211: accept key reinstall without changing anything")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c | 3 +++
 net/mac80211/key.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 45e7a5d9c7d9..e883c41a2163 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -566,6 +566,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 	}
 
 	err = ieee80211_key_link(key, link, sta);
+	/* KRACK protection, shouldn't happen but just silently accept key */
+	if (err == -EALREADY)
+		err = 0;
 
  out_unlock:
 	mutex_unlock(&local->sta_mtx);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 13050dc9321f..84ba20c3e3dc 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -905,7 +905,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
 	 */
 	if (ieee80211_key_identical(sdata, old_key, key)) {
 		ieee80211_key_free_unused(key);
-		ret = 0;
+		ret = -EALREADY;
 		goto out;
 	}
 

From d097ae01ebd48adc028aebcf760117a5317975dc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 19 Sep 2023 08:34:16 +0200
Subject: [PATCH 2041/2047] wifi: mac80211: fix potential key leak

When returning from ieee80211_key_link(), the key needs to
have been freed or successfully installed. This was missed
in a number of error paths, fix it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/key.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 84ba20c3e3dc..0665ff5e456e 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -802,6 +802,9 @@ static void ieee80211_key_destroy(struct ieee80211_key *key,
 
 void ieee80211_key_free_unused(struct ieee80211_key *key)
 {
+	if (!key)
+		return;
+
 	WARN_ON(key->sdata || key->local);
 	ieee80211_key_free_common(key);
 }
@@ -854,7 +857,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
 	 * can cause warnings to appear.
 	 */
 	bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION;
-	int ret = -EOPNOTSUPP;
+	int ret;
 
 	mutex_lock(&sdata->local->key_mtx);
 
@@ -868,8 +871,10 @@ int ieee80211_key_link(struct ieee80211_key *key,
 		 * the same cipher. Enforce the assumption for pairwise keys.
 		 */
 		if ((alt_key && alt_key->conf.cipher != key->conf.cipher) ||
-		    (old_key && old_key->conf.cipher != key->conf.cipher))
+		    (old_key && old_key->conf.cipher != key->conf.cipher)) {
+			ret = -EOPNOTSUPP;
 			goto out;
+		}
 	} else if (sta) {
 		struct link_sta_info *link_sta = &sta->deflink;
 		int link_id = key->conf.link_id;
@@ -895,8 +900,10 @@ int ieee80211_key_link(struct ieee80211_key *key,
 
 	/* Non-pairwise keys must also not switch the cipher on rekey */
 	if (!pairwise) {
-		if (old_key && old_key->conf.cipher != key->conf.cipher)
+		if (old_key && old_key->conf.cipher != key->conf.cipher) {
+			ret = -EOPNOTSUPP;
 			goto out;
+		}
 	}
 
 	/*
@@ -904,9 +911,8 @@ int ieee80211_key_link(struct ieee80211_key *key,
 	 * new version of the key to avoid nonce reuse or replay issues.
 	 */
 	if (ieee80211_key_identical(sdata, old_key, key)) {
-		ieee80211_key_free_unused(key);
 		ret = -EALREADY;
-		goto out;
+		goto unlock;
 	}
 
 	key->local = sdata->local;
@@ -930,7 +936,11 @@ int ieee80211_key_link(struct ieee80211_key *key,
 		ieee80211_key_free(key, delay_tailroom);
 	}
 
+	key = NULL;
+
  out:
+	ieee80211_key_free_unused(key);
+ unlock:
 	mutex_unlock(&sdata->local->key_mtx);
 
 	return ret;

From 0914468adf92296c4cba8a2134e06e3dea150f2e Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Mon, 18 Sep 2023 14:10:54 +0300
Subject: [PATCH 2042/2047] wifi: cfg80211: Fix 6GHz scan configuration

When the scan request includes a non broadcast BSSID, when adding the
scan parameters for 6GHz collocated scanning, do not include entries
that do not match the given BSSID.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
Link: https://lore.kernel.org/r/20230918140607.6d31d2a96baf.I6c4e3e3075d1d1878ee41f45190fdc6b86f18708@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/scan.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 0cf1ce7b6934..939deecf0bbe 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -908,6 +908,10 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
 		    !cfg80211_find_ssid_match(ap, request))
 			continue;
 
+		if (!is_broadcast_ether_addr(request->bssid) &&
+		    !ether_addr_equal(request->bssid, ap->bssid))
+			continue;
+
 		if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid)
 			continue;
 

From 084cf2aeca97566db4fa15d55653c1cba2db83ed Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 18 Sep 2023 14:10:55 +0300
Subject: [PATCH 2043/2047] wifi: mac80211: work around Cisco AP 9115 VHT MPDU
 length

Cisco AP module 9115 with FW 17.3 has a bug and sends a too
large maximum MPDU length in the association response
(indicating 12k) that it cannot actually process.

Work around that by taking the minimum between what's in the
association response and the BSS elements (from beacon or
probe response).

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
Link: https://lore.kernel.org/r/20230918140607.d1966a9a532e.I090225babb7cd4d1081ee9acd40e7de7e41c15ae@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c         |  3 ++-
 net/mac80211/ibss.c        |  2 +-
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/mesh_plink.c  |  2 +-
 net/mac80211/mlme.c        | 27 +++++++++++++++++++++++++--
 net/mac80211/vht.c         | 16 ++++++++++++++--
 6 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e883c41a2163..0e3a1753a51c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1860,7 +1860,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
 	/* VHT can override some HT caps such as the A-MSDU max length */
 	if (params->vht_capa)
 		ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
-						    params->vht_capa, link_sta);
+						    params->vht_capa, NULL,
+						    link_sta);
 
 	if (params->he_capa)
 		ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index e1900077bc4b..5542c93edfba 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1072,7 +1072,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
 						   &chandef);
 			memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie));
 			ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
-							    &cap_ie,
+							    &cap_ie, NULL,
 							    &sta->deflink);
 			if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap)))
 				rates_updated |= true;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b3d00259e1d6..98ef1fe1226e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2141,6 +2141,7 @@ void
 ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_supported_band *sband,
 				    const struct ieee80211_vht_cap *vht_cap_ie,
+				    const struct ieee80211_vht_cap *vht_cap_ie2,
 				    struct link_sta_info *link_sta);
 enum ieee80211_sta_rx_bandwidth
 ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index f3d5bb0a59f1..a1e526419e9d 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -451,7 +451,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
 		changed |= IEEE80211_RC_BW_CHANGED;
 
 	ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
-					    elems->vht_cap_elem,
+					    elems->vht_cap_elem, NULL,
 					    &sta->deflink);
 
 	ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 46d46cfab6c8..0e61eb5a29d1 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4202,10 +4202,33 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
 						  elems->ht_cap_elem,
 						  link_sta);
 
-	if (elems->vht_cap_elem && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT))
+	if (elems->vht_cap_elem &&
+	    !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) {
+		const struct ieee80211_vht_cap *bss_vht_cap = NULL;
+		const struct cfg80211_bss_ies *ies;
+
+		/*
+		 * Cisco AP module 9115 with FW 17.3 has a bug and sends a
+		 * too large maximum MPDU length in the association response
+		 * (indicating 12k) that it cannot actually process ...
+		 * Work around that.
+		 */
+		rcu_read_lock();
+		ies = rcu_dereference(cbss->ies);
+		if (ies) {
+			const struct element *elem;
+
+			elem = cfg80211_find_elem(WLAN_EID_VHT_CAPABILITY,
+						  ies->data, ies->len);
+			if (elem && elem->datalen >= sizeof(*bss_vht_cap))
+				bss_vht_cap = (const void *)elem->data;
+		}
+
 		ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
 						    elems->vht_cap_elem,
-						    link_sta);
+						    bss_vht_cap, link_sta);
+		rcu_read_unlock();
+	}
 
 	if (elems->he_operation && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) &&
 	    elems->he_cap) {
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index c1250aa47808..b3a5c3e96a72 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -4,7 +4,7 @@
  *
  * Portions of this file
  * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2022 Intel Corporation
+ * Copyright (C) 2018 - 2023 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -116,12 +116,14 @@ void
 ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_supported_band *sband,
 				    const struct ieee80211_vht_cap *vht_cap_ie,
+				    const struct ieee80211_vht_cap *vht_cap_ie2,
 				    struct link_sta_info *link_sta)
 {
 	struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap;
 	struct ieee80211_sta_vht_cap own_cap;
 	u32 cap_info, i;
 	bool have_80mhz;
+	u32 mpdu_len;
 
 	memset(vht_cap, 0, sizeof(*vht_cap));
 
@@ -317,11 +319,21 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 
 	link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta);
 
+	/*
+	 * Work around the Cisco 9115 FW 17.3 bug by taking the min of
+	 * both reported MPDU lengths.
+	 */
+	mpdu_len = vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK;
+	if (vht_cap_ie2)
+		mpdu_len = min_t(u32, mpdu_len,
+				 le32_get_bits(vht_cap_ie2->vht_cap_info,
+					       IEEE80211_VHT_CAP_MAX_MPDU_MASK));
+
 	/*
 	 * FIXME - should the amsdu len be per link? store per link
 	 * and maintain a minimum?
 	 */
-	switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) {
+	switch (mpdu_len) {
 	case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
 		link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
 		break;

From 61304336c67358d49a989e5e0060d8c99bad6ca8 Mon Sep 17 00:00:00 2001
From: Wen Gong <quic_wgong@quicinc.com>
Date: Tue, 1 Aug 2023 02:47:51 -0400
Subject: [PATCH 2044/2047] wifi: mac80211: allow transmitting EAPOL frames
 with tainted key

Lower layer device driver stop/wake TX by calling ieee80211_stop_queue()/
ieee80211_wake_queue() while hw scan. Sometimes hw scan and PTK rekey are
running in parallel, when M4 sent from wpa_supplicant arrive while the TX
queue is stopped, then the M4 will pending send, and then new key install
from wpa_supplicant. After TX queue wake up by lower layer device driver,
the M4 will be dropped by below call stack.

When key install started, the current key flag is set KEY_FLAG_TAINTED in
ieee80211_pairwise_rekey(), and then mac80211 wait key install complete by
lower layer device driver. Meanwhile ieee80211_tx_h_select_key() will return
TX_DROP for the M4 in step 12 below, and then ieee80211_free_txskb() called
by ieee80211_tx_dequeue(), so the M4 will not send and free, then the rekey
process failed becaue AP not receive M4. Please see details in steps below.

There are a interval between KEY_FLAG_TAINTED set for current key flag and
install key complete by lower layer device driver, the KEY_FLAG_TAINTED is
set in this interval, all packet including M4 will be dropped in this
interval, the interval is step 8~13 as below.

issue steps:
      TX thread                 install key thread
1.   stop_queue                      -idle-
2.   sending M4                      -idle-
3.   M4 pending                      -idle-
4.     -idle-                  starting install key from wpa_supplicant
5.     -idle-                  =>ieee80211_key_replace()
6.     -idle-                  =>ieee80211_pairwise_rekey() and set
                                 currently key->flags |= KEY_FLAG_TAINTED
7.     -idle-                  =>ieee80211_key_enable_hw_accel()
8.     -idle-                  =>drv_set_key() and waiting key install
                                 complete from lower layer device driver
9.   wake_queue                     -waiting state-
10.  re-sending M4                  -waiting state-
11.  =>ieee80211_tx_h_select_key()  -waiting state-
12.  drop M4 by KEY_FLAG_TAINTED    -waiting state-
13.    -idle-                   install key complete with success/fail
                                  success: clear flag KEY_FLAG_TAINTED
                                  fail: start disconnect

Hence add check in step 11 above to allow the EAPOL send out in the
interval. If lower layer device driver use the old key/cipher to encrypt
the M4, then AP received/decrypt M4 correctly, after M4 send out, lower
layer device driver install the new key/cipher to hardware and return
success.

If lower layer device driver use new key/cipher to send the M4, then AP
will/should drop the M4, then it is same result with this issue, AP will/
should kick out station as well as this issue.

issue log:
kworker/u16:4-5238  [000]  6456.108926: stop_queue:           phy1 queue:0, reason:0
wpa_supplicant-961  [003]  6456.119737: rdev_tx_control_port: wiphy_name=phy1 name=wlan0 ifindex=6 dest=ARRAY[9e, 05, 31, 20, 9b, d0] proto=36488 unencrypted=0
wpa_supplicant-961  [003]  6456.119839: rdev_return_int_cookie: phy1, returned 0, cookie: 504
wpa_supplicant-961  [003]  6456.120287: rdev_add_key:         phy1, netdev:wlan0(6), key_index: 0, mode: 0, pairwise: true, mac addr: 9e:05:31:20:9b:d0
wpa_supplicant-961  [003]  6456.120453: drv_set_key:          phy1 vif:wlan0(2) sta:9e:05:31:20:9b:d0 cipher:0xfac04, flags=0x9, keyidx=0, hw_key_idx=0
kworker/u16:9-3829  [001]  6456.168240: wake_queue:           phy1 queue:0, reason:0
kworker/u16:9-3829  [001]  6456.168255: drv_wake_tx_queue:    phy1 vif:wlan0(2) sta:9e:05:31:20:9b:d0 ac:0 tid:7
kworker/u16:9-3829  [001]  6456.168305: cfg80211_control_port_tx_status: wdev(1), cookie: 504, ack: false
wpa_supplicant-961  [003]  6459.167982: drv_return_int:       phy1 - -110

issue call stack:
nl80211_frame_tx_status+0x230/0x340 [cfg80211]
cfg80211_control_port_tx_status+0x1c/0x28 [cfg80211]
ieee80211_report_used_skb+0x374/0x3e8 [mac80211]
ieee80211_free_txskb+0x24/0x40 [mac80211]
ieee80211_tx_dequeue+0x644/0x954 [mac80211]
ath10k_mac_tx_push_txq+0xac/0x238 [ath10k_core]
ath10k_mac_op_wake_tx_queue+0xac/0xe0 [ath10k_core]
drv_wake_tx_queue+0x80/0x168 [mac80211]
__ieee80211_wake_txqs+0xe8/0x1c8 [mac80211]
_ieee80211_wake_txqs+0xb4/0x120 [mac80211]
ieee80211_wake_txqs+0x48/0x80 [mac80211]
tasklet_action_common+0xa8/0x254
tasklet_action+0x2c/0x38
__do_softirq+0xdc/0x384

Signed-off-by: Wen Gong <quic_wgong@quicinc.com>
Link: https://lore.kernel.org/r/20230801064751.25803-1-quic_wgong@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7fe7280e8437..d45d4be63dd8 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -665,7 +665,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 		}
 
 		if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED &&
-			     !ieee80211_is_deauth(hdr->frame_control)))
+			     !ieee80211_is_deauth(hdr->frame_control)) &&
+			     tx->skb->protocol != tx->sdata->control_port_protocol)
 			return TX_DROP;
 
 		if (!skip_hw && tx->key &&

From 334bf33eec5701a1e4e967bcb7cc8611a998334b Mon Sep 17 00:00:00 2001
From: Benjamin Berg <benjamin.berg@intel.com>
Date: Mon, 25 Sep 2023 17:18:56 +0200
Subject: [PATCH 2045/2047] wifi: cfg80211: avoid leaking stack data into trace

If the structure is not initialized then boolean types might be copied
into the tracing data without being initialised. This causes data from
the stack to leak into the trace and also triggers a UBSAN failure which
can easily be avoided here.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Link: https://lore.kernel.org/r/20230925171855.a9271ef53b05.I8180bae663984c91a3e036b87f36a640ba409817@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7a88361b3414..931a03f4549c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8501,7 +8501,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb,
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net_device *dev = info->user_ptr[1];
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct mesh_config cfg;
+	struct mesh_config cfg = {};
 	u32 mask;
 	int err;
 

From aaba3cd33fc9593a858beeee419c0e6671ee9551 Mon Sep 17 00:00:00 2001
From: Benjamin Berg <benjamin.berg@intel.com>
Date: Mon, 25 Sep 2023 17:30:29 +0200
Subject: [PATCH 2046/2047] wifi: mac80211: Create resources for disabled links

When associating to an MLD AP, links may be disabled. Create all
resources associated with a disabled link so that we can later enable it
without having to create these resources on the fly.

Fixes: 6d543b34dbcf ("wifi: mac80211: Support disabled links during association")
Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Link: https://lore.kernel.org/r/20230925173028.f9afdb26f6c7.I4e6e199aaefc1bf017362d64f3869645fa6830b5@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0e61eb5a29d1..0c9198997482 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -5130,9 +5130,10 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 				continue;
 
 			valid_links |= BIT(link_id);
-			if (assoc_data->link[link_id].disabled) {
+			if (assoc_data->link[link_id].disabled)
 				dormant_links |= BIT(link_id);
-			} else if (link_id != assoc_data->assoc_link_id) {
+
+			if (link_id != assoc_data->assoc_link_id) {
 				err = ieee80211_sta_allocate_link(sta, link_id);
 				if (err)
 					goto out_err;
@@ -5147,7 +5148,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		struct ieee80211_link_data *link;
 		struct link_sta_info *link_sta;
 
-		if (!cbss || assoc_data->link[link_id].disabled)
+		if (!cbss)
 			continue;
 
 		link = sdata_dereference(sdata->link[link_id], sdata);

From 22061bfc57fe08c77141dc876b4af75603c4d61d Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Tue, 26 Sep 2023 16:55:50 +0300
Subject: [PATCH 2047/2047] wifi: iwlwifi: mvm: Fix incorrect usage of scan API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The support for using link ID in the scan request API was only
added in version 16. However, the code wrongly enabled this
API usage also for older versions. Fix it.

Reported-by: Antoine Beaupré <anarcat@debian.org>
Fixes: e98b23d0d7b8 ("wifi: iwlwifi: mvm: Add support for SCAN API version 16")
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
Link: https://lore.kernel.org/r/20230926165546.086e635fbbe6.Ia660f35ca0b1079f2c2ea92fd8d14d8101a89d03@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index c1d9ce753468..3cbe2c0b8d6b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -2342,7 +2342,7 @@ iwl_mvm_scan_umac_fill_general_p_v12(struct iwl_mvm *mvm,
 	if (gen_flags & IWL_UMAC_SCAN_GEN_FLAGS_V2_FRAGMENTED_LMAC2)
 		gp->num_of_fragments[SCAN_HB_LMAC_IDX] = IWL_SCAN_NUM_OF_FRAGS;
 
-	if (version < 12) {
+	if (version < 16) {
 		gp->scan_start_mac_or_link_id = scan_vif->id;
 	} else {
 		struct iwl_mvm_vif_link_info *link_info;